/*
 * Decompiled with CFR 0.152.
 */
package me.autolang.detect;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Pattern;
import me.autolang.config.ConfigManager;
import me.autolang.detect.LanguageConfidence;
import me.autolang.detect.LanguageDetector;
import me.autolang.locale.Language;

/**
 * Rule-based {@link LanguageDetector} that scores each configured language using
 * script detection, diacritics, common-word lists, typical word endings and
 * letter frequencies, then reports the best-scoring language with a confidence
 * derived from how clearly it beats the runner-up.
 *
 * <p>Only languages returned by {@code ConfigManager.getLanguages()} can be reported;
 * scores accumulated for other language codes are ignored.
 */
public class HeuristicDetector
implements LanguageDetector {
    private final ConfigManager cfg;
    private static final Set<String> ES_COMMON = wordSet("hola", "gracias", "por", "favor", "buenos", "buenas", "noches", "d\u00edas", "adios", "adi\u00f3s", "si", "s\u00ed", "no", "de", "la", "el", "y", "que", "como", "c\u00f3mo", "cuando", "cu\u00e1ndo", "un", "una", "los", "las", "del", "al", "con", "en", "para", "por", "sin", "sobre", "yo", "t\u00fa", "\u00e9l", "ella", "nosotros", "ustedes", "ellos", "ellas", "me", "te", "se", "nos", "es", "est\u00e1", "son", "est\u00e1n", "ser", "estar", "hacer", "tener", "ir", "venir", "ver", "saber", "poder", "querer", "deber", "decir", "hablar", "comer", "beber", "vivir", "hoy", "ayer", "ma\u00f1ana", "ahora", "despu\u00e9s", "antes", "siempre", "nunca", "todo", "nada", "uno", "dos", "tres", "cuatro", "cinco", "seis", "siete", "ocho", "nueve", "diez");
    private static final Set<String> FR_COMMON = wordSet("bonjour", "bonsoir", "salut", "merci", "s'il", "sil", "vous", "plait", "pla\u00eet", "oui", "non", "comment", "\u00e7a", "ca", "va", "bien", "mal", "tr\u00e8s", "beaucoup", "le", "la", "les", "un", "une", "des", "du", "de", "avec", "dans", "sur", "pour", "par", "sans", "sous", "entre", "pendant", "depuis", "jusqu", "jusque", "je", "tu", "il", "elle", "nous", "vous", "ils", "elles", "me", "te", "se", "nous", "est", "sont", "\u00eatre", "avoir", "faire", "aller", "venir", "voir", "savoir", "pouvoir", "vouloir", "devoir", "dire", "parler", "manger", "boire", "vivre", "donner", "prendre", "aujourd", "hui", "hier", "demain", "maintenant", "apr\u00e8s", "avant", "toujours", "jamais", "tout", "rien", "quelque", "chose", "temps", "fois", "ann\u00e9e", "jour", "heure", "un", "deux", "trois", "quatre", "cinq", "six", "sept", "huit", "neuf", "dix");
    private static final Set<String> DE_COMMON = wordSet("hallo", "guten", "tag", "morgen", "abend", "nacht", "danke", "bitte", "ja", "nein", "wie", "was", "wo", "wann", "warum", "wer", "welche", "welcher", "welches", "der", "die", "das", "ein", "eine", "den", "dem", "des", "einen", "einer", "eines", "ich", "du", "er", "sie", "es", "wir", "ihr", "sie", "mich", "dich", "sich", "uns", "euch", "mit", "ohne", "f\u00fcr", "gegen", "durch", "um", "an", "auf", "in", "zu", "von", "bei", "ist", "sind", "war", "waren", "sein", "haben", "werden", "k\u00f6nnen", "m\u00fcssen", "sollen", "wollen", "d\u00fcrfen", "m\u00f6gen", "gehen", "kommen", "machen", "sagen", "sehen", "wissen", "eins", "zwei", "drei", "vier", "f\u00fcnf", "sechs", "sieben", "acht", "neun", "zehn", "heute", "gestern", "morgen", "jetzt", "immer", "nie", "schon", "noch", "auch", "nur");
    private static final Set<String> IT_COMMON = wordSet("ciao", "buongiorno", "buonasera", "buonanotte", "grazie", "prego", "scusi", "s\u00ec", "no", "come", "cosa", "dove", "quando", "perch\u00e9", "chi", "quale", "quanto", "il", "la", "lo", "gli", "le", "un", "uno", "una", "del", "della", "dello", "degli", "delle", "io", "tu", "lui", "lei", "noi", "voi", "loro", "mi", "ti", "si", "ci", "vi", "con", "senza", "per", "da", "in", "su", "di", "a", "tra", "fra", "sotto", "sopra", "\u00e8", "sono", "essere", "avere", "fare", "andare", "venire", "vedere", "sapere", "potere", "volere", "dovere", "dire", "parlare", "mangiare", "bere", "vivere", "dare", "stare", "uno", "due", "tre", "quattro", "cinque", "sei", "sette", "otto", "nove", "dieci", "oggi", "ieri", "domani", "ora", "adesso", "sempre", "mai", "tutto", "niente", "molto", "poco");
    private static final Set<String> PT_COMMON = wordSet("ol\u00e1", "oi", "bom", "dia", "tarde", "noite", "obrigado", "obrigada", "por", "favor", "sim", "n\u00e3o", "como", "que", "onde", "quando", "por", "porque", "quem", "qual", "o", "a", "os", "as", "um", "uma", "uns", "umas", "do", "da", "dos", "das", "no", "na", "eu", "tu", "voc\u00ea", "ele", "ela", "n\u00f3s", "voc\u00eas", "eles", "elas", "me", "te", "se", "nos", "com", "sem", "para", "por", "de", "em", "sobre", "entre", "durante", "desde", "at\u00e9", "\u00e9", "s\u00e3o", "estar", "est\u00e1", "est\u00e3o", "ser", "ter", "fazer", "ir", "vir", "ver", "saber", "poder", "querer", "dever", "dizer", "falar", "comer", "beber", "viver", "dar", "um", "dois", "tr\u00eas", "quatro", "cinco", "seis", "sete", "oito", "nove", "dez", "hoje", "ontem", "amanh\u00e3", "agora", "sempre", "nunca", "tudo", "nada", "muito", "pouco");
    private static final Set<String> RU_COMMON = wordSet("\u043f\u0440\u0438\u0432\u0435\u0442", "\u0437\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439", "\u0437\u0434\u0440\u0430\u0432\u0441\u0442\u0432\u0443\u0439\u0442\u0435", "\u043f\u043e\u043a\u0430", "\u0434\u043e", "\u0441\u0432\u0438\u0434\u0430\u043d\u0438\u044f", "\u0441\u043f\u0430\u0441\u0438\u0431\u043e", "\u043f\u043e\u0436\u0430\u043b\u0443\u0439\u0441\u0442\u0430", "\u0434\u0430", "\u043d\u0435\u0442", "\u043a\u0430\u043a", "\u0447\u0442\u043e", "\u0433\u0434\u0435", "\u043a\u043e\u0433\u0434\u0430", "\u043f\u043e\u0447\u0435\u043c\u0443", "\u043a\u0442\u043e", "\u043a\u0430\u043a\u043e\u0439", "\u043a\u0430\u043a\u0430\u044f", "\u043a\u0430\u043a\u043e\u0435", "\u044f", "\u0442\u044b", "\u043e\u043d", "\u043e\u043d\u0430", "\u043c\u044b", "\u0432\u044b", "\u043e\u043d\u0438", "\u043c\u0435\u043d\u044f", "\u0442\u0435\u0431\u044f", "\u0435\u0433\u043e", "\u0435\u0451", "\u043d\u0430\u0441", "\u0432\u0430\u0441", "\u0438\u0445", "\u0438", "\u0438\u043b\u0438", "\u043d\u043e", "\u0430", "\u0432", "\u043d\u0430", "\u0441", "\u043a", "\u043e\u0442", "\u0434\u043b\u044f", "\u0431\u0435\u0437", "\u0447\u0435\u0440\u0435\u0437", "\u043f\u0440\u0438", "\u043f\u043e", "\u0437\u0430", "\u044d\u0442\u043e", "\u044d\u0442\u043e\u0442", "\u044d\u0442\u0430", "\u044d\u0442\u0438", "\u0442\u043e\u0442", "\u0442\u0430", "\u0442\u0435", "\u0432\u0441\u0435", "\u0432\u0441\u0451", "\u043e\u0434\u0438\u043d", "\u043e\u0434\u043d\u0430", "\u043e\u0434\u043d\u043e", "\u0431\u044b\u0442\u044c", "\u0435\u0441\u0442\u044c", "\u0431\u044b\u043b", "\u0431\u044b\u043b\u0430", "\u0431\u044b\u043b\u043e", "\u0431\u044b\u043b\u0438", "\u0431\u0443\u0434\u0443", "\u0431\u0443\u0434\u0435\u0448\u044c", "\u0431\u0443\u0434\u0435\u0442", "\u0431\u0443\u0434\u0435\u043c", "\u0431\u0443\u0434\u0435\u0442\u0435");
    private static final Pattern ES_DIACRITICS = Pattern.compile("[\u00f1\u00e1\u00e9\u00ed\u00f3\u00fa\u00fc\u00d1\u00c1\u00c9\u00cd\u00d3\u00da\u00dc]");
    private static final Pattern FR_DIACRITICS = Pattern.compile("[\u00e0\u00e2\u00e7\u00e9\u00e8\u00ea\u00eb\u00ee\u00ef\u00f4\u00f9\u00fb\u00fc\u00ff\u0153\u00c0\u00c2\u00c7\u00c9\u00c8\u00ca\u00cb\u00ce\u00cf\u00d4\u00d9\u00db\u00dc\u0178\u0152]");
    private static final Pattern DE_DIACRITICS = Pattern.compile("[\u00e4\u00f6\u00fc\u00df\u00c4\u00d6\u00dc]");
    private static final Pattern IT_DIACRITICS = Pattern.compile("[\u00e0\u00e8\u00e9\u00ec\u00ee\u00ed\u00f2\u00f3\u00f9\u00fa\u00c0\u00c8\u00c9\u00cc\u00ce\u00cd\u00d2\u00d3\u00d9\u00da]");
    private static final Pattern PT_DIACRITICS = Pattern.compile("[\u00e3\u00e2\u00e1\u00e0\u00e7\u00ea\u00e9\u00e8\u00ed\u00ee\u00ec\u00f5\u00f4\u00f3\u00f2\u00fa\u00fb\u00f9\u00c3\u00c2\u00c1\u00c0\u00c7\u00ca\u00c9\u00c8\u00cd\u00ce\u00cc\u00d5\u00d4\u00d3\u00d2\u00da\u00db\u00d9]");
    // CASE_INSENSITIVE alone only folds US-ASCII; UNICODE_CASE is required so that
    // upper-case Cyrillic letters in the raw text are matched as well.
    private static final Pattern CYRILLIC_PATTERN = Pattern.compile("[\u0430-\u044f\u0451]+", Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
    private static final Pattern ARABIC_PATTERN = Pattern.compile("[\\u0600-\\u06FF\\u0750-\\u077F]+");
    private static final Pattern CHINESE_PATTERN = Pattern.compile("[\\u4e00-\\u9fff]+");
    private static final Pattern JAPANESE_HIRAGANA = Pattern.compile("[\\u3040-\\u309f]+");
    private static final Pattern JAPANESE_KATAKANA = Pattern.compile("[\\u30a0-\\u30ff]+");
    private static final Pattern KOREAN_PATTERN = Pattern.compile("[\\uac00-\\ud7af]+");
    private static final Pattern EN_COMMON_ENDINGS = Pattern.compile("(ing|tion|ness|ment|able|ible|ful|less)$");
    private static final Pattern ES_COMMON_ENDINGS = Pattern.compile("(ci\u00f3n|si\u00f3n|mente|able|ible|ado|ida|ero|era)$");
    private static final Pattern FR_COMMON_ENDINGS = Pattern.compile("(tion|sion|ment|able|ible|eur|euse|ique|age)$");
    private static final Pattern DE_COMMON_ENDINGS = Pattern.compile("(ung|keit|heit|schaft|lich|isch|bar|los)$");
    private static final Pattern IT_COMMON_ENDINGS = Pattern.compile("(zione|sione|mente|bile|abile|ario|iero|oso)$");
    private static final Pattern PT_COMMON_ENDINGS = Pattern.compile("(\u00e7\u00e3o|s\u00e3o|mente|\u00e1vel|\u00edvel|oso|osa|eiro|eira)$");
    // Unicode-aware token delimiter. The default (ASCII-only) non-word class treats
    // accented and Cyrillic letters as delimiters, which cuts such words into pieces
    // that can never match the word lists or the accented suffix patterns.
    private static final Pattern TOKEN_SPLIT = Pattern.compile("\\W+", Pattern.UNICODE_CHARACTER_CLASS);

    /** Points awarded when a language-specific script (Cyrillic, Arabic, CJK, ...) is present. */
    private static final int SCRIPT_WEIGHT = 10;
    /** Points awarded when a language-specific diacritic is present. */
    private static final int DIACRITIC_WEIGHT = 3;
    /** Points awarded per token found in a common-word list. */
    private static final int WORD_WEIGHT = 2;
    /** Points awarded per token with a language-typical ending. */
    private static final int ENDING_WEIGHT = 1;
    /** Results below this confidence are reported as "not detected". */
    private static final double MIN_CONFIDENCE = 0.1;

    /**
     * Creates a detector limited to the languages exposed by the given configuration.
     *
     * @param cfg configuration providing the candidate languages
     */
    public HeuristicDetector(ConfigManager cfg) {
        this.cfg = cfg;
    }

    /**
     * Builds an immutable word set from the given entries.
     *
     * <p>Unlike {@code Set.of}, which throws {@code IllegalArgumentException} when the
     * same element is listed twice, this tolerates repeated entries. Several word lists
     * legitimately repeat a word (for example one that is both a preposition and a
     * numeral), and a throwing factory would make class initialisation fail.
     *
     * @param entries the words to include; must not contain {@code null}
     * @return an unmodifiable set containing each distinct entry once
     */
    private static Set<String> wordSet(String... entries) {
        return Set.copyOf(Arrays.asList(entries));
    }

    /**
     * Guesses the language of {@code text} among the configured languages.
     *
     * @param text the text to analyse; may be {@code null}
     * @return the best-scoring language with its confidence, or an empty optional when
     *         the text is {@code null} or blank, no language is configured, no language
     *         scores above zero, or the confidence is below {@code 0.1}
     */
    @Override
    public Optional<LanguageConfidence> detectLanguage(String text) {
        if (text == null || text.isBlank()) {
            return Optional.empty();
        }
        String lower = text.toLowerCase(Locale.ROOT);
        String[] tokens = TOKEN_SPLIT.split(lower);

        Map<String, Integer> scores = new HashMap<>();
        for (Language lang : this.cfg.getLanguages()) {
            scores.put(lang.getCode(), 0);
        }
        scoreScriptsAndDiacritics(text, scores);
        scoreTokens(tokens, scores);
        scoreLetterFrequencies(lower, scores);

        List<LanguageScore> candidates = new ArrayList<>();
        for (Language lang : this.cfg.getLanguages()) {
            int score = scores.getOrDefault(lang.getCode(), 0);
            // English is the fallback for Latin-script text that triggered no other signal.
            if (score == 0 && "en".equals(lang.getCode()) && containsLatinScript(text)) {
                score = 1;
            }
            candidates.add(new LanguageScore(lang, score));
        }
        if (candidates.isEmpty()) {
            return Optional.empty();
        }
        // List.sort is stable, so equally scored languages keep their configured order.
        candidates.sort(Comparator.comparingInt((LanguageScore ls) -> ls.score).reversed());

        LanguageScore best = candidates.get(0);
        if (best.score <= 0) {
            return Optional.empty();
        }
        double confidence = computeConfidence(candidates, best);
        if (confidence < MIN_CONFIDENCE) {
            return Optional.empty();
        }
        return Optional.of(new LanguageConfidence(best.language, confidence));
    }

    /** Adds points for scripts and diacritics found anywhere in the raw text. */
    private static void scoreScriptsAndDiacritics(String text, Map<String, Integer> scores) {
        addIfFound(CYRILLIC_PATTERN, text, "ru", SCRIPT_WEIGHT, scores);
        addIfFound(ARABIC_PATTERN, text, "ar", SCRIPT_WEIGHT, scores);
        addIfFound(CHINESE_PATTERN, text, "zh", SCRIPT_WEIGHT, scores);
        if (JAPANESE_HIRAGANA.matcher(text).find() || JAPANESE_KATAKANA.matcher(text).find()) {
            scores.merge("ja", SCRIPT_WEIGHT, Integer::sum);
        }
        addIfFound(KOREAN_PATTERN, text, "ko", SCRIPT_WEIGHT, scores);

        addIfFound(ES_DIACRITICS, text, "es", DIACRITIC_WEIGHT, scores);
        addIfFound(FR_DIACRITICS, text, "fr", DIACRITIC_WEIGHT, scores);
        addIfFound(DE_DIACRITICS, text, "de", DIACRITIC_WEIGHT, scores);
        addIfFound(IT_DIACRITICS, text, "it", DIACRITIC_WEIGHT, scores);
        addIfFound(PT_DIACRITICS, text, "pt", DIACRITIC_WEIGHT, scores);
    }

    /** Adds points per token for common-word hits (length >= 2) and typical endings (length >= 4). */
    private static void scoreTokens(String[] tokens, Map<String, Integer> scores) {
        for (String token : tokens) {
            // Also skips the empty leading token produced when the text starts with a delimiter.
            if (token.length() < 2) {
                continue;
            }
            addIfListed(ES_COMMON, token, "es", scores);
            addIfListed(FR_COMMON, token, "fr", scores);
            addIfListed(DE_COMMON, token, "de", scores);
            addIfListed(IT_COMMON, token, "it", scores);
            addIfListed(PT_COMMON, token, "pt", scores);
            addIfListed(RU_COMMON, token, "ru", scores);

            if (token.length() < 4) {
                continue;
            }
            addIfFound(EN_COMMON_ENDINGS, token, "en", ENDING_WEIGHT, scores);
            addIfFound(ES_COMMON_ENDINGS, token, "es", ENDING_WEIGHT, scores);
            addIfFound(FR_COMMON_ENDINGS, token, "fr", ENDING_WEIGHT, scores);
            addIfFound(DE_COMMON_ENDINGS, token, "de", ENDING_WEIGHT, scores);
            addIfFound(IT_COMMON_ENDINGS, token, "it", ENDING_WEIGHT, scores);
            addIfFound(PT_COMMON_ENDINGS, token, "pt", ENDING_WEIGHT, scores);
        }
    }

    /** Adds points to en/es/fr/de from weighted relative frequencies of a few letters. */
    private void scoreLetterFrequencies(String lower, Map<String, Integer> scores) {
        Map<String, Double> freqs = this.calculateCharacterFrequencies(lower);
        double e = freqs.getOrDefault("e", 0.0);
        double t = freqs.getOrDefault("t", 0.0);
        double a = freqs.getOrDefault("a", 0.0);
        double o = freqs.getOrDefault("o", 0.0);
        double i = freqs.getOrDefault("i", 0.0);
        double n = freqs.getOrDefault("n", 0.0);

        // The weighted sums are scaled by 10 and truncated to whole points.
        scores.merge("en", (int) ((e * 2.0 + t + a + o) * 10.0), Integer::sum);
        scores.merge("es", (int) ((a * 2.0 + e + o) * 10.0), Integer::sum);
        scores.merge("fr", (int) ((e * 1.5 + a + i) * 10.0), Integer::sum);
        scores.merge("de", (int) ((e * 1.5 + n + i) * 10.0), Integer::sum);
    }

    /**
     * Derives a confidence in {@code [0, 1]} for the best candidate.
     *
     * <p>With several candidates this is the best score's share of the total positive
     * score plus a bonus of up to 0.3 proportional to its lead over the runner-up.
     * With a single candidate it is {@code score / (score + 1)}.
     *
     * @param candidates all candidates, sorted by descending score; not empty
     * @param best       the first element of {@code candidates}; its score is positive
     */
    private static double computeConfidence(List<LanguageScore> candidates, LanguageScore best) {
        int totalScore = candidates.stream().mapToInt(ls -> Math.max(0, ls.score)).sum();
        if (totalScore <= 0) {
            return 0.0;
        }
        if (candidates.size() == 1) {
            return Math.min(1.0, (double) best.score / (double) (totalScore + 1));
        }
        int secondBest = candidates.get(1).score;
        double separation = Math.max(0, best.score - secondBest);
        double baseConfidence = (double) best.score / (double) totalScore;
        return Math.min(1.0, baseConfidence + separation / (double) Math.max(1, best.score) * 0.3);
    }

    /** Adds {@code weight} to {@code code} when {@code pattern} occurs anywhere in {@code input}. */
    private static void addIfFound(Pattern pattern, String input, String code, int weight, Map<String, Integer> scores) {
        if (pattern.matcher(input).find()) {
            scores.merge(code, weight, Integer::sum);
        }
    }

    /** Adds the common-word weight to {@code code} when {@code token} is in {@code words}. */
    private static void addIfListed(Set<String> words, String token, String code, Map<String, Integer> scores) {
        if (words.contains(token)) {
            scores.merge(code, WORD_WEIGHT, Integer::sum);
        }
    }

    /** Returns whether the text contains at least one ASCII letter (A-Z or a-z). */
    private boolean containsLatinScript(String text) {
        return text.chars().anyMatch(c -> (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'));
    }

    /**
     * Computes the relative frequency of every letter in {@code text}.
     *
     * @param text the text to analyse; non-letter characters are ignored
     * @return a map from single-character string to its share of all letters; empty
     *         when the text contains no letters
     */
    private Map<String, Double> calculateCharacterFrequencies(String text) {
        Map<String, Integer> counts = new HashMap<>();
        int totalChars = 0;
        for (char c : text.toCharArray()) {
            if (!Character.isLetter(c)) {
                continue;
            }
            counts.merge(String.valueOf(c), 1, Integer::sum);
            ++totalChars;
        }
        // When no letter was seen, counts is empty and the division below never runs.
        Map<String, Double> frequencies = new HashMap<>();
        for (Map.Entry<String, Integer> entry : counts.entrySet()) {
            frequencies.put(entry.getKey(), (double) entry.getValue() / (double) totalChars);
        }
        return frequencies;
    }

    /** Immutable pairing of a candidate language with its accumulated score. */
    private static final class LanguageScore {
        final Language language;
        final int score;

        LanguageScore(Language language, int score) {
            this.language = language;
            this.score = score;
        }
    }
}

