/*
 * Decompiled with CFR 0.152.
 */
package org.texboobcat.autolang.detect;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import org.texboobcat.autolang.config.ConfigManager;
import org.texboobcat.autolang.detect.LanguageConfidence;
import org.texboobcat.autolang.detect.LanguageDetector;
import org.texboobcat.autolang.locale.Language;

/**
 * {@link LanguageDetector} that guesses a language by counting how many words
 * of a message appear in small per-language lists of common words.
 *
 * <p>Each language scores one point per message word found in its list. The
 * language with the highest score wins, and the reported confidence is that
 * score divided by the number of words in the message.
 */
public class HeuristicDetector implements LanguageDetector {
    /** Messages with fewer words than this are never classified. */
    private static final int MIN_WORDS = 2;
    /** Minimum fraction of message words that must match the winning language. */
    private static final double MIN_CONFIDENCE = 0.3;

    private static final Set<String> ES_COMMON = Set.of("hola", "gracias", "por", "favor", "buenos", "buenas", "d\u00edas", "s\u00ed", "no", "de", "la", "el", "y", "que", "como", "es", "est\u00e1", "son");
    private static final Set<String> FR_COMMON = Set.of("bonjour", "merci", "oui", "non", "comment", "\u00e7a", "va", "bien", "tr\u00e8s", "le", "la", "les", "de", "un", "une", "est", "sont", "avec", "pour");
    private static final Set<String> DE_COMMON = Set.of("hallo", "danke", "bitte", "ja", "nein", "wie", "was", "der", "die", "das", "ist", "sind", "und", "mit", "f\u00fcr", "von", "zu", "auf", "in");
    private static final Set<String> IT_COMMON = Set.of("ciao", "grazie", "s\u00ec", "no", "come", "cosa", "il", "la", "\u00e8", "sono", "con", "per", "di", "a", "da", "in", "che", "una", "uno");
    private static final Set<String> PT_COMMON = Set.of("ol\u00e1", "obrigado", "sim", "n\u00e3o", "como", "que", "o", "a", "\u00e9", "s\u00e3o", "com", "para", "de", "em", "um", "uma", "do", "da", "por");

    /**
     * Language code to common-word list. Immutable; the detection result does
     * not depend on this map's iteration order because every language is
     * scored for every word.
     */
    private static final Map<String, Set<String>> LANGUAGE_PATTERNS = Map.of(
            "es", ES_COMMON,
            "fr", FR_COMMON,
            "de", DE_COMMON,
            "it", IT_COMMON,
            "pt", PT_COMMON);

    private final ConfigManager config;

    /**
     * @param config used to look up the {@link Language} for a detected language code
     */
    public HeuristicDetector(ConfigManager config) {
        this.config = config;
    }

    /**
     * Guesses the language of {@code text} from its common words.
     *
     * @param text the message to classify; may be {@code null}
     * @return the best-scoring language and its confidence, or an empty
     *         {@code Optional} when the text is {@code null} or blank, has
     *         fewer than {@value #MIN_WORDS} words, matches no word list, or
     *         the confidence is below {@value #MIN_CONFIDENCE}
     */
    @Override
    public Optional<LanguageConfidence> detectLanguage(String text) {
        if (text == null || text.trim().isEmpty()) {
            return Optional.empty();
        }
        // Trim before splitting: split("\\s+") on text with leading whitespace
        // yields an empty first token, which would be counted as a word.
        String[] rawTokens = text.trim().toLowerCase(Locale.ROOT).split("\\s+");

        Map<String, Integer> scores = new HashMap<>();
        int wordCount = 0;
        for (String rawToken : rawTokens) {
            // "hola," and "gracias!" should match the same entries as the bare words.
            String word = stripEdgePunctuation(rawToken);
            if (word.isEmpty()) {
                // Pure punctuation/symbol token: not a word, so it must not
                // dilute the confidence ratio either.
                continue;
            }
            wordCount++;
            for (Map.Entry<String, Set<String>> entry : LANGUAGE_PATTERNS.entrySet()) {
                if (entry.getValue().contains(word)) {
                    scores.merge(entry.getKey(), 1, Integer::sum);
                }
            }
        }
        if (wordCount < MIN_WORDS || scores.isEmpty()) {
            return Optional.empty();
        }

        // Strict '>' means that on a tie the entry iterated first is kept;
        // that order is whatever the score map's iteration order happens to be.
        String bestLang = null;
        int bestScore = 0;
        for (Map.Entry<String, Integer> entry : scores.entrySet()) {
            if (entry.getValue() > bestScore) {
                bestScore = entry.getValue();
                bestLang = entry.getKey();
            }
        }
        if (bestLang == null) {
            return Optional.empty();
        }

        double confidence = (double) bestScore / (double) wordCount;
        if (confidence < MIN_CONFIDENCE) {
            return Optional.empty();
        }

        Language language = this.config.getLanguageByCode(bestLang);
        if (language == null) {
            // The config has no entry for this code; fall back to a Language
            // built from the code alone.
            language = new Language(bestLang, bestLang);
        }
        return Optional.of(new LanguageConfidence(language, confidence));
    }

    /**
     * Removes leading and trailing characters that are neither letters nor
     * digits. Characters inside the token are left untouched.
     *
     * @param token a whitespace-delimited token
     * @return the token without edge punctuation; empty if it contained no
     *         letter or digit
     */
    private static String stripEdgePunctuation(String token) {
        int start = 0;
        int end = token.length();
        while (start < end) {
            int codePoint = token.codePointAt(start);
            if (Character.isLetterOrDigit(codePoint)) {
                break;
            }
            start += Character.charCount(codePoint);
        }
        while (end > start) {
            int codePoint = token.codePointBefore(end);
            if (Character.isLetterOrDigit(codePoint)) {
                break;
            }
            end -= Character.charCount(codePoint);
        }
        return token.substring(start, end);
    }
}

