package org.languagetool.tagging.nl;

import com.google.common.collect.ImmutableSet;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.language.Dutch;
import org.languagetool.rules.nl.CompoundAcceptor;
import org.languagetool.tagging.BaseTagger;
import org.languagetool.tools.StringTools;

/* loaded from: input_file:META-INF/jars/language-nl-6.4.jar:org/languagetool/tagging/nl/DutchTagger.class */
/**
 * Dictionary-based tagger for Dutch, backed by {@code /nl/dutch.dict}.
 *
 * <p>For every input token {@link #tag(List)} tries, in order:
 * <ol>
 *   <li>the token itself (after apostrophe normalisation),</li>
 *   <li>its lowercase form (unless the token is already lowercase or is mixed case),</li>
 *   <li>for all-uppercase tokens that are still untagged, the form with only the first
 *       letter capitalised,</li>
 *   <li>a variant with acute accents removed (and, where the part after the last hyphen is a
 *       known word, with hyphens between lowercase ASCII letters removed),</li>
 *   <li>a two-part compound split supplied by the {@link CompoundAcceptor}.</li>
 * </ol>
 * Tokens for which nothing is found get a single reading with {@code null} POS tag and lemma.
 */
public class DutchTagger extends BaseTagger {
    public static final DutchTagger INSTANCE = new DutchTagger();

    // An accented vowel standing between two characters that are not (accented) vowels.
    private static final Pattern PATTERN1_A = Pattern.compile("([^aeiouáéíóú])(á)([^aeiouáéíóú])");
    private static final Pattern PATTERN1_E = Pattern.compile("([^aeiouáéíóú])(é)([^aeiouáéíóú])");
    private static final Pattern PATTERN1_I = Pattern.compile("([^aeiouáéíóú])(í)([^aeiouáéíóú])");
    private static final Pattern PATTERN1_O = Pattern.compile("([^aeiouáéíóú])(ó)([^aeiouáéíóú])");
    private static final Pattern PATTERN1_U = Pattern.compile("([^aeiouáéíóú])(ú)([^aeiouáéíóú])");

    // Accented two-letter vowel sequences; each is replaced by its unaccented spelling.
    private static final Pattern CHAR_PATTERN_AA = Pattern.compile("áá");
    private static final Pattern CHAR_PATTERN_AE = Pattern.compile("áé");
    private static final Pattern CHAR_PATTERN_AI = Pattern.compile("áí");
    private static final Pattern CHAR_PATTERN_AU = Pattern.compile("áú");
    private static final Pattern CHAR_PATTERN_EE = Pattern.compile("éé");
    private static final Pattern CHAR_PATTERN_EI = Pattern.compile("éí");
    private static final Pattern CHAR_PATTERN_EU = Pattern.compile("éú");
    private static final Pattern CHAR_PATTERN_IE = Pattern.compile("íé");
    private static final Pattern CHAR_PATTERN_OE = Pattern.compile("óé");
    private static final Pattern CHAR_PATTERN_OI = Pattern.compile("óí");
    private static final Pattern CHAR_PATTERN_OO = Pattern.compile("óó");
    private static final Pattern CHAR_PATTERN_OU = Pattern.compile("óú");
    private static final Pattern CHAR_PATTERN_UI = Pattern.compile("úí");
    private static final Pattern CHAR_PATTERN_UU = Pattern.compile("úú");
    private static final Pattern CHAR_PATTERN_IJ = Pattern.compile("íj");

    // A single accented vowel not adjacent to an unaccented vowel; also matches at word edges.
    private static final Pattern PATTERN2_A = Pattern.compile("(^|[^aeiou])á([^aeiou]|$)");
    private static final Pattern PATTERN2_E = Pattern.compile("(^|[^aeiou])é([^aeiou]|$)");
    private static final Pattern PATTERN2_I = Pattern.compile("(^|[^aeiou])í([^aeiou]|$)");
    private static final Pattern PATTERN2_O = Pattern.compile("(^|[^aeiou])ó([^aeiou]|$)");
    private static final Pattern PATTERN2_U = Pattern.compile("(^|[^aeiou])ú([^aeiou]|$)");

    // Greedy split at the LAST hyphen: group 2 is everything after it.
    private static final Pattern HYPHEN1_PATTERN = Pattern.compile("(^.*)-(.*$)");
    // A hyphen directly between two lowercase ASCII letters.
    private static final Pattern HYPHEN2_PATTERN = Pattern.compile("([a-z])-([a-z])");

    // Compound heads whose tag is forced regardless of what the dictionary returns for them.
    private static final Set<String> ALWAYS_NEEDS_HET = ImmutableSet.of("patroon", "punt", "gemaal", "weer", "kussen", "deel");
    private static final Set<String> ALWAYS_NEEDS_DE = ImmutableSet.of("keten", "boor", "dans");
    private static final Set<String> ALWAYS_NEEDS_MRV = ImmutableSet.of("pies", "koeken", "heden");

    public DutchTagger() {
        super("/nl/dutch.dict", new Locale("nl"));
    }

    /**
     * Tags each token of a sentence.
     *
     * @param list the sentence tokens, in order
     * @return one {@link AnalyzedTokenReadings} per input token; the start position of each
     *         entry is the summed length of all preceding tokens
     */
    @Override // org.languagetool.tagging.BaseTagger, org.languagetool.tagging.Tagger
    public List<AnalyzedTokenReadings> tag(List<String> list) {
        List<AnalyzedTokenReadings> tokenReadings = new ArrayList<>();
        int pos = 0;
        CompoundAcceptor compoundAcceptor = Dutch.getCompoundAcceptor();
        for (String originalWord : list) {
            // Lookups use the apostrophe-normalised word; produced tokens keep the original text.
            String word = normalizeApostrophes(originalWord);
            List<AnalyzedToken> tokens = new ArrayList<>();
            String lowerWord = word.toLowerCase(locale);
            boolean isLowercase = word.equals(lowerWord);
            boolean isMixedCase = StringTools.isMixedCase(word);
            boolean isAllUpper = StringTools.isAllUppercase(word);

            addTokens(asAnalyzedTokenListForTaggedWords(originalWord, getWordTagger().tag(word)), tokens);
            if (!isLowercase && !isMixedCase) {
                // Capitalised or all-caps word: also accept the readings of the lowercase form.
                addTokens(asAnalyzedTokenListForTaggedWords(originalWord, getWordTagger().tag(lowerWord)), tokens);
            }
            if (tokens.isEmpty() && isAllUpper) {
                // All-caps word that is still unknown: try it with only the first letter capitalised.
                String firstUpper = StringTools.uppercaseFirstChar(lowerWord);
                addTokens(asAnalyzedTokenListForTaggedWords(originalWord, getWordTagger().tag(firstUpper)), tokens);
            }

            // True when the only dictionary hit came from the accent/hyphen-stripped variant.
            boolean foundViaSimplifiedForm = false;
            if (tokens.isEmpty()) {
                String simplified = stripAccents(word);
                if (simplified.contains("-")) {
                    // Only drop hyphens when the part after the last hyphen is itself a known word.
                    String lastPart = HYPHEN1_PATTERN.matcher(simplified).replaceAll("$2");
                    if (!asAnalyzedTokenListForTaggedWords(originalWord, getWordTagger().tag(lastPart)).isEmpty()) {
                        simplified = HYPHEN2_PATTERN.matcher(simplified).replaceAll("$1$2");
                    }
                }
                if (!simplified.equals(word)) {
                    List<AnalyzedToken> simplifiedTokens = asAnalyzedTokenListForTaggedWords(originalWord, getWordTagger().tag(simplified));
                    if (!simplifiedTokens.isEmpty()) {
                        addTokens(simplifiedTokens, tokens);
                        foundViaSimplifiedForm = true;
                    }
                }
                if (tokens.isEmpty() && word.length() > 5) {
                    addCompoundTokens(compoundAcceptor, word, tokens);
                }
            }

            // Decide BEFORE building the readings object whether the simplified-form readings are
            // kept. Previously the token list was cleared only after AnalyzedTokenReadings had been
            // constructed from it, so the reset could be lost if the constructor snapshots the list.
            boolean ignoreSpelling = false;
            if (foundViaSimplifiedForm) {
                if (!isLowercase) {
                    ignoreSpelling = true;
                } else {
                    String capitalized = StringTools.uppercaseFirstChar(originalWord);
                    if (asAnalyzedTokenListForTaggedWords(capitalized, getWordTagger().tag(capitalized)).isEmpty()) {
                        ignoreSpelling = true;
                    } else {
                        // A capitalised dictionary entry exists, so the lowercase spelling is
                        // probably wrong: drop the readings and do not suppress spell checking.
                        tokens.clear();
                    }
                }
            }

            if (tokens.isEmpty()) {
                tokens.add(new AnalyzedToken(originalWord, null, null));
            }
            AnalyzedTokenReadings readings = new AnalyzedTokenReadings(tokens, pos);
            if (ignoreSpelling) {
                readings.ignoreSpelling();
            }
            tokenReadings.add(readings);
            pos += originalWord.length();
        }
        return tokenReadings;
    }

    /**
     * Looks a single word up in the dictionary without any of the fallbacks used by
     * {@link #tag(List)}.
     *
     * @param str the word to look up, used verbatim
     * @return the readings built from the word tagger's result for {@code str}
     */
    public List<AnalyzedToken> getPostags(String str) {
        return asAnalyzedTokenListForTaggedWords(str, getWordTagger().tag(str));
    }

    /** Maps backtick, U+2019, U+2018 and U+00B4 to the plain ASCII apostrophe. */
    private static String normalizeApostrophes(String word) {
        return word.replace('`', '\'')
                .replace('\u2019', '\'')
                .replace('\u2018', '\'')
                .replace('\u00B4', '\'');
    }

    /**
     * Removes acute accents from vowels. The three pattern groups are applied in a fixed order
     * (isolated vowels between non-vowels, accented vowel pairs, remaining isolated vowels
     * including those at word edges); later patterns see the output of earlier ones.
     */
    private static String stripAccents(String word) {
        String result = word;
        result = PATTERN1_A.matcher(result).replaceAll("$1a$3");
        result = PATTERN1_E.matcher(result).replaceAll("$1e$3");
        result = PATTERN1_I.matcher(result).replaceAll("$1i$3");
        result = PATTERN1_O.matcher(result).replaceAll("$1o$3");
        result = PATTERN1_U.matcher(result).replaceAll("$1u$3");
        result = CHAR_PATTERN_AA.matcher(result).replaceAll("aa");
        result = CHAR_PATTERN_AE.matcher(result).replaceAll("ae");
        result = CHAR_PATTERN_AI.matcher(result).replaceAll("ai");
        result = CHAR_PATTERN_AU.matcher(result).replaceAll("au");
        result = CHAR_PATTERN_EE.matcher(result).replaceAll("ee");
        result = CHAR_PATTERN_EI.matcher(result).replaceAll("ei");
        result = CHAR_PATTERN_EU.matcher(result).replaceAll("eu");
        result = CHAR_PATTERN_IE.matcher(result).replaceAll("ie");
        result = CHAR_PATTERN_OE.matcher(result).replaceAll("oe");
        result = CHAR_PATTERN_OI.matcher(result).replaceAll("oi");
        result = CHAR_PATTERN_OO.matcher(result).replaceAll("oo");
        result = CHAR_PATTERN_OU.matcher(result).replaceAll("ou");
        result = CHAR_PATTERN_UI.matcher(result).replaceAll("ui");
        result = CHAR_PATTERN_UU.matcher(result).replaceAll("uu");
        result = CHAR_PATTERN_IJ.matcher(result).replaceAll("ij");
        result = PATTERN2_A.matcher(result).replaceAll("$1a$2");
        result = PATTERN2_E.matcher(result).replaceAll("$1e$2");
        result = PATTERN2_I.matcher(result).replaceAll("$1i$2");
        result = PATTERN2_O.matcher(result).replaceAll("$1o$2");
        result = PATTERN2_U.matcher(result).replaceAll("$1u$2");
        return result;
    }

    /**
     * Tags an unknown word as a two-part compound. The second part is tagged on its own via
     * {@link #tag(List)} and its readings are transferred to the whole word:
     * <ul>
     *   <li>if the first part ends in a hyphen and a reading's tag starts with {@code ENM:LOC},
     *       that reading is taken over (lemma = second part) and the search stops;</li>
     *   <li>for every reading whose tag starts with {@code ZNW}, a token is added whose lemma is
     *       the lowercased first part followed by the reading's lemma; if the second part is in
     *       one of the forced-tag sets, the forced tag is used and the search stops after the
     *       first such reading.</li>
     * </ul>
     * Nothing is added unless the acceptor returns exactly two parts.
     */
    private void addCompoundTokens(CompoundAcceptor compoundAcceptor, String word, List<AnalyzedToken> tokens) {
        List<String> parts = compoundAcceptor.getParts(word);
        if (parts.size() != 2) {
            return;
        }
        String part1 = parts.get(0);
        String part2 = parts.get(1);
        // One token in, one readings object out, so get(0) is safe.
        AnalyzedTokenReadings part2Readings = tag(Collections.singletonList(part2)).get(0);
        // Use the tagger's locale (as tag() does) rather than the JVM default locale.
        String part1Lower = part1.toLowerCase(locale);
        String forcedTag = forcedNounTag(part2);
        for (AnalyzedToken reading : part2Readings) {
            String posTag = reading.getPOSTag();
            if (posTag == null) {
                continue;
            }
            if (part1.endsWith("-") && posTag.startsWith("ENM:LOC")) {
                tokens.add(new AnalyzedToken(word, posTag, part2));
                break;
            }
            if (posTag.startsWith("ZNW")) {
                tokens.add(new AnalyzedToken(word, forcedTag != null ? forcedTag : posTag, part1Lower + reading.getLemma()));
                if (forcedTag != null) {
                    // The tag is fixed, so further readings would only add duplicates.
                    break;
                }
            }
        }
    }

    /**
     * Returns the tag that overrides the dictionary tag for the given compound head, or
     * {@code null} if the head is in none of the forced-tag sets.
     */
    private static String forcedNounTag(String head) {
        if (ALWAYS_NEEDS_HET.contains(head)) {
            return "ZNW:EKV:HET";
        }
        if (ALWAYS_NEEDS_DE.contains(head)) {
            return "ZNW:EKV:DE_";
        }
        if (ALWAYS_NEEDS_MRV.contains(head)) {
            return "ZNW:MRV:DE_";
        }
        return null;
    }

    /** Appends {@code list} to {@code list2}; a {@code null} source list is ignored. */
    private void addTokens(List<AnalyzedToken> list, List<AnalyzedToken> list2) {
        if (list != null) {
            list2.addAll(list);
        }
    }
}
