package org.languagetool.rules.nl;

import com.google.common.collect.ImmutableSet;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
import org.languagetool.Languages;
import org.languagetool.rules.spelling.CachingWordListLoader;
import org.languagetool.tagging.nl.DutchTagger;

/* loaded from: input_file:META-INF/jars/language-nl-6.4.jar:org/languagetool/rules/nl/CompoundAcceptor.class */
/**
 * Decides whether a word can be accepted as a compound of two parts.
 *
 * <p>A candidate word is split at every admissible position; each split is judged by
 * {@link #acceptCompound(String, String)} using the word lists under
 * {@code nl/compound_acceptor/}, the POS tags returned by {@link DutchTagger} and the
 * {@link MorfologikDutchSpellerRule} speller.
 *
 * <p>All case conversions use {@link Locale#ROOT} so that list lookups do not depend on the
 * JVM's default locale.
 */
public class CompoundAcceptor {
    /** Two to four upper-case ASCII letters followed by a hyphen. */
    private static final Pattern acronymPattern = Pattern.compile("[A-Z]{2,4}-");
    /** Two to four ASCII letters of any case followed by a hyphen. */
    private static final Pattern specialAcronymPattern = Pattern.compile("[A-Za-z]{2,4}-");
    /** One ASCII letter of any case followed only by lower-case ASCII letters or {@code é}. */
    private static final Pattern normalCasePattern = Pattern.compile("[A-Za-z][a-zé]*");
    /** Words longer than this are never split. */
    private static final int MAX_WORD_SIZE = 35;
    /** Shared instance; created during class initialisation. */
    public static final CompoundAcceptor INSTANCE = new CompoundAcceptor();
    private static final String COMPOUND_NO_S_FILE = "nl/compound_acceptor/no_s.txt";
    private static final String COMPOUND_NEEDS_S_FILE = "nl/compound_acceptor/needs_s.txt";
    private static final String COMPOUND_DIRECTIONS_FILE = "nl/compound_acceptor/directions.txt";
    private static final String COMPOUND_ALWAYS_NEEDS_S_FILE = "nl/compound_acceptor/always_needs_s.txt";
    private static final String COMPOUND_ALWAYS_NEEDS_HYPHEN_FILE = "nl/compound_acceptor/always_needs_hyphen.txt";
    private static final String COMPOUND_PART1_EXCEPTIONS_FILE = "nl/compound_acceptor/part1_exceptions.txt";
    private static final String COMPOUND_PART2_EXCEPTIONS_FILE = "nl/compound_acceptor/part2_exceptions.txt";
    private static final String COMPOUND_ACRONYM_EXCEPTIONS_FILE = "nl/compound_acceptor/acronym_exceptions.txt";
    /** Speller used by {@link #spellingOk(String)}; shared by all instances. */
    private static final MorfologikDutchSpellerRule speller;

    static {
        try {
            speller = new MorfologikDutchSpellerRule(JLanguageTool.getMessageBundle(), Languages.getLanguageForShortCode("nl"), null);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    protected final CachingWordListLoader wordListLoader = new CachingWordListLoader();
    /** Entries of {@code no_s.txt}. */
    protected final Set<String> noS = new ObjectOpenHashSet<>();
    /** Entries of {@code needs_s.txt}. */
    protected final Set<String> needsS = new ObjectOpenHashSet<>();
    /** Entries of {@code directions.txt}. */
    protected final Set<String> geographicalDirections = new ObjectOpenHashSet<>();
    /** Entries of {@code always_needs_s.txt}; matched as suffixes of the lower-cased first part. */
    protected final Set<String> alwaysNeedsS = new ObjectOpenHashSet<>();
    /** Entries of {@code always_needs_hyphen.txt}. */
    protected final Set<String> alwaysNeedsHyphen = new ObjectOpenHashSet<>();
    /** Entries of {@code part1_exceptions.txt}. */
    protected final Set<String> part1Exceptions = new ObjectOpenHashSet<>();
    /** Entries of {@code part2_exceptions.txt}; a second part listed here is never treated as a noun. */
    protected final Set<String> part2Exceptions = new ObjectOpenHashSet<>();
    /** Entries of {@code acronym_exceptions.txt}. */
    protected final Set<String> acronymExceptions = new ObjectOpenHashSet<>();
    /** Two-letter sequences checked at the boundary between the two parts of a compound. */
    private final Set<String> collidingVowels = ImmutableSet.of("aa", "ae", "ai", "au", "ee", "ée", "ei", "éi", "eu", "éu", "ie", "ii", "ij", "oe", "oi", "oo", "ou", "ui", "uu");
    private final DutchTagger dutchTagger = DutchTagger.INSTANCE;

    /** Creates an acceptor and loads all word lists through {@link #wordListLoader}. */
    public CompoundAcceptor() {
        noS.addAll(wordListLoader.loadWords(COMPOUND_NO_S_FILE));
        needsS.addAll(wordListLoader.loadWords(COMPOUND_NEEDS_S_FILE));
        geographicalDirections.addAll(wordListLoader.loadWords(COMPOUND_DIRECTIONS_FILE));
        alwaysNeedsS.addAll(wordListLoader.loadWords(COMPOUND_ALWAYS_NEEDS_S_FILE));
        alwaysNeedsHyphen.addAll(wordListLoader.loadWords(COMPOUND_ALWAYS_NEEDS_HYPHEN_FILE));
        part1Exceptions.addAll(wordListLoader.loadWords(COMPOUND_PART1_EXCEPTIONS_FILE));
        part2Exceptions.addAll(wordListLoader.loadWords(COMPOUND_PART2_EXCEPTIONS_FILE));
        acronymExceptions.addAll(wordListLoader.loadWords(COMPOUND_ACRONYM_EXCEPTIONS_FILE));
    }

    /**
     * Tells whether {@code word} can be split into two parts that form an accepted compound.
     *
     * @param word the candidate word
     * @return {@code true} if at least one split is accepted; {@code false} if none is, or if
     *         the word is longer than {@value #MAX_WORD_SIZE} characters
     */
    public boolean acceptCompound(String word) {
        return findSplitIndex(word) >= 0;
    }

    /**
     * Returns the two parts of the first accepted split of {@code word}.
     *
     * @param word the candidate word
     * @return a two-element list {@code [part1, part2]}, or an empty list if no split is
     *         accepted or the word is longer than {@value #MAX_WORD_SIZE} characters
     */
    public List<String> getParts(String word) {
        int split = findSplitIndex(word);
        if (split < 0) {
            return Collections.emptyList();
        }
        return Arrays.asList(word.substring(0, split), word.substring(split));
    }

    /**
     * Finds the first index at which {@code word} splits into an accepted compound.
     *
     * @return the index where the second part starts, or {@code -1} if there is none
     */
    private int findSplitIndex(String word) {
        if (word.length() > MAX_WORD_SIZE) {
            return -1;
        }
        // The first part has at least 3 characters, the second part at least 4.
        for (int i = 3; i < word.length() - 3; i++) {
            String part1 = word.substring(0, i);
            String part2 = word.substring(i);
            // A word made of the same part twice is not considered a compound.
            if (!part1.equals(part2) && acceptCompound(part1, part2)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Judges a single split of a word into {@code part1} and {@code part2}.
     *
     * <p>The rules are tried in this order, and the first one that applies decides:
     * <ol>
     *   <li>{@code part1} ends in "s", contains no hyphen and is not covered by
     *       {@link #part1Exceptions} (without the "s"), {@link #alwaysNeedsS} or {@link #noS}:
     *       accepted if it ends in an {@link #alwaysNeedsS} suffix or is listed in
     *       {@link #needsS}, subject to noun/spelling checks;</li>
     *   <li>{@code part1} is listed in {@link #geographicalDirections}: {@code part2} must carry
     *       an {@code ENM:LOC} tag;</li>
     *   <li>{@code part1} ends in a hyphen: it must be an accepted acronym or be listed in
     *       {@link #alwaysNeedsHyphen};</li>
     *   <li>{@code part2} does not start with a hyphen: {@code part1} must be listed in
     *       {@link #noS} or {@link #part1Exceptions} and the boundary letters must not be one
     *       of the colliding vowel pairs;</li>
     *   <li>{@code part2} starts with a hyphen: {@code part1} must be listed in {@link #noS}
     *       and the boundary letters must be one of the colliding vowel pairs.</li>
     * </ol>
     *
     * @param part1 the first part of the split
     * @param part2 the second part of the split
     * @return {@code true} if the split is accepted
     * @throws RuntimeException wrapping any {@link IOException} raised while tagging or
     *         spell checking
     */
    boolean acceptCompound(String part1, String part2) {
        try {
            String part1Lower = part1.toLowerCase(Locale.ROOT);
            if (part1.endsWith("s")
                    && !part1Exceptions.contains(part1.substring(0, part1.length() - 1))
                    && !alwaysNeedsS.contains(part1)
                    && !noS.contains(part1)
                    && !part1.contains("-")) {
                for (String suffix : alwaysNeedsS) {
                    if (part1Lower.endsWith(suffix)) {
                        return isNoun(part2)
                                && isExistingWord(part1Lower.substring(0, part1Lower.length() - 1))
                                && spellingOk(part2);
                    }
                }
                return needsS.contains(part1Lower)
                        && isNoun(part2)
                        && spellingOk(part1.substring(0, part1.length() - 1))
                        && spellingOk(part2);
            }
            if (geographicalDirections.contains(part1)) {
                return isGeographicalCompound(part2);
            }
            if (part1.endsWith("-")) {
                return (acronymOk(part1) || alwaysNeedsHyphen.contains(part1Lower)) && spellingOk(part2);
            }
            if (!part2.startsWith("-")) {
                return (noS.contains(part1Lower) || part1Exceptions.contains(part1Lower))
                        && isNoun(part2)
                        && spellingOk(part1)
                        && !hasCollidingVowels(part1, part2);
            }
            // part2 starts with a hyphen: judge the text after the hyphen.
            String afterHyphen = part2.substring(1);
            return noS.contains(part1Lower)
                    && isNoun(afterHyphen)
                    && spellingOk(part1)
                    && spellingOk(afterHyphen)
                    && hasCollidingVowels(part1, afterHyphen);
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Tells whether {@code word} has a reading whose POS tag starts with {@code ZNW} and is not
     * listed in {@link #part2Exceptions}.
     */
    private boolean isNoun(String word) throws IOException {
        // The exception check does not depend on the reading, so do it once up front.
        if (part2Exceptions.contains(word)) {
            return false;
        }
        return dutchTagger.getPostags(word).stream().anyMatch(reading -> {
            assert reading.getPOSTag() != null;
            return reading.getPOSTag().startsWith("ZNW");
        });
    }

    /** Tells whether the tagger returns at least one reading with a non-null POS tag. */
    private boolean isExistingWord(String word) throws IOException {
        return dutchTagger.getPostags(word).stream().anyMatch(reading -> reading.getPOSTag() != null);
    }

    /** Tells whether {@code word} has a reading whose POS tag starts with {@code ENM:LOC}. */
    private boolean isGeographicalCompound(String word) throws IOException {
        return dutchTagger.getPostags(word).stream().anyMatch(reading -> {
            assert reading.getPOSTag() != null;
            return reading.getPOSTag().startsWith("ENM:LOC");
        });
    }

    /**
     * Tells whether the last character of {@code part1} followed by the first character of
     * {@code part2} (lower-cased) is one of the {@link #collidingVowels} pairs.
     */
    private boolean hasCollidingVowels(String part1, String part2) {
        String boundary = String.valueOf(part1.charAt(part1.length() - 1)) + part2.charAt(0);
        return collidingVowels.contains(boundary.toLowerCase(Locale.ROOT));
    }

    /**
     * Judges a first part that ends in a hyphen as an acronym.
     *
     * <p>An all-upper-case acronym is accepted unless its text (without the hyphen) equals the
     * upper-cased form of an entry in {@link #acronymExceptions}. Any other two-to-four-letter
     * acronym is accepted only if its text is listed verbatim in {@link #acronymExceptions}.
     *
     * @param part the first part including its trailing hyphen
     */
    private boolean acronymOk(String part) {
        if (acronymPattern.matcher(part).matches()) {
            String acronym = part.substring(0, part.length() - 1);
            return acronymExceptions.stream()
                    .noneMatch(exception -> exception.toUpperCase(Locale.ROOT).equals(acronym));
        }
        if (specialAcronymPattern.matcher(part).matches()) {
            return acronymExceptions.contains(part.substring(0, part.length() - 1));
        }
        return false;
    }

    /**
     * Tells whether {@code word} matches {@link #normalCasePattern} and its lower-cased form
     * produces no match from the speller.
     */
    private boolean spellingOk(String word) throws IOException {
        if (!normalCasePattern.matcher(word).matches()) {
            return false;
        }
        // The speller works on sentences, so wrap the word in a one-token sentence with
        // placeholder POS tag and lemma.
        AnalyzedToken token = new AnalyzedToken(word.toLowerCase(Locale.ROOT), "FAKE_POS", "fakeLemma");
        AnalyzedSentence sentence = new AnalyzedSentence(new AnalyzedTokenReadings[]{new AnalyzedTokenReadings(token)});
        return speller.match(sentence).length == 0;
    }

    /**
     * Reads the file named by the single argument and prints, for each line, the result of
     * {@link #acceptCompound(String)} followed by the line itself.
     *
     * @param args exactly one element: the path of the input file
     * @throws IOException if the file cannot be read
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 1) {
            System.out.println("Usage: " + CompoundAcceptor.class.getName() + " <file>");
            System.exit(1);
        }
        CompoundAcceptor acceptor = new CompoundAcceptor();
        for (String line : Files.readAllLines(Paths.get(args[0]))) {
            System.out.println(acceptor.acceptCompound(line) + " " + line);
        }
    }
}
