package opennlp.tools.postag;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:META-INF/jars/opennlp-tools-2.5.4.jar:opennlp/tools/postag/POSTagFormatMapper.class */
/**
 * Maps part-of-speech tags between the Penn-style and UD-style tag sets.
 *
 * <p>The source format is guessed once, at construction time, from the outcomes (tags) a model
 * can emit: whichever conversion table recognises more of the outcomes wins. Tags are then
 * converted <em>away</em> from the guessed format, i.e. a mapper built for a UD model converts
 * UD tags to Penn tags and vice versa. For any other guessed format tags are passed through
 * unchanged.
 */
public class POSTagFormatMapper {
    private static final Logger logger = LoggerFactory.getLogger(POSTagFormatMapper.class);

    /** Value returned when a tag has no entry in the conversion table in use. */
    private static final String UNMAPPED_TAG = "?";

    /** Penn tag to UD tag. Several Penn tags share one UD tag, so this direction loses detail. */
    private static final Map<String, String> CONVERSION_TABLE_PENN_TO_UD = new HashMap<>();

    /** UD tag to Penn tag. Each UD tag is mapped to a single representative Penn tag. */
    private static final Map<String, String> CONVERSION_TABLE_UD_TO_PENN = new HashMap<>();

    static {
        CONVERSION_TABLE_PENN_TO_UD.put("#", "SYM");
        CONVERSION_TABLE_PENN_TO_UD.put("$", "SYM");
        CONVERSION_TABLE_PENN_TO_UD.put("''", "PUNCT");
        CONVERSION_TABLE_PENN_TO_UD.put(",", "PUNCT");
        CONVERSION_TABLE_PENN_TO_UD.put("-LRB-", "PUNCT");
        CONVERSION_TABLE_PENN_TO_UD.put("-RRB-", "PUNCT");
        CONVERSION_TABLE_PENN_TO_UD.put(".", "PUNCT");
        CONVERSION_TABLE_PENN_TO_UD.put(":", "PUNCT");
        CONVERSION_TABLE_PENN_TO_UD.put("AFX", "ADJ");
        CONVERSION_TABLE_PENN_TO_UD.put("CC", "CCONJ");
        CONVERSION_TABLE_PENN_TO_UD.put("CD", "NUM");
        CONVERSION_TABLE_PENN_TO_UD.put("DT", "DET");
        CONVERSION_TABLE_PENN_TO_UD.put("EX", "PRON");
        CONVERSION_TABLE_PENN_TO_UD.put("FW", "X");
        CONVERSION_TABLE_PENN_TO_UD.put("HYPH", "PUNCT");
        CONVERSION_TABLE_PENN_TO_UD.put("IN", "ADP");
        CONVERSION_TABLE_PENN_TO_UD.put("JJ", "ADJ");
        CONVERSION_TABLE_PENN_TO_UD.put("JJR", "ADJ");
        CONVERSION_TABLE_PENN_TO_UD.put("JJS", "ADJ");
        CONVERSION_TABLE_PENN_TO_UD.put("LS", "X");
        CONVERSION_TABLE_PENN_TO_UD.put("MD", "VERB");
        CONVERSION_TABLE_PENN_TO_UD.put("NIL", "X");
        CONVERSION_TABLE_PENN_TO_UD.put("NN", "NOUN");
        CONVERSION_TABLE_PENN_TO_UD.put("NNP", "PROPN");
        CONVERSION_TABLE_PENN_TO_UD.put("NNPS", "PROPN");
        CONVERSION_TABLE_PENN_TO_UD.put("NNS", "NOUN");
        CONVERSION_TABLE_PENN_TO_UD.put("PDT", "DET");
        CONVERSION_TABLE_PENN_TO_UD.put("POS", "PART");
        CONVERSION_TABLE_PENN_TO_UD.put("PRP", "PRON");
        CONVERSION_TABLE_PENN_TO_UD.put("PRP$", "DET");
        CONVERSION_TABLE_PENN_TO_UD.put("RB", "ADV");
        CONVERSION_TABLE_PENN_TO_UD.put("RBR", "ADV");
        CONVERSION_TABLE_PENN_TO_UD.put("RBS", "ADV");
        CONVERSION_TABLE_PENN_TO_UD.put("RP", "ADP");
        CONVERSION_TABLE_PENN_TO_UD.put("SYM", "SYM");
        CONVERSION_TABLE_PENN_TO_UD.put("TO", "PART");
        CONVERSION_TABLE_PENN_TO_UD.put("UH", "INTJ");
        CONVERSION_TABLE_PENN_TO_UD.put("VB", "VERB");
        CONVERSION_TABLE_PENN_TO_UD.put("VBD", "VERB");
        CONVERSION_TABLE_PENN_TO_UD.put("VBG", "VERB");
        CONVERSION_TABLE_PENN_TO_UD.put("VBN", "VERB");
        CONVERSION_TABLE_PENN_TO_UD.put("VBP", "VERB");
        CONVERSION_TABLE_PENN_TO_UD.put("VBZ", "VERB");
        CONVERSION_TABLE_PENN_TO_UD.put("WDT", "DET");
        CONVERSION_TABLE_PENN_TO_UD.put("WP", "PRON");
        CONVERSION_TABLE_PENN_TO_UD.put("WP$", "DET");
        CONVERSION_TABLE_PENN_TO_UD.put("WRB", "ADV");

        CONVERSION_TABLE_UD_TO_PENN.put("ADJ", "JJ");
        CONVERSION_TABLE_UD_TO_PENN.put("ADP", "IN");
        CONVERSION_TABLE_UD_TO_PENN.put("ADV", "RB");
        CONVERSION_TABLE_UD_TO_PENN.put("AUX", "MD");
        CONVERSION_TABLE_UD_TO_PENN.put("CCONJ", "CC");
        CONVERSION_TABLE_UD_TO_PENN.put("DET", "DT");
        CONVERSION_TABLE_UD_TO_PENN.put("INTJ", "UH");
        CONVERSION_TABLE_UD_TO_PENN.put("NOUN", "NN");
        CONVERSION_TABLE_UD_TO_PENN.put("NUM", "CD");
        CONVERSION_TABLE_UD_TO_PENN.put("PART", "RP");
        CONVERSION_TABLE_UD_TO_PENN.put("PRON", "PRP");
        CONVERSION_TABLE_UD_TO_PENN.put("PROPN", "NNP");
        CONVERSION_TABLE_UD_TO_PENN.put("PUNCT", ".");
        CONVERSION_TABLE_UD_TO_PENN.put("SCONJ", "IN");
        CONVERSION_TABLE_UD_TO_PENN.put("SYM", "SYM");
        CONVERSION_TABLE_UD_TO_PENN.put("VERB", "VB");
        CONVERSION_TABLE_UD_TO_PENN.put("X", "FW");
    }

    /** Tag format guessed from the outcomes passed to the constructor; never reassigned. */
    private final POSTagFormat modelFormat;

    /**
     * Mapper that performs no conversion: tags are returned exactly as supplied and the reported
     * format is always {@link POSTagFormat#CUSTOM}.
     */
    public static class NoOp extends POSTagFormatMapper {
        /**
         * Creates a pass-through mapper.
         *
         * <p>Delegates to the superclass with an empty outcome array, so the superclass format
         * guess still runs (and logs its "unknown POS format" warning); its result is not used
         * by any method of this class. The decompiler notes this constructor was originally
         * {@code protected}.
         */
        public NoOp() {
            super(new String[0]);
        }

        /**
         * Copies the tags into a new array without converting them.
         *
         * @param list the tags to copy; must not be {@code null}
         * @return a new array holding the elements of {@code list} in order
         * @throws NullPointerException if {@code list} is {@code null}
         */
        @Override // opennlp.tools.postag.POSTagFormatMapper
        public String[] convertTags(List<String> list) {
            Objects.requireNonNull(list, "tags must not be NULL.");
            return list.toArray(new String[0]);
        }

        /**
         * Returns the tag unchanged.
         *
         * @param str the tag, may be {@code null}
         * @return {@code str} itself
         */
        @Override // opennlp.tools.postag.POSTagFormatMapper
        public String convertTag(String str) {
            return str;
        }

        /**
         * @return always {@link POSTagFormat#CUSTOM}
         */
        @Override // opennlp.tools.postag.POSTagFormatMapper
        public POSTagFormat getGuessedFormat() {
            return POSTagFormat.CUSTOM;
        }
    }

    /**
     * Creates a mapper whose source format is guessed from the given model outcomes.
     *
     * <p>The decompiler notes this constructor was originally {@code protected}.
     *
     * @param strArr the tags a model can emit; must not be {@code null}
     * @throws NullPointerException if {@code strArr} is {@code null}
     */
    public POSTagFormatMapper(String[] strArr) {
        this.modelFormat = guessModelTagFormat(strArr);
    }

    /**
     * Converts every tag of the list via {@link #convertTag(String)}.
     *
     * @param list the tags to convert; must not be {@code null}
     * @return a new array with one converted tag per input element, in input order
     * @throws NullPointerException if {@code list} is {@code null}
     */
    public String[] convertTags(List<String> list) {
        Objects.requireNonNull(list, "Supplied tags must not be NULL.");
        return list.stream().map(this::convertTag).toArray(String[]::new);
    }

    /**
     * Converts a single tag away from the guessed model format.
     *
     * <ul>
     *   <li>Guessed format UD: the tag is looked up in the UD-to-Penn table. A warning is logged
     *       for UD tags whose Penn counterpart is ambiguous.</li>
     *   <li>Guessed format PENN: the tag is looked up in the Penn-to-UD table.</li>
     *   <li>Any other guessed format: the tag is returned unchanged.</li>
     * </ul>
     *
     * @param str the tag to convert, may be {@code null}
     * @return the mapped tag, {@code "?"} if the table in use has no entry for {@code str},
     *         or {@code str} itself when no conversion applies
     */
    public String convertTag(String str) {
        switch (this.modelFormat) {
            case UD:
                // The ambiguity only exists in this direction: one UD tag stands for several
                // Penn tags, and the table can return just one of them.
                warnIfAmbiguousUdTag(str);
                return CONVERSION_TABLE_UD_TO_PENN.getOrDefault(str, UNMAPPED_TAG);
            case PENN:
                return CONVERSION_TABLE_PENN_TO_UD.getOrDefault(str, UNMAPPED_TAG);
            default:
                return str;
        }
    }

    /**
     * Returns the tag format guessed from the outcomes supplied at construction time.
     *
     * @return the guessed format
     */
    public POSTagFormat getGuessedFormat() {
        return this.modelFormat;
    }

    /**
     * Guesses the tag format used by the given model.
     *
     * @param pOSModel the model to inspect; must not be {@code null} and must provide a
     *                 non-{@code null} sequence model
     * @return the format guessed from the outcomes of the model's sequence model
     * @throws NullPointerException if the model or its sequence model is {@code null}
     */
    public static POSTagFormat guessFormat(POSModel pOSModel) {
        Objects.requireNonNull(pOSModel, "POSModel must not be NULL.");
        Objects.requireNonNull(pOSModel.getPosSequenceModel(), "POSSequenceModel must not be NULL.");
        return new POSTagFormatMapper(pOSModel.getPosSequenceModel().getOutcomes()).getGuessedFormat();
    }

    /**
     * Logs a warning when the given UD tag maps to more than one Penn tag, naming the Penn tag
     * the UD-to-Penn table returns. Uses null-safe comparisons so a {@code null} tag is ignored.
     */
    private static void warnIfAmbiguousUdTag(String tag) {
        if ("NOUN".equals(tag)) {
            logger.warn("Ambiguity detected: NOUN can be 'NN' or 'NNS' depending on the number. Returning 'NN'.");
        } else if ("PART".equals(tag)) {
            logger.warn("Ambiguity detected: PART can be 'RP' or 'TO'. Returning 'RP'.");
        } else if ("PROPN".equals(tag)) {
            logger.warn("Ambiguity detected: PROPN can be 'NNP' or 'NNPS'. Returning 'NNP'.");
        } else if ("PUNCT".equals(tag)) {
            logger.warn("Ambiguity detected: PUNCT needs specific punctuation mapping. Returning '.'");
        } else if ("VERB".equals(tag)) {
            logger.warn("Ambiguity detected: VERB can be 'VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'. Returning 'VB'.");
        }
    }

    /**
     * Guesses the tag format by counting how many outcomes each conversion table knows as a key.
     * A tag present in both tables (e.g. {@code "SYM"}) counts for both formats.
     *
     * @param outcomes the tags a model can emit; must not be {@code null}
     * @return {@link POSTagFormat#UD} or {@link POSTagFormat#PENN} for a strict majority,
     *         otherwise {@link POSTagFormat#UNKNOWN} (a tie, including an empty array)
     * @throws NullPointerException if {@code outcomes} is {@code null}
     */
    private static POSTagFormat guessModelTagFormat(String[] outcomes) {
        Objects.requireNonNull(outcomes, "Outcomes must not be NULL.");
        int udMatches = 0;
        int pennMatches = 0;
        for (String outcome : outcomes) {
            if (CONVERSION_TABLE_UD_TO_PENN.containsKey(outcome)) {
                udMatches++;
            }
            if (CONVERSION_TABLE_PENN_TO_UD.containsKey(outcome)) {
                pennMatches++;
            }
        }
        if (udMatches > pennMatches) {
            return POSTagFormat.UD;
        }
        if (pennMatches > udMatches) {
            return POSTagFormat.PENN;
        }
        logger.warn("Detected an unknown POS format.");
        return POSTagFormat.UNKNOWN;
    }
}
