package io.github.beardedManZhao.algorithmStar.algorithm.featureExtraction;

import io.github.beardedManZhao.algorithmStar.algorithm.OperationAlgorithm;
import io.github.beardedManZhao.algorithmStar.algorithm.OperationAlgorithmManager;
import io.github.beardedManZhao.algorithmStar.exception.TargetNotRealizedException;
import io.github.beardedManZhao.algorithmStar.operands.matrix.ColumnIntegerMatrix;
import io.github.beardedManZhao.algorithmStar.utils.ASClass;
import io.github.beardedManZhao.algorithmStar.utils.ASStr;
import java.util.HashMap;
import java.util.HashSet;

/* loaded from: input_file:META-INF/jars/algorithmStar-1.44.jar:io/github/beardedManZhao/algorithmStar/algorithm/featureExtraction/WordFrequency.class */
/**
 * Feature extractor that counts, for every distinct token, how many times it
 * occurs in each of the input strings.
 *
 * <p>Tokens are produced by {@code ASStr.splitBySortChars} using
 * {@link #SPLIT_CHARS_GENERAL_PUNCTUATION} as delimiters together with
 * {@link #stopWordSet}. The result is either a map from token to a per-input
 * count array, or a {@code ColumnIntegerMatrix} built from that map.
 */
public class WordFrequency extends StringArrayFeature<ColumnIntegerMatrix> {
    /**
     * Shared, mutable set handed to the tokenizer on every extraction.
     * Presumably the words in it are excluded from the result; confirm against
     * {@code ASStr.splitBySortChars}.
     */
    public static final HashSet<String> stopWordSet = new HashSet<>();

    /**
     * Delimiter characters passed to the tokenizer.
     * NOTE(review): the helper is named "splitBySortChars" but this array is not in
     * ascending code-point order (' ' comes before '\t') - confirm the helper does
     * not require a sorted array.
     */
    protected static final char[] SPLIT_CHARS_GENERAL_PUNCTUATION = {' ', '\t', '\n', '\"', ',', '.', ':', ';', '?'};

    /**
     * Creates a word-frequency algorithm with the given name.
     *
     * @param str the algorithm name forwarded to the superclass constructor
     */
    public WordFrequency(String str) {
        super(str);
    }

    /**
     * Returns the {@code WordFrequency} registered under the given name, creating
     * and registering a new one when the manager does not contain that name yet.
     *
     * @param str the algorithm name to look up or register
     * @return the registered instance for that name
     * @throws TargetNotRealizedException if the name is already registered to an
     *                                    algorithm that is not a {@code WordFrequency}
     */
    public static WordFrequency getInstance(String str) {
        if (!OperationAlgorithmManager.containsAlgorithmName(str)) {
            WordFrequency created = new WordFrequency(str);
            OperationAlgorithmManager.getInstance().register(created);
            return created;
        }
        OperationAlgorithm registered = OperationAlgorithmManager.getInstance().get(str);
        if (registered instanceof WordFrequency) {
            return (WordFrequency) ASClass.transform(registered);
        }
        throw new TargetNotRealizedException("您提取的[" + str + "]算法被找到了，但是它不属于WordFrequency类型，请您为这个算法重新定义一个名称。\nThe [" + str + "] algorithm you extracted has been found, but it does not belong to the WordFrequency type. Please redefine a name for this algorithm.");
    }

    /**
     * Extracts the word-frequency matrix, passing both the input strings and the
     * distinct tokens to the matrix as names.
     *
     * @param strArr the strings to analyse
     * @return the matrix produced by {@link #extract(String[], boolean, boolean)}
     *         with both flags set to {@code true}
     */
    @Override // io.github.beardedManZhao.algorithmStar.algorithm.featureExtraction.FeatureExtractionAlgorithm
    public ColumnIntegerMatrix extract(String[] strArr) {
        return extract(strArr, true, true);
    }

    /**
     * Extracts the word-frequency matrix with optional name arrays.
     *
     * @param strArr the strings to analyse
     * @param z      when {@code true} the distinct tokens are passed as the second
     *               argument of {@code ColumnIntegerMatrix.parse}; otherwise {@code null}
     * @param z2     when {@code true} {@code strArr} itself is passed as the first
     *               argument of {@code ColumnIntegerMatrix.parse}; otherwise {@code null}
     * @return the matrix built from one count row per distinct token
     */
    public ColumnIntegerMatrix extract(String[] strArr, boolean z, boolean z2) {
        HashMap<String, int[]> frequencies = extractHashMap(strArr);
        String[] firstNames = z2 ? strArr : null;
        String[] secondNames = z ? frequencies.keySet().toArray(new String[0]) : null;
        // The array handed to toArray must itself be an int[][]: a one-dimensional
        // int[] is not an Object[], so it can neither be passed nor yield an int[][].
        int[][] counts = frequencies.values().toArray(new int[0][]);
        return ColumnIntegerMatrix.parse(firstNames, secondNames, counts);
    }

    /**
     * Counts token occurrences per input string.
     *
     * @param strArr the strings to analyse; {@code null} yields an empty map
     * @return a map from token to an array of length {@code strArr.length} whose
     *         element {@code i} is the number of times the token was produced by
     *         the tokenizer for {@code strArr[i]}
     */
    public HashMap<String, int[]> extractHashMap(String[] strArr) {
        if (strArr == null) {
            return new HashMap<>();
        }
        HashMap<String, int[]> frequencies = new HashMap<>(strArr.length << 1);
        for (int i = 0; i < strArr.length; i++) {
            for (String word : ASStr.splitBySortChars(strArr[i], SPLIT_CHARS_GENERAL_PUNCTUATION, stopWordSet)) {
                int[] counts = frequencies.get(word);
                if (counts == null) {
                    // First sighting of this token: allocate one counter per input string.
                    counts = new int[strArr.length];
                    frequencies.put(word, counts);
                }
                counts[i]++;
            }
        }
        return frequencies;
    }
}
