package edu.cmu.sphinx.linguist.language.ngram.trie;

import edu.cmu.sphinx.instrumentation.ConfigMonitor;
import edu.cmu.sphinx.linguist.WordSequence;
import edu.cmu.sphinx.linguist.dictionary.Dictionary;
import edu.cmu.sphinx.linguist.dictionary.Word;
import edu.cmu.sphinx.linguist.language.ngram.LanguageModel;
import edu.cmu.sphinx.linguist.util.LRUCache;
import edu.cmu.sphinx.util.LogMath;
import edu.cmu.sphinx.util.TimerPool;
import edu.cmu.sphinx.util.props.ConfigurationManagerUtils;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import edu.cmu.sphinx.util.props.S4Boolean;
import edu.cmu.sphinx.util.props.S4Double;
import edu.cmu.sphinx.util.props.S4Integer;
import edu.cmu.sphinx.util.props.S4String;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URL;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * N-gram language model whose data is read from a binary trie file.
 *
 * <p>{@link #allocate()} loads the n-gram counts, the unigram table, the word
 * list and, when the effective depth is greater than one, the quantisation
 * tables and the trie memory through a {@code BinaryLoader}.
 * {@link #getProbability(WordSequence)} then returns the probability of the
 * longest n-gram that can be found for the queried sequence, adding back-off
 * weights of the context when the full sequence is not stored.
 *
 * <p>No synchronisation is performed in this class: {@link #curDepth}, the
 * hit/miss counters and the probability cache are plain instance state that is
 * mutated by {@link #getProbability(WordSequence)}.
 */
public class NgramTrieModel implements LanguageModel {

    /** Property: path of an optional file that receives one line per query. */
    @S4String(mandatory = false)
    public static final String PROP_QUERY_LOG_FILE = "queryLogFile";

    /** Property: capacity of the probability cache for max-depth n-grams. */
    @S4Integer(defaultValue = 100000)
    public static final String PROP_NGRAM_CACHE_SIZE = "ngramCacheSize";

    /** Property: whether the probability cache is replaced at the end of each utterance. */
    @S4Boolean(defaultValue = false)
    public static final String PROP_CLEAR_CACHES_AFTER_UTTERANCE = "clearCachesAfterUtterance";

    /** Property: factor applied to raw probabilities when weights are enabled. */
    @S4Double(defaultValue = 1.0d)
    public static final String PROP_LANGUAGE_WEIGHT = "languageWeight";

    /** Property: whether language weight and word insertion probability are applied. */
    @S4Boolean(defaultValue = false)
    public static final String PROP_APPLY_LANGUAGE_WEIGHT_AND_WIP = "applyLanguageWeightAndWip";

    /** Property: linear word insertion probability (converted to the log domain on load). */
    @S4Double(defaultValue = 1.0d)
    public static final String PROP_WORD_INSERTION_PROBABILITY = "wordInsertionProbability";

    /** URL protocol of a model that lives on the local file system. */
    private static final String FILE_PROTOCOL = "file";

    /** Name of the timer that brackets {@link #allocate()}. */
    private static final String LOAD_TIMER_NAME = "Load LM";

    URL location;
    protected Logger logger;
    protected LogMath logMath;
    /** Highest n-gram order served; clamped to the number of orders in the file by {@link #allocate()}. */
    protected int maxDepth;
    /** Scratch state of the current query: order of the longest n-gram matched so far. */
    protected int curDepth;
    /** Per-order counts as returned by the loader; {@code counts[0]} sizes the unigram and word tables. */
    protected int[] counts;
    protected int ngramCacheSize;
    protected boolean clearCacheAfterUtterance;
    protected Dictionary dictionary;
    /** Stored by the explicit constructor; not read anywhere in this class. */
    protected String format;
    protected boolean applyLanguageWeightAndWip;
    protected float languageWeight;
    /** Stored from the configuration; not read anywhere in this class. */
    protected float unigramWeight;
    /** Word insertion probability converted with {@link LogMath#linearToLog(double)}. */
    protected float logWip;
    protected String ngramLogFile;
    /** Number of max-depth queries that were not answered from the cache. */
    private int ngramMisses;
    /** Number of max-depth queries that were answered from the cache. */
    private int ngramHits;
    /** Query log writer; {@code null} when no log file is configured or after {@link #deallocate()}. */
    private PrintWriter logFile;
    protected TrieUnigram[] unigrams;
    protected String[] words;
    protected NgramTrieQuant quant;
    protected NgramTrie trie;
    /** Maps dictionary words to their index in {@link #words} / {@link #unigrams}. */
    protected Map<Word, Integer> unigramIDMap;
    private LRUCache<WordSequence, Float> ngramProbCache;

    /**
     * Half-open index interval {@code [begin, end)} handed to the trie lookups,
     * together with a flag telling whether the last lookup succeeded.
     */
    public static class TrieRange {
        int begin;
        int end;
        boolean found = true;

        /**
         * @param begin first index of the interval
         * @param end index one past the last element of the interval
         */
        public TrieRange(int begin, int end) {
            this.begin = begin;
            this.end = end;
        }

        /** @return {@code end - begin} */
        public int getWidth() {
            return this.end - this.begin;
        }

        /** @param found new value of the found flag */
        public void setFound(boolean found) {
            this.found = found;
        }

        /** @return the found flag; {@code true} until changed through {@link #setFound(boolean)} */
        boolean getFound() {
            return this.found;
        }

        /** @return {@code true} when the interval contains at least one element */
        boolean isSearchable() {
            return getWidth() > 0;
        }
    }

    /** Unigram table entry: probability, back-off weight and the index used to build a {@link TrieRange}. */
    public static class TrieUnigram {
        public float prob;
        public float backoff;
        public int next;
    }

    /**
     * Creates a model from explicit settings instead of a property sheet.
     *
     * @param format stored in {@link #format}
     * @param location URL of the binary model
     * @param ngramLogFile path of the query log, or {@code null} for no logging
     * @param ngramCacheSize capacity of the probability cache
     * @param clearCacheAfterUtterance whether the cache is replaced after each utterance
     * @param maxDepth requested maximum n-gram order; values {@code <= 0} or larger than
     *        the model's order are replaced by the model's order in {@link #allocate()}
     * @param dictionary dictionary used to resolve the model's words
     * @param applyLanguageWeightAndWip whether {@code languageWeight} and the word insertion
     *        probability are applied to returned probabilities
     * @param languageWeight multiplier applied to raw probabilities
     * @param wip linear word insertion probability
     * @param unigramWeight stored in {@link #unigramWeight}
     */
    public NgramTrieModel(String format, URL location, String ngramLogFile, int ngramCacheSize,
            boolean clearCacheAfterUtterance, int maxDepth, Dictionary dictionary,
            boolean applyLanguageWeightAndWip, float languageWeight, double wip, float unigramWeight) {
        this.logger = Logger.getLogger(getClass().getName());
        this.format = format;
        this.location = location;
        this.ngramLogFile = ngramLogFile;
        this.ngramCacheSize = ngramCacheSize;
        this.clearCacheAfterUtterance = clearCacheAfterUtterance;
        this.maxDepth = maxDepth;
        this.logMath = LogMath.getLogMath();
        this.dictionary = dictionary;
        this.applyLanguageWeightAndWip = applyLanguageWeightAndWip;
        this.languageWeight = languageWeight;
        this.logWip = this.logMath.linearToLog(wip);
        this.unigramWeight = unigramWeight;
    }

    /** Creates an unconfigured model; settings are supplied through {@link #newProperties(PropertySheet)}. */
    public NgramTrieModel() {
    }

    /**
     * Reads all settings from the given property sheet.
     *
     * @param propertySheet source of the configuration values
     * @throws PropertyException propagated from the property sheet accessors
     */
    @Override
    public void newProperties(PropertySheet propertySheet) throws PropertyException {
        this.logger = propertySheet.getLogger();
        this.logMath = LogMath.getLogMath();
        this.location = ConfigurationManagerUtils.getResource("location", propertySheet);
        this.ngramLogFile = propertySheet.getString(PROP_QUERY_LOG_FILE);
        this.maxDepth = propertySheet.getInt(LanguageModel.PROP_MAX_DEPTH);
        this.ngramCacheSize = propertySheet.getInt(PROP_NGRAM_CACHE_SIZE);
        this.clearCacheAfterUtterance = propertySheet.getBoolean(PROP_CLEAR_CACHES_AFTER_UTTERANCE).booleanValue();
        this.dictionary = (Dictionary) propertySheet.getComponent("dictionary");
        this.applyLanguageWeightAndWip = propertySheet.getBoolean(PROP_APPLY_LANGUAGE_WEIGHT_AND_WIP).booleanValue();
        this.languageWeight = propertySheet.getFloat(PROP_LANGUAGE_WEIGHT);
        this.logWip = this.logMath.linearToLog(propertySheet.getDouble(PROP_WORD_INSERTION_PROBABILITY));
        this.unigramWeight = propertySheet.getFloat(LanguageModel.PROP_UNIGRAM_WEIGHT);
    }

    /**
     * Fills {@link #unigramIDMap} with the index of every model word that the
     * dictionary can resolve. Words the dictionary does not know are reported
     * and left out of the map, so that no entry is stored under a {@code null} key.
     */
    private void buildUnigramIDMap() {
        if (this.unigramIDMap == null) {
            this.unigramIDMap = new HashMap<>();
        }
        int missingWords = 0;
        for (int wordId = 0; wordId < this.words.length; wordId++) {
            Word word = this.dictionary.getWord(this.words[wordId]);
            if (word == null) {
                this.logger.warning("The dictionary is missing a phonetic transcription for the word '" + this.words[wordId] + "'");
                missingWords++;
                continue;
            }
            this.unigramIDMap.put(word, Integer.valueOf(wordId));
            if (this.logger.isLoggable(Level.FINE)) {
                this.logger.fine("Word: " + word);
            }
        }
        if (missingWords > 0) {
            this.logger.warning("Dictionary is missing " + missingWords + " words that are contained in the language model.");
        }
    }

    /**
     * Loads the model from {@link #location}, builds the word-to-id map and
     * creates the probability cache. The query log is opened first when a log
     * file is configured.
     *
     * <p>The load timer is always stopped, and the query log is closed again
     * if loading fails, so a failed call does not leave resources behind.
     *
     * @throws IOException if the query log cannot be opened or the model cannot be read
     */
    @Override
    public void allocate() throws IOException {
        TimerPool.getTimer(this, LOAD_TIMER_NAME).start();
        boolean loaded = false;
        try {
            this.logger.info("Loading n-gram language model from: " + this.location);
            if (this.ngramLogFile != null) {
                this.logFile = new PrintWriter(new FileOutputStream(this.ngramLogFile));
            }
            loadModel();
            buildUnigramIDMap();
            this.ngramProbCache = new LRUCache<>(this.ngramCacheSize);
            loaded = true;
        } finally {
            if (!loaded) {
                closeQueryLog();
            }
            TimerPool.getTimer(this, LOAD_TIMER_NAME).stop();
        }
    }

    /**
     * Reads header, counts, quantisation, unigrams, trie memory and words from
     * the binary model, in that order, and always closes the loader.
     */
    private void loadModel() throws IOException {
        BinaryLoader loader = openLoader();
        try {
            loader.verifyHeader();
            this.counts = loader.readCounts();
            // A non-positive or too large request means "use every order the file has".
            if (this.maxDepth <= 0 || this.maxDepth > this.counts.length) {
                this.maxDepth = this.counts.length;
            }
            if (this.maxDepth > 1) {
                this.quant = loader.readQuant(this.maxDepth);
            }
            this.unigrams = loader.readUnigrams(this.counts[0]);
            if (this.maxDepth > 1) {
                this.trie = new NgramTrie(this.counts, this.quant.getProbBoSize(), this.quant.getProbSize());
                loader.readTrieByteArr(this.trie.getMem());
            }
            this.words = loader.readWords(this.counts[0]);
        } finally {
            loader.close();
        }
    }

    /**
     * Opens a loader for {@link #location}: through a {@link File} when the URL
     * has no protocol or the {@code file} protocol, through the URL otherwise.
     */
    private BinaryLoader openLoader() throws IOException {
        String protocol = this.location.getProtocol();
        if (protocol != null && !FILE_PROTOCOL.equals(protocol)) {
            return new BinaryLoader(this.location);
        }
        try {
            return new BinaryLoader(new File(this.location.toURI()));
        } catch (Exception e) {
            // Deliberate fallback: retry with the raw URL path when the URI form
            // cannot be used or opened; a failure here is reported to the caller.
            return new BinaryLoader(new File(this.location.getPath()));
        }
    }

    /** Closes the query log if it is open; closing a {@link PrintWriter} also flushes it. */
    private void closeQueryLog() {
        if (this.logFile != null) {
            this.logFile.close();
            this.logFile = null;
        }
    }

    /**
     * Flushes and closes the query log, if one is open. Queries issued
     * afterwards are no longer logged until {@link #allocate()} is called again.
     */
    @Override
    public void deallocate() throws IOException {
        closeQueryLog();
    }

    /**
     * Extends the match for the last word of the sequence with its preceding
     * words, walking backwards, and returns the probability of the longest
     * n-gram for which the trie lookup reports a hit.
     *
     * <p>{@link #curDepth} is incremented once per successful extension.
     *
     * @param wordSequence queried sequence; the last word is already matched
     * @param range range of the last word's unigram; updated by the trie lookups
     * @param prob probability to return when no longer n-gram is found
     * @return probability of the longest matched n-gram
     * @throws NullPointerException if a context word is not in {@link #unigramIDMap}
     */
    private float getAvailableProb(WordSequence wordSequence, TrieRange range, float prob) {
        if (!range.isSearchable()) {
            return prob;
        }
        int nearestContextIdx = wordSequence.size() - 2;
        for (int idx = nearestContextIdx; idx >= 0; idx--) {
            // 0 for the first context word, 1 for the second, ...
            int orderMinusTwo = nearestContextIdx - idx;
            if (orderMinusTwo + 1 == this.maxDepth) {
                break;
            }
            int wordId = this.unigramIDMap.get(wordSequence.getWord(idx)).intValue();
            float longerProb = this.trie.readNgramProb(wordId, orderMinusTwo, range, this.quant);
            if (!range.getFound()) {
                break;
            }
            prob = longerProb;
            this.curDepth++;
            if (!range.isSearchable()) {
                break;
            }
        }
        return prob;
    }

    /**
     * Sums the back-off weights available for the context of the sequence
     * (all words except the last one).
     *
     * <p>The unigram back-off of the word before the last one is included only
     * when {@link #curDepth} is 1, i.e. when no n-gram longer than the unigram
     * was matched. Longer context back-offs are then read from the trie until
     * a lookup reports a miss or the range can no longer be searched.
     *
     * @param wordSequence queried sequence with at least two words
     * @return accumulated back-off weight
     * @throws NullPointerException if a context word is not in {@link #unigramIDMap}
     */
    private float getAvailableBackoff(WordSequence wordSequence) {
        float backoff = 0.0f;
        int wordsNum = wordSequence.size();
        int contextId = this.unigramIDMap.get(wordSequence.getWord(wordsNum - 2)).intValue();
        TrieRange range = new TrieRange(this.unigrams[contextId].next, this.unigrams[contextId + 1].next);
        if (this.curDepth == 1) {
            backoff += this.unigrams[contextId].backoff;
        }
        int orderMinusTwo = 0;
        for (int idx = wordsNum - 3; idx >= 0; idx--, orderMinusTwo++) {
            int wordId = this.unigramIDMap.get(wordSequence.getWord(idx)).intValue();
            float longerBackoff = this.trie.readNgramBackoff(wordId, orderMinusTwo, range, this.quant);
            if (!range.getFound()) {
                break;
            }
            backoff += longerBackoff;
            if (!range.isSearchable()) {
                break;
            }
        }
        return backoff;
    }

    /**
     * Computes the unweighted probability of the last word of the sequence
     * given the preceding words.
     *
     * <p>Resets {@link #curDepth} to 1, takes the unigram probability of the
     * last word, tries to replace it by the probability of longer n-grams, and
     * adds context back-off weights when fewer words were matched than the
     * sequence contains.
     *
     * @param wordSequence non-empty sequence of at most {@link #maxDepth} words
     * @return probability read from the model, without language weight or
     *         word insertion probability
     * @throws NullPointerException if a word is not in {@link #unigramIDMap}
     */
    private float getProbabilityRaw(WordSequence wordSequence) {
        int wordsNum = wordSequence.size();
        int lastWordId = this.unigramIDMap.get(wordSequence.getWord(wordsNum - 1)).intValue();
        TrieRange range = new TrieRange(this.unigrams[lastWordId].next, this.unigrams[lastWordId + 1].next);
        float prob = this.unigrams[lastWordId].prob;
        this.curDepth = 1;
        if (wordsNum == 1) {
            return prob;
        }
        prob = getAvailableProb(wordSequence, range, prob);
        if (this.curDepth < wordsNum) {
            prob += getAvailableBackoff(wordSequence);
        }
        return prob;
    }

    /**
     * Applies language weight and word insertion probability when enabled.
     *
     * @param prob raw probability
     * @return {@code prob * languageWeight + logWip} when weighting is enabled,
     *         otherwise {@code prob} unchanged
     */
    private float applyWeights(float prob) {
        if (!this.applyLanguageWeightAndWip) {
            return prob;
        }
        return (prob * this.languageWeight) + this.logWip;
    }

    /**
     * Returns the (optionally weighted) probability of the last word of the
     * sequence given the preceding words.
     *
     * <p>Only sequences whose length equals {@link #maxDepth} are looked up in
     * and stored to the cache, and only those update the hit/miss counters.
     * Every computed (non-cached) result is written to the query log when one
     * is open.
     *
     * @param wordSequence non-empty sequence of at most {@link #maxDepth} words
     * @return probability of the sequence
     * @throws Error if the sequence is longer than {@link #maxDepth}
     * @throws NullPointerException if a word is not in {@link #unigramIDMap}
     */
    @Override
    public float getProbability(WordSequence wordSequence) {
        int wordsNum = wordSequence.size();
        if (wordsNum > this.maxDepth) {
            throw new Error("Unsupported NGram: " + wordSequence.size());
        }
        boolean cacheable = wordsNum == this.maxDepth;
        if (cacheable) {
            Float cached = this.ngramProbCache.get(wordSequence);
            if (cached != null) {
                this.ngramHits++;
                return cached.floatValue();
            }
            this.ngramMisses++;
        }
        float prob = applyWeights(getProbabilityRaw(wordSequence));
        if (cacheable) {
            this.ngramProbCache.put(wordSequence, Float.valueOf(prob));
        }
        if (this.logFile != null) {
            this.logFile.println(wordSequence.toString().replace("][", " ") + " : " + prob);
        }
        return prob;
    }

    /**
     * Smear values are not provided by this model.
     *
     * @param wordSequence ignored
     * @return always {@code 0.0f}
     */
    @Override
    public float getSmear(WordSequence wordSequence) {
        return 0.0f;
    }

    /**
     * @return an unmodifiable set holding a copy of the model's word list
     */
    @Override
    public Set<String> getVocabulary() {
        return Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(this.words)));
    }

    /** @return number of max-depth queries that were not answered from the cache */
    public int getNGramMisses() {
        return this.ngramMisses;
    }

    /** @return number of max-depth queries that were answered from the cache */
    public int getNGramHits() {
        return this.ngramHits;
    }

    /** @return the maximum n-gram order served by this model */
    @Override
    public int getMaxDepth() {
        return this.maxDepth;
    }

    /**
     * Logs the cache statistics and, when configured, replaces the cache with
     * an empty one. The hit/miss counters are not reset.
     */
    private void clearCache() {
        this.logger.info("LM Cache Size: " + this.ngramProbCache.size() + " Hits: " + this.ngramHits + " Misses: " + this.ngramMisses);
        if (this.clearCacheAfterUtterance) {
            this.ngramProbCache = new LRUCache<>(this.ngramCacheSize);
        }
    }

    /**
     * Reports cache statistics, optionally clears the cache, and writes an
     * {@code <END_UTT>} marker to the query log when one is open.
     */
    @Override
    public void onUtteranceEnd() {
        clearCache();
        if (this.logFile != null) {
            this.logFile.println("<END_UTT>");
            this.logFile.flush();
        }
    }
}
