package edu.cmu.sphinx.linguist.dictionary;

import edu.cmu.sphinx.alignment.UsEnglish;
import edu.cmu.sphinx.linguist.acoustic.Context;
import edu.cmu.sphinx.linguist.acoustic.Unit;
import edu.cmu.sphinx.linguist.acoustic.UnitManager;
import edu.cmu.sphinx.linguist.g2p.G2PConverter;
import edu.cmu.sphinx.linguist.g2p.Path;
import edu.cmu.sphinx.util.Timer;
import edu.cmu.sphinx.util.TimerPool;
import edu.cmu.sphinx.util.props.ConfigurationManagerUtils;
import edu.cmu.sphinx.util.props.PropertyException;
import edu.cmu.sphinx.util.props.PropertySheet;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeMap;
import java.util.logging.Logger;

/* loaded from: input_file:META-INF/jars/sphinx4-core-5prealpha-SNAPSHOT.jar:edu/cmu/sphinx/linguist/dictionary/TextDictionary.class */
/**
 * {@link Dictionary} implementation backed by plain-text pronunciation files.
 *
 * <p>Each non-blank line of a dictionary file consists of a spelling, followed by a space or a
 * tab, followed by whitespace-separated unit names. Lines are kept as raw text in
 * {@link #dictionary} when loaded and are only turned into {@link Word} objects on the first
 * {@link #getWord(String)} request for a spelling; the resulting {@code Word} is then cached in
 * {@link #wordDictionary}.
 *
 * <p>When a spelling occurs more than once, the later occurrences are stored under the keys
 * {@code spelling(2)}, {@code spelling(3)}, ... and are combined into a single {@code Word}
 * carrying several pronunciations.
 *
 * <p>{@link #allocate()} must be called before any lookup; the class performs no
 * synchronization of its own.
 */
public class TextDictionary implements Dictionary {
    protected Logger logger;
    protected URL wordDictionaryFile;
    protected URL fillerDictionaryFile;
    protected List<URL> addendaUrlList;
    private String wordReplacement;
    protected URL g2pModelFile;
    protected int g2pMaxPron;
    protected UnitManager unitManager;
    /** Raw dictionary lines keyed by spelling (or {@code spelling(n)} for alternates). */
    protected Map<String, String> dictionary;
    /** Cache of the {@link Word} objects that have already been built. */
    protected Map<String, Word> wordDictionary;
    protected G2PConverter g2pDecoder;
    /** Prefix prepended to raw lines that were loaded from a filler dictionary. */
    protected static final String FILLER_TAG = "-F-";
    protected Set<String> fillerWords;
    protected boolean allocated;

    /**
     * Creates a dictionary from resource locations given as strings.
     *
     * @param wordDictionaryFile resource string of the main dictionary
     * @param fillerDictionaryFile resource string of the filler dictionary
     * @param addendaUrlList additional dictionaries to load; may be {@code null}
     * @param ignored not used by this implementation
     * @param wordReplacement spelling to fall back to for unknown words; may be {@code null}
     * @param unitManager source of the units that make up pronunciations
     * @throws MalformedURLException propagated from the resource-to-URL conversion
     * @throws ClassNotFoundException propagated from the resource-to-URL conversion
     */
    public TextDictionary(String wordDictionaryFile, String fillerDictionaryFile, List<URL> addendaUrlList, boolean ignored, String wordReplacement, UnitManager unitManager) throws MalformedURLException, ClassNotFoundException {
        this(ConfigurationManagerUtils.resourceToURL(wordDictionaryFile), ConfigurationManagerUtils.resourceToURL(fillerDictionaryFile), addendaUrlList, wordReplacement, unitManager);
    }

    /**
     * Creates a dictionary without a g2p model.
     *
     * @param wordDictionaryFile location of the main dictionary
     * @param fillerDictionaryFile location of the filler dictionary
     * @param addendaUrlList additional dictionaries to load; may be {@code null}
     * @param wordReplacement spelling to fall back to for unknown words; may be {@code null}
     * @param unitManager source of the units that make up pronunciations
     */
    public TextDictionary(URL wordDictionaryFile, URL fillerDictionaryFile, List<URL> addendaUrlList, String wordReplacement, UnitManager unitManager) {
        this.g2pMaxPron = 0;
        this.logger = Logger.getLogger(getClass().getName());
        this.wordDictionaryFile = wordDictionaryFile;
        this.fillerDictionaryFile = fillerDictionaryFile;
        this.addendaUrlList = addendaUrlList;
        this.wordReplacement = wordReplacement;
        this.unitManager = unitManager;
    }

    /**
     * Creates a dictionary that may fall back to a g2p model for unknown words.
     *
     * @param wordDictionaryFile location of the main dictionary
     * @param fillerDictionaryFile location of the filler dictionary
     * @param addendaUrlList additional dictionaries to load; may be {@code null}
     * @param ignored not used by this implementation
     * @param wordReplacement spelling to fall back to for unknown words; may be {@code null}
     * @param unitManager source of the units that make up pronunciations
     * @param g2pModelFile location of the g2p model; may be {@code null}
     * @param g2pMaxPron value passed to the g2p converter when phoneticizing a word
     */
    public TextDictionary(URL wordDictionaryFile, URL fillerDictionaryFile, List<URL> addendaUrlList, boolean ignored, String wordReplacement, UnitManager unitManager, URL g2pModelFile, int g2pMaxPron) {
        this(wordDictionaryFile, fillerDictionaryFile, addendaUrlList, wordReplacement, unitManager);
        this.g2pModelFile = g2pModelFile;
        this.g2pMaxPron = g2pMaxPron;
    }

    /** Creates an unconfigured dictionary; settings are supplied through {@link #newProperties}. */
    public TextDictionary() {
        this.g2pMaxPron = 0;
    }

    /**
     * Reads the logger, file locations, word replacement, unit manager and g2p settings from the
     * given property sheet.
     *
     * @param propertySheet configuration to read from
     * @throws PropertyException propagated from the property sheet accessors
     */
    @Override // edu.cmu.sphinx.util.props.Configurable
    public void newProperties(PropertySheet propertySheet) throws PropertyException {
        this.logger = propertySheet.getLogger();
        this.wordDictionaryFile = ConfigurationManagerUtils.getResource(Dictionary.PROP_DICTIONARY, propertySheet);
        this.fillerDictionaryFile = ConfigurationManagerUtils.getResource(Dictionary.PROP_FILLER_DICTIONARY, propertySheet);
        this.addendaUrlList = propertySheet.getResourceList(Dictionary.PROP_ADDENDA);
        this.wordReplacement = propertySheet.getString(Dictionary.PROP_WORD_REPLACEMENT);
        this.unitManager = (UnitManager) propertySheet.getComponent("unitManager");
        this.g2pModelFile = ConfigurationManagerUtils.getResource(Dictionary.PROP_G2P_MODEL_PATH, propertySheet);
        this.g2pMaxPron = propertySheet.getInt(Dictionary.PROP_G2P_MAX_PRONUNCIATIONS);
    }

    /** @return the location of the main dictionary file */
    public URL getWordDictionaryFile() {
        return this.wordDictionaryFile;
    }

    /** @return the location of the filler dictionary file */
    public URL getFillerDictionaryFile() {
        return this.fillerDictionaryFile;
    }

    /**
     * Loads the main dictionary, the addenda and the filler dictionary (in that order) and, when
     * a g2p model is configured, creates the g2p converter.
     *
     * <p>Calling this method on an already allocated dictionary does nothing, so the raw entries
     * and the {@link Word} cache survive repeated calls.
     *
     * @throws IOException if one of the dictionary files cannot be opened or read
     */
    @Override // edu.cmu.sphinx.linguist.dictionary.Dictionary
    public void allocate() throws IOException {
        if (this.allocated) {
            return;
        }
        this.dictionary = new HashMap<String, String>();
        this.wordDictionary = new HashMap<String, Word>();
        Timer timer = TimerPool.getTimer(this, "Load Dictionary");
        this.fillerWords = new HashSet<String>();
        timer.start();
        this.logger.info("Loading dictionary from: " + this.wordDictionaryFile);
        loadDictionary(this.wordDictionaryFile.openStream(), false);
        loadCustomDictionaries(this.addendaUrlList);
        this.logger.info("Loading filler dictionary from: " + this.fillerDictionaryFile);
        loadDictionary(this.fillerDictionaryFile.openStream(), true);
        if (isG2pModelConfigured()) {
            this.g2pDecoder = new G2PConverter(this.g2pModelFile);
        }
        timer.stop();
        // Only mark the dictionary as allocated once everything loaded successfully. Without
        // this flag every allocate() call reloaded all files and deallocate() was a no-op.
        this.allocated = true;
    }

    /**
     * Releases the raw dictionary lines and the g2p converter. Does nothing when the dictionary
     * is not allocated. The {@link Word} cache and the filler set are left in place until the
     * next {@link #allocate()} replaces them.
     */
    @Override // edu.cmu.sphinx.linguist.dictionary.Dictionary
    public void deallocate() {
        if (this.allocated) {
            this.dictionary = null;
            this.g2pDecoder = null;
            this.allocated = false;
        }
    }

    /**
     * Reads dictionary lines from the stream into {@link #dictionary} and closes the stream,
     * also when reading fails.
     *
     * @param inputStream stream to read from; closed by this method
     * @param isFiller when {@code true}, every stored line is prefixed with {@link #FILLER_TAG}
     *     and its key is added to {@link #fillerWords}
     * @throws IOException if reading or closing the stream fails
     * @throws Error if a non-blank line contains neither a space nor a tab
     */
    protected void loadDictionary(InputStream inputStream, boolean isFiller) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream));
        try {
            String rawLine;
            while ((rawLine = reader.readLine()) != null) {
                String line = rawLine.trim();
                if (line.isEmpty()) {
                    continue;
                }
                int spaceIndex = getSpaceIndex(line);
                if (spaceIndex < 0) {
                    // NOTE(review): Error is kept for backward compatibility with existing callers.
                    throw new Error("Error loading word: " + line);
                }
                String key = line.substring(0, spaceIndex);
                if (this.dictionary.containsKey(key)) {
                    // Repeated spelling: store under the first free "spelling(n)" key. Plain
                    // concatenation keeps the digits locale-independent, so the key matches
                    // the one processEntry builds when it looks the alternates up again.
                    String spelling = key;
                    int variant = 2;
                    do {
                        key = spelling + '(' + variant + ')';
                        variant++;
                    } while (this.dictionary.containsKey(key));
                }
                if (isFiller) {
                    this.dictionary.put(key, FILLER_TAG + line);
                    this.fillerWords.add(key);
                } else {
                    this.dictionary.put(key, line);
                }
            }
        } finally {
            // Closing the outer reader also closes the wrapped reader and the input stream.
            reader.close();
        }
    }

    /**
     * Finds the first space or tab in the given text.
     *
     * @return index of the first space or tab, or -1 if there is none
     */
    private int getSpaceIndex(String text) {
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == ' ' || c == '\t') {
                return i;
            }
        }
        return -1;
    }

    /** Tells whether a g2p model location with a non-excluded path has been configured. */
    private boolean isG2pModelConfigured() {
        return this.g2pModelFile != null && !this.g2pModelFile.getPath().equals(UsEnglish.SINGLE_CHAR_SYMBOLS);
    }

    /**
     * Obtains the unit with the given name from the unit manager, using the empty context.
     *
     * @param name unit name
     * @param isFiller filler flag passed through to the unit manager
     * @return the unit returned by the unit manager
     */
    protected Unit getCIUnit(String name, boolean isFiller) {
        return this.unitManager.getUnit(name, isFiller, Context.EMPTY_CONTEXT);
    }

    /** @return the word registered under {@link Dictionary#SENTENCE_START_SPELLING} */
    @Override // edu.cmu.sphinx.linguist.dictionary.Dictionary
    public Word getSentenceStartWord() {
        return getWord(Dictionary.SENTENCE_START_SPELLING);
    }

    /** @return the word registered under {@link Dictionary#SENTENCE_END_SPELLING} */
    @Override // edu.cmu.sphinx.linguist.dictionary.Dictionary
    public Word getSentenceEndWord() {
        return getWord(Dictionary.SENTENCE_END_SPELLING);
    }

    /** @return the word registered under {@link Dictionary#SILENCE_SPELLING} */
    @Override // edu.cmu.sphinx.linguist.dictionary.Dictionary
    public Word getSilenceWord() {
        return getWord(Dictionary.SILENCE_SPELLING);
    }

    /**
     * Returns the word for the given spelling, building and caching it on first use.
     *
     * <p>For a spelling without a dictionary entry the lookup falls back, in this order, to the
     * configured replacement word and then to the g2p model.
     *
     * @param text spelling to look up
     * @return the word, or {@code null} when the spelling is unknown and no fallback applies
     */
    @Override // edu.cmu.sphinx.linguist.dictionary.Dictionary
    public Word getWord(String text) {
        Word cached = this.wordDictionary.get(text);
        if (cached != null) {
            return cached;
        }
        if (this.dictionary.get(text) != null) {
            return processEntry(text);
        }
        this.logger.info("The dictionary is missing a phonetic transcription for the word '" + text + "'");
        if (this.wordReplacement != null) {
            if (this.wordReplacement.equals(text)) {
                // The replacement itself is not in the dictionary; looking it up again would
                // recurse until the stack overflows.
                return null;
            }
            return getWord(this.wordReplacement);
        }
        // The decoder is null before allocate() and after deallocate(), so test it as well.
        if (isG2pModelConfigured() && this.g2pDecoder != null) {
            this.logger.info("Generating phonetic transcription(s) for the word '" + text + "' using g2p model");
            Word generated = extractPronunciation(text);
            this.wordDictionary.put(text, generated);
            return generated;
        }
        return null;
    }

    /**
     * Builds a word from the paths the g2p converter produces for the spelling. A path without
     * any entries yields a pronunciation consisting of the silence unit only.
     */
    private Word extractPronunciation(String text) {
        List<Pronunciation> pronunciations = new LinkedList<Pronunciation>();
        for (Path path : this.g2pDecoder.phoneticize(text, this.g2pMaxPron)) {
            List<Unit> units = new ArrayList<Unit>(path.getPath().size());
            for (String unitName : path.getPath()) {
                units.add(getCIUnit(unitName, false));
            }
            if (units.isEmpty()) {
                units.add(UnitManager.SILENCE);
            }
            pronunciations.add(new Pronunciation(units));
        }
        Pronunciation[] pronunciationArr = pronunciations.toArray(new Pronunciation[pronunciations.size()]);
        Word word = createWord(text, pronunciationArr, false);
        for (Pronunciation pronunciation : pronunciationArr) {
            pronunciation.setWord(word);
        }
        return word;
    }

    /**
     * Creates a word and stores its string form in {@link #dictionary} under the spelling,
     * replacing the raw line that was there before.
     */
    private Word createWord(String text, Pronunciation[] pronunciationArr, boolean isFiller) {
        Word word = new Word(text, pronunciationArr, isFiller);
        this.dictionary.put(text, word.toString());
        return word;
    }

    /**
     * Builds the word for a spelling from its raw line and the consecutive alternates stored
     * under {@code text(2)}, {@code text(3)}, ..., then caches it in {@link #wordDictionary}.
     * The filler flag of the word is taken from the last line that was processed.
     */
    private Word processEntry(String text) {
        List<Pronunciation> pronunciations = new LinkedList<Pronunciation>();
        boolean isFiller = false;
        int variant = 1;
        String line = this.dictionary.get(text);
        while (line != null) {
            StringTokenizer tokenizer = new StringTokenizer(line);
            // The first token is the spelling; filler lines carry FILLER_TAG in front of it.
            isFiller = tokenizer.nextToken().startsWith(FILLER_TAG);
            int unitCount = tokenizer.countTokens();
            List<Unit> units = new ArrayList<Unit>(unitCount);
            for (int i = 0; i < unitCount; i++) {
                units.add(getCIUnit(tokenizer.nextToken(), isFiller));
            }
            pronunciations.add(new Pronunciation(units));
            variant++;
            line = this.dictionary.get(text + '(' + variant + ')');
        }
        Pronunciation[] pronunciationArr = pronunciations.toArray(new Pronunciation[pronunciations.size()]);
        Word word = createWord(text, pronunciationArr, isFiller);
        for (Pronunciation pronunciation : pronunciationArr) {
            pronunciation.setWord(word);
        }
        this.wordDictionary.put(text, word);
        return word;
    }

    /**
     * Lists all entries of {@link #dictionary}, sorted by key, one entry per line in the form
     * {@code key   value}.
     */
    @Override
    public String toString() {
        Map<String, String> sorted = new TreeMap<String, String>(this.dictionary);
        StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, String> entry : sorted.entrySet()) {
            sb.append(entry.getKey());
            sb.append("   ").append(entry.getValue()).append('\n');
        }
        return sb.toString();
    }

    /**
     * Resolves every key recorded in {@link #fillerWords} through {@link #getWord(String)}.
     *
     * @return the resolved words, in the iteration order of the filler set
     */
    @Override // edu.cmu.sphinx.linguist.dictionary.Dictionary
    public Word[] getFillerWords() {
        Word[] fillers = new Word[this.fillerWords.size()];
        int index = 0;
        for (String spelling : this.fillerWords) {
            fillers[index++] = getWord(spelling);
        }
        return fillers;
    }

    /** Prints the result of {@link #toString()} to standard output. */
    public void dump() {
        System.out.print(toString());
    }

    /**
     * Loads each of the given addendum dictionaries as a non-filler dictionary.
     *
     * @param addenda locations to load; {@code null} means there is nothing to load
     * @throws IOException if one of the locations cannot be opened or read
     */
    private void loadCustomDictionaries(List<URL> addenda) throws IOException {
        if (addenda == null) {
            return;
        }
        for (URL url : addenda) {
            this.logger.info("Loading addendum dictionary from: " + url);
            loadDictionary(url.openStream(), false);
        }
    }
}
