/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.international.Languages;
import edu.stanford.nlp.international.arabic.ArabicMorphoFeatureSpecification;
import edu.stanford.nlp.international.french.FrenchMorphoFeatureSpecification;
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.parser.lexparser.BaseLexicon;
import edu.stanford.nlp.parser.lexparser.FactoredLexiconEvent;
import edu.stanford.nlp.parser.lexparser.IntTaggedWord;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.TwoDimensionalIntCounter;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.util.HashIndex;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Pair;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;

public class FactoredLexicon
extends BaseLexicon {
    private static final long serialVersionUID = -744693222804176489L;
    /** Debug switch; not referenced by any method of this class. */
    private static final boolean DEBUG = false;
    /** Converts a rich morphological tag string into the reduced feature set used as the morph factor. */
    private MorphoFeatureSpecification morphoSpec;
    /** Placeholder stored in {@link #morphIndex} when the reduced morphological tag is empty. */
    private static final String NO_MORPH_ANALYSIS = "xXxNONExXx";
    /** Index of reduced morphological tag strings; supplies the morph ids used by the counters below. */
    private Index<String> morphIndex = new HashIndex<String>();
    /** Counts keyed by (word id, tag id), filled in by {@code train}. */
    private TwoDimensionalIntCounter<Integer, Integer> wordTag = new TwoDimensionalIntCounter<Integer, Integer>(40000);
    /** Per-tag counts of word tokens that {@code train} treated as rare (word total below 2). */
    private Counter<Integer> wordTagUnseen = new ClassicCounter<Integer>(500);
    /** Counts keyed by (lemma id, tag id); lemma ids come from the shared word index. */
    private TwoDimensionalIntCounter<Integer, Integer> lemmaTag = new TwoDimensionalIntCounter<Integer, Integer>(40000);
    /** Per-tag counts of lemma tokens that {@code train} treated as rare (lemma total below 2). */
    private Counter<Integer> lemmaTagUnseen = new ClassicCounter<Integer>(500);
    /** Counts keyed by (morph id, tag id). */
    private TwoDimensionalIntCounter<Integer, Integer> morphTag = new TwoDimensionalIntCounter<Integer, Integer>(500);
    /** Per-tag counts of morph tokens that {@code train} treated as rare (morph total below 2). */
    private Counter<Integer> morphTagUnseen = new ClassicCounter<Integer>(500);
    /** Number of training tokens observed for each tag id. */
    private Counter<Integer> tagCounter = new ClassicCounter<Integer>(300);

    /**
     * Creates a factored lexicon that relies on the superclass defaults for its options.
     *
     * @param morphoSpec specification used to reduce rich morphological tags to features
     * @param wordIndex  word index handed to the {@link BaseLexicon} constructor
     * @param tagIndex   tag index handed to the {@link BaseLexicon} constructor
     */
    public FactoredLexicon(MorphoFeatureSpecification morphoSpec, Index<String> wordIndex, Index<String> tagIndex) {
        super(wordIndex, tagIndex);
        this.morphoSpec = morphoSpec;
    }

    /**
     * Creates a factored lexicon configured by explicit parser options.
     *
     * @param op         options handed to the {@link BaseLexicon} constructor
     * @param morphoSpec specification used to reduce rich morphological tags to features
     * @param wordIndex  word index handed to the {@link BaseLexicon} constructor
     * @param tagIndex   tag index handed to the {@link BaseLexicon} constructor
     */
    public FactoredLexicon(Options op, MorphoFeatureSpecification morphoSpec, Index<String> wordIndex, Index<String> tagIndex) {
        super(op, wordIndex, tagIndex);
        this.morphoSpec = morphoSpec;
    }

    /**
     * Returns the candidate taggings for a word id.
     *
     * <p>The boundary word ({@code ".$."}) and every word for which {@code isKnown} is true
     * are served directly from {@code rulesWithWord}. For any other word a fresh set is built
     * that pairs the word with each tag listed under the {@code "UNK"} entry.
     *
     * @param word        id of the word in {@code wordIndex}
     * @param loc         position of the word; not used by this implementation
     * @param featureSpec morphological feature string; not used by this implementation
     * @return iterator over the candidate {@link IntTaggedWord}s
     */
    @Override
    public Iterator<IntTaggedWord> ruleIteratorByWord(int word, int loc, String featureSpec) {
        // Boundary symbol and known words share the same lookup, so test them together.
        if (word == this.wordIndex.indexOf(".$.") || this.isKnown(word)) {
            return this.rulesWithWord[word].iterator();
        }
        // Typed (not raw) list: iterating a raw List as IntTaggedWord does not compile.
        List<IntTaggedWord> uwRules = this.rulesWithWord[this.wordIndex.indexOf("UNK")];
        HashSet<IntTaggedWord> lexRules = new HashSet<IntTaggedWord>(10);
        for (IntTaggedWord iTW : uwRules) {
            // Re-key the open-class tag onto the actual word id.
            lexRules.add(new IntTaggedWord(word, iTW.tag));
        }
        return lexRules.iterator();
    }

    /**
     * Scores a (word, tag) pair as the sum of the log word/tag factor and the log morph/tag factor.
     *
     * <p>The boundary word paired with the boundary tag always scores {@code 0.0f}. The lemma
     * factor is fixed at {@code 0.0}; {@code probLemmaTag} is not consulted here.
     *
     * @param iTW         word id and tag id to score
     * @param loc         position of the word, forwarded to the word/tag factor
     * @param word        surface string, used for morphological splitting and unknown-word scoring
     * @param featureSpec morphological feature string attached to the word
     * @return the log score, or {@link Float#NEGATIVE_INFINITY} when it is not greater than -100
     */
    @Override
    public float score(IntTaggedWord iTW, int loc, String word, String featureSpec) {
        int wordId = iTW.word();
        int tagId = iTW.tag();
        int boundaryId = this.wordIndex.indexOf(".$.");
        int boundaryTagId = this.tagIndex.indexOf(".$$.");
        if (wordId == boundaryId && tagId == boundaryTagId) {
            return 0.0f;
        }

        // Only the morphological half of the split is needed; the lemma factor is disabled below.
        Pair<String, String> lemmaMorph = MorphoFeatureSpecification.splitMorphString(word, featureSpec);
        String richMorphTag = lemmaMorph.second();
        String reducedMorphTag = this.morphoSpec.strToFeatures(richMorphTag).toString().trim();
        if (reducedMorphTag.length() == 0) {
            reducedMorphTag = NO_MORPH_ANALYSIS;
        }
        // NOTE(review): this lookup adds previously unseen analyses to morphIndex while scoring.
        int morphId = this.morphIndex.indexOf(reducedMorphTag, true);

        double p_W_Tf = Math.log(this.probWordTag(word, loc, wordId, tagId));
        double p_L_T = 0.0;
        double p_M_T = Math.log(this.probMorphTag(tagId, morphId));
        double p_W_T = p_W_Tf + p_L_T + p_M_T;

        // Very unlikely taggings (and NaN) are filtered out entirely.
        return p_W_T > -100.0 ? (float)p_W_T : Float.NEGATIVE_INFINITY;
    }

    /**
     * Word/tag factor of the lexicon score.
     *
     * <p>For a word with a positive count in {@code wordTag} the result is
     * {@code p(tag|word) * p(word) / p(tag)}. The relative frequency is used directly when the
     * word was counted more than 100 times and the pair was observed; otherwise it is smoothed
     * with {@code smooth[1]} towards the tag distribution in {@code wordTagUnseen}. For a word
     * with no count the result is the exponential of the unknown word model's score.
     *
     * @param word   surface string, forwarded to the unknown word model
     * @param loc    position of the word, forwarded to the unknown word model
     * @param wordId id of the word
     * @param tagId  id of the tag
     * @return the factor value (not a log)
     */
    private double probWordTag(String word, int loc, int wordId, int tagId) {
        final double wordCount = this.wordTag.totalCount(wordId);
        final double pairCount = this.wordTag.getCount(wordId, tagId);
        final double wordPrior = wordCount / (double)this.wordTag.totalCount();
        final double tagCount = this.tagCounter.getCount(tagId);
        final double tagPrior = tagCount / this.tagCounter.totalCount();

        if (!(wordCount > 0.0)) {
            // Word never counted in training: defer to the unknown word model.
            IntTaggedWord tagging = new IntTaggedWord(wordId, tagId);
            double c_T = this.tagCounter.getCount(tagId);
            return Math.exp(this.getUnknownWordModel().score(tagging, loc, c_T, this.tagCounter.totalCount(), this.smooth[0], word));
        }

        double tagGivenWord;
        if (wordCount > 100.0 && pairCount > 0.0) {
            tagGivenWord = pairCount / wordCount;
        } else {
            double unseenTagCount = this.wordTagUnseen.getCount(tagId);
            double unseenTagPrior = unseenTagCount / this.wordTagUnseen.totalCount();
            tagGivenWord = (pairCount + this.smooth[1] * unseenTagPrior) / (wordCount + this.smooth[1]);
        }
        return tagGivenWord * wordPrior / tagPrior;
    }

    /**
     * Lemma/tag factor, the lemma counterpart of {@code probWordTag}. No method of this class
     * calls it.
     *
     * <p>For a lemma with a positive count the result is {@code p(tag|lemma) * p(lemma) / p(tag)},
     * using the raw relative frequency when the lemma was counted more than 100 times and the pair
     * was observed, and a {@code smooth[1]}-smoothed estimate otherwise. For a lemma with no count
     * the result is the tag's count in {@code lemmaTagUnseen} divided by the total tag count.
     *
     * @param word    surface string; not used
     * @param loc     position of the word; not used
     * @param tagId   id of the tag
     * @param lemmaId id of the lemma
     * @return the factor value (not a log)
     */
    private double probLemmaTag(String word, int loc, int tagId, int lemmaId) {
        final double lemmaCount = this.lemmaTag.totalCount(lemmaId);
        final double pairCount = this.lemmaTag.getCount(lemmaId, tagId);
        final double lemmaPrior = lemmaCount / (double)this.lemmaTag.totalCount();
        final double tagCount = this.tagCounter.getCount(tagId);
        final double tagPrior = tagCount / this.tagCounter.totalCount();

        if (!(lemmaCount > 0.0)) {
            double unseenTagCount = this.lemmaTagUnseen.getCount(tagId);
            return unseenTagCount / this.tagCounter.totalCount();
        }

        double tagGivenLemma;
        if (lemmaCount > 100.0 && pairCount > 0.0) {
            tagGivenLemma = pairCount / lemmaCount;
        } else {
            double unseenTagCount = this.lemmaTagUnseen.getCount(tagId);
            double unseenTagPrior = unseenTagCount / this.lemmaTagUnseen.totalCount();
            tagGivenLemma = (pairCount + this.smooth[1] * unseenTagPrior) / (lemmaCount + this.smooth[1]);
        }
        return tagGivenLemma * lemmaPrior / tagPrior;
    }

    /**
     * Morph/tag factor of the lexicon score.
     *
     * <p>When the morphological analysis was counted more than 100 times and was observed with
     * the tag, the result is {@code p(tag|morph) * p(morph) / p(tag)}. In every other case a
     * constant {@code 1 / (morphTag total + number of tags + 1)} is returned.
     *
     * @param tagId   id of the tag
     * @param morphId id of the reduced morphological analysis
     * @return the factor value (not a log)
     */
    private double probMorphTag(int tagId, int morphId) {
        final double morphCount = this.morphTag.totalCount(morphId);
        final double pairCount = this.morphTag.getCount(morphId, tagId);
        final double morphPrior = morphCount / (double)this.morphTag.totalCount();
        final double tagCount = this.tagCounter.getCount(tagId);
        final double tagPrior = tagCount / this.tagCounter.totalCount();

        final boolean wellAttested = morphCount > 100.0 && pairCount > 0.0;
        if (!wellAttested) {
            return 1.0 / ((double)(this.morphTag.totalCount() + this.tagIndex.size()) + 1.0);
        }
        double tagGivenMorph = pairCount / morphCount;
        return tagGivenMorph * morphPrior / tagPrior;
    }

    /**
     * Collects the sufficient statistics of the lexicon from a collection of trees.
     *
     * <p>The unknown word model trainer is run first over {@code trees} with weight 1.0. Every
     * token then increments the word/tag, lemma/tag, morph/tag and tag counters. Once the tree
     * number exceeds {@code fractionBeforeUnseenCounting * trees.size()}, tokens whose word,
     * lemma or morph total is still below 2 also increment the matching "unseen" tag counter.
     *
     * @param trees    trees supplying the tags (and the words when {@code rawTrees} is null)
     * @param rawTrees optional trees, iterated in step with {@code trees}, supplying the words;
     *                 may be null
     */
    @Override
    public void train(Collection<Tree> trees, Collection<Tree> rawTrees) {
        final double unitWeight = 1.0;
        this.uwModelTrainer.train(trees, unitWeight);

        final double treeTotal = trees.size();
        final Iterator<Tree> rawIterator = rawTrees == null ? null : rawTrees.iterator();
        int treeNumber = 0;
        for (Tree tree : trees) {
            ArrayList<Label> words = rawTrees == null ? tree.yield() : rawIterator.next().yield();
            List<Label> tags = tree.preTerminalYield();
            final int tokenCount = words.size();
            for (int pos = 0; pos < tokenCount; ++pos) {
                Label token = words.get(pos);
                String word = token.value();
                int wordId = this.wordIndex.indexOf(word, true);
                String tag = tags.get(pos).value();
                int tagId = this.tagIndex.indexOf(tag, true);

                // The morphological feature string is read from the label's original text.
                String featureStr = ((CoreLabel)token).originalText();
                Pair<String, String> split = MorphoFeatureSpecification.splitMorphString(word, featureStr);
                String lemma = split.first();
                int lemmaId = this.wordIndex.indexOf(lemma, true);
                String richMorphTag = split.second();
                String reducedMorphTag = this.morphoSpec.strToFeatures(richMorphTag).toString().trim();
                if (reducedMorphTag.length() == 0) {
                    reducedMorphTag = NO_MORPH_ANALYSIS;
                }
                int morphId = this.morphIndex.indexOf(reducedMorphTag, true);

                this.wordTag.incrementCount(wordId, tagId);
                this.lemmaTag.incrementCount(lemmaId, tagId);
                this.morphTag.incrementCount(morphId, tagId);
                this.tagCounter.incrementCount(tagId);

                // Rare-item statistics are only gathered in the later part of the treebank.
                if ((double)treeNumber > this.op.trainOptions.fractionBeforeUnseenCounting * treeTotal) {
                    if (!this.wordTag.firstKeySet().contains(wordId) || this.wordTag.getCounter(wordId).totalCount() < 2.0) {
                        this.wordTagUnseen.incrementCount(tagId);
                    }
                    if (!this.lemmaTag.firstKeySet().contains(lemmaId) || this.lemmaTag.getCounter(lemmaId).totalCount() < 2.0) {
                        this.lemmaTagUnseen.incrementCount(tagId);
                    }
                    if (!this.morphTag.firstKeySet().contains(morphId) || this.morphTag.getCounter(morphId).totalCount() < 2.0) {
                        this.morphTagUnseen.incrementCount(tagId);
                    }
                }
            }
            ++treeNumber;
        }
    }

    /**
     * Builds {@code rulesWithWord}, the per-word lists of allowed taggings.
     *
     * <p>Every (word, tag) pair present in {@code wordTag} is listed under its word. A tag is
     * listed under the {@code "UNK"} word when the unknown word model's unseen counter for that
     * tag (keyed with word id -1) exceeds {@code openClassTypesThreshold}. The open-class tags are
     * reported on stderr, and the boundary word is finally given the boundary tag.
     */
    @Override
    protected void initRulesWithWord() {
        final int unkId = this.wordIndex.indexOf("UNK", true);
        final int boundaryWord = this.wordIndex.indexOf(".$.", true);
        final int boundaryTag = this.tagIndex.indexOf(".$$.", true);

        final int vocabularySize = this.wordIndex.size();
        this.rulesWithWord = new List[vocabularySize];
        for (int id = 0; id < vocabularySize; ++id) {
            this.rulesWithWord[id] = new ArrayList<IntTaggedWord>(1);
        }

        // Gather each observed pair plus one word-less entry (word id -1) per observed tag.
        HashSet<IntTaggedWord> taggings = new HashSet<IntTaggedWord>(40000);
        for (int wordId : this.wordTag.firstKeySet()) {
            for (int tagId : this.wordTag.getCounter(wordId).keySet()) {
                taggings.add(new IntTaggedWord(wordId, tagId));
                taggings.add(new IntTaggedWord(-1, tagId));
            }
        }

        for (IntTaggedWord tagging : taggings) {
            if (tagging.word() != -1) {
                this.rulesWithWord[tagging.word].add(tagging);
                continue;
            }
            double types = this.uwModel.unSeenCounter().getCount(tagging);
            if (types > (double)this.trainOptions.openClassTypesThreshold) {
                IntTaggedWord unkTagging = new IntTaggedWord(unkId, tagging.tag);
                if (!this.rulesWithWord[unkId].contains(unkTagging)) {
                    this.rulesWithWord[unkId].add(unkTagging);
                }
            }
        }

        System.err.print("The " + this.rulesWithWord[unkId].size() + " open class tags are: [");
        for (IntTaggedWord openClass : this.rulesWithWord[unkId]) {
            System.err.print(" " + this.tagIndex.get(openClass.tag()));
        }
        System.err.println(" ] ");

        this.rulesWithWord[boundaryWord].add(new IntTaggedWord(boundaryWord, boundaryTag));
    }

    /**
     * Converts every token of the given trees into a {@link FactoredLexiconEvent}.
     *
     * <p>Ids are looked up in the lexicon's indices without adding new entries. Tokens whose tag
     * is absent from the tag index (negative id) are dropped with a message on stderr; word,
     * lemma and morph ids are passed through as returned by the lookup.
     *
     * @param treebank trees to convert; leaf labels are cast to {@link CoreLabel}
     * @param lexicon  lexicon whose indices and morphological specification are used
     * @return one event per retained token, in tree and token order
     */
    private static List<FactoredLexiconEvent> treebankToLexiconEvents(List<Tree> treebank, FactoredLexicon lexicon) {
        ArrayList<FactoredLexiconEvent> events = new ArrayList<FactoredLexiconEvent>(70000);
        for (Tree tree : treebank) {
            ArrayList<Label> yield = tree.yield();
            List<Label> preterm = tree.preTerminalYield();
            assert (yield.size() == preterm.size());
            int yieldLen = yield.size();
            for (int i = 0; i < yieldLen; ++i) {
                String tag = preterm.get(i).value();
                int tagId = lexicon.tagIndex.indexOf(tag);
                String word = yield.get(i).value();
                int wordId = lexicon.wordIndex.indexOf(word);
                if (tagId < 0) {
                    System.err.println("Discarding training example: " + word + " " + tag);
                    continue;
                }
                String featureStr = ((CoreLabel)yield.get(i)).originalText();
                Pair<String, String> lemmaMorph = MorphoFeatureSpecification.splitMorphString(word, featureStr);
                String lemma = lemmaMorph.first();
                String richTag = lemmaMorph.second();
                // Trim exactly as train() and score() do, so the lookup below uses the same
                // key under which the analysis was stored in morphIndex.
                String reducedTag = lexicon.morphoSpec.strToFeatures(richTag).toString().trim();
                if (reducedTag.length() == 0) {
                    reducedTag = NO_MORPH_ANALYSIS;
                }
                int lemmaId = lexicon.wordIndex.indexOf(lemma);
                int morphId = lexicon.morphIndex.indexOf(reducedTag);
                events.add(new FactoredLexiconEvent(wordId, tagId, lemmaId, morphId, i, word, featureStr));
            }
        }
        return events;
    }

    /**
     * Builds the tuning events from a development treebank.
     *
     * <p>Every non-leaf node of every tree is passed to {@code tlpp.transformTree} together with
     * its root; the trees are then converted with {@code treebankToLexiconEvents}.
     *
     * @param devTreebank development trees
     * @param lexicon     lexicon whose indices are used to encode the events
     * @param tlpp        language parameters providing the tree transformation
     * @return the lexicon events of the development trees
     */
    private static List<FactoredLexiconEvent> getTuningSet(Treebank devTreebank, FactoredLexicon lexicon, TreebankLangParserParams tlpp) {
        ArrayList<Tree> transformed = new ArrayList<Tree>(3000);
        for (Tree root : devTreebank) {
            for (Tree node : root) {
                if (!node.isLeaf()) {
                    tlpp.transformTree(node, root);
                }
            }
            transformed.add(root);
        }
        return FactoredLexicon.treebankToLexiconEvents(transformed, lexicon);
    }

    /**
     * Creates the parser options with the unknown-word settings of the given language.
     *
     * @param language language to configure; only Arabic and French are supported
     * @return a new {@link Options} instance with its lexicon options set
     * @throws UnsupportedOperationException for any other language
     */
    private static Options getOptions(Languages.Language language) {
        Options options = new Options();
        switch (language) {
            case Arabic:
                options.lexOptions.useUnknownWordSignatures = 9;
                options.lexOptions.unknownPrefixSize = 1;
                options.lexOptions.unknownSuffixSize = 1;
                options.lexOptions.uwModelTrainer = "edu.stanford.nlp.parser.lexparser.ArabicUnknownWordModelTrainer";
                break;
            case French:
                options.lexOptions.useUnknownWordSignatures = 1;
                options.lexOptions.unknownPrefixSize = 1;
                options.lexOptions.unknownSuffixSize = 2;
                options.lexOptions.uwModelTrainer = "edu.stanford.nlp.parser.lexparser.FrenchUnknownWordModelTrainer";
                break;
            default:
                throw new UnsupportedOperationException();
        }
        return options;
    }

    /**
     * Command-line driver: trains a factored lexicon on one treebank and reports its tagging
     * accuracy on another.
     *
     * <p>Expected arguments, in order: language name, comma-separated list of
     * {@code MorphoFeatureType} names to activate, training treebank path, development treebank
     * path. With any other argument count a usage line is printed and the JVM exits with -1.
     * Progress goes to stdout; per-event diagnostics, the accuracy and the error breakdown by
     * gold tag go to stderr.
     *
     * @param args command-line arguments as described above
     * @throws UnsupportedOperationException if the language is neither Arabic nor French
     */
    public static void main(String[] args) {
        if (args.length != 4) {
            System.err.printf("Usage: java %s language features train_file dev_file%n", FactoredLexicon.class.getName());
            System.exit(-1);
        }
        Languages.Language language = Languages.Language.valueOf(args[0]);
        TreebankLangParserParams tlpp = Languages.getLanguageParams(language);
        DiskTreebank trainTreebank = tlpp.diskTreebank();
        trainTreebank.loadPath(args[2]);
        DiskTreebank devTreebank = tlpp.diskTreebank();
        devTreebank.loadPath(args[3]);
        Options options = FactoredLexicon.getOptions(language);

        // Select the morphological specification and switch the language pack to factored mode.
        MorphoFeatureSpecification morphoSpec;
        if (language.equals(Languages.Language.Arabic)) {
            morphoSpec = new ArabicMorphoFeatureSpecification();
            String[] languageOptions = new String[]{"-arabicFactored"};
            tlpp.setOptionFlag(languageOptions, 0);
        } else if (language.equals(Languages.Language.French)) {
            morphoSpec = new FrenchMorphoFeatureSpecification();
            String[] languageOptions = new String[]{"-frenchFactored"};
            tlpp.setOptionFlag(languageOptions, 0);
        } else {
            throw new UnsupportedOperationException();
        }

        String featureList = args[1];
        String[] features = featureList.trim().split(",");
        for (String feature : features) {
            morphoSpec.activate(MorphoFeatureSpecification.MorphoFeatureType.valueOf(feature));
        }
        System.out.println("Language: " + language.toString());
        System.out.println("Features: " + args[1]);

        System.out.print("Loading training trees...");
        ArrayList<Tree> trainTrees = new ArrayList<Tree>(19000);
        HashIndex<String> wordIndex = new HashIndex<String>();
        HashIndex<String> tagIndex = new HashIndex<String>();
        for (Tree tree : trainTreebank) {
            for (Tree subTree : tree) {
                if (!subTree.isLeaf()) {
                    tlpp.transformTree(subTree, tree);
                }
            }
            trainTrees.add(tree);
        }
        System.out.printf("Done! (%d trees)%n", trainTrees.size());

        System.out.print("Collecting sufficient statistics for lexicon...");
        FactoredLexicon lexicon = new FactoredLexicon(options, morphoSpec, wordIndex, tagIndex);
        lexicon.initializeTraining(trainTrees.size());
        lexicon.train(trainTrees, null);
        lexicon.finishTraining();
        System.out.println("Done!");
        // Drop the reference so the training trees can be collected before tuning.
        trainTrees = null;

        System.out.print("Loading tuning set...");
        List<FactoredLexiconEvent> tuningSet = FactoredLexicon.getTuningSet(devTreebank, lexicon, tlpp);
        System.out.printf("...Done! (%d events)%n", tuningSet.size());

        int nCorrect = 0;
        ClassicCounter<String> errors = new ClassicCounter<String>();
        for (FactoredLexiconEvent event : tuningSet) {
            Iterator<IntTaggedWord> itr = lexicon.ruleIteratorByWord(event.word(), event.getLoc(), event.featureStr());
            ClassicCounter<Integer> logScores = new ClassicCounter<Integer>();
            boolean noRules = true;
            // Stays -1 when the gold tag is not among the candidate taggings.
            int goldTagId = -1;
            while (itr.hasNext()) {
                noRules = false;
                IntTaggedWord iTW = itr.next();
                if (iTW.tag() == event.tagId()) {
                    System.err.print("GOLD-");
                    goldTagId = iTW.tag();
                }
                float tagScore = lexicon.score(iTW, event.getLoc(), event.word(), event.featureStr());
                logScores.incrementCount(iTW.tag(), tagScore);
            }
            if (noRules) {
                System.err.printf("NO TAGGINGS: %s %s%n", event.word(), event.featureStr());
            } else {
                int hypTagId = Counters.argmax(logScores);
                if (hypTagId == goldTagId) {
                    ++nCorrect;
                } else {
                    String goldTag = goldTagId < 0 ? "UNSEEN" : lexicon.tagIndex.get(goldTagId);
                    errors.incrementCount(goldTag);
                }
            }
            System.err.println();
        }

        double acc = (double)nCorrect / (double)tuningSet.size();
        System.err.printf("%n%nACCURACY: %.2f%n%n", acc * 100.0);
        System.err.println("% of errors by type:");
        // Typed list: iterating a raw ArrayList as String does not compile.
        List<String> biggestKeys = new ArrayList<String>(errors.keySet());
        // Sort on the raw counts first; normalising afterwards only rescales them for printing.
        Collections.sort(biggestKeys, Counters.toComparator(errors, false, true));
        Counters.normalize(errors);
        for (String key : biggestKeys) {
            System.err.printf("%s\t%.2f%n", key, errors.getCount(key) * 100.0);
        }
    }
}

