/*
 * Decompiled with CFR 0.152.
 */
package opennlp.ccg.parse.supertagger.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.text.StringCharacterIterator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import opennlp.ccg.lexicon.Word;
import opennlp.ccg.parse.tagger.io.PipeDelimitedFactoredBundleCorpusIterator;
import opennlp.ccg.parse.tagger.io.SRILMFactoredBundleCorpusIterator;
import opennlp.ccg.util.Pair;

/**
 * Reads a corpus of {@link Word} sentences and writes two XML dictionaries:
 * one mapping each word form to its frequency and the supertags seen with it,
 * and one mapping each POS tag to the supertags seen with it.
 *
 * <p>Supertags whose total corpus count is below the configured cutoff are
 * left out of both dictionaries. An instance is single-use: {@link #extract()}
 * consumes the corpus iterator and closes the output files.
 */
public class TaggingDictionaryExtractor {
    private static final Logger LOGGER = Logger.getLogger(TaggingDictionaryExtractor.class.getName());

    /**
     * Encoding of the generated files. The XML declaration written by this
     * class carries no {@code encoding} attribute, so XML parsers assume
     * UTF-8; the bytes on disk have to agree with that.
     */
    private static final String OUTPUT_ENCODING = "UTF-8";

    /** Word form -> (occurrence count, supertags seen with that form). */
    private Map<String, Pair<Integer, Set<String>>> wdmap = new HashMap<String, Pair<Integer, Set<String>>>();
    /** POS tag -> supertags seen with that POS tag. */
    private Map<String, Set<String>> posmap = new HashMap<String, Set<String>>();
    /** Sentence iterator over the input corpus; {@code null} if construction failed. */
    private Iterator<List<Word>> incorp = null;
    /** Reader underlying {@link #incorp}, kept so that it can be closed. */
    private BufferedReader corpusReader = null;
    /** Writer for the word dictionary. */
    private BufferedWriter wbr = null;
    /** Writer for the POS dictionary. */
    private BufferedWriter pbr = null;
    /** Minimum corpus count a supertag needs in order to be written out. */
    private int minCatFreq = 1;

    /**
     * Escapes the five XML special characters in the given text.
     *
     * <p>{@code <}, {@code >}, {@code "}, {@code '} and {@code &} are replaced
     * by {@code &lt;}, {@code &gt;}, {@code &quot;}, {@code &#039;} and
     * {@code &amp;}; every other character is copied unchanged.
     *
     * @param aText the text to escape, may be {@code null}
     * @return the escaped text, or {@code null} if {@code aText} is {@code null}
     */
    public static String forXML(String aText) {
        if (aText == null) {
            return null;
        }
        StringBuilder result = new StringBuilder(aText.length());
        // Walk by index rather than with a CharacterIterator: the iterator's
        // end sentinel is the char U+FFFF, so a string that really contains
        // U+FFFF would be cut off at that position.
        for (int i = 0; i < aText.length(); i++) {
            char character = aText.charAt(i);
            switch (character) {
                case '<':
                    result.append("&lt;");
                    break;
                case '>':
                    result.append("&gt;");
                    break;
                case '"':
                    result.append("&quot;");
                    break;
                case '\'':
                    result.append("&#039;");
                    break;
                case '&':
                    result.append("&amp;");
                    break;
                default:
                    result.append(character);
                    break;
            }
        }
        return result.toString();
    }

    /**
     * Creates an extractor with a supertag frequency cutoff of 10.
     *
     * @param corpus           the input corpus file
     * @param wd               the file the word dictionary is written to
     * @param posd             the file the POS dictionary is written to
     * @param tokenisationType {@code "srilm"} (case-insensitive) for the SRILM
     *                         corpus reader; anything else selects the
     *                         pipe-delimited reader
     */
    public TaggingDictionaryExtractor(File corpus, File wd, File posd, String tokenisationType) {
        this(corpus, wd, posd, tokenisationType, 10);
    }

    /**
     * Creates an extractor, opening both output files and the corpus.
     *
     * <p>If any file cannot be opened the failure is logged, everything opened
     * so far is closed again, and the instance is left unusable:
     * {@link #extract()} will then throw {@link IllegalStateException}.
     *
     * @param corpus           the input corpus file
     * @param wd               the file the word dictionary is written to
     * @param posd             the file the POS dictionary is written to
     * @param tokenisationType {@code "srilm"} (case-insensitive) for the SRILM
     *                         corpus reader; anything else (including
     *                         {@code null}) selects the pipe-delimited reader
     * @param catFreq          minimum corpus count a supertag needs in order
     *                         to appear in the dictionaries
     */
    public TaggingDictionaryExtractor(File corpus, File wd, File posd, String tokenisationType, int catFreq) {
        this.minCatFreq = catFreq;
        try {
            this.wbr = openWriter(wd);
            this.pbr = openWriter(posd);
            this.corpusReader = new BufferedReader(new FileReader(corpus));
            if ("srilm".equalsIgnoreCase(tokenisationType)) {
                this.incorp = new SRILMFactoredBundleCorpusIterator(this.corpusReader);
            } else {
                this.incorp = new PipeDelimitedFactoredBundleCorpusIterator(this.corpusReader);
            }
        }
        catch (IOException ex) {
            LOGGER.log(Level.SEVERE, null, ex);
            // Do not keep half of the resources open on a failed construction.
            closeResources();
            this.wbr = null;
            this.pbr = null;
            this.corpusReader = null;
            this.incorp = null;
        }
    }

    /**
     * Reads the whole corpus, then writes the word dictionary followed by the
     * POS dictionary and closes all files.
     *
     * <p>I/O errors while writing are logged rather than thrown. The output
     * files and the corpus reader are closed in every case, including when an
     * unchecked exception escapes.
     *
     * @throws IllegalStateException if the constructor failed to open the
     *                               corpus or one of the output files
     */
    public void extract() {
        if (this.incorp == null || this.wbr == null || this.pbr == null) {
            throw new IllegalStateException(
                    "Extractor was not initialised: the corpus or an output file could not be opened (see log)");
        }
        try {
            Map<String, Integer> catCount = collectCounts();
            writeWordDictionary(catCount);
            writePosDictionary(catCount);
            // close() flushes; closing here (not only in finally) means a
            // failed flush is reported through the IOException handler below.
            this.wbr.close();
            this.pbr.close();
        }
        catch (IOException e) {
            LOGGER.log(Level.SEVERE, null, e);
        }
        finally {
            closeResources();
        }
    }

    /**
     * Consumes the corpus iterator, filling {@link #wdmap} and {@link #posmap}.
     *
     * @return the number of occurrences of each supertag in the corpus
     */
    private Map<String, Integer> collectCounts() {
        Map<String, Integer> catCount = new HashMap<String, Integer>();
        while (this.incorp.hasNext()) {
            for (Word w : this.incorp.next()) {
                String form = w.getForm();
                String pos = w.getPOS();
                String stag = w.getSupertag();

                Integer seen = catCount.get(stag);
                catCount.put(stag, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));

                Pair<Integer, Set<String>> entry = this.wdmap.get(form);
                if (entry == null) {
                    Set<String> tags = new HashSet<String>();
                    tags.add(stag);
                    this.wdmap.put(form, new Pair<Integer, Set<String>>(Integer.valueOf(1), tags));
                } else {
                    entry.b.add(stag);
                    // A fresh Pair is stored for the incremented count, as the
                    // original code did, instead of mutating the old one.
                    this.wdmap.put(form,
                            new Pair<Integer, Set<String>>(Integer.valueOf(entry.a.intValue() + 1), entry.b));
                }

                Set<String> posTags = this.posmap.get(pos);
                if (posTags == null) {
                    posTags = new HashSet<String>();
                    this.posmap.put(pos, posTags);
                }
                posTags.add(stag);
            }
        }
        return catCount;
    }

    /**
     * Writes the {@code <wdict>} document to the word dictionary writer.
     *
     * @param catCount corpus count of each supertag, used for the cutoff
     * @throws IOException if writing fails
     */
    private void writeWordDictionary(Map<String, Integer> catCount) throws IOException {
        this.wbr.write("<?xml version=\"1.0\"?>\n");
        this.wbr.write("<wdict>\n");
        for (Map.Entry<String, Pair<Integer, Set<String>>> e : this.wdmap.entrySet()) {
            Pair<Integer, Set<String>> info = e.getValue();
            this.wbr.write("\t<entry word=\"" + TaggingDictionaryExtractor.forXML(e.getKey())
                    + "\" freq=\"" + info.a + "\">\n");
            writeSupertags(this.wbr, info.b, catCount);
            this.wbr.write("\t</entry>\n");
        }
        this.wbr.write("</wdict>");
    }

    /**
     * Writes the {@code <posdict>} document to the POS dictionary writer.
     *
     * @param catCount corpus count of each supertag, used for the cutoff
     * @throws IOException if writing fails
     */
    private void writePosDictionary(Map<String, Integer> catCount) throws IOException {
        this.pbr.write("<?xml version=\"1.0\"?>\n");
        this.pbr.write("<posdict>\n");
        for (Map.Entry<String, Set<String>> e : this.posmap.entrySet()) {
            this.pbr.write("\t<entry pos=\"" + TaggingDictionaryExtractor.forXML(e.getKey()) + "\">\n");
            writeSupertags(this.pbr, e.getValue(), catCount);
            this.pbr.write("\t</entry>\n");
        }
        this.pbr.write("</posdict>");
    }

    /**
     * Writes one {@code <supertag>} element for every tag whose corpus count
     * reaches {@link #minCatFreq}.
     *
     * @param out      the writer to emit to
     * @param tags     the candidate supertags
     * @param catCount corpus count of each supertag
     * @throws IOException if writing fails
     */
    private void writeSupertags(BufferedWriter out, Set<String> tags, Map<String, Integer> catCount)
            throws IOException {
        for (String st : tags) {
            Integer count = catCount.get(st);
            if (count != null && count.intValue() >= this.minCatFreq) {
                out.write("\t\t<supertag> " + TaggingDictionaryExtractor.forXML(st) + " </supertag>\n");
            }
        }
    }

    /** Opens a buffered writer on the given file using {@link #OUTPUT_ENCODING}. */
    private static BufferedWriter openWriter(File target) throws IOException {
        return new BufferedWriter(new OutputStreamWriter(new FileOutputStream(target), OUTPUT_ENCODING));
    }

    /** Closes both writers and the corpus reader; safe to call repeatedly. */
    private void closeResources() {
        closeAndLog(this.wbr);
        closeAndLog(this.pbr);
        closeAndLog(this.corpusReader);
    }

    /** Closes the resource if it is non-null, logging (not throwing) any failure. */
    private static void closeAndLog(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        }
        catch (IOException ex) {
            LOGGER.log(Level.WARNING, null, ex);
        }
    }

    /**
     * Command-line entry point. The corpus is always read with the SRILM
     * reader.
     *
     * <p>Options: {@code -i <inputCorpus>}, {@code -w <wordOutputXMLFile>},
     * {@code -p <POSOutputXMLFile>} (all required) and
     * {@code -f <catFreqCutoff>} (optional, default 10). {@code -h} as first
     * argument prints the usage text. A missing required option or a missing
     * option value prints the usage text to standard error and exits with
     * status 1.
     *
     * @param args the command-line arguments
     * @throws Exception if the cutoff is not an integer or extraction fails
     *                   with an unchecked exception
     */
    public static void main(String[] args) throws Exception {
        String usage = "\nTaggingDictionaryExtractor -i <inputCorpus> -f <catFreqCutoff> -p <POSOutputXMLFile> -w <wordOutputXMLFile>\n\n";
        if (args.length > 0 && args[0].equals("-h")) {
            System.out.print(usage);
            System.exit(0);
        }
        String inputCorp = null;
        String wOutput = null;
        String pOutput = null;
        int catFreq = 10;
        for (int i = 0; i < args.length; ++i) {
            String option = args[i];
            boolean known = option.equals("-i") || option.equals("-w")
                    || option.equals("-p") || option.equals("-f");
            if (!known) {
                System.err.println("Unknown command-line option: " + option);
                continue;
            }
            if (i + 1 >= args.length) {
                System.err.println("Missing value for command-line option: " + option);
                System.err.print(usage);
                System.exit(1);
                return;
            }
            String value = args[++i];
            if (option.equals("-i")) {
                inputCorp = value;
            } else if (option.equals("-w")) {
                wOutput = value;
            } else if (option.equals("-p")) {
                pOutput = value;
            } else {
                catFreq = Integer.parseInt(value);
            }
        }
        if (inputCorp == null || wOutput == null || pOutput == null) {
            System.err.println("Options -i, -w and -p are required.");
            System.err.print(usage);
            System.exit(1);
            return;
        }
        File in = new File(inputCorp);
        File wout = new File(wOutput);
        File pout = new File(pOutput);
        TaggingDictionaryExtractor tde = new TaggingDictionaryExtractor(in, wout, pout, "SRILM", catFreq);
        tde.extract();
    }
}

