/*
 * Decompiled with CFR 0.152.
 */
package opennlp.ccg.parse.tagger.util;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import opennlp.ccg.lexicon.DefaultTokenizer;

/**
 * Command-line converter that rewrites a bracketed parse corpus into a
 * colon-separated factored token format (judging by the class name, the
 * input is CCGbank and the output is meant for SRILM factored language
 * models).
 *
 * <p>Every input line that does not start with {@code ID=} produces exactly
 * one output line. Each leaf of the form {@code <L cat pos pos word ...>}
 * found on the input line becomes one token
 * {@code W-word:S-word:P-pos:T-cat}; tokens are separated by single spaces.
 *
 * <p>Options:
 * <ul>
 *   <li>{@code -i <file>} (alias {@code -input}) input corpus, default {@code train.auto}</li>
 *   <li>{@code -o <file>} output corpus, default {@code train.srilm}</li>
 *   <li>{@code -h} print usage and exit</li>
 * </ul>
 */
public class CCGBankToSRILMFLM {
    /** Help text printed for {@code -h}, for an empty command line, and on option errors. */
    private static final String USAGE = "\nCCGBankToSRILMFLM -i <inputCorpus> -o <outputCorpus> \n";

    /** Lines starting with this prefix are parse-ID headers and produce no output. */
    private static final String PARSE_ID_HEADER = "ID=";

    /** Matches one leaf node, from {@code <L} up to the first following {@code >}. */
    private static final Pattern LEAF = Pattern.compile("(<L\\s+.*?>)+?");

    /**
     * Parses the command line, then converts the input corpus to the output file.
     *
     * @param args command-line options as described in the class documentation
     * @throws FileNotFoundException if the input file cannot be opened
     * @throws IOException if reading or writing fails, or a leaf node is malformed
     */
    public static void main(String[] args) throws FileNotFoundException, IOException {
        if (args.length == 0 || args[0].equals("-h")) {
            System.out.println(USAGE);
            System.exit(0);
        }
        String inputCorp = "train.auto";
        String output = "train.srilm";
        for (int i = 0; i < args.length; ++i) {
            // "-input" is what the usage text historically advertised, so accept it as well.
            if (args[i].equals("-i") || args[i].equals("-input")) {
                inputCorp = optionValue(args, ++i);
                continue;
            }
            if (args[i].equals("-o")) {
                output = optionValue(args, ++i);
                continue;
            }
            System.out.println("Unrecognized option: " + args[i]);
        }

        // Open the reader first so that a missing input file does not create/truncate the output.
        BufferedReader reader = new BufferedReader(new FileReader(new File(inputCorp)));
        try {
            BufferedWriter writer = new BufferedWriter(new FileWriter(new File(output)));
            try {
                convert(reader, writer);
            } finally {
                // Closing also flushes whatever was buffered before a failure.
                writer.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Returns the value following an option, or prints an error plus usage and
     * exits with status 1 when the option is the last argument.
     *
     * @param args the full command line
     * @param valueIndex index at which the option's value is expected
     * @return the option value
     */
    private static String optionValue(String[] args, int valueIndex) {
        if (valueIndex >= args.length) {
            System.err.println("Missing value for option: " + args[valueIndex - 1]);
            System.out.println(USAGE);
            System.exit(1);
        }
        return args[valueIndex];
    }

    /**
     * Streams the corpus from {@code reader} to {@code writer}, one output line
     * per non-header input line.
     *
     * @param reader source of corpus lines
     * @param writer destination for the factored tokens
     * @throws IOException if I/O fails or a leaf has fewer than five fields
     */
    private static void convert(BufferedReader reader, BufferedWriter writer) throws IOException {
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.startsWith(PARSE_ID_HEADER)) {
                continue;
            }
            Matcher m = LEAF.matcher(line.trim());
            int cnt = 0;
            while (m.find()) {
                String leaf = m.group();
                // The leaf pattern allows arbitrary whitespace, so split on whitespace runs
                // rather than on a single space to keep the field indices stable.
                String[] parts = leaf.split("\\s+");
                if (parts.length < 5) {
                    throw new IOException("Malformed leaf node (expected at least 5 fields): " + leaf);
                }
                String word = parts[4];
                String pos = parts[2];
                String cat = parts[1];
                if (cnt++ > 0) {
                    writer.write(" ");
                }
                // The S- factor is filled with the surface word; no stemming is performed here.
                writer.write("W-" + DefaultTokenizer.escape(word) + ":S-" + DefaultTokenizer.escape(word) + ":P-" + DefaultTokenizer.escape(pos) + ":T-" + DefaultTokenizer.escape(cat));
            }
            // A line terminator is written even when the line contained no leaves.
            writer.newLine();
        }
    }
}

