/*
 * Decompiled with CFR 0.152.
 */
package it.uniroma2.dtk.main;

import it.uniroma2.dtk.dt.GenericDT;
import it.uniroma2.dtk.op.convolution.ShuffledCircularConvolution;
import it.uniroma2.svd.writer.DenseBinaryMatrix;
import it.uniroma2.util.math.ArrayMath;
import it.uniroma2.util.tree.Tree;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.util.ArrayList;
import java.util.Enumeration;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.SparseInstance;
import weka.core.converters.ConverterUtils;
import weka.core.converters.Saver;
import weka.core.matrix.Matrix;

/**
 * Command-line tool that reads syntactic trees and writes the vectors computed for them
 * by {@link GenericDT} to an output file.
 *
 * <p>The output mode is chosen in {@link #process()} with this priority: {@code -weka},
 * then {@code -wekadense}, then the format selected with {@code -of} ({@code dsm} by default).
 */
public class DTBuilder {
    /** Command-line options understood by this tool; populated in the static initializer. */
    protected static Options options = new Options();
    /** Output format used when neither weka mode is requested; set from {@code -of}. */
    protected static OutputTypes output_type = OutputTypes.dsm;
    /** Fully qualified class name of the weka {@link Saver} used by the {@code -weka} mode. */
    protected String WekaConverter = "weka.core.converters.ArffSaver";
    /** True when {@code -weka} was given. */
    protected boolean useWeka = false;
    /** True when {@code -wekadense} was given. */
    private boolean useWekaDense = false;
    /** True when {@code -verbose} was given; enables the elapsed-time report. */
    private boolean verbose = false;
    /** Milliseconds spent inside {@code GenericDT.dt}, accumulated by {@link #timedDt(Tree)}. */
    private long elapsed = 0L;
    /** Tree-to-vector encoder configured from the command line. */
    protected GenericDT dt = null;
    /** File given with {@code -input}. */
    protected File inputFile = null;
    /** File given with {@code -output}. */
    protected File outputFile = null;
    /** Not assigned or read anywhere in this class. */
    protected File outputFileType = null;

    /**
     * Parses the command line, runs the conversion and terminates the JVM.
     *
     * <p>The exit status is 0 when the conversion completes and 1 when any exception is caught.
     *
     * @param args command-line arguments, see the options registered in the static initializer
     */
    public static void main(String[] args) {
        // Report failure unless the whole run completes.
        int exitCode = 1;
        try {
            DTBuilder dtb = new DTBuilder(args);
            dtb.process();
            exitCode = 0;
        }
        catch (ParseException e) {
            System.err.println("Parsing failed: " + e.getMessage());
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("DTBuilder", options);
        }
        catch (NumberFormatException e) {
            System.err.println("Parsing numeric fields failed: " + e.getMessage());
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        System.exit(exitCode);
    }

    /**
     * Builds the tool from its command-line arguments.
     *
     * @param args command-line arguments
     * @throws ParseException if the arguments do not match the registered options
     * @throws NumberFormatException if {@code -randomSeed}, {@code -vectorSize} or
     *         {@code -lambda} is not a valid number
     * @throws Exception if a class named on the command line cannot be loaded or instantiated,
     *         or if the class given with {@code -wekaconverter} is not a {@link Saver}
     */
    private DTBuilder(String[] args) throws ParseException, NumberFormatException, Exception {
        GnuParser parser2 = new GnuParser();
        CommandLine line = parser2.parse(options, args);

        // Evaluated in the same order as the constructor arguments they feed.
        int randomSeed = line.hasOption("randomSeed") ? Integer.parseInt(line.getOptionValue("randomSeed")) : 0;
        int vectorSize = line.hasOption("vectorSize") ? Integer.parseInt(line.getOptionValue("vectorSize")) : 4096;
        boolean usePos = line.hasOption("pos");
        boolean lexicalized = !line.hasOption("not_lexicalized");
        double lambda = line.hasOption("lambda") ? Double.parseDouble(line.getOptionValue("lambda")) : 1.0;
        Class<?> operation = line.hasOption("op") ? Class.forName(line.getOptionValue("op")) : ShuffledCircularConvolution.class;
        this.dt = new GenericDT(randomSeed, vectorSize, usePos, lexicalized, lambda, operation);

        if (line.hasOption("of")) {
            output_type = OutputTypes.valueOf(line.getOptionValue("of"));
        }
        this.useWeka = line.hasOption("weka");
        this.verbose = line.hasOption("verbose");
        this.useWekaDense = line.hasOption("wekadense");
        if (line.hasOption("wekaconverter")) {
            this.WekaConverter = line.getOptionValue("wekaconverter");
            // Instantiate once up front so a wrong class name fails before any processing starts.
            Object o = Class.forName(this.WekaConverter).newInstance();
            if (!(o instanceof Saver)) {
                throw new Exception("Error \n\t" + this.WekaConverter + "\nThis weka converter does not exist!");
            }
        }
        this.inputFile = new File(line.getOptionValue("input"));
        this.outputFile = new File(line.getOptionValue("output"));
    }

    /**
     * Runs the conversion in the mode selected on the command line and, when verbose,
     * prints the time spent computing distributed trees.
     *
     * @throws Exception if the selected mode fails or the output type is not recognised
     */
    private void process() throws Exception {
        if (this.useWeka) {
            this.process_weka();
        } else if (this.useWekaDense) {
            this.process_weka_matrix_dense();
        } else if (output_type == OutputTypes.dsm) {
            this.process_dsm();
        } else if (output_type == OutputTypes.dbm) {
            this.process_dbm();
        } else {
            throw new Exception("Unknown output type");
        }
        if (this.verbose) {
            System.out.println("Elapsed time (for computing distributed trees) = " + this.elapsed + "ms");
        }
    }

    /**
     * Computes the vector for a tree and adds the time spent to {@link #elapsed}.
     *
     * @param tree tree to encode
     * @return the vector returned by {@code GenericDT.dt}
     * @throws Exception if the encoder fails
     */
    private double[] timedDt(Tree tree) throws Exception {
        long begin = System.currentTimeMillis();
        double[] v = this.dt.dt(tree);
        this.elapsed += System.currentTimeMillis() - begin;
        return v;
    }

    /**
     * Reads one tree per input line and writes one text line per vector to the output file.
     *
     * @throws Exception if reading, parsing, encoding or writing fails
     */
    private void process_dsm() throws Exception {
        BufferedReader input = new BufferedReader(new FileReader(this.inputFile));
        try {
            FileWriter output = new FileWriter(this.outputFile);
            try {
                String treeString;
                while ((treeString = input.readLine()) != null) {
                    Tree in = Tree.fromPennTree(treeString);
                    double[] v = this.timedDt(in);
                    output.write(ArrayMath.arrayToString(v) + "\n");
                    // Progress marker, one per tree.
                    System.out.print('.');
                }
                System.out.println("done");
            } finally {
                output.close();
            }
        } finally {
            input.close();
        }
    }

    /**
     * Reads one tree per input line and stores each vector as a float row of a
     * {@link DenseBinaryMatrix} backed by the output file. Timing statistics are printed
     * every 1000 rows and once more at the end.
     *
     * @throws Exception if reading, parsing, encoding or writing fails
     */
    private void process_dbm() throws Exception {
        BufferedReader input = new BufferedReader(new FileReader(this.inputFile));
        try {
            DenseBinaryMatrix<Float> output = new DenseBinaryMatrix<Float>(Float.class);
            output.openFile(this.outputFile.getAbsolutePath(), "rw");
            try {
                output.setCol(this.dt.getVectorSize());
                int row = 0;
                long time_only_dt = 0L;
                long time_full = 0L;
                String treeString;
                while ((treeString = input.readLine()) != null) {
                    // The per-batch "dt" time includes parsing the tree, the "full" time also the row write.
                    long start = System.currentTimeMillis();
                    double[] v = this.timedDt(Tree.fromPennTree(treeString));
                    long only_dt_end = System.currentTimeMillis();
                    output.setFullRowFloat(row, ArrayMath.convertToFloatArray(v));
                    long full_end = System.currentTimeMillis();
                    time_only_dt += only_dt_end - start;
                    time_full += full_end - start;
                    ++row;
                    if (row % 1000 == 0) {
                        System.out.println(row - 1 + "\tdt processing time (1000rows) =\t" + time_only_dt + "\tfull processing time (1000rows) =\t" + time_full);
                        time_only_dt = 0L;
                        time_full = 0L;
                    }
                }
                System.out.println(row + "\tdt processing time (" + row % 1000 + "rows) =\t" + time_only_dt + "\tfull processing time (" + row % 1000 + "rows) =\t" + time_full);
                System.out.println("done");
                // The row count is only known once the whole input has been consumed.
                output.setRows(row);
                output.writeSize();
            } finally {
                // NOTE(review): assumes closeFile() is safe to call after a failed write - confirm.
                output.closeFile();
            }
        } finally {
            input.close();
        }
    }

    /**
     * Reads one tree per input line, collects all vectors in memory and writes them to the
     * output file through {@link Matrix#write(java.io.Writer)}.
     *
     * @throws Exception if reading, parsing, encoding or writing fails
     */
    private void process_weka_matrix_dense() throws Exception {
        ArrayList<double[]> rows = new ArrayList<double[]>();
        BufferedReader input = new BufferedReader(new FileReader(this.inputFile));
        try {
            String treeString;
            while ((treeString = input.readLine()) != null) {
                rows.add(this.timedDt(Tree.fromPennTree(treeString)));
                System.out.print('.');
            }
        } finally {
            input.close();
        }
        System.out.println("Saving");
        Matrix m = new Matrix(rows.toArray(new double[rows.size()][]));
        FileWriter output = new FileWriter(this.outputFile);
        try {
            m.write(output);
        } finally {
            output.close();
        }
        System.out.println("Done");
    }

    /**
     * Converts a weka data source: every attribute whose name ends with {@code :tree} is
     * replaced by one numeric attribute per vector dimension, all other attributes are kept
     * after them, and the result is written with the configured weka {@link Saver}.
     *
     * <p>The output file name is the {@code -output} path followed by the saver's file extension.
     *
     * @throws Exception if loading, parsing, encoding or saving fails
     */
    private void process_weka() throws Exception {
        ConverterUtils.DataSource input = new ConverterUtils.DataSource(this.inputFile.getAbsolutePath());
        Instances input_instances = input.getStructure();

        // Split the input attributes into tree-valued ones and everything else.
        Enumeration attributes = input_instances.enumerateAttributes();
        ArrayList<Attribute> trees = new ArrayList<Attribute>();
        ArrayList<Attribute> other_attributes = new ArrayList<Attribute>();
        while (attributes.hasMoreElements()) {
            Attribute a = (Attribute)attributes.nextElement();
            if (a.name().endsWith(":tree")) {
                trees.add(a);
            } else {
                other_attributes.add(a);
            }
        }

        // Output header: vector dimensions of every tree attribute first, then the other attributes.
        ArrayList<Attribute> dimensions = new ArrayList<Attribute>();
        for (Attribute a : trees) {
            for (int i = 0; i < this.dt.getVectorSize(); ++i) {
                dimensions.add(new Attribute(a.name() + "_" + i));
            }
        }
        for (Attribute a : other_attributes) {
            dimensions.add(a);
        }

        // Held as Instance: mergeInstance is declared to return Instance, so a merged row
        // is not guaranteed to be a SparseInstance.
        ArrayList<Instance> instances = new ArrayList<Instance>();
        Instance instance;
        while ((instance = input.nextElement(input_instances)) != null) {
            Instance subpart_of_trees = null;
            for (Attribute a : trees) {
                DenseInstance i = new DenseInstance(1.0, this.timedDt(Tree.fromPennTree(instance.stringValue(a))));
                subpart_of_trees = subpart_of_trees == null ? i : subpart_of_trees.mergeInstance(i);
            }
            SparseInstance sparseInstance = new SparseInstance(other_attributes.size());
            int num = 0;
            for (Attribute a : other_attributes) {
                if (a.isString()) {
                    // NOTE(review): this instance has no dataset attached at this point;
                    // confirm that setting a string value works for string attributes.
                    sparseInstance.setValue(num, instance.stringValue(a));
                } else {
                    sparseInstance.setValue(num, instance.value(a));
                }
                ++num;
            }
            Instance final_instance = subpart_of_trees != null ? subpart_of_trees.mergeInstance(sparseInstance) : sparseInstance;
            instances.add(final_instance);
            System.out.print('.');
        }

        Instances instances_2 = new Instances(input_instances.relationName() + "_dt", dimensions, input_instances.size());
        for (Instance instance2 : instances) {
            instances_2.add(instance2);
        }
        Saver output = (Saver)Class.forName(this.WekaConverter).newInstance();
        output.setFile(new File(this.outputFile.getAbsoluteFile() + output.getFileExtension()));
        ConverterUtils.DataSink.write(output, instances_2);
    }

    // Registers every command-line option read by the constructor.
    static {
        options.addOption("not_lexicalized", false, "does not consider leaf nodes");
        options.addOption("pos", false, "use pos augmented labels for leaf nodes in (lexicalized) syntactic trees");
        OptionBuilder.withArgName("seed");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("use given random seed (default = 0)");
        options.addOption(OptionBuilder.create("randomSeed"));
        OptionBuilder.withArgName("size");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("use given vector size (default = 4096)");
        options.addOption(OptionBuilder.create("vectorSize"));
        OptionBuilder.withArgName("lambda");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("use given lambda to weight tree fragments (default = 1)");
        options.addOption(OptionBuilder.create("lambda"));
        OptionBuilder.withArgName("input file");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("load trees (in Penn Treebank notation) from the given file");
        OptionBuilder.isRequired();
        options.addOption(OptionBuilder.create("input"));
        OptionBuilder.withArgName("output file");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("print distributed trees to the given file");
        OptionBuilder.isRequired();
        options.addOption(OptionBuilder.create("output"));
        OptionBuilder.withArgName("operation class name");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("use given class as vector composition function implementation, default is shuffled circular convolution");
        options.addOption(OptionBuilder.create("op"));
        OptionBuilder.withArgName("[dsm|dbm]");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("the format of the output file:  dense string matrix (dsm) and dense binary matrix (dbm), default is dsm");
        options.addOption(OptionBuilder.create("of"));
        options.addOption("weka", false, "use a weka input type and a weka output converter (default is weka.core.converters.ArffSaver but it can be specified) ");
        // Read by the constructor; without this registration the flag cannot be accepted by the parser.
        options.addOption("wekadense", false, "load one tree per line and write the vectors as a weka.core.matrix.Matrix text file (ignored when -weka is given)");
        OptionBuilder.withArgName("weka format name");
        OptionBuilder.hasArg();
        OptionBuilder.withDescription("select the weka output format: use the full name of the weka.core.converters.AbstractFileSaver.");
        options.addOption(OptionBuilder.create("wekaconverter"));
        options.addOption("verbose", false, "print messages");
    }

    /**
     * Output formats selectable with {@code -of}: dense string matrix ({@code dsm}) and
     * dense binary matrix ({@code dbm}). The constant names are matched against the option
     * value with {@code valueOf}, so they must stay lower case.
     */
    protected static enum OutputTypes {
        dsm,
        dbm;

    }
}

