/*
 * Decompiled with CFR 0.152.
 */
package com.github.pmerienne.trident.ml.nlp;

import backtype.storm.tuple.Values;
import com.github.pmerienne.trident.ml.classification.Classifier;
import com.github.pmerienne.trident.ml.classification.PAClassifier;
import com.github.pmerienne.trident.ml.core.TextInstance;
import com.github.pmerienne.trident.ml.nlp.TFIDF;
import com.github.pmerienne.trident.ml.nlp.TextFeaturesExtractor;
import com.github.pmerienne.trident.ml.preprocessing.TwitterTokenizer;
import com.github.pmerienne.trident.ml.testing.data.Datasets;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.ArrayList;
import java.util.List;
import org.codehaus.jackson.map.ObjectMapper;
import storm.trident.operation.BaseFunction;
import storm.trident.operation.TridentCollector;
import storm.trident.tuple.TridentTuple;

/**
 * Trident function that classifies the text held in the first field of each
 * incoming tuple and emits the boolean prediction as a single-value tuple.
 *
 * <p>The features extractor and the classifier are deserialized from JSON
 * classpath resources when an instance is constructed; see {@link Builder}.
 *
 * <p>NOTE(review): which sentiment {@code true} stands for is determined by the
 * labels of the training dataset and is not visible here - confirm against
 * {@code Datasets.getTwitterSamples()}.
 */
public class TwitterSentimentClassifier
extends BaseFunction
implements Serializable {
    private static final long serialVersionUID = 1553274753609262633L;

    /** Converts a token list into the feature vector handed to {@link #classifier}. */
    protected TextFeaturesExtractor featuresExtractor;

    /** Model that turns a feature vector into a boolean prediction. */
    protected Classifier<Boolean> classifier;

    // NOTE(review): the meaning of the two constructor arguments is defined by
    // TwitterTokenizer and is not visible in this file - confirm there.
    private final TwitterTokenizer tokenizer = new TwitterTokenizer(2, 2);

    /**
     * Creates a classifier backed by the models bundled on the classpath.
     *
     * @throws RuntimeException if a model resource is missing or cannot be read
     */
    public TwitterSentimentClassifier() {
        try {
            this.featuresExtractor = Builder.loadFeatureExtractor();
            this.classifier = Builder.loadClassifier();
        }
        catch (IOException e) {
            throw new RuntimeException("Unable to load TwitterSentimentClassifier : " + e.getMessage(), e);
        }
    }

    /**
     * Classifies the string found at index 0 of {@code tuple} and emits the
     * prediction as a one-element tuple.
     *
     * @param tuple     input tuple whose first field is the text to classify
     * @param collector collector receiving the prediction
     */
    public void execute(TridentTuple tuple, TridentCollector collector) {
        String text = tuple.getString(0);
        boolean prediction = this.classify(text);
        collector.emit(new Values(prediction));
    }

    /**
     * Tokenizes {@code text}, extracts its features and runs the classifier on them.
     *
     * @param text the text to classify
     * @return the prediction returned by the underlying classifier
     */
    protected Boolean classify(String text) {
        List<String> tokens = this.tokenizer.tokenize(text);
        double[] features = this.featuresExtractor.extractFeatures(tokens);
        return this.classifier.classify(features);
    }

    /**
     * Trains, saves and loads the JSON-serialized models used by
     * {@link TwitterSentimentClassifier}.
     *
     * <p>Models are read through their classpath URL, so loading works whether
     * the resources sit in a directory or inside a jar. A {@link File} is only
     * resolved when saving, which requires the resources to live on the file system.
     */
    protected static class Builder {
        private static final String TEXT_FEATURES_EXTRACTOR_RESOURCE = "/twitter-sentiment-classifier-extractor.json";
        private static final String CLASSIFIER_RESOURCE = "/twitter-sentiment-classifier-classifier.json";
        private static final ObjectMapper MAPPER = new ObjectMapper();

        protected Builder() {
        }

        /**
         * Builds the features extractor from the tokens of every sample returned by
         * {@code Datasets.getTwitterSamples()}, updates a new classifier with each
         * sample, then overwrites the two model resources.
         *
         * @param args unused
         * @throws IOException if the models cannot be written
         */
        public static void main(String[] args) throws IOException {
            List<TextInstance<Boolean>> dataset = Datasets.getTwitterSamples();
            List<List<String>> documents = new ArrayList<List<String>>(dataset.size());
            for (TextInstance<Boolean> instance : dataset) {
                documents.add(instance.tokens);
            }
            TFIDF featuresExtractor = new TFIDF(documents, 10000);
            PAClassifier classifier = new PAClassifier();
            for (TextInstance<Boolean> instance : dataset) {
                double[] features = featuresExtractor.extractFeatures(instance.tokens);
                classifier.update(instance.label, features);
            }
            Builder.save(featuresExtractor, classifier);
        }

        /**
         * Serializes both models as JSON over the existing classpath resources.
         *
         * @param featuresExtractor the extractor to persist
         * @param classifier        the classifier to persist
         * @throws IOException if a resource is missing, is not a plain file
         *                     (e.g. it is packaged in a jar) or cannot be written
         */
        protected static void save(TFIDF featuresExtractor, PAClassifier classifier) throws IOException {
            // Resolve both destinations up front so nothing is written when one of them is unavailable.
            File extractorFile = Builder.resourceFile(TEXT_FEATURES_EXTRACTOR_RESOURCE);
            File classifierFile = Builder.resourceFile(CLASSIFIER_RESOURCE);
            MAPPER.writeValue(extractorFile, featuresExtractor);
            MAPPER.writeValue(classifierFile, classifier);
        }

        /**
         * Reads the features extractor from its classpath resource.
         *
         * @return the deserialized extractor
         * @throws IOException if the resource is missing or cannot be parsed
         */
        public static TextFeaturesExtractor loadFeatureExtractor() throws IOException {
            return MAPPER.readValue(Builder.resourceUrl(TEXT_FEATURES_EXTRACTOR_RESOURCE), TFIDF.class);
        }

        /**
         * Reads the classifier from its classpath resource.
         *
         * @return the deserialized classifier
         * @throws IOException if the resource is missing or cannot be parsed
         */
        public static Classifier<Boolean> loadClassifier() throws IOException {
            return MAPPER.readValue(Builder.resourceUrl(CLASSIFIER_RESOURCE), PAClassifier.class);
        }

        /** Locates a classpath resource, reporting a missing one as an IOException instead of a later NPE. */
        private static URL resourceUrl(String name) throws IOException {
            URL url = Builder.class.getResource(name);
            if (url == null) {
                throw new IOException("Classpath resource not found : " + name);
            }
            return url;
        }

        /** Maps a classpath resource to a writable file; going through the URI decodes escaped characters such as spaces. */
        private static File resourceFile(String name) throws IOException {
            URL url = Builder.resourceUrl(name);
            try {
                return new File(url.toURI());
            }
            catch (URISyntaxException e) {
                throw new IOException("Invalid location for classpath resource " + name + " : " + url, e);
            }
            catch (IllegalArgumentException e) {
                // new File(URI) rejects anything that is not a hierarchical file: URI, e.g. a jar: entry.
                throw new IOException("Classpath resource " + name + " is not a writable file : " + url, e);
            }
        }
    }
}

