/*
 * Decompiled with CFR 0.152.
 */
package org.sift.runtime.impl;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
import org.sift.runtime.Fields;
import org.sift.runtime.Tuple;
import org.sift.runtime.spi.OutputCollector;
import org.sift.runtime.spi.Processor;
import org.sift.winnow.StopWords;

/**
 * {@link Processor} that splits every entry of a tuple's {@code Fields.VALUES} list into
 * words using a Lucene {@link Analyzer} and appends the resulting n-grams to the
 * {@code Fields.VALUES} list of a clone of that tuple, which is then emitted once.
 *
 * <p>For each token position, every n-gram of size 1 up to {@link #getnGram()} that starts
 * at that position and fits inside the token sequence is produced (tokens joined by a single
 * space). N-grams reported as stop words by the configured {@link StopWords} are dropped;
 * when no {@link StopWords} is configured, nothing is filtered.
 */
public class LuceneWordSplitterProcessor
implements Processor {
    /** Analyzer used when none is set explicitly. */
    private static final Analyzer DEFAULT_ANALYZER = new StandardAnalyzer(Version.LUCENE_CURRENT);
    /** Largest n-gram size to generate; 1 means single words only. */
    private int nGram = 1;
    /** Analyzer used to tokenize each input line. */
    private Analyzer analyzer = DEFAULT_ANALYZER;
    /** Optional stop-word filter; {@code null} disables filtering. */
    private StopWords stopWords;

    /**
     * Tokenizes each value of the given tuple and emits a clone of the tuple with the
     * generated n-grams appended to its {@code Fields.VALUES} list.
     *
     * @param tuple the input tuple; each entry of its {@code Fields.VALUES} list is cast to {@link String}
     * @param collector receives the single resulting tuple
     * @throws RuntimeException if the analyzer fails with an {@link IOException}; the cause is preserved
     */
    public void process(Tuple tuple, OutputCollector collector) {
        // NOTE(review): whether the clone's VALUES list already holds the original lines
        // depends on Tuple.clone(), which is not visible here - confirm against Tuple.
        Tuple returnTuple = tuple.clone();
        for (Object line : tuple.getList(Fields.VALUES)) {
            List<String> tokens = this.tokenize((String)line);
            this.appendNGrams(tokens, returnTuple);
        }
        collector.emit(returnTuple);
    }

    /**
     * Runs the configured analyzer over the lower-cased line and collects the term text of
     * every token it produces.
     */
    private List<String> tokenize(String line) {
        List<String> tokens = new ArrayList<String>();
        try {
            // Locale.ROOT keeps lower-casing independent of the JVM's default locale.
            Reader reader = new StringReader(line.toLowerCase(Locale.ROOT));
            TokenStream stream = this.analyzer.tokenStream(null, reader);
            try {
                while (stream.incrementToken()) {
                    tokens.add(((TermAttribute)stream.getAttribute(TermAttribute.class)).term());
                }
            }
            finally {
                // Release the stream even when tokenization fails part-way.
                stream.close();
            }
        }
        catch (IOException e) {
            throw new RuntimeException("Error parsing input line : " + line, e);
        }
        return tokens;
    }

    /**
     * Appends to {@code target} every n-gram (size 1 up to the configured maximum) that
     * starts at each token position, skipping those rejected by the stop-word filter.
     */
    private void appendNGrams(List<String> tokens, Tuple target) {
        StopWords filter = this.getStopWords();
        int maxSize = this.getnGram();
        int tokenCount = tokens.size();
        for (int start = 0; start < tokenCount; ++start) {
            StringBuilder gram = new StringBuilder();
            // Stop at the end of the token list so a truncated n-gram is not emitted twice.
            for (int offset = 0; offset < maxSize && start + offset < tokenCount; ++offset) {
                gram.append(tokens.get(start + offset));
                gram.append(" ");
                String word = gram.toString().trim();
                // A missing stop-word list means "no filtering", not "drop everything".
                if (filter != null && filter.isStopWord(word)) {
                    continue;
                }
                target.addToList(Fields.VALUES, (Object)word);
            }
        }
    }

    /**
     * @return the analyzer used to tokenize input lines
     */
    public Analyzer getAnalyzer() {
        return this.analyzer;
    }

    /**
     * @param analyzer the analyzer to use for tokenizing input lines
     */
    public void setAnalyzer(Analyzer analyzer) {
        this.analyzer = analyzer;
    }

    /**
     * @return the largest n-gram size generated per token position
     */
    public int getnGram() {
        return this.nGram;
    }

    /**
     * @param nGram the largest n-gram size to generate per token position
     */
    public void setnGram(int nGram) {
        this.nGram = nGram;
    }

    /**
     * @return the stop-word filter, or {@code null} when none is configured
     */
    public StopWords getStopWords() {
        return this.stopWords;
    }

    /**
     * @param stopWords the stop-word filter to apply, or {@code null} to disable filtering
     */
    public void setStopWords(StopWords stopWords) {
        this.stopWords = stopWords;
    }
}

