/*
 * Decompiled with CFR 0.152.
 */
package com.github.pmerienne.trident.ml.nlp;

import com.github.pmerienne.trident.ml.nlp.TextFeaturesExtractor;
import com.github.pmerienne.trident.ml.nlp.Vocabulary;
import com.github.pmerienne.trident.ml.util.MathUtil;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * TF-IDF feature extractor for tokenised text.
 *
 * <p>An instance is trained on a corpus with {@link #init(List, int)} (or the
 * two-argument constructor), which stores one inverse document frequency per
 * retained vocabulary term. {@link #extractFeatures(List)} then produces one
 * {@code tf * idf} value per stored term and passes the resulting array through
 * {@code MathUtil.normalize}.
 *
 * <p>The position of a term in the feature array follows the iteration order of
 * the IDF map's key set. NOTE(review): with the {@link HashMap} created by
 * {@code init} that order is an implementation detail of the map; if feature
 * positions must stay stable when the map is rebuilt elsewhere (for example
 * through {@link #setTermsInverseDocumentFrequencies(Map)}), supply a map with a
 * well-defined iteration order.
 *
 * <p>The class is mutable and performs no synchronisation of its own.
 */
public class TFIDF implements TextFeaturesExtractor {

    /** Number of documents in the training corpus; {@code null} until trained or set. */
    private Integer corpusSize;

    /** IDF weight for each retained term; {@code null} until trained or set. */
    private Map<String, Double> termsInverseDocumentFrequencies;

    /**
     * Creates an untrained extractor. Call {@link #init(List, int)} or both
     * setters before extracting features.
     */
    public TFIDF() {
    }

    /**
     * Creates an extractor and immediately trains it on {@code documents}.
     *
     * @param documents   the training corpus, one term list per document
     * @param featureSize forwarded to {@code Vocabulary.limitWords}
     */
    public TFIDF(List<List<String>> documents, int featureSize) {
        // NOTE(review): init is public and overridable, so a subclass override
        // runs here before the subclass's own fields are initialised.
        this.init(documents, featureSize);
    }

    /**
     * Computes the normalised TF-IDF vector of a document.
     *
     * @param documentTerms the terms of the document to describe
     * @return the result of {@code MathUtil.normalize} applied to an array
     *         holding one {@code tf * idf} value per stored term
     * @throws IllegalStateException if no IDF map has been trained or set
     */
    @Override
    public double[] extractFeatures(List<String> documentTerms) {
        Map<String, Double> idfByTerm = this.requireIdfTable();
        double[] features = new double[idfByTerm.size()];
        int position = 0;
        for (String term : idfByTerm.keySet()) {
            features[position] = this.tfIdf(term, documentTerms);
            position++;
        }
        return MathUtil.normalize(features);
    }

    /**
     * Trains this extractor: builds a vocabulary from every document, limits it
     * with {@code featureSize}, records the corpus size and stores the IDF of
     * each vocabulary term. Any previously stored state is replaced.
     *
     * @param documents   the training corpus, one term list per document
     * @param featureSize forwarded to {@code Vocabulary.limitWords}
     *                    (presumably the maximum number of retained terms;
     *                    verify against Vocabulary)
     */
    public void init(List<List<String>> documents, int featureSize) {
        Vocabulary vocabulary = new Vocabulary();
        for (List<String> document : documents) {
            vocabulary.addAll(document);
        }
        vocabulary.limitWords(featureSize);

        // The corpus size must be in place before idf(...) is called below.
        this.corpusSize = documents.size();
        this.termsInverseDocumentFrequencies = new HashMap<String, Double>(vocabulary.wordCount());
        for (String term : vocabulary) {
            this.termsInverseDocumentFrequencies.put(term, this.idf(term, documents));
        }
    }

    /**
     * Counts the occurrences of {@code term} in a document (raw term frequency).
     *
     * <p>The comparison is null-safe: a {@code null} entry in the document no
     * longer raises a {@link NullPointerException}, matching the null-tolerant
     * {@link List#contains(Object)} check used by {@link #idf(String, List)}.
     *
     * @param term          the term to count
     * @param documentTerms the terms of the document
     * @return the number of elements of {@code documentTerms} equal to {@code term}
     */
    protected double tf(String term, List<String> documentTerms) {
        double occurrences = 0.0;
        for (String documentTerm : documentTerms) {
            boolean matches = term == null ? documentTerm == null : term.equals(documentTerm);
            if (matches) {
                occurrences += 1.0;
            }
        }
        return occurrences;
    }

    /**
     * Computes the smoothed inverse document frequency of {@code term}:
     * {@code ln(corpusSize / (1 + d))}, where {@code d} is the number of
     * documents containing the term. The numerator is the stored corpus size,
     * not {@code documents.size()}.
     *
     * @param term      the term to weigh
     * @param documents the corpus to search
     * @return the IDF value; negative when the term occurs in every one of
     *         {@code corpusSize} documents
     * @throws IllegalStateException if the corpus size has not been set
     */
    protected double idf(String term, List<List<String>> documents) {
        int totalDocuments = this.requireCorpusSize();
        double containingDocuments = 0.0;
        for (List<String> document : documents) {
            if (document.contains(term)) {
                containingDocuments += 1.0;
            }
        }
        // The +1 in the denominator avoids division by zero for unseen terms.
        return Math.log((double) totalDocuments / (1.0 + containingDocuments));
    }

    /**
     * Computes {@code tf * idf} of {@code term} for a document. The IDF comes
     * from the stored map; a term with no stored value falls back to
     * {@code ln(corpusSize)}.
     *
     * @param term          the term to weigh
     * @param documentTerms the terms of the document
     * @return the TF-IDF weight of {@code term} in the document
     * @throws IllegalStateException if the IDF map is missing, or if the
     *                               fallback is needed and the corpus size is missing
     */
    protected double tfIdf(String term, List<String> documentTerms) {
        // A single lookup covers both "absent key" and "key mapped to null".
        Double storedIdf = this.requireIdfTable().get(term);
        double idf = storedIdf != null ? storedIdf.doubleValue() : Math.log(this.requireCorpusSize());
        double tf = this.tf(term, documentTerms);
        return tf * idf;
    }

    /**
     * @return the number of documents in the training corpus, or {@code null}
     *         if it has not been set
     */
    public Integer getCorpusSize() {
        return this.corpusSize;
    }

    /**
     * @param corpusSize the number of documents in the training corpus
     */
    public void setCorpusSize(Integer corpusSize) {
        this.corpusSize = corpusSize;
    }

    /**
     * @return the live (not copied) term-to-IDF map, or {@code null} if it has
     *         not been set
     */
    public Map<String, Double> getTermsInverseDocumentFrequencies() {
        return this.termsInverseDocumentFrequencies;
    }

    /**
     * @param termsInverseDocumentFrequencies the term-to-IDF map to use; it is
     *                                        stored as-is, without copying
     */
    public void setTermsInverseDocumentFrequencies(Map<String, Double> termsInverseDocumentFrequencies) {
        this.termsInverseDocumentFrequencies = termsInverseDocumentFrequencies;
    }

    /** Returns the IDF map, failing with a clear message when it is missing. */
    private Map<String, Double> requireIdfTable() {
        if (this.termsInverseDocumentFrequencies == null) {
            throw new IllegalStateException(
                    "TFIDF has no inverse document frequencies: call init(...) or setTermsInverseDocumentFrequencies(...) first");
        }
        return this.termsInverseDocumentFrequencies;
    }

    /** Returns the corpus size, failing with a clear message when it is missing. */
    private int requireCorpusSize() {
        if (this.corpusSize == null) {
            throw new IllegalStateException(
                    "TFIDF has no corpus size: call init(...) or setCorpusSize(...) first");
        }
        return this.corpusSize.intValue();
    }
}

