/*
 * Decompiled with CFR 0.152.
 */
package com.cybozu.labs.langdetect;

import com.cybozu.labs.langdetect.DetectorFactory;
import com.cybozu.labs.langdetect.LanguageProbability;
import com.cybozu.labs.langdetect.NoFeatureInTextException;
import com.cybozu.labs.langdetect.util.NGram;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Formatter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.regex.Pattern;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
/**
 * Accumulates text and estimates which language it is written in.
 *
 * <p>Text is added through the {@code append} methods. On the first call to
 * {@link #detect()} or {@link #getProbabilities()} the text is cleaned, split into
 * n-grams known to the factory's profile map, and a randomized iterative estimate is
 * run several times; the averaged per-language probabilities are cached for later calls.
 *
 * <p>Instances hold mutable state (the text buffer and cached result) and perform no
 * synchronization of their own.
 */
public class Detector {
    /** Default smoothing parameter. */
    private static final double ALPHA_DEFAULT = 0.5;
    /** Scale of the Gaussian jitter applied to alpha in each trial. */
    private static final double ALPHA_WIDTH = 0.05;
    /** Upper bound on the number of sampling iterations in a single trial. */
    private static final int ITERATION_LIMIT = 1000;
    /** Languages whose probability does not exceed this value are not reported. */
    private static final double PROB_THRESHOLD = 0.1;
    /** A trial stops once the largest normalized probability exceeds this value. */
    private static final double CONV_THRESHOLD = 0.99999;
    /** Divisor turning alpha into the additive smoothing weight. */
    private static final int BASE_FREQ = 10000;
    /** Smallest probability printed by the verbose word dump. */
    private static final double VERBOSE_MIN_PROB = 0.00001;
    /** Value returned by {@link #detect()} when no language passes the threshold. */
    private static final String UNKNOWN_LANG = "unknown";
    private static final Pattern URL_REGEX = Pattern.compile("https?://[-_.?&~;+=/#0-9A-Za-z]{1,2076}");
    private static final Pattern MAIL_REGEX = Pattern.compile("[-_.0-9A-Za-z]{1,64}@[-_0-9A-Za-z]{1,255}[-_.0-9A-Za-z]{1,255}");

    /** Per n-gram array of probabilities, indexed in the same order as {@link #langlist}. */
    private final Map<String, double[]> wordLangProbMap;
    /** Language names; index i corresponds to entry i of every probability array. */
    private final List<String> langlist;
    private final Long seed;
    private StringBuilder text;
    /** Cached result of the last detection run, or null if none has run yet. */
    private double[] langprob = null;
    private double alpha = ALPHA_DEFAULT;
    private int trialCount = 7;
    private int maxTextLength = 10000;
    /** Normalized prior probabilities, or null to start from a uniform distribution. */
    private double[] priorMap = null;
    private boolean verbose = false;

    /**
     * Creates a detector backed by the profile data, language list and seed of the factory.
     *
     * @param factory source of the n-gram probability map, language list and optional seed
     */
    public Detector(DetectorFactory factory) {
        this.wordLangProbMap = factory.wordLangProbMap;
        this.langlist = factory.langlist;
        this.text = new StringBuilder();
        this.seed = factory.seed;
    }

    /** Enables diagnostic output on {@code System.out} during detection. */
    public void setVerbose() {
        this.verbose = true;
    }

    /**
     * Sets the smoothing parameter used when updating language probabilities.
     *
     * @param alpha the new smoothing parameter
     */
    public void setAlpha(double alpha) {
        this.alpha = alpha;
    }

    /**
     * Sets prior probabilities per language. Languages missing from the map (or mapped
     * to null) get a prior of zero; the supplied values are normalized to sum to one.
     *
     * <p>The detector's current prior is replaced only if the whole map is valid, so a
     * rejected argument leaves the previous configuration untouched.
     *
     * @param priorMap language name to non-negative prior weight
     * @throws IllegalArgumentException if a weight is negative or all weights are zero
     */
    public void setPriorMap(Map<String, Double> priorMap) {
        // Build into a local array so that a validation failure cannot leave a
        // half-filled, un-normalized prior installed on this detector.
        double[] normalized = new double[this.langlist.size()];
        double sump = 0.0;
        for (int i = 0; i < normalized.length; ++i) {
            Double boxed = priorMap.get(this.langlist.get(i));
            if (boxed == null) {
                continue;
            }
            double p = boxed.doubleValue();
            if (p < 0.0) {
                throw new IllegalArgumentException("Prior probability must be non-negative.");
            }
            normalized[i] = p;
            sump += p;
        }
        if (sump <= 0.0) {
            throw new IllegalArgumentException("More one of prior probability must be non-zero.");
        }
        for (int i = 0; i < normalized.length; ++i) {
            normalized[i] /= sump;
        }
        this.priorMap = normalized;
    }

    /**
     * Sets the text length limit consulted by the {@code append} methods.
     *
     * @param max_text_length the new limit, in chars
     */
    public void setMaxTextLength(int max_text_length) {
        this.maxTextLength = max_text_length;
    }

    /**
     * Reads from the reader and appends what was read, for as long as the accumulated
     * text is shorter than the length limit and the reader reports itself ready.
     * Reading also stops when the reader signals end of stream.
     *
     * @param reader source of the text to append
     * @throws IOException if the reader fails
     */
    public void append(Reader reader) throws IOException {
        // At least one slot: a zero-length buffer would make read() return 0 forever.
        char[] buf = new char[Math.max(1, this.maxTextLength / 2)];
        while (this.text.length() < this.maxTextLength && reader.ready()) {
            int length = reader.read(buf);
            if (length < 0) {
                // End of stream; some readers (e.g. StringReader) stay "ready" at EOF.
                break;
            }
            this.append(new String(buf, 0, length));
        }
    }

    /**
     * Appends text after replacing URLs and mail addresses with a space, applying
     * {@code NGram.normalize_vi}, and collapsing runs of spaces into one.
     *
     * <p>Only the first {@code maxTextLength} chars of the processed argument are
     * considered; the limit applies to each call, not to the accumulated total.
     *
     * @param text the text to append
     */
    public void append(String text) {
        text = URL_REGEX.matcher(text).replaceAll(" ");
        text = MAIL_REGEX.matcher(text).replaceAll(" ");
        text = NGram.normalize_vi(text);
        char previous = 0;
        for (int i = 0; i < text.length() && i < this.maxTextLength; ++i) {
            char c = text.charAt(i);
            if (c != ' ' || previous != ' ') {
                this.text.append(c);
            }
            previous = c;
        }
    }

    /**
     * Tells whether the char lies in the range 'A'..'z'. Note that this range also
     * covers the six punctuation chars between 'Z' and 'a'.
     */
    private static boolean isInAsciiLetterRange(char c) {
        return c >= 'A' && c <= 'z';
    }

    /**
     * Drops every char in the 'A'..'z' range from the text when such chars are
     * outnumbered more than two to one by chars at or above U+0300 that are outside the
     * Latin Extended Additional block.
     */
    private void cleaningText() {
        int latinCount = 0;
        int nonLatinCount = 0;
        for (int i = 0; i < this.text.length(); ++i) {
            char c = this.text.charAt(i);
            if (isInAsciiLetterRange(c)) {
                ++latinCount;
            } else if (c >= '\u0300'
                    && Character.UnicodeBlock.of(c) != Character.UnicodeBlock.LATIN_EXTENDED_ADDITIONAL) {
                ++nonLatinCount;
            }
        }
        if (latinCount * 2 < nonLatinCount) {
            StringBuilder textWithoutLatin = new StringBuilder();
            for (int i = 0; i < this.text.length(); ++i) {
                char c = this.text.charAt(i);
                if (!isInAsciiLetterRange(c)) {
                    textWithoutLatin.append(c);
                }
            }
            this.text = textWithoutLatin;
        }
    }

    /**
     * Returns the most probable language of the accumulated text.
     *
     * @return the language name, or {@code "unknown"} if no language passes the
     *         reporting threshold
     * @throws NoFeatureInTextException if the text contains no known n-gram
     */
    public String detect() {
        List<LanguageProbability> probabilities = this.getProbabilities();
        if (!probabilities.isEmpty()) {
            return probabilities.get(0).getLanguage();
        }
        return UNKNOWN_LANG;
    }

    /**
     * Returns the candidate languages whose probability exceeds the reporting threshold,
     * sorted by the natural ordering of {@code LanguageProbability}. Detection runs on
     * the first call and its result is reused afterwards.
     *
     * @return the sorted list of candidates; possibly empty
     * @throws NoFeatureInTextException if the text contains no known n-gram
     */
    public List<LanguageProbability> getProbabilities() {
        if (this.langprob == null) {
            this.detectBlock();
        }
        return this.sortProbability(this.langprob);
    }

    /**
     * Runs the detection trials and stores the averaged probabilities in
     * {@link #langprob}.
     *
     * <p>Each trial starts from the initial distribution, jitters alpha with Gaussian
     * noise, and repeatedly folds in a randomly chosen n-gram. Every fifth iteration the
     * distribution is normalized and the trial ends if it has converged or reached the
     * iteration limit.
     */
    private void detectBlock() {
        this.cleaningText();
        List<String> ngrams = this.extractNGrams();
        if (ngrams.isEmpty()) {
            throw new NoFeatureInTextException("No feature in text");
        }
        this.langprob = new double[this.langlist.size()];
        Random rand = new Random();
        if (this.seed != null) {
            rand.setSeed(this.seed);
        }
        for (int t = 0; t < this.trialCount; ++t) {
            double[] prob = this.initProbability();
            // Order of the random draws matters for reproducibility with a fixed seed.
            double trialAlpha = this.alpha + rand.nextGaussian() * ALPHA_WIDTH;
            for (int i = 0; ; ++i) {
                int r = rand.nextInt(ngrams.size());
                this.updateLangProb(prob, ngrams.get(r), trialAlpha);
                if (i % 5 == 0) {
                    if (Detector.normalizeProb(prob) > CONV_THRESHOLD || i >= ITERATION_LIMIT) {
                        break;
                    }
                    if (this.verbose) {
                        System.out.println("> " + this.sortProbability(prob));
                    }
                }
            }
            for (int j = 0; j < this.langprob.length; ++j) {
                this.langprob[j] += prob[j] / (double) this.trialCount;
            }
            if (this.verbose) {
                System.out.println("==> " + this.sortProbability(prob));
            }
        }
    }

    /**
     * Builds the starting distribution for a trial: a copy of the configured prior, or a
     * uniform distribution when no prior has been set.
     */
    private double[] initProbability() {
        double[] prob = new double[this.langlist.size()];
        if (this.priorMap != null) {
            for (int i = 0; i < prob.length; ++i) {
                prob[i] = this.priorMap[i];
            }
        } else {
            double uniform = 1.0 / (double) this.langlist.size();
            for (int i = 0; i < prob.length; ++i) {
                prob[i] = uniform;
            }
        }
        return prob;
    }

    /**
     * Feeds the text char by char into an {@code NGram} and collects every n-gram of
     * length 1 to 3 that has an entry in the probability map.
     */
    private List<String> extractNGrams() {
        List<String> list = new ArrayList<String>();
        NGram ngram = new NGram();
        for (int i = 0; i < this.text.length(); ++i) {
            ngram.addChar(this.text.charAt(i));
            for (int n = 1; n <= 3; ++n) {
                String w = ngram.get(n);
                if (w != null && this.wordLangProbMap.containsKey(w)) {
                    list.add(w);
                }
            }
        }
        return list;
    }

    /**
     * Multiplies each language probability by the smoothed probability of the n-gram.
     *
     * @param prob  distribution to update in place
     * @param word  the n-gram to fold in
     * @param alpha smoothing parameter for this trial
     * @return false if the n-gram is null or has no usable entry in the map, true otherwise
     */
    private boolean updateLangProb(double[] prob, String word, double alpha) {
        if (word == null) {
            return false;
        }
        double[] langProbMap = this.wordLangProbMap.get(word);
        if (langProbMap == null) {
            return false;
        }
        if (this.verbose) {
            System.out.println(word + "(" + Detector.unicodeEncode(word) + "):" + this.wordProbToString(langProbMap));
        }
        double weight = alpha / BASE_FREQ;
        for (int i = 0; i < prob.length; ++i) {
            prob[i] *= weight + langProbMap[i];
        }
        return true;
    }

    /** Formats the non-negligible entries of a probability array for verbose output. */
    private String wordProbToString(double[] prob) {
        Formatter formatter = new Formatter();
        try {
            for (int j = 0; j < prob.length; ++j) {
                double p = prob[j];
                if (p >= VERBOSE_MIN_PROB) {
                    formatter.format(" %s:%.5f", this.langlist.get(j), p);
                }
            }
            return formatter.toString();
        } finally {
            formatter.close();
        }
    }

    /**
     * Scales the array in place so that its entries sum to one.
     *
     * @param prob the values to normalize
     * @return the largest normalized entry
     */
    private static double normalizeProb(double[] prob) {
        double sump = 0.0;
        for (int i = 0; i < prob.length; ++i) {
            sump += prob[i];
        }
        double maxp = 0.0;
        for (int i = 0; i < prob.length; ++i) {
            double p = prob[i] / sump;
            if (maxp < p) {
                maxp = p;
            }
            prob[i] = p;
        }
        return maxp;
    }

    /**
     * Wraps every probability above the reporting threshold in a
     * {@code LanguageProbability} and sorts the result by its natural ordering.
     */
    private List<LanguageProbability> sortProbability(double[] prob) {
        List<LanguageProbability> list = new ArrayList<LanguageProbability>();
        for (int j = 0; j < prob.length; ++j) {
            double p = prob[j];
            if (p > PROB_THRESHOLD) {
                list.add(new LanguageProbability(this.langlist.get(j), p));
            }
        }
        Collections.sort(list);
        return list;
    }

    /**
     * Returns the word with every char at or above U+0080 replaced by a backslash,
     * the letter u and the char's four-digit lowercase hex code.
     */
    private static String unicodeEncode(String word) {
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < word.length(); ++i) {
            char ch = word.charAt(i);
            if (ch >= '\u0080') {
                // Adding 0x10000 guarantees five hex digits; dropping the leading '1'
                // leaves the code zero-padded to four digits.
                String st = Integer.toHexString(0x10000 + ch);
                buf.append("\\u").append(st, 1, 5);
            } else {
                buf.append(ch);
            }
        }
        return buf.toString();
    }
}

