/*
 * Decompiled with CFR 0.152.
 */
package me.shenfeng.mmseg;

import java.io.IOException;
import java.io.PushbackReader;
import java.io.Reader;
import java.util.Arrays;
import me.shenfeng.mmseg.Dictionary;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

public final class SimpleMMsegTokenizer
extends Tokenizer {
    static final int EN = 1;
    static final int ZH = 2;
    static final int UNKNOW = 3;
    char[] buffer = new char[32];
    int bufferIdx = 0;
    int bufferStart = 0;
    private int lastType = 1;
    int idx = 0;
    int read;
    private Dictionary dic;
    private PushbackReader reader;
    private CharTermAttribute termAtt;
    private OffsetAttribute offsetAtt;
    private boolean lowercase = true;

    private int type(int ch) {
        int t = Character.getType(ch);
        if (t == 5) {
            return 2;
        }
        if (t == 2 || t == 1) {
            return 1;
        }
        if (t == 9) {
            if (this.lastType == 3) {
                return 1;
            }
            return this.lastType;
        }
        return 3;
    }

    public SimpleMMsegTokenizer(Dictionary dic, Reader input) {
        super(input);
        this.reader = input instanceof PushbackReader ? (PushbackReader)input : new PushbackReader(input, 1);
        this.dic = dic;
        this.termAtt = (CharTermAttribute)this.addAttribute(CharTermAttribute.class);
        this.offsetAtt = (OffsetAttribute)this.addAttribute(OffsetAttribute.class);
    }

    public SimpleMMsegTokenizer(Dictionary dic, Reader input, boolean lowercase) {
        this(dic, input);
        this.lowercase = lowercase;
    }

    public void addToBuffer(int read) {
        if (this.buffer.length == this.bufferIdx) {
            this.buffer = Arrays.copyOf(this.buffer, this.bufferIdx * 2);
        }
        this.buffer[this.bufferIdx++] = (char)read;
    }

    private void advance() throws IOException {
        while ((this.read = this.reader.read()) != -1) {
            if (this.type(this.read) == this.lastType) {
                ++this.idx;
                this.addToBuffer(this.read);
                continue;
            }
            this.reader.unread(this.read);
            return;
        }
    }

    public void nextCh() {
        int max = this.dic.maxMath(this.buffer, this.bufferStart, this.bufferIdx - this.bufferStart);
        int offset = this.bufferStart;
        this.bufferStart += max;
        this.termAtt.copyBuffer(this.buffer, offset, max);
        int start = this.idx - (this.bufferIdx - offset);
        this.offsetAtt.setOffset(start, start + max);
    }

    public void nextEn() {
        if (this.lowercase) {
            for (int i = this.bufferStart; i < this.bufferIdx; ++i) {
                this.buffer[i] = Character.toLowerCase(this.buffer[i]);
            }
        }
        int length = this.bufferIdx - this.bufferStart;
        this.termAtt.copyBuffer(this.buffer, this.bufferStart, length);
        this.offsetAtt.setOffset(this.idx - length, this.idx);
        this.bufferStart = 0;
        this.bufferIdx = 0;
    }

    public boolean incrementToken() throws IOException {
        this.clearAttributes();
        if (this.bufferStart < this.bufferIdx) {
            this.nextCh();
            return true;
        }
        this.bufferStart = 0;
        this.bufferIdx = 0;
        while ((this.read = this.reader.read()) != -1) {
            ++this.idx;
            this.lastType = this.type(this.read);
            switch (this.lastType) {
                case 2: {
                    this.addToBuffer(this.read);
                    this.advance();
                    this.nextCh();
                    return true;
                }
                case 1: {
                    this.addToBuffer(this.read);
                    this.advance();
                    this.nextEn();
                    return true;
                }
            }
        }
        return false;
    }
}

