/*
 * Decompiled with CFR 0.152.
 */
package org.apdplat.word.lucene;

import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apdplat.word.lucene.ChineseWordTokenizer;
import org.apdplat.word.segmentation.Segmentation;
import org.apdplat.word.segmentation.SegmentationAlgorithm;
import org.apdplat.word.segmentation.SegmentationFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Lucene {@link Analyzer} whose token stream is produced by a
 * {@link ChineseWordTokenizer} driven by a {@link Segmentation} implementation.
 *
 * <p>The segmentation is either supplied by the caller or, for the no-argument
 * constructor, obtained from {@link SegmentationFactory} using
 * {@link SegmentationAlgorithm#BidirectionalMaximumMatching}.
 */
public class ChineseWordAnalyzer extends Analyzer {
    private static final Logger LOGGER = LoggerFactory.getLogger(ChineseWordAnalyzer.class);

    /** Segmentation handed to every tokenizer this analyzer creates. */
    private final Segmentation segmentation;

    /**
     * Creates an analyzer that uses the bidirectional maximum matching
     * segmentation obtained from {@link SegmentationFactory}.
     */
    public ChineseWordAnalyzer() {
        this.segmentation = SegmentationFactory.getSegmentation(SegmentationAlgorithm.BidirectionalMaximumMatching);
    }

    /**
     * Creates an analyzer that uses the given segmentation.
     *
     * @param segmentation segmentation passed to each {@link ChineseWordTokenizer}
     */
    public ChineseWordAnalyzer(Segmentation segmentation) {
        this.segmentation = segmentation;
    }

    /**
     * Builds the analysis chain: a single {@link ChineseWordTokenizer} reading
     * from {@code reader}, with no additional token filters.
     *
     * @param string field name supplied by Lucene; not used by this analyzer
     * @param reader source of the text to tokenize
     * @return components wrapping the newly created tokenizer
     */
    protected Analyzer.TokenStreamComponents createComponents(String string, Reader reader) {
        ChineseWordTokenizer tokenizer = new ChineseWordTokenizer(reader, this.segmentation);
        return new Analyzer.TokenStreamComponents(tokenizer);
    }

    /**
     * Demo entry point: analyzes two sample sentences and logs every token
     * together with its offsets and position increment.
     *
     * @param stringArray command-line arguments; ignored
     * @throws IOException if reading from a token stream fails
     */
    public static void main(String[] stringArray) throws IOException {
        ChineseWordAnalyzer analyzer = new ChineseWordAnalyzer();
        try {
            logTokens(analyzer, "\u6768\u5c1a\u5ddd\u662fAPDPlat\u5e94\u7528\u7ea7\u4ea7\u54c1\u5f00\u53d1\u5e73\u53f0\u7684\u4f5c\u8005");
            logTokens(analyzer, "word\u662f\u4e00\u4e2a\u4e2d\u6587\u5206\u8bcd\u9879\u76ee\uff0c\u4f5c\u8005\u662f\u6768\u5c1a\u5ddd\uff0c\u6768\u5c1a\u5ddd\u7684\u82f1\u6587\u540d\u53ebysc");
        } finally {
            analyzer.close();
        }
    }

    /**
     * Tokenizes {@code text} as field {@code "text"} and logs each token as
     * {@code term (startOffset - endOffset) positionIncrement}.
     *
     * <p>Follows the TokenStream consumer workflow: {@code reset()}, repeated
     * {@code incrementToken()}, {@code end()}, then {@code close()}. The stream
     * is closed even when consumption fails so that the analyzer can hand out
     * its components again for the next call.
     */
    private static void logTokens(Analyzer analyzer, String text) throws IOException {
        TokenStream tokenStream = analyzer.tokenStream("text", text);
        try {
            // Attribute instances are stable for the lifetime of the stream, so
            // they are looked up once rather than on every token.
            CharTermAttribute term = tokenStream.getAttribute(CharTermAttribute.class);
            OffsetAttribute offset = tokenStream.getAttribute(OffsetAttribute.class);
            PositionIncrementAttribute increment = tokenStream.getAttribute(PositionIncrementAttribute.class);

            tokenStream.reset();
            while (tokenStream.incrementToken()) {
                LOGGER.info(term.toString() + " (" + offset.startOffset() + " - " + offset.endOffset() + ") " + increment.getPositionIncrement());
            }
            tokenStream.end();
        } finally {
            tokenStream.close();
        }
    }
}

