package com.hankcs.lucene;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.io.IOUtil;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.TraditionalChineseTokenizer;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.lucene.util.AttributeFactory;

/* loaded from: input_file:com/hankcs/lucene/HanLPTokenizerFactory.class */
/**
 * {@link TokenizerFactory} that produces {@link HanLPTokenizer} instances backed by a HanLP
 * {@link Segment}.
 *
 * <p>All options are read once, in the constructor, from the argument map handed over by the
 * analysis framework. Recognised keys and their defaults:
 * <ul>
 *   <li>{@code enableIndexMode} (true)</li>
 *   <li>{@code enablePorterStemming} (false)</li>
 *   <li>{@code enableNumberQuantifierRecognize} (false)</li>
 *   <li>{@code enableCustomDictionary} (true)</li>
 *   <li>{@code enableCustomDictionaryForcing} (true)</li>
 *   <li>{@code enableTranslatedNameRecognize} (false)</li>
 *   <li>{@code enableJapaneseNameRecognize} (false)</li>
 *   <li>{@code enableOrganizationRecognize} (false)</li>
 *   <li>{@code enableNameRecognize} (false)</li>
 *   <li>{@code enablePlaceRecognize} (false)</li>
 *   <li>{@code enableTraditionalChineseMode} (false)</li>
 *   <li>{@code enableNormalization} (current value of {@code HanLP.Config.Normalization})</li>
 *   <li>{@code algorithm} ("viterbi")</li>
 *   <li>{@code customDictionaryPath} (optional)</li>
 *   <li>{@code stopWordDictionaryPath} (optional)</li>
 *   <li>{@code enableDebug} (false)</li>
 * </ul>
 *
 * <p><b>Global side effects:</b> constructing a factory may overwrite the static settings
 * {@code HanLP.Config.Normalization} and {@code HanLP.Config.CustomDictionaryPath} and may call
 * {@code HanLP.Config.enableDebug()}; {@link #create(AttributeFactory)} may overwrite the static
 * {@code TraditionalChineseTokenizer.SEGMENT}. Because these are static, the most recent write
 * wins for everything that reads them.
 */
public class HanLPTokenizerFactory extends TokenizerFactory {
    // Per-factory segmenter switches; each is assigned exactly once in the constructor.
    private final boolean enableIndexMode;
    private final boolean enablePorterStemming;
    private final boolean enableNumberQuantifierRecognize;
    private final boolean enableCustomDictionary;
    private final boolean enableCustomDictionaryForcing;
    private final boolean enableTranslatedNameRecognize;
    private final boolean enableJapaneseNameRecognize;
    private final boolean enableOrganizationRecognize;
    private final boolean enablePlaceRecognize;
    private final boolean enableNameRecognize;
    private final boolean enableTraditionalChineseMode;

    /** Name of the segmentation algorithm handed to {@code HanLP.newSegment}. */
    private final String algorithm;

    /**
     * Lines read from the file named by {@code stopWordDictionaryPath}, or {@code null} when that
     * option was not supplied. Passed as-is to every tokenizer this factory creates.
     */
    private final Set<String> stopWordDictionary;

    /**
     * Reads the tokenizer configuration from {@code map}.
     *
     * @param map option name to option value, as supplied by the analysis framework; the
     *            {@code algorithm} entry is removed from it by {@link #getString}
     *            (NOTE(review): the inherited {@code getBoolean}/{@code getSet}/{@code get}
     *            helpers presumably consume their entries as well - confirm against the
     *            Lucene version in use)
     */
    public HanLPTokenizerFactory(Map<String, String> map) {
        super(map);
        this.enableIndexMode = getBoolean(map, "enableIndexMode", true);
        this.enablePorterStemming = getBoolean(map, "enablePorterStemming", false);
        this.enableNumberQuantifierRecognize = getBoolean(map, "enableNumberQuantifierRecognize", false);
        this.enableCustomDictionary = getBoolean(map, "enableCustomDictionary", true);
        this.enableCustomDictionaryForcing = getBoolean(map, "enableCustomDictionaryForcing", true);
        this.enableTranslatedNameRecognize = getBoolean(map, "enableTranslatedNameRecognize", false);
        this.enableJapaneseNameRecognize = getBoolean(map, "enableJapaneseNameRecognize", false);
        this.enableOrganizationRecognize = getBoolean(map, "enableOrganizationRecognize", false);
        this.enableNameRecognize = getBoolean(map, "enableNameRecognize", false);
        this.enablePlaceRecognize = getBoolean(map, "enablePlaceRecognize", false);
        this.enableTraditionalChineseMode = getBoolean(map, "enableTraditionalChineseMode", false);

        // Static HanLP setting: keeps its current value unless the option is given explicitly.
        HanLP.Config.Normalization = getBoolean(map, "enableNormalization", HanLP.Config.Normalization);

        this.algorithm = getString(map, "algorithm", "viterbi");

        // Static HanLP setting: only replaced when the option is present.
        Set<String> customDictionaryPaths = getSet(map, "customDictionaryPath");
        if (customDictionaryPaths != null) {
            HanLP.Config.CustomDictionaryPath = customDictionaryPaths.toArray(new String[0]);
        }

        // Stop words are optional; the field stays null when no path is configured.
        Set<String> stopWords = null;
        String stopWordDictionaryPath = get(map, "stopWordDictionaryPath");
        if (stopWordDictionaryPath != null) {
            stopWords = new TreeSet<>();
            stopWords.addAll(IOUtil.readLineListWithLessMemory(stopWordDictionaryPath));
        }
        this.stopWordDictionary = stopWords;

        if (getBoolean(map, "enableDebug", false)) {
            HanLP.Config.enableDebug();
        }
    }

    /**
     * Removes the entry {@code str} from {@code map} and returns its value.
     *
     * @param map  argument map to read from; the entry is removed as a side effect
     * @param str  option name to look up
     * @param str2 value to return when the option is absent or mapped to {@code null}
     * @return the removed value, or {@code str2} if there was none
     */
    protected final String getString(Map<String, String> map, String str, String str2) {
        String value = map.remove(str);
        return value == null ? str2 : value;
    }

    /**
     * Builds a new {@link HanLPTokenizer} using the options captured at construction time.
     *
     * <p>A fresh {@link Segment} is created for {@code algorithm} with offsets always enabled and
     * every recognition switch applied. When traditional-Chinese mode is on, index mode is turned
     * off on that segment, the segment is stored in the static
     * {@code TraditionalChineseTokenizer.SEGMENT}, and the tokenizer is given a wrapper segment
     * that delegates each sentence to {@code TraditionalChineseTokenizer.segment}.
     *
     * @param attributeFactory supplied by the framework; not used by this implementation
     * @return a tokenizer configured with the segment, stop words and stemming flag
     */
    public Tokenizer create(AttributeFactory attributeFactory) {
        Segment segment = HanLP.newSegment(this.algorithm)
                .enableOffset(true)
                .enableIndexMode(this.enableIndexMode)
                .enableNameRecognize(this.enableNameRecognize)
                .enableNumberQuantifierRecognize(this.enableNumberQuantifierRecognize)
                .enableCustomDictionary(this.enableCustomDictionary)
                .enableCustomDictionaryForcing(this.enableCustomDictionaryForcing)
                .enableTranslatedNameRecognize(this.enableTranslatedNameRecognize)
                .enableJapaneseNameRecognize(this.enableJapaneseNameRecognize)
                .enableOrganizationRecognize(this.enableOrganizationRecognize)
                .enablePlaceRecognize(this.enablePlaceRecognize);
        if (this.enableTraditionalChineseMode) {
            // Index mode is switched off for the segment that TraditionalChineseTokenizer uses,
            // regardless of the enableIndexMode option.
            segment.enableIndexMode(false);
            // Static assignment: replaces the segment used by TraditionalChineseTokenizer.
            TraditionalChineseTokenizer.SEGMENT = segment;
            segment = new Segment() {
                protected List<Term> segSentence(char[] cArr) {
                    return TraditionalChineseTokenizer.segment(new String(cArr));
                }
            };
        }
        return new HanLPTokenizer(segment, this.stopWordDictionary, this.enablePorterStemming);
    }
}
