package com.jxdinfo.idp.icpac.custom.sentencefilter;

import com.jxdinfo.idp.icpac.core.context.DuplicateCheckContext;
import com.jxdinfo.idp.icpac.core.entity.DuplicateCheckCandidateSentence;
import com.jxdinfo.idp.icpac.core.handler.CandidateSentenceFilter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:com/jxdinfo/idp/icpac/custom/sentencefilter/DefaultCandidateSentenceFilter.class */
/**
 * Default {@link CandidateSentenceFilter}: drops candidate sentences that already occur in the
 * template texts supplied by the {@link DuplicateCheckContext}.
 *
 * <p>Before comparison, plain spaces are stripped from both the sentence and the templates, and
 * every date matching {@link #DATE_REG} inside a sentence is replaced by a single-space
 * placeholder. A sentence that is interrupted by a date is replaced by its date-free fragments.
 */
public class DefaultCandidateSentenceFilter implements CandidateSentenceFilter {
    private static final String DATE_REG = "\\d{4}[-/年]\\d{1,2}[-/月]\\d{1,2}[日]?";

    /** Compiled once; {@code String.replaceAll} would recompile the expression for every sentence. */
    private static final Pattern DATE_PATTERN = Pattern.compile(DATE_REG);

    /**
     * Marker substituted for each date. Safe as a marker because all plain spaces are removed
     * from the sentence before the dates are replaced.
     */
    private static final String DATE_PLACEHOLDER = " ";

    // NOTE(review): the two lists below are not read anywhere in this class; kept as-is.
    private static final List<Pattern> PATTERN_LIST = new ArrayList<>();
    private static final List<String> PUNCTUATION_LIST = new ArrayList<>();

    /**
     * Filters {@code list} in place against the context's template texts.
     *
     * <p>For every candidate sentence:
     * <ul>
     *   <li>a sentence whose normalised text ends with a date has that text trimmed and written
     *       back via {@code setText};</li>
     *   <li>otherwise, a sentence containing a date is removed and each date-free fragment that is
     *       at least {@code minWordLen} long and not contained in any template is appended to the
     *       list as a new candidate;</li>
     *   <li>a remaining sentence is removed when its normalised text is empty, shorter than
     *       {@code minWordLen}, or contained in any template.</li>
     * </ul>
     *
     * @param list candidate sentences; modified in place, so it must support iterator removal and
     *     {@code addAll}
     * @param duplicateCheckContext supplies the template texts and the minimum word length
     * @return the same {@code list} instance; returned untouched when it is null/empty or when
     *     there are no template texts
     */
    @Override
    public List<DuplicateCheckCandidateSentence> filter(List<DuplicateCheckCandidateSentence> list, DuplicateCheckContext duplicateCheckContext) {
        if (CollectionUtils.isEmpty(list)) {
            return list;
        }
        List<String> templateTextList = duplicateCheckContext.getTemplateTextList();
        if (CollectionUtils.isEmpty(templateTextList)) {
            return list;
        }
        List<String> templates = normalizeTemplates(templateTextList);
        if (templates.isEmpty()) {
            return list;
        }
        int minWordLen = duplicateCheckContext.getMinWordLen();
        List<DuplicateCheckCandidateSentence> fragments = new ArrayList<>();

        // Single pass over the candidates; each one is compared against ALL templates.
        // (Sharing one iterator across a per-template loop would exhaust it after the first template.)
        Iterator<DuplicateCheckCandidateSentence> it = list.iterator();
        while (it.hasNext()) {
            DuplicateCheckCandidateSentence sentence = it.next();
            String rawText = sentence == null ? null : sentence.getText();
            if (rawText == null) {
                // Nothing to compare; handled like an empty sentence.
                it.remove();
                continue;
            }
            String normalized = DATE_PATTERN.matcher(rawText.replace(" ", "")).replaceAll(DATE_PLACEHOLDER);
            if (normalized.endsWith(DATE_PLACEHOLDER)) {
                // Trailing date: keep the sentence, and run the checks below on the trimmed text so
                // the placeholder cannot defeat the template comparison or skew the length check.
                normalized = normalized.trim();
                sentence.setText(normalized);
            } else if (normalized.contains(DATE_PLACEHOLDER)) {
                for (String part : normalized.split(DATE_PLACEHOLDER)) {
                    if (!part.isEmpty() && part.length() >= minWordLen && !containedInAnyTemplate(templates, part)) {
                        fragments.add(new DuplicateCheckCandidateSentence(part, new HashMap<>()));
                    }
                }
                it.remove();
                // Already removed: a second remove() for the same element would throw IllegalStateException.
                continue;
            }
            if (normalized.isEmpty() || normalized.length() < minWordLen || containedInAnyTemplate(templates, normalized)) {
                it.remove();
            }
        }
        list.addAll(fragments);
        return list;
    }

    /** Returns the non-null template texts with all plain spaces removed. */
    private static List<String> normalizeTemplates(List<String> templateTextList) {
        List<String> normalized = new ArrayList<>(templateTextList.size());
        for (String template : templateTextList) {
            if (template != null) {
                normalized.add(template.replace(" ", ""));
            }
        }
        return normalized;
    }

    /** Returns {@code true} when at least one template contains {@code text} as a substring. */
    private static boolean containedInAnyTemplate(List<String> templates, String text) {
        for (String template : templates) {
            if (template.contains(text)) {
                return true;
            }
        }
        return false;
    }

    static {
        PUNCTUATION_LIST.add("，");
        PUNCTUATION_LIST.add("。");
        PUNCTUATION_LIST.add("？");
    }
}
