/*
 * Decompiled with CFR 0.152.
 */
package com.jxdinfo.idp.extract.extractor.defaults.word;

import com.jxdinfo.idp.common.entity.util.docparse.word.WordParagraphInfo;
import com.jxdinfo.idp.common.util.docparse.MatchTextUtil;
import com.jxdinfo.idp.extract.domain.config.word.WordTextFromParaConfig;
import com.jxdinfo.idp.extract.domain.extractor.ExtractorCarrier;
import com.jxdinfo.idp.extract.domain.location.Location;
import com.jxdinfo.idp.extract.enums.ExtractorEnum;
import com.jxdinfo.idp.extract.extractor.defaults.AbstractDefaultExtractor;
import com.jxdinfo.idp.extract.extractorOld.enums.ExtractorGroupEnum;
import com.jxdinfo.idp.extract.params.annotation.Extractor;
import com.jxdinfo.idp.model.base.dto.CategoryDto;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Service;

/**
 * Extractor that pulls text out of Word paragraphs.
 *
 * <p>Paragraphs are first filtered with the configured paragraph regex
 * ({@link WordTextFromParaConfig#getParaRegex()}); the configured extraction regex
 * ({@link WordTextFromParaConfig#getRegex()}) is then applied to the text of every
 * remaining paragraph, and each hit is collected into an {@link ExtractorCarrier}
 * together with a {@link Location} built from the matched text.
 */
@Service
@Extractor(group=ExtractorGroupEnum.DOCUMENT_PARSE, name="Word-\u6587\u672c\u63d0\u53d6", order=15)
public class WordTextFromParaExtractor
extends AbstractDefaultExtractor<WordParagraphInfo, String, WordTextFromParaConfig> {
    /** Separator placed between capture groups when a regex defines more than one group. */
    private static final String GROUP_SEPARATOR = "\t";

    /**
     * @return the enum constant identifying this extractor
     */
    @Override
    public ExtractorEnum extractorEnum() {
        return ExtractorEnum.WORD_TEXT_FROM_PARA;
    }

    /**
     * @return the element type produced by this extractor, always {@code String.class}
     */
    @Override
    public Class<?> outputClass() {
        return String.class;
    }

    /**
     * @return the category descriptor supplied by the base class for string results
     */
    @Override
    protected CategoryDto categoryDto() {
        return this.stringCategoryDto();
    }

    /**
     * Runs the extraction over the given paragraphs.
     *
     * @param paraInfos paragraphs to inspect; {@code null} is treated as "no paragraphs"
     * @param config    supplies the paragraph filter regex and the extraction regex
     * @return the carrier holding every extracted value and its location, after
     *         {@code toJsonList()} has been applied
     */
    @Override
    public ExtractorCarrier<String> outPut(List<WordParagraphInfo> paraInfos, WordTextFromParaConfig config) {
        // NOTE(review): raw types kept from the decompiled source because the generic
        // signatures of carrier(...) and toJsonList() are not visible in this file.
        ExtractorCarrier carrier = this.carrier(new ArrayList(), new ArrayList<Location>());
        String regex = config.getRegex();
        for (WordParagraphInfo para : this.targetParas(paraInfos, config)) {
            this.match(carrier, regex, para.getText());
        }
        return carrier.toJsonList();
    }

    /**
     * Selects the paragraphs whose text satisfies the configured paragraph regex.
     *
     * @param paraInfos candidate paragraphs; may be {@code null}
     * @param config    supplies the paragraph regex handed to {@code MatchTextUtil.isMatch}
     * @return a new list with the accepted paragraphs in their original order (never {@code null})
     */
    private List<WordParagraphInfo> targetParas(List<WordParagraphInfo> paraInfos, WordTextFromParaConfig config) {
        List<WordParagraphInfo> result = new ArrayList<WordParagraphInfo>();
        if (paraInfos == null) {
            return result;
        }
        for (WordParagraphInfo paraInfo : paraInfos) {
            if (MatchTextUtil.isMatch((String)config.getParaRegex(), (String)paraInfo.getText()).booleanValue()) {
                result.add(paraInfo);
            }
        }
        return result;
    }

    /**
     * Applies {@code regex} to {@code text} and appends every hit to the carrier.
     *
     * <ul>
     *   <li>{@code text == null}: nothing is added.</li>
     *   <li>{@code regex} is {@code null} or empty: the whole text is added exactly once.</li>
     *   <li>Regex without capture groups: each full match is added.</li>
     *   <li>Regex with capture groups: the groups of each match are joined with a tab;
     *       groups that did not participate in the match contribute an empty string.
     *       A joined value that is empty is not added to the data list.</li>
     * </ul>
     * A {@link Location} built from the full matched text is appended for every match.
     *
     * @param carrier receives the extracted values and their locations
     * @param regex   extraction regex; {@code null} or empty means "take the whole text"
     * @param text    paragraph text to search; may be {@code null}
     * @throws java.util.regex.PatternSyntaxException if {@code regex} is not a valid pattern
     */
    public void match(ExtractorCarrier<String> carrier, String regex, String text) {
        if (text == null) {
            // Nothing to search; previously this failed inside Pattern.matcher(null).
            return;
        }
        List data = carrier.getData();
        List locations = carrier.getLocations();
        if (StringUtils.isEmpty((CharSequence)regex)) {
            // Must be decided before compiling: Pattern.compile(null) throws, and an empty
            // pattern matches the empty string at every index, which used to flood the
            // result with empty entries after the whole text had already been added.
            data.add(text);
            locations.add(new Location(text));
            return;
        }
        Matcher matcher = Pattern.compile(regex).matcher(text);
        while (matcher.find()) {
            int groupCount = matcher.groupCount();
            if (groupCount == 0) {
                data.add(matcher.group());
            } else {
                String joined = this.joinGroups(matcher, groupCount);
                if (StringUtils.isNotEmpty((CharSequence)joined)) {
                    data.add(joined);
                }
            }
            // NOTE(review): a location is recorded even when the joined value above was
            // skipped as empty, so data and locations can differ in size - confirm intended.
            locations.add(new Location(matcher.group()));
        }
    }

    /** Joins capture groups 1..groupCount of the current match with tabs, mapping unmatched groups to "". */
    private String joinGroups(Matcher matcher, int groupCount) {
        StringBuilder builder = new StringBuilder();
        for (int i = 1; i <= groupCount; ++i) {
            if (i > 1) {
                builder.append(GROUP_SEPARATOR);
            }
            String group = matcher.group(i);
            // group(i) is null when the group did not take part in the match; appending it
            // directly would insert the literal text "null".
            builder.append(group == null ? "" : group);
        }
        return builder.toString();
    }
}

