/*
 * Decompiled with CFR 0.152.
 */
package com.jxdinfo.idp.extract.core.extractor.channel.pdf;

import cn.hutool.core.collection.CollUtil;
import com.jxdinfo.idp.common.base.dto.FileBytesInfo;
import com.jxdinfo.idp.common.base.dto.ImplCodeDto;
import com.jxdinfo.idp.common.pdfparser.core.PdfParser;
import com.jxdinfo.idp.common.pdfparser.pojo.ContentPojo;
import com.jxdinfo.idp.common.util.StringUtils;
import com.jxdinfo.idp.common.util.docparse.MatchTextUtil;
import com.jxdinfo.idp.extract.core.extractor.channel.AbstractChannelExtractor;
import com.jxdinfo.idp.extract.domain.dto.extractconfigOld.ExtractConfig;
import com.jxdinfo.idp.extract.domain.dto.extractconfigOld.pdf.ResolvablePDFTextConfig;
import com.jxdinfo.idp.extract.domain.extractor.ExtractResult;
import com.jxdinfo.idp.extract.domain.location.Location;
import com.jxdinfo.idp.extract.domain.location.TempLocation;
import com.jxdinfo.idp.extract.enums.ReturnTypeEnum;
import com.jxdinfo.idp.extract.extractorOld.enums.GroupLevel3Enum;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;

@Service(value="resolvablePDFTextChannelExtractor")
public class ResolvablePDFTextExtractor
extends AbstractChannelExtractor<FileBytesInfo, ResolvablePDFTextConfig> {
    private static final Logger log = LoggerFactory.getLogger(ResolvablePDFTextExtractor.class);

    @Override
    public ImplCodeDto implCodeDto() {
        return new ImplCodeDto(GroupLevel3Enum.RESOLVABLE_PDF_TEXT.getCode(), GroupLevel3Enum.RESOLVABLE_PDF_TEXT.getName(), 1);
    }

    @Override
    public String belongExtractorImplCode() {
        return GroupLevel3Enum.RESOLVABLE_PDF_TEXT.getLevel2Enum().getCode();
    }

    @Override
    public Class<? extends ExtractConfig> getConfigClass() {
        return ResolvablePDFTextConfig.class;
    }

    @Override
    public List<String> supportDataTypes() {
        ArrayList<String> list = new ArrayList<String>();
        list.add(ReturnTypeEnum.STRING.getKey());
        return list;
    }

    @Override
    public void before(FileBytesInfo contentPojo, ResolvablePDFTextConfig config) {
    }

    @Override
    public List<ExtractResult> extract(FileBytesInfo fileBytesInfo, ResolvablePDFTextConfig config) {
        log.info("\u63d0\u53d6>>\u8fdb\u5165\u3010ResolvablePDFTextExtractor.extract\u3011" + fileBytesInfo.getFileName());
        ContentPojo contentPojo = null;
        try {
            PDDocument document = PDDocument.load((byte[])fileBytesInfo.getFileBytes());
            contentPojo = PdfParser.parsingUnTaggedPdfWithTableDetection((PDDocument)document, (boolean)true, (boolean)true);
        }
        catch (Exception e) {
            log.error("\u63d0\u53d6>>\u3010ResolvablePDFTextExtractor.extract\u3011\u5f02\u5e38:" + e.getMessage());
            return new ArrayList<ExtractResult>();
        }
        ArrayList<ExtractResult> result = new ArrayList<ExtractResult>();
        List paras = contentPojo.getOutList().stream().filter(item -> "text".equals(item.getElementType())).map(item -> item.getText()).collect(Collectors.toList());
        int startIndex = 0;
        int endIndex = paras.size() - 1;
        String resultStr = "";
        if ("para".equals(config.getFrom())) {
            String para;
            int i;
            if (StringUtils.isNotEmpty((String)config.getStartRegex())) {
                for (i = 0; i < paras.size(); ++i) {
                    para = (String)paras.get(i);
                    if (MatchTextUtil.isMatch((String)config.getStartRegex(), (String)para).booleanValue()) {
                        startIndex = i;
                        break;
                    }
                    if (i != paras.size() - 1) continue;
                    return result;
                }
            }
            if (StringUtils.isNotEmpty((String)config.getEndRegex())) {
                for (i = startIndex; i < paras.size(); ++i) {
                    para = (String)paras.get(i);
                    if (MatchTextUtil.isMatch((String)config.getEndRegex(), (String)para).booleanValue()) {
                        endIndex = i;
                        break;
                    }
                    if (i != paras.size() - 1) continue;
                    return result;
                }
            }
            startIndex = Math.max(startIndex + config.getStartOffset(), 0);
            endIndex = Math.min(endIndex + config.getEndOffset(), paras.size() - 1);
            ArrayList targetParas = new ArrayList();
            StringBuilder resultText = new StringBuilder();
            if (startIndex <= endIndex && startIndex <= paras.size() - 1) {
                for (int i2 = startIndex; i2 <= endIndex; ++i2) {
                    targetParas.add(paras.get(i2));
                    resultText.append(((String)paras.get(i2)).replaceAll("\n", ""));
                }
                resultStr = resultText.toString();
            }
        } else {
            List tables = contentPojo.getOutList().stream().filter(item -> "table".equals(item.getElementType())).map(ContentPojo.contentElement::getCells).collect(Collectors.toList());
            List<Object> targetCells = new ArrayList();
            if (CollUtil.isEmpty((Collection)config.getCellKey()) || StringUtils.isEmpty((String)((String)config.getCellKey().get(0)))) {
                targetCells = tables;
            } else {
                for (String cellKey : config.getCellKey()) {
                    List<String> regexes = Arrays.asList(cellKey.split(";"));
                    block6: for (List cells : tables) {
                        if (CollUtil.isEmpty((Collection)cells)) continue;
                        for (int i = 0; i < regexes.size(); ++i) {
                            int cellIndex = 0;
                            ContentPojo.contentElement.InnerCell cell = (ContentPojo.contentElement.InnerCell)cells.get(cellIndex);
                            String regex = regexes.get(i);
                            String cellText = this.getCellText(cell.getText());
                            while (!MatchTextUtil.isMatch((String)regex, (String)cellText).booleanValue()) {
                                if (++cellIndex >= cells.size()) continue block6;
                                cell = (ContentPojo.contentElement.InnerCell)cells.get(cellIndex);
                                cellText = this.getCellText(cell.getText());
                            }
                            if (i != regexes.size() - 1) continue;
                            targetCells.add(cells);
                        }
                    }
                }
            }
            List targetCellList = new ArrayList();
            ContentPojo.contentElement.InnerCell targetCell = null;
            block9: for (List list : targetCells) {
                for (ContentPojo.contentElement.InnerCell cell : list) {
                    if (!MatchTextUtil.isMatch((String)config.getAnchor(), (String)this.getCellText(cell.getText())).booleanValue()) continue;
                    targetCell = cell;
                    targetCellList = list;
                    break block9;
                }
            }
            if (targetCell == null) {
                return new ArrayList<ExtractResult>();
            }
            for (ContentPojo.contentElement.InnerCell innerCell : targetCellList) {
                if (innerCell.getRow_index() != targetCell.getRow_index() + config.getYOffset() || innerCell.getCol_index() != targetCell.getCol_index() + config.getXOffset()) continue;
                resultStr = innerCell.getText() != null ? innerCell.getText().replaceAll("(\\r?\\n)", "").trim() : "";
            }
        }
        if (StringUtils.isEmpty((String)resultStr)) {
            return new ArrayList<ExtractResult>();
        }
        if (CollUtil.isEmpty((Collection)config.getRegexes())) {
            TempLocation location = new TempLocation(resultStr, resultStr.trim());
            location.setFileId(fileBytesInfo.getFileId());
            result.add(new ExtractResult((Object)resultStr.trim(), (Location)location, ReturnTypeEnum.STRING.getKey(), GroupLevel3Enum.RESOLVABLE_PDF_TEXT.getCode()));
        }
        for (String regex : config.getRegexes()) {
            String text = MatchTextUtil.matchOne((String)regex, (String)resultStr);
            if (StringUtils.isEmpty((String)text)) continue;
            TempLocation location = new TempLocation(resultStr, text);
            location.setFileId(fileBytesInfo.getFileId());
            result.add(new ExtractResult((Object)text, (Location)location, ReturnTypeEnum.STRING.getKey(), GroupLevel3Enum.RESOLVABLE_PDF_TEXT.getCode()));
        }
        return result;
    }

    private String getCellText(String cellText) {
        return StringUtils.isNotBlank((CharSequence)cellText) ? cellText.replaceAll("(\\r?\\n)", "").replaceAll("\\s+", "").trim() : "";
    }
}

