/*
 * Decompiled with CFR 0.152.
 */
package com.jxdinfo.idp.extract.extractorOld.impl.channelextractor.pdf;

import com.jxdinfo.hutool.core.collection.CollUtil;
import com.jxdinfo.idp.common.base.dto.FileBytesInfo;
import com.jxdinfo.idp.common.base.dto.ImplCodeDto;
import com.jxdinfo.idp.common.pdfparser.core.PdfParser;
import com.jxdinfo.idp.common.pdfparser.pojo.ContentPojo;
import com.jxdinfo.idp.common.util.StringUtils;
import com.jxdinfo.idp.common.util.docparse.MatchTextUtil;
import com.jxdinfo.idp.common.util.docparse.ReadPdfUtil;
import com.jxdinfo.idp.extract.domain.dto.extractconfigOld.pdf.ResolvablePDFTextConfig;
import com.jxdinfo.idp.extract.extractorOld.enums.GroupLevel3Enum;
import com.jxdinfo.idp.extract.extractorOld.impl.channelextractor.AbstractChannelExtractor;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import javax.annotation.PostConstruct;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.stereotype.Service;

@Service
public class ResolvablePDFTextExtractor
extends AbstractChannelExtractor<FileBytesInfo, ResolvablePDFTextConfig> {
    @Override
    public ImplCodeDto implCodeDto() {
        return new ImplCodeDto(GroupLevel3Enum.RESOLVABLE_PDF_TEXT.getCode(), "\u53ef\u89e3\u6790PDF\u63d0\u53d6\u5668\uff08java\uff09");
    }

    @Override
    @PostConstruct
    public void init() {
        super.init();
    }

    @Override
    public void before(FileBytesInfo contentPojo, ResolvablePDFTextConfig config) {
    }

    @Override
    public List<Object> extract(FileBytesInfo fileBytesInfo, ResolvablePDFTextConfig config) {
        ContentPojo contentPojo = null;
        try {
            ReadPdfUtil.removeWatermark((FileBytesInfo)fileBytesInfo, (String[])new String[0]);
            PDDocument document = PDDocument.load((byte[])fileBytesInfo.getFileBytes());
            contentPojo = PdfParser.parsingUnTaggedPdfWithTableDetection((PDDocument)document, (boolean)true);
        }
        catch (IOException e) {
            return new ArrayList<Object>();
        }
        ArrayList<Object> result = new ArrayList<Object>();
        List paras = contentPojo.getOutList().stream().filter(item -> "text".equals(item.getElementType())).map(item -> item.getText()).collect(Collectors.toList());
        int startIndex = 0;
        int endIndex = paras.size() - 1;
        boolean startFlag = true;
        if ("para".equals(config.getFrom())) {
            if (StringUtils.isNotEmpty((String)config.getStartRegex()) || StringUtils.isNotEmpty((String)config.getEndRegex())) {
                for (int i = 0; i < paras.size(); ++i) {
                    String para = (String)paras.get(i);
                    if (StringUtils.isNotEmpty((String)config.getStartRegex()) && MatchTextUtil.isMatch((String)config.getStartRegex(), (String)para).booleanValue() && startFlag) {
                        startIndex = i;
                        startFlag = false;
                    }
                    if (!StringUtils.isNotEmpty((String)config.getEndRegex()) || !MatchTextUtil.isMatch((String)config.getEndRegex(), (String)para).booleanValue() || i < startIndex) continue;
                    endIndex = i;
                    break;
                }
            }
            startIndex = Math.max(startIndex + config.getStartOffset(), 0);
            endIndex = Math.min(endIndex + config.getEndOffset(), paras.size() - 1);
            ArrayList targetParas = new ArrayList();
            StringBuilder resultText = new StringBuilder();
            if (startIndex <= endIndex && startIndex <= paras.size() - 1) {
                for (int i = startIndex; i <= endIndex; ++i) {
                    targetParas.add(paras.get(i));
                    resultText.append((String)paras.get(i));
                }
            }
            if (CollUtil.isEmpty(targetParas)) {
                return new ArrayList<Object>();
            }
            if (CollUtil.isEmpty((Collection)config.getRegexes())) {
                result.add(resultText.toString());
                return result;
            }
            Iterator i = config.getRegexes().iterator();
            if (i.hasNext()) {
                String regex = (String)i.next();
                result.addAll(MatchTextUtil.match((String)regex, (String)resultText.toString()));
                return result;
            }
        } else {
            List tables = contentPojo.getOutList().stream().filter(item -> "table".equals(item.getElementType())).map(ContentPojo.contentElement::getCells).collect(Collectors.toList());
            List<Object> targetCells = new ArrayList();
            if (CollUtil.isEmpty((Collection)config.getCellKey()) || StringUtils.isEmpty((String)((String)config.getCellKey().get(0)))) {
                targetCells = tables;
            } else {
                for (String cellKey : config.getCellKey()) {
                    List<String> regexes = Arrays.asList(cellKey.split(";"));
                    block5: for (List cells : tables) {
                        if (cn.hutool.core.collection.CollUtil.isEmpty((Collection)cells)) continue;
                        int cellIndex = 0;
                        ContentPojo.contentElement.InnerCell cell = (ContentPojo.contentElement.InnerCell)cells.get(cellIndex);
                        for (int i = 0; i < regexes.size(); ++i) {
                            String regex = regexes.get(i);
                            while (!MatchTextUtil.isMatch((String)regex, (String)cell.getText()).booleanValue()) {
                                if (++cellIndex >= cells.size()) continue block5;
                                cell = (ContentPojo.contentElement.InnerCell)cells.get(cellIndex);
                            }
                            if (i != regexes.size() - 1) continue;
                            targetCells.add(cells);
                        }
                    }
                }
            }
            List targetCellList = new ArrayList();
            ContentPojo.contentElement.InnerCell targetCell = null;
            block8: for (List list : targetCells) {
                for (ContentPojo.contentElement.InnerCell cell : list) {
                    if (!MatchTextUtil.isMatch((String)config.getAnchor(), (String)cell.getText()).booleanValue()) continue;
                    targetCell = cell;
                    targetCellList = list;
                    break block8;
                }
            }
            if (targetCell == null) {
                return new ArrayList<Object>();
            }
            for (ContentPojo.contentElement.InnerCell innerCell : targetCellList) {
                if (innerCell.getRow_index() != targetCell.getRow_index() + config.getYOffset() || innerCell.getCol_index() != targetCell.getCol_index() + config.getXOffset()) continue;
                if (StringUtils.isEmpty((Collection)config.getRegexes()) || StringUtils.isEmpty((String)((String)config.getRegexes().get(0)))) {
                    result.add(innerCell.getText());
                    continue;
                }
                result.add(MatchTextUtil.match((String)config.getRegex(), (String)innerCell.getText()));
            }
        }
        return result;
    }
}

