package com.jxdinfo.idp.extract.extractorOld.impl.channelextractor.pdf;

import cn.hutool.core.collection.CollUtil;
import com.jxdinfo.idp.common.base.dto.FileBytesInfo;
import com.jxdinfo.idp.common.base.dto.ImplCodeDto;
import com.jxdinfo.idp.common.pdfparser.core.PdfParser;
import com.jxdinfo.idp.common.pdfparser.pojo.ContentPojo;
import com.jxdinfo.idp.common.util.docparse.MatchTextUtil;
import com.jxdinfo.idp.extract.domain.dto.extractconfigOld.pdf.ResolvablePDFTableConfig;
import com.jxdinfo.idp.extract.extractorOld.enums.GroupLevel3Enum;
import com.jxdinfo.idp.extract.extractorOld.impl.channelextractor.AbstractChannelExtractor;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import javax.annotation.PostConstruct;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.springframework.stereotype.Service;

/**
 * Channel extractor that parses a PDF with table detection and returns the cells of the first
 * detected table whose cells match a configured, comma-separated sequence of keys.
 */
@Service
public class ResolvablePDFTableExtractor extends AbstractChannelExtractor<FileBytesInfo, ResolvablePDFTableConfig> {

    /** Element type value that marks a parsed content element as a table. */
    private static final String TABLE_ELEMENT_TYPE = "table";

    /** Separator between the individual keys inside one configured cell-key entry. */
    private static final String KEY_SEPARATOR = ",";

    /**
     * Returns the implementation descriptor of this extractor.
     *
     * @return a descriptor built from the {@code RESOLVABLE_PDF_TABLE} code and a display name
     */
    @Override
    public ImplCodeDto implCodeDto() {
        return new ImplCodeDto(GroupLevel3Enum.RESOLVABLE_PDF_TABLE.getCode(), "PDF表格提取器（java）");
    }

    /** Delegates to the parent initialisation once the bean has been constructed. */
    @Override
    @PostConstruct
    public void init() {
        super.init();
    }

    /**
     * Pre-extraction hook; this extractor needs no preparation, so it does nothing.
     *
     * @param fileBytesInfo            the file about to be processed (unused)
     * @param resolvablePDFTableConfig the extraction configuration (unused)
     */
    @Override
    public void before(FileBytesInfo fileBytesInfo, ResolvablePDFTableConfig resolvablePDFTableConfig) {
    }

    /**
     * Finds the first table in the PDF whose cells match one of the configured key sequences.
     *
     * <p>Each configured cell-key entry is split on {@code ","} into keys. Entries are tried in
     * configuration order, and for each entry the detected tables are tried in document order.
     * The first table that matches every key of an entry wins.
     *
     * @param fileBytesInfo            carrier of the raw PDF bytes
     * @param resolvablePDFTableConfig configuration holding the cell-key entries
     * @return a list holding exactly one element (the cell list of the matching table), or an
     *         empty list when no keys are configured, no table matches, or the PDF cannot be read
     */
    @Override
    public List<Object> extract(FileBytesInfo fileBytesInfo, ResolvablePDFTableConfig resolvablePDFTableConfig) {
        List<Object> result = new ArrayList<>();
        if (CollUtil.isEmpty(resolvablePDFTableConfig.getCellKey())) {
            // Nothing to look for, so there is no point in parsing the document.
            return result;
        }

        List<ContentPojo.contentElement> tables;
        // try-with-resources guarantees the PDF document is released even when parsing fails.
        try (PDDocument document = PDDocument.load(fileBytesInfo.getFileBytes())) {
            tables = PdfParser.parsingUnTaggedPdfWithTableDetection(document, true).getOutList().stream()
                    .filter(element -> TABLE_ELEMENT_TYPE.equals(element.getElementType()))
                    .collect(Collectors.toList());
        } catch (IOException ignored) {
            // Deliberate best-effort: an unreadable PDF yields "nothing extracted" rather than a failure.
            return new ArrayList<>();
        }

        for (String cellKey : resolvablePDFTableConfig.getCellKey()) {
            if (cellKey == null) {
                continue;
            }
            String[] keys = cellKey.split(KEY_SEPARATOR);
            for (ContentPojo.contentElement table : tables) {
                List<ContentPojo.contentElement.InnerCell> cells = table.getCells();
                if (CollUtil.isEmpty(cells)) {
                    continue;
                }
                if (matchesKeysInOrder(cells, keys)) {
                    // The whole cell list is added as a single result element.
                    result.add(cells);
                    return result;
                }
            }
        }
        return result;
    }

    /**
     * Checks whether every key can be matched against the cells while scanning forward only.
     *
     * <p>The scan position never moves backwards. A matched cell is not consumed, so the next
     * key is first tested against the same cell before the scan advances.
     *
     * @param cells non-empty cell list of one table
     * @param keys  keys to match, in order
     * @return {@code true} only if {@code keys} is non-empty and every key matched before the
     *         cells were exhausted
     */
    private static boolean matchesKeysInOrder(List<ContentPojo.contentElement.InnerCell> cells, String[] keys) {
        if (keys.length == 0) {
            return false;
        }
        int cursor = 0;
        for (String key : keys) {
            // Boolean.TRUE.equals guards against a null result from the matcher.
            while (cursor < cells.size()
                    && !Boolean.TRUE.equals(MatchTextUtil.isMatch(key, cells.get(cursor).getText()))) {
                cursor++;
            }
            if (cursor >= cells.size()) {
                // Ran out of cells before this key matched: the table does not qualify.
                return false;
            }
        }
        return true;
    }
}
