/*
 * Decompiled with CFR 0.152.
 */
package com.jxdinfo.idp.common.pdfparser.core;

import com.jxdinfo.idp.common.pdfparser.arrange.MarkPdf;
import com.jxdinfo.idp.common.pdfparser.core.BirdViewer;
import com.jxdinfo.idp.common.pdfparser.core.UnTaggedAnalyser;
import com.jxdinfo.idp.common.pdfparser.core.UnTaggedContext;
import com.jxdinfo.idp.common.pdfparser.pojo.BoldStatus;
import com.jxdinfo.idp.common.pdfparser.pojo.ContentPojo;
import com.jxdinfo.idp.common.pdfparser.pojo.SearchPattern;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.pdfbox.pdmodel.PDDocument;

public class PdfParser {
    public static ContentPojo parsingUnTaggedPdf(PDDocument pdd) throws IOException {
        int num = pdd.getNumberOfPages();
        UnTaggedContext untaggedContext = new UnTaggedContext();
        ArrayList docPages = new ArrayList();
        for (int i = 1; i <= num; ++i) {
            List<ContentPojo.contentElement> contentElements = UnTaggedAnalyser.parsePagePureText(pdd, i);
            System.out.println(contentElements);
        }
        return null;
    }

    public static ContentPojo parsingUnTaggedPdfWithTableDetection(PDDocument pdd, boolean verifyPara) throws IOException {
        return PdfParser.parsingUnTaggedPdfWithTableDetectionAndPicture(pdd, null, verifyPara);
    }

    public static ContentPojo parsingUnTaggedPdfWithTableDetectionAndPicture(PDDocument pdd, String picSavePath, boolean verifyPara) throws IOException {
        int num = pdd.getNumberOfPages();
        boolean isDocTransPdf = true;
        if (!isDocTransPdf) {
            ContentPojo contentPojo = new ContentPojo(new ArrayList<ContentPojo.contentElement>());
            contentPojo.setIsPptTransPDF(true);
            return contentPojo;
        }
        UnTaggedContext untaggedContext = new UnTaggedContext();
        untaggedContext.preHeat(pdd, 20);
        ArrayList<List<ContentPojo.contentElement>> docPages = new ArrayList<List<ContentPojo.contentElement>>();
        for (int i = 1; i <= num; ++i) {
            List<ContentPojo.contentElement> page = UnTaggedAnalyser.parsePage(pdd, i, untaggedContext, picSavePath, verifyPara);
            docPages.add(page);
        }
        BirdViewer.mergePElement(docPages, untaggedContext);
        ArrayList<ContentPojo.contentElement> outList = new ArrayList<ContentPojo.contentElement>();
        for (int i = 0; i < docPages.size(); ++i) {
            outList.addAll((Collection)docPages.get(i));
        }
        BirdViewer.mergeTableElements(outList, pdd);
        ContentPojo contentPojo = new ContentPojo();
        contentPojo.setOutList(outList);
        return contentPojo;
    }

    public static ContentPojo.contentElement searchOne(ContentPojo pojo, SearchPattern searchPattern) {
        List<ContentPojo.contentElement> outList = pojo.getOutList();
        for (ContentPojo.contentElement c : outList) {
            if (c.getText() == null || !c.getText().matches(searchPattern.getRegexStr())) continue;
            if (searchPattern.getBoldStatus() == null) {
                return c;
            }
            BoldStatus bs = MarkPdf.verifyBold(c.getPdfStyleStructs());
            if (searchPattern.getBoldStatus() != bs) continue;
            return c;
        }
        return null;
    }

    public static List<ContentPojo.contentElement> searchList(ContentPojo pojo, List<SearchPattern> searchPatterns) {
        ArrayList<ContentPojo.contentElement> resultList = new ArrayList<ContentPojo.contentElement>();
        for (SearchPattern s : searchPatterns) {
            ContentPojo.contentElement contentElement2 = PdfParser.searchOne(pojo, s);
            resultList.add(contentElement2);
        }
        return resultList;
    }

    public static ContentPojo.contentElement searchTableAfterPattern(ContentPojo pojo, SearchPattern searchPattern) {
        List<ContentPojo.contentElement> outList = pojo.getOutList();
        boolean flag = false;
        for (ContentPojo.contentElement c : outList) {
            if (flag && "table".equals(c.getElementType())) {
                return c;
            }
            if (c.getText() == null || !c.getText().matches(searchPattern.getRegexStr())) continue;
            if (searchPattern.getBoldStatus() == null) {
                flag = true;
                continue;
            }
            BoldStatus bs = MarkPdf.verifyBold(c.getPdfStyleStructs());
            if (searchPattern.getBoldStatus() != bs) continue;
            flag = true;
        }
        return null;
    }

    public static List<ContentPojo.contentElement> searchListTableAfterPattern(ContentPojo pojo, List<SearchPattern> searchPatterns) {
        ArrayList<ContentPojo.contentElement> resultList = new ArrayList<ContentPojo.contentElement>();
        for (SearchPattern s : searchPatterns) {
            ContentPojo.contentElement contentElement2 = PdfParser.searchTableAfterPattern(pojo, s);
            resultList.add(contentElement2);
        }
        return resultList;
    }

    public static void main(String[] args) throws IOException {
        PDDocument document = PDDocument.load((File)new File("D:\\linkunpeng\\Desktop\\\u667a\u80fd\u9884\u5ba1\\2.0\\\u6d4b\u8bd5\u6587\u4ef6\\\u5408\u540c\u5ba1\u67e5-F3\\\u5de5\u7a0b\\ZF3G24020003\u5168\u6d41\u7a0b\u667a\u80fd\u5de5\u5382300\u5428\u8f6c\u7089\u64cd\u4f5c\u53f0\u5408\u5e76\u9879\u76ee\\1.ZF3G24020003\u4e2d\u6807\u901a\u77e5\u4e66(\u65e0\u6c34\u5370).pdf"));
        PDDocument waterRemark = PDDocument.load((File)new File("D:\\linkunpeng\\Desktop\\\u667a\u80fd\u9884\u5ba1\\2.0\\\u6d4b\u8bd5\u6587\u4ef6\\\u5408\u540c\u5ba1\u67e5-F3\\\u5de5\u7a0b\\ZF3G24020003\u5168\u6d41\u7a0b\u667a\u80fd\u5de5\u5382300\u5428\u8f6c\u7089\u64cd\u4f5c\u53f0\u5408\u5e76\u9879\u76ee\\1.ZF3G24020003\u4e2d\u6807\u901a\u77e5\u4e66.pdf"));
        ContentPojo contentPojo = PdfParser.parsingUnTaggedPdfWithTableDetection(document, true);
        ContentPojo contentPojo1 = PdfParser.parsingUnTaggedPdfWithTableDetection(waterRemark, true);
        System.out.println(contentPojo);
        System.out.println(contentPojo1);
    }
}

