package com.jxdinfo.idp.common.util.docparse;

import cn.hutool.core.collection.CollUtil;
import com.alibaba.fastjson.JSONObject;
import com.jxdinfo.idp.common.base.dto.FileBytesInfo;
import com.jxdinfo.idp.common.entity.util.docparse.pdf.PdfInfo;
import com.jxdinfo.idp.common.util.StringUtils;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import javax.annotation.Nullable;
import javax.annotation.PostConstruct;
import lombok.Generated;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.util.Charsets;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;

@Component
/* loaded from: input_file:com/jxdinfo/idp/common/util/docparse/ReadPdfUtil.class */
public class ReadPdfUtil {

    // Injected from property "idp.pdf.python.bin"; used as the leading part of the
    // extractor command line (presumably the Python interpreter — confirm against config).
    @Value("${idp.pdf.python.bin}")
    private String pythonBin;

    // Injected from property "idp.pdf.python.path"; appended after pythonBin on the
    // command line (presumably the extraction script path — confirm against config).
    @Value("${idp.pdf.python.path}")
    private String pythonPath;

    // Injected from property "idp.pdf.charsetName" (defaults to the empty string).
    // When non-empty it is the charset used to decode the external process output.
    @Value("${idp.pdf.charsetName:}")
    private String charsetName;

    // Injected from property "idp.pdf.tablekeyLine" (defaults to 4). Not read inside this
    // class beyond being copied to tablekeyLineStatic.
    @Value("${idp.pdf.tablekeyLine:4}")
    private Integer tablekeyLine;

    // Injected from property "idp.pdf.file.path"; concatenated directly with a file name
    // (no separator is inserted) to build the on-disk path of a PDF.
    @Value("${idp.pdf.file.path}")
    private String pdfFilePath;
    // Static mirrors of the injected values above, assigned by initStatic() so that the
    // static utility methods can read them. They stay null until initStatic() has run.
    private static String pythonBinStatic;
    private static String pythonPathStatic;
    // Public: readable (and writable) from outside this class.
    public static Integer tablekeyLineStatic;
    private static String charsetNameStatic;
    private static String pdfFilePathStatic;

    // Class logger; the @Generated marker suggests it was produced by Lombok — TODO confirm.
    @Generated
    private static final Logger log = LoggerFactory.getLogger(ReadPdfUtil.class);
    // Not referenced by any method in this class.
    private static final SpreadsheetExtractionAlgorithm ALGORITHM = new SpreadsheetExtractionAlgorithm();

    /**
     * Copies the injected instance settings into their static counterparts so that the
     * static helper methods of this class can use them.
     */
    @PostConstruct
    public void initStatic() {
        ReadPdfUtil.pythonBinStatic = pythonBin;
        ReadPdfUtil.pythonPathStatic = pythonPath;
        ReadPdfUtil.charsetNameStatic = charsetName;
        ReadPdfUtil.pdfFilePathStatic = pdfFilePath;
        ReadPdfUtil.tablekeyLineStatic = tablekeyLine;
    }

    /**
     * Removes text watermarks from the PDF held in {@code fileBytesInfo} and stores the
     * re-serialized document back into it.
     *
     * <p>The bytes are only replaced when loading, processing and saving all succeed; on an
     * {@link IOException} the error is logged and {@code fileBytesInfo} is left unchanged.
     *
     * @param fileBytesInfo holder of the PDF bytes; updated in place on success
     * @param strArr optional regular expressions passed through to
     *     {@link #removeWatermark(PDDocument, String...)}
     */
    public static void removeWatermark(FileBytesInfo fileBytesInfo, @Nullable String... strArr) {
        // try-with-resources: the document was previously never closed.
        try (PDDocument document = PDDocument.load(fileBytesInfo.getFileBytes());
                ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
            removeWatermark(document, strArr);
            document.save(buffer);
            fileBytesInfo.setFileBytes(buffer.toByteArray());
        } catch (IOException e) {
            log.error("PDF文件移除水印失败", e);
        }
    }

    /**
     * Removes repeated text strings (treated as watermarks) from the page content streams
     * of {@code pDDocument}.
     *
     * <p>Pass 1 scans every page for string operands that follow a {@code Tm} operator
     * (until the next operator), decodes them as UTF-16BE, and counts how often each text
     * of length &gt; 1 occurs in the whole document. Pass 2 rewrites the content stream of
     * every page that had candidates, dropping the first candidate string whose document-wide
     * count is at least the number of pages.
     *
     * <p>When {@code strArr} is null or empty every such string is a candidate; otherwise
     * only strings fully matching at least one of the given regular expressions are.
     * Previously the patterns had no filtering effect: a matching string was counted twice
     * and a non-matching string was still counted.
     *
     * <p>An {@link IOException} is logged and not propagated.
     *
     * @param pDDocument the document to modify in place
     * @param strArr optional regular expressions (as accepted by {@link String#matches});
     *     null entries are ignored
     */
    public static void removeWatermark(PDDocument pDDocument, @Nullable String... strArr) {
        // A varargs call without patterns yields an empty array, so treat it like null.
        boolean filterByPattern = strArr != null && strArr.length > 0;
        try {
            Map<String, Integer> occurrences = new HashMap<>();
            Map<Integer, List<Integer>> candidatesByPage = new HashMap<>();
            PDPageTree pages = pDDocument.getPages();

            // Pass 1: collect candidate token indexes per page and count each text.
            int pageIndex = 0;
            for (PDPage page : pages) {
                PDFStreamParser parser = new PDFStreamParser(page);
                parser.parse();
                List<Object> tokens = parser.getTokens();
                List<Integer> candidates = new ArrayList<>();
                boolean afterTm = false;
                for (int tokenIndex = 0; tokenIndex < tokens.size(); tokenIndex++) {
                    Object token = tokens.get(tokenIndex);
                    if (token instanceof Operator) {
                        afterTm = "Tm".equals(((Operator) token).getName());
                    } else if (afterTm && token instanceof COSString) {
                        String text = new String(((COSString) token).getBytes(), Charsets.UTF_16BE);
                        if (text.length() > 1
                                && (!filterByPattern || matchesAnyPattern(text, strArr))) {
                            occurrences.merge(text, 1, Integer::sum);
                            candidates.add(tokenIndex);
                        }
                    }
                }
                if (!candidates.isEmpty()) {
                    candidatesByPage.put(pageIndex, candidates);
                }
                pageIndex++;
            }
            if (candidatesByPage.isEmpty()) {
                return;
            }

            // Pass 2: re-parse each page with candidates and drop the watermark token.
            int pageCount = pDDocument.getNumberOfPages();
            pageIndex = 0;
            for (PDPage page : pages) {
                List<Integer> candidates = candidatesByPage.get(pageIndex);
                pageIndex++;
                if (candidates == null) {
                    continue;
                }
                PDFStreamParser parser = new PDFStreamParser(page);
                parser.parse();
                List<Object> tokens = parser.getTokens();
                // Candidate indexes were recorded in ascending order, so walking them directly
                // visits tokens in the same order as a full scan without a contains() per token.
                for (int tokenIndex : candidates) {
                    if (tokenIndex >= tokens.size()) {
                        break;
                    }
                    Object token = tokens.get(tokenIndex);
                    if (!(token instanceof COSString)) {
                        continue;
                    }
                    Integer count = occurrences.get(
                            new String(((COSString) token).getBytes(), Charsets.UTF_16BE));
                    if (count != null && count >= pageCount) {
                        // NOTE(review): only the first qualifying string per page is removed
                        // (kept from the original); confirm whether all should be removed.
                        tokens.remove(tokenIndex);
                        break;
                    }
                }
                PDStream rewritten = new PDStream(pDDocument);
                try (OutputStream out = rewritten.createOutputStream()) {
                    new ContentStreamWriter(out).writeTokens(tokens);
                }
                page.setContents(rewritten);
            }
        } catch (IOException e) {
            log.error("去除水印失败", e);
        }
    }

    /** Returns true when {@code text} fully matches at least one non-null regex in {@code patterns}. */
    private static boolean matchesAnyPattern(String text, String[] patterns) {
        for (String pattern : patterns) {
            if (pattern != null && text.matches(pattern)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Walks the text strings that follow a {@code Tm} operator on every page and rewrites
     * each page's content stream without the strings selected as watermarks.
     *
     * <p>NOTE(review): nothing in this method ever selects a watermark text — the candidate
     * collection is created empty and never filled — so at present no token is removed and
     * every page is merely re-serialized. The selection rule appears to be missing; confirm
     * the intended criterion before relying on this method.
     *
     * @param pDDocument the document whose page content streams are rewritten in place
     * @throws Exception if a page content stream cannot be parsed or written
     */
    public static void removeTextWatermark(PDDocument pDDocument) throws Exception {
        // Diagnostic pass: report each decoded text string. Routed through the class logger
        // instead of System.out, and skipped entirely when debug logging is off.
        if (log.isDebugEnabled()) {
            for (PDPage page : pDDocument.getPages()) {
                PDFStreamParser parser = new PDFStreamParser(page);
                parser.parse();
                boolean afterTm = false;
                for (Object token : parser.getTokens()) {
                    if (token instanceof Operator) {
                        afterTm = "Tm".equals(((Operator) token).getName());
                    } else if (afterTm && token instanceof COSString) {
                        log.debug("PDF text token: {}",
                                new String(((COSString) token).getBytes(), Charsets.UTF_16BE));
                    }
                }
            }
        }

        // Always empty today (see the NOTE in the method documentation).
        List<String> watermarkTexts = new ArrayList<>();

        for (PDPage page : pDDocument.getPages()) {
            PDFStreamParser parser = new PDFStreamParser(page);
            parser.parse();
            List<Object> tokens = parser.getTokens();
            boolean afterTm = false;
            for (Iterator<Object> it = tokens.iterator(); it.hasNext();) {
                Object token = it.next();
                if (token instanceof Operator) {
                    afterTm = "Tm".equals(((Operator) token).getName());
                } else if (afterTm && token instanceof COSString
                        && watermarkTexts.contains(token.toString())) {
                    // Single removal per token: a second Iterator.remove() would throw.
                    it.remove();
                }
            }
            PDStream rewritten = new PDStream(pDDocument);
            // try-with-resources so the stream is closed even if writing fails.
            try (OutputStream out = rewritten.createOutputStream()) {
                new ContentStreamWriter(out).writeTokens(tokens);
            }
            page.setContents(rewritten);
        }
    }

    /**
     * Runs {@link #removeTextWatermark(PDDocument)} on the PDF held in {@code fileBytesInfo}
     * and stores the re-serialized document back into it.
     *
     * <p>Any failure is logged and not propagated; in that case the bytes in
     * {@code fileBytesInfo} are left unchanged. Previously failures were swallowed silently
     * and the document was not closed when processing threw.
     *
     * @param fileBytesInfo holder of the PDF bytes; updated in place on success
     */
    public static void removeTextWatermark(FileBytesInfo fileBytesInfo) {
        try (PDDocument document = PDDocument.load(fileBytesInfo.getFileBytes());
                ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
            removeTextWatermark(document);
            document.save(buffer);
            fileBytesInfo.setFileBytes(buffer.toByteArray());
        } catch (Exception e) {
            // Broad catch retained: the delegate is declared to throw Exception and callers
            // of this method have never had to handle one.
            log.error("PDF文件移除水印失败", e);
        }
    }

    /**
     * Writes the PDF bytes to disk, runs the configured external extractor on that file and
     * parses the extractor's standard output as a {@link PdfInfo}.
     *
     * <p>If {@code fileBytesInfo} has no real path, one is derived by concatenating the
     * configured PDF directory with the file name and stored back on {@code fileBytesInfo}.
     * The file at the real path is always (over)written with the bytes and always deleted
     * afterwards, whether or not extraction succeeded.
     *
     * <p>I/O failures and interruption are logged, not thrown; the interrupt flag is restored.
     *
     * @param fileBytesInfo PDF bytes plus file name / optional real path
     * @return the parsed result; an empty {@link PdfInfo} when the output is not valid JSON.
     *     NOTE(review): presumably {@code null} when the extractor printed nothing, since
     *     fastjson appears to return null for empty text — verify before dereferencing.
     */
    public static synchronized PdfInfo getPdfInfo(FileBytesInfo fileBytesInfo) {
        if (fileBytesInfo.getRealPath() == null) {
            // NOTE(review): the file name is concatenated unchecked; if it can come from an
            // untrusted upload, validate it against path traversal ("../") before this point.
            fileBytesInfo.setRealPath(pdfFilePathStatic + fileBytesInfo.getFileName());
        }
        String pdfPath = fileBytesInfo.getRealPath();
        log.info("开始解析pdf，pdf路径为{}", pdfPath);

        StringBuilder output = new StringBuilder();
        Process process = null;
        try {
            // Close the file before the extractor opens it and before it is deleted.
            try (FileOutputStream fileOut = new FileOutputStream(pdfPath)) {
                fileOut.write(fileBytesInfo.getFileBytes());
            }

            // The configured interpreter/script text is split on whitespace exactly as
            // Runtime.exec(String) did, but the PDF path is passed as ONE argument so that
            // paths containing spaces are no longer broken apart.
            List<String> command = new ArrayList<>();
            for (String part : (pythonBinStatic + " " + pythonPathStatic).trim().split("\\s+")) {
                if (!part.isEmpty()) {
                    command.add(part);
                }
            }
            command.add(pdfPath);

            // Forward stderr to the JVM's stderr: an undrained stderr pipe can block the child.
            process = new ProcessBuilder(command)
                    .redirectError(ProcessBuilder.Redirect.INHERIT)
                    .start();

            InputStreamReader decoder = StringUtils.isNotEmpty(charsetNameStatic)
                    ? new InputStreamReader(process.getInputStream(), charsetNameStatic)
                    : new InputStreamReader(process.getInputStream());
            try (BufferedReader reader = new BufferedReader(decoder)) {
                String line;
                while ((line = reader.readLine()) != null) {
                    // Lines are joined without separators, as before.
                    output.append(line);
                }
            }
            process.waitFor();
        } catch (IOException e) {
            log.error("pdf解析失败", e);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error("pdf解析失败", e);
        } finally {
            if (process != null) {
                // No-op for a finished process; stops it if we bailed out early.
                process.destroy();
            }
            File written = new File(pdfPath);
            if (written.exists() && !written.delete()) {
                log.error("文件{}删除失败", pdfPath);
            }
        }

        try {
            return (PdfInfo) JSONObject.parseObject(output.toString(), PdfInfo.class);
        } catch (Exception e) {
            log.error("pdf解析失败", e);
            return new PdfInfo();
        }
    }

    /**
     * Runs the configured external extractor on a PDF and parses its standard output as JSON.
     *
     * <p>If {@code fileBytesInfo} has no real path, one is derived by concatenating the
     * configured PDF directory with the file name, stored back on {@code fileBytesInfo}, and
     * the bytes are written to that path. A path supplied by the caller is used as is and
     * nothing is written. The file is not deleted by this method.
     *
     * <p>I/O failures and interruption are logged, not thrown; the interrupt flag is restored.
     *
     * @param fileBytesInfo PDF bytes plus file name / optional real path
     * @return the parsed JSON; an empty {@link JSONObject} when the output is not valid JSON.
     *     NOTE(review): presumably {@code null} when the extractor printed nothing, since
     *     fastjson appears to return null for empty text — verify before dereferencing.
     */
    public static synchronized JSONObject getPDFTable(FileBytesInfo fileBytesInfo) {
        boolean mustWriteFile = fileBytesInfo.getRealPath() == null;
        if (mustWriteFile) {
            // NOTE(review): the file name is concatenated unchecked; validate it against
            // path traversal if it can originate from untrusted input.
            fileBytesInfo.setRealPath(pdfFilePathStatic + fileBytesInfo.getFileName());
        }
        String pdfPath = fileBytesInfo.getRealPath();
        // Logged after the path is resolved; it used to print null for in-memory files.
        log.info("开始解析pdf，pdf路径为{}", pdfPath);

        StringBuilder output = new StringBuilder();
        Process process = null;
        try {
            if (mustWriteFile) {
                // The stream used to be left open; close it before the extractor reads.
                try (FileOutputStream fileOut = new FileOutputStream(pdfPath)) {
                    fileOut.write(fileBytesInfo.getFileBytes());
                }
            }

            // Configured interpreter/script are split on whitespace as Runtime.exec(String)
            // did; the PDF path is a single argument so embedded spaces survive.
            List<String> command = new ArrayList<>();
            for (String part : (pythonBinStatic + " " + pythonPathStatic).trim().split("\\s+")) {
                if (!part.isEmpty()) {
                    command.add(part);
                }
            }
            command.add(pdfPath);

            // Forward stderr to the JVM's stderr: an undrained stderr pipe can block the child.
            process = new ProcessBuilder(command)
                    .redirectError(ProcessBuilder.Redirect.INHERIT)
                    .start();

            InputStreamReader decoder = StringUtils.isNotEmpty(charsetNameStatic)
                    ? new InputStreamReader(process.getInputStream(), charsetNameStatic)
                    : new InputStreamReader(process.getInputStream());
            try (BufferedReader reader = new BufferedReader(decoder)) {
                String line;
                while ((line = reader.readLine()) != null) {
                    output.append(line);
                }
            }
            process.waitFor();
        } catch (IOException e) {
            log.error("pdf解析失败", e);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error("pdf解析失败", e);
        } finally {
            if (process != null) {
                // No-op for a finished process; stops it if we bailed out early.
                process.destroy();
            }
        }
        log.info("pdf解析结束解析完成");

        try {
            return JSONObject.parseObject(output.toString());
        } catch (Exception e) {
            log.error("pdf解析失败", e);
            return new JSONObject();
        }
    }

    /**
     * Runs the extractor on the PDF at {@code str} and parses its standard output as a
     * {@link PdfInfo}.
     *
     * <p>The interpreter and script come from the injected configuration when it has been
     * initialised; otherwise (e.g. when started from {@code main} without Spring) the original
     * hard-coded developer paths are used, so stand-alone behaviour is unchanged.
     *
     * <p>I/O failures and interruption are logged, not thrown. A JSON parse failure still
     * propagates as before.
     *
     * @param str path of the PDF file to analyse
     * @return the parsed result. NOTE(review): presumably {@code null} when the extractor
     *     printed nothing (fastjson appears to return null for empty text) — verify.
     */
    public static synchronized PdfInfo getPDFInfoTest(String str) {
        log.info("开始解析pdf，pdf路径为{}", str);
        String interpreter = (pythonBinStatic != null && !pythonBinStatic.isEmpty())
                ? pythonBinStatic
                : "D:\\Code\\language\\python\\Python-3.9.8\\python.exe";
        String script = (pythonPathStatic != null && !pythonPathStatic.isEmpty())
                ? pythonPathStatic
                : "E:/code/idps/idp_web/idp-common/src/main/resources/extract.py";

        // Interpreter/script are split on whitespace as Runtime.exec(String) did; the PDF
        // path is passed as one argument so that spaces inside it are preserved.
        List<String> command = new ArrayList<>();
        for (String part : (interpreter + " " + script).trim().split("\\s+")) {
            if (!part.isEmpty()) {
                command.add(part);
            }
        }
        command.add(str);

        StringBuilder output = new StringBuilder();
        Process process = null;
        try {
            // Forward stderr to the JVM's stderr: an undrained stderr pipe can block the child.
            process = new ProcessBuilder(command)
                    .redirectError(ProcessBuilder.Redirect.INHERIT)
                    .start();
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(process.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    output.append(line);
                }
            }
            process.waitFor();
        } catch (IOException e) {
            log.error("pdf解析失败", e);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error("pdf解析失败", e);
        } finally {
            if (process != null) {
                process.destroy();
            }
        }
        log.info("pdf解析结束解析完成");
        return (PdfInfo) JSONObject.parseObject(output.toString(), PdfInfo.class);
    }

    /**
     * Runs the extractor on the PDF at {@code str} and parses its standard output as JSON.
     *
     * <p>The interpreter and script come from the injected configuration when it has been
     * initialised; otherwise (e.g. when started from {@code main} without Spring) the original
     * hard-coded developer paths are used, so stand-alone behaviour is unchanged.
     *
     * <p>I/O failures and interruption are logged, not thrown. A JSON parse failure still
     * propagates as before.
     *
     * @param str path of the PDF file to analyse
     * @return the parsed JSON. NOTE(review): presumably {@code null} when the extractor
     *     printed nothing (fastjson appears to return null for empty text) — verify.
     */
    public static synchronized JSONObject getPDFTable(String str) {
        log.info("开始解析pdf，pdf路径为{}", str);
        String interpreter = (pythonBinStatic != null && !pythonBinStatic.isEmpty())
                ? pythonBinStatic
                : "D:\\Code\\language\\python\\Python-3.9.8\\python.exe";
        String script = (pythonPathStatic != null && !pythonPathStatic.isEmpty())
                ? pythonPathStatic
                : "E:/code/idps/idp_web/idp-common/src/main/resources/extract.py";

        // Interpreter/script are split on whitespace as Runtime.exec(String) did; the PDF
        // path is passed as one argument so that spaces inside it are preserved.
        List<String> command = new ArrayList<>();
        for (String part : (interpreter + " " + script).trim().split("\\s+")) {
            if (!part.isEmpty()) {
                command.add(part);
            }
        }
        command.add(str);

        StringBuilder output = new StringBuilder();
        Process process = null;
        try {
            // Forward stderr to the JVM's stderr: an undrained stderr pipe can block the child.
            process = new ProcessBuilder(command)
                    .redirectError(ProcessBuilder.Redirect.INHERIT)
                    .start();
            try (BufferedReader reader =
                    new BufferedReader(new InputStreamReader(process.getInputStream()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    output.append(line);
                }
            }
            process.waitFor();
        } catch (IOException e) {
            log.error("pdf解析失败", e);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error("pdf解析失败", e);
        } finally {
            if (process != null) {
                process.destroy();
            }
        }
        log.info("pdf解析结束解析完成");
        return JSONObject.parseObject(output.toString());
    }

    /**
     * Overwrites every value of {@code treeMap} with the zero-based rank of its key in the
     * map's iteration order (0.0 for the first key, 1.0 for the second, and so on). Keys are
     * left untouched.
     *
     * @param treeMap the map whose values are replaced in place
     */
    private static void checkCellMap(TreeMap<Double, Double> treeMap) {
        double rank = 0.0d;
        for (Map.Entry<Double, Double> cell : treeMap.entrySet()) {
            cell.setValue(rank);
            rank += 1.0d;
        }
    }

    /**
     * Manual smoke run: extracts tables from a fixed sample PDF on the developer's machine
     * and prints a completion marker.
     *
     * @param strArr ignored
     * @throws Exception declared for convenience; nothing is handled here
     */
    public static void main(String[] strArr) throws Exception {
        final String samplePdfPath = "D:\\linkunpeng\\Desktop\\智能预审\\2.0\\测试文件\\文档对比\\日钢现场\\2\\烧结制造部13#14#15#18#19#环冷机增加环形皮带项目技术协议(2).pdf";
        getPDFTable(samplePdfPath);
        System.out.println("end");
    }
}
