/*
 * Decompiled with CFR 0.152.
 */
package com.jxdinfo.idp.common.util.docparse;

import cn.hutool.core.collection.CollUtil;
import com.alibaba.fastjson.JSONObject;
import com.jxdinfo.idp.common.base.dto.FileBytesInfo;
import com.jxdinfo.idp.common.util.StringUtils;
import java.io.BufferedReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.UUID;
import javax.annotation.PostConstruct;
import lombok.Generated;
import org.apache.pdfbox.contentstream.PDContentStream;
import org.apache.pdfbox.contentstream.operator.Operator;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.pdfparser.PDFStreamParser;
import org.apache.pdfbox.pdfwriter.ContentStreamWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.util.Charsets;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import technology.tabula.extractors.SpreadsheetExtractionAlgorithm;

/**
 * PDF helper used by the document-parsing pipeline.
 *
 * <p>Offers three groups of static operations:
 * <ul>
 *   <li>in-process removal of repeated text tokens (treated as watermarks) from page content
 *       streams via PDFBox;</li>
 *   <li>invocation of external Python scripts (configured through {@code idp.pdf.python.*})
 *       for watermark removal and table extraction;</li>
 *   <li>small debugging helpers ({@link #print(PDDocument)}, {@link #main(String[])}).</li>
 * </ul>
 *
 * <p>Configuration values are injected into instance fields and copied into static fields by
 * {@link #initStatic()}, so the static methods only see them after that method has run.
 */
@Component
public class ReadPdfUtil {
    @Generated
    private static final Logger log = LoggerFactory.getLogger(ReadPdfUtil.class);
    /** Operator name that marks the text tokens inspected by the watermark scans. */
    private static final String TEXT_MATRIX_OPERATOR = "Tm";
    /** Executable used to launch the configured scripts. */
    private static final String PYTHON_EXECUTABLE = "python";

    @Value(value="${idp.pdf.file.path}")
    private String filePath;
    @Value(value="${idp.pdf.python.path}")
    private String pythonPath;
    @Value(value="${idp.pdf.python.shuiyin}")
    private String shuiyin;
    @Value(value="${idp.pdf.charsetName:}")
    private String charsetName;
    private static String filePathStatic;
    private static String shuiyinStatic;
    private static String pythonPathStatic;
    private static String charsetNameStatic;
    private static final SpreadsheetExtractionAlgorithm ALGORITHM = new SpreadsheetExtractionAlgorithm();

    /**
     * Copies the injected configuration values into the static fields read by the static
     * methods of this class.
     */
    @PostConstruct
    public void initStatic() {
        filePathStatic = this.filePath;
        pythonPathStatic = this.pythonPath;
        charsetNameStatic = this.charsetName;
        shuiyinStatic = this.shuiyin;
    }

    /**
     * Loads the PDF held in {@code bytesInfo}, runs
     * {@link #removeWatermark(PDDocument, String...)} on it and stores the re-saved bytes back
     * into {@code bytesInfo}.
     *
     * <p>An {@link IOException} while loading or saving is logged and {@code bytesInfo} is left
     * unchanged.
     *
     * @param bytesInfo holder of the PDF bytes; updated in place on success
     * @param regexes   forwarded to {@link #removeWatermark(PDDocument, String...)}
     */
    public static void removeWatermark(FileBytesInfo bytesInfo, String ... regexes) {
        // try-with-resources: the document was previously never closed.
        try (PDDocument document = PDDocument.load((byte[])bytesInfo.getFileBytes());
             ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
            ReadPdfUtil.removeWatermark(document, regexes);
            document.save((OutputStream)outputStream);
            bytesInfo.setFileBytes(outputStream.toByteArray());
        }
        catch (IOException e) {
            log.error("PDF\u6587\u4ef6\u79fb\u9664\u6c34\u5370\u5931\u8d25", (Throwable)e);
        }
    }

    /**
     * Removes repeated text tokens from the page content streams of {@code document}.
     *
     * <p>First pass: on every page, each {@link COSString} whose closest preceding operator is
     * {@code Tm} and whose UTF-16BE decoding is longer than one character is recorded as a
     * candidate, and its occurrences are counted across the whole document.
     *
     * <p>Second pass: on every page that has candidates, the first candidate whose document-wide
     * count is at least the number of pages is removed (at most one token per page), and the
     * page content is replaced by the re-serialised token list.
     *
     * <p>An {@link IOException} is logged and not rethrown; pages processed before the failure
     * stay modified.
     *
     * @param document document to modify in place
     * @param regexes  NOTE(review): currently has no influence on the result. In the previous
     *                 implementation a string was recorded both when it matched one of the
     *                 patterns and when it matched none, so the patterns never filtered
     *                 anything. That outcome is preserved here without evaluating the
     *                 patterns; confirm the intended semantics before relying on this parameter.
     */
    public static void removeWatermark(PDDocument document, String ... regexes) {
        try {
            Map<String, Integer> repeatCount = new HashMap<>();
            Map<Integer, List<Integer>> candidatesByPage = new HashMap<>();
            PDPageTree pages = document.getPages();

            int pageNum = 0;
            for (PDPage page : pages) {
                List<Integer> candidates = collectCandidateIndexes(page, repeatCount);
                if (CollUtil.isNotEmpty(candidates)) {
                    candidatesByPage.put(pageNum, candidates);
                }
                ++pageNum;
            }
            if (CollUtil.isEmpty(candidatesByPage)) {
                return;
            }

            int pageCount = document.getNumberOfPages();
            pageNum = 0;
            for (PDPage page : pages) {
                List<Integer> candidates = candidatesByPage.get(pageNum++);
                if (CollUtil.isEmpty(candidates)) {
                    continue;
                }
                List<Object> tokens = parseTokens(page);
                // Candidate indexes were collected in ascending order, so walking them directly
                // finds the same first match as scanning every token, without a List.contains
                // lookup per token.
                for (int tokenIndex : candidates) {
                    if (tokenIndex >= tokens.size()) {
                        break;
                    }
                    Object token = tokens.get(tokenIndex);
                    if (!(token instanceof COSString)) {
                        continue;
                    }
                    String text = decodeUtf16((COSString)token);
                    if (repeatCount.getOrDefault(text, 0) >= pageCount) {
                        // Only the first qualifying token of a page is removed.
                        tokens.remove(tokenIndex);
                        break;
                    }
                }
                replacePageContents(document, page, tokens);
            }
        }
        catch (IOException e) {
            log.error("\u53bb\u9664\u6c34\u5370\u5931\u8d25", (Throwable)e);
        }
    }

    /**
     * Scans the content streams of every page and then rewrites each page's content from its
     * parsed tokens.
     *
     * <p>NOTE(review): nothing ever adds an entry to the occurrence map, so the list of
     * watermark strings is always empty and no token is removed. The method therefore only
     * logs the scanned text tokens at debug level (previously printed to standard output) and
     * re-serialises every page. The intended counting rule is not recoverable from this file;
     * the observable behaviour is kept until it is clarified.
     *
     * @param pd document to process in place
     * @throws Exception if a page cannot be parsed or rewritten
     */
    public static void removeTextWatermark(PDDocument pd) throws Exception {
        Map<String, Integer> countMap = new HashMap<>();
        for (PDPage page : pd.getPages()) {
            boolean afterTextMatrix = false;
            for (Object token : parseTokens(page)) {
                if (token instanceof Operator) {
                    afterTextMatrix = TEXT_MATRIX_OPERATOR.equals(((Operator)token).getName());
                    continue;
                }
                if (afterTextMatrix && token instanceof COSString) {
                    log.debug("PDF text token: {}", decodeUtf16((COSString)token));
                }
            }
        }

        List<String> waterMarks = new ArrayList<>(countMap.keySet());
        for (PDPage page : pd.getPages()) {
            List<Object> tokens = parseTokens(page);
            Iterator<Object> iterator = tokens.iterator();
            boolean afterTextMatrix = false;
            while (iterator.hasNext()) {
                Object next = iterator.next();
                if (next instanceof Operator) {
                    afterTextMatrix = TEXT_MATRIX_OPERATOR.equals(((Operator)next).getName());
                    continue;
                }
                if (afterTextMatrix && next instanceof COSString && waterMarks.contains(next.toString())) {
                    iterator.remove();
                }
            }
            replacePageContents(pd, page, tokens);
        }
    }

    /**
     * Loads the PDF held in {@code bytesInfo}, runs {@link #removeTextWatermark(PDDocument)} on
     * it and stores the re-saved bytes back into {@code bytesInfo}.
     *
     * <p>Any failure is logged (it used to be swallowed silently) and {@code bytesInfo} is left
     * unchanged.
     *
     * @param bytesInfo holder of the PDF bytes; updated in place on success
     */
    public static void removeTextWatermark(FileBytesInfo bytesInfo) {
        try (PDDocument pd = PDDocument.load((byte[])bytesInfo.getFileBytes());
             ByteArrayOutputStream outputStream = new ByteArrayOutputStream()) {
            ReadPdfUtil.removeTextWatermark(pd);
            pd.save((OutputStream)outputStream);
            bytesInfo.setFileBytes(outputStream.toByteArray());
        }
        catch (Exception e) {
            log.error("Failed to remove text watermark from PDF", (Throwable)e);
        }
    }

    /**
     * Runs the script configured as {@code idp.pdf.python.shuiyin} with two arguments: the real
     * path of {@code bytesInfo} and a freshly generated {@code <uuid>.pdf} path below the
     * configured file directory.
     *
     * <p>The script's standard output is read and discarded. Presumably the script writes its
     * result to the generated path — verify against the script.
     *
     * @param bytesInfo source file descriptor; only its real path is used
     * @return the generated path when the script ran and a file exists there; otherwise the
     *         real path of {@code bytesInfo} (script could not be started, the wait was
     *         interrupted, or no output file was produced)
     */
    public static synchronized String qsy(FileBytesInfo bytesInfo) {
        String sourcePath = bytesInfo.getRealPath();
        String outputPath = filePathStatic + File.separator + UUID.randomUUID().toString() + ".pdf";
        try {
            runPython(shuiyinStatic, sourcePath, outputPath);
            if (new File(outputPath).isFile()) {
                return outputPath;
            }
            // Handing back a path that does not exist would only fail later in the pipeline.
            log.warn("Watermark script produced no file at {}; falling back to {}", outputPath, sourcePath);
        }
        catch (IOException e) {
            log.error("Failed to run watermark script for {}", sourcePath, e);
        }
        catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error("Interrupted while running watermark script for {}", sourcePath, e);
        }
        return sourcePath;
    }

    /**
     * Runs the script configured as {@code idp.pdf.python.path} on the path returned by
     * {@link #qsy(FileBytesInfo)} and parses the script's standard output as a JSON object.
     *
     * <p>Output lines are concatenated without line separators before parsing. When the script
     * cannot be started or the wait is interrupted, the failure is logged and an empty string
     * is parsed instead.
     *
     * @param bytesInfo source file descriptor
     * @return the result of {@code JSONObject.parseObject} on the collected output
     */
    public static synchronized JSONObject getPDFTable(FileBytesInfo bytesInfo) {
        String inputPath = ReadPdfUtil.qsy(bytesInfo);
        String output = "";
        try {
            output = runPython(pythonPathStatic, inputPath);
        }
        catch (IOException e) {
            log.error("Failed to run table extraction script for {}", inputPath, e);
        }
        catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error("Interrupted while running table extraction script for {}", inputPath, e);
        }
        return JSONObject.parseObject((String)output);
    }

    /**
     * Overwrites every value of {@code cellMap} with the zero-based position of its key in
     * ascending key order (0.0, 1.0, 2.0, ...).
     *
     * @param cellMap map to renumber in place
     */
    private static void checkCellMap(TreeMap<Double, Double> cellMap) {
        double index = 0.0;
        for (Map.Entry<Double, Double> entry : cellMap.entrySet()) {
            entry.setValue(index);
            index += 1.0;
        }
    }

    /**
     * Debug helper: prints a separator line followed by every parsed content-stream token of
     * each page to standard output.
     *
     * @param document document to dump
     * @throws IOException if a page content stream cannot be parsed
     */
    public static void print(PDDocument document) throws IOException {
        for (PDPage page : document.getPages()) {
            List<Object> tokens = parseTokens(page);
            System.out.println("page------------------------------");
            tokens.forEach(System.out::println);
        }
    }

    /**
     * Manual debugging entry point that dumps the tokens of one PDF via
     * {@link #print(PDDocument)}.
     *
     * @param args optional; {@code args[0]} is the PDF path. Without arguments the original
     *             hard-coded developer path is used.
     * @throws Exception if the file cannot be opened or parsed
     */
    public static void main(String[] args) throws Exception {
        String path = args != null && args.length > 0
                ? args[0]
                : "D:\\linkunpeng\\Desktop\\\u667a\u80fd\u9884\u5ba1\\2.0\\\u6d4b\u8bd5\u6587\u4ef6\\\u6c34\u5370\u53bb\u9664\\ZW6O24080003\u5408\u540c.pdf";
        try (InputStream inputStream = new FileInputStream(path);
             PDDocument pdDocument = PDDocument.load(inputStream)) {
            ReadPdfUtil.print(pdDocument);
        }
    }

    /** Parses the content of {@code page} and returns its token list. */
    private static List<Object> parseTokens(PDPage page) throws IOException {
        PDFStreamParser parser = new PDFStreamParser((PDContentStream)page);
        parser.parse();
        return parser.getTokens();
    }

    /** Decodes the raw bytes of {@code value} as UTF-16BE. */
    private static String decodeUtf16(COSString value) {
        return new String(value.getBytes(), Charsets.UTF_16BE);
    }

    /**
     * Returns the token indexes on {@code page} that are watermark candidates (a string longer
     * than one character whose closest preceding operator is {@code Tm}) and increments each
     * candidate's occurrence count in {@code repeatCount}.
     */
    private static List<Integer> collectCandidateIndexes(PDPage page, Map<String, Integer> repeatCount) throws IOException {
        List<Integer> indexes = new ArrayList<>();
        List<Object> tokens = parseTokens(page);
        boolean afterTextMatrix = false;
        for (int i = 0; i < tokens.size(); ++i) {
            Object token = tokens.get(i);
            if (token instanceof Operator) {
                afterTextMatrix = TEXT_MATRIX_OPERATOR.equals(((Operator)token).getName());
                continue;
            }
            if (!afterTextMatrix || !(token instanceof COSString)) {
                continue;
            }
            String text = decodeUtf16((COSString)token);
            if (text.length() <= 1) {
                continue;
            }
            repeatCount.put(text, repeatCount.getOrDefault(text, 0) + 1);
            indexes.add(i);
        }
        return indexes;
    }

    /** Serialises {@code tokens} into a new stream and installs it as the content of {@code page}. */
    private static void replacePageContents(PDDocument document, PDPage page, List<Object> tokens) throws IOException {
        PDStream updatedStream = new PDStream(document);
        // try-with-resources so the stream is closed even when writing fails.
        try (OutputStream out = updatedStream.createOutputStream()) {
            new ContentStreamWriter(out).writeTokens(tokens);
        }
        page.setContents(updatedStream);
    }

    /**
     * Runs {@code python <script> <fileArgs...>} and returns the process's standard output with
     * the lines concatenated (no separators).
     *
     * <p>The configured script value is split on whitespace, as {@code Runtime.exec(String)}
     * did before, so a value carrying extra arguments keeps working. Each file argument is
     * passed as a single argument, so paths containing spaces are no longer split. Standard
     * error is inherited from this JVM instead of being left unread in a pipe, which could
     * block the child process once the pipe buffer filled up.
     */
    private static String runPython(String script, String ... fileArgs) throws IOException, InterruptedException {
        List<String> command = new ArrayList<>();
        command.add(PYTHON_EXECUTABLE);
        for (String part : String.valueOf(script).trim().split("\\s+")) {
            if (!part.isEmpty()) {
                command.add(part);
            }
        }
        for (String arg : fileArgs) {
            // ProcessBuilder rejects null elements; mirror string concatenation of a null value.
            command.add(String.valueOf(arg));
        }

        ProcessBuilder builder = new ProcessBuilder(command);
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);
        Process proc = builder.start();
        try {
            StringBuilder output = new StringBuilder();
            try (BufferedReader in = new BufferedReader(newProcessReader(proc.getInputStream()))) {
                String line;
                while ((line = in.readLine()) != null) {
                    output.append(line);
                }
            }
            int exitCode = proc.waitFor();
            if (exitCode != 0) {
                log.warn("Command {} exited with code {}", command, exitCode);
            }
            return output.toString();
        }
        finally {
            // Make sure a child that is still running after a failure does not linger.
            proc.destroy();
        }
    }

    /** Wraps {@code stream} in a reader using the configured charset, or the platform default when none is set. */
    private static InputStreamReader newProcessReader(InputStream stream) throws IOException {
        return StringUtils.isNotEmpty(charsetNameStatic)
                ? new InputStreamReader(stream, charsetNameStatic)
                : new InputStreamReader(stream);
    }
}

