package com.jxdinfo.idp.icpac.core.dochandler.impl;

import com.jxdinfo.idp.common.entity.util.docparse.word.WordChapterInfo;
import com.jxdinfo.idp.common.entity.util.docparse.word.WordInfo;
import com.jxdinfo.idp.common.entity.util.docparse.word.WordParagraphInfo;
import com.jxdinfo.idp.icpac.core.dochandler.DocumentHandler;
import com.jxdinfo.idp.icpac.core.entity.DuplicateCheckCandidateSentence;
import com.jxdinfo.idp.icpac.core.entity.DuplicateCheckInfo;
import com.jxdinfo.idp.icpac.core.entity.PDFContrastRequest;
import com.jxdinfo.idp.icpac.core.handler.SentenceHandler;
import com.jxdinfo.idp.icpac.core.rmi.feign.MutilFormatRestorationFeignHttp;
import com.jxdinfo.idp.icpac.service.DuplicateCheckDocService;
import com.jxdinfo.idp.icpac.utils.FilenameUtils;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.mock.web.MockMultipartFile;
import org.springframework.web.multipart.MultipartFile;

/* loaded from: input_file:com/jxdinfo/idp/icpac/core/dochandler/impl/PdfDocumentHandler.class */
public class PdfDocumentHandler implements DocumentHandler {

    /** Client whose {@code turnPDFToWord} call is used by {@code pdf2Word} to obtain the converted document bytes. */
    @Autowired
    private MutilFormatRestorationFeignHttp iMutilFormatRestorationFeignHttp;

    /**
     * Bookmark chapters keyed by the raw outline title; written by {@code printBookmark}.
     * NOTE(review): this is mutable per-instance state that is never cleared in the visible code —
     * confirm the handler is not shared across documents.
     */
    private final Map<String, PDFChapter> chapterCollection = new HashMap<>();

    private static final Logger log = LoggerFactory.getLogger(PdfDocumentHandler.class);

    /** Leading chapter number: one or more digit groups, each optionally followed by a dot (e.g. "1", "1.2", "1.2.3."). */
    private static final Pattern CHAPTER_PATTERN = Pattern.compile("^((\\d+\\.?)+)");

    /** Leading list marker made of digits followed by the ideographic comma (e.g. "1、"); treated as noise by {@code isChapter}. */
    private static final Pattern CAESURA_NOISE_PATTERN = Pattern.compile("^((\\d+、)+)");

    /** Leading date such as "2023年5月1日", "2023-05-01" or "2023/5/1"; treated as noise by {@code isChapter}. */
    private static final Pattern MONTH_NOISE_PATTERN = Pattern.compile("^(\\d{4}[年/-]\\d{1,2})[月/-]\\d{1,2}日?");
    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:com/jxdinfo/idp/icpac/core/dochandler/impl/PdfDocumentHandler$PDFChapter.class */
    /**
     * One node of the PDF bookmark (outline) tree: a title string, a link to the parent
     * node, the nesting level and an optional {@link WordChapterInfo}.
     *
     * <p>{@code equals}, {@code hashCode} and {@code toString} all follow the {@code parent}
     * link, so they walk the whole ancestor chain of a node.
     */
    public class PDFChapter {
        private String chapterStr;
        private PDFChapter parent;
        private int level;
        private WordChapterInfo chapterInfo;

        /**
         * Creates a chapter node without an attached {@link WordChapterInfo}.
         *
         * @param str        chapter title text
         * @param pDFChapter parent node, or {@code null}
         * @param i          nesting level
         */
        public PDFChapter(String str, PDFChapter pDFChapter, int i) {
            this(str, pDFChapter, i, null);
        }

        /**
         * Creates a fully populated chapter node.
         *
         * @param str             chapter title text
         * @param pDFChapter      parent node, or {@code null}
         * @param i               nesting level
         * @param wordChapterInfo chapter information to attach, may be {@code null}
         */
        public PDFChapter(String str, PDFChapter pDFChapter, int i, WordChapterInfo wordChapterInfo) {
            this.chapterStr = str;
            this.parent = pDFChapter;
            this.level = i;
            this.chapterInfo = wordChapterInfo;
        }

        public String getChapterStr() {
            return this.chapterStr;
        }

        public void setChapterStr(String str) {
            this.chapterStr = str;
        }

        public PDFChapter getParent() {
            return this.parent;
        }

        public void setParent(PDFChapter pDFChapter) {
            this.parent = pDFChapter;
        }

        public int getLevel() {
            return this.level;
        }

        public void setLevel(int i) {
            this.level = i;
        }

        public WordChapterInfo getChapterInfo() {
            return this.chapterInfo;
        }

        public void setChapterInfo(WordChapterInfo wordChapterInfo) {
            this.chapterInfo = wordChapterInfo;
        }

        /**
         * Two chapters are equal when level, title, parent and chapter info are all equal.
         * The parent comparison is recursive.
         */
        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (!(obj instanceof PDFChapter)) {
                return false;
            }
            PDFChapter other = (PDFChapter) obj;
            return other.canEqual(this)
                    && getLevel() == other.getLevel()
                    && nullSafeEquals(getChapterStr(), other.getChapterStr())
                    && nullSafeEquals(getParent(), other.getParent())
                    && nullSafeEquals(getChapterInfo(), other.getChapterInfo());
        }

        /** Hook that lets a subclass refuse equality with a plain {@code PDFChapter}. */
        protected boolean canEqual(Object obj) {
            return obj instanceof PDFChapter;
        }

        /** Hash over level, title, parent and chapter info (multiplier 59, 43 for {@code null}). */
        @Override
        public int hashCode() {
            final int prime = 59;
            int result = prime + getLevel();

            String title = getChapterStr();
            result = result * prime + (title == null ? 43 : title.hashCode());

            PDFChapter ancestor = getParent();
            result = result * prime + (ancestor == null ? 43 : ancestor.hashCode());

            WordChapterInfo info = getChapterInfo();
            result = result * prime + (info == null ? 43 : info.hashCode());
            return result;
        }

        @Override
        public String toString() {
            return new StringBuilder("PdfDocumentHandler.PDFChapter(chapterStr=")
                    .append(getChapterStr())
                    .append(", parent=")
                    .append(getParent())
                    .append(", level=")
                    .append(getLevel())
                    .append(", chapterInfo=")
                    .append(getChapterInfo())
                    .append(")")
                    .toString();
        }

        /** Null-tolerant equality: two nulls are equal, otherwise delegates to {@code left.equals(right)}. */
        private boolean nullSafeEquals(Object left, Object right) {
            return left == null ? right == null : left.equals(right);
        }
    }

    /**
     * Turns the PDF carried by {@code duplicateCheckInfo} into candidate sentences.
     *
     * <p>Steps: check that the PDF can be parsed, run the bookmark hook, convert the PDF to a
     * Word document, read its paragraphs, assemble them into chapters and hand the chapters to
     * the configured {@link SentenceHandler}.
     *
     * @param duplicateCheckInfo request carrying the uploaded file and the sentence handler
     * @return the sentences produced by the sentence handler, or an empty list when
     *         {@code canParsePdf} rejects the file
     * @throws RuntimeException if no sentence handler is configured
     * @throws Exception        if the PDF-to-Word conversion or a later step fails
     */
    @Override // com.jxdinfo.idp.icpac.core.dochandler.DocumentHandler
    public List<DuplicateCheckCandidateSentence> handleDoc(DuplicateCheckInfo duplicateCheckInfo) throws Exception {
        MultipartFile file = duplicateCheckInfo.getFile();
        if (!canParsePdf(file)) {
            log.warn("当前的pdf：'{}' 不是电子版 而是扫描件", file.getOriginalFilename());
            return new ArrayList<>();
        }

        // Fail fast: without a handler the result of the conversion below could never be used,
        // so do not pay for the conversion call and the document parsing first.
        SentenceHandler sentenceHandler = duplicateCheckInfo.getSentenceHandler();
        if (sentenceHandler == null) {
            throw new RuntimeException("给定的自定义的句子处理器为空，没有办法获取自定义的句子处理器");
        }

        parseBookMark(file);
        WordInfo wordInfo = parseDocx(pdf2Word(file));

        // Starts empty; it is only handed to assembleChapter here.
        Map<Integer, Set<String>> titlesBySegmentCount = new HashMap<>();
        wordInfo.setChapterInfos(assembleChapter(wordInfo.getElementInfos(), titlesBySegmentCount));

        return sentenceHandler.handleSentence(wordInfo.getChapterInfos(), duplicateCheckInfo);
    }

    /**
     * Bookmark hook called by {@code handleDoc} before the PDF is converted to Word.
     *
     * <p>The body is empty in this source, so the call has no effect and
     * {@code printBookmark} is never reached from here.
     * NOTE(review): this file appears to be decompiler output; confirm whether the original
     * implementation was intentionally empty.
     *
     * @param multipartFile the uploaded PDF (currently unused)
     * @throws IOException declared by the signature; nothing in the current body throws it
     */
    private void parseBookMark(MultipartFile multipartFile) throws IOException {
    }

    /**
     * Records a bookmark item and, recursively, all of its children in {@code chapterCollection}.
     *
     * <p>The map key is the raw outline title; the stored chapter carries the title with every
     * space character removed and then trimmed.
     * NOTE(review): a bookmark without a title would fail on the {@code replaceAll} call — confirm
     * whether such outlines can occur.
     *
     * @param pDOutlineItem bookmark item to record
     * @param i             nesting level of this item; children are recorded with {@code i + 1}
     * @param pDFChapter    chapter node of the enclosing bookmark, or {@code null} for a root item
     * @throws IOException declared by the signature
     */
    private void printBookmark(PDOutlineItem pDOutlineItem, int i, PDFChapter pDFChapter) throws IOException {
        String rawTitle = pDOutlineItem.getTitle();
        String compactTitle = rawTitle.replaceAll(" ", "").trim();

        PDFChapter current = new PDFChapter(compactTitle, pDFChapter, i);
        this.chapterCollection.put(rawTitle, current);

        Iterable<PDOutlineItem> children = pDOutlineItem.children();
        if (children == null) {
            return;
        }
        for (PDOutlineItem child : children) {
            printBookmark(child, i + 1, current);
        }
    }

    /**
     * Reports whether the PDF can be parsed; {@code handleDoc} returns an empty result when
     * this is {@code false}.
     *
     * <p>In this source the method always returns {@code true} and does not inspect the file.
     * NOTE(review): the warning logged by the caller suggests this was meant to detect scanned
     * PDFs — confirm whether a real check is missing.
     *
     * @param multipartFile the uploaded PDF (currently unused)
     * @return always {@code true}
     * @throws IOException declared by the signature; nothing in the current body throws it
     */
    private boolean canParsePdf(MultipartFile multipartFile) throws IOException {
        return true;
    }

    /**
     * Reads the non-empty paragraphs of a .docx file into a {@link WordInfo}.
     *
     * <p>Each non-empty paragraph becomes a {@link WordParagraphInfo} whose {@code poiIndex} and
     * {@code index} are both its zero-based position among the non-empty paragraphs (the two
     * counters in the previous implementation always held the same value).
     *
     * <p>The document is opened with try-with-resources so it is closed on every path. If it
     * cannot be loaded, the error is logged with its cause and the returned {@code WordInfo}
     * carries an empty element list.
     *
     * @param multipartFile the .docx content
     * @return a {@code WordInfo} whose element list holds one entry per non-empty paragraph
     */
    private WordInfo parseDocx(MultipartFile multipartFile) {
        WordInfo wordInfo = new WordInfo();
        // Element type is declared outside this file, so the list is left raw as before.
        // Registered up front so callers see an empty list, not a missing one, on load failure.
        List elementInfos = new ArrayList();
        wordInfo.setElementInfos(elementInfos);

        try (XWPFDocument document = new XWPFDocument(new ByteArrayInputStream(multipartFile.getBytes()))) {
            int index = -1;
            for (XWPFParagraph paragraph : document.getParagraphs()) {
                String paragraphText = paragraph.getParagraphText();
                if (StringUtils.isEmpty(paragraphText)) {
                    continue;
                }
                index++;
                WordParagraphInfo paragraphInfo = new WordParagraphInfo();
                paragraphInfo.setPoiIndex(Integer.valueOf(index));
                paragraphInfo.setText(paragraphText);
                paragraphInfo.setIndex(Integer.valueOf(index));
                elementInfos.add(paragraphInfo);
            }
        } catch (IOException e) {
            // Same message text as before; the exception is now passed along so the cause is not lost.
            log.error("docx文档+【{}】加载异常！", multipartFile.getOriginalFilename(), e);
        }
        return wordInfo;
    }

    /* JADX WARN: Removed duplicated region for block: B:23:0x01a4 A[SYNTHETIC] */
    /* JADX WARN: Removed duplicated region for block: B:26:0x017a A[SYNTHETIC] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Placeholder for the chapter-assembly step: the decompiler could not recover the body, so
     * this method unconditionally throws {@link UnsupportedOperationException}.
     *
     * <p>{@code handleDoc} calls this method, so a class compiled from this source fails there
     * until the real implementation is restored.
     * NOTE(review): {@code isChapter} is not called anywhere else in this file, so it is
     * presumably used by the missing body — verify against the original bytecode.
     *
     * @param r7 document elements; {@code handleDoc} passes the element list of the parsed document
     * @param r8 map from an integer key to a set of strings; {@code handleDoc} passes an empty map
     * @return never returns normally in this source
     * @throws UnsupportedOperationException always
     */
    private java.util.List<com.jxdinfo.idp.common.entity.util.docparse.word.WordChapterInfo> assembleChapter(java.util.List<com.jxdinfo.idp.common.entity.util.docparse.word.WordElementInfo> r7, java.util.Map<java.lang.Integer, java.util.Set<java.lang.String>> r8) {
        /*
            Method dump skipped, instructions count: 432
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: com.jxdinfo.idp.icpac.core.dochandler.impl.PdfDocumentHandler.assembleChapter(java.util.List, java.util.Map):java.util.List");
    }

    /**
     * Decides whether a paragraph that starts with the number {@code str} is a chapter heading.
     *
     * <ol>
     *   <li>An empty {@code str}, or paragraph text starting with a "1、"-style marker or a date,
     *       is never a chapter.</li>
     *   <li>If {@code map} holds a non-empty title set for the number of dot-separated segments
     *       in {@code str}, the paragraph is a chapter exactly when its text with {@code str}
     *       removed (and trimmed) is in that set.</li>
     *   <li>Otherwise, with no previous chapter the paragraph is accepted; with a previous
     *       chapter, every segment shared with the previous chapter's leading number must be
     *       equal to it or exactly one greater.</li>
     * </ol>
     *
     * <p>{@code str} is removed from the text literally. It was previously used as a regular
     * expression, so the dots in e.g. "1.1" matched any character and unrelated text such as
     * "101" was stripped too.
     *
     * <p>NOTE(review): because every shared segment is compared, "2.1" directly after "1.5" is
     * rejected (1 vs 5) — confirm this is intended.
     *
     * @param str               leading chapter number of the paragraph, e.g. "1.2"
     * @param wordParagraphInfo paragraph being classified
     * @param wordChapterInfo   previously accepted chapter, or {@code null} if there is none
     * @param map               known titles keyed by the segment count of the chapter number
     * @return {@code true} if the paragraph is considered a chapter heading
     */
    private boolean isChapter(String str, WordParagraphInfo wordParagraphInfo, WordChapterInfo wordChapterInfo, Map<Integer, Set<String>> map) {
        if (StringUtils.isEmpty(str)) {
            return false;
        }
        String text = wordParagraphInfo.getText();
        if (CAESURA_NOISE_PATTERN.matcher(text).find() || MONTH_NOISE_PATTERN.matcher(text).find()) {
            return false;
        }

        String[] currentSegments = str.split("\\.");
        Set<String> knownTitles = map.get(Integer.valueOf(currentSegments.length));
        if (CollectionUtils.isNotEmpty(knownTitles)) {
            // Literal removal: str contains '.', which must not act as a regex wildcard.
            return knownTitles.contains(text.replace(str, "").trim());
        }
        if (wordChapterInfo == null) {
            return true;
        }

        Matcher previousNumber = CHAPTER_PATTERN.matcher(wordChapterInfo.getTitleName());
        String[] previousSegments = (previousNumber.find() ? previousNumber.group() : "").split("\\.");
        int sharedSegments = Math.min(currentSegments.length, previousSegments.length);
        try {
            for (int k = 0; k < sharedSegments; k++) {
                int current = Integer.parseInt(currentSegments[k]);
                int previous = Integer.parseInt(previousSegments[k]);
                if (current != previous && current - previous != 1) {
                    return false;
                }
            }
        } catch (NumberFormatException ignored) {
            // A segment that is not an int (e.g. the previous title has no number) cannot be compared.
            return false;
        }
        return true;
    }

    /**
     * Converts the given PDF to a .docx file through {@code iMutilFormatRestorationFeignHttp}.
     *
     * <p>The request is sent with file type "PDF" and the header/footer filter, cross-paragraph
     * merge and catalog filter options all set to {@code DuplicateCheckDocService.HISTORY_KY}.
     * The response payload is accepted only if it is non-null and passes {@code isSuccess};
     * otherwise the payload, decoded as UTF-8 text, is reported in the exception message.
     *
     * @param multipartFile the PDF to convert
     * @return an in-memory multipart file named after the original file with a ".docx" extension
     * @throws RuntimeException if the service returns no data or data rejected by {@code isSuccess}
     * @throws Exception        declared by the signature for failures of the conversion call
     */
    private MultipartFile pdf2Word(MultipartFile multipartFile) throws Exception {
        log.info("开始pdf转word操作");
        String sourceName = multipartFile.getOriginalFilename();

        PDFContrastRequest request = new PDFContrastRequest();
        request.setFile(multipartFile);
        request.setFile_type("PDF");
        request.setFilter_header_footer(DuplicateCheckDocService.HISTORY_KY);
        request.setMerge_cross_paraph(DuplicateCheckDocService.HISTORY_KY);
        request.setFilter_catalog(DuplicateCheckDocService.HISTORY_KY);

        byte[] converted = (byte[]) this.iMutilFormatRestorationFeignHttp.turnPDFToWord(request).getData();
        if (converted == null) {
            throw new RuntimeException(sourceName + "转word出现异常:" + "查重服务给定的pdf转word为null");
        }
        if (!isSuccess(converted)) {
            // The payload is not a document; surface whatever text the service sent back.
            throw new RuntimeException(sourceName + "转word出现异常:" + new String(converted, StandardCharsets.UTF_8));
        }

        String docxName = FilenameUtils.getFilePrefix(sourceName) + ".docx";
        log.info("开始pdf转word操作结束");
        return new MockMultipartFile(docxName, docxName, (String) null, converted);
    }

    /**
     * Checks whether the payload starts with the bytes {@code 50 4B 03 04}, the ZIP local file
     * header signature that a .docx container begins with.
     *
     * <p>A {@code null} payload or one shorter than four bytes is reported as not successful
     * instead of failing with an index error, so the caller can raise its own descriptive error.
     *
     * @param bArr payload returned by the conversion service
     * @return {@code true} if the first four bytes are {@code 0x50 0x4B 0x03 0x04}
     */
    private boolean isSuccess(byte[] bArr) {
        if (bArr == null || bArr.length < 4) {
            return false;
        }
        return bArr[0] == 0x50
                && bArr[1] == 0x4B
                && bArr[2] == 0x03
                && bArr[3] == 0x04;
    }
}
