package com.jxdinfo.idp.icpac.utils.sentence;

import com.jxdinfo.idp.common.exception.BusinessException;
import com.jxdinfo.idp.icpac.common.entity.FileBytesInfo;
import com.jxdinfo.idp.icpac.common.util.FileUtils;
import com.jxdinfo.idp.icpac.docexamine.entity.location.SentenceContent;
import com.jxdinfo.idp.icpac.utils.entity.word.WordChapterInfo;
import com.jxdinfo.idp.icpac.utils.entity.word.WordParagraphInfo;
import com.jxdinfo.idp.icpac.utils.entity.word.WordStructureInfo;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/jxdinfo/idp/icpac/utils/sentence/ReadAsSentencesUtil.class */
/**
 * Static helpers that read the paragraphs of a Word document ({@code .docx} via XWPF,
 * {@code .doc}/{@code .wps} via HWPF) and split paragraph text into sentences at the
 * delimiters listed in {@link #SYMBOL_SPLIT}.
 */
public class ReadAsSentencesUtil {
    private static final Logger log = LoggerFactory.getLogger(ReadAsSentencesUtil.class);

    /** Regex fragments, one per sentence delimiter (tab, newline, and half/full-width punctuation). */
    private static final String[] SYMBOL_SPLIT = {"\\t", "。", "？", "\\?", ";", "；", "\\n", "!", "！"};

    /** Alternation of all {@link #SYMBOL_SPLIT} fragments, compiled once instead of on every split. */
    private static final Pattern SENTENCE_DELIMITER = Pattern.compile(String.join("|", SYMBOL_SPLIT));

    /**
     * Reads a Word document and splits every non-empty paragraph into sentences.
     *
     * <p>A sentence is kept only when it is non-empty and its length, ignoring space characters,
     * is strictly greater than {@code num}. Kept sentences are numbered consecutively from 0
     * across the whole document.
     *
     * @param fileBytesInfo file name and content of the document
     * @param num minimum length threshold (exclusive); {@code null} is treated as 0
     * @return the kept sentences in document order; an empty list when the input is incomplete,
     *         the extension is not supported, or the document cannot be read
     */
    public static List<SentenceContent> readSentences(FileBytesInfo fileBytesInfo, Integer num) {
        try {
            List<String> paragraphs = loadParagraphTexts(fileBytesInfo);
            int minLength = num == null ? 0 : num.intValue();
            List<SentenceContent> sentences = new ArrayList<>();
            int sentenceIndex = 0;
            for (int paragraphIndex = 0; paragraphIndex < paragraphs.size(); paragraphIndex++) {
                String paragraph = paragraphs.get(paragraphIndex);
                if (StringUtils.isEmpty(paragraph)) {
                    continue;
                }
                // Pieces come back in document order, so searching from the end of the previous
                // piece yields the right offset even when the same sentence occurs twice.
                int cursor = 0;
                for (String piece : SENTENCE_DELIMITER.split(paragraph)) {
                    int offset = locate(paragraph, piece, cursor);
                    if (offset >= cursor) {
                        cursor = offset + piece.length();
                    }
                    if (StringUtils.isEmpty(piece) || piece.replace(" ", "").length() <= minLength) {
                        continue;
                    }
                    sentences.add(newSentence(sentenceIndex, Integer.valueOf(paragraphIndex), piece, offset));
                    sentenceIndex++;
                }
            }
            return sentences;
        } catch (BusinessException | IOException e) {
            log.error("按句读取文章失败:", e);
            return Collections.emptyList();
        }
    }

    /**
     * Same as {@link #readSentences(FileBytesInfo, Integer)}, but keyed by sentence index.
     *
     * @param fileBytesInfo file name and content of the document
     * @param num minimum length threshold (exclusive); {@code null} is treated as 0
     * @return a mutable map from sentence index to sentence, in document order
     */
    public static Map<Integer, SentenceContent> readSentencesForMap(FileBytesInfo fileBytesInfo, Integer num) {
        Map<Integer, SentenceContent> byIndex = new LinkedHashMap<>();
        for (SentenceContent sentence : readSentences(fileBytesInfo, num)) {
            byIndex.put(sentence.getSentenceIndex(), sentence);
        }
        return byIndex;
    }

    /**
     * Splits an already parsed document structure into sentences.
     *
     * <p>Two groups of entries are written into the same map:
     * <ul>
     *   <li>for every chapter with a non-empty title, the sentences of its paragraphs, keyed by
     *       {@code title + n} with {@code n} restarting at 0 for each chapter;</li>
     *   <li>the non-empty sentences of the document-level paragraph list, keyed by {@code "" + n}
     *       with {@code n} counting from 0.</li>
     * </ul>
     * Sentences are trimmed before being stored.
     *
     * @param wordStructureInfo parsed document structure; {@code null} yields an empty map
     * @return a mutable map of the sentences described above
     */
    public static Map<String, SentenceContent> readSentencesForMap(WordStructureInfo wordStructureInfo) {
        Map<String, SentenceContent> result = new HashMap<>();
        if (wordStructureInfo == null) {
            return result;
        }
        if (wordStructureInfo.getChapterInfos() != null) {
            for (WordChapterInfo chapter : wordStructureInfo.getChapterInfos()) {
                String titleName = chapter.getTitleName();
                List<WordParagraphInfo> chapterParagraphs = chapter.getPInfoList();
                // Chapters without a title contribute nothing: every entry key starts with the title.
                if (StringUtils.isEmpty(titleName) || CollectionUtils.isEmpty(chapterParagraphs)) {
                    continue;
                }
                int next = 0;
                for (WordParagraphInfo paragraph : chapterParagraphs) {
                    // NOTE(review): blank sentences are kept for chapters (the original filter tests the
                    // title, not the sentence). Dropping them would shift the "title + n" keys, so the
                    // behaviour is preserved here - confirm whether it is intended.
                    next = appendSentences(paragraph, titleName, next, true, result);
                }
            }
        }
        if (wordStructureInfo.getPInfoList() != null) {
            int next = 0;
            for (WordParagraphInfo paragraph : wordStructureInfo.getPInfoList()) {
                next = appendSentences(paragraph, "", next, false, result);
            }
        }
        return result;
    }

    /**
     * Looks a document up by its numeric id and returns its sentences keyed by sentence index.
     *
     * @param str document id as a decimal string
     * @param num minimum length threshold (exclusive); {@code null} is treated as 0
     * @return a mutable map from sentence index to sentence; empty when {@code str} is empty,
     *         is not a valid number, or the file cannot be fetched
     */
    public static Map<Integer, SentenceContent> readSentencesForMap(String str, Integer num) {
        if (StringUtils.isEmpty(str)) {
            return new LinkedHashMap<>();
        }
        FileBytesInfo fileBytesInfo;
        try {
            fileBytesInfo = FileUtils.getBytesInfo(Long.valueOf(Long.parseLong(str)));
        } catch (Exception e) {
            log.error("根据文档id获取文件失败", e);
            // Nothing to parse; continuing with a blank FileBytesInfo would only fail later with an NPE.
            return new LinkedHashMap<>();
        }
        return readSentencesForMap(fileBytesInfo, num);
    }

    /**
     * Reads the raw text of every paragraph of a Word document.
     *
     * @param fileBytesInfo file name and content of the document
     * @return a mutable list of paragraph texts in document order; empty when the input is
     *         incomplete, the extension is not supported, or the document cannot be read
     */
    public static List<String> readParagraphs(FileBytesInfo fileBytesInfo) {
        try {
            return loadParagraphTexts(fileBytesInfo);
        } catch (BusinessException | IOException e) {
            log.error("按段落读取文章失败:", e);
            return new ArrayList<>();
        }
    }

    /**
     * Extracts paragraph texts, choosing the parser from the file name suffix.
     * {@code BusinessException} is declared so that callers can keep catching it exactly as before.
     */
    private static List<String> loadParagraphTexts(FileBytesInfo fileBytesInfo) throws IOException, BusinessException {
        List<String> paragraphs = new ArrayList<>();
        if (fileBytesInfo == null) {
            log.warn("Skipping paragraph extraction: no file info supplied");
            return paragraphs;
        }
        byte[] content = fileBytesInfo.getFileBytes();
        String fileName = fileBytesInfo.getFileName();
        if (content == null || fileName == null) {
            log.warn("Skipping paragraph extraction: file content or file name is missing");
            return paragraphs;
        }
        if (fileName.endsWith(".docx")) {
            try (XWPFDocument document = new XWPFDocument(new ByteArrayInputStream(content))) {
                for (XWPFParagraph paragraph : document.getParagraphs()) {
                    paragraphs.add(paragraph.getText());
                }
            }
        } else if (fileName.endsWith(".doc") || fileName.endsWith(".wps")) {
            try (HWPFDocument document = new HWPFDocument(new ByteArrayInputStream(content))) {
                Range range = document.getRange();
                int paragraphCount = range.numParagraphs();
                for (int i = 0; i < paragraphCount; i++) {
                    paragraphs.add(range.getParagraph(i).text());
                }
            }
        }
        return paragraphs;
    }

    /**
     * Splits one paragraph into trimmed sentences and stores them under {@code keyPrefix + index}.
     *
     * @return the index to use for the next sentence
     */
    private static int appendSentences(WordParagraphInfo paragraph, String keyPrefix, int nextIndex,
            boolean keepBlank, Map<String, SentenceContent> target) {
        if (paragraph == null) {
            return nextIndex;
        }
        Integer paragraphIndex = paragraph.getIndex();
        String text = paragraph.getText();
        if (StringUtils.isEmpty(text)) {
            return nextIndex;
        }
        int cursor = 0;
        for (String piece : SENTENCE_DELIMITER.split(text)) {
            String sentence = piece.trim().replace("\n", "");
            if (sentence.isEmpty() && !keepBlank) {
                continue;
            }
            // A blank sentence has no meaningful position; it keeps the offset 0 it always had.
            int offset = sentence.isEmpty() ? 0 : locate(text, sentence, cursor);
            if (offset >= cursor) {
                cursor = offset + sentence.length();
            }
            target.put(keyPrefix + nextIndex, newSentence(nextIndex, paragraphIndex, sentence, offset));
            nextIndex++;
        }
        return nextIndex;
    }

    /** Finds {@code piece} at or after {@code from}, falling back to a search from the start. */
    private static int locate(String text, String piece, int from) {
        int at = text.indexOf(piece, from);
        return at >= 0 ? at : text.indexOf(piece);
    }

    /** Builds a fully populated {@link SentenceContent}. */
    private static SentenceContent newSentence(int sentenceIndex, Integer paragraphIndex, String sentence,
            int textIndex) {
        SentenceContent content = new SentenceContent();
        content.setSentenceIndex(Integer.valueOf(sentenceIndex));
        content.setSentenceStr(sentence);
        content.setTextLength(Integer.valueOf(sentence.length()));
        content.setParagraphIndex(paragraphIndex);
        content.setTextIndex(Integer.valueOf(textIndex));
        return content;
    }
}
