package com.yishuifengxiao.common.crawler.content;

import com.yishuifengxiao.common.crawler.content.impl.SimpleContentExtract;
import com.yishuifengxiao.common.crawler.content.matcher.ContentMatcher;
import com.yishuifengxiao.common.crawler.content.matcher.SimpleContentMatcher;
import com.yishuifengxiao.common.crawler.domain.entity.Page;
import com.yishuifengxiao.common.crawler.domain.model.ContentRule;
import com.yishuifengxiao.common.crawler.domain.model.ExtractRule;
import com.yishuifengxiao.common.crawler.extractor.ExtractorFactory;
import com.yishuifengxiao.common.crawler.extractor.content.ContentExtractor;
import com.yishuifengxiao.common.crawler.extractor.content.impl.CharsetContentExtractor;
import com.yishuifengxiao.common.crawler.extractor.content.impl.DescpContentExtractor;
import com.yishuifengxiao.common.crawler.extractor.content.impl.KeywordContentExtractor;
import com.yishuifengxiao.common.crawler.extractor.content.impl.TitleContentExtractor;
import com.yishuifengxiao.common.crawler.macther.MatcherFactory;
import com.yishuifengxiao.common.crawler.utils.LocalCrawler;
import com.yishuifengxiao.common.tool.exception.ServiceException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/yishuifengxiao/common/crawler/content/ContentExtractDecorator.class */
/**
 * {@link ContentExtract} implementation that filters pages before extraction and then
 * chains an optional, caller-supplied {@link ContentExtract} after the built-in one.
 *
 * <p>For every page with response code 200 whose address and raw text satisfy the
 * {@link ContentRule}, a {@link SimpleContentExtract} built from the given extract rules
 * (plus the description, keyword, title and charset extractors) is run first; afterwards
 * the wrapped {@link #contentExtract}, if any, is invoked with the same arguments.
 * Pages that fail either check are flagged via {@code page.setSkip(true)}.
 */
public class ContentExtractDecorator implements ContentExtract {
    private static final Logger log = LoggerFactory.getLogger(ContentExtractDecorator.class);

    /** Identifier and name reported in log lines when no crawler is bound to {@link LocalCrawler}. */
    private static final String UNKNOWN_CRAWLER = "test";

    /** Turns each {@link ExtractRule} into a {@link ContentExtractor}. */
    private final ExtractorFactory factory = new ExtractorFactory();

    /** Decides whether the raw page text satisfies the page rule of the content rule. */
    protected ContentMatcher contentMatcher = new SimpleContentMatcher();

    /** Supplies the matcher used to test the page address against the content page rule. */
    private MatcherFactory matcherFactory = new MatcherFactory();

    /** Optional extractor run after the built-in extraction; may be {@code null}. */
    protected ContentExtract contentExtract;

    /**
     * Creates a decorator around the given extractor.
     *
     * @param contentExtract extractor to run after the built-in extraction, or {@code null}
     *                       to run the built-in extraction only
     */
    public ContentExtractDecorator(ContentExtract contentExtract) {
        this.contentExtract = contentExtract;
    }

    /**
     * Extracts data from {@code page} when it qualifies as a content page.
     *
     * <p>A {@code null} page is ignored. A page whose response code is not 200 is marked
     * as skipped and left untouched. Otherwise the redirect URL (or, when there is none,
     * the request URL) is matched against the content page rule, and on success the raw
     * text is matched against the page rule; the page's skip flag is set to the negation
     * of that result and extraction only runs when both checks pass.
     *
     * @param contentRule rule describing which pages are content pages
     * @param list        extraction rules to apply to a matching page
     * @param page        page to inspect and populate; may be {@code null}
     * @throws ServiceException if one of the underlying extractors fails
     */
    @Override // com.yishuifengxiao.common.crawler.content.ContentExtract
    public synchronized void extract(ContentRule contentRule, List<ExtractRule> list, Page page) throws ServiceException {
        if (null == page) {
            return;
        }
        if (200 != page.getCode()) {
            page.setSkip(true);
            // The message has five placeholders; the redirect URL fills the "actual address" slot
            // so that the response code lands in the final one.
            log.debug("【id:{} , name:{} 】 The actual address of the page corresponding to request {} is 【 {} 】,and it has a response code of {} and will not extract data from it",
                    crawlerUuid(), crawlerName(), page.getRequest().getUrl(), page.getRedirectUrl(), page.getCode());
            return;
        }

        // Address check first; the raw-text check is only attempted when the address matches.
        boolean match = this.matcherFactory.getMatcher(contentRule.getContentPageRule())
                .match(null != page.getRedirectUrl() ? page.getRedirectUrl() : page.getRequest().getUrl());
        if (match) {
            match = this.contentMatcher.match(contentRule.getPageRule(), page.getRawTxt());
        }
        log.debug("【id:{} , name:{} 】 The actual address of the page corresponding to request {} is 【 {} 】, and the matching result of whether the content page address is satisfied is {}",
                crawlerUuid(), crawlerName(), page.getRequest().getUrl(), page.getRedirectUrl(), match);

        page.setSkip(!match);
        if (match) {
            extract(list, contentRule, page);
        }
        log.debug("【id:{} , name:{} 】 The actual address of request {} is 【{}】, and the extracted data is {}",
                crawlerUuid(), crawlerName(), page.getRequest().getUrl(), page.getRedirectUrl(), page.getData());
    }

    /**
     * Runs the built-in extraction and then the wrapped extractor, if one was supplied.
     * The wrapped extractor is invoked while holding the class-level lock.
     */
    private void extract(List<ExtractRule> list, ContentRule contentRule, Page page) throws ServiceException {
        new SimpleContentExtract(createContentExtractors(list)).extract(contentRule, list, page);
        if (null != this.contentExtract) {
            synchronized (ContentExtractDecorator.class) {
                this.contentExtract.extract(contentRule, list, page);
            }
        }
    }

    /** Returns the rule-derived extractors followed by the description, keyword, title and charset extractors. */
    private List<ContentExtractor> createContentExtractors(List<ExtractRule> list) {
        List<ContentExtractor> extractors = buildContentExtractor(list);
        extractors.addAll(Arrays.asList(new DescpContentExtractor(), new KeywordContentExtractor(), new TitleContentExtractor(), new CharsetContentExtractor()));
        return extractors;
    }

    /**
     * Maps every rule to an extractor via the factory, dropping rules for which the factory
     * yields {@code null}. The result is an {@link ArrayList} so the caller can append to it.
     */
    private List<ContentExtractor> buildContentExtractor(List<ExtractRule> list) {
        return list.stream()
                .<ContentExtractor>map(this.factory::getContentExtractor)
                .filter(Objects::nonNull)
                .collect(Collectors.toCollection(ArrayList::new));
    }

    /** Returns the uuid of the crawler bound to {@link LocalCrawler}, or a placeholder when none is bound. */
    private static Object crawlerUuid() {
        return LocalCrawler.get() != null ? LocalCrawler.get().getUuid() : UNKNOWN_CRAWLER;
    }

    /** Returns the name of the crawler bound to {@link LocalCrawler}, or a placeholder when none is bound. */
    private static Object crawlerName() {
        return LocalCrawler.get() != null ? LocalCrawler.get().getName() : UNKNOWN_CRAWLER;
    }
}
