package com.yishuifengxiao.common.crawler.simulator;

import com.yishuifengxiao.common.crawler.content.ContentExtractDecorator;
import com.yishuifengxiao.common.crawler.content.matcher.ContentMatcher;
import com.yishuifengxiao.common.crawler.content.matcher.SimpleContentMatcher;
import com.yishuifengxiao.common.crawler.domain.entity.Page;
import com.yishuifengxiao.common.crawler.domain.entity.Request;
import com.yishuifengxiao.common.crawler.domain.entity.SimulatorData;
import com.yishuifengxiao.common.crawler.domain.model.ContentRule;
import com.yishuifengxiao.common.crawler.domain.model.ExtractRule;
import com.yishuifengxiao.common.crawler.domain.model.LinkRule;
import com.yishuifengxiao.common.crawler.domain.model.MatcherRule;
import com.yishuifengxiao.common.crawler.domain.model.PageRule;
import com.yishuifengxiao.common.crawler.domain.model.SiteRule;
import com.yishuifengxiao.common.crawler.downloader.Downloader;
import com.yishuifengxiao.common.crawler.downloader.impl.SimpleDownloader;
import com.yishuifengxiao.common.crawler.link.LinkExtractDecorator;
import com.yishuifengxiao.common.crawler.macther.MatcherFactory;
import com.yishuifengxiao.common.crawler.scheduler.request.RequestCreater;
import com.yishuifengxiao.common.crawler.scheduler.request.SimpleRequestCreater;
import com.yishuifengxiao.common.crawler.utils.LinkUtils;
import com.yishuifengxiao.common.tool.exception.CustomException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;

/* loaded from: input_file:com/yishuifengxiao/common/crawler/simulator/SimpleSimulator.class */
/**
 * Default {@link Simulator} implementation used to try out crawler rules
 * (download, link extraction, content matching, content extraction) against a
 * single test URL.
 *
 * <p>Every public method reports its outcome through a {@link SimulatorData}
 * instead of throwing: any exception raised while processing is caught and
 * converted into a failure result carrying the exception message.
 */
public class SimpleSimulator implements Simulator {
    private final MatcherFactory matcherFactory = new MatcherFactory();
    /** Kept non-final and protected so that subclasses can substitute their own matcher. */
    protected ContentMatcher contentMatcher = new SimpleContentMatcher();
    private final RequestCreater requestCreater = new SimpleRequestCreater();

    /**
     * Downloads the test URL and returns its raw text.
     *
     * @param str        the URL to download
     * @param siteRule   site settings used to build the request; may be {@code null}
     * @param downloader downloader to use; a {@link SimpleDownloader} is used when {@code null}
     * @return a success result holding the raw page text, or a failure result
     */
    @Override // com.yishuifengxiao.common.crawler.simulator.Simulator
    public SimulatorData down(String str, SiteRule siteRule, Downloader downloader) {
        try {
            Page page = download(siteRule, str, downloader);
            if (null == page || null == page.getRawTxt()) {
                return new SimulatorData(false, "下载的结果为空", null);
            }
            return new SimulatorData(true, "下载成功", page.getRawTxt());
        } catch (Exception e) {
            return new SimulatorData(false, "下载失败", e.getMessage());
        }
    }

    /**
     * Downloads the test URL and runs the given link extraction rule on it.
     *
     * @param str        the URL to download
     * @param siteRule   site settings used to build the request; may be {@code null}
     * @param linkRule   link extraction rule; validated before the download starts
     * @param downloader downloader to use; a {@link SimpleDownloader} is used when {@code null}
     * @return a success result holding the links found on the page, or a failure result
     */
    @Override // com.yishuifengxiao.common.crawler.simulator.Simulator
    public SimulatorData link(String str, SiteRule siteRule, LinkRule linkRule, Downloader downloader) {
        try {
            check(linkRule);
            Page page = download(siteRule, str, downloader);
            new LinkExtractDecorator(null).extract(linkRule, page);
            return new SimulatorData(true, "链接解析成功", page.getLinks());
        } catch (Exception e) {
            return new SimulatorData(false, "链接解析失败", e.getMessage());
        }
    }

    /**
     * Downloads the test URL and checks it against the content matching rule.
     *
     * <p>The URL is first tested against the content-page address rule; only when
     * it matches is the downloaded text tested against the page rule.
     *
     * @param str         the URL to download
     * @param siteRule    site settings used to build the request; may be {@code null}
     * @param contentRule content matching rule; a {@code null} rule passes immediately
     * @param downloader  downloader to use; a {@link SimpleDownloader} is used when {@code null}
     * @return the outcome of the match, or a failure result when an exception occurs
     */
    @Override // com.yishuifengxiao.common.crawler.simulator.Simulator
    public SimulatorData match(String str, SiteRule siteRule, ContentRule contentRule, Downloader downloader) {
        // The whole body runs inside the try block so that a failure anywhere
        // (download or matching) is reported as a result instead of escaping,
        // and so that the page is never read after a failed download.
        try {
            Page page = download(siteRule, str, downloader);
            if (null == page || null == page.getRawTxt()) {
                return new SimulatorData(false, "下载结果为空", null);
            }
            if (null == contentRule) {
                return new SimulatorData(true, "匹配通过", "匹配规则为空时直接通过");
            }
            if (!this.matcherFactory.getMatcher(contentRule.getContentPageRule()).match(str)) {
                // NOTE(review): the success flag is true although the message says the
                // match did not pass; kept as in the original - confirm intended.
                return new SimulatorData(true, "匹配不通过", "目标地址不符合内容页地址规则");
            }
            boolean matched = this.contentMatcher.match(contentRule.getPageRule(), page.getRawTxt());
            String message = matched ? "匹配通过" : "匹配失败";
            return new SimulatorData(matched, message, message);
        } catch (Exception e) {
            return new SimulatorData(false, "匹配过程出现异常", e.getMessage());
        }
    }

    /**
     * Downloads the test URL and applies a single extraction rule to it.
     *
     * @param str         the URL to download
     * @param siteRule    site settings used to build the request; may be {@code null}
     * @param extractRule extraction rule; validated before the download starts
     * @param downloader  downloader to use; a {@link SimpleDownloader} is used when {@code null}
     * @return a success result holding the data stored under the rule's code, or a failure result
     */
    @Override // com.yishuifengxiao.common.crawler.simulator.Simulator
    public SimulatorData extract(String str, SiteRule siteRule, ExtractRule extractRule, Downloader downloader) {
        try {
            check(extractRule);
            Page page = download(siteRule, str, downloader);
            // An empty content rule is supplied so that only the extraction rule is exercised.
            ContentRule emptyContentRule = new ContentRule(new MatcherRule(), new PageRule());
            new ContentExtractDecorator(null).extract(emptyContentRule, Arrays.asList(extractRule), page);
            return new SimulatorData(true, "提取成功", page.getData(extractRule.getCode()));
        } catch (Exception e) {
            return new SimulatorData(false, "提取失败", e.getMessage());
        }
    }

    /**
     * Validates the URL and downloads it.
     *
     * @param siteRule   site settings; when {@code null} a new rule with an empty header list is used
     * @param str        the URL to download
     * @param downloader downloader to use; a {@link SimpleDownloader} is used when {@code null}
     * @return the downloaded page, never {@code null}, always with response code 200
     * @throws Exception if the URL is blank or rejected by {@link LinkUtils#matchHttpRequest},
     *                   if no page is returned, or if the response code is not 200
     */
    private Page download(SiteRule siteRule, String str, Downloader downloader) throws Exception {
        if (StringUtils.isBlank(str)) {
            throw new Exception("测试网址不能为空");
        }
        if (!LinkUtils.matchHttpRequest(str)) {
            throw new Exception("请输入正确的测试地址");
        }
        Downloader effectiveDownloader = null != downloader ? downloader : new SimpleDownloader();
        SiteRule effectiveSiteRule = siteRule == null ? new SiteRule().setHeaders(new ArrayList<>()) : siteRule;
        Page page = effectiveDownloader.down(this.requestCreater.create(effectiveSiteRule, new Request(str, str)));
        if (null == page) {
            throw new Exception("下载失败");
        }
        if (200 != page.getCode()) {
            // For a non-200 response the raw text is used as the error message.
            throw new Exception(page.getRawTxt());
        }
        return page;
    }

    /**
     * Validates a link extraction rule.
     *
     * @param linkRule the rule to validate
     * @throws Exception if the rule is {@code null}, has no start URL, or has no extraction rules
     */
    private void check(LinkRule linkRule) throws Exception {
        if (linkRule == null) {
            throw new Exception("链接提取规则不能为空");
        }
        if (linkRule.getStartUrl() == null || "".equals(linkRule.getStartUrl())) {
            throw new Exception("起始链接链接提取规则不能为空");
        }
        if (null == linkRule.getRules() || linkRule.getRules().size() == 0) {
            throw new Exception("请至少配置一个链接提取规则");
        }
    }

    /**
     * Validates a content extraction rule.
     *
     * @param extractRule the rule to validate
     * @throws CustomException if the rule is {@code null}, has a blank name, has no field
     *                         rules, or none of its field rules has a non-null rule
     */
    private void check(ExtractRule extractRule) throws CustomException {
        if (extractRule == null) {
            throw new CustomException("提取规则不能为空");
        }
        if (StringUtils.isBlank(extractRule.getName()) || extractRule.getRules() == null || extractRule.getRules().isEmpty()) {
            throw new CustomException("请配置正确的内容提取规则");
        }
        // At least one field rule must actually define a rule.
        boolean noUsableRule = extractRule.getRules().stream()
                .noneMatch(fieldRule -> fieldRule.getRule() != null);
        if (noUsableRule) {
            throw new CustomException("请至少配置一个正确的属性提取规则");
        }
    }
}
