package com.yishuifengxiao.common.crawler;

import com.yishuifengxiao.common.crawler.content.ContentExtract;
import com.yishuifengxiao.common.crawler.content.ContentExtractDecorator;
import com.yishuifengxiao.common.crawler.domain.entity.CrawlerData;
import com.yishuifengxiao.common.crawler.domain.entity.Page;
import com.yishuifengxiao.common.crawler.domain.entity.Request;
import com.yishuifengxiao.common.crawler.downloader.Downloader;
import com.yishuifengxiao.common.crawler.link.LinkExtract;
import com.yishuifengxiao.common.crawler.link.LinkExtractDecorator;
import com.yishuifengxiao.common.crawler.utils.LocalCrawler;
import com.yishuifengxiao.common.crawler.utils.RegexFactory;
import com.yishuifengxiao.common.tool.exception.ServiceException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/yishuifengxiao/common/crawler/CrawlerWorker.class */
/**
 * Runnable unit of work that handles a single {@link Request}: it downloads the page,
 * runs the configured interception check, extracts links and content, and forwards the
 * result to the crawler's pipeline.
 *
 * <p>The link and content extractors taken from the owning crawler are wrapped in
 * {@link LinkExtractDecorator} and {@link ContentExtractDecorator} when the worker is
 * created.
 */
public class CrawlerWorker implements Runnable {
    private static final Logger log = LoggerFactory.getLogger(CrawlerWorker.class);
    private final CrawlerProcessor crawlerProcessor;
    private final Request request;
    private final LinkExtract linkExtract;
    private final ContentExtract contentExtract;
    private final Downloader downloader;

    /**
     * Creates a worker for one request.
     *
     * @param request          the request to download and process
     * @param downloader       the downloader used to fetch the request
     * @param crawlerProcessor the processor that owns this worker; its {@code crawler}
     *                         supplies the extractors, listener, scheduler and pipeline
     */
    public CrawlerWorker(Request request, Downloader downloader, CrawlerProcessor crawlerProcessor) {
        this.crawlerProcessor = crawlerProcessor;
        this.request = request;
        this.downloader = downloader;
        this.linkExtract = new LinkExtractDecorator(crawlerProcessor.crawler.linkExtract);
        this.contentExtract = new ContentExtractDecorator(crawlerProcessor.crawler.contentExtract);
    }

    /**
     * Downloads the request and, unless the interception check matches, processes the page.
     *
     * <p>Any {@link Exception} raised along the way is reported through the listener's
     * {@code onDownError} callback and logged. Regardless of the outcome, the
     * {@link LocalCrawler} context is cleared and the processor is signalled exactly once
     * by this method.
     */
    @Override
    public void run() {
        try {
            LocalCrawler.put(this.crawlerProcessor.crawler);

            Page page;
            // NOTE(review): this class-wide lock serialises downloads across every worker
            // instance; presumably deliberate (throttling / downloader not thread-safe) —
            // confirm before relaxing it.
            synchronized (CrawlerWorker.class) {
                page = this.downloader.down(this.request);
            }
            if (page == null) {
                throw new ServiceException(
                        "Web page (" + this.request.getUrl() + " ) download results are empty");
            }
            page.setRequest(this.request);

            log.debug("【id:{} , name:{} 】   The request {}  has been downloaded.The real path is {} ,and  downloaded content is {}",
                    this.crawlerProcessor.crawler.getUuid(),
                    this.crawlerProcessor.crawler.getName(),
                    this.request.getUrl(),
                    page.getRedirectUrl(),
                    page);
            this.crawlerProcessor.crawler.crawlerListener.onDownSuccess(this.crawlerProcessor.crawler, page);

            // A page flagged by the interception check is not extracted.
            if (!intercepCheck(page)) {
                processRequest(page);
            }
        } catch (Exception e) {
            this.crawlerProcessor.crawler.crawlerListener.onDownError(this.crawlerProcessor.crawler, null, e);
            log.info("【id:{} , name:{} 】  There was a problem requesting {} to download. The cause of the problem is {}",
                    this.crawlerProcessor.crawler.getUuid(),
                    this.crawlerProcessor.crawler.getName(),
                    this.request,
                    e);
        } finally {
            // Single cleanup point: runs on success, on handled failure, and when an
            // Error or a listener exception propagates out of this method.
            LocalCrawler.clear();
            this.crawlerProcessor.signalNewUrl();
        }
    }

    /**
     * Checks the downloaded page against the site's failure mark, when the site rule has
     * the check enabled ({@code statCheck()}).
     *
     * <p>On a match the processor's intercept counter is incremented and logged; on a
     * miss the counter is cleared. When the check is disabled the counter is untouched.
     *
     * @param page the downloaded page
     * @return {@code true} if the failure mark was found in the page's raw text,
     *         {@code false} otherwise or when the check is disabled
     */
    private boolean intercepCheck(Page page) {
        if (!this.crawlerProcessor.crawler.getCrawlerRule().getSite().statCheck()) {
            return false;
        }
        boolean intercepted = RegexFactory.find(
                this.crawlerProcessor.crawler.getCrawlerRule().getSite().getFailureMark(),
                page.getRawTxt());
        if (intercepted) {
            long hits = this.crawlerProcessor.incrementAndGet();
            log.debug("【id:{} , name:{} 】  The crawler was detected by the server for the {} time",
                    this.crawlerProcessor.crawler.getUuid(),
                    this.crawlerProcessor.crawler.getName(),
                    hits);
        } else {
            this.crawlerProcessor.clearInterceptCount();
        }
        return intercepted;
    }

    /**
     * Extracts links and content from the page, schedules the discovered links one level
     * deeper than the current request, and outputs the extracted data.
     *
     * <p>Success increments the processor's task count and notifies
     * {@code onExtractSuccess}; any {@link Exception} increments the fail count, notifies
     * {@code onExtractError} and is logged rather than rethrown.
     *
     * @param page the downloaded page to process
     */
    private void processRequest(Page page) {
        try {
            this.linkExtract.extract(this.crawlerProcessor.crawler.getCrawlerRule().getLink(), page);
            // Every extracted link becomes a new request whose referrer is the current URL.
            page.getLinks().stream()
                    .map(link -> new Request(link, page.getRequest().getUrl(), page.getRequest().getDepth() + 1))
                    .forEach(next -> this.crawlerProcessor.crawler.scheduler.push(this.crawlerProcessor.crawler, next));

            this.contentExtract.extract(
                    this.crawlerProcessor.crawler.getCrawlerRule().getContent(),
                    this.crawlerProcessor.crawler.getCrawlerRule().getRules(),
                    page);
            output(page);

            this.crawlerProcessor.incrementTaskCount();
            this.crawlerProcessor.crawler.crawlerListener.onExtractSuccess(this.crawlerProcessor.crawler, page);
        } catch (Exception e) {
            this.crawlerProcessor.incrementFailCount();
            this.crawlerProcessor.crawler.crawlerListener.onExtractError(this.crawlerProcessor.crawler, page, e);
            log.info("【id:{} , name:{} 】  There was a problem requesting {} to extract. The cause of the problem is {}",
                    this.crawlerProcessor.crawler.getUuid(),
                    this.crawlerProcessor.crawler.getName(),
                    this.request,
                    e);
        } finally {
            // NOTE(review): run() signals again after this method returns, so the processor
            // is signalled twice per processed page; kept as-is — confirm whether the
            // early signal is relied upon.
            this.crawlerProcessor.signalNewUrl();
        }
    }

    /**
     * Hands the page's extracted data to the crawler's pipeline, unless the page is
     * marked as skipped.
     *
     * @param page the processed page
     * @throws ServiceException declared for failures while delivering the data to the pipeline
     */
    private void output(Page page) throws ServiceException {
        if (page.isSkip()) {
            return;
        }
        CrawlerData data = new CrawlerData()
                .setRawTxt(page.getRawTxt())
                .setData(page.getData())
                .setRequest(page.getRequest())
                .setRedirectUrl(page.getRedirectUrl())
                .setTask(this.crawlerProcessor.crawler);
        this.crawlerProcessor.crawler.pipeline.recieve(data);
    }
}
