package com.yishuifengxiao.common.crawler;

import com.yishuifengxiao.common.crawler.domain.constant.SiteConstant;
import com.yishuifengxiao.common.crawler.domain.entity.CrawlerRule;
import com.yishuifengxiao.common.crawler.domain.eunm.Pattern;
import com.yishuifengxiao.common.crawler.domain.eunm.Type;
import com.yishuifengxiao.common.crawler.domain.model.ContentRule;
import com.yishuifengxiao.common.crawler.domain.model.ExtractFieldRule;
import com.yishuifengxiao.common.crawler.domain.model.ExtractRule;
import com.yishuifengxiao.common.crawler.domain.model.HeaderRule;
import com.yishuifengxiao.common.crawler.domain.model.LinkRule;
import com.yishuifengxiao.common.crawler.domain.model.MatcherRule;
import com.yishuifengxiao.common.crawler.domain.model.PageRule;
import com.yishuifengxiao.common.crawler.domain.model.SiteRule;
import com.yishuifengxiao.common.crawler.utils.LinkUtils;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.springframework.util.Assert;

/* loaded from: input_file:com/yishuifengxiao/common/crawler/CrawlerBuilder.class */
/**
 * Fluent builder that collects crawler settings and assembles them into a
 * {@link CrawlerRule} (see {@link #build()}) or directly into a {@link Crawler}
 * (see {@link #creatCrawler()}).
 *
 * <p>Accessors follow one naming scheme: {@code foo()} reads a setting, while
 * {@code foo(value)} stores it and returns this builder so calls can be chained.
 *
 * <p>The builder keeps plain mutable state and performs no synchronization of its own.
 */
public class CrawlerBuilder {
    /** Fallback for {@link #interval} when it is missing or negative. */
    private static final long DEFAULT_INTERVAL = 10000L;
    /** Fallback for {@link #threadNum} when it is missing or not positive. */
    private static final int DEFAULT_THREAD_NUM = 1;
    /** Fallback for {@link #maxRedirects} when it is not positive. */
    private static final int DEFAULT_MAX_REDIRECTS = 50;
    /** Fallback for {@link #interceptCount} when it is missing or not positive. */
    private static final int DEFAULT_INTERCEPT_COUNT = 5;
    /** Fallback for {@link #retryCount} when it is not positive. */
    private static final int DEFAULT_RETRY_COUNT = 3;

    // ---- site level settings (exported through site()) ----
    private String userAgent;
    private String referrer;
    private String cookieValue;
    private String failureMark;
    private String cookieSpec;
    private String cacheControl = SiteConstant.CACHE_CONTROL;
    private int maxRedirects = DEFAULT_MAX_REDIRECTS;
    private Integer interceptCount = DEFAULT_INTERCEPT_COUNT;
    private int retryCount = DEFAULT_RETRY_COUNT;
    private long maxDepth = 0;
    private int connectTimeout = SiteConstant.CONNECTION_TIME_OUT;
    private boolean redirectsEnabled = true;
    private boolean contentCompressionEnabled = true;
    private boolean relativeRedirectsAllowed = false;
    private boolean circularRedirectsAllowed = false;
    private boolean normalizeUri = true;
    private List<HeaderRule> headers = new ArrayList<>();

    // ---- link settings (exported through link()) ----
    private String startUrl;
    private Set<MatcherRule> linkRules = new HashSet<>();

    // ---- content settings (exported through content() / pageRule()) ----
    private MatcherRule contentPageRule;
    private Type matcherType;
    private String matcherPattern;
    private String matcherTarget;
    private Boolean matcherMode = true;
    private Boolean matcherCaseSensitive = false;
    private Boolean matcherFuzzy = true;

    // ---- task level settings ----
    // NOTE(review): interval and waitTime look like durations; the unit is not visible here - confirm.
    private Long interval = DEFAULT_INTERVAL;
    private Long waitTime = Long.valueOf(SiteConstant.WAIT_TIME_FOR_CLOSE);
    private Integer threadNum = DEFAULT_THREAD_NUM;

    /** Extraction rules keyed by {@link ExtractRule#getCode()}; a later rule with the same code replaces the earlier one. */
    private Map<String, ExtractRule> extractRules = new HashMap<>();

    /** Instances are obtained through {@link #create()} or {@link #create(CrawlerRule)}. */
    private CrawlerBuilder() {
    }

    /**
     * Creates a builder holding only the default settings.
     *
     * @return a new builder
     */
    public static CrawlerBuilder create() {
        return new CrawlerBuilder();
    }

    /**
     * Creates a builder pre-populated from an existing rule.
     *
     * <p>As a side effect, a missing site, content or rule list on {@code crawlerRule} is
     * replaced on that object with an empty instance. A missing interval, wait time or
     * thread number keeps the builder default instead of failing.
     *
     * @param crawlerRule the rule to copy settings from, must not be {@code null}
     * @return a new builder initialised from {@code crawlerRule}
     * @throws IllegalArgumentException if {@code crawlerRule} or its link rule is {@code null}
     */
    public static CrawlerBuilder create(CrawlerRule crawlerRule) {
        Assert.notNull(crawlerRule, "配置规则不能为空");
        if (crawlerRule.getSite() == null) {
            crawlerRule.setSite(new SiteRule());
        }
        if (crawlerRule.getContent() == null) {
            crawlerRule.setContent(new ContentRule());
        }
        if (crawlerRule.getLink() == null) {
            throw new IllegalArgumentException("起始链接不能为空");
        }
        if (crawlerRule.getRules() == null) {
            crawlerRule.setRules(new ArrayList<>());
        }
        CrawlerBuilder builder = new CrawlerBuilder();
        builder.link(crawlerRule.getLink())
                .site(crawlerRule.getSite())
                .content(crawlerRule.getContent())
                .setExtractRules(crawlerRule.getRules());
        // The task level values are boxed and may be absent; only override the defaults when present.
        if (crawlerRule.getInterval() != null) {
            builder.interval(crawlerRule.getInterval().longValue());
        }
        if (crawlerRule.getWaitTime() != null) {
            builder.waitTime(crawlerRule.getWaitTime().longValue());
        }
        if (crawlerRule.getThreadNum() != null) {
            builder.threadNum(crawlerRule.getThreadNum().intValue());
        }
        return builder;
    }

    /** @return the configured interval */
    public long interval() {
        return this.interval;
    }

    /**
     * @param interval the interval to use; negative values are replaced by the default in {@link #build()}
     * @return this builder
     */
    public CrawlerBuilder interval(long interval) {
        this.interval = interval;
        return this;
    }

    /** @return the configured wait time */
    public long waitTime() {
        return this.waitTime;
    }

    /**
     * @param waitTime the wait time to use; non-positive values are replaced by the default in {@link #build()}
     * @return this builder
     */
    public CrawlerBuilder waitTime(long waitTime) {
        this.waitTime = waitTime;
        return this;
    }

    /** @return the configured thread number */
    public int threadNum() {
        return this.threadNum;
    }

    /**
     * @param threadNum the thread number to use; non-positive values are replaced by the default in {@link #build()}
     * @return this builder
     */
    public CrawlerBuilder threadNum(int threadNum) {
        this.threadNum = threadNum;
        return this;
    }

    /**
     * Assembles a new {@link SiteRule} from the current site level settings.
     *
     * @return a freshly created site rule; it shares this builder's header list
     */
    public SiteRule site() {
        return new SiteRule()
                .setUserAgent(this.userAgent)
                .setReferrer(this.referrer)
                .setCacheControl(this.cacheControl)
                .setCookieValue(this.cookieValue)
                .setMaxDepth(this.maxDepth)
                .setFailureMark(this.failureMark)
                .setInterceptCount(this.interceptCount)
                .setRetryCount(this.retryCount)
                .setRedirectsEnabled(this.redirectsEnabled)
                .setRelativeRedirectsAllowed(this.relativeRedirectsAllowed)
                .setConnectTimeout(this.connectTimeout)
                .setContentCompressionEnabled(this.contentCompressionEnabled)
                .setMaxRedirects(this.maxRedirects)
                .setHeaders(this.headers)
                .setCircularRedirectsAllowed(this.circularRedirectsAllowed)
                .setCookieSpec(this.cookieSpec)
                .setNormalizeUri(this.normalizeUri);
    }

    /**
     * Copies every site level setting from the given rule into this builder.
     *
     * @param siteRule the rule to copy from, must not be {@code null}
     * @return this builder
     * @throws IllegalArgumentException if {@code siteRule} is {@code null}
     */
    public CrawlerBuilder site(SiteRule siteRule) {
        Assert.notNull(siteRule, "站点配置规则数据不能为空");
        userAgent(siteRule.getUserAgent());
        referrer(siteRule.getReferrer());
        cacheControl(siteRule.getCacheControl());
        cookieValue(siteRule.getCookieValue());
        failureMark(siteRule.getFailureMark());
        // The intercept count is boxed; keep the current value when the rule does not carry one.
        if (siteRule.getInterceptCount() != null) {
            interceptCount(siteRule.getInterceptCount().intValue());
        }
        setHeaders(siteRule.getHeaders());
        retryCount(siteRule.getRetryCount());
        connectTimeout(siteRule.getConnectTimeout());
        redirectsEnabled(siteRule.isRedirectsEnabled());
        maxRedirects(siteRule.getMaxRedirects());
        contentCompressionEnabled(siteRule.isContentCompressionEnabled());
        maxDepth(siteRule.getMaxDepth());
        circularRedirectsAllowed(siteRule.isCircularRedirectsAllowed());
        cookieSpec(siteRule.getCookieSpec());
        normalizeUri(siteRule.isNormalizeUri());
        relativeRedirectsAllowed(siteRule.isRelativeRedirectsAllowed());
        return this;
    }

    /** @return the configured user agent, possibly {@code null} */
    public String userAgent() {
        return this.userAgent;
    }

    /**
     * @param userAgent the user agent to use
     * @return this builder
     */
    public CrawlerBuilder userAgent(String userAgent) {
        this.userAgent = userAgent;
        return this;
    }

    /** @return the configured referrer, possibly {@code null} */
    public String referrer() {
        return this.referrer;
    }

    /**
     * @param referrer the referrer to use
     * @return this builder
     */
    public CrawlerBuilder referrer(String referrer) {
        this.referrer = referrer;
        return this;
    }

    /** @return the configured cookie value, possibly {@code null} */
    public String cookieValue() {
        return this.cookieValue;
    }

    /**
     * @param cookieValue the cookie value to use
     * @return this builder
     */
    public CrawlerBuilder cookieValue(String cookieValue) {
        this.cookieValue = cookieValue;
        return this;
    }

    /** @return the configured cache control value */
    public String cacheControl() {
        return this.cacheControl;
    }

    /**
     * @param cacheControl the cache control value to use
     * @return this builder
     */
    public CrawlerBuilder cacheControl(String cacheControl) {
        this.cacheControl = cacheControl;
        return this;
    }

    /** @return the builder's own header list (not a copy) */
    public List<HeaderRule> headers() {
        return this.headers;
    }

    /**
     * Appends one request header.
     *
     * @param headerRule the header to add, must not be {@code null}
     * @return this builder
     * @throws IllegalArgumentException if {@code headerRule} is {@code null}
     */
    public CrawlerBuilder addHeader(HeaderRule headerRule) {
        Assert.notNull(headerRule, "请求头参数对不能为空");
        this.headers.add(headerRule);
        return this;
    }

    /**
     * Appends several request headers.
     *
     * @param list the headers to add; {@code null} is treated as an empty list
     * @return this builder
     */
    public CrawlerBuilder addHeaders(List<HeaderRule> list) {
        if (null != list) {
            this.headers.addAll(list);
        }
        return this;
    }

    /**
     * Replaces all request headers.
     *
     * <p>The given list is copied, so later {@code addHeader} calls neither modify the
     * caller's list nor fail when that list is unmodifiable.
     *
     * @param list the new headers; {@code null} clears the headers
     * @return this builder
     */
    public CrawlerBuilder setHeaders(List<HeaderRule> list) {
        this.headers = null != list ? new ArrayList<>(list) : new ArrayList<>();
        return this;
    }

    /** @return the configured failure mark, possibly {@code null} */
    public String failureMark() {
        return this.failureMark;
    }

    /**
     * @param failureMark the failure mark to use
     * @return this builder
     */
    public CrawlerBuilder failureMark(String failureMark) {
        this.failureMark = failureMark;
        return this;
    }

    /** @return the configured intercept count */
    public int interceptCount() {
        return this.interceptCount;
    }

    /**
     * @param interceptCount the intercept count to use; non-positive values are replaced by the default in {@link #build()}
     * @return this builder
     */
    public CrawlerBuilder interceptCount(int interceptCount) {
        this.interceptCount = interceptCount;
        return this;
    }

    /** @return the configured retry count */
    public int retryCount() {
        return this.retryCount;
    }

    /**
     * @param retryCount the retry count to use; non-positive values are replaced by the default in {@link #build()}
     * @return this builder
     */
    public CrawlerBuilder retryCount(int retryCount) {
        // Store the argument itself; the previous code copied interceptCount here by mistake.
        this.retryCount = retryCount;
        return this;
    }

    /** @return the configured maximum depth */
    public long maxDepth() {
        return this.maxDepth;
    }

    /**
     * @param maxDepth the maximum depth to use; negative values are reset to 0 in {@link #build()}
     * @return this builder
     */
    public CrawlerBuilder maxDepth(long maxDepth) {
        this.maxDepth = maxDepth;
        return this;
    }

    /** @return the configured connect timeout */
    public int connectTimeout() {
        return this.connectTimeout;
    }

    /**
     * @param connectTimeout the connect timeout to use; values below 1 select {@code SiteConstant.CONNECTION_TIME_OUT}
     * @return this builder
     */
    public CrawlerBuilder connectTimeout(int connectTimeout) {
        this.connectTimeout = connectTimeout < 1 ? SiteConstant.CONNECTION_TIME_OUT : connectTimeout;
        return this;
    }

    /** @return whether redirects are enabled */
    public boolean redirectsEnabled() {
        return this.redirectsEnabled;
    }

    /**
     * @param redirectsEnabled whether redirects are enabled
     * @return this builder
     */
    public CrawlerBuilder redirectsEnabled(boolean redirectsEnabled) {
        this.redirectsEnabled = redirectsEnabled;
        return this;
    }

    /** @return the configured cookie spec, possibly {@code null} */
    public String cookieSpec() {
        return this.cookieSpec;
    }

    /**
     * @param cookieSpec the cookie spec to use
     * @return this builder
     */
    public CrawlerBuilder cookieSpec(String cookieSpec) {
        this.cookieSpec = cookieSpec;
        return this;
    }

    /** @return whether relative redirects are allowed */
    public boolean relativeRedirectsAllowed() {
        return this.relativeRedirectsAllowed;
    }

    /**
     * @param relativeRedirectsAllowed whether relative redirects are allowed
     * @return this builder
     */
    public CrawlerBuilder relativeRedirectsAllowed(boolean relativeRedirectsAllowed) {
        this.relativeRedirectsAllowed = relativeRedirectsAllowed;
        return this;
    }

    /** @return whether circular redirects are allowed */
    public boolean circularRedirectsAllowed() {
        return this.circularRedirectsAllowed;
    }

    /**
     * @param circularRedirectsAllowed whether circular redirects are allowed
     * @return this builder
     */
    public CrawlerBuilder circularRedirectsAllowed(boolean circularRedirectsAllowed) {
        this.circularRedirectsAllowed = circularRedirectsAllowed;
        return this;
    }

    /** @return whether URI normalization is enabled */
    public boolean normalizeUri() {
        return this.normalizeUri;
    }

    /**
     * @param normalizeUri whether URI normalization is enabled
     * @return this builder
     */
    public CrawlerBuilder normalizeUri(boolean normalizeUri) {
        this.normalizeUri = normalizeUri;
        return this;
    }

    /** @return the configured maximum number of redirects */
    public int maxRedirects() {
        return this.maxRedirects;
    }

    /**
     * @param maxRedirects the maximum number of redirects; non-positive values are replaced by the default in {@link #build()}
     * @return this builder
     */
    public CrawlerBuilder maxRedirects(int maxRedirects) {
        this.maxRedirects = maxRedirects;
        return this;
    }

    /** @return whether content compression is enabled */
    public boolean contentCompressionEnabled() {
        return this.contentCompressionEnabled;
    }

    /**
     * @param contentCompressionEnabled whether content compression is enabled
     * @return this builder
     */
    public CrawlerBuilder contentCompressionEnabled(boolean contentCompressionEnabled) {
        this.contentCompressionEnabled = contentCompressionEnabled;
        return this;
    }

    /**
     * Assembles a new {@link LinkRule} from the start URL and the link matcher rules.
     *
     * @return a freshly created link rule; it shares this builder's rule set
     */
    public LinkRule link() {
        return new LinkRule().setStartUrl(this.startUrl).setRules(this.linkRules);
    }

    /**
     * Copies the start URL and the matcher rules from the given link rule.
     *
     * @param linkRule the rule to copy from, must not be {@code null}
     * @return this builder
     * @throws IllegalArgumentException if {@code linkRule} or its start URL is {@code null}
     */
    public CrawlerBuilder link(LinkRule linkRule) {
        Assert.notNull(linkRule, "链接解析规则不能为空");
        startUrl(linkRule.getStartUrl());
        setLinkRules(linkRule.getRules());
        return this;
    }

    /** @return the builder's own set of link matcher rules (not a copy) */
    public Set<MatcherRule> linkRules() {
        return this.linkRules;
    }

    /**
     * Replaces all link matcher rules.
     *
     * <p>The given set is copied, so later {@code addLinkRule} calls neither modify the
     * caller's set nor fail when that set is unmodifiable.
     *
     * @param set the new rules; {@code null} clears the rules
     * @return this builder
     */
    public CrawlerBuilder setLinkRules(Set<MatcherRule> set) {
        this.linkRules = null != set ? new HashSet<>(set) : new HashSet<>();
        return this;
    }

    /**
     * Adds several link matcher rules.
     *
     * @param set the rules to add; {@code null} is treated as an empty set
     * @return this builder
     */
    public CrawlerBuilder addLinkRules(Set<MatcherRule> set) {
        if (null != set) {
            this.linkRules.addAll(set);
        }
        return this;
    }

    /**
     * Adds one link matcher rule.
     *
     * @param matcherRule the rule to add, must not be {@code null}
     * @return this builder
     * @throws IllegalArgumentException if {@code matcherRule} is {@code null}
     */
    public CrawlerBuilder addLinkRule(MatcherRule matcherRule) {
        Assert.notNull(matcherRule, "链接提取规则不能为空");
        this.linkRules.add(matcherRule);
        return this;
    }

    /** @return the configured start URL, {@code null} until one is set */
    public String startUrl() {
        return this.startUrl;
    }

    /**
     * @param startUrl the start URL, must not be {@code null}
     * @return this builder
     * @throws IllegalArgumentException if {@code startUrl} is {@code null}
     */
    public CrawlerBuilder startUrl(String startUrl) {
        Assert.notNull(startUrl, "起始链接不能为空");
        this.startUrl = startUrl;
        return this;
    }

    /**
     * Assembles a new {@link ContentRule} from the content page rule and {@link #pageRule()}.
     *
     * @return a freshly created content rule
     */
    public ContentRule content() {
        return new ContentRule().setContentPageRule(this.contentPageRule).setPageRule(pageRule());
    }

    /**
     * Copies the content page rule and the page rule from the given content rule.
     *
     * @param contentRule the rule to copy from, must not be {@code null}
     * @return this builder
     * @throws IllegalArgumentException if {@code contentRule} is {@code null}
     */
    public CrawlerBuilder content(ContentRule contentRule) {
        Assert.notNull(contentRule, "内容提取规则不能为空");
        contentPageRule(contentRule.getContentPageRule());
        pageRule(contentRule.getPageRule());
        return this;
    }

    /**
     * Assembles a new {@link PageRule} from the six {@code matcher*} settings.
     *
     * @return a freshly created page rule
     */
    public PageRule pageRule() {
        return new PageRule(this.matcherType, this.matcherPattern, this.matcherTarget, this.matcherMode,
                this.matcherCaseSensitive, this.matcherFuzzy);
    }

    /**
     * Copies the six {@code matcher*} settings from the given page rule.
     *
     * @param pageRule the rule to copy from; {@code null} behaves like a rule created with {@code new PageRule()}
     * @return this builder
     */
    public CrawlerBuilder pageRule(PageRule pageRule) {
        PageRule source = null == pageRule ? new PageRule() : pageRule;
        matcherType(source.getType());
        matcherPattern(source.getPattern());
        matcherTarget(source.getTarget());
        matcherMode(source.getMode());
        matcherCaseSensitive(source.getCaseSensitive());
        matcherFuzzy(source.getFuzzy());
        return this;
    }

    /** @return the configured content page rule, possibly {@code null} */
    public MatcherRule contentPageRule() {
        return this.contentPageRule;
    }

    /**
     * @param matcherRule the content page rule; when left {@code null}, {@link #build()} derives one from the start URL
     * @return this builder
     */
    public CrawlerBuilder contentPageRule(MatcherRule matcherRule) {
        this.contentPageRule = matcherRule;
        return this;
    }

    /** @return the matcher type of the page rule, possibly {@code null} */
    public Type matcherType() {
        return this.matcherType;
    }

    /**
     * @param type the matcher type of the page rule
     * @return this builder
     */
    public CrawlerBuilder matcherType(Type type) {
        this.matcherType = type;
        return this;
    }

    /** @return the matcher pattern of the page rule, possibly {@code null} */
    public String matcherPattern() {
        return this.matcherPattern;
    }

    /**
     * @param matcherPattern the matcher pattern of the page rule
     * @return this builder
     */
    public CrawlerBuilder matcherPattern(String matcherPattern) {
        this.matcherPattern = matcherPattern;
        return this;
    }

    /** @return the matcher target of the page rule, possibly {@code null} */
    public String matcherTarget() {
        return this.matcherTarget;
    }

    /**
     * @param matcherTarget the matcher target of the page rule
     * @return this builder
     */
    public CrawlerBuilder matcherTarget(String matcherTarget) {
        this.matcherTarget = matcherTarget;
        return this;
    }

    /** @return the matcher mode flag of the page rule, possibly {@code null} */
    public Boolean matcherMode() {
        return this.matcherMode;
    }

    /**
     * @param matcherMode the matcher mode flag of the page rule
     * @return this builder
     */
    public CrawlerBuilder matcherMode(Boolean matcherMode) {
        this.matcherMode = matcherMode;
        return this;
    }

    /** @return the case-sensitivity flag of the page rule, possibly {@code null} */
    public Boolean matcherCaseSensitive() {
        return this.matcherCaseSensitive;
    }

    /**
     * @param matcherCaseSensitive the case-sensitivity flag of the page rule
     * @return this builder
     */
    public CrawlerBuilder matcherCaseSensitive(Boolean matcherCaseSensitive) {
        this.matcherCaseSensitive = matcherCaseSensitive;
        return this;
    }

    /** @return the fuzzy flag of the page rule, possibly {@code null} */
    public Boolean matcherFuzzy() {
        return this.matcherFuzzy;
    }

    /**
     * @param matcherFuzzy the fuzzy flag of the page rule
     * @return this builder
     */
    public CrawlerBuilder matcherFuzzy(Boolean matcherFuzzy) {
        this.matcherFuzzy = matcherFuzzy;
        return this;
    }

    /**
     * @return a new list holding every registered extraction rule; the order is that of the
     *         backing {@link HashMap} and therefore unspecified
     */
    public List<ExtractRule> extractRules() {
        return new ArrayList<>(this.extractRules.values());
    }

    /**
     * Registers several extraction rules, silently skipping {@code null} entries and entries
     * whose code is blank.
     *
     * @param list the rules to register; {@code null} is ignored
     * @return this builder
     */
    public CrawlerBuilder addExtractRules(List<ExtractRule> list) {
        if (null != list) {
            list.stream()
                    .filter(Objects::nonNull)
                    .filter(rule -> StringUtils.isNotBlank(rule.getCode()))
                    .forEach(this::addExtractRule);
        }
        return this;
    }

    /**
     * Replaces all extraction rules with the usable entries of the given list
     * (see {@link #addExtractRules(List)} for the filtering applied).
     *
     * @param list the new rules; {@code null} just clears the current rules
     * @return this builder
     */
    public CrawlerBuilder setExtractRules(List<ExtractRule> list) {
        this.extractRules.clear();
        addExtractRules(list);
        return this;
    }

    /**
     * Looks up an extraction rule by its code.
     *
     * @param code the rule code, must not be {@code null}
     * @return the matching rule, or {@code null} when none is registered under {@code code}
     * @throws IllegalArgumentException if {@code code} is {@code null}
     */
    public ExtractRule extractRule(String code) {
        Assert.notNull(code, "内容提取规则的编码不能为空");
        return this.extractRules.get(code);
    }

    /**
     * Registers one extraction rule, replacing any rule already stored under the same code.
     *
     * @param extractRule the rule to register; it and its code must not be {@code null}
     * @return this builder
     * @throws IllegalArgumentException if {@code extractRule} or its code is {@code null}
     */
    public CrawlerBuilder addExtractRule(ExtractRule extractRule) {
        Assert.notNull(extractRule, "内容提取规则不能为空");
        Assert.notNull(extractRule.getCode(), "内容提取规则的编码不能为空");
        this.extractRules.put(extractRule.getCode(), extractRule);
        return this;
    }

    /**
     * Returns the field rules of the extraction rule registered under the given code.
     *
     * @param code the rule code, must not be {@code null}
     * @return the rule's field rules, or {@code null} when no rule is registered under {@code code}
     * @throws IllegalArgumentException if {@code code} is {@code null}
     */
    public List<ExtractFieldRule> fieldExtractRule(String code) {
        Assert.notNull(code, "内容提取规则的编码不能为空");
        ExtractRule rule = extractRule(code);
        return null != rule ? rule.getRules() : null;
    }

    /**
     * Appends field rules to the extraction rule registered under the given code.
     * Nothing happens when no rule is registered under that code.
     *
     * @param code the rule code, must not be {@code null}
     * @param list the field rules to append, must not be {@code null}
     * @return this builder
     * @throws IllegalArgumentException if {@code code} or {@code list} is {@code null}
     */
    public CrawlerBuilder addFieldExtractRules(String code, List<ExtractFieldRule> list) {
        Assert.notNull(list, "内容提取规则的提取规则不能为空");
        ExtractRule rule = extractRule(code);
        if (null != rule) {
            List<ExtractFieldRule> fieldRules = fieldRulesOrEmpty(rule);
            fieldRules.addAll(list);
            rule.setRules(fieldRules);
            addExtractRule(rule);
        }
        return this;
    }

    /**
     * Appends a single field rule to the extraction rule registered under the given code.
     * Nothing happens when no rule is registered under that code.
     *
     * @param code             the rule code, must not be {@code null}
     * @param extractFieldRule the field rule to append, must not be {@code null}
     * @return this builder
     * @throws IllegalArgumentException if {@code code} or {@code extractFieldRule} is {@code null}
     */
    public CrawlerBuilder addExtractRule(String code, ExtractFieldRule extractFieldRule) {
        Assert.notNull(extractFieldRule, "内容提取规则的提取规则不能为空");
        ExtractRule rule = extractRule(code);
        if (null != rule) {
            List<ExtractFieldRule> fieldRules = fieldRulesOrEmpty(rule);
            fieldRules.add(extractFieldRule);
            rule.setRules(fieldRules);
            addExtractRule(rule);
        }
        return this;
    }

    /**
     * Replaces the field rules of the extraction rule registered under the given code.
     * Nothing happens when no rule is registered under that code.
     *
     * @param code the rule code, must not be {@code null}
     * @param list the new field rules
     * @return this builder
     * @throws IllegalArgumentException if {@code code} is {@code null}
     */
    public CrawlerBuilder setExtractRules(String code, List<ExtractFieldRule> list) {
        ExtractRule rule = extractRule(code);
        if (null != rule) {
            rule.setRules(list);
            addExtractRule(rule);
        }
        return this;
    }

    /**
     * Validates the collected settings and assembles the final {@link CrawlerRule}.
     *
     * <p>Besides the numeric defaults applied by {@link #validate()}, this method
     * <ul>
     * <li>drops {@code null} headers and headers with a blank name,</li>
     * <li>drops {@code null} link rules and link rules without a pattern,</li>
     * <li>falls back to a keyword rule derived from the start URL when no link rule or no
     * content page rule is left,</li>
     * <li>drops extraction rules that have a blank code or no field rules.</li>
     * </ul>
     * The cleaned-up values are written back into this builder.
     *
     * @return the assembled rule
     * @throws IllegalArgumentException if no start URL has been set
     */
    public CrawlerRule build() {
        Assert.notNull(this.startUrl, "起始链接不能为空");
        validate();

        List<HeaderRule> currentHeaders = null != this.headers ? this.headers : new ArrayList<>();
        // Collect into an explicit ArrayList: the list stays in use by addHeader after build().
        this.headers = currentHeaders.stream()
                .filter(Objects::nonNull)
                .filter(header -> StringUtils.isNotBlank(header.getHeaderName()))
                .collect(Collectors.toCollection(ArrayList::new));

        Set<MatcherRule> currentLinkRules = null != this.linkRules ? this.linkRules : new HashSet<>();
        // Collect into an explicit HashSet: addLinkRule below needs a set that is guaranteed mutable.
        this.linkRules = currentLinkRules.stream()
                .filter(Objects::nonNull)
                .filter(rule -> rule.getPattern() != null)
                .collect(Collectors.toCollection(HashSet::new));
        if (this.linkRules.isEmpty()) {
            addLinkRule(new MatcherRule(Pattern.KEYWORD, LinkUtils.keyword(this.startUrl)));
        }
        if (null == this.contentPageRule) {
            this.contentPageRule = new MatcherRule(Pattern.KEYWORD, LinkUtils.keyword(this.startUrl));
        }

        List<ExtractRule> usableRules = extractRules().stream()
                .filter(Objects::nonNull)
                .filter(rule -> StringUtils.isNotBlank(rule.getCode()))
                .filter(rule -> null != rule.getRules() && !rule.getRules().isEmpty())
                .collect(Collectors.toList());
        setExtractRules(usableRules);
        return new CrawlerRule(this.interval, this.waitTime, this.threadNum, site(), link(), content(),
                usableRules);
    }

    /**
     * Builds the rule via {@link #build()} and wraps it in a new {@link Crawler}.
     *
     * @return a crawler configured from this builder
     */
    public Crawler creatCrawler() {
        return new Crawler(build());
    }

    /** Replaces missing or out-of-range numeric settings with their defaults. */
    private void validate() {
        if (null == this.interval || this.interval < 0) {
            this.interval = DEFAULT_INTERVAL;
        }
        if (null == this.waitTime || this.waitTime <= 0) {
            this.waitTime = Long.valueOf(SiteConstant.WAIT_TIME_FOR_CLOSE);
        }
        if (null == this.threadNum || this.threadNum <= 0) {
            this.threadNum = DEFAULT_THREAD_NUM;
        }
        if (this.maxRedirects <= 0) {
            this.maxRedirects = DEFAULT_MAX_REDIRECTS;
        }
        if (null == this.interceptCount || this.interceptCount <= 0) {
            this.interceptCount = DEFAULT_INTERCEPT_COUNT;
        }
        if (this.retryCount <= 0) {
            this.retryCount = DEFAULT_RETRY_COUNT;
        }
        if (this.maxDepth <= 0) {
            this.maxDepth = 0L;
        }
        if (this.connectTimeout <= 0) {
            this.connectTimeout = SiteConstant.CONNECTION_TIME_OUT;
        }
    }

    /** Returns the rule's own field-rule list, or a new empty list when the rule has none yet. */
    private static List<ExtractFieldRule> fieldRulesOrEmpty(ExtractRule rule) {
        List<ExtractFieldRule> fieldRules = rule.getRules();
        return null != fieldRules ? fieldRules : new ArrayList<>();
    }
}
