package com.yishuifengxiao.common.crawler.link;

import com.yishuifengxiao.common.crawler.domain.entity.Page;
import com.yishuifengxiao.common.crawler.domain.model.LinkRule;
import com.yishuifengxiao.common.crawler.domain.model.MatcherRule;
import com.yishuifengxiao.common.crawler.extractor.ExtractorFactory;
import com.yishuifengxiao.common.crawler.link.filter.BaseLinkFilter;
import com.yishuifengxiao.common.crawler.link.filter.impl.AbsoluteLinkFilter;
import com.yishuifengxiao.common.crawler.link.filter.impl.HashLinkFilter;
import com.yishuifengxiao.common.crawler.link.filter.impl.HttpLinkFilter;
import com.yishuifengxiao.common.crawler.link.filter.impl.IllegalLinkFilter;
import com.yishuifengxiao.common.crawler.link.filter.impl.RelativeLinkFilter;
import com.yishuifengxiao.common.crawler.link.filter.impl.ShortLinkFilter;
import com.yishuifengxiao.common.crawler.macther.MatcherFactory;
import com.yishuifengxiao.common.crawler.utils.LocalCrawler;
import com.yishuifengxiao.common.tool.exception.ServiceException;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/yishuifengxiao/common/crawler/link/LinkExtractDecorator.class */
/**
 * {@link LinkExtract} implementation that wraps an optional delegate extractor.
 *
 * <p>For every page it first stores the links produced by the link extractor obtained from
 * {@link ExtractorFactory}, then lets the wrapped delegate (if any) process the page, passes
 * every resulting link through the {@link BaseLinkFilter} chain, and finally keeps only the
 * links accepted by at least one matcher built from the supplied {@link LinkRule}.
 *
 * <p>{@link #extract(LinkRule, Page)} is {@code synchronized} on the instance, and the call to
 * the delegate additionally holds the {@code LinkExtractDecorator.class} monitor.
 */
public class LinkExtractDecorator implements LinkExtract {
    private static final Logger log = LoggerFactory.getLogger(LinkExtractDecorator.class);

    private final ExtractorFactory factory = new ExtractorFactory();
    private final MatcherFactory matcherFactory = new MatcherFactory();
    private final BaseLinkFilter linkFilter = createLinkFilter();

    /** Wrapped extractor; {@code null} means there is no delegate to invoke. */
    private final LinkExtract linkExtract;

    /**
     * Creates a decorator around the given extractor.
     *
     * @param linkExtract the extractor to delegate to; may be {@code null}, in which case the
     *     delegation step of {@link #extract(LinkRule, Page)} is skipped
     */
    public LinkExtractDecorator(LinkExtract linkExtract) {
        this.linkExtract = linkExtract;
    }

    /**
     * Extracts, filters and rule-matches the links of {@code page}, storing the result back on
     * the page via {@code page.setLinks(...)}.
     *
     * <p>The filtered links are gathered with {@link Collectors#toSet()} before rule matching,
     * so duplicates are dropped and no ordering of the final list is guaranteed.
     *
     * @param linkRule supplies the matcher rules a link must satisfy to be kept
     * @param page the page whose links are extracted and replaced
     * @throws ServiceException declared by the {@link LinkExtract} contract; may be raised by
     *     the wrapped delegate
     */
    @Override // com.yishuifengxiao.common.crawler.link.LinkExtract
    public synchronized void extract(LinkRule linkRule, Page page) throws ServiceException {
        page.setLinks(this.factory.getLinkExtractor().extract(page));
        if (this.linkExtract != null) {
            // The class-wide monitor is held around the delegate call in addition to the
            // instance monitor taken by this synchronized method.
            synchronized (LinkExtractDecorator.class) {
                this.linkExtract.extract(linkRule, page);
            }
        }

        // Prefer the redirect URL when one is present, otherwise use the requested URL.
        final String currentUrl = StringUtils.isNotBlank(page.getRedirectUrl())
                ? page.getRedirectUrl()
                : page.getRequest().getUrl();

        Set<String> filteredLinks = page.getLinks().stream()
                .filter(Objects::nonNull)
                .map(link -> this.linkFilter.doFilter(currentUrl, link))
                .collect(Collectors.toSet());
        page.setLinks(filterByRules(linkRule, filteredLinks));

        // Skip building the log arguments entirely when debug output is disabled.
        if (log.isDebugEnabled()) {
            log.debug("【id:{} , name:{} 】 The actual address of request {} is [ {} ], and the extracted link is {}",
                    LocalCrawler.get() != null ? LocalCrawler.get().getUuid() : "test",
                    LocalCrawler.get() != null ? LocalCrawler.get().getName() : "test",
                    page.getRequest().getUrl(),
                    page.getRedirectUrl(),
                    page.getLinks());
        }
    }

    /**
     * Keeps the candidate links that are accepted by at least one rule matcher.
     *
     * <p>Rules that are {@code null} or have a {@code null} pattern are ignored. One matcher is
     * created per remaining rule, up front, and reused for every candidate link.
     *
     * @param linkRule source of the matcher rules
     * @param candidates links to test; {@code null} elements are discarded
     * @return the links for which some matcher's {@code match} returned {@code true}; empty
     *     when no usable rule exists
     */
    private List<String> filterByRules(LinkRule linkRule, Set<String> candidates) {
        // Each matcher is adapted to Predicate<String> so the collection stays fully typed
        // without this class having to name the matcher's concrete type.
        List<Predicate<String>> matchers = linkRule.getRules().stream()
                .filter(Objects::nonNull)
                .filter(rule -> rule.getPattern() != null)
                .map(this.matcherFactory::getMatcher)
                .<Predicate<String>>map(matcher -> matcher::match)
                .collect(Collectors.toList());

        return candidates.stream()
                .filter(Objects::nonNull)
                .filter(link -> matchers.stream().anyMatch(matcher -> matcher.test(link)))
                .collect(Collectors.toList());
    }

    /**
     * Builds the link filter chain used by {@link #extract(LinkRule, Page)}.
     *
     * <p>The filters are nested, outermost first: illegal, short, http, absolute, hash,
     * relative. The innermost filter is constructed with a {@code null} argument.
     *
     * @return the outermost filter of the chain
     */
    private static BaseLinkFilter createLinkFilter() {
        return new IllegalLinkFilter(
                new ShortLinkFilter(
                        new HttpLinkFilter(
                                new AbsoluteLinkFilter(
                                        new HashLinkFilter(
                                                new RelativeLinkFilter(null))))));
    }
}
