package com.yishuifengxiao.common.crawler.extractor.links.impl;

import com.yishuifengxiao.common.crawler.domain.constant.CrawlerConstant;
import com.yishuifengxiao.common.crawler.domain.constant.RuleConstant;
import com.yishuifengxiao.common.crawler.domain.entity.Page;
import com.yishuifengxiao.common.crawler.domain.eunm.Rule;
import com.yishuifengxiao.common.crawler.extractor.content.strategy.Strategy;
import com.yishuifengxiao.common.crawler.extractor.content.strategy.StrategyFactory;
import com.yishuifengxiao.common.crawler.extractor.links.LinkExtractor;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:com/yishuifengxiao/common/crawler/extractor/links/impl/SimpleLinkExtractor.class */
/**
 * {@link LinkExtractor} implementation that collects the links of a page in one pass.
 *
 * <p>The page's raw text is handed to the strategy registered for {@link Rule#XPATH} together
 * with the {@link RuleConstant#XPATH_ALL_LINK} expression. The strategy returns a single string,
 * which is then split on {@link CrawlerConstant#SEPARATOR} to obtain the individual links.
 */
public class SimpleLinkExtractor implements LinkExtractor {
    private static final Logger log = LoggerFactory.getLogger(SimpleLinkExtractor.class);

    /** Strategy obtained from {@link StrategyFactory} for {@link Rule#XPATH}; reused for every call. */
    private final Strategy strategy = StrategyFactory.get(Rule.XPATH);

    /**
     * Extracts the links contained in the given page.
     *
     * <p>This method never throws: any exception raised during extraction is logged and an empty
     * list is returned instead.
     *
     * @param page the page whose raw text is searched; {@code null} yields an empty list
     * @return a new, mutable list of the extracted links in the order produced by the split;
     *         empty when the page is {@code null}, when nothing was extracted, or when
     *         extraction failed. Never {@code null}.
     */
    @Override
    public List<String> extract(Page page) {
        if (page == null) {
            // Explicit guard instead of relying on the catch block below to absorb an NPE.
            return new ArrayList<>();
        }
        try {
            String joinedLinks = this.strategy.extract(page.getRawTxt(), RuleConstant.XPATH_ALL_LINK, null);
            if (StringUtils.isBlank(joinedLinks)) {
                return new ArrayList<>();
            }
            // "PreserveAllTokens" keeps empty tokens produced by adjacent separators; they are
            // passed through unchanged so existing callers see the same elements as before.
            String[] links = StringUtils.splitByWholeSeparatorPreserveAllTokens(joinedLinks, CrawlerConstant.SEPARATOR);
            if (links == null || links.length == 0) {
                return new ArrayList<>();
            }
            // Copy into an ArrayList: Arrays.asList alone is a fixed-size view, which would make
            // the result's mutability differ between the success path and the empty/failure path.
            return new ArrayList<>(Arrays.asList(links));
        } catch (Exception e) {
            // Best-effort extraction: swallow the failure, but pass the throwable as the trailing
            // argument so the stack trace and cause are logged along with the message.
            log.info("提取所有的超链接失败，失败的原因为 {}", e.getMessage(), e);
        }
        return new ArrayList<>();
    }
}
