package com.geccocrawler.gecco.spider.render.html;

import com.geccocrawler.gecco.annotation.Attr;
import com.geccocrawler.gecco.annotation.Href;
import com.geccocrawler.gecco.annotation.Html;
import com.geccocrawler.gecco.annotation.Image;
import com.geccocrawler.gecco.annotation.Text;
import com.geccocrawler.gecco.request.HttpRequest;
import com.geccocrawler.gecco.response.HttpResponse;
import com.geccocrawler.gecco.spider.SpiderBean;
import com.geccocrawler.gecco.spider.SpiderThreadLocal;
import com.geccocrawler.gecco.spider.conversion.Conversion;
import com.geccocrawler.gecco.spider.render.RenderContext;
import com.geccocrawler.gecco.spider.render.RenderType;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Parser;
import org.jsoup.select.Elements;

/* loaded from: input_file:com/geccocrawler/gecco/spider/render/html/HtmlParser.class */
/**
 * Thin wrapper around a parsed jsoup {@link Document} that evaluates CSS selectors and
 * extracts text, attributes, links, images, raw HTML and nested beans from the matches.
 *
 * <p>Single-value extractors ({@code $text}, {@code $attr}, {@code $href}, {@code $image})
 * work on the <em>first</em> element matched by the selector and return {@code null} when
 * nothing matches. {@code $html} works on the whole match set.
 */
public class HtmlParser {

    /**
     * Recognises content whose very first tag is a table-structure element
     * ({@code thead}, {@code tbody}, {@code tr}, {@code td}, {@code th}). Used with
     * {@link java.util.regex.Matcher#matches()}, so the content must start with {@code <}
     * (no leading whitespace) and have at least one character after the tag name.
     * Compiled once instead of on every parser construction.
     */
    private static final Pattern TABLE_FRAGMENT = Pattern.compile("<\\s*(thead|tbody|tr|td|th)[\\s\\S]+");

    /** Logger; defaults to this class and can be redirected with {@link #setLogClass(Class)}. */
    private Log log;

    /** The parsed document every selector is evaluated against. */
    private final Document document;

    /** Base URI handed to jsoup; used by jsoup to resolve relative URLs. */
    private final String baseUri;

    /**
     * Parses {@code content} into a document.
     *
     * @param baseUri base URI passed to jsoup for resolving relative links
     * @param content markup to parse; a bare table fragment (see {@link #isTable(String)})
     *                is parsed with jsoup's XML parser, everything else with the HTML parser
     */
    public HtmlParser(String baseUri, String content) {
        long startedAt = System.currentTimeMillis();
        this.log = LogFactory.getLog(HtmlParser.class);
        this.baseUri = baseUri;
        if (isTable(content)) {
            // NOTE(review): presumably the XML parser is used so that stray <tr>/<td>/... tags
            // are kept as-is rather than being dropped or re-parented by the HTML parser - confirm.
            this.document = Jsoup.parse(content, baseUri, Parser.xmlParser());
        } else {
            this.document = Jsoup.parse(content, baseUri);
        }
        long finishedAt = System.currentTimeMillis();
        if (this.log.isTraceEnabled()) {
            this.log.trace("init html parser : " + (finishedAt - startedAt) + "ms");
        }
    }

    /**
     * @return the base URI this parser was created with
     */
    public String baseUri() {
        return this.baseUri;
    }

    /**
     * Extracts a single value for {@code field} according to the annotation it carries.
     * Annotations are checked in the order {@link Text}, {@link Image}, {@link Href},
     * {@link Attr}, {@link Html}; with none of them present the inner HTML of the match
     * set is returned.
     *
     * @param selector CSS selector
     * @param field    bean field whose annotations and type drive the extraction
     * @return the extracted value; {@link Text} and {@link Attr} values are passed through
     *         {@link Conversion#getValue} with the field's type, the others are strings
     * @throws Exception propagated from {@link Conversion#getValue}
     */
    public Object $basic(String selector, Field field) throws Exception {
        Text text = field.getAnnotation(Text.class);
        if (text != null) {
            return Conversion.getValue(field.getType(), $text(selector, text.own()));
        }
        Image image = field.getAnnotation(Image.class);
        if (image != null) {
            return $image(selector, image.value());
        }
        Href href = field.getAnnotation(Href.class);
        if (href != null) {
            return $href(selector, href.value());
        }
        Attr attr = field.getAnnotation(Attr.class);
        if (attr != null) {
            return Conversion.getValue(field.getType(), $attr(selector, attr.value()));
        }
        Html html = field.getAnnotation(Html.class);
        return html != null ? $html(selector, html.outer()) : $html(selector);
    }

    /**
     * Extracts one value per element matched by {@code selector}, using the same annotation
     * precedence as {@link #$basic(String, Field)} but applied to each element individually.
     *
     * @param selector CSS selector
     * @param field    bean field whose annotations and type drive the extraction
     * @return extracted values in document order; empty when nothing matches
     * @throws Exception propagated from {@link Conversion#getValue}
     */
    public List<Object> $basicList(String selector, Field field) throws Exception {
        List<Object> values = new ArrayList<Object>();
        for (Element element : $(selector)) {
            values.add(basicValue(element, field));
        }
        return values;
    }

    /** Extracts the value of a single element according to the annotation present on {@code field}. */
    private Object basicValue(Element element, Field field) throws Exception {
        Text text = field.getAnnotation(Text.class);
        if (text != null) {
            return Conversion.getValue(field.getType(), $text(element, text.own()));
        }
        Image image = field.getAnnotation(Image.class);
        if (image != null) {
            return $image(element, image.value());
        }
        Href href = field.getAnnotation(Href.class);
        if (href != null) {
            return $href(element, href.value());
        }
        Attr attr = field.getAnnotation(Attr.class);
        if (attr != null) {
            return Conversion.getValue(field.getType(), $attr(element, attr.value()));
        }
        Html html = field.getAnnotation(Html.class);
        if (html != null && html.outer()) {
            return element.outerHtml();
        }
        return element.html();
    }

    /**
     * Renders a nested bean from the inner HTML of the elements matched by {@code selector}.
     *
     * @param selector CSS selector
     * @param request  request passed through to the HTML render
     * @param beanType bean class to populate
     * @return whatever the HTML render's {@code inject} returns for that fragment
     */
    public SpiderBean $bean(String selector, HttpRequest request, Class<? extends SpiderBean> beanType) {
        String fragment = $html(selector);
        return RenderContext.getRender(RenderType.HTML).inject(beanType, request, HttpResponse.createSimple(fragment));
    }

    /**
     * Renders one nested bean per matched element, each from that element's outer HTML.
     *
     * @param selector CSS selector
     * @param request  request passed through to the HTML render
     * @param beanType bean class to populate
     * @return rendered beans in document order; empty when nothing matches
     */
    public List<SpiderBean> $beanList(String selector, HttpRequest request, Class<? extends SpiderBean> beanType) {
        List<SpiderBean> beans = new ArrayList<SpiderBean>();
        for (String fragment : $list(selector)) {
            beans.add(RenderContext.getRender(RenderType.HTML).inject(beanType, request, HttpResponse.createSimple(fragment)));
        }
        return beans;
    }

    /**
     * Evaluates {@code selector} against the document.
     *
     * <p>When the current spider's engine is in debug mode the selector and its matches are
     * printed to standard output, except for the {@code script} selector.
     *
     * @param selector CSS selector
     * @return the matching elements
     */
    public Elements $(String selector) {
        Elements matches = this.document.select(selector);
        // NOTE(review): assumes SpiderThreadLocal.get() and its engine are non-null on the
        // calling thread - confirm before using this parser outside a spider thread.
        boolean debug = SpiderThreadLocal.get().getEngine().isDebug();
        if (debug && !selector.equalsIgnoreCase("script")) {
            System.out.println("[" + selector + "]--->[" + matches + "]");
        }
        return matches;
    }

    /**
     * @param selector CSS selector
     * @return the first matching element, or {@code null} when nothing matches
     */
    public Element $element(String selector) {
        Elements matches = $(selector);
        if (matches == null || matches.isEmpty()) {
            return null;
        }
        return matches.first();
    }

    /**
     * @param selector CSS selector
     * @return the outer HTML of every matching element, in document order
     */
    public List<String> $list(String selector) {
        List<String> fragments = new ArrayList<String>();
        Elements matches = $(selector);
        if (matches != null) {
            for (Element element : matches) {
                fragments.add(element.outerHtml());
            }
        }
        return fragments;
    }

    /**
     * Shorthand for {@code $html(selector, false)}.
     *
     * @param selector CSS selector
     * @return inner HTML of the match set
     */
    public String $html(String selector) {
        return $html(selector, false);
    }

    /**
     * @param selector CSS selector
     * @param outer    {@code true} for outer HTML, {@code false} for inner HTML
     * @return the HTML of the whole match set as produced by {@link Elements#outerHtml()} or
     *         {@link Elements#html()}; {@code null} only if {@link #$(String)} returns {@code null}
     */
    public String $html(String selector, boolean outer) {
        Elements matches = $(selector);
        if (matches == null) {
            return null;
        }
        return outer ? matches.outerHtml() : matches.html();
    }

    /**
     * Returns the text of {@code element} with every U+00A0 (non-breaking space) removed.
     *
     * @param element element to read; may be {@code null}
     * @param own     {@code true} to use only the element's own text, {@code false} to include
     *                descendants' text
     * @return the cleaned text, or {@code null} when {@code element} is {@code null}
     */
    public String $text(Element element, boolean own) {
        if (element == null) {
            return null;
        }
        String raw = own ? element.ownText() : element.text();
        // Written as an escape so the otherwise invisible character is obvious to readers.
        return StringUtils.replace(raw, "\u00A0", "");
    }

    /**
     * Same as {@link #$text(Element, boolean)} for the first element matched by {@code selector}.
     *
     * @return the cleaned text, or {@code null} when nothing matches
     */
    public String $text(String selector, boolean own) {
        return $text($element(selector), own);
    }

    /**
     * @param element element to read; may be {@code null}
     * @param attr    attribute name
     * @return the attribute value as returned by {@link Element#attr(String)}, or {@code null}
     *         when {@code element} is {@code null}
     */
    public String $attr(Element element, String attr) {
        if (element == null) {
            return null;
        }
        return element.attr(attr);
    }

    /**
     * Same as {@link #$attr(Element, String)} for the first element matched by {@code selector}.
     *
     * @return the attribute value, or {@code null} when nothing matches
     */
    public String $attr(String selector, String attr) {
        return $attr($element(selector), attr);
    }

    /**
     * @param element element to read; may be {@code null}
     * @param attr    attribute holding the link
     * @return {@link Element#absUrl(String)} for that attribute, or {@code null} when
     *         {@code element} is {@code null}
     */
    public String $href(Element element, String attr) {
        if (element == null) {
            return null;
        }
        return element.absUrl(attr);
    }

    /**
     * Tries each attribute in order and returns the first non-empty absolute URL; falls back
     * to the {@code href} attribute when none of them yields one.
     *
     * @param element element to read; may be {@code null}
     * @param attrs   candidate attribute names, in priority order
     * @return the resolved URL, or {@code null} when {@code element} is {@code null}
     */
    public String $href(Element element, String... attrs) {
        if (element == null) {
            return null;
        }
        for (String attr : attrs) {
            String url = $href(element, attr);
            if (StringUtils.isNotEmpty(url)) {
                return url;
            }
        }
        return $href(element, "href");
    }

    /** Same as {@link #$href(Element, String)} for the first element matched by {@code selector}. */
    public String $href(String selector, String attr) {
        return $href($element(selector), attr);
    }

    /** Same as {@link #$href(Element, String...)} for the first element matched by {@code selector}. */
    public String $href(String selector, String... attrs) {
        return $href($element(selector), attrs);
    }

    /**
     * @param element element to read; may be {@code null}
     * @param attr    attribute holding the image location
     * @return {@link Element#absUrl(String)} for that attribute, or {@code null} when
     *         {@code element} is {@code null}
     */
    public String $image(Element element, String attr) {
        if (element == null) {
            return null;
        }
        return element.absUrl(attr);
    }

    /**
     * Tries each attribute in order and returns the first non-empty absolute URL; falls back
     * to the {@code src} attribute when none of them yields one.
     *
     * @param element element to read; may be {@code null}
     * @param attrs   candidate attribute names, in priority order
     * @return the resolved URL, or {@code null} when {@code element} is {@code null}
     */
    public String $image(Element element, String... attrs) {
        if (element == null) {
            return null;
        }
        for (String attr : attrs) {
            String url = $image(element, attr);
            if (StringUtils.isNotEmpty(url)) {
                return url;
            }
        }
        return $image(element, "src");
    }

    /** Same as {@link #$image(Element, String)} for the first element matched by {@code selector}. */
    public String $image(String selector, String attr) {
        return $image($element(selector), attr);
    }

    /** Same as {@link #$image(Element, String...)} for the first element matched by {@code selector}. */
    public String $image(String selector, String... attrs) {
        return $image($element(selector), attrs);
    }

    /**
     * Redirects this parser's logging to the logger of {@code cls}.
     *
     * @param cls class whose logger should be used from now on
     */
    public void setLogClass(Class<? extends SpiderBean> cls) {
        this.log = LogFactory.getLog(cls);
    }

    /**
     * Decides whether {@code content} is a bare table fragment: it contains no
     * {@code </html>} closing tag and matches {@link #TABLE_FRAGMENT} in full.
     */
    private boolean isTable(String content) {
        if (StringUtils.contains(content, "</html>")) {
            return false;
        }
        return TABLE_FRAGMENT.matcher(content).matches();
    }
}
