package com.jxdinfo.idp.rule.formula.util;

import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.StandardTokenizer;
import com.jxdinfo.idp.common.exception.BusinessException;
import java.math.BigInteger;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.commons.lang3.StringUtils;
import org.jsoup.Jsoup;

/* loaded from: input_file:com/jxdinfo/idp/rule/formula/util/SimHashUtil.class */
/**
 * Computes a SimHash-style fingerprint for a piece of text and compares two
 * fingerprints by Hamming distance.
 *
 * <p>The text is lower-cased, stripped of markup via Jsoup and of a fixed list
 * of whitespace/entity tokens, segmented with {@link StandardTokenizer}, and
 * every retained term votes on each bit of the fingerprint.
 *
 * <p>External callers normally use {@link #getSimhashSimlar(String, String)}.
 */
public class SimHashUtil {
    /** Fingerprint width (in bits) used when the caller passes {@code null}. */
    private static final int DEFAULT_HASH_COUNT = 64;

    /** Words shorter than this are padded with their first character before hashing. */
    private static final int WORD_MIN_LENGTH = 3;

    /** Vote weight for a term whose nature has no entry in {@link #NATURE_WEIGHTS}. */
    private static final int DEFAULT_NATURE_WEIGHT = 1;

    /**
     * Terms whose nature string equals this value do not vote.
     * NOTE(review): presumably the tokenizer's punctuation tag - confirm against HanLP.
     */
    private static final String EXCLUDED_NATURE = "w";

    /** Per-nature vote weights; read-only after class initialisation. */
    private static final Map<String, Integer> NATURE_WEIGHTS = new HashMap<String, Integer>(16, 0.75f);

    /**
     * Literal substrings removed from the extracted text.
     * NOTE(review): "&qpos;" looks like a typo for "&apos;" - kept unchanged pending confirmation.
     */
    private static final String[] NOISE_TOKENS = {
        " ", "\n", "\r", "\t", "\\r", "\\n", "\\t",
        "&nbsp;", "&amp;", "&lt;", "&gt;", "&quot;", "&qpos;"
    };

    static {
        NATURE_WEIGHTS.put("n", 1);
    }

    /** Cleaned text that the fingerprint is computed from. */
    private final String topicName;

    /** Number of bits in the fingerprint. */
    private final int hashCount;

    /** {@code 2^hashCount - 1}; keeps word hashes and XOR results inside the fingerprint width. */
    private final BigInteger hashMask;

    /** Fingerprint of {@link #topicName}. */
    private final BigInteger bigSimHash;

    /**
     * Builds the fingerprint for the given text.
     *
     * @param str text to fingerprint; must not be {@code null}
     * @param num fingerprint width in bits; {@code null} selects 64
     * @throws IllegalArgumentException if {@code str} is {@code null} or {@code num} is not positive
     */
    public SimHashUtil(String str, Integer num) {
        if (str == null) {
            throw new IllegalArgumentException("text must not be null");
        }
        int bits = (num == null) ? DEFAULT_HASH_COUNT : num.intValue();
        if (bits <= 0) {
            throw new IllegalArgumentException("hashCount must be positive: " + bits);
        }
        this.hashCount = bits;
        this.hashMask = BigInteger.ONE.shiftLeft(bits).subtract(BigInteger.ONE);
        this.topicName = clearSpecialCharacters(str);
        // Must be computed last: a field initializer would run before the
        // assignments above and see null text and width.
        this.bigSimHash = simHash();
    }

    /**
     * Segments the cleaned text and lets every retained term vote on each bit.
     *
     * @return fingerprint with bit {@code i} set when the vote total for bit {@code i} is {@code >= 0}
     */
    private BigInteger simHash() {
        int[] bitVotes = new int[this.hashCount];
        List<Term> terms = StandardTokenizer.segment(this.topicName);
        for (Term term : terms) {
            String nature = term.nature.toString();
            if (EXCLUDED_NATURE.equals(nature)) {
                continue;
            }
            Integer configured = NATURE_WEIGHTS.get(nature);
            int weight = (configured != null) ? configured.intValue() : DEFAULT_NATURE_WEIGHT;
            BigInteger wordHash = getWordHash(term.word);
            for (int bit = 0; bit < this.hashCount; bit++) {
                bitVotes[bit] += wordHash.testBit(bit) ? weight : -weight;
            }
        }
        BigInteger fingerprint = BigInteger.ZERO;
        for (int bit = 0; bit < this.hashCount; bit++) {
            // A tie (including "no votes at all") sets the bit.
            if (bitVotes[bit] >= 0) {
                fingerprint = fingerprint.setBit(bit);
            }
        }
        return fingerprint;
    }

    /**
     * Hashes a single word into a non-negative value.
     *
     * @param str word to hash
     * @return zero for a {@code null} or empty word, otherwise the multiplicative/XOR hash
     *         of its (padded) characters XOR-ed with the padded length
     */
    private BigInteger getWordHash(String str) {
        if (StringUtils.isEmpty(str)) {
            return BigInteger.ZERO;
        }
        StringBuilder padded = new StringBuilder(str);
        while (padded.length() < WORD_MIN_LENGTH) {
            padded.append(padded.charAt(0));
        }
        char[] chars = padded.toString().toCharArray();
        BigInteger multiplier = BigInteger.valueOf(1000003L);
        BigInteger hash = BigInteger.valueOf(chars[0] << 7);
        for (char c : chars) {
            hash = hash.multiply(multiplier).xor(BigInteger.valueOf(c)).and(this.hashMask);
        }
        // After masking, hash is non-negative, so the result can never be -1;
        // no sentinel remapping is needed.
        return hash.xor(BigInteger.valueOf(chars.length));
    }

    /**
     * Lower-cases the input, extracts the body text with Jsoup and removes every
     * entry of {@link #NOISE_TOKENS} as a literal substring.
     *
     * @param str raw input, non-null
     * @return cleaned text
     */
    private String clearSpecialCharacters(String str) {
        String lowered = StringUtils.lowerCase(str, Locale.ROOT).replace("&nbsp;", "");
        String text = Jsoup.parse(lowered).body().text();
        for (String token : NOISE_TOKENS) {
            // Literal replace: the tokens are plain text, not regular expressions.
            text = text.replace(token, "");
        }
        return text;
    }

    /**
     * Similarity to another fingerprint as {@code 1 - hammingDistance / hashCount},
     * rounded to four decimal places.
     *
     * @param simHashUtil fingerprint to compare with
     * @return similarity value
     */
    private Double getSimilar(SimHashUtil simHashUtil) {
        double ratio = 1.0d - ((double) getHammingDistance(simHashUtil) / this.hashCount);
        // Locale.ROOT guarantees a '.' decimal separator so the text parses back.
        return Double.valueOf(String.format(Locale.ROOT, "%.4f", ratio));
    }

    /**
     * Counts the bits, within this instance's fingerprint width, in which the two fingerprints differ.
     *
     * @param simHashUtil fingerprint to compare with
     * @return number of differing bits
     */
    private int getHammingDistance(SimHashUtil simHashUtil) {
        return this.bigSimHash.xor(simHashUtil.bigSimHash).and(this.hashMask).bitCount();
    }

    /**
     * Computes the similarity of two texts using 64-bit fingerprints.
     *
     * @param str  first text
     * @param str2 second text
     * @return similarity rounded to four decimal places
     * @throws BusinessException if fingerprinting or comparison fails for any reason
     */
    public static Double getSimhashSimlar(String str, String str2) {
        try {
            SimHashUtil left = new SimHashUtil(str, DEFAULT_HASH_COUNT);
            SimHashUtil right = new SimHashUtil(str2, DEFAULT_HASH_COUNT);
            return left.getSimilar(right);
        } catch (Exception e) {
            throw new BusinessException("获取相似度异常：" + e.getMessage());
        }
    }
}
