package com.tydic.se.nlp.service.participle;

import com.tydic.nlp.corpus.tag.Nature;
import com.tydic.nlp.util.SentencesUtil;
import com.tydic.se.nlp.rsp.ParticipleRspBo;
import com.tydic.se.nlp.service.constant.InitialParticipleSqlEnum;
import com.tydic.se.nlp.service.utils.ChineseSymbolUtil;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import javax.annotation.PostConstruct;
import org.apache.commons.lang.StringEscapeUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.jdbc.support.rowset.SqlRowSet;
import org.springframework.stereotype.Service;
import org.springframework.util.StringUtils;

/**
 * Builds dictionary entries from commodity names.
 *
 * <p>For the vendor value passed to {@link #participle(String)}, commodity names are read from the
 * database page by page ({@link #PAGE_SIZE} rows per page), each name is split into candidate
 * terms, and every page's terms are batch-inserted using {@code DIC_INSERT}, tagged with one of
 * three {@link Nature} values.
 */
@Service
/* loaded from: input_file:com/tydic/se/nlp/service/participle/ParticipleServiceImpl.class */
public class ParticipleServiceImpl implements IParticipleService {
    /** Number of rows fetched per page. */
    static final int PAGE_SIZE = 50000;

    private static final Logger log = LoggerFactory.getLogger(ParticipleServiceImpl.class);

    /** Matches an optionally signed integer or decimal number. */
    private static final Pattern NUMBER_PATTERN = Pattern.compile("^[+-]?\\d+\\.?\\d*$");
    /** Applied to the first character of a term: CJK ideograph, underscore, ASCII letter or digit. */
    private static final Pattern WORD_START_PATTERN = Pattern.compile("^[\\u4e00-\\u9fa5_a-zA-Z0-9]+$");
    /** Matches terms made only of digits, upper-case ASCII letters and hyphens. */
    private static final Pattern MODEL_PATTERN = Pattern.compile("^[0-9A-Z-]{1,}$");

    /** Ordinary terms must be strictly shorter than this; model terms must be strictly longer. */
    private static final int MAX_LEN = 10;
    /** Ordinary terms must be strictly longer than this. */
    private static final int MIN_LEN = 1;

    private static final String LEFT_BRACKET = "(";
    private static final String RIGHT_BRACKET = ")";

    /**
     * Separators used to split terms. Each entry is used twice: as a regular expression for
     * {@link String#split(String)} and, after {@code StringEscapeUtils.unescapeJava}, as the
     * literal for {@link String#contains(CharSequence)}.
     */
    private final List<String> symbols = new ArrayList<>();

    @Autowired
    private JdbcTemplate jdbcTemplate;

    /** Registers the separator symbols once the bean has been constructed. */
    @PostConstruct
    public void initSymbols() {
        this.symbols.add("\\|");
        this.symbols.add("\\/");
        this.symbols.add("\\-");
        this.symbols.add("\\(");
        // NOTE(review): this unescaped "(" is not a valid regex on its own. It is never selected
        // because "\\(" precedes it and unescapes to the same literal, so the lookup always
        // returns the escaped form first. Possibly a different bracket character was intended
        // here - confirm against the original source.
        this.symbols.add(LEFT_BRACKET);
        this.symbols.add("系列");
    }

    /**
     * Builds the count statement for the given vendor value.
     *
     * <p>NOTE(review): {@code str} is concatenated directly into the SQL text. If it can carry
     * caller-controlled input this is an injection risk; switching to a bind parameter requires
     * changing the SQL template in {@code InitialParticipleSqlEnum}, which is not visible here.
     *
     * @param str vendor value appended to the SQL template
     * @return the complete SQL string
     */
    String constructQueryCount(String str) {
        return InitialParticipleSqlEnum.COMMODITY_COUNT_BY_VENDOR.getSql() + str + "')";
    }

    /**
     * Builds the paged query statement for the given vendor value.
     *
     * <p>NOTE(review): same string-concatenation concern as {@link #constructQueryCount(String)}.
     *
     * @param i zero-based page index
     * @param str vendor value appended to the SQL template
     * @return the complete SQL string including the {@code limit offset,size} clause
     */
    String constructQuery(int i, String str) {
        // Compute the offset in long arithmetic so a large page index cannot overflow int.
        long offset = (long) i * PAGE_SIZE;
        return InitialParticipleSqlEnum.COMMODITY_QUERY_BY_VENDOR.getSql() + str + "') limit " + offset + "," + PAGE_SIZE;
    }

    /**
     * Computes how many pages of {@link #PAGE_SIZE} rows are needed for {@code num} rows.
     *
     * @param num total row count; {@code null} is treated as zero
     * @return the number of pages, rounded up
     */
    Integer calcTotalPage(Integer num) {
        int total = num == null ? 0 : num.intValue();
        int pages = total / PAGE_SIZE;
        if (total % PAGE_SIZE != 0) {
            pages++;
        }
        log.info("查询返回的总记录数量：" + total + ",返回的总分页：" + pages);
        return Integer.valueOf(pages);
    }

    /**
     * Processes every page of commodity names for the given vendor value and stores the
     * resulting terms.
     *
     * @param str vendor value used in the count and page queries
     * @return a response with code {@code "0"} once all pages have been processed
     */
    @Override // com.tydic.se.nlp.service.participle.IParticipleService
    public ParticipleRspBo participle(String str) {
        ParticipleRspBo response = new ParticipleRspBo();
        Integer rowCount = this.jdbcTemplate.queryForObject(constructQueryCount(str), Integer.class);
        int totalPages = calcTotalPage(rowCount).intValue();
        for (int page = 0; page < totalPages; page++) {
            participleCommodity(page, str);
        }
        response.setCode("0");
        response.setMessage("成功");
        return response;
    }

    /**
     * Reads one page of commodity names, extracts their terms and inserts them.
     *
     * @param i zero-based page index
     * @param str vendor value
     */
    private void participleCommodity(int i, String str) {
        Set<String> words = new HashSet<>();
        SqlRowSet rows = this.jdbcTemplate.queryForRowSet(constructQuery(i, str));
        while (rows.next()) {
            String name = rows.getString(1);
            if (!StringUtils.isEmpty(name)) {
                words.addAll(participleName(name));
            }
        }
        doInsertDic(words, str);
    }

    /**
     * Classifies each term and batch-inserts the three resulting groups.
     *
     * <p>Each row is {@code [word, nature, vendor]}. The nature is {@code ncl} when
     * {@code ChineseSymbolUtil.isChinese} accepts the term, {@code nmd} when it matches
     * {@link #MODEL_PATTERN}, and {@code nsz} otherwise.
     *
     * @param set terms to insert
     * @param str vendor value stored with every row
     */
    private void doInsertDic(Set<String> set, String str) {
        List<Object[]> chineseRows = new ArrayList<>();
        List<Object[]> modelRows = new ArrayList<>();
        List<Object[]> otherRows = new ArrayList<>();
        for (String word : set) {
            Object[] row = new Object[3];
            row[0] = dealWord(word);
            row[2] = str;
            if (ChineseSymbolUtil.isChinese(word)) {
                row[1] = Nature.ncl.toString();
                chineseRows.add(row);
            } else if (MODEL_PATTERN.matcher(word).matches()) {
                row[1] = Nature.nmd.toString();
                modelRows.add(row);
            } else {
                row[1] = Nature.nsz.toString();
                otherRows.add(row);
            }
        }
        insertBatch(chineseRows, Nature.ncl.toString(), str);
        insertBatch(modelRows, Nature.nmd.toString(), str);
        insertBatch(otherRows, Nature.nsz.toString(), str);
    }

    /** Runs one batch insert and logs how many rows of the given nature were submitted. */
    private void insertBatch(List<Object[]> rows, String nature, String vendor) {
        this.jdbcTemplate.batchUpdate(InitialParticipleSqlEnum.DIC_INSERT.getSql(), rows);
        log.info("供应商{},插入数据类型{},条数{}", new Object[]{vendor, nature, Integer.valueOf(rows.size())});
    }

    /**
     * Cleans a term that has a right bracket (not at index 0) but no left bracket by passing it
     * through {@code ChineseSymbolUtil.dealSpecialSymbols}; other terms are returned unchanged.
     */
    private String dealWord(String str) {
        if (str.indexOf(LEFT_BRACKET) < 0 && str.indexOf(RIGHT_BRACKET) > 0) {
            str = ChineseSymbolUtil.dealSpecialSymbols(str);
        }
        return str;
    }

    /**
     * Splits a commodity name into candidate terms and filters them.
     *
     * <p>The name is cut into sentences, then repeatedly split on the registered symbols until no
     * further split is possible; every intermediate term is kept as a candidate. A candidate is
     * returned when either:
     * <ul>
     *   <li>its length is strictly between {@link #MIN_LEN} and {@link #MAX_LEN}, its first
     *       character matches {@link #WORD_START_PATTERN}, and it is not a plain number; or</li>
     *   <li>it is longer than {@link #MAX_LEN} and matches {@link #MODEL_PATTERN}.</li>
     * </ul>
     *
     * @param str commodity name
     * @return the accepted terms (mutable set)
     */
    private Set<String> participleName(String str) {
        Set<String> candidates = new HashSet<>();
        Set<String> current = new HashSet<>(SentencesUtil.toSentenceList(str));
        do {
            current = divisionTerms(current);
            candidates.addAll(current);
        } while (isDivisible(current));

        // NOTE(review): a term of exactly MAX_LEN characters satisfies neither rule below and is
        // dropped. This mirrors the existing behaviour; confirm whether one bound should be inclusive.
        Set<String> accepted = candidates.stream()
                .filter(word -> !StringUtils.isEmpty(word))
                .filter(word -> word.length() > MIN_LEN && word.length() < MAX_LEN)
                .filter(word -> WORD_START_PATTERN.matcher(String.valueOf(word.charAt(0))).matches())
                .filter(word -> !NUMBER_PATTERN.matcher(word).matches())
                // An explicit HashSet guarantees the result can be extended below.
                .collect(Collectors.toCollection(HashSet::new));

        for (String word : candidates) {
            if (word != null && word.length() > MAX_LEN && MODEL_PATTERN.matcher(word).matches()) {
                accepted.add(word);
            }
        }
        return accepted;
    }

    /**
     * Performs one splitting pass over the given terms.
     *
     * <p>Terms rejected by {@code ChineseSymbolUtil.isChinese} are copied unchanged. Other terms
     * are split on the first registered symbol they contain (or kept whole when they contain
     * none), and each piece is passed through {@code ChineseSymbolUtil.dealSpecialSymbols}.
     *
     * @param set terms to split
     * @return a new set holding the results of this pass
     */
    private Set<String> divisionTerms(Set<String> set) {
        Set<String> result = new HashSet<>();
        for (String term : set) {
            if (!ChineseSymbolUtil.isChinese(term)) {
                result.add(term);
                continue;
            }
            String symbol = getContainsSymbol(term);
            if (StringUtils.isEmpty(symbol)) {
                result.add(ChineseSymbolUtil.dealSpecialSymbols(term));
            } else {
                for (String piece : term.split(symbol)) {
                    result.add(ChineseSymbolUtil.dealSpecialSymbols(piece));
                }
            }
        }
        return result;
    }

    /**
     * Tells whether another splitting pass is needed.
     *
     * @param set terms produced by the previous pass
     * @return {@code true} if any term accepted by {@code ChineseSymbolUtil.isChinese} still
     *     contains a registered symbol
     */
    private boolean isDivisible(Set<String> set) {
        for (String term : set) {
            if (ChineseSymbolUtil.isChinese(term) && getContainsSymbol(term) != null) {
                return true;
            }
        }
        return false;
    }

    /**
     * Finds the first registered symbol whose unescaped form occurs in {@code str}.
     *
     * @param str term to inspect
     * @return the symbol in its registered (regex) form, or {@code null} if none occurs
     */
    private String getContainsSymbol(String str) {
        for (String symbol : this.symbols) {
            if (str.contains(StringEscapeUtils.unescapeJava(symbol))) {
                return symbol;
            }
        }
        return null;
    }
}
