package com.tydic.se.nlp.service.impl;

import com.tydic.nlp.collection.io.IOUtil;
import com.tydic.nlp.corpus.document.CorpusLoader;
import com.tydic.nlp.corpus.document.sentence.Sentence;
import com.tydic.nlp.tokenizer.NlpTokenizer;
import com.tydic.nlp.tokenizer.StandardTokenizer;
import com.tydic.se.nlp.intfs.TokenizerService;
import com.tydic.se.nlp.req.TokenizerReqBo;
import com.tydic.se.nlp.rsp.TokenizerRspBo;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.regex.Pattern;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Service;
import org.springframework.util.StringUtils;

@Service
/* loaded from: input_file:com/tydic/se/nlp/service/impl/TokenizerServiceImpl.class */
public class TokenizerServiceImpl implements TokenizerService {
    private static final Logger log = LoggerFactory.getLogger(TokenizerServiceImpl.class);

    public TokenizerRspBo standTokenizer(TokenizerReqBo tokenizerReqBo) {
        TokenizerRspBo tokenizerRspBo = new TokenizerRspBo();
        ArrayList arrayList = new ArrayList();
        arrayList.addAll(StandardTokenizer.segment(tokenizerReqBo.getText()));
        tokenizerRspBo.setTerms(arrayList);
        tokenizerRspBo.setCode("0");
        tokenizerRspBo.setMessage("成功");
        return tokenizerRspBo;
    }

    public TokenizerRspBo nlpTokenizer(TokenizerReqBo tokenizerReqBo) {
        TokenizerRspBo tokenizerRspBo = new TokenizerRspBo();
        ArrayList arrayList = new ArrayList();
        arrayList.addAll(NlpTokenizer.segment(tokenizerReqBo.getText()));
        tokenizerRspBo.setTerms(arrayList);
        tokenizerRspBo.setCode("0");
        tokenizerRspBo.setMessage("成功");
        return tokenizerRspBo;
    }

    public TokenizerRspBo tokenizerCorpus(TokenizerReqBo tokenizerReqBo) {
        TokenizerRspBo tokenizerRspBo = new TokenizerRspBo();
        FileOutputStream fileOutputStream = null;
        OutputStreamWriter outputStreamWriter = null;
        try {
            for (File file : IOUtil.fileList(tokenizerReqBo.getInPath())) {
                String str = tokenizerReqBo.getOutPath() + file.getName();
                log.info("开始制作语料文件：" + str);
                fileOutputStream = new FileOutputStream(str);
                outputStreamWriter = new OutputStreamWriter(fileOutputStream);
                Iterator it = CorpusLoader.convert2Document(file).sentenceList.iterator();
                while (it.hasNext()) {
                    String replaceAll = NlpTokenizer.segment(((Sentence) it.next()).text()).toString().replaceAll("(?:\\[|null|\\]| +)", "");
                    try {
                        if (!StringUtils.isEmpty(replaceAll.trim())) {
                            outputStreamWriter.write(replaceAll);
                            outputStreamWriter.append((CharSequence) "\r\n");
                        }
                    } catch (Exception e) {
                        e.printStackTrace();
                    }
                }
            }
            outputStreamWriter.close();
            fileOutputStream.close();
            log.info("制作语料文件完成!");
        } catch (FileNotFoundException e2) {
            e2.printStackTrace();
        } catch (IOException e3) {
            e3.printStackTrace();
        }
        tokenizerRspBo.setCode("0");
        tokenizerRspBo.setMessage("成功");
        return tokenizerRspBo;
    }
}
