def __init__(self, kb_path, init_stop_words_path, keywords_path,
             set_path=None, custom_dict=None):
    # Optional jieba user dictionary; only load it when one is given,
    # otherwise jieba.load_userdict(None) would raise.
    if custom_dict is not None:
        jieba.load_userdict(custom_dict)
    # Entity sets come from a dedicated file when set_path is given,
    # otherwise from the 'sets' sheet of the knowledge base.
    if set_path is None:
        self.set_dict = read_sets(kb_path, 'sets')
    else:
        self.set_dict = read_sets(set_path)
    self.place_holder_dict = read_sets(kb_path, 'place_holder')
    # Intent-id / pattern pairs, expanded into matchable records.
    self.id_pattern_pairs = read_pattern(kb_path, 'ask_pattern',
                                         'intent_id', 'pattern')
    self.record_list = [
        convert2record_list(idpp, self.set_dict, self.place_holder_dict)
        for idpp in self.id_pattern_pairs
    ]
    self.keyword_dict = read_keywords(keywords_path)
    self.analyzer = han_analyzer()
    self.processor = Processor(init_stop_words_path=init_stop_words_path)
    self.base_structure = base_structure
    self.match_patterns = match_patterns
    self.get_intent_classes = get_intent_classes
    self.long_sentence_processor = Long_sentence_processor(init_stop_words_path)
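# Minimal usage sketch for the constructor above. `AskPatternMatcher` is an
# assumed class name and every path below is illustrative; only the
# parameter list itself comes from the source.
matcher = AskPatternMatcher(
    kb_path='../../data/kb.xlsx',                         # assumed knowledge-base workbook
    init_stop_words_path='../libs/init_stop_words.txt',
    keywords_path='../../data/keywords.xlsx',             # assumed keywords file
    set_path=None,            # None -> fall back to the 'sets' sheet in kb_path
    custom_dict='../libs/user_dict.txt',                  # assumed jieba user dictionary
)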
def __init__(self, kb_path, init_stop_words_path):
    self.set_dict = read_sets(kb_path, 'sets')
    self.place_holder_dict = read_sets(kb_path, 'place_holder')
    self.id_pattern_pairs = read_pattern(kb_path, 'ask_pattern',
                                         'intent_id', 'pattern')
    self.record_list = [
        convert2record_list(idpp, self.set_dict, self.place_holder_dict)
        for idpp in self.id_pattern_pairs
    ]
    self.analyzer = han_analyzer()
    self.processor = Processor(init_stop_words_path=init_stop_words_path)
    self.base_structure = base_structure
    self.match_patterns = match_patterns
    self.get_intent_classes = get_intent_classes
import pandas as pd

from hanlp_parse import han_analyzer
from sentence_structure_utils import base_structure
from knowledge_bank_utils import get_intent_classes
import numpy as np
from input_process_util import Processor

#%%
init_stop_words_path = '../libs/init_stop_words.txt'
data_path = '../../data/results/filtered_test0424.xlsx'
out_data_path = '../../data/results/filtered_test_nlu_0424.xlsx'
#results_path = '../../data/results/.xlsx'
df = pd.read_excel(data_path, 'wenti')
qs = df['问题']

#%%
processor = Processor(init_stop_words_path=init_stop_words_path)
analyzer = han_analyzer()

#%%
## step one: strip leading stop words from each question
df['ini_remove'] = df['问题'].apply(processor.check_and_remove_ini, args=(analyzer, False))

#%%
intents = [get_intent_classes(i) for i in qs]

#%%
df_intents = pd.DataFrame(intents)

#%%
df = df.merge(df_intents, left_index=True, right_index=True)

#%%
df.to_excel(out_data_path)
def __init__(self, init_stop_words_path='init_stop_words.txt'):
    self.analyzer = han_analyzer()
    self.ini_processor = Processor(init_stop_words_path)
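# Minimal usage sketch; `IniRemover` is an assumed name for the class owning
# the __init__ above, and the path is illustrative only.
remover = IniRemover(init_stop_words_path='../libs/init_stop_words.txt')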