Example #1
    def __init__(self,
                 kb_path,
                 init_stop_words_path,
                 keywords_path,
                 set_path=None,
                 custom_dict=None):
        # Only load a user dictionary into jieba when one is actually given.
        if custom_dict is not None:
            jieba.load_userdict(custom_dict)
        # Entity sets come from the knowledge base itself unless a separate file is given.
        if set_path is None:
            self.set_dict = read_sets(kb_path, 'sets')
        else:
            self.set_dict = read_sets(set_path)
        self.place_holder_dict = read_sets(kb_path, 'place_holder')
        # (intent_id, pattern) pairs read from 'ask_pattern'.
        self.id_pattern_pairs = read_pattern(kb_path, 'ask_pattern',
                                             'intent_id', 'pattern')
        # Expand each pair into match records using the set and placeholder dictionaries.
        self.record_list = [
            convert2record_list(idpp, self.set_dict, self.place_holder_dict)
            for idpp in self.id_pattern_pairs
        ]

        self.keyword_dict = read_keywords(keywords_path)
        self.analyzer = han_analyzer()
        self.processor = Processor(init_stop_words_path=init_stop_words_path)
        # Keep module-level helpers available as instance attributes.
        self.base_structure = base_structure
        self.match_patterns = match_patterns
        self.get_intent_classes = get_intent_classes
        self.long_sentence_processor = Long_sentence_processor(
            init_stop_words_path)
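The snippet above shows only the initializer; the enclosing class name is not part of the example. A minimal instantiation sketch, with IntentMatcher as a purely hypothetical class name and the kb/keywords paths invented for illustration (only init_stop_words_path matches the path used in Example #3):

# Hypothetical usage; the class name and the kb/keyword paths are assumptions,
# only the constructor signature comes from the example above.
matcher = IntentMatcher(
    kb_path='../../data/kb.xlsx',                        # knowledge base workbook (assumed path)
    init_stop_words_path='../libs/init_stop_words.txt',
    keywords_path='../../data/keywords.xlsx',            # keyword table (assumed path)
    set_path=None,       # None -> fall back to the 'sets' sheet inside kb_path
    custom_dict=None)    # optional jieba user dictionary file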
Example #2
    def __init__(self, kb_path, init_stop_words_path):
        # Same knowledge-base loading as Example #1, without keywords or a custom jieba dictionary.
        self.set_dict = read_sets(kb_path, 'sets')
        self.place_holder_dict = read_sets(kb_path, 'place_holder')
        self.id_pattern_pairs = read_pattern(kb_path, 'ask_pattern',
                                             'intent_id', 'pattern')
        self.record_list = [
            convert2record_list(idpp, self.set_dict, self.place_holder_dict)
            for idpp in self.id_pattern_pairs
        ]

        self.analyzer = han_analyzer()
        self.processor = Processor(init_stop_words_path=init_stop_words_path)
        # Module-level helpers exposed as instance attributes.
        self.base_structure = base_structure
        self.match_patterns = match_patterns
        self.get_intent_classes = get_intent_classes
Example #3
from hanlp_parse import han_analyzer
from sentence_structure_utils import base_structure
from knowledge_bank_utils import get_intent_classes
import numpy as np
import pandas as pd
from input_process_util import Processor
#%%
init_stop_words_path = '../libs/init_stop_words.txt'
data_path = '../../data/results/filtered_test0424.xlsx'
out_data_path = '../../data/results/filtered_test_nlu_0424.xlsx'
#results_path = '../../data/results/.xlsx'
df = pd.read_excel(data_path, 'wenti')
qs = df['问题']  # the question column ('问题' = "question")

#%%
processor = Processor(init_stop_words_path=init_stop_words_path)
analyzer = han_analyzer()
#%%

## step one: apply Processor.check_and_remove_ini to every question
df['ini_remove'] = df['问题'].apply(processor.check_and_remove_ini, args=(analyzer, False))
#%%
# Classify each question's intent.
intents = [get_intent_classes(q) for q in qs]
#%%
df_intents = pd.DataFrame(intents)
#%%

df = df.merge(df_intents, left_index=True, right_index=True)
#%%
out_data_path = '../../data/results/filtered_test_nlu_0424.xlsx'
df.to_excel(out_data_path)
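Since the merge above joins on the row index, pd.DataFrame(intents) has to yield one row per question in the original order, which implies get_intent_classes returns a dict-like record per question. A quick sanity check on a single question before running the whole column (assuming the script above has already been executed):

# Run intent classification on the first question only and inspect the raw result;
# the exact keys depend on get_intent_classes and are not documented in this example.
sample = get_intent_classes(qs.iloc[0])
print(sample)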
Example #4
    def __init__(self, init_stop_words_path='init_stop_words.txt'):
        self.analyzer = han_analyzer()
        self.ini_processor = Processor(init_stop_words_path)