def execute(self, sent):
    """Estimate the dialogue act of *sent* and attach extracted attributes.

    Returns a dict containing 'user_act_type' (the predicted act type)
    plus, unless the act type is 'other', the named-entity attributes
    extracted from the sentence.
    """
    # Predict the dialogue act type from bag-of-words features.
    features = sent2features_(sent)
    act_type = self.__predictor.predict([features])

    # Re-analyze the sentence morphologically for attribute extraction:
    # each row is [surface, *morph_features].
    surfaces, morph_features = analyze_morph(sent)
    morphed_sent = [[surface] + feats.split(',')
                    for surface, feats in zip(surfaces, morph_features)]
    features = sent2features(morphed_sent)
    named_entity = self.__extractor.extract(features, morphed_sent)

    dialogue_act = {'user_act_type': act_type}
    # Attributes are only attached for non-'other' act types.
    if act_type != 'other':
        dialogue_act.update(dict(named_entity))
    return dialogue_act
def execute(self, sent):
    """Estimate the dialogue act of *sent* and attach extracted attributes.

    Act types predicted by the classifier:
      - genre: e.g. Italian, Chinese
      - location: e.g. Shinjuku
      - money: e.g. 10,000 yen
      - other: everything else
    """
    # --- dialogue act type estimation ---
    features = sent2features_(sent)
    act_type = self.__predictor.predict([features])

    # --- attribute extraction (e.g. the concrete genre string) ---
    surfaces, morph_features = analyze_morph(sent)
    morphed_sent = [[surface] + feats.split(',')
                    for surface, feats in zip(surfaces, morph_features)]
    features = sent2features(morphed_sent)
    named_entity = self.__extractor.extract(features, morphed_sent)

    dialogue_act = {'user_act_type': act_type}
    # Attach the extracted attributes to the result.
    dialogue_act.update(dict(named_entity))
    return dialogue_act
def sent2features_(sent):
    """Convert *sent* into bag-of-words features via the shared dictionary."""
    from training_data_generator.scripts.analyzer import analyze_morph

    # Morphological analysis yields the surface forms we featurize.
    surfaces, _ = analyze_morph(sent)
    dictionary_path = os.path.join(os.path.dirname(__file__), 'dic.txt')
    vocabulary = corpora.Dictionary.load_from_text(dictionary_path)
    return to_features(vocabulary, surfaces)
def sent2features_(sent):
    """Convert *sent* into bag-of-words features via the shared dictionary."""
    from training_data_generator.scripts.analyzer import analyze_morph
    dic_path = os.path.join(os.path.dirname(__file__), 'dic.txt')
    # Morphologically analyze with MeCab and keep only the surface forms.
    surfaces, _ = analyze_morph(sent)
    # Corpus dictionary mapping tokens to feature ids.
    dictionary = corpora.Dictionary.load_from_text(dic_path)
    features = to_features(dictionary, surfaces)
    return features
def execute(self, sent):
    """Classify the dialogue act of *sent* and merge in extracted entities."""
    # Dialogue act type prediction from bag-of-words features.
    features = sent2features_(sent)
    act_type = self.__predictor.predict([features])

    # Morpheme rows of [surface, *features] feed the entity extractor.
    surfaces, morph_features = analyze_morph(sent)
    morphed_sent = [[surface] + feats.split(',')
                    for surface, feats in zip(surfaces, morph_features)]
    crf_features = sent2features(morphed_sent)
    named_entity = self.__extractor.extract(crf_features, morphed_sent)

    dialogue_act = {'user_act_type': act_type}
    dialogue_act.update(dict(named_entity))
    return dialogue_act
def matching(sentence, ne_list):
    """Produce IOB2-labelled morpheme rows for *sentence*.

    *ne_list* is a sequence of (word, tag) pairs. Each output row is
    [surface, *morph_features, label] where label is 'B-<tag>'/'I-<tag>'
    for morphemes covered by a named entity and 'O' otherwise.
    """

    def char_spans(text, entities):
        # Character (start, end) offsets of each entity word, searched
        # left to right so repeated words bind to successive occurrences.
        spans = []
        cursor = 0
        for word, _tag in entities:
            word = ''.join(word.split(' '))
            start = text.index(word, cursor)
            cursor = start + len(word)
            spans.append((start, cursor))
        return spans

    def morph_index_groups(morphs, spans):
        # For each character span, the indices of the morphemes it covers.
        groups = []
        for start, end in spans:
            consumed = 0
            covered = []
            for idx, morph in enumerate(morphs):
                if consumed < start:
                    consumed += len(morph)
                elif consumed < end:
                    covered.append(idx)
                    consumed += len(morph)
                else:
                    break
            groups.append(covered)
        return groups

    wakati, features = analyze_morph(sentence)
    # Strip spaces so character offsets line up with the morpheme stream.
    sentence = ''.join(sentence.split(' '))
    word_pos_list = char_spans(sentence, ne_list)
    morph_pos_list = morph_index_groups(wakati, word_pos_list)

    # IOB2 tagging: first covered morpheme gets B-, the rest get I-.
    ylabel = ['O'] * len(wakati)
    for (_word, tag), covered in zip(ne_list, morph_pos_list):
        for position, morph_idx in enumerate(covered):
            ylabel[morph_idx] = ('B' if position == 0 else 'I') + '-' + tag

    labeled_sent = [[wakati[i]] + features[i].split(',') + [ylabel[i]]
                    for i in range(len(wakati))]

    # Sanity check: each span's characters must equal the concatenation
    # of its covered morphemes; report any alignment failure.
    for (start, end), covered in zip(word_pos_list, morph_pos_list):
        span_text = sentence[start:end]
        morph_text = ''.join([wakati[i] for i in covered])
        if span_text != morph_text:
            print(span_text, morph_text)

    return labeled_sent
def matching(sentence, ne_list):
    """Align named-entity (word, tag) pairs to morphemes and emit IOB2 labels.

    Returns one row per morpheme of *sentence*: [surface, *morph_features,
    label], where label is 'B-<tag>'/'I-<tag>' inside an entity, else 'O'.
    """

    def get_word_pos_list(sentence, ne_list):
        # Character (start, end) span of each entity word, searched left to
        # right so duplicate words bind to successive occurrences.
        word_pos_list = []
        searched_pos = 0
        for nw, ne in ne_list:
            nw = ''.join(nw.split(' '))  # entity words may contain spaces
            idx = sentence.index(nw, searched_pos)
            searched_pos = idx + len(nw)
            word_pos_list.append((idx, searched_pos))
        return word_pos_list

    def get_morph_pos_list(wakati, word_pos_list):
        # Indices of the morphemes whose characters fall inside each span.
        morph_pos_list = []
        for start_pos, end_pos in word_pos_list:
            ch_cnt = 0
            morph_pos = []
            for i, morph in enumerate(wakati):
                if ch_cnt < start_pos:
                    ch_cnt += len(morph)
                elif ch_cnt < end_pos:
                    morph_pos.append(i)
                    ch_cnt += len(morph)
                else:
                    break
            morph_pos_list.append(morph_pos)
        return morph_pos_list

    def tagging(morph_pos_list, ne_list, ylabel):
        # Write IOB2 tags into ylabel in place: B- on the first morpheme of
        # each entity, I- on the rest.
        for i in range(len(ne_list)):
            nw, ne = ne_list[i]
            morph_pos = morph_pos_list[i]
            for j, k in enumerate(morph_pos):
                prefix = 'B' if j == 0 else 'I'
                ylabel[k] = prefix + '-' + ne

    wakati, features = analyze_morph(sentence)
    # Strip spaces so character offsets line up with the morpheme stream.
    sentence = ''.join(sentence.split(' '))
    word_pos_list = get_word_pos_list(sentence, ne_list)
    morph_pos_list = get_morph_pos_list(wakati, word_pos_list)
    ylabel = ['O'] * len(wakati)
    tagging(morph_pos_list, ne_list, ylabel)
    labeled_sent = []
    for i in range(len(wakati)):
        tmp = []
        tmp.append(wakati[i])
        tmp.extend(features[i].split(','))
        tmp.append(ylabel[i])
        labeled_sent.append(tmp)
    # Error check: report spans whose characters don't match the
    # concatenation of their covered morphemes (alignment failure).
    for word_pos, morph_pos in zip(word_pos_list, morph_pos_list):
        start_pos, end_pos = word_pos
        word1, word2 = sentence[start_pos:end_pos], ''.join(
            [wakati[i] for i in morph_pos])
        if word1 != word2:
            print(word1, word2)
    return labeled_sent