def load_train_data(self):
    """Build the training set for the 'accept which' entity output module.

    Reads the task-5 training file located by ``util.get_data_fname`` and
    walks every story whose utterances satisfy ``knowledge.is_terminate``.
    Bot turns (every second utterance, the first turn being the user's)
    are scanned: before an accepted turn is seen, the most recent
    ``<R_name>`` value is remembered; from the accepted turn onwards, the
    restaurant is derived from a ``<R_phone>`` or ``<R_address>`` value.

    Returns:
        tuple: ``(train_x, train_y)``. ``train_x`` holds the extracted form
        of the utterance that preceded the accepted bot turn; ``train_y``
        holds 1 when the accepted restaurant equals the last mentioned one
        and 0 otherwise. A story contributes a sample only when a phone or
        address value was found.
    """
    print('Start to load training data for entity output module; accept which')
    fname = util.get_data_fname(task=5, trn=True)
    # The context manager closes the handle even when json.load raises.
    with open(fname, 'r') as fd:
        json_data = json.load(fd)

    train_x = []
    train_y = []
    for i, story in enumerate(json_data):
        if i % 100 == 0:
            print(i)  # progress indicator

        _, utterances = util.split_knowledge(
            story['utterances'] + [story['answer']['utterance']])
        if not knowledge.is_terminate(utterances):
            continue

        last_mentioned_rest = None
        accept_rest = None
        accept_utter = None
        for idx, sent in enumerate(utterances):
            # Even positions are user turns; only bot turns are inspected.
            if idx % 2 == 0:
                continue

            ext_values, _ = util.extract_sent(sent)
            if knowledge.is_accepted(sent):
                _, accept_utter = util.extract_sent(utterances[idx - 1])

            if accept_utter is None:
                if '<R_name>' in ext_values:
                    last_mentioned_rest = ext_values['<R_name>'][0]
            elif '<R_phone>' in ext_values:
                # presumably strips a 6-character "_phone" suffix -- TODO confirm
                accept_rest = ext_values['<R_phone>'][0][:-6]
            elif '<R_address>' in ext_values:
                # presumably strips an 8-character "_address" suffix -- TODO confirm
                accept_rest = ext_values['<R_address>'][0][:-8]

        if accept_rest is not None:
            train_x.append(accept_utter)
            if accept_rest == last_mentioned_rest:
                train_y.append(1)  # last mentioned
            else:
                train_y.append(0)  # last recommended
    return train_x, train_y
def load_train_data(self):
    """Build the training set for the 'next mention' entity output module.

    Reads the task-3 training file located by ``util.get_data_fname``. For
    every utterance that carries a ``<R_name>`` value one sample may be
    produced:

    * recommendation turns (``knowledge.is_recommend``) are paired with the
      preceding utterance, or the one two turns earlier when the preceding
      one is ``'<silence>'``, and labelled at ``self.next_idx``; nothing is
      emitted when the chosen utterance is still ``'<silence>'``;
    * any other turn is paired with the preceding utterance and labelled at
      ``self.fst_idx`` and/or ``self.prev_idx`` depending on where the last
      word of the sentence sits in the sorted restaurant list.

    Returns:
        tuple: ``(train_x, train_y)`` with extracted sentences and
        three-element 0/1 label lists.

    Raises:
        AssertionError: a non-recommendation mention matched neither the
            first nor the previous restaurant.
        ValueError: the mentioned name is absent from the sorted list.
    """
    print('Start to load training data for entity output module; next mention')
    fname = util.get_data_fname(task=3, trn=True)
    # The context manager closes the handle even when json.load raises.
    with open(fname, 'r') as fd:
        json_data = json.load(fd)

    train_x = []
    train_y = []
    for i, story in enumerate(json_data):
        if i % 100 == 0:
            print(i)  # progress indicator

        api_res, utterances = util.split_knowledge(
            story['utterances'] + [story['answer']['utterance']])
        sorted_rest = util.sort_knowledge(api_res)
        recommend_idx = -1  # index of the latest recommendation in this story

        for idx, sent in enumerate(utterances):
            y = [0, 0, 0]
            ext_value, _ = util.extract_sent(sent)
            if '<R_name>' not in ext_value:
                continue

            if knowledge.is_recommend(sent):
                # recommendation
                recommend_idx += 1
                request_utter_idx = idx - 1
                if utterances[request_utter_idx] == '<silence>':
                    # Skip back over the silent turn and the bot turn before it.
                    request_utter_idx -= 2
                if utterances[request_utter_idx] != '<silence>':
                    _, ext_sent = util.extract_sent(
                        utterances[request_utter_idx])
                    train_x.append(ext_sent)
                    y[self.next_idx] = 1
                    train_y.append(y)
            else:
                # only mention
                _, ext_sent = util.extract_sent(utterances[idx - 1])
                train_x.append(ext_sent)
                rest_name = sent.split()[-1]
                rest_idx = sorted_rest.index(rest_name)
                if rest_idx == 0:
                    y[self.fst_idx] = 1
                if recommend_idx - rest_idx == 1:
                    y[self.prev_idx] = 1
                assert y != [0, 0, 0]
                train_y.append(y)
    return train_x, train_y
def load_train_data(self):
    """Collect 'accept which' training samples from the task-5 training file.

    Only stories for which ``knowledge.is_terminate`` is true are used.
    Within such a story the bot turns (every second utterance; the user
    speaks first) are examined in order:

    * until a turn satisfies ``knowledge.is_accepted``, the first
      ``<R_name>`` value of each bot turn is stored as the last mentioned
      restaurant;
    * from the accepted turn on, the first ``<R_phone>`` value (minus its
      last 6 characters) or, failing that, the first ``<R_address>`` value
      (minus its last 8 characters) is stored as the accepted restaurant.

    Returns:
        tuple: ``(train_x, train_y)`` -- the extracted utterance preceding
        the accepted turn, and 1 if the accepted restaurant is the last
        mentioned one, else 0. Stories without a phone/address value after
        acceptance yield no sample.
    """
    print('Start to load training data for entity output module; accept which')
    fname = util.get_data_fname(task=5, trn=True)
    # `with` guarantees the file is closed if parsing fails midway.
    with open(fname, 'r') as fd:
        json_data = json.load(fd)

    train_x = []
    train_y = []
    for i, story in enumerate(json_data):
        if i % 100 == 0:
            print(i)  # progress indicator

        _, utterances = util.split_knowledge(
            story['utterances'] + [story['answer']['utterance']])
        if not knowledge.is_terminate(utterances):
            continue

        last_mentioned_rest = None
        accept_rest = None
        accept_utter = None
        user_turn = True
        for idx, sent in enumerate(utterances):
            if user_turn:
                user_turn = False
                continue
            user_turn = True

            ext_values, _ = util.extract_sent(sent)
            if knowledge.is_accepted(sent):
                _, accept_utter = util.extract_sent(utterances[idx - 1])

            if accept_utter is None:
                if '<R_name>' in ext_values:
                    last_mentioned_rest = ext_values['<R_name>'][0]
            elif '<R_phone>' in ext_values:
                accept_rest = ext_values['<R_phone>'][0][:-6]
            elif '<R_address>' in ext_values:
                accept_rest = ext_values['<R_address>'][0][:-8]

        if accept_rest is not None:
            train_x.append(accept_utter)
            if accept_rest == last_mentioned_rest:
                train_y.append(1)  # last mentioned
            else:
                train_y.append(0)  # last recommended
    return train_x, train_y
def load_train_data(self, task):
    """Build per-story training sequences for the action selector module.

    Each story's utterances (with the answer utterance appended) are
    consumed as alternating user/bot turns, starting with the user. For a
    user turn the slot-value state is updated through
    ``self.entity_tracking`` and the extracted sentence, the resulting
    context and the previous bot action template are recorded. For a bot
    turn the action template is remembered.

    Args:
        task: task identifier forwarded to ``util.get_data_fname``.

    Returns:
        tuple: ``(train_user_utter, train_context, train_bot_utter,
        train_y)``. The first three hold one list per story; ``train_y``
        holds, per story, the action template of the last bot turn seen
        (``None`` if the story had no bot turn).
    """
    print('Start to load training data for action selector module')
    train_user_utter = []
    train_context = []
    train_bot_utter = []
    train_y = []

    fname = util.get_data_fname(task)
    # The context manager closes the handle even when json.load raises.
    with open(fname, 'r') as fd:
        json_data = json.load(fd)

    api_order = knowledge.get_api_order(unseen_slot=False)
    for i, story in enumerate(json_data):
        if i % 100 == 0:
            print(i)  # progress indicator

        _, utterances = util.split_knowledge(
            story['utterances'] + [story['answer']['utterance']])

        story_user_utter = []
        story_context = []
        story_bot_utter = []
        user_turn = True
        bot_sent = None
        sv_pair = {}
        for sent in utterances:
            if user_turn:
                ext_values, ext_sent = util.extract_sent(sent)
                sv_pair = self.entity_tracking.predict(
                    sv_pair, ext_values, ext_sent)
                context = self.entity_tracking.get_context(api_order, sv_pair)
                story_user_utter.append(ext_sent)
                story_context.append(context)
                # Bot action that preceded this user turn (None for the first).
                story_bot_utter.append(bot_sent)
            else:
                bot_sent = util.get_action_template(sent)
            user_turn = not user_turn

        train_user_utter.append(story_user_utter)
        train_context.append(story_context)
        train_bot_utter.append(story_bot_utter)
        train_y.append(bot_sent)
    return train_user_utter, train_context, train_bot_utter, train_y
def predict(self, sent):
    """Classify which restaurant an accepting utterance refers to.

    The sentence is reduced to its extracted form, vectorised as a
    one-element batch and scored by ``self.model``.

    Args:
        sent: raw utterance passed to ``util.extract_sent``.

    Returns:
        str: ``'last_mentioned'`` when the first score of the first row
        exceeds 0.5, otherwise ``'last_recommended'``.
    """
    extracted = util.extract_sent(sent)[1]
    batch = util.get_multiple_sent_vector([extracted])
    score = self.model.predict(batch)[0][0]
    return 'last_mentioned' if score > 0.5 else 'last_recommended'
def predict_story(self, api_order, clr_order, story):
    """Predict the system response for one story.

    The story's utterances are replayed as alternating user/bot turns
    (user first) to rebuild the slot-value state and the action selector
    inputs. The selector's scores are multiplied by the action mask
    for the latest context, the best-scoring template is looked up in
    ``knowledge.SYS_RES_TEMP_LST`` and handed to ``self.entity_output``.

    Args:
        api_order: slot order forwarded to the tracker and output module.
        clr_order: forwarded unchanged to ``self.entity_output``.
        story: mapping with an ``'utterances'`` entry.

    Returns:
        Whatever ``self.entity_output.predict_story`` returns.
    """
    api_result, utterances = util.split_knowledge(story['utterances'])
    sorted_api_result = util.sort_knowledge(api_result)

    user_utters, contexts, bot_utters = [], [], []
    sv_pair = {}
    context = [0]  # used for masking when the story has no user turn
    last_bot_action = None
    for position, sent in enumerate(utterances):
        if position % 2:
            # Odd positions are bot turns.
            last_bot_action = util.get_action_template(sent)
            continue
        ext_values, ext_sent = util.extract_sent(sent)
        sv_pair = self.entity_tracking.predict(sv_pair, ext_values, ext_sent)
        context = self.entity_tracking.get_context(api_order, sv_pair)
        user_utters.append(ext_sent)
        contexts.append(context)
        bot_utters.append(last_bot_action)

    prob = self.action_selector.predict_story(user_utters, contexts, bot_utters)
    masked_prob = np.multiply(prob, knowledge.get_action_mask(context))
    act_template = knowledge.SYS_RES_TEMP_LST[np.argmax(masked_prob)]
    return self.entity_output.predict_story(
        api_order, clr_order, sv_pair, sorted_api_result, utterances,
        act_template)
def get_sent_label_pair(self, api_order, api_sent, utterances):
    """Pair slot-masked sentences with labels for every slot in ``api_order``.

    For each slot found in an utterance, the slot token in the extracted
    sentence is replaced by ``'<R_value>'`` and a three-element label is
    built whose hot position is the index of the API call's value among the
    utterance's values for that slot, or ``self.neither_idx`` when the
    value is absent. Afterwards every pair except the last one of each
    slot is relabelled ``[0, 0, 1]``.

    Args:
        api_order: iterable of slot names; defines the keys of the result.
        api_sent: API call sentence passed to ``util.get_api_sv``.
        utterances: iterable of raw utterances.

    Returns:
        dict: slot name -> list of ``[sentence, label]`` pairs.

    NOTE(review): a slot extracted from an utterance that is missing from
    ``api_sv`` or ``api_order`` raises ``KeyError`` -- confirm callers
    guarantee this cannot happen.
    """
    api_sv = util.get_api_sv(api_order, api_sent)
    sent_label_pair = {slot: [] for slot in api_order}

    for utterance in utterances:
        ext_values, ext_sent = util.extract_sent(utterance)
        for slot in ext_values:
            target = api_sv[slot]
            candidates = ext_values[slot]
            masked_sent = ext_sent.replace(slot, '<R_value>')
            hot = (candidates.index(target) if target in candidates
                   else self.neither_idx)
            label = [0, 0, 0]
            label[hot] = 1
            sent_label_pair[slot].append([masked_sent, label])

    # Only the final pair of each slot keeps its computed label.
    for slot in api_order:
        for pair in sent_label_pair[slot][:-1]:
            pair[1] = [0, 0, 1]
    return sent_label_pair
def load_train_data(self, task):
    """Load the action selector training data for ``task``.

    Every story is replayed turn by turn, the user speaking first and the
    answer utterance appended as the final turn. User turns update the
    slot-value pairs via ``self.entity_tracking.predict`` and contribute
    the extracted sentence, the tracker context and the preceding bot
    action template; bot turns only update the remembered action template.

    Args:
        task: task identifier forwarded to ``util.get_data_fname``.

    Returns:
        tuple: ``(train_user_utter, train_context, train_bot_utter,
        train_y)`` where the first three contain one list per story and
        ``train_y`` contains the last bot action template of each story
        (``None`` when a story had no bot turn).
    """
    print('Start to load training data for action selector module')
    train_user_utter = []
    train_context = []
    train_bot_utter = []
    train_y = []

    fname = util.get_data_fname(task)
    # `with` guarantees the file is closed if parsing fails midway.
    with open(fname, 'r') as fd:
        json_data = json.load(fd)

    api_order = knowledge.get_api_order(unseen_slot=False)
    for i, story in enumerate(json_data):
        if i % 100 == 0:
            print(i)  # progress indicator

        _, utterances = util.split_knowledge(
            story['utterances'] + [story['answer']['utterance']])

        story_user_utter = []
        story_context = []
        story_bot_utter = []
        bot_sent = None
        sv_pair = {}
        user_turn = True
        for sent in utterances:
            if not user_turn:
                bot_sent = util.get_action_template(sent)
                user_turn = True
                continue

            ext_values, ext_sent = util.extract_sent(sent)
            sv_pair = self.entity_tracking.predict(
                sv_pair, ext_values, ext_sent)
            context = self.entity_tracking.get_context(api_order, sv_pair)
            story_user_utter.append(ext_sent)
            story_context.append(context)
            # Bot action that preceded this user turn (None for the first).
            story_bot_utter.append(bot_sent)
            user_turn = False

        train_user_utter.append(story_user_utter)
        train_context.append(story_context)
        train_bot_utter.append(story_bot_utter)
        train_y.append(bot_sent)
    return train_user_utter, train_context, train_bot_utter, train_y
def predict(self, sent):
    """Return the entry of ``self.answer_lst`` the model scores highest.

    The sentence is reduced to its extracted form, vectorised as a
    one-element batch and scored by ``self.model``; the flat argmax of the
    scores selects the answer.

    Args:
        sent: raw utterance passed to ``util.extract_sent``.

    Returns:
        The element of ``self.answer_lst`` at the argmax position.
    """
    extracted = util.extract_sent(sent)[1]
    scores = self.model.predict(util.get_multiple_sent_vector([extracted]))
    return self.answer_lst[np.argmax(scores)]
def load_train_data(self):
    """Collect 'next mention' training samples from the task-3 training file.

    Utterances without a ``<R_name>`` value are ignored. The others fall
    into two cases:

    * a recommendation (``knowledge.is_recommend``): the requesting
      utterance is the previous one, or the one two turns before it when
      the previous one is ``'<silence>'``. If that utterance is not
      ``'<silence>'`` it becomes a sample labelled at ``self.next_idx``.
    * a plain mention: the previous utterance becomes a sample. The label
      is set at ``self.fst_idx`` when the sentence's last word is first in
      the sorted restaurant list, and at ``self.prev_idx`` when its rank is
      exactly one below the current recommendation index.

    Returns:
        tuple: ``(train_x, train_y)`` with extracted sentences and
        three-element 0/1 label lists.

    Raises:
        AssertionError: a plain mention received no label.
        ValueError: the mentioned name is absent from the sorted list.
    """
    print('Start to load training data for entity output module; next mention')
    fname = util.get_data_fname(task=3, trn=True)
    # `with` guarantees the file is closed if parsing fails midway.
    with open(fname, 'r') as fd:
        json_data = json.load(fd)

    train_x = []
    train_y = []
    for i, story in enumerate(json_data):
        if i % 100 == 0:
            print(i)  # progress indicator

        api_res, utterances = util.split_knowledge(
            story['utterances'] + [story['answer']['utterance']])
        sorted_rest = util.sort_knowledge(api_res)
        recommend_idx = -1  # becomes 0 at the first recommendation

        for idx, sent in enumerate(utterances):
            y = [0, 0, 0]
            ext_value, _ = util.extract_sent(sent)
            if '<R_name>' not in ext_value:
                continue

            if not knowledge.is_recommend(sent):
                # only mention
                _, ext_sent = util.extract_sent(utterances[idx - 1])
                train_x.append(ext_sent)
                rest_name = sent.split()[-1]
                rest_idx = sorted_rest.index(rest_name)
                if rest_idx == 0:
                    y[self.fst_idx] = 1
                if recommend_idx - rest_idx == 1:
                    y[self.prev_idx] = 1
                assert y != [0, 0, 0]
                train_y.append(y)
                continue

            # recommendation
            recommend_idx += 1
            request_utter_idx = idx - 1
            if utterances[request_utter_idx] == '<silence>':
                # Skip back over the silent turn and the bot turn before it.
                request_utter_idx -= 2
            if utterances[request_utter_idx] != '<silence>':
                _, ext_sent = util.extract_sent(utterances[request_utter_idx])
                train_x.append(ext_sent)
                y[self.next_idx] = 1
                train_y.append(y)
    return train_x, train_y