def evaluate_val_for_train(self, sess, data):
    # `data` is a 3-tuple of raw (unpadded) lists; padding is done per batch below.
    val_history, val_response, val_labels = data
    all_candidate_scores = []
    low = 0
    batch_size_for_val = 4000
    while True:
        batch_history = self.copy_list(val_history[low:low + batch_size_for_val])
        batch_history, batch_history_len = utils.multi_sequences_padding(batch_history, self.max_sentence_len)
        batch_history, batch_history_len = np.array(batch_history), np.array(batch_history_len)
        batch_response = self.copy_list(val_response[low:low + batch_size_for_val])
        batch_response_len = np.array(utils.get_sequences_length(batch_response, maxlen=self.max_sentence_len))
        batch_response = np.array(pad_sequences(batch_response, padding='post', maxlen=self.max_sentence_len))
        feed_dict = {self.utterance_ph: batch_history,
                     self.all_utterance_len_ph: batch_history_len,
                     self.response_ph: batch_response,
                     self.response_len: batch_response_len,
                     self.y_true: np.array(val_labels[low:low + batch_size_for_val]),
                     }
        candidate_scores, loss = sess.run([self.y_pred, self.total_loss], feed_dict=feed_dict)
        all_candidate_scores.append(candidate_scores[:, 1])
        low = low + batch_size_for_val
        if low >= len(val_labels):
            break
    all_candidate_scores = np.concatenate(all_candidate_scores, axis=0)
    # Note: `loss` is the loss of the last validation batch only, not an average.
    return Evaluate.precision_of_matching_1(all_candidate_scores, val_labels,
                                            response_num_per_query=11), loss
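# --- Hedged usage sketch (illustration only; token ids and values are made up).
# evaluate_val_for_train expects `data` as a 3-tuple of raw, unpadded token-id
# lists, matching the unpacking above; padding happens inside, per batch:
#
#   val_history = [[[3, 5, 9], [2, 7]],      # sample 0: two history utterances
#                  [[4, 4, 1], [8, 2, 6]]]   # sample 1
#   val_response = [[5, 1, 2], [9, 3]]       # one candidate response per sample
#   val_labels = [1, 0]                      # 1 = true response, 0 = negative
#   r10_1, last_batch_loss = self.evaluate_val_for_train(
#       sess, (val_history, val_response, val_labels))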
def Evaluate(self, sess):
    with open(evaluate_file, 'rb') as f:
        utterances, last_utterance, labels = pickle.load(f)
    self.all_candidate_scores = []
    utterances, utterances_len = utils.multi_sequences_padding(
        utterances, self.max_num_utterance, self.max_sentence_len)
    utterances, utterances_len = np.array(utterances), np.array(utterances_len)
    last_utterance_len = np.array(utils.get_sequences_length(last_utterance, self.max_sentence_len))
    last_utterance = np.array(pad_sequences(last_utterance, padding='post', maxlen=self.max_sentence_len))
    low = 0
    while True:
        feed_dict = {
            self.utterance_ph: utterances[low:low + 200],
            self.all_utterance_len_ph: utterances_len[low:low + 200],
            self.response_ph: last_utterance[low:low + 200],
            self.response_len_ph: last_utterance_len[low:low + 200],
        }
        candidate_scores = sess.run(self.y_pred, feed_dict=feed_dict)
        self.all_candidate_scores.append(candidate_scores[:, 1])
        low = low + 200
        if low >= utterances.shape[0]:
            break
    all_candidate_scores = np.concatenate(self.all_candidate_scores, axis=0)
    computeR10_1(all_candidate_scores, labels)
    computeR2_1(all_candidate_scores, labels)
def predict(self, model_path, history, response):
    saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.95  # use up to 95% of GPU memory
    first = True
    with tf.Session(config=config) as sess:
        saver.restore(sess, model_path)
        all_candidate_scores = []
        all_pred_labels = []
        low = 0
        batch_size_for_val = 3000
        while True:
            batch_history = self.copy_list(history[low:low + batch_size_for_val])
            batch_history, batch_history_len = utils.multi_sequences_padding(batch_history, self.max_sentence_len)
            batch_history, batch_history_len = np.array(batch_history), np.array(batch_history_len)
            batch_response = self.copy_list(response[low:low + batch_size_for_val])
            batch_response_len = np.array(utils.get_sequences_length(batch_response, maxlen=self.max_sentence_len))
            batch_response = np.array(pad_sequences(batch_response, padding='post', maxlen=self.max_sentence_len))
            feed_dict = {self.utterance_ph: batch_history,
                         self.all_utterance_len_ph: batch_history_len,
                         self.response_ph: batch_response,
                         self.response_len: batch_response_len,
                         }
            candidate_scores, pred_labels = sess.run([self.y_pred, self.class_label_pred], feed_dict=feed_dict)
            if first:
                print(pred_labels)
                first = False
            all_candidate_scores.append(candidate_scores[:, 1])
            all_pred_labels.append(pred_labels)
            low = low + batch_size_for_val
            if low >= len(response):
                break
        all_candidate_scores = np.concatenate(all_candidate_scores, axis=0)
        all_pred_labels = np.concatenate(all_pred_labels, axis=0)
        return all_candidate_scores, all_pred_labels
def Evaluate(self, sess):
    with open(ev_path, 'rb') as f:
        history, true_utt, labels = pickle.load(f)
    with open(ev_char_path, 'rb') as f:
        utt_char, true_ch_utt, ch_labels = pickle.load(f)
    self.all_candidate_scores = []
    history, history_len = utils.multi_sequences_padding(history, self.max_sentence_len)
    history, history_len = np.array(history), np.array(history_len)
    true_utt_len = np.array(utils.get_sequences_length(true_utt, maxlen=self.max_sentence_len))
    true_utt = np.array(pad_sequences(true_utt, padding='post', maxlen=self.max_sentence_len))
    utt_char = multi_char_sequences_padding(utt_char, 50)
    true_ch_utt = np.array(pad_sequences(true_ch_utt, padding='post', maxlen=self.max_sentence_len))
    low = 0
    dro = 0.1
    while True:
        feed_dict = {
            self.utterance_ph: history[low:low + 200],
            self.all_utterance_len_ph: history_len[low:low + 200],
            self.response_ph: true_utt[low:low + 200],
            self.response_len: true_utt_len[low:low + 200],
            self.response_cph: true_ch_utt[low:low + 200],  # todo negs
            self.utterance_cph: utt_char[low:low + 200],
            self.dropout: dro,
            self.N: 200,
            self.sample_numbers: 1
        }
        candidate_scores = sess.run(self.y_pred, feed_dict=feed_dict)
        self.all_candidate_scores.append(candidate_scores[:, 1])
        low = low + 200
        if low >= history.shape[0]:
            break
    all_candidate_scores = np.concatenate(self.all_candidate_scores, axis=0)
    computeR10_1 = Evaluate.ComputeR10_1(all_candidate_scores, labels)
    computeR2_1 = Evaluate.ComputeR2_1(all_candidate_scores, labels)
    return computeR10_1, computeR2_1
def Predict(self, sess, history, es_candidate_list):
    """Rerank the coarse Elasticsearch candidates precisely, given the
    dialogue history (currently a single knowledge turn)."""
    history_index = []
    for item in history:
        history_index.append([word2idx[tmp] if tmp in word2idx else 0 for tmp in item])
    candidate_index = []
    for item in es_candidate_list:
        candidate_index.append([word2idx[tmp] if tmp in word2idx else 0 for tmp in item])
    history = [history_index] * len(candidate_index)
    true_utt = candidate_index
    if mode_debug:
        print('history embedding', history)
        print('true_utt embedding', true_utt)
    self.all_candidate_scores = []
    history, history_len = utils.multi_sequences_padding(history, self.max_sentence_len)
    history, history_len = np.array(history), np.array(history_len)
    true_utt_len = np.array(utils.get_sequences_length(true_utt, maxlen=self.max_sentence_len))
    true_utt = np.array(pad_sequences(true_utt, padding='post', maxlen=self.max_sentence_len))
    if mode_debug:
        print('history embedding padding', history)
        print('true_utt embedding padding', true_utt)
        print('history len', history_len)
        print('true utt len', true_utt_len)
    low = 0
    while True:
        feed_dict = {
            self.utterance_ph: history[low:low + self.batch_size],
            self.all_utterance_len_ph: history_len[low:low + self.batch_size],
            self.response_ph: true_utt[low:low + self.batch_size],
            self.response_len: true_utt_len[low:low + self.batch_size],
        }
        candidate_scores, logits = sess.run([self.y_pred, self.logits], feed_dict=feed_dict)
        self.all_candidate_scores.append(candidate_scores[:, 1])  # positive-class (match) score
        if mode_debug:
            print('# logits', logits)
            print('# candidate scores', candidate_scores)
            print('all candidate scores', self.all_candidate_scores)
        low = low + self.batch_size
        if low >= history.shape[0]:
            break
    all_candidate_scores = np.concatenate(self.all_candidate_scores, axis=0)
    return all_candidate_scores
def TrainModel(self, continue_train=False, previous_modelpath="model"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    merged = tf.summary.merge_all()
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter('output2', sess.graph)
        with open(response_file, 'rb') as f:
            actions = pickle.load(f)
        with open(embedding_file, 'rb') as f:
            embeddings = pickle.load(f, encoding="bytes")
        with open(history_file, 'rb') as f:
            history, true_utt = pickle.load(f)
        # with open("data/biglearn_test_small.txt", encoding="utf8") as f:
        #     lines = f.readlines()
        #     history, true_utt = utils.build_evaluate_data(lines)
        history, history_len = utils.multi_sequences_padding(history, self.max_sentence_len)
        true_utt_len = np.array(utils.get_sequences_length(true_utt, maxlen=self.max_sentence_len))
        true_utt = np.array(pad_sequences(true_utt, padding='post', maxlen=self.max_sentence_len))
        actions_len = np.array(utils.get_sequences_length(actions, maxlen=self.max_sentence_len))
        actions = np.array(pad_sequences(actions, padding='post', maxlen=self.max_sentence_len))
        history, history_len = np.array(history), np.array(history_len)
        if not continue_train:
            sess.run(init)
            sess.run(self.embedding_init, feed_dict={self.embedding_ph: embeddings})
        else:
            saver.restore(sess, previous_modelpath)
        low = 0
        epoch = 1
        while epoch < 10:
            n_sample = min(low + self.batch_size, history.shape[0]) - low
            # Draw `negative_samples` blocks of random responses from the candidate pool.
            negative_indices = [np.random.randint(0, actions.shape[0], n_sample)
                                for _ in range(self.negative_samples)]
            negs = [actions[negative_indices[i], :] for i in range(self.negative_samples)]
            negs_len = [actions_len[negative_indices[i]] for i in range(self.negative_samples)]
            feed_dict = {
                self.utterance_ph: np.concatenate([history[low:low + n_sample]] * (self.negative_samples + 1), axis=0),
                self.all_utterance_len_ph: np.concatenate([history_len[low:low + n_sample]] * (self.negative_samples + 1), axis=0),
                self.response_ph: np.concatenate([true_utt[low:low + n_sample]] + negs, axis=0),
                self.response_len: np.concatenate([true_utt_len[low:low + n_sample]] + negs_len, axis=0),
                self.y_true: np.concatenate([np.ones(n_sample)] + [np.zeros(n_sample)] * self.negative_samples, axis=0)
            }
            _, summary = sess.run([self.train_op, merged], feed_dict=feed_dict)
            train_writer.add_summary(summary)
            low += n_sample
            if low % 102400 == 0:
                print("loss", sess.run(self.total_loss, feed_dict=feed_dict))
                self.Evaluate(sess)
            if low >= history.shape[0]:
                low = 0
                saver.save(sess, "model/model.{0}".format(epoch))
                print(sess.run(self.total_loss, feed_dict=feed_dict))
                print('epoch={i}'.format(i=epoch))
                epoch += 1
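# --- Hedged sketch of the negative-sampling feed layout used above (standalone
# numpy illustration, not part of the training code). Each step repeats the same
# n_sample contexts (negative_samples + 1) times: once against the true
# responses (label 1), then once per block of randomly drawn negatives (label 0):
#
#   import numpy as np
#   n_sample, k, max_len = 2, 2, 4                      # k = negative_samples
#   contexts = np.arange(n_sample * max_len).reshape(n_sample, max_len)
#   true_utt = np.ones((n_sample, max_len), dtype=int)
#   pool = np.zeros((10, max_len), dtype=int)           # response candidate pool
#   negs = [pool[np.random.randint(0, pool.shape[0], n_sample)] for _ in range(k)]
#   utt_feed = np.concatenate([contexts] * (k + 1), axis=0)           # shape (6, 4)
#   resp_feed = np.concatenate([true_utt] + negs, axis=0)             # shape (6, 4)
#   labels = np.concatenate([np.ones(n_sample)] + [np.zeros(n_sample)] * k)
#   # labels == [1. 1. 0. 0. 0. 0.]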
def Evaluate(self, test_path, model_path):
    saver = tf.train.Saver()
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.4  # allocate only 40% of GPU memory
    with open(test_path, 'rb') as f:
        val_history, val_response, val_labels = pickle.load(f)
    first = True
    with tf.Session(config=config) as sess:
        saver.restore(sess, model_path)
        all_candidate_scores = []
        all_pred_labels = []
        low = 0
        batch_size_for_val = 2000
        while True:
            batch_history = self.copy_list(val_history[low:low + batch_size_for_val])
            batch_history, batch_history_len = utils.multi_sequences_padding(batch_history, self.max_sentence_len)
            batch_history, batch_history_len = np.array(batch_history), np.array(batch_history_len)
            batch_response = self.copy_list(val_response[low:low + batch_size_for_val])
            batch_response_len = np.array(utils.get_sequences_length(batch_response, maxlen=self.max_sentence_len))
            batch_response = np.array(pad_sequences(batch_response, padding='post', maxlen=self.max_sentence_len))
            feed_dict = {
                self.utterance_ph: batch_history,
                self.all_utterance_len_ph: batch_history_len,
                self.response_ph: batch_response,
                self.response_len: batch_response_len,
            }
            candidate_scores, pred_labels = sess.run([self.y_pred, self.class_label_pred], feed_dict=feed_dict)
            if first:
                print(pred_labels)
                first = False
            all_candidate_scores.append(candidate_scores[:, 1])
            all_pred_labels.append(pred_labels)
            low = low + batch_size_for_val
            if low >= len(val_labels):
                break
        all_candidate_scores = np.concatenate(all_candidate_scores, axis=0)
        all_pred_labels = np.concatenate(all_pred_labels, axis=0)
        return Evaluate.precision_of_classification(all_pred_labels, val_labels), \
               Evaluate.mrr_and_rnk(all_candidate_scores, val_labels, response_num_per_query=11)
def Predict(self, sess, single_history, true_utt_list):
    tmp_single_history = []
    for tmp_history in single_history:
        tmp_single_history.append([word2idx[tmp] if tmp in word2idx else word2idx['_UNK']
                                   for tmp in tmp_history])
    tmp_true_utt_list = []
    for tmp_utt in true_utt_list:
        tmp_true_utt_list.append([word2idx[tmp] if tmp in word2idx else word2idx['_UNK']
                                  for tmp in tmp_utt])
    history = [tmp_single_history] * len(true_utt_list)
    true_utt = tmp_true_utt_list
    self.all_candidate_scores = []
    history, history_len = utils.multi_sequences_padding(history, self.max_sentence_len)
    history, history_len = np.array(history), np.array(history_len)
    true_utt_len = np.array(utils.get_sequences_length(true_utt, maxlen=self.max_sentence_len))
    true_utt = np.array(pad_sequences(true_utt, padding='post', maxlen=self.max_sentence_len))
    low = 0
    while True:
        feed_dict = {
            self.utterance_ph: history[low:low + 200],
            self.all_utterance_len_ph: history_len[low:low + 200],
            self.response_ph: true_utt[low:low + 200],
            self.response_len: true_utt_len[low:low + 200],
        }
        candidate_scores = sess.run(self.y_pred, feed_dict=feed_dict)
        self.all_candidate_scores.append(candidate_scores[:, 1])
        low = low + 200
        if low >= history.shape[0]:
            break
    all_candidate_scores = np.concatenate(self.all_candidate_scores, axis=0)
    return all_candidate_scores
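# --- Hedged usage sketch (hypothetical inputs): Predict scores a list of raw
# word-token candidates against one conversation history and returns one
# positive-class score per candidate, so the best reply is the argmax:
#
#   single_history = [['how', 'are', 'you'], ['fine', 'thanks']]
#   candidates = [['glad', 'to', 'hear', 'it'], ['what', 'is', 'up']]
#   scores = self.Predict(sess, single_history, candidates)  # shape: (len(candidates),)
#   best_reply = candidates[int(np.argmax(scores))]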
def Evaluate(self, sess):
    with open(evaluate_file, 'rb') as f:
        history, true_utt, labels = pickle.load(f)
    self.all_candidate_scores = []
    history, history_len = utils.multi_sequences_padding(history, self.max_sentence_len)
    history, history_len = np.array(history), np.array(history_len)
    true_utt_len = np.array(utils.get_sequences_length(true_utt, maxlen=self.max_sentence_len))
    true_utt = np.array(pad_sequences(true_utt, padding='post', maxlen=self.max_sentence_len))
    low = 0
    while True:
        feed_dict = {
            self.utterance_ph: history[low:low + 200],
            self.all_utterance_len_ph: history_len[low:low + 200],
            self.response_ph: true_utt[low:low + 200],
            self.response_len: true_utt_len[low:low + 200],
        }
        candidate_scores = sess.run(self.y_pred, feed_dict=feed_dict)
        self.all_candidate_scores.append(candidate_scores[:, 1])
        low = low + 200
        if low >= history.shape[0]:
            break
    all_candidate_scores = np.concatenate(self.all_candidate_scores, axis=0)
    Evaluate.ComputeR10_1(all_candidate_scores, labels)
    Evaluate.ComputeR2_1(all_candidate_scores, labels)
def TrainModel(self, continue_train=False, previous_modelpath="model"):
    """Load data"""
    with open(data_file, mode='rb') as f:
        results = pickle.load(f)
    print('read data done')
    history, true_utt, false_utt = results['history'], results['true_utt'], results['false_utt']
    history, history_len = utils.multi_sequences_padding(history, self.max_sentence_len)
    true_utt_len = np.array(utils.get_sequences_length(true_utt, maxlen=self.max_sentence_len))
    true_utt = np.array(pad_sequences(true_utt, padding='post', maxlen=self.max_sentence_len))
    false_utt_len = np.array(utils.get_sequences_length(false_utt, maxlen=self.max_sentence_len))
    false_utt = np.array(pad_sequences(false_utt, padding='post', maxlen=self.max_sentence_len))
    history, history_len = np.array(history), np.array(history_len)
    print('data index and padding done')
    print('history top 10', history[:10])
    print('true top 10 ', true_utt[:10])
    print('false top 10', false_utt[:10])
    print('his len top 10', history_len[:10])
    print('true len top 10', true_utt_len[:10])
    print('false len top 10', false_utt_len[:10])
    import random
    # Re-seed with the same value before every shuffle so all six arrays stay aligned.
    randnum = random.randint(0, 100)
    random.seed(randnum)
    random.shuffle(history)
    random.seed(randnum)
    random.shuffle(history_len)
    random.seed(randnum)
    random.shuffle(true_utt)
    random.seed(randnum)
    random.shuffle(true_utt_len)
    random.seed(randnum)
    random.shuffle(false_utt)
    random.seed(randnum)
    random.shuffle(false_utt_len)
    """Initialize parameters"""
    low = 0
    epoch = 1
    all_samples_len = len(true_utt)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    with tf.Session() as sess:
        if not continue_train:
            sess.run(init)
            # `embeddings` is expected to be defined at module level; it is not loaded here.
            sess.run(self.embedding_init, feed_dict={self.embedding_ph: embeddings})
        else:
            saver.restore(sess, previous_modelpath)
        while epoch < self.epoch:
            n_sample = min(low + self.batch_size, history.shape[0]) - low
            negative_indices = [np.random.randint(0, false_utt.shape[0], n_sample)
                                for _ in range(self.negative_samples)]
            negs = np.asarray([false_utt[negative_indices[i]] for i in range(self.negative_samples)])
            negs_len = np.asarray([false_utt_len[negative_indices[i]] for i in range(self.negative_samples)])
            # Note: these reshapes only line up with true_utt's shape when negative_samples == 1.
            negs = np.reshape(negs, (n_sample, -1))
            negs_len = np.reshape(negs_len, (n_sample,))
            # print('negs shape', negs.shape)
            # print('negs len shape', negs_len.shape)
            # print('true utt shape', true_utt.shape)
            feed_dict = {
                self.utterance_ph: np.concatenate([history[low:low + n_sample]] * (self.negative_samples + 1), axis=0),
                self.all_utterance_len_ph: np.concatenate([history_len[low:low + n_sample]] * (self.negative_samples + 1), axis=0),
                self.response_ph: np.concatenate([true_utt[low:low + n_sample], negs], axis=0),
                self.response_len: np.concatenate([true_utt_len[low:low + n_sample], negs_len], axis=0),
                self.y_true: np.concatenate([np.ones(n_sample)] + [np.zeros(n_sample)] * self.negative_samples, axis=0)
            }
            _, logits, loss, prob = sess.run([self.train_op, self.logits, self.total_loss, self.y_pred],
                                             feed_dict=feed_dict)
            low += n_sample
            if low >= history.shape[0]:
                print(" ######## epoch:{}, progress:{:.2f}%, total_loss:{:.4f}".format(
                    epoch, low / all_samples_len * 100, loss))
                print(logits[:4])
                print(prob[:4])
                low = 0
                saver.save(sess, "model/model.{0}".format(epoch))
                epoch += 1
        """Save a frozen .pb model"""
        pb_dir = './model'
        from tensorflow.python.framework import graph_util
        trained_graph = graph_util.convert_variables_to_constants(
            sess, sess.graph_def, output_node_names=['logits/preds'])
        tf.train.write_graph(trained_graph, pb_dir, 'model-' + str(epoch) + ".pb", as_text=False)
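# --- Design note (sketch, not in the original code): the repeated
# random.seed(randnum) / random.shuffle(...) calls in TrainModel above keep the
# six arrays aligned by re-seeding before every shuffle. Since the data are
# numpy arrays at that point, a single shared permutation is an equivalent and
# less error-prone idiom:
#
#   perm = np.random.permutation(len(true_utt))
#   history, history_len = history[perm], history_len[perm]
#   true_utt, true_utt_len = true_utt[perm], true_utt_len[perm]
#   false_utt, false_utt_len = false_utt[perm], false_utt_len[perm]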
def TrainModel(self, continue_train=False, previous_modelpath="model"):
    print("preprocessing .. train model ")
    f_write_loss = open('result_log_2.txt', 'w')
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    merged = tf.summary.merge_all()
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter('output_mini', sess.graph)
        print("open file .....")
        with open(re_path, 'rb') as f:
            actions = pickle.load(f)
        with open(word_embeddings_path, 'rb') as f:
            embeddings = pickle.load(f, encoding="bytes")
        with open(char_embeddings_path, 'rb') as f:
            charembeddings = pickle.load(f, encoding="bytes")
        with open(utt_path, 'rb') as f:
            history, true_utt = pickle.load(f)
        with open(re_char_path, 'rb') as f:
            ch_actions = pickle.load(f)
        with open(utt_char_path, 'rb') as f:
            utt_char, true_ch_utt = pickle.load(f)
        # with open("data/biglearn_test_small.txt", encoding="utf8") as f:
        #     lines = f.readlines()
        #     history, true_utt = utils.build_evaluate_data(lines)
        print("np file padding .....")
        history, history_len = utils.multi_sequences_padding(history, self.max_sentence_len)
        utt_char = multi_char_sequences_padding(utt_char, 50)
        true_utt_len = np.array(utils.get_sequences_length(true_utt, maxlen=self.max_sentence_len))
        true_utt = np.array(pad_sequences(true_utt, padding='post', maxlen=self.max_sentence_len))
        true_ch_utt = np.array(pad_sequences(true_ch_utt, padding='post', maxlen=self.max_sentence_len))
        actions_len = np.array(utils.get_sequences_length(actions, maxlen=self.max_sentence_len))
        actions = np.array(pad_sequences(actions, padding='post', maxlen=self.max_sentence_len))
        ch_actions = np.array(pad_sequences(ch_actions, padding='post', maxlen=self.max_sentence_len))
        history, history_len = np.array(history), np.array(history_len)
        if not continue_train:
            sess.run(init)
            sess.run(self.embedding_init, feed_dict={self.embedding_ph: embeddings})
            sess.run(self.char_embeddings_init, feed_dict={self.embedding_cph: charembeddings})
        else:
            saver.restore(sess, previous_modelpath)
        low = 0
        epoch = 1
        dro = 0.1
        print("starting epoch ....")
        while epoch < 10:
            n_sample = min(low + self.batch_size, history.shape[0]) - low
            negative_indices = [np.random.randint(0, actions.shape[0], n_sample)
                                for _ in range(self.negative_samples)]
            negs = [actions[negative_indices[i], :] for i in range(self.negative_samples)]
            negs_ch = [ch_actions[negative_indices[i], :] for i in range(self.negative_samples)]
            negs_len = [actions_len[negative_indices[i]] for i in range(self.negative_samples)]
            feed_dict = {
                self.utterance_ph: np.concatenate([history[low:low + n_sample]] * (self.negative_samples + 1), axis=0),
                self.all_utterance_len_ph: np.concatenate([history_len[low:low + n_sample]] * (self.negative_samples + 1), axis=0),
                self.response_ph: np.concatenate([true_utt[low:low + n_sample]] + negs, axis=0),
                self.response_len: np.concatenate([true_utt_len[low:low + n_sample]] + negs_len, axis=0),
                self.y_true: np.concatenate([np.ones(n_sample)] + [np.zeros(n_sample)] * self.negative_samples, axis=0),
                self.response_cph: np.concatenate([true_ch_utt[low:low + n_sample]] + negs_ch, axis=0),  # todo negs
                self.utterance_cph: np.concatenate([utt_char[low:low + n_sample]] * (self.negative_samples + 1), axis=0),
                self.dropout: dro,
                self.N: n_sample,
                self.sample_numbers: self.negative_samples + 1
            }
            _, summary = sess.run([self.train_op, merged], feed_dict=feed_dict)
            train_writer.add_summary(summary)
            low += n_sample
            if low % 102400 == 0:
                w_loss = sess.run(self.total_loss, feed_dict=feed_dict)
                print(w_loss)
                f_write_loss.write("loss: {}\n".format(w_loss))
                computeR10_1, computeR2_1 = self.Evaluate(sess)
                f_write_loss.write("computeR10_1: {}\n".format(computeR10_1))
                f_write_loss.write("computeR2_1: {}\n".format(computeR2_1))
            if low >= history.shape[0]:
                low = 0
                saver.save(sess, "model_1/model.{0}".format(epoch))
                h_loss = sess.run(self.total_loss, feed_dict=feed_dict)
                print(h_loss)
                f_write_loss.write("h_loss: {}\n".format(h_loss))
                print('epoch={i}'.format(i=epoch))
                f_write_loss.write("epoch: {}\n".format(epoch))
                epoch += 1
    f_write_loss.close()
def train_model_with_fixed_data(self, file_src_dict, response_num=3, continue_train=False,
                                previous_model_path="model"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    merged = tf.summary.merge_all()
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 1.0  # allow the full GPU memory
    # prepare data for val: evaluate_val_for_train expects raw (unpadded) lists
    # and pads them per batch itself.
    with open(evaluate_file, 'rb') as f:
        val_history, val_response, val_labels = pickle.load(f)
    val_data = [val_history, val_response, val_labels]
    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter('output2', sess.graph)
        # prepare data for train:
        with open(file_src_dict['train_file'], 'rb') as f:
            history, responses, labels = pickle.load(f)
        history, history_len = utils.multi_sequences_padding(history, self.max_sentence_len)
        responses_len = np.array(utils.get_sequences_length(responses, maxlen=self.max_sentence_len))
        responses = np.array(pad_sequences(responses, padding='post', maxlen=self.max_sentence_len))
        history, history_len = np.array(history), np.array(history_len)
        if not continue_train:
            sess.run(init)
            sess.run(self.embedding_init, feed_dict={self.embedding_ph: self.embedding})
        else:
            saver.restore(sess, previous_model_path)
        low = 0
        epoch = 1
        start_time = time.time()
        sess.graph.finalize()
        best_score = 100  # tracks the best (lowest) validation loss seen so far
        while epoch < 10:
            # low is the start index of the next slice of data to feed;
            # n_sample is how many samples are fed this step. Batches are a
            # multiple of response_num so each group (one context with its true
            # response and its negatives) stays whole.
            n_sample = min(low + self.batch_size * response_num, history.shape[0]) - low
            feed_dict = {
                self.utterance_ph: np.array(history[low:low + n_sample]),
                self.all_utterance_len_ph: np.array(history_len[low:low + n_sample]),
                self.response_ph: np.array(responses[low:low + n_sample]),
                self.response_len: np.array(responses_len[low:low + n_sample]),
                self.y_true: np.array(labels[low:low + n_sample])
            }
            _, summary = sess.run([self.train_op, merged], feed_dict=feed_dict)
            train_writer.add_summary(summary)
            low += n_sample
            if low % (self.batch_size * self.print_batch) == 0:
                time_dif = self.__get_time_dif(start_time)
                r10_1, loss = self.evaluate_val_for_train(sess, val_data)
                if best_score > loss:
                    best_score = loss
                    saver.save(sess, "model/model_best.{0}".format(low))
                print("train loss:", sess.run(self.total_loss, feed_dict=feed_dict),
                      "; val evaluation:", r10_1, loss, "time:", time_dif)
            if low >= history.shape[0]:  # i.e., low >= total number of conversations
                low = 0
                saver.save(sess, "model/model.{0}".format(epoch))
                print(sess.run(self.total_loss, feed_dict=feed_dict))
                print('epoch={i}'.format(i=epoch), 'ended')
                epoch += 1
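# --- Hedged sketch of the expected train_file layout (names hypothetical):
# train_model_with_fixed_data feeds contiguous slices, so each context should
# appear response_num times in a row, paired with one true response (label 1)
# and, presumably, response_num - 1 pre-built negatives (label 0). For
# response_num = 3:
#
#   history   = [ctx0, ctx0, ctx0, ctx1, ctx1, ctx1, ...]
#   responses = [pos0, neg0a, neg0b, pos1, neg1a, neg1b, ...]
#   labels    = [1, 0, 0, 1, 0, 0, ...]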
def train_model_with_random_sample(self, continue_train=False, previous_model_path="model"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    merged = tf.summary.merge_all()
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.7  # allocate at most 70% of GPU memory
    # prepare data for val:
    with open(evaluate_file, 'rb') as f:
        val_history, val_response, val_labels = pickle.load(f)
    with tf.Session(config=config) as sess:
        train_writer = tf.summary.FileWriter('output2', sess.graph)
        # prepare data for train:
        with open(response_file, 'rb') as f:
            actions = pickle.load(f)  # actions is a list of response candidates
        with open(history_file, 'rb') as f:
            # history is a 3d-list. 1d: samples; 2d: one utterance of a sample;
            # 3d: one word of an utterance.
            # true_utt is a 2d-list. 1d: samples; 2d: the true response of the sample.
            history, true_utt = pickle.load(f)
        actions = self.copy_list(actions)
        actions_len = np.array(utils.get_sequences_length(actions, maxlen=self.max_sentence_len))
        actions = np.array(pad_sequences(actions, padding='post', maxlen=self.max_sentence_len))
        history, history_len = utils.multi_sequences_padding(history, self.max_sentence_len)
        true_utt_len = np.array(utils.get_sequences_length(true_utt, maxlen=self.max_sentence_len))
        true_utt = np.array(pad_sequences(true_utt, padding='post', maxlen=self.max_sentence_len))
        history, history_len = np.array(history), np.array(history_len)
        if not continue_train:
            sess.run(init)
            sess.run(self.embedding_init, feed_dict={self.embedding_ph: self.embedding})
        else:
            saver.restore(sess, previous_model_path)
        low = 0
        epoch = 1
        start_time = time.time()
        sess.graph.finalize()
        best_score = 100  # tracks the best (lowest) validation loss seen so far
        while epoch < 10:
            # low is the start index of the next slice of data to feed;
            # n_sample is how many group-samples are fed this step
            # (one group-sample = one context with its true response and some negatives)
            n_sample = min(low + self.batch_size, history.shape[0]) - low
            # negative_samples is the number of negatives per context;
            # negative_indices is a 2d-list (negative_samples * n_sample)
            negative_indices = [np.random.randint(0, actions.shape[0], n_sample)
                                for _ in range(self.negative_samples)]
            # negs has shape negative_samples * n_sample * max_sentence_len
            negs = [actions[negative_indices[i], :] for i in range(self.negative_samples)]
            negs_len = [actions_len[negative_indices[i]] for i in range(self.negative_samples)]
            feed_dict = {
                self.utterance_ph: np.concatenate([history[low:low + n_sample]] * (self.negative_samples + 1), axis=0),
                self.all_utterance_len_ph: np.concatenate([history_len[low:low + n_sample]] * (self.negative_samples + 1), axis=0),
                self.response_ph: np.concatenate([true_utt[low:low + n_sample]] + negs, axis=0),
                self.response_len: np.concatenate([true_utt_len[low:low + n_sample]] + negs_len, axis=0),
                self.y_true: np.concatenate([np.ones(n_sample)] + [np.zeros(n_sample)] * self.negative_samples, axis=0)
            }
            _, summary = sess.run([self.train_op, merged], feed_dict=feed_dict)
            train_writer.add_summary(summary)
            low += n_sample
            if low % (self.batch_size * self.print_batch) == 0:
                time_dif = self.__get_time_dif(start_time)
                r10_1, loss = self.evaluate_val_for_train(sess, [val_history, val_response, val_labels])
                if best_score > loss:
                    best_score = loss
                    saver.save(sess, "model/model_best.{0}".format(low))
                print("train loss:", sess.run(self.total_loss, feed_dict=feed_dict),
                      "; val evaluation:", r10_1, "time:", time_dif)
                print('loss', loss)
            if low >= history.shape[0]:  # i.e., low >= total number of conversations
                low = 0
                saver.save(sess, "model/model.{0}".format(epoch))
                print(sess.run(self.total_loss, feed_dict=feed_dict))
                print('epoch={i}'.format(i=epoch), 'ended')
                epoch += 1
def TrainModel(self, continue_train=False, previous_modelpath="model"):
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    merged = tf.summary.merge_all()
    with tf.Session() as sess:
        train_writer = tf.summary.FileWriter('output', sess.graph)
        with open(embedding_file, 'rb') as f:
            embeddings = pickle.load(f, encoding="bytes")
        with open(utterance_file, 'rb') as f:
            utterances, last_utterance = pickle.load(f)
        with open(response_file, 'rb') as f:
            responses = pickle.load(f)
        utterances, utterances_len = utils.multi_sequences_padding(
            utterances, self.max_num_utterance, self.max_sentence_len)
        last_utterance_len = np.array(utils.get_sequences_length(last_utterance, self.max_sentence_len))
        last_utterance = np.array(pad_sequences(last_utterance, padding='post', maxlen=self.max_sentence_len))
        responses_len = np.array(utils.get_sequences_length(responses, self.max_sentence_len))
        responses = np.array(pad_sequences(responses, padding='post', maxlen=self.max_sentence_len))
        utterances, utterances_len = np.array(utterances), np.array(utterances_len)
        if not continue_train:
            sess.run(init)
            sess.run(self.embedding_init, feed_dict={self.embedding_ph: embeddings})
        else:
            saver.restore(sess, previous_modelpath)
        low = 0
        epoch = 1
        while epoch < self.max_epoch:
            n_sample = min(low + self.batch_size, utterances.shape[0]) - low
            negative_indices = [np.random.randint(0, responses.shape[0], n_sample)
                                for _ in range(self.negative_samples)]
            negs = [responses[negative_indices[i], :] for i in range(self.negative_samples)]
            negs_len = [responses_len[negative_indices[i]] for i in range(self.negative_samples)]
            feed_dict = {
                self.utterance_ph: np.concatenate([utterances[low:low + n_sample]] * (self.negative_samples + 1), axis=0),
                self.all_utterance_len_ph: np.concatenate([utterances_len[low:low + n_sample]] * (self.negative_samples + 1), axis=0),
                self.response_ph: np.concatenate([last_utterance[low:low + n_sample]] + negs, axis=0),
                self.response_len_ph: np.concatenate([last_utterance_len[low:low + n_sample]] + negs_len, axis=0),
                self.y_true: np.concatenate([np.ones(n_sample)] + [np.zeros(n_sample)] * self.negative_samples, axis=0)
            }
            _, summary = sess.run([self.train_op, merged], feed_dict=feed_dict)
            train_writer.add_summary(summary)
            low += n_sample
            if low % 102400 == 0:
                print("loss", sess.run(self.total_loss, feed_dict=feed_dict))
                self.Evaluate(sess)
            if low >= utterances.shape[0]:
                low = 0
                saver.save(sess, "model/model.{0}".format(epoch))
                print(sess.run(self.total_loss, feed_dict=feed_dict))
                print('epoch={i}'.format(i=epoch))
                epoch += 1