def dev_step():
    results = defaultdict(list)
    num_test = 0
    num_correct = 0.0
    valid_batches = data_helpers.batch_iter(valid_dataset, FLAGS.batch_size, 1, target_loss_weight,
                                            FLAGS.max_utter_len, FLAGS.max_utter_num, FLAGS.max_response_len,
                                            charVocab, FLAGS.max_word_length, shuffle=True)
    for valid_batch in valid_batches:
        x_utterances, x_response, x_utterances_len, x_response_len, x_utters_num, \
            x_target, x_target_weight, id_pairs, \
            x_u_char, x_u_char_len, x_r_char, x_r_char_len = valid_batch
        feed_dict = {
            imn.utterances: x_utterances,
            imn.response: x_response,
            imn.utterances_len: x_utterances_len,
            imn.response_len: x_response_len,
            imn.utters_num: x_utters_num,
            imn.target: x_target,
            imn.target_loss_weight: x_target_weight,
            imn.dropout_keep_prob: 1.0,
            imn.u_charVec: x_u_char,
            imn.u_charLen: x_u_char_len,
            imn.r_charVec: x_r_char,
            imn.r_charLen: x_r_char_len,
        }
        batch_accuracy, predicted_prob = sess.run([imn.accuracy, imn.probs], feed_dict)
        num_test += len(predicted_prob)
        if num_test % 1000 == 0:
            print(num_test)
        num_correct += len(predicted_prob) * batch_accuracy
        # group each predicted score under its query for the ranking metrics below
        for i, prob_score in enumerate(predicted_prob):
            question_id, response_id, label = id_pairs[i]
            results[question_id].append((response_id, label, prob_score))

    # calculate top-1 precision
    print('num_test_samples: {} test_accuracy: {}'.format(num_test, num_correct / num_test))
    accu, precision, recall, f1, loss = metrics.classification_metrics(results)
    print('Accuracy: {}, Precision: {} Recall: {} F1: {} Loss: {}'.format(accu, precision, recall, f1, loss))
    mvp = metrics.mean_average_precision(results)
    mrr = metrics.mean_reciprocal_rank(results)
    top_1_precision = metrics.top_1_precision(results)
    total_valid_query = metrics.get_num_valid_query(results)
    print('MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'
          .format(mvp, mrr, top_1_precision, total_valid_query))
    return mrr
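# --- Illustrative sketch, not part of the original script ---
# `results` maps each question_id to a list of (response_id, label, prob_score)
# tuples. A minimal sketch of how top-1 precision could be computed over such a
# dict, assuming one positive candidate per query; the repo's actual
# metrics.top_1_precision implementation may differ:
def top_1_precision_sketch(results):
    num_query = 0
    num_correct = 0
    for question_id, candidates in results.items():
        num_query += 1
        # pick this query's candidate with the highest predicted probability
        top_response = max(candidates, key=lambda c: c[2])
        if top_response[1] == 1:  # label of the top-ranked candidate
            num_correct += 1
    return num_correct / num_query if num_query else 0.0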
def dev_step():
    results = defaultdict(list)
    num_test = 0
    num_correct = 0.0
    valid_batches = data_helpers.batch_iter(valid_dataset, FLAGS.batch_size, 1,
                                            FLAGS.max_utter_num, FLAGS.max_utter_len,
                                            FLAGS.max_response_num, FLAGS.max_response_len,
                                            FLAGS.max_persona_num, FLAGS.max_persona_len,
                                            charVocab, FLAGS.max_word_length, shuffle=True)
    for valid_batch in valid_batches:
        x_utterances, x_utterances_len, x_response, x_response_len, \
            x_utters_num, x_target, x_ids, \
            x_u_char, x_u_char_len, x_r_char, x_r_char_len, \
            x_personas, x_personas_len, x_p_char, x_p_char_len, x_personas_num = valid_batch
        feed_dict = {
            dim.utterances: x_utterances,
            dim.utterances_len: x_utterances_len,
            dim.responses: x_response,
            dim.responses_len: x_response_len,
            dim.utters_num: x_utters_num,
            dim.target: x_target,
            dim.dropout_keep_prob: 1.0,
            dim.u_charVec: x_u_char,
            dim.u_charLen: x_u_char_len,
            dim.r_charVec: x_r_char,
            dim.r_charLen: x_r_char_len,
            dim.personas: x_personas,
            dim.personas_len: x_personas_len,
            dim.p_charVec: x_p_char,
            dim.p_charLen: x_p_char_len,
            dim.personas_num: x_personas_num
        }
        batch_accuracy, predicted_prob = sess.run([dim.accuracy, dim.probs], feed_dict)
        num_test += len(predicted_prob)
        if num_test % 1000 == 0:
            print(num_test)
        num_correct += len(predicted_prob) * batch_accuracy

        # predicted_prob has shape [batch_size, max_response_num]; expand the
        # gold response index into one-hot labels over the candidate set
        for i in range(len(predicted_prob)):
            probs = predicted_prob[i]
            us_id = x_ids[i]
            label = x_target[i]
            labels = np.zeros(FLAGS.max_response_num)
            labels[label] = 1
            for r_id, prob in enumerate(probs):
                results[us_id].append((str(r_id), labels[r_id], prob))

    # calculate top-1 precision
    print('num_test_samples: {} test_accuracy: {}'.format(num_test, num_correct / num_test))
    accu, precision, recall, f1, loss = metrics.classification_metrics(results)
    print('Accuracy: {}, Precision: {} Recall: {} F1: {} Loss: {}'.format(accu, precision, recall, f1, loss))
    mvp = metrics.mean_average_precision(results)
    mrr = metrics.mean_reciprocal_rank(results)
    top_1_precision = metrics.top_1_precision(results)
    total_valid_query = metrics.get_num_valid_query(results)
    print('MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'
          .format(mvp, mrr, top_1_precision, total_valid_query))
    return mrr
def dev_step():
    results = []
    num_test = 0
    num_correct = 0.0
    valid_batches = data_helpers.batch_iter(valid_dataset, FLAGS.batch_size, 1, SEQ_LEN, shuffle=True)
    for valid_batch in valid_batches:
        x_question, x_answer, x_question_len, x_answer_len, \
            x_lastTurn, x_lastTurn_len, q_id_list, as_id_list, x_target = valid_batch
        feed_dict = {
            esim.question: x_question,
            esim.answer: x_answer,
            esim.question_len: x_question_len,
            esim.answer_len: x_answer_len,
            esim.target: x_target,
            esim.dropout_keep_prob: 1.0,
            esim.lastTurn: x_lastTurn,
            esim.lastTurn_len: x_lastTurn_len
        }
        batch_accuracy, predicted_prob = sess.run([esim.accuracy, esim.probs], feed_dict)
        num_test += len(predicted_prob)
        if num_test % 10 == 0:
            print(num_test)
        num_correct += len(predicted_prob) * batch_accuracy
        results.append((predicted_prob, x_target))

    # aggregate the per-batch probabilities and labels before scoring
    probs_list = []
    labels_list = []
    for probs, labels in results:
        probs_list.append(probs)
        labels_list.append(labels)
    probs_aggre = np.concatenate(probs_list, axis=0)
    labels_aggre = np.concatenate(labels_list, axis=0)

    # calculate recall@k
    print('num_test_samples: {} test_accuracy: {}'.format(num_test, num_correct / num_test))
    recall, mrr = recall_metrics.compute_recall(probs_aggre, labels_aggre)
    print('recall@1: {}, recall@2: {}, recall@5: {}, recall@10: {}'.format(
        recall['@1'], recall['@2'], recall['@5'], recall['@10']))
    return recall['@1']
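# --- Illustrative sketch, not part of the original script ---
# A hypothetical stand-in for recall_metrics.compute_recall, assuming the common
# Ubuntu-corpus convention of a fixed number of candidates per context with
# exactly one positive, and binary labels; the repo's real implementation may
# group candidates differently.
import numpy as np

def compute_recall_sketch(probs, labels, group_size=10):
    # reshape flat per-candidate scores/labels into [num_contexts, group_size]
    probs = np.asarray(probs).reshape(-1, group_size)
    labels = np.asarray(labels).reshape(-1, group_size)
    # candidate indices sorted by descending score within each context
    order = np.argsort(-probs, axis=1)
    # 0-based rank position of the single positive candidate per context
    pos = np.argmax(np.take_along_axis(labels, order, axis=1), axis=1)
    recall = {'@{}'.format(k): float(np.mean(pos < k)) for k in (1, 2, 5, 10)}
    mrr = float(np.mean(1.0 / (pos + 1)))
    return recall, mrr

# usage under the stated assumptions:
# recall, mrr = compute_recall_sketch(probs_aggre, labels_aggre)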
def check_step(dataset, shuffle=False):
    results = defaultdict(list)
    num_test = 0
    num_correct = 0.0
    batches = data_helpers.batch_iter(dataset, FLAGS.batch_size, 1, idf, SEQ_LEN, shuffle=shuffle)
    for batch in batches:
        x_premise, x_hypothesis, x_premise_len, x_hypothesis_len, \
            targets, extra_feature, p_features, h_features = batch
        feed_dict = {
            esim.premise: x_premise,
            esim.hypothesis: x_hypothesis,
            esim.premise_len: x_premise_len,
            esim.hypothesis_len: x_hypothesis_len,
            esim.target: targets,
            esim.dropout_keep_prob: 1.0,
            esim.extra_feature: extra_feature,
            esim.p_word_feature: p_features,
            esim.h_word_feature: h_features
        }
        batch_accuracy, predicted_prob = sess.run([esim.accuracy, esim.probs], feed_dict)
        num_test += len(predicted_prob)
        if num_test % 1000 == 0:
            print(num_test)
        num_correct += len(predicted_prob) * batch_accuracy

    # calculate accuracy
    acc = num_correct / num_test
    print('num_test_samples: {} accuracy: {}'.format(num_test, acc))
    return acc
r_char_feature = graph.get_operation_by_name("response_char").outputs[0]
r_char_len = graph.get_operation_by_name("response_char_len").outputs[0]

# Tensors we want to evaluate
prob = graph.get_operation_by_name("prediction_layer/prob").outputs[0]

results = defaultdict(list)
num_test = 0
test_batches = data_helpers.batch_iter(test_dataset, FLAGS.batch_size, 1, target_loss_weight,
                                       FLAGS.max_utter_len, FLAGS.max_utter_num, FLAGS.max_response_len,
                                       charVocab, FLAGS.max_word_length, shuffle=False)
for test_batch in test_batches:
    x_utterances, x_response, x_utterances_len, x_response_len, x_utters_num, \
        x_target, x_target_weight, id_pairs, \
        x_u_char, x_u_char_len, x_r_char, x_r_char_len = test_batch
    feed_dict = {
        utterances: x_utterances,
        response: x_response,
        utterances_len: x_utterances_len,
        response_len: x_response_len,
        utterances_num: x_utters_num,
        dropout_keep_prob: 1.0,
best_mrr = 0.0
batches = data_helpers.batch_iter(train_dataset, FLAGS.batch_size, FLAGS.num_epochs,
                                  FLAGS.max_utter_num, FLAGS.max_utter_len,
                                  FLAGS.max_response_num, FLAGS.max_response_len,
                                  FLAGS.max_persona_num, FLAGS.max_persona_len,
                                  charVocab, FLAGS.max_word_length, shuffle=True)
for batch in batches:
    x_utterances, x_utterances_len, x_response, x_response_len, \
        x_utters_num, x_target, x_ids, \
        x_u_char, x_u_char_len, x_r_char, x_r_char_len, \
        x_personas, x_personas_len, x_p_char, x_p_char_len, x_personas_num = batch
    train_step(x_utterances, x_utterances_len, x_response, x_response_len,
               x_utters_num, x_target, x_ids,
               x_u_char, x_u_char_len, x_r_char, x_r_char_len,
               x_personas, x_personas_len, x_p_char, x_p_char_len, x_personas_num)
    current_step = tf.train.global_step(sess, global_step)
    if current_step % FLAGS.evaluate_every == 0:
        print("\nEvaluation:")
        valid_mrr = dev_step()
        if valid_mrr > best_mrr:
personas = graph.get_operation_by_name("personas").outputs[0]
personas_len = graph.get_operation_by_name("personas_len").outputs[0]
personas_num = graph.get_operation_by_name("personas_num").outputs[0]
p_char_feature = graph.get_operation_by_name("personas_char").outputs[0]
p_char_len = graph.get_operation_by_name("personas_char_len").outputs[0]
dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

# Tensors we want to evaluate
pred_prob = graph.get_operation_by_name("prediction_layer/prob").outputs[0]

results = defaultdict(list)
num_test = 0
test_batches = data_helpers.batch_iter(test_dataset, FLAGS.batch_size, 1,
                                       FLAGS.max_utter_num, FLAGS.max_utter_len,
                                       FLAGS.max_response_num, FLAGS.max_response_len,
                                       FLAGS.max_persona_num, FLAGS.max_persona_len,
                                       charVocab, FLAGS.max_word_length, shuffle=False)
for test_batch in test_batches:
    x_utterances, x_utterances_len, x_utterances_num, x_u_char, x_u_char_len, \
        x_response, x_response_len, x_r_char, x_r_char_len, \
        x_personas, x_personas_len, x_personas_num, x_p_char, x_p_char_len, \
        x_target, x_ids = test_batch
    feed_dict = {
        utterances: x_utterances,
        utterances_len: x_utterances_len,
        responses: x_response,
        responses_len: x_response_len,
        utterances_num: x_utterances_num,
        dropout_keep_prob: 1.0,
        u_char_feature: x_u_char,
question_len_x = graph.get_operation_by_name("question_len").outputs[0]
answer_len_x = graph.get_operation_by_name("answer_len").outputs[0]
lastTurn_len_x = graph.get_operation_by_name("lastTurn_len").outputs[0]
dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

# Tensors we want to evaluate
prob = graph.get_operation_by_name("convolution-1/prob").outputs[0]

results = []
num_test = 0
test_batches = data_helpers.batch_iter(test_dataset, FLAGS.batch_size, 1, SEQ_LEN, shuffle=False)
for test_batch in test_batches:
    x_question, x_answer, x_question_len, x_answer_len, \
        x_lastTurn, x_lastTurn_len, x_q_id_list, x_as_id_list, x_target = test_batch
    feed_dict = {
        question_x: x_question,
        answer_x: x_answer,
        lastTurn_x: x_lastTurn,
        question_len_x: x_question_len,
        answer_len_x: x_answer_len,
        lastTurn_len_x: x_lastTurn_len,
        dropout_keep_prob: 1.0
    }
    predicted_prob = sess.run(prob, feed_dict)
    num_test += len(predicted_prob)
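# --- Illustrative sketch, not part of the original script ---
# The `graph.get_operation_by_name(...)` handles in the test snippets above are
# typically obtained with the standard TF1 checkpoint-restore pattern; the
# checkpoint path below is hypothetical.
import tensorflow as tf

checkpoint_file = 'runs/checkpoints/model-10000'  # hypothetical path
graph = tf.Graph()
with graph.as_default():
    sess = tf.Session()
    with sess.as_default():
        # restore the saved meta graph and the trained weights
        saver = tf.train.import_meta_graph('{}.meta'.format(checkpoint_file))
        saver.restore(sess, checkpoint_file)
        # fetch input placeholders and output tensors by their saved names
        question_x = graph.get_operation_by_name("question").outputs[0]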
def check_step(dataset, shuffle, is_test=False, path=None):
    results = defaultdict(list)
    num_test = 0
    num_correct = 0.0
    conv_correct = 0.0
    if is_test:
        file = open(path, 'w')
    valid_batches = data_helpers.batch_iter(dataset, FLAGS.batch_size, 1, target_loss_weight,
                                            FLAGS.max_utter_len, FLAGS.max_utter_num,
                                            charVocab, FLAGS.max_word_length, shuffle=shuffle)
    for valid_batch in valid_batches:
        x_utterances, x_utterances_len, x_utterances_num, x_utterances_char, x_utterances_char_len, \
            x_target, x_id, x_target_weight, dialogue_label = valid_batch
        feed_dict = {
            model.utterances: x_utterances,
            model.utterances_len: x_utterances_len,
            model.utterances_num: x_utterances_num,
            model.target: x_target,
            model.target_loss_weight: x_target_weight,
            model.dropout_keep_prob: 1.0,
            model.u_charVec: x_utterances_char,
            model.u_charLen: x_utterances_char_len,
            model.dialogue_label: dialogue_label,
        }
        batch_accuracy, predicted_prob, conv_acc = sess.run(
            [model.accuracy, model.probs, model.conv_acc], feed_dict)
        num_test += len(predicted_prob)
        if num_test % 100000 == 0:
            print(num_test)

        # method 1: accumulate the model's own conversation-level accuracy
        conv_correct += len(dialogue_label) * conv_acc

        # method 2: a dialogue counts as correct only if every utterance is predicted correctly
        predicted_target = np.argmax(predicted_prob, axis=2)  # [batch_size, max_utter_num]
        for i in range(len(predicted_prob)):
            i_utterances_num = x_utterances_num[i]
            i_predicted_target = predicted_target[i][:i_utterances_num]
            i_target = x_target[i][:i_utterances_num]
            if np.sum((i_predicted_target == i_target).astype(int)) == x_utterances_num[i]:
                num_correct += 1

        if is_test:
            for i in range(len(x_id)):
                x_id_ = x_id[i]
                i_utterances_num = x_utterances_num[i]
                for j in range(i_utterances_num):
                    i_predicted_target = predicted_target[i][j]
                    i_target = x_target[i][j]
                    file.write('{}\t{}\t{}\t{}\n'.format(
                        x_id_, i_utterances_num, i_predicted_target, i_target))

    # calculate accuracy
    acc = num_correct / num_test
    conv_accuracy = conv_correct / num_test
    print('num_test_samples: {} accuracy: {} \n'.format(num_test, acc))
    print('conversation accuracy: {} \n'.format(conv_accuracy))
    if is_test:
        file.close()
    return acc
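# --- Illustrative sketch, not part of the original script ---
# Toy example of the "method 2" check above: a dialogue is counted as correct
# only when every one of its (non-padded) utterances is labeled correctly.
import numpy as np

predicted = np.array([2, 0, 1, 1])  # predicted class per utterance
gold = np.array([2, 0, 1, 3])       # gold class per utterance
utterances_num = 4
dialogue_correct = np.sum((predicted == gold).astype(int)) == utterances_num
print(dialogue_correct)  # False: the last utterance is misclassified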
EPOCH = 0
best_acc = 0.0
batches = data_helpers.batch_iter(train_dataset, FLAGS.batch_size, FLAGS.num_epochs, target_loss_weight,
                                  FLAGS.max_utter_len, FLAGS.max_utter_num,
                                  charVocab, FLAGS.max_word_length, shuffle=True)
for batch in batches:
    x_utterances, x_utterances_len, x_utterances_num, x_utterances_char, x_utterances_char_len, \
        x_target, x_id, x_target_weight, dialogue_label = batch
    train_step(x_utterances, x_utterances_len, x_utterances_num, x_utterances_char,
               x_utterances_char_len, x_target, x_id, x_target_weight, dialogue_label)
    current_step = tf.train.global_step(sess, global_step)
    if current_step == 10000:
        # switch to train_op2 after 10,000 steps
        train_op = train_op2
        print('change to train_op2')
    if current_step % FLAGS.evaluate_every == 0:
        EPOCH += 1
best_acc = 0.0
EPOCH = 0
batches = data_helpers.batch_iter(train_dataset, FLAGS.batch_size, FLAGS.num_epochs, idf, SEQ_LEN, shuffle=True)
for batch in batches:
    x_premise, x_hypothesis, x_premise_len, x_hypothesis_len, \
        targets, extra_feature, p_features, h_features = batch
    train_step(x_premise, x_hypothesis, x_premise_len, x_hypothesis_len,
               targets, extra_feature, p_features, h_features)
    current_step = tf.train.global_step(sess, global_step)
    if current_step % FLAGS.evaluate_every == 0:
        EPOCH += 1
        print("\nEPOCH: {}".format(EPOCH))
        print("Evaluation on dev:")
        valid_acc = check_step(dev_dataset, shuffle=True)
        print("\nEvaluation on test:")
        results[question_id].append((response_id, label, prob_score))

    # calculate top-1 precision
    print('num_test_samples: {} test_accuracy: {}'.format(num_test, num_correct / num_test))
    accu, precision, recall, f1, loss = metrics.classification_metrics(results)
    print('Accuracy: {}, Precision: {} Recall: {} F1: {} Loss: {}'.format(accu, precision, recall, f1, loss))
    mvp = metrics.mean_average_precision(results)
    mrr = metrics.mean_reciprocal_rank(results)
    top_1_precision = metrics.top_1_precision(results)
    total_valid_query = metrics.get_num_valid_query(results)
    print('MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'
          .format(mvp, mrr, top_1_precision, total_valid_query))
    return mrr


best_mrr = 0.0
batches = data_helpers.batch_iter(train_dataset, FLAGS.batch_size, FLAGS.num_epochs, target_loss_weight,
                                  FLAGS.max_utter_len, FLAGS.max_utter_num, FLAGS.max_response_len,
                                  shuffle=True)
for batch in batches:
    x_utterances, x_response, x_utterances_len, x_response_len, \
        x_utters_num, x_target, x_target_weight, id_pairs = batch
    train_step(x_utterances, x_response, x_utterances_len, x_response_len,
               x_utters_num, x_target, x_target_weight, id_pairs)
    current_step = tf.train.global_step(sess, global_step)
    if current_step % FLAGS.evaluate_every == 0:
        print("\nEvaluation:")
        valid_mrr = dev_step()
        if valid_mrr > best_mrr:
            best_mrr = valid_mrr
            test_mrr = test_step()
            path = saver.save(sess, checkpoint_prefix, global_step=current_step)
            print("Saved model checkpoint to {}\n".format(path))