Example #1
        def dev_step():
            results = defaultdict(list)
            num_test = 0
            num_correct = 0.0
            valid_batches = data_helpers.batch_iter(valid_dataset, FLAGS.batch_size, 1, FLAGS.max_utter_num, FLAGS.max_utter_len, \
                                      FLAGS.max_response_num, FLAGS.max_response_len, FLAGS.max_persona_num, FLAGS.max_persona_len, \
                                      charVocab, FLAGS.max_word_length, shuffle=True)
            for valid_batch in valid_batches:
                x_utterances, x_utterances_len, x_response, x_response_len, \
                    x_utters_num, x_target, x_ids, \
                    x_u_char, x_u_char_len, x_r_char, x_r_char_len, \
                    x_personas, x_personas_len, x_p_char, x_p_char_len, x_personas_num = valid_batch
                feed_dict = {
                  dim.utterances: x_utterances,
                  dim.utterances_len: x_utterances_len,
                  dim.responses: x_response,
                  dim.responses_len: x_response_len,
                  dim.utters_num: x_utters_num,
                  dim.target: x_target,
                  dim.dropout_keep_prob: 1.0,
                  dim.u_charVec: x_u_char,
                  dim.u_charLen: x_u_char_len,
                  dim.r_charVec: x_r_char,
                  dim.r_charLen: x_r_char_len,
                  dim.personas: x_personas,
                  dim.personas_len: x_personas_len,
                  dim.p_charVec: x_p_char,
                  dim.p_charLen: x_p_char_len,
                  dim.personas_num: x_personas_num
                }
                batch_accuracy, predicted_prob = sess.run([dim.accuracy, dim.probs], feed_dict)

                num_test += len(predicted_prob)
                if num_test % 1000 == 0:
                    print(num_test)
                num_correct += len(predicted_prob) * batch_accuracy  # accuracy is a batch mean, so scale by batch size

                # predicted_prob = [batch_size, max_response_num]
                for i in range(len(predicted_prob)):
                    probs = predicted_prob[i]
                    us_id = x_ids[i]
                    label = x_target[i]
                    labels = np.zeros(FLAGS.max_response_num)  # one-hot relevance vector over candidate responses
                    labels[label] = 1
                    for r_id, prob in enumerate(probs):
                        results[us_id].append((str(r_id), labels[r_id], prob))

            # aggregate classification and ranking metrics over the whole validation set
            print('num_test_samples: {}  test_accuracy: {}'.format(num_test, num_correct/num_test))
            accu, precision, recall, f1, loss = metrics.classification_metrics(results)
            print('Accuracy: {}  Precision: {}  Recall: {}  F1: {}  Loss: {}'.format(accu, precision, recall, f1, loss))

            mvp = metrics.mean_average_precision(results)
            mrr = metrics.mean_reciprocal_rank(results)
            top_1_precision = metrics.top_1_precision(results)
            total_valid_query = metrics.get_num_valid_query(results)
            print('MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'.format(mvp, mrr, top_1_precision, total_valid_query))

            return mrr
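
The metrics module used above is not shown in this example. As a minimal sketch, assuming results maps each query id to a list of (response_id, label, prob) triples as built in the loop (this is only an illustration of the idea, not the project's actual metrics code), MRR and top-1 precision could be computed like this:

def mean_reciprocal_rank(results):
    # Reciprocal rank of the first relevant response, averaged over queries.
    total, num_query = 0.0, 0
    for triples in results.values():
        ranked = sorted(triples, key=lambda t: t[2], reverse=True)  # sort by prob
        for rank, (_, label, _) in enumerate(ranked, start=1):
            if label > 0:
                total += 1.0 / rank
                break
        num_query += 1
    return total / max(num_query, 1)

def top_1_precision(results):
    # Fraction of queries whose highest-scored candidate is a correct response.
    hits = sum(1 for triples in results.values()
               if max(triples, key=lambda t: t[2])[1] > 0)
    return hits / float(max(len(results), 1))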
Example #2
        def dev_step():
            results = defaultdict(list)
            num_test = 0
            num_correct = 0.0
            valid_batches = data_helpers.batch_iter(valid_dataset,
                                                    FLAGS.batch_size,
                                                    1,
                                                    target_loss_weight,
                                                    FLAGS.max_utter_len,
                                                    FLAGS.max_utter_num,
                                                    FLAGS.max_response_len,
                                                    charVocab,
                                                    FLAGS.max_word_length,
                                                    shuffle=True)
            for valid_batch in valid_batches:
                x_utterances, x_response, x_utterances_len, x_response_len, x_utters_num, x_target, x_target_weight, id_pairs, x_u_char, x_u_char_len, x_r_char, x_r_char_len = valid_batch
                feed_dict = {
                    imn.utterances: x_utterances,
                    imn.response: x_response,
                    imn.utterances_len: x_utterances_len,
                    imn.response_len: x_response_len,
                    imn.utters_num: x_utters_num,
                    imn.target: x_target,
                    imn.target_loss_weight: x_target_weight,
                    imn.dropout_keep_prob: 1.0,
                    imn.u_charVec: x_u_char,
                    imn.u_charLen: x_u_char_len,
                    imn.r_charVec: x_r_char,
                    imn.r_charLen: x_r_char_len,
                }
                batch_accuracy, predicted_prob = sess.run(
                    [imn.accuracy, imn.probs], feed_dict)
                num_test += len(predicted_prob)
                if num_test % 1000 == 0:
                    print(num_test)

                num_correct += len(predicted_prob) * batch_accuracy  # accuracy is a batch mean, so scale by batch size
                for i, prob_score in enumerate(predicted_prob):
                    question_id, response_id, label = id_pairs[i]
                    results[question_id].append(
                        (response_id, label, prob_score))

            # aggregate classification and ranking metrics over the whole validation set
            print('num_test_samples: {}  test_accuracy: {}'.format(
                num_test, num_correct / num_test))
            accu, precision, recall, f1, loss = metrics.classification_metrics(
                results)
            print('Accuracy: {}  Precision: {}  Recall: {}  F1: {}  Loss: {}'.format(
                accu, precision, recall, f1, loss))

            mvp = metrics.mean_average_precision(results)
            mrr = metrics.mean_reciprocal_rank(results)
            top_1_precision = metrics.top_1_precision(results)
            total_valid_query = metrics.get_num_valid_query(results)
            print(
                'MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'
                .format(mvp, mrr, top_1_precision, total_valid_query))

            return mrr
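
Example #2 builds the same kind of (response_id, label, prob_score) triples per question_id before calling metrics.mean_average_precision. A minimal sketch of that metric under the same assumption about the results layout (not the project's actual implementation) follows:

def mean_average_precision(results):
    # Average precision per query (mean of precision at each relevant hit),
    # then averaged over all queries.
    total_ap, num_query = 0.0, 0
    for triples in results.values():
        ranked = sorted(triples, key=lambda t: t[2], reverse=True)  # rank by score
        hits, precisions = 0, []
        for rank, (_, label, _) in enumerate(ranked, start=1):
            if label > 0:
                hits += 1
                precisions.append(hits / float(rank))
        if precisions:
            total_ap += sum(precisions) / len(precisions)
        num_query += 1
    return total_ap / max(num_query, 1)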
Example #3
            # Fragment: tail of the evaluation loop; predicted_prob, x_ids, x_target
            # and results are assumed to be defined earlier in the same test script.
            print('num_test_sample={}'.format(num_test))

            for i in range(len(predicted_prob)):
                probs = predicted_prob[i]
                us_id = x_ids[i]
                label = x_target[i]
                labels = np.zeros(FLAGS.max_response_num)
                labels[label] = 1
                for r_id, prob in enumerate(probs):
                    results[us_id].append((str(r_id), labels[r_id], prob))


accu, precision, recall, f1, loss = metrics.classification_metrics(results)
print('Accuracy: {}  Precision: {}  Recall: {}  F1: {}  Loss: {}'.format(accu, precision, recall, f1, loss))

mvp = metrics.mean_average_precision(results)
mrr = metrics.mean_reciprocal_rank(results)
top_1_precision = metrics.top_1_precision(results)
total_valid_query = metrics.get_num_valid_query(results)
print('MAP (mean average precision): {}\tMRR (mean reciprocal rank): {}\tTop-1 precision: {}\tNum_query: {}'.format(mvp, mrr, top_1_precision, total_valid_query))

out_path = FLAGS.output_file
print("Saving evaluation to {}".format(out_path))
with open(out_path, 'w') as f:
    f.write("query_id\tdocument_id\tscore\trank\trelevance\n")
    for us_id, v in results.items():
        v.sort(key=operator.itemgetter(2), reverse=True)  # rank candidates by score, best first
        for i, rec in enumerate(v):
            r_id, label, prob_score = rec
            rank = i+1
            f.write('{}\t{}\t{}\t{}\t{}\n'.format(us_id, r_id, prob_score, rank, label))
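
The evaluation file written above is a plain TSV with the columns query_id, document_id, score, rank and relevance. A hypothetical reader for it, e.g. to recompute top-1 precision offline (the path and helper name are illustrative, not part of the original code), could look like this:

from collections import defaultdict

def read_ranking_file(path):
    # Parse the TSV written above back into per-query record lists.
    per_query = defaultdict(list)
    with open(path) as f:
        next(f)  # skip the header line
        for line in f:
            q_id, d_id, score, rank, label = line.rstrip('\n').split('\t')
            per_query[q_id].append((d_id, float(score), int(rank), float(label)))
    return per_query

rankings = read_ranking_file('eval_output.tsv')  # hypothetical path
top1 = sum(1 for recs in rankings.values()
           if min(recs, key=lambda r: r[2])[3] > 0) / float(max(len(rankings), 1))
print('Top-1 precision recomputed from file: {}'.format(top1))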