Example #1
def do_eval(sess, error, placeholder, dev, devtest, curr_best, FLAGS, error_file_name, rel2idx, word2idx):
  feed_dict_dev = feeder.fill_feed_dict(dev, placeholder, rel2idx, 0)
  true_label = feed_dict_dev[placeholder['label_placeholder']]
  pred_error = sess.run(error, feed_dict=feed_dict_dev)
  print('Dev Stats:', end='')
  thresh, _ = best_threshold(pred_error, true_label)

  # evaluate devtest
  feed_dict_devtest = feeder.fill_feed_dict(devtest, placeholder, rel2idx, 0)
  true_label_devtest = feed_dict_devtest[placeholder['label_placeholder']]
  devtest_he_error = sess.run(error, feed_dict=feed_dict_devtest)

  pred = devtest_he_error <= thresh
  correct = (pred == true_label_devtest)
  accuracy = float(correct.astype('float32').mean())
  wrong_indices = np.logical_not(correct).nonzero()[0]
  wrong_preds = pred[wrong_indices]

  if accuracy > curr_best:
    # evaluate devtest errors in detail and write them to the error file
    error_file = open(error_file_name + "_test.txt", 'wt')
    if FLAGS.rel_acc:
      rel_acc_checker(feed_dict_devtest, placeholder, correct, dev, error_file, rel2idx)

    if FLAGS.error_analysis:
      err_analysis(dev, wrong_indices, feed_dict_devtest, placeholder, error_file, rel2idx, word2idx, devtest_he_error)

  return accuracy
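Examples #1 and #6 call best_threshold(pred_error, true_label), which is not defined in these snippets. Below is a minimal sketch, assuming the helper sweeps candidate thresholds over the prediction errors and returns the threshold and accuracy of the best cut; the name and exact search strategy are assumptions, not the original implementation.

import numpy as np

def best_threshold(pred_error, true_label):
    # Hypothetical sketch: pick the error threshold that maximizes accuracy.
    # Assumes lower error means a positive (label == 1) prediction, matching
    # the `pred = error <= thresh` convention used in do_eval above.
    best_thresh, best_acc = 0.0, -1.0
    for thresh in np.unique(pred_error):  # candidate cut points
        pred = pred_error <= thresh
        acc = float((pred == true_label).mean())
        if acc > best_acc:
            best_thresh, best_acc = float(thresh), acc
    return best_thresh, best_acc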
Example #2
def accuracy_eval(sess, error, placeholder, data_set, rel2idx, FLAGS, error_file_name):
    feed_dict = feeder.fill_feed_dict(data_set, placeholder, rel2idx, 0)
    true_label = feed_dict[placeholder['label_placeholder']]
    pred_error = sess.run(error, feed_dict=feed_dict)
    _, acc = best_f1_threshold(pred_error, true_label)
    print('auc', calc_auc(pred_error, true_label))
    return acc
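calc_auc is also referenced but not shown. A plausible sketch using scikit-learn is given below; it assumes lower error indicates the positive class, so the error is negated to form a ranking score. Both the helper name and this sign convention are assumptions about the original code.

import numpy as np
from sklearn.metrics import roc_auc_score

def calc_auc(pred_error, true_label):
    # Negate the error so that a larger score corresponds to the positive class,
    # then compute the standard ROC AUC.
    return roc_auc_score(true_label, -np.asarray(pred_error))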
Example #3
def dev_eval(sess, error, placeholder, data_set, rel2idx, FLAGS, error_file_name):
    feed_dict = feeder.fill_feed_dict(data_set, placeholder, rel2idx, 0)
    true_label = feed_dict[placeholder['label_placeholder']]
    pred_error = sess.run(error, feed_dict=feed_dict)
    pred_prob = np.exp(-1 * np.asarray(pred_error))
    pred_prob = np.clip(pred_prob, 0, 1)

    kldiv_mean = kl_divergence_batch(pred_prob, true_label)
    # pears_corr = np.corrcoef(pred_prob, true_label)[0,1] # Pearson
    pears_corr = pearsonr(pred_prob, true_label)[0]  # Pearson
    spear_corr = spearmanr(pred_prob, true_label)[0]  # Spearman
    return kldiv_mean, pears_corr, spear_corr
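Here pearsonr and spearmanr come from scipy.stats, while kl_divergence_batch is not shown. A minimal sketch follows, assuming each true label is itself a target probability and the metric is the mean KL divergence between per-example Bernoulli distributions, with clipping for numerical stability; the exact definition in the original code may differ.

import numpy as np

def kl_divergence_batch(pred_prob, true_prob, eps=1e-7):
    # Sketch: mean KL(Bernoulli(true) || Bernoulli(pred)) over a batch.
    p = np.clip(np.asarray(true_prob, dtype=np.float64), eps, 1 - eps)
    q = np.clip(np.asarray(pred_prob, dtype=np.float64), eps, 1 - eps)
    kl = p * np.log(p / q) + (1 - p) * np.log((1 - p) / (1 - q))
    return float(kl.mean())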
Example #4
def do_eval(sess, error, placeholder, dev, devtest, curr_best, FLAGS,
            error_file_name, rel2idx, word2idx):
    feed_dict_dev = feeder.fill_feed_dict(dev, placeholder, rel2idx, 0)
    true_label = feed_dict_dev[placeholder['label_placeholder']]
    pred_error = sess.run(error, feed_dict=feed_dict_dev)

    print('Dev Stats:', end='')
    print('AUC', calc_auc(pred_error, true_label))
    # print('average precision')
    # return average_precision_score(true_label, -pred_error)

    thresh, _ = best_f1_threshold(pred_error, true_label)
    # thresh, _ = best_accu_threshold(pred_error, true_label)

    # evaluate devtest
    feed_dict_devtest = feeder.fill_feed_dict(devtest, placeholder, rel2idx, 0)
    true_label_devtest = feed_dict_devtest[placeholder['label_placeholder']]
    devtest_he_error = sess.run(error, feed_dict=feed_dict_devtest)
    print('Dev Test AUC', calc_auc(devtest_he_error, true_label_devtest))
    # f1 score calculation
    tp, tn, fp, fn = 0, 0, 0, 0
    for n in range(len(devtest_he_error)):
        if devtest_he_error[n] <= thresh and true_label_devtest[n] == 1:
            tp += 1
        if devtest_he_error[n] <= thresh and true_label_devtest[n] == 0:
            fp += 1
        if devtest_he_error[n] > thresh and true_label_devtest[n] == 1:
            fn += 1
        if devtest_he_error[n] > thresh and true_label_devtest[n] == 0:
            tn += 1
    if (tp + fp) > 0:
        precision = tp / (tp + fp)
    else:
        precision = 0
    if (tp + fn) > 0:
        recall = tp / (tp + fn)
    else:
        recall = 0
    print('precision, recall', precision, recall)
    if precision + recall > 0:
        f1 = (2 * precision * recall) / (precision + recall)
    else:
        f1 = 0

    # accuracy calculation
    pred = devtest_he_error <= thresh
    correct = (pred == true_label_devtest)
    accuracy = float(correct.astype('float32').mean())
    wrong_indices = np.logical_not(correct).nonzero()[0]
    wrong_preds = pred[wrong_indices]

    if FLAGS.error_analysis:
        error_file = open(error_file_name + "_test.txt", 'wt')
        print('error analysis')
        err_analysis(dev, wrong_indices, feed_dict_devtest, placeholder, error_file, rel2idx, word2idx,
                     devtest_he_error)

    if accuracy > curr_best:
        # evaluate devtest in detail for the new best model
        error_file = open(error_file_name + "_test.txt", 'wt')
        if FLAGS.rel_acc:
            rel_acc_checker(feed_dict_devtest, placeholder, correct, dev, error_file, rel2idx)

        if FLAGS.error_analysis:
            print('error analysis')
            err_analysis(dev, wrong_indices, feed_dict_devtest, placeholder, error_file, rel2idx, word2idx,
                         devtest_he_error)

    return f1
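The confusion-matrix loop in Example #4 can be written with NumPy in a few vectorized lines. The sketch below mirrors the same "error <= thresh means positive" rule; it is an equivalent rewrite for illustration, not the original code.

import numpy as np

def f1_from_threshold(errors, labels, thresh):
    # Vectorized precision/recall/F1 under the 'error <= thresh => positive' rule.
    errors = np.asarray(errors)
    labels = np.asarray(labels)
    pred = errors <= thresh
    tp = np.sum(pred & (labels == 1))
    fp = np.sum(pred & (labels == 0))
    fn = np.sum(~pred & (labels == 1))
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0
    return precision, recall, f1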
Example #5
def run_training():
    exp_name = 'time' + str(datetime.now()) + 'train_file' + str(FLAGS.train_file) + 'freeze_grad' + str(
        FLAGS.freeze_grad) + 'neg' + str(FLAGS.neg) + 'model' + str(FLAGS.model) + '_measure' + str(FLAGS.measure) + \
               '_w1' + str(FLAGS.w1) + '_w2' + str(FLAGS.w2) + '_learning_rate' + str(
        FLAGS.learning_rate) + '_batchsize' + str(FLAGS.batch_size) + '_dim' + str(FLAGS.embed_dim) + \
               '_cube_eps' + str(FLAGS.cube_eps) + '_steps' + str(FLAGS.max_steps) + '_softfreeze' + str(
        FLAGS.softfreeze) + '_r1' + str(FLAGS.r1) + '_paireval' + str(FLAGS.pair_eval)
    print('experiment file name', exp_name)
    error_file_name = FLAGS.error_file + exp_name + '.txt'
    save_model_name = FLAGS.params_file + exp_name + '.pkl'
    log_folder = FLAGS.log_file + exp_name + '/'

    # define lists of evaluation scores
    train_acc_list, dev2_acc_list = [], []
    curr_best = 0.0

    # reading the data set is a one-time cost, so a slightly longer load time is fine
    data_sets = data_loader.read_data_sets(FLAGS)
    if FLAGS.overfit:
        train_data = data_sets.dev
        train_test_data = data_sets.dev
    else:
        train_data = data_sets.train
        train_test_data = data_sets.train_test

    with tf.Graph().as_default():
        print('Build Model...')
        placeholder = feeder.define_placeholder()
        if FLAGS.model == 'transe':
            model = transe_model.tf_model(data_sets, placeholder, FLAGS)
        elif FLAGS.model == 'cube' and FLAGS.rel_size > 1:
            model = multi_model.tf_model(data_sets, placeholder, FLAGS)
        elif FLAGS.model == 'cube' and FLAGS.rel_size == 1:
            model = tf_model(data_sets, placeholder, FLAGS)
        else:
            raise ValueError('no valid model combination, transe or cube')
        eval_neg_prob = model.eval_prob
        print('Build Training Function...')
        train_op = model.training(model.loss, FLAGS.epsilon,
                                  FLAGS.learning_rate)

        sess = tf.Session()
        init = tf.global_variables_initializer()
        sess.run(init)
        if not os.path.exists(log_folder):
            os.makedirs(log_folder)
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(log_folder, graph=sess.graph)

        for step in range(FLAGS.max_steps):
            start_time = time.time()
            train_feed_dict = feeder.fill_feed_dict(train_data, placeholder,
                                                    data_sets.rel2idx,
                                                    FLAGS.batch_size)
            _, loss_value, summary = sess.run(
                [train_op, model.loss, summary_op], feed_dict=train_feed_dict)

            summary_writer.add_summary(summary, step)
            duration = time.time() - start_time

            if step % FLAGS.print_every == 0:
                print('=' * 100)
                print('Epoch %d: kb_loss = %.5f (%.3f sec)' %
                      (train_data._epochs_completed, loss_value, duration))
                print('Training Stats:', end='')
                train_acc = evaluater.single_eval(sess, eval_neg_prob,
                                                  placeholder, train_test_data,
                                                  data_sets.rel2idx, FLAGS,
                                                  error_file_name)
                train_acc_list.append(train_acc)

                dev2_acc = evaluater.do_eval(sess, eval_neg_prob, placeholder,
                                             data_sets.dev, data_sets.devtest,
                                             curr_best, FLAGS, error_file_name,
                                             data_sets.rel2idx,
                                             data_sets.word2idx)
                dev2_acc_list.append(dev2_acc)
                print("Accuracy for Devtest: %.5f" % dev2_acc)

                if dev2_acc >= curr_best:
                    curr_best = dev2_acc
                    if FLAGS.save:
                        save_model(save_model_name, sess, model)
                print('current best accuracy', curr_best)

        print('Average of Top 10 Training Score',
              np.mean(sorted(train_acc_list, reverse=True)[:10]))
        opt_idx = np.argmax(np.asarray(dev2_acc_list))
        print('Epoch', opt_idx)
        print('Best Dev2 Score: %.5f' % dev2_acc_list[opt_idx])
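run_training calls save_model(save_model_name, sess, model) whenever the dev score improves, but that helper is not included in these snippets. Below is a minimal sketch, assuming it pickles the current values of the trainable variables keyed by name; the original may instead use a tf.train.Saver or store model-specific tensors.

import pickle
import tensorflow as tf

def save_model(file_name, sess, model):
    # Hypothetical sketch: dump trainable variable values to a pickle file.
    values = {v.name: sess.run(v) for v in tf.trainable_variables()}
    with open(file_name, 'wb') as f:
        pickle.dump(values, f)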
Example #6
def single_eval(sess, error, placeholder, data_set, rel2idx, FLAGS, error_file_name):
  feed_dict = feeder.fill_feed_dict(data_set, placeholder, rel2idx, 0)
  true_label = feed_dict[placeholder['label_placeholder']]
  pred_error = sess.run(error, feed_dict=feed_dict)
  _, acc = best_threshold(pred_error, true_label)
  return acc
Example #7
def run_training():
    # exp_name = 'time' + str(datetime.now()) + 'train_file' + str(FLAGS.train_file) + 'freeze_grad' + str(
    #     FLAGS.freeze_grad) + 'neg' + str(FLAGS.neg) + 'model' + str(FLAGS.model) + '_measure' + str(FLAGS.measure) + \
    #            '_w1' + str(FLAGS.w1) + '_w2' + str(FLAGS.w2) + '_learning_rate' + str(
    #     FLAGS.learning_rate) + '_batchsize' + str(FLAGS.batch_size) + '_dim' + str(FLAGS.embed_dim) + \
    #            '_cube_eps' + str(FLAGS.cube_eps) + '_steps' + str(FLAGS.max_steps) + '_softfreeze' + str(
    #     FLAGS.softfreeze) + '_r1' + str(FLAGS.r1) + '_paireval' + str(FLAGS.pair_eval)

    exp_name = 'time' + str(datetime.now()) + '_EXP' + str(FLAGS.train_dir) + \
    '_w1' + str(FLAGS.w1) + '_w2' + str(FLAGS.w2) + '_r1' + str(FLAGS.r1) + \
    '_dim' + str(FLAGS.embed_dim) + '_lr' + str(FLAGS.learning_rate)

    exp_name = exp_name.replace(":", "-")
    exp_name = exp_name.replace("/", "-")
    print('experiment file name:', exp_name)
    error_file_name = FLAGS.error_file + exp_name + '.txt'
    # note: the split assumes exp_name contains the literal "_EXP.-",
    # e.g. when FLAGS.train_dir starts with './' before '/' is replaced by '-'
    save_model_name = FLAGS.params_file + exp_name.split("_EXP.-")[1] + '.pkl'
    log_folder = FLAGS.log_file + exp_name + '/'

    loss_file = log_folder + 'losses.txt'
    eval_file = log_folder + 'evals.txt'
    dev_res = log_folder + 'dev_results.txt'
    viz_dict_file = log_folder + 'viz_dict.npy'
    viz_dict = {}  # key: epoch_item1_item2, val: conditional probability

    if FLAGS.init_embedding == "pre_train":
        loss_file = log_folder + 'pre_train_losses.txt'
        eval_file = log_folder + 'pre_train_evals.txt'
        dev_res = log_folder + 'pre_train_dev_results.txt'
        if not FLAGS.init_embedding_file:
            FLAGS.init_embedding_file = save_model_name

    curr_best = np.inf  # lower KL divergence is better, so start from the maximum

    # reading the data set is a one-time cost, so a slightly longer load time is fine
    data_sets = data_loader.read_data_sets(FLAGS)
    if FLAGS.overfit:
        train_data = data_sets.dev
        train_test_data = data_sets.dev
    else:
        train_data = data_sets.train
        train_test_data = data_sets.train_test

    with tf.Graph().as_default():
        print('Build Model...')
        placeholder = feeder.define_placeholder()
        if FLAGS.model == 'transe':
            model = transe_model.tf_model(data_sets, placeholder, FLAGS)
        elif FLAGS.model == 'oe':
            print('OE')
            model = oe_model.tf_model(data_sets, placeholder, FLAGS)
        elif FLAGS.model == 'cube' and FLAGS.rel_size > 1:
            model = multi_model.tf_model(data_sets, placeholder, FLAGS)
        elif FLAGS.model == 'cube' and FLAGS.rel_size == 1:
            print('hard box')
            model = tf_model(data_sets, placeholder, FLAGS)
        elif FLAGS.model == 'softbox':
            print('softbox')
            model = soft_model.tf_model(data_sets, placeholder, FLAGS)
        else:
            raise ValueError('no valid model; expected transe, oe, cube, or softbox')
        eval_neg_prob = model.eval_prob
        print('Build Training Function...')
        train_op = model.training(model.loss, FLAGS.epsilon,
                                  FLAGS.learning_rate)

        sess = tf.Session()
        init = tf.global_variables_initializer()
        sess.run(init)
        # gradient plot
        # grad_norm_list = []
        # plt.ion()

        i = 0  # counter of evaluations without dev-set improvement (for early stopping)
        log_folder = log_folder.replace(":", "-")[:150]
        if not os.path.exists(log_folder):
            os.makedirs(log_folder)
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(log_folder, graph=sess.graph)
        if FLAGS.marginal_method == 'softplus' or FLAGS.model == 'box':
            sess.run([model.project_op])

        for step in range(FLAGS.max_steps):
            start_time = time.time()
            train_feed_dict = feeder.fill_feed_dict(train_data, placeholder,
                                                    data_sets.rel2idx,
                                                    FLAGS.batch_size)
            if FLAGS.marginal_method == 'softplus' or FLAGS.model == 'box':
                sess.run([model.project_op], feed_dict=train_feed_dict)
            _, cond_loss, marg_loss, reg_loss, loss_value, temperature, summary = sess.run(
                [
                    train_op, model.cond_loss, model.marg_loss,
                    model.regularization, model.loss, model.temperature,
                    summary_op
                ],
                feed_dict=train_feed_dict)

            # grad_norm_list.append(grad_norm)
            # moving_average = np.convolve(grad_norm_list, np.ones((50,))/50, mode='valid')
            # if step %5:
            #     plt.figure(1)
            #     plt.plot(moving_average)
            #     plt.draw()
            #     plt.pause(0.0001)
            #     plt.clf()
            # min_embed, delta_embed = sess.run([model.min_embed, model.delta_embed], feed_dict=train_feed_dict)
            debug, loss_value, summary = sess.run(
                [model.debug, model.loss, summary_op],
                feed_dict=train_feed_dict)
            summary_writer.add_summary(summary, step)
            duration = time.time() - start_time

            if step % FLAGS.print_every == 0 and step > 1:
                print('=' * 100)
                print('step', step)
                print('temperature', temperature)
                if temperature > 0.0001:
                    sess.run(model.temperature_update)
                print('Epoch %d: Total_loss = %.5f (%.3f sec)' %
                      (train_data._epochs_completed, loss_value, duration))
                print(
                    'Conditional loss: %.5f, Marginal loss: %.5f , Regularization loss: %.5f'
                    % (cond_loss, marg_loss, reg_loss))
                print('w Stats:', end='')

                loss_tuple = (loss_value, cond_loss, marg_loss, reg_loss)

                # Should be calculated on the subset of training data, not the traintest!
                # train_eval is a tuple of (KL, Pearson, Spearman)
                train_eval = evaluater.kl_corr_eval(sess, eval_neg_prob,
                                                    placeholder,
                                                    train_test_data,
                                                    data_sets.rel2idx, FLAGS,
                                                    error_file_name)
                print("Train eval KL & Corr:", train_eval, end='\n')

                with open(loss_file, "a") as lfile:
                    lfile.write(str(loss_tuple)[1:-1] + '\n')

                with open(eval_file, "a") as efile:
                    efile.write(str(train_eval)[1:-1] + '\n')

                # Overwrite any previously saved model with the current one
                if FLAGS.save:
                    save_model(save_model_name, sess, model)

            if FLAGS.visualize:
                # Process data for visualizing confusion matrix and rectangle plots
                viz_dict = evaluater.visualization(
                    sess, model, viz_dict, train_feed_dict,
                    train_data._epochs_completed)

        # DEV SET EVAL -- eval on dev set after training is over!
        dev_err_file = 'dev_' + error_file_name
        dev_eval = evaluater.kl_corr_eval(sess, eval_neg_prob, placeholder,
                                          data_sets.dev, data_sets.rel2idx,
                                          FLAGS, dev_err_file)

        # Save the dev set results
        print("DEV data eval:", dev_eval)
        with open(dev_res, 'w') as dfile:
            dfile.write(str(dev_eval)[1:-1])

        if FLAGS.visualize:
            print("Saved viz dict to file:", viz_dict_file)
            np.save(viz_dict_file, viz_dict)
            np.save(log_folder + "word2idx.npy", data_sets.word2idx)
            np.save(log_folder + "idx2word.npy", data_sets.idx2word)
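Both run_training variants rely on a module-level FLAGS object that is not defined in these snippets. Below is a minimal sketch of how such flags are typically declared with tf.app.flags in TensorFlow 1.x; the flag names mirror those referenced above, but the defaults and help strings are placeholders, not the original configuration.

import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_string('model', 'softbox', 'model type: transe, oe, cube, or softbox')
flags.DEFINE_float('learning_rate', 1e-3, 'optimizer learning rate')
flags.DEFINE_integer('batch_size', 128, 'training batch size')
flags.DEFINE_integer('embed_dim', 50, 'embedding dimensionality')
flags.DEFINE_integer('max_steps', 10000, 'number of training steps')
flags.DEFINE_integer('print_every', 100, 'evaluation/logging interval in steps')
flags.DEFINE_boolean('save', True, 'whether to save the best model so far')
flags.DEFINE_boolean('overfit', False, 'train on the dev set for debugging')
FLAGS = flags.FLAGS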
Example #8
def run_training():
    exp_name = 'time' + str(datetime.now()) + 'train_file' + str(FLAGS.train_file) + 'freeze_grad' + str(
        FLAGS.freeze_grad) + 'neg' + str(FLAGS.neg) + 'model' + str(FLAGS.model) + '_measure' + str(FLAGS.measure) + \
               '_w1' + str(FLAGS.w1) + '_w2' + str(FLAGS.w2) + '_learning_rate' + str(
        FLAGS.learning_rate) + '_batchsize' + str(FLAGS.batch_size) + '_dim' + str(FLAGS.embed_dim) + \
               '_cube_eps' + str(FLAGS.cube_eps) + '_steps' + str(FLAGS.max_steps) + '_softfreeze' + str(
        FLAGS.softfreeze) + '_r1' + str(FLAGS.r1) + '_paireval' + str(FLAGS.pair_eval)
    print('experiment file name', exp_name)
    error_file_name = FLAGS.error_file + exp_name + '.txt'
    save_model_name = FLAGS.params_file + exp_name + '.pkl'
    log_folder = FLAGS.log_file + exp_name + '/'

    # define lists of evaluation scores
    train_acc_list, dev2_acc_list = [], []
    curr_best = 0.0

    # reading the data set is a one-time cost, so a slightly longer load time is fine
    data_sets = data_loader.read_data_sets(FLAGS)
    if FLAGS.overfit:
        train_data = data_sets.dev
        train_test_data = data_sets.dev
    else:
        train_data = data_sets.train
        train_test_data = data_sets.train_test

    with tf.Graph().as_default():
        print('Build Model...')
        placeholder = feeder.define_placeholder()
        if FLAGS.model == 'transe':
            model = transe_model.tf_model(data_sets, placeholder, FLAGS)
        elif FLAGS.model == 'oe':
            print('OE')
            model = oe_model.tf_model(data_sets, placeholder, FLAGS)
        elif FLAGS.model == 'cube' and FLAGS.rel_size > 1:
            model = multi_model.tf_model(data_sets, placeholder, FLAGS)
        elif FLAGS.model == 'cube' and FLAGS.rel_size == 1:
            print('hard box')
            model = tf_model(data_sets, placeholder, FLAGS)
        elif FLAGS.model == 'softbox':
            print('softbox')
            model = soft_model.tf_model(data_sets, placeholder, FLAGS)
        else:
            raise ValueError('no valid model; expected transe, oe, cube, or softbox')
        eval_neg_prob = model.eval_prob
        print('Build Training Function...')
        train_op = model.training(model.loss, FLAGS.epsilon,
                                  FLAGS.learning_rate)

        sess = tf.Session()
        init = tf.global_variables_initializer()
        sess.run(init)
        # gradient plot
        # grad_norm_list = []
        # plt.ion()

        i = 0  # counter of evaluations without dev-set improvement
        if not os.path.exists(log_folder):
            os.makedirs(log_folder)
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(log_folder, graph=sess.graph)
        if FLAGS.marginal_method == 'softplus' or FLAGS.model == 'box':
            sess.run([model.project_op])
        for step in range(FLAGS.max_steps):
            start_time = time.time()
            train_feed_dict = feeder.fill_feed_dict(train_data, placeholder,
                                                    data_sets.rel2idx,
                                                    FLAGS.batch_size)
            if FLAGS.marginal_method == 'softplus' or FLAGS.model == 'box':
                sess.run([model.project_op], feed_dict=train_feed_dict)
            _, cond_loss, marg_loss, reg_loss, loss_value, temperature, summary = sess.run(
                [
                    train_op, model.cond_loss, model.marg_loss,
                    model.regularization, model.loss, model.temperature,
                    summary_op
                ],
                feed_dict=train_feed_dict)
            # grad_norm_list.append(grad_norm)
            # moving_average = np.convolve(grad_norm_list, np.ones((50,))/50, mode='valid')
            # if step %5:
            #     plt.figure(1)
            #     plt.plot(moving_average)
            #     plt.draw()
            #     plt.pause(0.0001)
            #     plt.clf()
            # min_embed, delta_embed = sess.run([model.min_embed, model.delta_embed], feed_dict=train_feed_dict)
            debug, loss_value, summary = sess.run(
                [model.debug, model.loss, summary_op],
                feed_dict=train_feed_dict)
            summary_writer.add_summary(summary, step)
            duration = time.time() - start_time
            if step % FLAGS.print_every == 0 and step > 1:
                print('=' * 100)
                print('step', step)
                print('temperature', temperature)
                if temperature > 0.0001:
                    sess.run(model.temperature_update)
                print('Epoch %d: kb_loss = %.5f (%.3f sec)' %
                      (train_data._epochs_completed, loss_value, duration))
                print(
                    'Conditional loss: %.5f, Marginal loss: %.5f , Regularization loss: %.5f'
                    % (cond_loss, marg_loss, reg_loss))
                print('Training Stats:', end='')
                train_acc = evaluater.accuracy_eval(sess, eval_neg_prob,
                                                    placeholder,
                                                    train_test_data,
                                                    data_sets.rel2idx, FLAGS,
                                                    error_file_name)
                train_acc_list.append(train_acc)

                dev2_acc = evaluater.do_eval(sess, eval_neg_prob, placeholder,
                                             data_sets.dev, data_sets.devtest,
                                             curr_best, FLAGS, error_file_name,
                                             data_sets.rel2idx,
                                             data_sets.word2idx)
                dev2_acc_list.append(dev2_acc)
                print("Accuracy for Devtest: %.5f" % dev2_acc)

                print(i)
                if dev2_acc >= curr_best:
                    i = 0
                    curr_best = dev2_acc
                    if FLAGS.save:
                        save_model(save_model_name, sess, model)
                # elif dev2_acc < curr_best and i<50:
                #     i+=1
                # elif i>=50:
                # sys.exit()
                print('current best accuracy: %.5f' % curr_best)
                print('Average of dev2 score %.5f' %
                      (np.mean(sorted(dev2_acc_list, reverse=True)[:10])))

        print('Average of Top 10 Training Score',
              np.mean(sorted(train_acc_list, reverse=True)[:10]))
        opt_idx = np.argmax(np.asarray(dev2_acc_list))
        print('Epoch', opt_idx)
        # print('Best Dev2 Score: %.5f' %dev2_acc_list[opt_idx])
        print('Average of dev2 score %.5f' %
              (np.mean(sorted(dev2_acc_list, reverse=True)[:10])))
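Example #8 initializes a patience counter i and keeps the early-stopping branch commented out. Below is a small self-contained sketch of one way that logic could be expressed, assuming a patience of 50 evaluations; the threshold and placement are assumptions, not the original behavior.

import numpy as np

def should_stop(history, patience=50):
    # Return True if the best score occurred more than `patience` evaluations ago.
    # `history` stands in for the list of dev2 accuracies collected at each eval step.
    if not history:
        return False
    best_idx = int(np.argmax(history))
    return (len(history) - 1 - best_idx) >= patience

Inside the training loop this would be used right after appending dev2_acc, e.g. `if should_stop(dev2_acc_list): break`, replacing the commented-out counter updates above.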