Example #1
def valid_eval(data_in, task='FNER', eval_type=None, final=False):
    # Unpack mentions, their left/right contexts, and the gold labels.
    m1 = data_in['mention']
    l1 = data_in['left_context']
    r1 = data_in['right_context']
    lab = data_in['label']
    # Pad each batch of sequences to a uniform length.
    lf_id = pad_single(l1)
    rt_id = pad_single(r1)
    m_ = pad_single(m1)
    # m_, lf_id, rt_id = pad_method(m1, l1, r1)
    collector = []
    true = []
    total_loss = []
    iters = 0
    p1 = 100  # evaluation batch size
    # Score and compute the loss over fixed-size slices of the padded inputs.
    for k in range(0, len(m_), p1):
        s = Model.predict(lf_id[k:k + p1],
                          rt_id[k:k + p1],
                          context_data=None,
                          mention_representation_data=m_[k:k + p1],
                          feature_data=None,
                          doc_vector=None)
        loss_val = Model.error(lf_id[k:k + p1],
                               rt_id[k:k + p1],
                               lab[k:k + p1],
                               context_data=None,
                               mention_representation_data=m_[k:k + p1],
                               feature_data=None,
                               doc_vector=None)

        r = lab[k:k + p1]
        collector.append(s)
        true.append(r)
        total_loss.append(loss_val)
        iters += 1
    average_eval_loss = sum(total_loss) / iters
    print(task + " Loss: ", average_eval_loss)
    # Stack per-batch outputs into single (n_examples, n_labels) matrices;
    # np.vstack also handles a shorter final batch.
    collector = np.squeeze(np.vstack(collector))
    true = np.vstack(true)
    print(collector.shape, true.shape)
    strict_f1 = acc_hook(collector, true)
    logging.info(str(eval_type) + " FNER loss: {}".format(average_eval_loss))
    if final:
        fname = args.dataset + "_" + args.encoder + "_" + str(
            args.feature) + "_" + str(args.hier) + "_" + str(
                args.dataset_kge) + ".txt"
        save_predictions(collector, true, dicts["id2label"], fname)
    return strict_f1
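
The core pattern in Example #1, slicing padded arrays into fixed-size chunks, predicting per chunk, and stacking the per-chunk outputs, can be isolated. Below is a self-contained sketch of that pattern; batched_apply and the stub predict are hypothetical stand-ins, not part of the project above.

import numpy as np

def batched_apply(fn, arrays, batch_size=100):
    # Apply fn to aligned slices of the input arrays and stack the results,
    # mirroring the k-loop over p1-sized slices in valid_eval above.
    n = len(arrays[0])
    outputs = [fn(*(a[k:k + batch_size] for a in arrays))
               for k in range(0, n, batch_size)]
    return np.vstack(outputs)

# Stub predictor standing in for Model.predict; returns dummy label scores.
def predict(left, right, mention):
    return np.zeros((len(mention), 4))

left = np.zeros((250, 10), dtype=np.int64)     # padded left-context ids
right = np.zeros((250, 10), dtype=np.int64)    # padded right-context ids
mention = np.zeros((250, 5), dtype=np.int64)   # padded mention ids

scores = batched_apply(predict, [left, right, mention])
print(scores.shape)  # (250, 4)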
Example #2
def valid_eval(data_in, task, eval_type=None, final=False):
    if task == 'FNER':
        collector = []
        true = []
        iters = 0
        total_loss = []
        # One evaluation pass from the batcher: contexts, mention
        # representations, labels, features, and mention/context token ids.
        c_, m_, lab, f, d, s_in, m_id, l_id, r_id = data_in.next()
        lf_id = pad_single(l_id)
        rt_id = pad_single(r_id)
        rt_id = np.flip(rt_id, axis=-1)  # reverse each right-context row
        p1 = 100  # evaluation batch size
        for k in range(0, len(c_), p1):
            s = Model.predict(lf_id[k:k + p1],
                              rt_id[k:k + p1],
                              context_data=None,
                              mention_representation_data=m_[k:k + p1],
                              feature_data=f[k:k + p1],
                              doc_vector=None)
            loss_val = Model.error(lf_id[k:k + p1],
                                   rt_id[k:k + p1],
                                   lab[k:k + p1],
                                   context_data=None,
                                   mention_representation_data=m_[k:k + p1],
                                   feature_data=f[k:k + p1],
                                   doc_vector=None)

            r = lab[k:k + p1]
            collector.append(s)
            true.append(r)
            total_loss.append(loss_val)
            iters += 1
        average_eval_loss = sum(total_loss) / iters
        print(task + " Loss: ", average_eval_loss)
        # Stack per-batch outputs into single (n_examples, n_labels) matrices.
        collector = np.squeeze(np.vstack(collector))
        true = np.vstack(true)
        print(collector.shape, true.shape)
        strict_f1 = acc_hook(collector, true)
        logging.info(
            str(eval_type) + " FNER loss: {}".format(average_eval_loss))
        if final:
            fname = args.dataset + "_" + args.encoder + "_" + str(
                args.feature) + "_" + str(args.hier) + "_" + str(
                    args.dataset_kge) + ".txt"
            save_predictions(collector, true, dicts["id2label"], fname)
        return strict_f1
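
The main difference from Example #1 is the np.flip call, which reverses each padded right-context row, presumably so the encoder reads the right context from its far end back toward the mention. A self-contained illustration of what the flip does:

import numpy as np

# Padded right-context ids for two mentions (0 = PAD).
rt_id = np.array([[7, 8, 9, 0],
                  [4, 5, 0, 0]])

# Reversing along the last axis puts the padding first and the token
# nearest the mention last.
print(np.flip(rt_id, axis=-1))
# [[0 9 8 7]
#  [0 0 5 4]]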
Example #3
test_batcher = Batcher(test_dataset["storage"], test_dataset["data"],
                       test_dataset["data"].shape[0], 10, dicts["id2vec"])

step_par_epoch = 2000 if args.dataset == "figer" else 150

print "start trainning"
for epoch in range(5):
    train_batcher.shuffle()
    print "epoch", epoch
    for i in range(step_par_epoch):
        context_data, mention_representation_data, target_data, feature_data = train_batcher.next(
        )
        model.train(context_data, mention_representation_data, target_data,
                    feature_data)

    print "------dev--------"
    context_data, mention_representation_data, target_data, feature_data = dev_batcher.next(
    )
    scores = model.predict(context_data, mention_representation_data,
                           feature_data)
    acc_hook(scores, target_data)

print "Training completed.  Below are the final test scores: "
print "-----test--------"
context_data, mention_representation_data, target_data, feature_data = test_batcher.next(
)
scores = model.predict(context_data, mention_representation_data, feature_data)
acc_hook(scores, target_data)

print "Cheers!"
Example #4
File: train.py  Project: xiaoanshi/LME
                          vocab_size)

test_dataset = joblib.load("data/" + d + "/test_" + args.dataset + ".pkl")
test_batch_size = test_dataset["data"].shape[0]
if args.cs:
    test_batch_size = 1
print "test_size: ", test_dataset["data"].shape[0]

test_batcher = Batcher(test_dataset["storage"], test_dataset["data"],
                       test_batch_size, 10, dicts["id2vec"], vocab_size)
if args.test:
    # only works for Wiki
    model.load_all("./Models/" + d + "/lamb" + str(args.lamb) + "/model")
    batch_data = test_batcher.next()
    scores = model.predict(batch_data)
    acc_hook(scores, batch_data["Y"])
    sys.exit(0)

step_par_epoch = train_dataset["data"].shape[0] // batch_size

if args.cs:
    # only works for wiki

    # Map a sequence of word ids back to words.
    id2word = lambda y: [dicts["id2word"][x] for x in y]
    sent = []
    enti = []
    ybase = []
    yseds = []
    y_ = []
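
Across these snippets, Batcher is constructed with the dataset storage, the data matrix, a batch size, a window length, and an embedding lookup, and is driven through shuffle() and next(). Its internals are not shown; a toy stand-in with the same two-method interface might look like this (the data layout is invented for illustration):

import numpy as np

class MiniBatcher:
    # Toy stand-in for the Batcher used above: shuffles rows, then yields
    # successive fixed-size batches via next(), wrapping around at the end.
    def __init__(self, data, batch_size):
        self.data = data
        self.batch_size = batch_size
        self.cursor = 0

    def shuffle(self):
        np.random.shuffle(self.data)
        self.cursor = 0

    def next(self):
        if self.cursor + self.batch_size > len(self.data):
            self.cursor = 0  # wrap once a full batch no longer fits
        batch = self.data[self.cursor:self.cursor + self.batch_size]
        self.cursor += self.batch_size
        return batch

batcher = MiniBatcher(np.arange(10), batch_size=4)
batcher.shuffle()
print(batcher.next())  # first 4 rows of the shuffled data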
Example #5
            #     print str(epoch)+" "+str(i)+" label:"+str(label)+" train time: "+str((endtime-time4).total_seconds())

        endtime = datetime.datetime.now()
        print(str(epoch) + " " + str(i) + " train time: " +
              str((endtime - time3).total_seconds()))
    # print("loss: " + str(loss))
    endtime = datetime.datetime.now()
    print("epoch" + str(epoch) + " train time: " +
          str((endtime - time2).total_seconds()))

    print "------dev--------"
    context_data, mention_representation_data, target_data, feature_data = dev_batcher.next(
    )
    scores = model.predict(context_data, mention_representation_data,
                           feature_data, 0)
    acc_hook(scores, target_data, args.gaussian, 0, 1, args.path,
             label_hierarchy)
    if args.gaussian:
        np.savetxt(args.resultpath + "/scores_epoch" + str(epoch),
                   scores,
                   fmt='%f')
        scores = np.sort(scores, axis=1)  # sort each row ascending
        np.savetxt(args.resultpath + "/sorted_scores_epoch" + str(epoch),
                   scores,
                   fmt='%f')
    print "-----test--------"
    context_data, mention_representation_data, target_data, feature_data = test_batcher.next(
    )
    scores = model.predict(context_data, mention_representation_data,
                           feature_data, args.gaussian)
    acc_hook(scores, target_data, args.gaussian, 0, 1, args.path,
             label_hierarchy)
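
When args.gaussian is set, the loop above writes both the raw and the row-sorted score matrices to text files. A self-contained illustration of that pair of np.savetxt calls, with placeholder file names:

import numpy as np

scores = np.array([[0.3, 0.9, 0.1],
                   [0.7, 0.2, 0.8]])

# One mention per row, raw scores as written by the snippet above.
np.savetxt("scores_epoch0", scores, fmt='%f')
# np.sort along axis=1 sorts each row ascending before saving.
np.savetxt("sorted_scores_epoch0", np.sort(scores, axis=1), fmt='%f')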