Example #1
def evaluate(sess, model, name, data, id_to_tag, logger):
    """
    Evaluate F1 on the dev or test dataset.
    """
    logger.info("evaluate:{}".format(name))  # name: dev/test
    ner_results = model.evaluate(
        sess, data, id_to_tag)  # ner_results dimension: number of dev/test samples * actual length of each sentence
    # each element is a string: "character gold_tag predicted_tag"
    eval_lines = test_ner(ner_results, FLAGS.result_path)
    for line in eval_lines:
        logger.info(line)
    f1 = float(eval_lines[1].strip().split()[-1])

    if name == "dev":
        best_test_f1 = model.best_dev_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_dev_f1, f1).eval()
            logger.info("new best dev f1 score:{:>.3f}".format(f1))
        return f1 > best_test_f1
    elif name == "test":
        best_test_f1 = model.best_test_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_test_f1, f1).eval()
            logger.info("new best test f1 score:{:>.3f}".format(f1))
        return f1 > best_test_f1
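
Every example in this collection reads the F1 score off the second line of the report returned by test_ner, which wraps a conlleval-style scorer. A minimal sketch of that parse, assuming the usual conlleval summary format (the exact report text depends on the scorer behind test_ner):

eval_lines = [
    "processed 10000 tokens with 500 phrases; found: 490 phrases; correct: 445.",
    "accuracy:  97.80%; precision:  90.82%; recall:  89.00%; FB1:  89.90",
]
f1 = float(eval_lines[1].strip().split()[-1])  # -> 89.90, i.e. the overall FB1 value
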
Example #2
def evaluate(sess, model, name, data, id_to_tag, logger):
    """
    evaluate the model.
    :param sess: tf.Session()
    :param model: model
    :param name: "dev" or "test"
    :param data: dev data or test data
    :param id_to_tag: mapping from tag id to tag string
    :param logger: logger instance
    :return: (whether F1 improved over the stored best, current F1)
    """
    logger.info("evaluate:{}".format(name))
    ner_results = model.evaluate(sess, data, id_to_tag)  # get the result
    eval_lines = test_ner(ner_results, FLAGS.result_path)  # run the conlleval
    for line in eval_lines:
        logger.info(line)
    # get the F1 value
    f1 = float(eval_lines[1].strip().split()[-1])
    if name == "dev":
        best_test_f1 = model.best_dev_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_dev_f1, f1).eval()
            logger.info("new best dev f1 score:{:>.3f}".format(f1))
        return f1 > best_test_f1, f1
    elif name == "test":
        best_test_f1 = model.best_test_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_test_f1, f1).eval()
            logger.info("new best test f1 score:{:>.3f}".format(f1))
        return f1 > best_test_f1, f1
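
A hypothetical caller for the tuple-returning variant above (dev_manager, test_manager and save_model are assumptions, not defined in this snippet):

improved, dev_f1 = evaluate(sess, model, "dev", dev_manager, id_to_tag, logger)
if improved:
    save_model(sess, model, FLAGS.ckpt_path, logger)  # keep the checkpoint with the best dev F1
_, test_f1 = evaluate(sess, model, "test", test_manager, id_to_tag, logger)
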
Example #3
 def eval(self, sess, mode, data_manager):
     logger.info("=> Evaluate mode: {}".format(mode))
     # evaluate result
     ner_res = self.model.evaluate(sess, data_manager, self.id_2_tag)
     report = utils.test_ner(ner_res, FLAGS.result_path)
     for line in report:
         logger.info(line)
     # track the best-scoring model on the dev set
     f1 = float(report[1].strip().split()[-1])
     if mode == "dev":
         best_test_f1 = self.model.best_dev_f1.eval()
         if f1 > best_test_f1:
             tf.assign(self.model.best_dev_f1, f1).eval()
             logger.info(">>> new best dev f1 score:{:>.3f}".format(f1))
             self.model.save_model(sess,
                                   FLAGS.ckpt_path,
                                   name="best_score.ckpt")
             logger.info(">>> best model saved. ")
         return f1 > best_test_f1
     elif mode == "test":
         best_test_f1 = self.model.best_test_f1.eval()
         if f1 > best_test_f1:
             tf.assign(self.model.best_test_f1, f1).eval()
             logger.info(">>> !!! Test f1 score:{:>.3f}".format(f1))
         return f1 > best_test_f1
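
Note that tf.assign(self.model.best_dev_f1, f1) builds a new assign op in the graph every time evaluation runs, so the TF1 graph grows over a long training run. A minimal sketch of one way to avoid that, assuming the same TF1-style setup as above (the names new_f1 and update_best_dev_f1 are hypothetical):

# defined once, next to the model variables
new_f1 = tf.placeholder(tf.float32, shape=[], name="new_best_f1")
update_best_dev_f1 = tf.assign(model.best_dev_f1, new_f1)

# inside the evaluation routine, instead of tf.assign(...).eval()
if f1 > best_test_f1:
    sess.run(update_best_dev_f1, feed_dict={new_f1: f1})
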
Example #4
def CRF_eval(data, test_index, y_pred, path, self_eval):
    test_char = [data[i] for i in test_index]
    if self_eval:
        datawpred = [[[data[0], data[-1]] + [pred]
                      for data, pred in zip(test_char[j], y_pred[j])]
                     for j in range(len(y_pred))]
    else:
        datawpred = [[[data, pred]
                      for data, pred in zip(test_char[j], y_pred[j])]
                     for j in range(len(y_pred))]
    with open(path + "pred{}.conll".format(self_eval != True),
              'w',
              encoding='utf-8') as f:
        write_conll(f, input_data_transform(datawpred))
    if self_eval:
        test_ner(path)
Example #5
def evaluate(sess, model, name, data, id_tag):
    # sentences in this batch of validation data, with the gold tag and predicted tag for every character
    ner_results = model.evaluate(sess, data, id_tag)
    # write ner_results to a file, then run the scorer to get F1 and the other quality metrics
    eval_lines = test_ner(ner_results, FLAGS.result_path)
    f1 = float(eval_lines[1].strip().split()[-1])  # extract the F1 score
    return f1
Example #6
def evaluate(sess, model, name, data_manager, id_to_tag, logger, config):
    logger.info("evaluate:{}".format(name))
    ner_results = evaluate_(sess, model, data_manager, id_to_tag, config)
    eval_lines = test_ner(ner_results, config.result_path)
    for line in eval_lines:
        logger.info(line)
    f1 = float(eval_lines[1].strip().split()[-1])

    if name == "dev":
        best_test_f1 = model.best_dev_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_dev_f1, f1).eval()
            logger.info("new best dev f1 score:{:>.3f}".format(f1))
        return f1 > best_test_f1
Example #7
def eval_model(id_to_char, id_to_tag, test_manager, device, model_name=None):
    print("Eval ......")
    if not model_name:
        model_name = args.log_name
    old_weights = np.random.rand(len(id_to_char), args.word_embed_dim)
    pre_word_embed = load_word2vec("100.utf8", id_to_char, args.word_embed_dim,
                                   old_weights)
    e_model = Model(args, id_to_tag, device, pre_word_embed).to(device)
    e_model.load_state_dict(torch.load("./models/" + model_name + ".pkl"))
    print("model loaded ...")

    e_model.eval()
    all_results = []
    for batch in test_manager.iter_batch():

        strs, lens, chars, segs, subtypes, tags, adj, dep = batch
        chars = torch.LongTensor(chars).to(device)
        _lens = torch.LongTensor(lens).to(device)
        subtypes = torch.LongTensor(subtypes).to(device)
        tags = torch.LongTensor(tags).to(device)
        adj = torch.FloatTensor(adj).to(device)
        dep = torch.LongTensor(dep).to(device)
        logits, _ = e_model(chars, _lens, subtypes, adj, dep)
        """ Evaluate """
        # Decode
        batch_paths = []
        for index in range(len(logits)):
            length = lens[index]
            score = logits[index][:length]  # [seq, dim]
            probs = F.softmax(score, dim=-1)  # [seq, dim]
            path = torch.argmax(probs, dim=-1)  # [seq]
            batch_paths.append(path)

        for i in range(len(strs)):
            result = []
            string = strs[i][:lens[i]]
            gold = iobes_iob([id_to_tag[int(x)] for x in tags[i][:lens[i]]])
            pred = iobes_iob(
                [id_to_tag[int(x)] for x in batch_paths[i][:lens[i]]])
            for char, gold, pred in zip(string, gold, pred):
                result.append(" ".join([char, gold, pred]))
            all_results.append(result)

    all_eval_lines = test_ner(all_results, args.result_path, args.log_name)
    res_info = all_eval_lines[1].strip()
    f1 = float(res_info.split()[-1])
    print("eval: f1: {}".format(f1))
    return f1, res_info
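
Since softmax is monotonic, the softmax call in the decode loop above is not needed for greedy decoding; taking the argmax over the raw logits yields the same path. A minimal equivalent of that loop, under the same variable names:

batch_paths = [torch.argmax(logits[i][:lens[i]], dim=-1) for i in range(len(logits))]
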
Example #8
def evaluate_ner(sess, model, name, data, id_to_tag_ner, logger):
    logger.info("evaluate_ner:{}".format(name))
    ner_results_ner = model.evaluate_ner(sess, data, id_to_tag_ner)
    eval_lines = test_ner(ner_results_ner, FLAGS.result_path)
    for line in eval_lines:
        logger.info(line)
    f1 = float(eval_lines[1].strip().split()[-1])

    if name == "dev_ner":
        best_test_f1 = model.best_dev_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_dev_f1, f1).eval()
            logger.info("new best dev_ner f1 score:{:>.3f}".format(f1))
        return f1 > best_test_f1
    elif name == "test_ner":
        best_test_f1 = model.best_test_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_test_f1, f1).eval()
            logger.info("new best test_ner f1 score:{:>.3f}".format(f1))
        return f1 > best_test_f1
Example #9
def evaluate(sess, model, name, data, id_to_tag, logger):
    logger.info("evaluate:{}".format(name))
    ner_results = model.evaluate(sess, data, id_to_tag)
    eval_lines = test_ner(ner_results, FLAGS.result_path)
    for line in eval_lines:
        logger.info(line)
    f1 = float(eval_lines[1].strip().split()[-1])

    if name == "dev":
        best_test_f1 = model.best_dev_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_dev_f1, f1).eval()
            logger.info("new best dev f1 score:{:>.3f}".format(f1))
        return f1 > best_test_f1
    elif name == "test":
        best_test_f1 = model.best_test_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_test_f1, f1).eval()
            logger.info("new best test f1 score:{:>.3f}".format(f1))
        return f1 > best_test_f1
Example #10
def evaluate(sess, model, name, data, id_to_tag, logger):
    logger.info("evaluate:{}".format(name))
    #ner_result[0]: ['14313 B-c B-c', '10243 I-c I-c', '19167 I-c I-c', '19936 O O', '15274 O O']
    ner_results = model.evaluate(sess, data, id_to_tag)
    eval_lines = test_ner(ner_results, FLAGS.result_path)
    for line in eval_lines:
        logger.info(line)
    f1 = float(eval_lines[1].strip().split()[-1])

    if name == "dev":
        best_test_f1 = model.best_dev_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_dev_f1, f1).eval()
            logger.info("new best dev f1 score:{:>.3f}".format(f1))
        return f1 > best_test_f1
    elif name == "test":
        best_test_f1 = model.best_test_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_test_f1, f1).eval()
            logger.info("new best test f1 score:{:>.3f}".format(f1))
        return f1 > best_test_f1
Example #11
def dev_epoch(epoch, model, dev_manager, id_to_tag, device):
    # dev
    model.eval()
    all_results = []
    for batch in dev_manager.iter_batch():

        strs, lens, chars, segs, subtypes, tags, adj, dep = batch
        chars = torch.LongTensor(chars).to(device)
        _lens = torch.LongTensor(lens).to(device)
        subtypes = torch.LongTensor(subtypes).to(device)
        tags = torch.LongTensor(tags).to(device)
        adj = torch.FloatTensor(adj).to(device)
        dep = torch.LongTensor(dep).to(device)
        logits, _ = model(chars, _lens, subtypes, adj,
                          dep)  # [batch, seq, dim]
        """ Evaluate """
        # Decode
        batch_paths = []
        for index in range(len(logits)):
            length = lens[index]
            score = logits[index][:length]  # [seq, dim]
            probs = F.softmax(score, dim=-1)  # [seq, dim]
            path = torch.argmax(probs, dim=-1)  # [seq]
            batch_paths.append(path)

        for i in range(len(strs)):
            result = []
            string = strs[i][:lens[i]]
            gold = iobes_iob([id_to_tag[int(x)] for x in tags[i][:lens[i]]])
            pred = iobes_iob(
                [id_to_tag[int(x)] for x in batch_paths[i][:lens[i]]])
            for char, gold, pred in zip(string, gold, pred):
                result.append(" ".join([char, gold, pred]))
            all_results.append(result)

    all_eval_lines = test_ner(all_results, args.result_path, args.log_name)
    log_handler.info("epoch: {}, info: {}".format(epoch + 1,
                                                  all_eval_lines[1].strip()))
    f1 = float(all_eval_lines[1].strip().split()[-1])
    return f1, model
Example #12
def evaluate(sess, model, name, data, id_to_tag, logger, iter_times):
    logger.info("evaluate:{}".format(name))
    ner_results = model.evaluate(sess, data, id_to_tag)
    eval_lines = test_ner(ner_results, FLAGS.result_path)
    for line in eval_lines:
        logger.info(line)
    f1 = float(eval_lines[1].strip().split()[-1])

    if name == "dev":
        best_test_f1 = model.best_dev_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_dev_f1, f1).eval()
            logger.info("new best dev f1 score:{:>.3f}".format(f1))
        elif iter_times == 100:
            logger.info("Training finished; best F1 score: {:>.3f}".format(best_test_f1))
        return f1 > best_test_f1
    elif name == "test":
        best_test_f1 = model.best_test_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_test_f1, f1).eval()
            logger.info("new best test f1 score:{:>.3f}".format(f1))
        return f1 > best_test_f1
Example #13
def evaluate(sess, model, name, data, id_to_tag, logger):
    # run evaluation on one batch of test data and collect the results
    ner_results = model.evaluate(sess, data, id_to_tag)
    # write the predictions to the result path and run the scorer
    eval_lines = test_ner(ner_results, FLAGS.result_path)
    # log the evaluation report
    for line in eval_lines:
        logger.info(line)
    # extract the F1 score
    f1 = float(eval_lines[1].strip().split()[-1])
    # update the stored best F1 if it improved, then return the current F1
    if name == "dev":
        best_test_f1 = model.best_dev_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_dev_f1, f1).eval()
            logger.info("new best dev f1 score:{:>.3f}".format(f1))
        return f1
    elif name == "test":
        best_test_f1 = model.best_test_f1.eval()
        if f1 > best_test_f1:
            tf.assign(model.best_test_f1, f1).eval()
            logger.info("new best test f1 score:{:>.3f}".format(f1))
        return f1
Example #14
        def eval_step():
            global max_a
            ner_results = []
            correct = 0
            total = 0
            with torch.no_grad():
                for x, y, seq_length in zip(VX, VY, seq_lengths_V):
                    x = torch.tensor(x)
                    y = torch.tensor(y)

                    # model.hidden = model.init_hidden()
                    _, predicted = model(x, seq_length)

                    for bt in range(MB_SIZE):
                        bt_list = predicted[bt].tolist()
                        lenth = len(bt_list) - torch.t(
                            y)[bt].numpy().tolist().count(0)
                        total += lenth
                        correct += (torch.tensor(
                            bt_list[0:lenth]) == torch.t(y)[bt][0:lenth]
                                    ).sum().item()

                        block = []
                        for c in range(lenth):
                            block.append(id2char[torch.t(x)[bt][c].tolist()] +
                                         ' ' + id2tag[bt_list[c]] + ' ' +
                                         id2tag[torch.t(y)[bt][c].tolist()])
                        ner_results.append(block)
            eval_lines = test_ner(ner_results, '')
            a = float(eval_lines[1].strip().split()[-1])

            # a = 100.0 * correct / total
            print('acc = %.2f' % a)
            if a > max_a:
                # torch.save(model, 'model' + '_%.2f' % a + '.pkl')
                max_a = a
Example #15
def main():
    # load data sets
    global args
    args = parser.parse_args()
    pp.pprint(vars(args))
    # running_name = 'X'
    use_cuda = cuda_model.ifUseCuda(args.gpu_id, args.multiGpu)
    # use_cuda = False

    # train_file = 'data/example.train'
    # dev_file = 'data/example.dev'
    test_file = 'data/example.test'
    # embedding_file = 'data/vec.txt'
    map_file = 'map.pkl'
    # config_file = 'config_file_pytorch'
    tag_file = 'tag.pkl'
    # embedding_easy_file = 'data/easy_embedding.npy'
    # train_sentences = load_sentences(train_file)
    # dev_sentences = load_sentences(dev_file)
    test_sentences = load_sentences(test_file)
    # train_sentences = dev_sentences
    # update_tag_scheme(train_sentences, args.tag_schema)
    update_tag_scheme(test_sentences, args.tag_schema)
    # update_tag_scheme(dev_sentences, args.tag_schema)

    if not os.path.isfile(tag_file):
        print("Tag file {:s} Not found".format(tag_file))
        sys.exit(-1)
    else:
        with open(tag_file, 'rb') as t:
            tag_to_id, id_to_tag = pickle.load(t)

    if not os.path.isfile(map_file):
        print("Map file {:s} Not found".format(map_file))
        # create dictionary for word
        # dico_chars_train = char_mapping(train_sentences)[0]
        # dico_chars, char_to_id, id_to_char = augment_with_pretrained(
        #     dico_chars_train.copy(),
        #     embedding_file,
        #     list(itertools.chain.from_iterable(
        #         [[w[0] for w in s] for s in test_sentences])
        #     )
        # )
        # # _, tag_to_id, id_to_tag = tag_mapping(train_sentences)
        #
        # with open(map_file, "wb") as f:
        #     pickle.dump([char_to_id, id_to_char], f)
    else:
        with open(map_file, "rb") as f:
            char_to_id, id_to_char = pickle.load(f)

    test_data = prepare_dataset(test_sentences, char_to_id, tag_to_id)

    print("{:d} sentences in  test.".format(len(test_data)))

    test_manager = BatchManager(test_data, 1)

    save_places = dir_utils.save_places(args.eval)

    # log_path = os.path.join("log", FLAGS.log_file)
    logger = get_logger(
        os.path.join(save_places.log_save_dir,
                     'evaluation-{:d}.txt'.format(args.fileid)))
    config = config_model(char_to_id, tag_to_id, args)
    print_config(config, logger)

    logger.info("start training")

    #Update: create model and embedding!
    model = NERModel.CNERPointer(char_dim=args.char_dim,
                                 seg_dim=args.seg_dim,
                                 hidden_dim=args.hidden_dim,
                                 max_length=15,
                                 output_classes=4,
                                 dropout=args.dropout,
                                 embedding_path=None,
                                 id_to_word=id_to_char,
                                 easy_load=None)
    print("Number of Params\t{:d}".format(
        sum([p.data.nelement() for p in model.parameters()])))

    #Update: this won't work!
    # model = cuda_model.convertModel2Cuda(model, gpu_id=args.gpu_id, multiGpu=args.multiGpu)
    if use_cuda:
        model = model.cuda()

    model.eval()
    if args.eval is not None:
        # if os.path.isfile(args.resume):
        ckpt_filename = os.path.join(
            save_places.model_save_dir,
            'checkpoint_{:04d}.pth.tar'.format(args.fileid))
        assert os.path.isfile(
            ckpt_filename), 'Error: no checkpoint directory found!'

        checkpoint = torch.load(ckpt_filename,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['state_dict'], strict=True)
        train_iou = checkpoint['IoU']
        print("=> loading checkpoint '{}', current iou: {:.04f}".format(
            ckpt_filename, train_iou))

    ner_results = evaluate(model, test_manager, id_to_tag, use_cuda, max_len=5)
    eval_lines = test_ner(ner_results, save_places.summary_save_dir)
    for line in eval_lines:
        logger.info(line)
    f1 = float(eval_lines[1].strip().split()[-1])
    return f1
Example #16
def main(_):
    if not os.path.isdir(FLAGS.log_path):
        os.makedirs(FLAGS.log_path)
    if not os.path.isdir(FLAGS.model_path):
        os.makedirs(FLAGS.model_path)
    if not os.path.isdir(FLAGS.result_path):
        os.makedirs(FLAGS.result_path)
    tag_to_id = {
        "O": 0,
        "B-LOC": 1,
        "I-LOC": 2,
        "B-PER": 3,
        "I-PER": 4,
        "B-ORG": 5,
        "I-ORG": 6
    }
    # load data
    id_to_word, id_to_tag, train_data, dev_data, test_data = load_data(
        FLAGS, tag_to_id)
    train_manager = BatchManager(train_data, len(id_to_tag),
                                 FLAGS.word_max_len, FLAGS.batch_size)
    dev_manager = BatchManager(dev_data, len(id_to_tag), FLAGS.word_max_len,
                               FLAGS.valid_batch_size)
    test_manager = BatchManager(test_data, len(id_to_tag), FLAGS.word_max_len,
                                FLAGS.valid_batch_size)
    with tf.Session() as sess:
        model = create_model(sess, id_to_word, id_to_tag)
        loss = 0
        best_test_f1 = 0
        steps_per_epoch = len(train_data) // FLAGS.batch_size + 1
        for _ in range(FLAGS.max_epoch):
            iteration = (model.global_step.eval()) // steps_per_epoch + 1
            train_manager.shuffle()
            for batch in train_manager.iter_batch():
                global_step = model.global_step.eval()
                step = global_step % steps_per_epoch
                batch_loss = model.run_step(sess, True, batch)
                loss += batch_loss / FLAGS.steps_per_checkpoint
                if global_step % FLAGS.steps_per_checkpoint == 0:
                    model.logger.info(
                        "iteration:{} step:{}/{}, NER loss:{:>9.6f}".format(
                            iteration, step, steps_per_epoch, loss))
                    loss = 0

            model.logger.info("validating ner")
            ner_results = model.predict(sess, dev_manager)
            eval_lines = test_ner(ner_results, FLAGS.result_path)
            for line in eval_lines:
                model.logger.info(line)
            test_f1 = float(eval_lines[1].strip().split()[-1])
            if test_f1 > best_test_f1:
                best_test_f1 = test_f1
                model.logger.info("new best f1 score:{:>.3f}".format(test_f1))
                model.logger.info("saving model ...")
                checkpoint_path = os.path.join(FLAGS.model_path,
                                               "translate.ckpt")
                model.saver.save(sess,
                                 checkpoint_path,
                                 global_step=model.global_step)
        # test model
        model.logger.info("testing ner")
        ckpt = tf.train.get_checkpoint_state(FLAGS.model_path)
        model.logger.info("Reading model parameters from %s" %
                          ckpt.model_checkpoint_path)
        model.saver.restore(sess, ckpt.model_checkpoint_path)
        ner_results = model.predict(sess, test_manager)
        eval_lines = test_ner(ner_results, FLAGS.result_path)
        for line in eval_lines:
            model.logger.info(line)