Example #1
def conlleval(label_predict, label_path, metric_path):
    """
    :param label_predict:
    :param label_path:
    :param metric_path:
    :return:
    """
    gold_all = []
    pred_all = []

    for sent_result in label_predict:
        gold_one = []
        pred_one = []
        for char, tag, tag_ in sent_result:
            # gold tags come as "TYPE-POS" (e.g. "PER-M"); rewrite them as
            # "POS-TYPE" ("I-PER"), mapping the middle tag 'M' to 'I'
            if tag != 'O':
                t, head = tag.split('-', 1)
                if head == 'M':
                    head = 'I'
                tag = head + '-' + t
            gold_one.append(tag)

            # predictions use 0 for the outside label; convert the rest the same way
            if tag_ == 0:
                tag_ = 'O'
            else:
                t_, head_ = tag_.split('-', 1)
                if head_ == 'M':
                    head_ = 'I'
                tag_ = head_ + '-' + t_
            pred_one.append(tag_)
        
        gold_all.append(gold_one)
        pred_all.append(pred_one)

    acc, precision, recall, f_measure = get_ner_fmeasure(gold_all, pred_all, "BIOES")

    with open(label_path, "w") as fw:
        line = []
        for sent_result in label_predict:
            for char, tag, tag_ in sent_result:
                # note: the gold 'O' label is written out as '0' here
                tag = '0' if tag == 'O' else tag
                line.append("{} {} {}\n".format(char, tag, tag_))
            line.append("\n")
        fw.writelines(line)
    # eval(label_path, metric_path)
    with open(metric_path, 'w') as fw:
        fw.write('token_acc:' + '\t' + str(acc))
        fw.write('\npre:' + '\t' + str(precision))
        fw.write('\nrecall:' + '\t' + str(recall))
        fw.write('\nf1:' + '\t' + str(f_measure))
    return precision, recall, f_measure
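A minimal usage sketch, assuming get_ner_fmeasure is importable from the surrounding metric utilities; the sentence, tags, and file paths below are hypothetical:

# Hypothetical input: one sentence of (char, gold_tag, predicted_tag) triples,
# with gold and predicted tags in "TYPE-POS" form and 0 for a predicted outside label.
label_predict = [[('张', 'PER-B', 'PER-B'),
                  ('三', 'PER-E', 'PER-E'),
                  ('说', 'O', 0)]]
p, r, f = conlleval(label_predict, 'label.txt', 'metric.txt')
print(p, r, f)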
Example #2
def do_pass(data, token_to_id, tag_to_id, id_to_tag, expressions, train, epo):
    model, optimizer = expressions

    # Loop over batches
    loss = 0
    gold_lists, pred_lists = [], []
    for start in range(0, len(data), BATCH_SIZE):
        batch = data[start:start + BATCH_SIZE]
        batch.sort(key=lambda x: -len(x[0]))

        # Prepare inputs
        cur_batch_size = len(batch)
        max_length = len(batch[0][0])
        lengths = [len(v[0]) for v in batch]
        input_array = torch.zeros((cur_batch_size, max_length)).long()
        mask_array = torch.zeros((cur_batch_size, max_length)).byte()
        output_array = torch.zeros((cur_batch_size, max_length)).long()
        for n, (tokens, tags) in enumerate(batch):
            token_ids = [token_to_id.get(simplify_token(t), 1) for t in tokens]
            tag_ids = [tag_to_id[t] for t in tags]
            mask_ids = [1 for t in tags]
            input_array[n, :len(tokens)] = torch.LongTensor(token_ids)
            mask_array[n, :len(tokens)] = torch.LongTensor(mask_ids)
            output_array[n, :len(tags)] = torch.LongTensor(tag_ids)

        model.to(device)
        # Construct computation
        batch_loss, output = model(input_array.to(device),
                                   mask_array.to(device),
                                   output_array.to(device), lengths,
                                   cur_batch_size, epo)

        # Run computations
        if train:
            batch_loss.backward()
            optimizer.step()
            model.zero_grad()
            loss += batch_loss.item()
        predicted = output.cpu().data.numpy()

        for (_, g), a in zip(batch, predicted):
            gold_list, pred_list = [], []
            for gt, at in zip(g, a):
                at = id_to_tag[at]
                gold_list.append(gt)
                pred_list.append(at)
            gold_lists.append(gold_list)
            pred_lists.append(pred_list)
    return loss, get_ner_fmeasure(gold_lists, pred_lists)[-1]
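A hedged sketch of a driver loop around do_pass, assuming the globals it uses (BATCH_SIZE, device, simplify_token) are defined and that the data, vocabularies, model, and optimizer have been built elsewhere; EPOCHS is a placeholder:

import torch

# Hypothetical training/evaluation loop; all names except do_pass are assumptions.
expressions = (model, optimizer)
for epoch in range(EPOCHS):
    model.train()
    train_loss, train_f1 = do_pass(train_data, token_to_id, tag_to_id,
                                   id_to_tag, expressions, True, epoch)
    model.eval()
    with torch.no_grad():
        _, dev_f1 = do_pass(dev_data, token_to_id, tag_to_id,
                            id_to_tag, expressions, False, epoch)
    print(epoch, train_loss, train_f1, dev_f1)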
Example #3
def evaluate(data, model, name):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name,", name)
    right_token = 0
    whole_token = 0
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = 1
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        gaz_list, batch_word, batch_biword, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
            instance, data.HP_gpu, True)
        tag_seq = model(gaz_list, batch_word, batch_biword, batch_wordlen,
                        batch_char, batch_charlen, batch_charrecover, mask)
        # print "tag:",tag_seq
        pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                               data.label_alphabet,
                                               batch_wordrecover)
        pred_results += pred_label
        gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances) / (decode_time + 0.0000000001)
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    return speed, acc, p, r, f, pred_results
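A minimal sketch of how this evaluate function is typically called after a training epoch; best_dev_f is a placeholder tracked by the caller:

# Hypothetical dev-set evaluation call.
speed, acc, p, r, f, _ = evaluate(data, model, "dev")
print("dev: speed %.2f sent/s, acc %.4f, p %.4f, r %.4f, f %.4f"
      % (speed, acc, p, r, f))
if f > best_dev_f:      # placeholder best-score tracking
    best_dev_f = f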
Example #4
def evaluate(data, wordseq, model, name, nbest=None):
    if name == "train":
        instances = data.train_Ids
    elif name == "dev":
        instances = data.dev_Ids
    elif name == 'test':
        instances = data.test_Ids
    elif name == 'raw':
        instances = data.raw_Ids
    else:
        print("Error: wrong evaluate name")
    right_token = 0
    whole_token = 0
    nbest_pred_results = []
    pred_scores = []
    pred_results = []
    gold_results = []
    ## set model in eval mode
    wordseq.eval()
    model.eval()
    batch_size = data.HP_batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if not instance:
            continue
        batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, _ = batchify_with_label(
            instance, data.HP_gpu, True)
        if nbest:
            hidden = wordseq.forward(batch_word, batch_features, batch_wordlen,
                                     batch_char, batch_charlen,
                                     batch_charrecover, None, None)
            scores, nbest_tag_seq = model.decode_nbest(hidden, mask, nbest)
            nbest_pred_result = recover_nbest_label(nbest_tag_seq, mask,
                                                    data.label_alphabet,
                                                    batch_wordrecover)
            nbest_pred_results += nbest_pred_result
            pred_scores += scores[batch_wordrecover].cpu().data.numpy().tolist()
            ## select the best sequence to evaluate
            tag_seq = nbest_tag_seq[:, :, 0]
        else:
            hidden = wordseq.forward(batch_word, batch_features, batch_wordlen,
                                     batch_char, batch_charlen,
                                     batch_charrecover, None, None)
            tag_seq = model(hidden, mask)
        # print "tag:",tag_seq
        pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                               data.label_alphabet,
                                               batch_wordrecover)
        pred_results += pred_label
        gold_results += gold_label
    decode_time = time.time() - start_time
    speed = len(instances) / decode_time
    acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, data.tagScheme)
    if nbest:
        return speed, acc, p, r, f, nbest_pred_results, pred_scores
    return speed, acc, p, r, f, pred_results, pred_scores
Example #5
def evaluate(args,
             model,
             tokenizer,
             labels,
             pad_token_label_id,
             mode,
             prefix=""):
    eval_dataset = load_and_cache_examples(args,
                                           tokenizer,
                                           labels,
                                           pad_token_label_id,
                                           mode=mode)

    args.eval_batch_size = args.per_gpu_eval_batch_size * max(1, args.n_gpu)
    # Note that DistributedSampler samples randomly
    eval_sampler = SequentialSampler(
        eval_dataset) if args.local_rank == -1 else DistributedSampler(
            eval_dataset)
    eval_dataloader = DataLoader(eval_dataset,
                                 sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # multi-gpu evaluate
    if args.n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Eval!
    logger.info("***** Running %s evaluation %s *****", mode, prefix)
    logger.info("  Num examples = %d", len(eval_dataset))
    logger.info("  Batch size = %d", args.eval_batch_size)
    eval_loss = 0.0
    nb_eval_steps = 0
    preds = None
    out_label_ids = None
    model.eval()

    for batch in eval_dataloader:
        batch = tuple(t.to(args.device) for t in batch)

        with torch.no_grad():
            inputs = {
                "input_ids": batch[0],
                "attention_mask": batch[1],
                "labels": batch[3]
            }
            if args.model_type != "distilbert":
                inputs["token_type_ids"] = (
                    batch[2] if args.model_type in ["bert", "xlnet"] else None
                )  # XLM and RoBERTa don't use segment_ids

            outputs = model(**inputs)
            tmp_eval_loss, logits = outputs[:2]

            if args.n_gpu > 1:
                # mean() to average on multi-gpu parallel evaluating
                tmp_eval_loss = tmp_eval_loss.mean()

            eval_loss += tmp_eval_loss.item()
        nb_eval_steps += 1
        if preds is None:
            preds = logits.detach().cpu().numpy()
            out_label_ids = inputs["labels"].detach().cpu().numpy()
        else:
            preds = np.append(preds, logits.detach().cpu().numpy(), axis=0)
            out_label_ids = np.append(out_label_ids,
                                      inputs["labels"].detach().cpu().numpy(),
                                      axis=0)

    eval_loss = eval_loss / nb_eval_steps
    preds = np.argmax(preds, axis=2)

    label_map = {i: label for i, label in enumerate(labels)}

    out_label_list = [[] for _ in range(out_label_ids.shape[0])]
    preds_list = [[] for _ in range(out_label_ids.shape[0])]

    for i in range(out_label_ids.shape[0]):
        for j in range(out_label_ids.shape[1]):
            if out_label_ids[i, j] != pad_token_label_id:
                out_label_list[i].append(label_map[out_label_ids[i][j]])
                preds_list[i].append(label_map[preds[i][j]])
    acc, p, r, f = get_ner_fmeasure(out_label_list, preds_list, 'BIO')
    results = {
        "loss": eval_loss,
        "precision": p,
        "recall": r,
        "f1": f,
        "acc": acc,
    }

    logger.info("***** Eval results %s *****", prefix)
    for key in sorted(results.keys()):
        logger.info("  %s = %s", key, str(results[key]))

    return results, preds_list
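A hedged sketch of invoking this evaluate variant on the dev split and dumping the metrics, assuming args, model, tokenizer, labels, and pad_token_label_id are set up as in the usual transformers fine-tuning scripts; the output path is a placeholder:

# Hypothetical post-training evaluation and metric dump.
results, predictions = evaluate(args, model, tokenizer, labels,
                                pad_token_label_id, mode="dev")
with open("eval_results.txt", "w") as writer:   # placeholder path
    for key in sorted(results.keys()):
        writer.write("{} = {}\n".format(key, str(results[key])))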
Example #6
def evaluate(data, opt, model, name, bEval, nbest=0):
    if name == "train":
        instances = data.train_Ids
        instances_text = data.train_texts
    elif name == "dev":
        instances = data.dev_Ids
        instances_text = data.dev_texts
    elif name == 'test':
        instances = data.test_Ids
        instances_text = data.test_texts
    else:
        logging.error("wrong evaluate name, {}".format(name))
    right_token = 0
    whole_token = 0
    nbest_pred_results = []
    pred_scores = []
    pred_results = []
    gold_results = []
    ## set model in eval mode
    model.eval()
    batch_size = opt.batch_size
    start_time = time.time()
    train_num = len(instances)
    total_batch = train_num // batch_size + 1
    for batch_id in range(total_batch):
        start = batch_id * batch_size
        end = (batch_id + 1) * batch_size
        if end > train_num:
            end = train_num
        instance = instances[start:end]
        if opt.elmo:
            instance_text = instances_text[start:end]
        else:
            instance_text = None
        if not instance:
            continue
        batch_word, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, batch_features, batch_text  \
            = batchify_with_label(data, instance, instance_text, opt.gpu)
        if nbest > 0:
            scores, nbest_tag_seq = model.decode_nbest(
                batch_word, batch_wordlen, batch_char, batch_charlen,
                batch_charrecover, mask, nbest, batch_features, batch_text)
            nbest_pred_result = recover_nbest_label(nbest_tag_seq, mask,
                                                    data.label_alphabet,
                                                    batch_wordrecover)
            nbest_pred_results += nbest_pred_result
            pred_scores += scores[batch_wordrecover].cpu().data.numpy().tolist()
            ## select the best sequence to evaluate
            tag_seq = nbest_tag_seq[:, :, 0]
        else:
            tag_seq = model(batch_word, batch_wordlen, batch_char,
                            batch_charlen, batch_charrecover, mask,
                            batch_features, batch_text)
        # print "tag:",tag_seq
        if bEval:
            pred_label, gold_label = recover_label(tag_seq, batch_label, mask,
                                                   data.label_alphabet,
                                                   batch_wordrecover)
            pred_results += pred_label
            gold_results += gold_label
        else:
            pred_label, _ = recover_label(tag_seq, batch_label, mask,
                                          data.label_alphabet,
                                          batch_wordrecover)
            pred_results += pred_label

    decode_time = time.time() - start_time
    speed = len(instances) / decode_time
    if bEval:
        acc, p, r, f = get_ner_fmeasure(gold_results, pred_results, opt.schema)
    else:
        acc, p, r, f = None, None, None, None
    # if nbest>0:
    #     return speed, acc, p, r, f, nbest_pred_results, pred_scores
    # return speed, acc, p, r, f, pred_results, pred_scores
    return speed, acc, p, r, f, pred_results, pred_scores