# Example no. 1
# 0
def evaluate_model(config: Config, conf_conll, conf_ontonotes,
                   model: MT_LSTMCRF, batch_insts_ids, name: str,
                   insts: List[Instance]):
    """Evaluate a multi-task LSTM-CRF on both the CoNLL and OntoNotes tasks.

    Args:
        config: global configuration; only ``batch_size`` is read here.
        conf_conll: CoNLL task configuration (provides ``idx2labels``).
        conf_ontonotes: OntoNotes task configuration (provides ``idx2labels``).
        model: multi-task model whose ``decode(batch)`` returns best scores,
            decoded label ids and masks for both tasks.
        batch_insts_ids: iterable of batched tensor inputs for ``model.decode``.
        name: dataset name, used only in the printed report.
        insts: all instances, in the same order used to build the batches.

    Returns:
        ``[precision_conll, recall_conll, f1_conll,
           precision_notes, recall_notes, f1_notes]`` as percentages.
    """

    def _prf(metrics):
        # Convert [matched, total_predicted, total_gold] counts into
        # (precision, recall, F1) percentages, guarding every zero division.
        matched, total_predict, total_entity = metrics[0], metrics[1], metrics[2]
        precision = matched * 1.0 / total_predict * 100 if total_predict != 0 else 0
        recall = matched * 1.0 / total_entity * 100 if total_entity != 0 else 0
        fscore = 2.0 * precision * recall / (
            precision + recall) if precision != 0 or recall != 0 else 0
        return precision, recall, fscore

    # Accumulated [matched, predicted, gold] entity counts per task.
    metrics_conll = np.asarray([0, 0, 0], dtype=int)
    metrics_notes = np.asarray([0, 0, 0], dtype=int)
    batch_size = config.batch_size
    for batch_id, batch in enumerate(batch_insts_ids):
        one_batch_insts = insts[batch_id * batch_size:(batch_id + 1) *
                                batch_size]
        # Re-sort by sentence length (descending) to match the ordering the
        # batch tensors were built with.
        sorted_batch_insts = sorted(one_batch_insts,
                                    key=lambda inst: len(inst.input.words),
                                    reverse=True)
        (bestScores_conll, decodeIdx_conll, bestScores_notes, decodeIdx_notes,
         mask_conll, mask_ontonotes) = model.decode(batch)
        # NOTE(review): batch[6]/batch[7] appear to hold the gold CoNLL /
        # OntoNotes label ids and batch[1] the sequence lengths — confirm
        # against the batching code.
        metrics_conll += eval.evaluate_num(sorted_batch_insts, decodeIdx_conll,
                                           batch[6], batch[1],
                                           conf_conll.idx2labels, mask_conll)
        metrics_notes += eval.evaluate_num(sorted_batch_insts, decodeIdx_notes,
                                           batch[7], batch[1],
                                           conf_ontonotes.idx2labels,
                                           mask_ontonotes)

    precision_conll, recall_conll, fscore_conll = _prf(metrics_conll)
    print("[%s conll set] Precision: %.2f, Recall: %.2f, F1: %.2f" %
          (name, precision_conll, recall_conll, fscore_conll),
          flush=True)

    precision_notes, recall_notes, fscore_notes = _prf(metrics_notes)
    print("[%s notes set] Precision: %.2f, Recall: %.2f, F1: %.2f" %
          (name, precision_notes, recall_notes, fscore_notes),
          flush=True)

    return [
        precision_conll, recall_conll, fscore_conll, precision_notes,
        recall_notes, fscore_notes
    ]
# Example no. 2
# 0
def evaluate_model(config: Config, model: NNCRF, batch_insts_ids, name: str,
                   insts: List[Instance]):
    """Decode every batch with the model and print/return P, R and F1.

    Args:
        config: global configuration; ``batch_size`` and ``idx2labels`` are read.
        model: CRF model whose ``decode(batch)`` yields (scores, label ids).
        batch_insts_ids: iterable of batched tensor inputs for ``model.decode``.
        name: dataset name, used only in the printed report.
        insts: all instances, in the order used to build the batches.

    Returns:
        ``[precision, recall, fscore]`` as percentages.
    """
    # counts = [matched entities, predicted entities, gold entities]
    counts = np.asarray([0, 0, 0], dtype=int)
    per_batch = config.batch_size
    for idx, batch in enumerate(batch_insts_ids):
        chunk = insts[idx * per_batch:(idx + 1) * per_batch]
        # Length-descending order mirrors how the batch tensors were packed.
        by_length = sorted(chunk,
                           key=lambda inst: len(inst.input.words),
                           reverse=True)
        _scores, predicted_ids = model.decode(batch)
        counts += eval.evaluate_num(by_length, predicted_ids, batch[-1],
                                    batch[1], config.idx2labels)
    matched, n_predicted, n_gold = counts[0], counts[1], counts[2]
    precision = matched * 1.0 / n_predicted * 100 if n_predicted != 0 else 0
    recall = matched * 1.0 / n_gold * 100 if n_gold != 0 else 0
    if precision != 0 or recall != 0:
        fscore = 2.0 * precision * recall / (precision + recall)
    else:
        fscore = 0
    print("[%s set] Precision: %.2f, Recall: %.2f, F1: %.2f" %
          (name, precision, recall, fscore),
          flush=True)
    return [precision, recall, fscore]