def evaluate(self, model):
        self.pad_process()
        train_X, test_X, train_y, test_y = self.train_test_split()

        test_pred = model.predict(test_X, verbose=1)

        pred_labels = self._evaluate(test_pred)
        test_labels = self._evaluate(test_y)
        print("F1-score: {:.1%}".format(f1_score(test_labels, pred_labels)))
        print(classification_report(test_labels, pred_labels))
 def score(self, X, y, sample_weight=None):
     y_pred = self.predict(X)
     if not self.is_nested:
         score = f1_score(y, y_pred, average="macro")
     else:
         y_pred_ids = self.multi_label_to_id(y_pred)
         y_true_ids = self.multi_label_to_id(y)
         score = (y_pred_ids == y_true_ids).float().mean()
         score = score.item()
     return score
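
Most of the snippets on this page feed tag sequences to seqeval's metrics. A minimal, self-contained sketch of the list-of-lists input format seqeval expects (assuming the usual BIO tagging scheme):

from seqeval.metrics import classification_report, f1_score

# seqeval expects one inner list of tag strings per sentence, not a flat list
y_true = [['B-PER', 'I-PER', 'O'], ['B-LOC', 'O']]
y_pred = [['B-PER', 'I-PER', 'O'], ['O', 'O']]

print(f1_score(y_true, y_pred))               # entity-level F1
print(classification_report(y_true, y_pred))  # per-entity-type breakdown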
Example #3
def test_evaluate(model, test_dataloader):
    test_l, n = 0.0, 0
    out_epoch, label_eppch = [], []
    global f1_min
    model.eval()
    with torch.no_grad():
        for data_x, data_y, batch_seq_len in test_dataloader:
            _, out = model(data_x.to(device),
                           batch_seq_len)  # out is the path sequence, shape [10, 40]

            label = [line.numpy().tolist() for line in data_y]
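            # pad each gold label sequence to the batch width (data_x.shape[1])
            # by repeating its last tag, so it lines up with the padded inputs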
            for line in label:
                for i in range(data_x.shape[1] - len(line)):
                    line.append(line[len(line) - 1])
            loss = model.loss_function(data_x.to(device), label, batch_seq_len)

            for line in out:
                for i in range(data_x.shape[1] - len(line)):
                    line.append(line[len(line) - 1])
            label = torch.tensor(label).view(-1, 1).squeeze(-1).to(
                device)  #torch.Size([274])
            out = torch.tensor(out).view(-1, 1).squeeze(-1).to(
                device)  #torch.Size([274])
            out, label = processing_len(out, label, batch_seq_len)

            # test-set evaluation metrics
            out_epoch.extend(out)
            label_eppch.extend(label)
            test_l += loss.item()
            n += 1

        #print(classification_report(label_eppch, out_epoch, target_names=target_names, digits=6))
        label_eppch = [[id2label[label] for label in label_eppch]]
        out_epoch = [[id2label[label] for label in out_epoch]]
        print(classification_report(label_eppch, out_epoch, digits=6))
        #report = classification_report(label_eppch, out_epoch, output_dict=True)
        report = classification_report(label_eppch, out_epoch, digits=6)
        current_f1 = f1_score(label_eppch, out_epoch)
        if current_f1 > f1_min:
            f1_min = current_f1
            torch.save(model.state_dict(), args.ckp)
            print("save model......")

        return test_l / n
Example #4
    def _predict_with_seqeval(self, sample, model):
        # select target locations, that are not pad

        from seqeval.metrics import classification_report, f1_score, accuracy_score

        with torch.no_grad():
            if hasattr(
                    model,
                    'tagging_heads') and 'tagging_head' in model.tagging_heads:
                logits, _ = model(
                    **sample['net_input'],
                    features_only=True,
                    tagging_head_name='tagging_head',
                )
            else:
                logits = model(**sample['net_input'])[0]

            predictions = logits.argmax(dim=-1)
            targets = model.get_targets(sample, [logits])

        # making sure of dimensions
        assert predictions.size() == targets.size()

        predicted_labels = predictions.detach().cpu().numpy()
        label_ids = targets.cpu().numpy()

        y_true = []
        y_pred = []
        for i, cur_label in enumerate(label_ids):
            temp_1 = []
            temp_2 = []

            for j, m in enumerate(cur_label):
                if targets[i][j] not in [
                        self.target_dictionary.bos(),
                        self.target_dictionary.eos(),
                        self.target_dictionary.pad()
                ]:  # if it's a valid label
                    temp_1.append(self.target_dictionary[m])
                    temp_2.append(
                        self.target_dictionary[predicted_labels[i][j]])

            assert len(temp_1) == len(temp_2)
            y_true.append(temp_1)
            y_pred.append(temp_2)

        f1 = f1_score(y_true, y_pred, average='macro')
        acc = accuracy_score(y_true, y_pred)

        return {
            'F1-score': f1,
            'Accuracy': acc,
            'y_true': y_true,
            'y_pred': y_pred
        }
Example #5
def eval(iter_data, model):
    logger.info("starting to evaluate")
    model = model.eval()
    eval_loss, eval_accuracy = 0, 0
    nb_eval_steps = 0
    predictions, true_labels = [], []
    for batch in tqdm(iter_data):
        batch = tuple(t.to(device) for t in batch)

        b_input_ids, b_labels, b_input_mask, b_token_type_ids, b_label_masks = batch

        with torch.no_grad():
            tmp_eval_loss, logits, reduced_labels = model(
                b_input_ids,
                token_type_ids=b_token_type_ids,
                attention_mask=b_input_mask,
                labels=b_labels,
                label_masks=b_label_masks)

        logits = torch.argmax(F.log_softmax(logits, dim=2), dim=2)
        logits = logits.detach().cpu().numpy()
        reduced_labels = reduced_labels.to('cpu').numpy()

        labels_to_append = []
        predictions_to_append = []

        for prediction, r_label in zip(logits, reduced_labels):
            preds = []
            labels = []
            for pred, lab in zip(prediction, r_label):
                if lab.item() == -1:  # masked label; -1 means do not collect this label
                    continue
                preds.append(pred)
                labels.append(lab)
            predictions_to_append.append(preds)
            labels_to_append.append(labels)

        predictions.extend(predictions_to_append)
        true_labels.extend(labels_to_append)

        eval_loss += tmp_eval_loss.mean().item()

        nb_eval_steps += 1
    eval_loss = eval_loss / nb_eval_steps
    logger.info("Validation loss: {}".format(eval_loss))
    pred_tags = [tags_vals[p_i] for p in predictions for p_i in p]
    valid_tags = [tags_vals[l_i] for l in true_labels for l_i in l]
    logger.info("Seq eval accuracy: {}".format(
        accuracy_score(valid_tags, pred_tags)))
    logger.info("F1-Score: {}".format(f1_score(valid_tags, pred_tags)))
    logger.info("Classification report: -- ")
    logger.info(classification_report(valid_tags, pred_tags))
Example #6
    def score(self, y_true, y_pred):

        f_score = f1_score(y_true, y_pred)
        r_score = recall_score(y_true, y_pred)
        p_score = precision_score(y_true, y_pred)

        print(
            'NER metrics > precision_score: {:04.2f}  --  recall_score: {:04.2f}  --  f1_score: {:04.2f}'
            .format(p_score, r_score, f_score))

        return f_score, r_score, p_score
Example #7
def calculate_token_class_metrics(pred_toks, targ_toks, metric_key):
    if (metric_key == 'accuracy'):
        return seq_metrics.accuracy_score(targ_toks, pred_toks)
    if (metric_key == 'precision'):
        return seq_metrics.precision_score(targ_toks, pred_toks)
    if (metric_key == 'recall'):
        return seq_metrics.recall_score(targ_toks, pred_toks)
    if (metric_key == 'f1'): return seq_metrics.f1_score(targ_toks, pred_toks)

    if (metric_key == 'classification_report'):
        return seq_metrics.classification_report(targ_toks, pred_toks)
Example #8
 def compute_metrics(p: EvalPrediction) -> Dict:
     preds_list, out_label_list = align_predictions(p.predictions, p.label_ids)
     report = classification_report(out_label_list, preds_list)
     output_report_file = os.path.join(training_args.output_dir, "classification_report.txt")
     with open(output_report_file, "w") as writer:
         writer.write(report)
     return {
         "precision": precision_score(out_label_list, preds_list),
         "recall": recall_score(out_label_list, preds_list),
         "f1": f1_score(out_label_list, preds_list),
     }
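
A minimal sketch (assumed wiring, not part of the original snippet) of how a compute_metrics hook like this is usually handed to a Hugging Face transformers Trainer; model, training_args, and the dataset objects are taken as given from the surrounding script:

from transformers import Trainer

trainer = Trainer(
    model=model,                      # assumed to be defined elsewhere
    args=training_args,
    train_dataset=train_dataset,      # hypothetical dataset objects
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics,  # the function above
)
metrics = trainer.evaluate()          # metric keys come back prefixed with "eval_"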
Example #9
    def evaluate(self,
                 x_data,
                 y_data,
                 batch_size=None) -> Tuple[float, float, Dict]:
        y_pred = self.predict(x_data, batch_size=batch_size)

        weighted_f1 = f1_score(y_data, y_pred)
        weighted_recall = recall_score(y_data, y_pred)
        report = classification_report(y_data, y_pred)
        print(classification_report(y_data, y_pred))
        return weighted_f1, weighted_recall, report
Example #10
def ner_eval(gold_tags, idx2tag, pred_probs):
    lengths = [
        min(len(tag), pred_prob.shape[0])
        for tag, pred_prob in zip(gold_tags, pred_probs)
    ]
    pred_tags = ner_tag_decode(idx2tag, pred_probs, lengths)

    r = metrics.recall_score(gold_tags, pred_tags)
    p = metrics.precision_score(gold_tags, pred_tags)
    f1 = metrics.f1_score(gold_tags, pred_tags)
    return r, p, f1
Example #11
def scores(epoch_trues, epoch_preds):
            
    f1 = f1_score(epoch_trues, epoch_preds)
    rec = recall_score(epoch_trues, epoch_preds)
    prec = precision_score(epoch_trues, epoch_preds)
    acc = accuracy_score(epoch_trues, epoch_preds)
    
    return {"f1": np.around(f1, 4), 
            "rec": np.around(rec, 4),  
            "prec": np.around(prec, 4), 
            "acc": np.around(acc, 4)}
Example #12
def evaluate_ner(args, model):
    if args.onto:
        ner_examples = get_onto_ner_examples(args.ner_data)
    else:
        ner_examples = get_ner_examples(args.ner_data)

    total_pred_labels = []
    true_labels = []
    total_f1 = 0.0
    start = time.time()
    total_labels_clean = []
    all_ner_words = []
    model.tokenizer = Tokenizer(model.vocab)
    for exp in tqdm(ner_examples):

        words = exp[0]
        labels = exp[1]

        idxs = []
        text = ""
        for word in words:
            idxs += [len(text)]
            text += word + " "

        tokens = model(text)
        pred_ner_labels = []
        for tk in tokens:
            # if tk.idx in idxs:
            #     pred_label = tk.ent_type_
            #     if len(pred_label)> 0:
            #         pred_ner_labels.append(pred_label)
            #     else:
            #         pred_ner_labels.append("O")
            pred_label = tk.ent_type_
            if len(pred_label) > 0:
                pred_ner_labels.append(pred_label)
            else:
                pred_ner_labels.append("O")

        assert len(pred_ner_labels) == len(labels)

        total_labels_clean.append(labels)
        total_pred_labels.append(pred_ner_labels)
    end = time.time()

    ## evaluate
    print("pred", total_pred_labels[:20])
    print("true labels", total_labels_clean[:20])
    total = sum([len(x) for x in total_pred_labels])

    print("tokens per second", total * 1.0 / (end - start))
    print("ner tag time cost", end - start)
    print("ner precision", precision_score(total_labels_clean, total_pred_labels))
    print("ner f1", f1_score(total_labels_clean, total_pred_labels))
Example #13
def caculate_report(y_true, y_pred, transform_func):
    """
    Compute F1, precision, and recall for the predictions.
    """
    for i in range(len(y_true)):
        y_true[i] = transform_func(y_true[i])
    for i in range(len(y_pred)):
        y_pred[i] = transform_func(y_pred[i])
    return (f1_score(y_true, y_pred), precision_score(y_true, y_pred),
            recall_score(y_true, y_pred))
Example #14
def evaluate(gold_label_list, preds_list, config):
    tag_vocab = dataset_utils.get_tag_vocab(config)
    gold_label_list = dataset_utils.tagid2tag_seq(tag_vocab, gold_label_list)
    preds_list = dataset_utils.tagid2tag_seq(tag_vocab, preds_list)

    results = {
        "precision": precision_score(gold_label_list, preds_list),
        "recall": recall_score(gold_label_list, preds_list),
        "f1": f1_score(gold_label_list, preds_list)
    }
    return results
Example #15
def model_metrics(true_labels, pre_labels):
    start_time = time.time()
    acc = accuracy_score(true_labels, pre_labels)
    f1score = f1_score(true_labels, pre_labels, average='macro')
    report = classification_report(true_labels, pre_labels, digits=4)
    msg = '\nTest Acc: {0:>6.2%}, Test f1: {1:>6.2%}'
    logger.info(msg.format(acc, f1score))
    logger.info("\nPrecision, Recall and F1-Score...")
    logger.info("\n{}".format(report))
    time_dif = time.time() - start_time
    logger.info("Time usage:{0:>.6}s".format(time_dif))
Example #16
 def _simple_score(self, model, iter):
     model.eval()
     y_true, y_pred = [], []
     for batch in iter:
         predict_tags = model(batch)
         _, _, _, label_seq_tensor = batch
         y_true.extend(convert(label_seq_tensor.tolist(), self.label_dict))
         y_pred.extend(convert(predict_tags, self.label_dict))
     return (accuracy_score(y_true, y_pred), recall_score(y_true, y_pred),
             f1_score(y_true, y_pred))
Example #17
def evaluate(model, config, val_loader):
    model.eval()
    opt = config['opt']
    pad_label_id = config['pad_label_id']

    eval_loss = 0.
    criterion = nn.CrossEntropyLoss(ignore_index=pad_label_id).to(opt.device)
    n_batches = len(val_loader)
    prog = Progbar(target=n_batches)
    preds = None
    ys    = None
    with torch.no_grad():
        for i, (x,y) in enumerate(val_loader):
            x = to_device(x, opt.device)
            y = to_device(y, opt.device)
            if opt.use_crf:
                logits, prediction = model(x)
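                # mask is 1 for real tokens (non-zero input ids) and 0 for padding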
                mask = torch.sign(torch.abs(x[0])).to(torch.uint8).to(opt.device)
                log_likelihood = model.crf(logits, y, mask=mask, reduction='mean')
                loss = -1 * log_likelihood
            else:
                logits = model(x)
                loss = criterion(logits.view(-1, model.label_size), y.view(-1))
            if preds is None:
                if opt.use_crf: preds = to_numpy(prediction)
                else: preds = to_numpy(logits)
                ys = to_numpy(y)
            else:
                if opt.use_crf: preds = np.append(preds, to_numpy(prediction), axis=0)
                else: preds = np.append(preds, to_numpy(logits), axis=0)
                ys = np.append(ys, to_numpy(y), axis=0)
            eval_loss += loss.item()
            prog.update(i+1,
                        [('eval curr loss', loss.item())])
    eval_loss = eval_loss / n_batches
    if not opt.use_crf: preds = np.argmax(preds, axis=2)
    # compute measure using seqeval
    labels = model.labels
    ys_lbs = [[] for _ in range(ys.shape[0])]
    preds_lbs = [[] for _ in range(ys.shape[0])]
    for i in range(ys.shape[0]):     # foreach sentence 
        for j in range(ys.shape[1]): # foreach token
            if ys[i][j] != pad_label_id:
                ys_lbs[i].append(labels[ys[i][j]])
                preds_lbs[i].append(labels[preds[i][j]])
    ret = {
        "loss": eval_loss,
        "precision": precision_score(ys_lbs, preds_lbs),
        "recall": recall_score(ys_lbs, preds_lbs),
        "f1": f1_score(ys_lbs, preds_lbs),
        "report": classification_report(ys_lbs, preds_lbs, digits=4),
    }
    print(ret['report'])
    return ret
Example #18
 def score(self, y_true, y_pred):
     """Calculate f1 score.
     Args:
         y_true (list): true sequences.
         y_pred (list): predicted sequences.
     Returns:
         score: f1 score.
     """
     score = f1_score(y_true, y_pred)
     print(' - valid_f1: {:04.2f}'.format(score * 100))
     return score
Example #19
def seqeval_classification_metrics(pred):
    labels = pred.label_ids
    preds = pred.predictions
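    # assumes label_ids and predictions already hold per-sentence lists of tag
    # strings; seqeval expects string tags, not raw logits or integer ids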
    precision_macro = precision_score(labels, preds, average='macro')
    recall_macro = recall_score(labels, preds, average='macro')
    f1_macro = f1_score(labels, preds, average='macro')
    precision_micro = precision_score(labels, preds, average='micro')
    recall_micro = recall_score(labels, preds, average='micro')
    f1_micro = f1_score(labels, preds, average='micro')
    acc = accuracy_score(labels, preds)
    return {
        'accuracy': acc,
        'f1_micro': f1_micro,
        'precision_micro': precision_micro,
        'recall_micro': recall_micro,
        'f1_macro': f1_macro,
        'precision_macro': precision_macro,
        'recall_macro': recall_macro,
        'nb_samples': len(labels),
        'classification_report': classification_report(labels, preds, digits=4)
    }
Example #20
def evaluate_model(model, eval_dataset, label_list, batch_size, device):
     """
     Evaluates an NER model on the eval_dataset provided.
     Returns:
          F1_score: Macro-average f1_score on the evaluation dataset.
          Report: detailed classification report 
     """

     # Run prediction for full data
     eval_sampler = SequentialSampler(eval_dataset)
     eval_dataloader = DataLoader(
          eval_dataset, sampler=eval_sampler, batch_size=batch_size)

     model.eval()  # turn off dropout

     y_true = []
     y_pred = []

     label_map = {i: label for i, label in enumerate(label_list, 1)}
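     # the 1-based offset above presumably leaves index 0 for the padding label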

     for input_ids, label_ids, l_mask, valid_ids in eval_dataloader:

          input_ids = input_ids.to(device)
          label_ids = label_ids.to(device)

          valid_ids = valid_ids.to(device)
          l_mask = l_mask.to(device)

          with torch.no_grad():
               logits = model(input_ids, labels=None, labels_mask=None,
                              valid_mask=valid_ids)

          logits = torch.argmax(logits, dim=2)
          logits = logits.detach().cpu().numpy()
          label_ids = label_ids.cpu().numpy()

          for i, cur_label in enumerate(label_ids):
               temp_1 = []
               temp_2 = []

               for j, m in enumerate(cur_label):
                    if valid_ids[i][j]:  # if it's a valid label
                         temp_1.append(label_map[m])
                         temp_2.append(label_map[logits[i][j]])

               assert len(temp_1) == len(temp_2)
               y_true.append(temp_1)
               y_pred.append(temp_2)
               
     report = classification_report(y_true, y_pred, digits=4)
     f1 = f1_score(y_true, y_pred, average='macro')

     return f1, report
Example #21
    def test_epoch_end(self, outputs):
        preds = torch.cat([x["preds"] for x in outputs]).detach().cpu().numpy()
        labels = torch.cat([x["labels"]
                            for x in outputs]).detach().cpu().numpy()

        # remove padding
        out_label_list = [[] for _ in range(labels.shape[0])]
        preds_list = [[] for _ in range(preds.shape[0])]
        assert (len(out_label_list) == len(preds_list)
                ), "Prediction and Label are not matched."

        from torch.nn import CrossEntropyLoss
        pad_token_label_id = CrossEntropyLoss().ignore_index

        label_map = {
            i: label
            for i, label in enumerate(list(self.label_vocab.keys()))
        }

        for i in range(labels.shape[0]):
            for j in range(labels.shape[1]):
                if labels[i, j] != pad_token_label_id:
                    out_label_list[i].append(label_map[labels[i][j]])
                    preds_list[i].append(label_map[preds[i][j]])

        # metrics - Precision, Recall, F1
        result = {
            "precision":
            seqeval_metrics.precision_score(out_label_list, preds_list),
            "recall":
            seqeval_metrics.recall_score(out_label_list, preds_list),
            "f1":
            seqeval_metrics.f1_score(out_label_list, preds_list),
        }

        print()
        print(
            seqeval_metrics.classification_report(out_label_list,
                                                  preds_list,
                                                  digits=4))

        # dump predicted outputs
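        # assumes callbacks[1] is a checkpoint callback that exposes `dirpath`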
        predicted_outputs_fn = os.path.join(self.trainer.callbacks[1].dirpath,
                                            'predicted_outputs.txt')
        predicted_outputs = preds_list

        with open(predicted_outputs_fn, "w", encoding='utf-8') as f:
            for output in predicted_outputs:
                print(output, file=f)
            print("Predicted Outputs are dumped at {}".format(
                predicted_outputs_fn))

        return result
Example #22
    def __epoch_valid(self, data_loader, prefix, writer=None, unseen_entity_set: set = None,
                      entity_span_prediction: bool = False):
        """ single epoch validation/test """
        # aggregate prediction and true label
        self.model.eval()
        seq_pred, seq_true = [], []
        for encode in data_loader:
            encode = {k: v.to(self.device) for k, v in encode.items()}
            labels_tensor = encode.pop('labels')
            logit = self.model(**encode, return_dict=True)['logits']
            _true = labels_tensor.cpu().detach().int().tolist()
            _pred = torch.max(logit, 2)[1].cpu().detach().int().tolist()
            for b in range(len(_true)):
                _pred_list, _true_list = [], []
                for s in range(len(_true[b])):
                    if _true[b][s] != PAD_TOKEN_LABEL_ID:
                        _true_list.append(self.id_to_label[_true[b][s]])
                        if unseen_entity_set is None:
                            _pred_list.append(self.id_to_label[_pred[b][s]])
                        else:
                            __pred = self.id_to_label[_pred[b][s]]
                            if __pred in unseen_entity_set:
                                _pred_list.append('O')
                            else:
                                _pred_list.append(__pred)
                assert len(_pred_list) == len(_true_list)
                if len(_true_list) > 0:
                    if entity_span_prediction:
                        # ignore entity type and focus on entity position
                        _true_list = [i if i == 'O' else '-'.join([i.split('-')[0], 'entity']) for i in _true_list]
                        _pred_list = [i if i == 'O' else '-'.join([i.split('-')[0], 'entity']) for i in _pred_list]
                    seq_true.append(_true_list)
                    seq_pred.append(_pred_list)

        # compute metrics
        metric = {
            "f1": f1_score(seq_true, seq_pred) * 100,
            "recall": recall_score(seq_true, seq_pred) * 100,
            "precision": precision_score(seq_true, seq_pred) * 100,
        }

        try:
            summary = classification_report(seq_true, seq_pred)
            logging.info('[epoch {}] ({}) \n {}'.format(self.__epoch, prefix, summary))
        except Exception:
            logging.exception('classification_report raises error')
            summary = ''
        metric['summary'] = summary
        if writer:
            writer.add_scalar('{}/f1'.format(prefix), metric['f1'], self.__epoch)
            writer.add_scalar('{}/recall'.format(prefix), metric['recall'], self.__epoch)
            writer.add_scalar('{}/precision'.format(prefix), metric['precision'], self.__epoch)
        return metric
Example #23
    def on_epoch_end(self, epoch, logs={}):
        label_true = []
        label_pred = []
        for i in range(len(self.seq)):
            x_true, y_true = self.seq[i]
            lengths = self.get_lengths(y_true)
            y_pred = self.model.predict_on_batch(x_true)

            y_true = self.p.inverse_transform(y_true, lengths)
            y_pred = self.p.inverse_transform(y_pred, lengths)

            label_true.extend(y_true)
            label_pred.extend(y_pred)

        valid_score = f1_score(label_true, label_pred)
        print(' - f1-valid: {:04.2f}'.format(valid_score * 100))
        print('validation report :',
              classification_report(label_true, label_pred))

        label_true = []
        label_pred = []
        for i in range(len(self.train_seq)):
            x_true, y_true = self.train_seq[i]
            lengths = self.get_lengths(y_true)
            y_pred = self.model.predict_on_batch(x_true)

            y_true = self.p.inverse_transform(y_true, lengths)
            y_pred = self.p.inverse_transform(y_pred, lengths)

            label_true.extend(y_true)
            label_pred.extend(y_pred)

        train_score = f1_score(label_true, label_pred)
        print(' - f1-train: {:04.2f}'.format(train_score * 100))
        print('train report :', classification_report(label_true, label_pred))
        logs['f1'] = {
            "epoch": epoch,
            "dev_score": valid_score,
            "train_score": train_score
        }
Example #24
    def test_metrics_for_inv_data(self):
        with open(self.file_name) as f:
            acc_pred = accuracy_score(self.y_true, self.y_pred)
            p_pred = precision_score(self.y_true, self.y_pred)
            r_pred = recall_score(self.y_true, self.y_pred)
            f1_pred = f1_score(self.y_true, self.y_pred)
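            # the *_inv data presumably uses the suffix tag layout ('PER-B' rather
            # than 'B-PER'), which is what suffix=True below tells seqeval to parse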

            acc_pred_inv = accuracy_score(self.y_true_inv, self.y_pred_inv)
            p_pred_inv = precision_score(self.y_true_inv,
                                         self.y_pred_inv,
                                         suffix=True)
            r_pred_inv = recall_score(self.y_true_inv,
                                      self.y_pred_inv,
                                      suffix=True)
            f1_pred_inv = f1_score(self.y_true_inv,
                                   self.y_pred_inv,
                                   suffix=True)

            self.assertLess(abs(acc_pred - acc_pred_inv), 1e-4)
            self.assertLess(abs(p_pred - p_pred_inv), 1e-4)
            self.assertLess(abs(r_pred - r_pred_inv), 1e-4)
            self.assertLess(abs(f1_pred - f1_pred_inv), 1e-4)
Example #25
def calculate_seqeval_metrics(predictions, labels, tags=None, binary=False):
    if tags is not None:
        map2label = {v:k for k,v in tags.items()}
        # pdb.set_trace()
        for i in range(len(predictions)):
            predictions[i] = [map2label[v] for v in predictions[i]]
            labels[i] = [map2label[v] for v in labels[i]]
    
    accuracy = seq_metrics.accuracy_score(labels, predictions)
    precision = seq_metrics.precision_score(labels, predictions)
    recall = seq_metrics.recall_score(labels, predictions)
    f1_score = seq_metrics.f1_score(labels, predictions)
    return accuracy, precision, recall, f1_score
Example #26
def acc_and_f1(preds, labels):
    acc = accuracy_score(preds, labels)
    f1 = f1_score(y_true=labels, y_pred=preds)
    p = precision_score(y_true=labels, y_pred=preds)
    r = recall_score(y_true=labels, y_pred=preds)
    report = classification_report(y_true=labels, y_pred=preds)
    return {
        "acc": acc,
        "f1": f1,
        "acc_and_f1": (acc + f1) / 2,
        "precision": p,
        "recall": r,
    }
Example #27
    def on_epoch_end(self, epoch, logs=None):
        pred_probs = self.model.predict(self.valid_features)
        y_pred = self.preprocessor.label_decode(pred_probs, self.get_lengths(pred_probs))

        r = metrics.recall_score(self.valid_labels, y_pred)
        p = metrics.precision_score(self.valid_labels, y_pred)
        f1 = metrics.f1_score(self.valid_labels, y_pred)

        logs['val_r'] = r
        logs['val_p'] = p
        logs['val_f1'] = f1
        print('Epoch {}: val_r: {}, val_p: {}, val_f1: {}'.format(epoch, r, p, f1))
        print(metrics.classification_report(self.valid_labels, y_pred))
Example #28
def f1_pre_rec(labels, preds, is_ner=True):
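    # is_ner=True: entity-level seqeval scores over suffix-style tags ('PER-B');
    # otherwise: macro-averaged sklearn scores over flat label arrays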
    if is_ner:
        return {
            "precision": seqeval_metrics.precision_score(labels, preds, suffix=True),
            "recall": seqeval_metrics.recall_score(labels, preds, suffix=True),
            "f1": seqeval_metrics.f1_score(labels, preds, suffix=True),
        }
    else:
        return {
            "precision": sklearn_metrics.precision_score(labels, preds, average="macro"),
            "recall": sklearn_metrics.recall_score(labels, preds, average="macro"),
            "f1": sklearn_metrics.f1_score(labels, preds, average="macro"),
        }
Example #29
	def on_epoch_end(self, epoch, logs = None):
		# self.model is auto set by keras
		yt, yp = [], []
		pred =  np.argmax(self.smodel.predict(self.X, batch_size=32), -1)
		lengths = [x.sum() for x in self.X[1]]
		for pseq, yseq, llen in zip(pred, self.Y, lengths):
			yt.append([self.tags[z] for z in yseq[1:llen-1]])  # gold tags from self.Y
			yp.append([self.tags[z] for z in pseq[1:llen-1]])  # predicted tags
		f1 = f1_score(yt, yp)
		self.best_f1 = max(self.best_f1, f1)
		accu = accuracy_score(yt, yp)
		print('\naccu: %.4f  F1: %.4f  BestF1: %.4f\n' % (accu, f1, self.best_f1))
		print(classification_report(yt, yp))
Example #30
 def score(self, y_true, y_pred):
     """Calculate f1 score.
     Args:
         y_true (list): true sequences.
         y_pred (list): predicted sequences.
     Returns:
         score: f1 score.
     """
     score = f1_score(y_true, y_pred)
     print(' - f1: {:04.2f}'.format(score * 100))
     if self.digits:
         print(classification_report(y_true, y_pred, digits=self.digits))
     return score