Example #1
 def log_metric(self,
                metric: Metric,
                dataset_name: str,
                log_class_metrics: bool = False):
     # Log the aggregate scores for the whole dataset.
     log.info(
         "{0:<4}: f-score {1:.4f} - acc {2:.4f} - tp {3} - fp {4} - fn {5} - tn {6}"
         .format(dataset_name, metric.f_score(), metric.accuracy(),
                 metric.get_tp(), metric.get_fp(), metric.get_fn(),
                 metric.get_tn()))
     # Optionally repeat the same breakdown for every class.
     if log_class_metrics:
         for cls in metric.get_classes():
             log.info(
                 "{0:<4}: f-score {1:.4f} - acc {2:.4f} - tp {3} - fp {4} - fn {5} - tn {6}"
                 .format(cls, metric.f_score(cls), metric.accuracy(cls),
                         metric.get_tp(cls), metric.get_fp(cls),
                         metric.get_fn(cls), metric.get_tn(cls)))
 def evaluate(self,
              data_loader: DataLoader,
              out_path: Optional[Path] = None,
              embeddings_storage_mode: str = 'cpu') -> Tuple[Result, float]:
     with torch.no_grad():
         eval_loss = 0
         metric = Metric('Evaluation')
         lines = []
         batch_count = 0
         # The label inventory is fixed for the model, so fetch it once
         # instead of once per batch.
         available_labels = self.label_dictionary.get_items()
         for batch in data_loader:
             batch_count += 1
             # Forward pass: predicted labels and the loss for this batch.
             (labels, loss) = self.forward_labels_and_loss(batch)
             eval_loss += loss
             # Collect, per sentence: plain text, prediction confidences,
             # predicted label values, and the gold label names.
             sentences_for_batch = [
                 sent.to_plain_string() for sent in batch
             ]
             confidences_for_batch = [[
                 label.score for label in sent_labels
             ] for sent_labels in labels]
             predictions_for_batch = [[
                 label.value for label in sent_labels
             ] for sent_labels in labels]
             true_values_for_batch = [
                 sentence.get_label_names() for sentence in batch
             ]
             # One TSV line per sentence: text, gold labels, predictions,
             # confidences.
             for (sentence, confidence, prediction, true_value) in zip(
                     sentences_for_batch, confidences_for_batch,
                     predictions_for_batch, true_values_for_batch):
                 eval_line = '{}\t{}\t{}\t{}\n'.format(
                     sentence, true_value, prediction, confidence)
                 lines.append(eval_line)
             # Update the confusion counts. Every known label is scored
             # against every sentence, so true negatives are counted too.
             for (predictions_for_sentence,
                  true_values_for_sentence) in zip(predictions_for_batch,
                                                   true_values_for_batch):
                 for label in available_labels:
                     predicted = label in predictions_for_sentence
                     gold = label in true_values_for_sentence
                     if predicted and gold:
                         metric.add_tp(label)
                     elif predicted and not gold:
                         metric.add_fp(label)
                     elif not predicted and gold:
                         metric.add_fn(label)
                     else:
                         metric.add_tn(label)
             # Keep or free the batch embeddings per the storage mode.
             store_embeddings(batch, embeddings_storage_mode)
         # Report the mean loss over all batches.
         eval_loss /= batch_count
         detailed_result = (
             f"\nMICRO_AVG: acc {metric.micro_avg_accuracy()}"
             f" - f1-score {metric.micro_avg_f_score()}"
             f"\nMACRO_AVG: acc {metric.macro_avg_accuracy()}"
             f" - f1-score {metric.macro_avg_f_score()}")
         for class_name in metric.get_classes():
             detailed_result += (
                 f"\n{class_name:<10}"
                 f" tp: {metric.get_tp(class_name)}"
                 f" - fp: {metric.get_fp(class_name)}"
                 f" - fn: {metric.get_fn(class_name)}"
                 f" - tn: {metric.get_tn(class_name)}"
                 f" - precision: {metric.precision(class_name):.4f}"
                 f" - recall: {metric.recall(class_name):.4f}"
                 f" - accuracy: {metric.accuracy(class_name):.4f}"
                 f" - f1-score: {metric.f_score(class_name):.4f}")
         result = Result(
             main_score=metric.micro_avg_f_score(),
             log_line=(f"{metric.precision()}\t{metric.recall()}"
                       f"\t{metric.micro_avg_f_score()}"),
             log_header='PRECISION\tRECALL\tF1',
             detailed_results=detailed_result)
         # Optionally dump the per-sentence predictions to a TSV file.
         if out_path is not None:
             with open(out_path, 'w', encoding='utf-8') as outfile:
                 outfile.write(''.join(lines))
         return result, eval_loss
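
Both examples drive the same Metric bookkeeping object (in Flair it lives in flair.training_utils). As a reading aid, here is a minimal sketch of the counter interface the code above assumes: the method names mirror the calls made in evaluate(), but the arithmetic is just the standard per-class precision/recall/F1/accuracy, not code copied from Flair.

from collections import defaultdict


class SketchMetric:
    """Assumed stand-in for Flair's Metric; method names mirror the
    calls made by the evaluate() code above."""

    def __init__(self, name: str):
        self.name = name
        self._tp = defaultdict(int)
        self._fp = defaultdict(int)
        self._fn = defaultdict(int)
        self._tn = defaultdict(int)

    # Per-class counter updates.
    def add_tp(self, cls):
        self._tp[cls] += 1

    def add_fp(self, cls):
        self._fp[cls] += 1

    def add_fn(self, cls):
        self._fn[cls] += 1

    def add_tn(self, cls):
        self._tn[cls] += 1

    def get_classes(self):
        return sorted(set(self._tp) | set(self._fp)
                      | set(self._fn) | set(self._tn))

    # With cls=None the getters return totals pooled over all classes.
    def get_tp(self, cls=None):
        return self._tp.get(cls, 0) if cls is not None else sum(self._tp.values())

    def get_fp(self, cls=None):
        return self._fp.get(cls, 0) if cls is not None else sum(self._fp.values())

    def get_fn(self, cls=None):
        return self._fn.get(cls, 0) if cls is not None else sum(self._fn.values())

    def get_tn(self, cls=None):
        return self._tn.get(cls, 0) if cls is not None else sum(self._tn.values())

    def precision(self, cls=None):
        tp, fp = self.get_tp(cls), self.get_fp(cls)
        return tp / (tp + fp) if tp + fp else 0.0

    def recall(self, cls=None):
        tp, fn = self.get_tp(cls), self.get_fn(cls)
        return tp / (tp + fn) if tp + fn else 0.0

    def f_score(self, cls=None):
        p, r = self.precision(cls), self.recall(cls)
        return 2 * p * r / (p + r) if p + r else 0.0

    def accuracy(self, cls=None):
        correct = self.get_tp(cls) + self.get_tn(cls)
        total = correct + self.get_fp(cls) + self.get_fn(cls)
        return correct / total if total else 0.0

    # Micro average: pool the raw counts over all classes, then score once.
    def micro_avg_f_score(self):
        return self.f_score(None)

    def micro_avg_accuracy(self):
        return self.accuracy(None)

    # Macro average: score each class separately, then take the
    # unweighted mean, so rare classes count as much as frequent ones.
    def macro_avg_f_score(self):
        classes = self.get_classes()
        return (sum(self.f_score(c) for c in classes) / len(classes)
                if classes else 0.0)

    def macro_avg_accuracy(self):
        classes = self.get_classes()
        return (sum(self.accuracy(c) for c in classes) / len(classes)
                if classes else 0.0)

The micro/macro distinction matters for the detailed report both examples build: micro averaging pools the raw counts first, so frequent classes dominate the score, while macro averaging weights every class equally.
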
Example #3
 def evaluate(self,
              data_loader: DataLoader,
              out_path: Optional[Path] = None,
              embeddings_storage_mode: str = 'cpu') -> Tuple[Result, float]:
     with torch.no_grad():
         eval_loss = 0
         batch_no = 0
         metric = Metric('Evaluation')
         lines = []
         for batch in data_loader:
             batch_no += 1
             # Forward pass: features, loss, and the decoded tag
             # sequences. The outer no_grad already disables gradients,
             # so no nested no_grad block is needed here.
             features = self.forward(batch)
             loss = self._calculate_loss(features, batch)
             (tags, _) = self._obtain_labels(features, batch)
             eval_loss += loss
             # Attach each predicted tag to its token and write one
             # "<text> <gold> <predicted> <confidence>" line per token.
             for (sentence, sent_tags) in zip(batch, tags):
                 for (token, tag) in zip(sentence.tokens, sent_tags):
                     token.add_tag_label('predicted', tag)
                     eval_line = '{} {} {} {}\n'.format(
                         token.text,
                         token.get_tag(self.tag_type).value, tag.value,
                         tag.score)
                     lines.append(eval_line)
                 # Blank line between sentences.
                 lines.append('\n')
             # Compare gold and predicted spans: a span counts as a true
             # positive only if both its tag and its text match exactly.
             for sentence in batch:
                 gold_tags = [(tag.tag, str(tag))
                              for tag in sentence.get_spans(self.tag_type)]
                 predicted_tags = [
                     (tag.tag, str(tag))
                     for tag in sentence.get_spans('predicted')
                 ]
                 for (tag, prediction) in predicted_tags:
                     if (tag, prediction) in gold_tags:
                         metric.add_tp(tag)
                     else:
                         metric.add_fp(tag)
                 for (tag, gold) in gold_tags:
                     if (tag, gold) not in predicted_tags:
                         metric.add_fn(tag)
                     else:
                         metric.add_tn(tag)
             # Keep or free the batch embeddings per the storage mode.
             store_embeddings(batch, embeddings_storage_mode)
         # Report the mean loss over all batches.
         eval_loss /= batch_no
         # Optionally dump the per-token predictions to a file.
         if out_path is not None:
             with open(out_path, 'w', encoding='utf-8') as outfile:
                 outfile.write(''.join(lines))
         detailed_result = (
             f"\nMICRO_AVG: acc {metric.micro_avg_accuracy()}"
             f" - f1-score {metric.micro_avg_f_score()}"
             f"\nMACRO_AVG: acc {metric.macro_avg_accuracy()}"
             f" - f1-score {metric.macro_avg_f_score()}")
         for class_name in metric.get_classes():
             detailed_result += (
                 f"\n{class_name:<10}"
                 f" tp: {metric.get_tp(class_name)}"
                 f" - fp: {metric.get_fp(class_name)}"
                 f" - fn: {metric.get_fn(class_name)}"
                 f" - tn: {metric.get_tn(class_name)}"
                 f" - precision: {metric.precision(class_name):.4f}"
                 f" - recall: {metric.recall(class_name):.4f}"
                 f" - accuracy: {metric.accuracy(class_name):.4f}"
                 f" - f1-score: {metric.f_score(class_name):.4f}")
         result = Result(
             main_score=metric.micro_avg_f_score(),
             log_line=(f"{metric.precision()}\t{metric.recall()}"
                       f"\t{metric.micro_avg_f_score()}"),
             log_header='PRECISION\tRECALL\tF1',
             detailed_results=detailed_result)
         return result, eval_loss
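
To see Example #3's span bookkeeping in isolation, here is a toy run against the SketchMetric sketch above; all spans are invented. A span only counts as a true positive when both its tag and its text match a gold span exactly, so a wrong boundary or wrong type costs one false positive plus one false negative.

# Toy demonstration of the span comparison from Example #3, using the
# SketchMetric defined after Example #1. All spans here are invented.
gold_tags = [('PER', 'Marie Curie'), ('LOC', 'Paris')]
predicted_tags = [('PER', 'Marie Curie'), ('ORG', 'Sorbonne')]

metric = SketchMetric('Evaluation')
for tag, prediction in predicted_tags:
    if (tag, prediction) in gold_tags:
        metric.add_tp(tag)   # exact match on tag and span text
    else:
        metric.add_fp(tag)   # predicted span with no gold counterpart
for tag, gold in gold_tags:
    if (tag, gold) not in predicted_tags:
        metric.add_fn(tag)   # gold span the model missed
    else:
        metric.add_tn(tag)   # matched span; the source also bumps tn here

print(metric.micro_avg_f_score())  # 0.5: one TP, one FP, one FN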