def evaluate(self, sentences: List[Sentence], eval_class_metrics: bool = False, mini_batch_size: int = 32, embeddings_in_memory: bool = False) -> (dict, float):
    """Evaluate the model on a list of sentences.

    :param sentences: the sentences to evaluate on
    :param eval_class_metrics: if True, also compute per-class metrics
    :param mini_batch_size: number of sentences per forward pass
    :param embeddings_in_memory: if True, keep word embeddings cached in memory
    :return: tuple of (metrics dictionary keyed by metric name, average loss)
    """
    # Evaluation only — no gradients needed.
    with torch.no_grad():
        eval_loss = 0
        predictions_one_hot = []
        targets_one_hot = []

        # Walk the sentences in mini-batches.
        for start in range(0, len(sentences), mini_batch_size):
            batch = sentences[start:start + mini_batch_size]

            scores = self.model.forward(batch)
            labels = self.model.obtain_labels(scores)
            eval_loss += self.model.calculate_loss(scores, batch)

            # Free embedding tensors unless the caller asked to cache them.
            clear_embeddings(
                batch, also_clear_word_embeddings=not embeddings_in_memory)

            predictions_one_hot.extend(
                convert_labels_to_one_hot(
                    [[label.value for label in sent_labels]
                     for sent_labels in labels],
                    self.label_dict))
            targets_one_hot.extend(
                convert_labels_to_one_hot(
                    [sentence.get_label_names() for sentence in batch],
                    self.label_dict))

        metrics = [
            calculate_micro_avg_metric(
                targets_one_hot, predictions_one_hot, self.label_dict)
        ]
        if eval_class_metrics:
            metrics.extend(
                calculate_class_metrics(
                    targets_one_hot, predictions_one_hot, self.label_dict))

        eval_loss /= len(sentences)
        return {metric.name: metric for metric in metrics}, eval_loss
def evaluate(self, sentences: List[Sentence], eval_class_metrics: bool = False, mini_batch_size: int = 32, embeddings_in_memory: bool = True) -> (dict, float):
    """
    Evaluates the model with the given list of sentences.
    :param sentences: the list of sentences
    :param eval_class_metrics: boolean indicating whether to compute per-class metrics as well
    :param mini_batch_size: the mini batch size to use
    :param embeddings_in_memory: boolean value indicating, if embeddings should be kept in memory or not
    :return: list of metrics, and the loss
    """
    # Fix: evaluation must not track gradients — wrapping the whole pass in
    # no_grad() avoids building autograd graphs (saves memory and compute),
    # matching the sibling evaluate() implementation.
    with torch.no_grad():
        eval_loss = 0

        batches = [
            sentences[x:x + mini_batch_size]
            for x in range(0, len(sentences), mini_batch_size)
        ]

        y_pred = []
        y_true = []

        for batch in batches:
            scores = self.model.forward(batch)
            labels = self.model.obtain_labels(scores)
            loss = self.model.calculate_loss(scores, batch)

            eval_loss += loss

            y_true.extend([sentence.get_label_names() for sentence in batch])
            y_pred.extend([[label.name for label in sent_labels]
                           for sent_labels in labels])

            # Drop embedding tensors when they should not stay resident.
            if not embeddings_in_memory:
                clear_embeddings(batch)

        y_pred = convert_labels_to_one_hot(y_pred, self.label_dict)
        y_true = convert_labels_to_one_hot(y_true, self.label_dict)

        metrics = [calculate_micro_avg_metric(y_true, y_pred, self.label_dict)]
        if eval_class_metrics:
            metrics.extend(
                calculate_class_metrics(y_true, y_pred, self.label_dict))

        eval_loss /= len(sentences)

        metrics_dict = {metric.name: metric for metric in metrics}

        return metrics_dict, eval_loss
def _labels_to_one_hot(self, sentences: List[Sentence]):
    """Encode the gold label names of each sentence as a one-hot float tensor on flair.device."""
    labels_per_sentence = [sentence.get_label_names() for sentence in sentences]
    encoded = convert_labels_to_one_hot(labels_per_sentence, self.label_dictionary)
    # Stack the per-sentence vectors into a (num_sentences, num_labels) tensor.
    rows = [torch.FloatTensor(row).unsqueeze(0) for row in encoded]
    return torch.cat(rows, 0).to(flair.device)
def _labels_to_one_hot(self, sentences: List[Sentence]):
    """Encode each sentence's gold label names as a one-hot FloatTensor (moved to GPU when available)."""
    names = [sentence.get_label_names() for sentence in sentences]
    vectors = convert_labels_to_one_hot(names, self.label_dictionary)
    # Build a (num_sentences, num_labels) matrix from the per-sentence rows.
    stacked = torch.cat([torch.FloatTensor(v).unsqueeze(0) for v in vectors], 0)
    return stacked.cuda() if torch.cuda.is_available() else stacked
def test_convert_labels_to_one_hot():
    """A lone 'class-2' label must encode to the one-hot vector [0, 1, 0]."""
    dictionary = Dictionary(add_unk=False)
    for name in (u'class-1', u'class-2', u'class-3'):
        dictionary.add_item(name)

    encoded = convert_labels_to_one_hot([[u'class-2']], dictionary)

    assert (encoded[0][0], encoded[0][1], encoded[0][2]) == (0, 1, 0)
def _labels_to_one_hot(self, sentences: List[Sentence]):
    """
    Converts the labels (of type self.label_type) of the given sentences into
    a one-hot tensor.

    :param sentences: sentences whose gold labels are encoded
    :return: FloatTensor of shape (num_sentences, num_labels) on flair.device
    """
    # Idiomatic comprehension instead of a manual append loop.
    label_list = [
        [label.value for label in sentence.get_labels(self.label_type)]
        for sentence in sentences
    ]
    one_hot = convert_labels_to_one_hot(label_list, self.label_dictionary)
    one_hot = [torch.FloatTensor(l).unsqueeze(0) for l in one_hot]
    one_hot = torch.cat(one_hot, 0).to(flair.device)
    return one_hot
def test_convert_labels_to_one_hot():
    """convert_labels_to_one_hot places a 1 only at the index of the given label."""
    dictionary = Dictionary(add_unk=False)
    for item in ("class-1", "class-2", "class-3"):
        dictionary.add_item(item)

    encoded = convert_labels_to_one_hot([["class-2"]], dictionary)

    assert (encoded[0][0], encoded[0][1], encoded[0][2]) == (0, 1, 0)