Example #1
def predict_embeddings(self, doc_dict, embedding, file_name):
    # torch.cuda.empty_cache()
    lm_file = h5py.File(file_name, "r")
    for key in doc_dict:
        if key == 'start':
            for i, sentence in enumerate(doc_dict[key]):
                for token, token_idx in zip(sentence.tokens,
                                            range(len(sentence.tokens))):
                    word_embedding = torch.zeros(
                        embedding.embedding_length, dtype=torch.float)

                    token.set_embedding(embedding.name, word_embedding)
            continue
        group = lm_file[key]
        num_sentences = len(list(group.keys()))
        sentences_emb = [group[str(i)][...] for i in range(num_sentences)]
        assert len(doc_dict[key]) == len(sentences_emb), \
            f"number of sentences for key {key!r} does not match the stored embeddings"
        for i, sentence in enumerate(doc_dict[key]):
            for token, token_idx in zip(sentence.tokens,
                                        range(len(sentence.tokens))):
                word_embedding = sentences_emb[i][token_idx]
                word_embedding = torch.from_numpy(word_embedding).view(-1)

                token.set_embedding(embedding.name, word_embedding)
            store_embeddings([sentence], 'cpu')
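
Every snippet on this page ends a batch with a call to store_embeddings(...), which decides what happens to the embeddings that were just computed. As a rough orientation only (this is a hypothetical sketch, not flair's actual implementation, and the .embeddings dictionary used below is an assumption), the three storage modes behave roughly like this:

def store_embeddings_sketch(data_points, storage_mode: str):
    # Hypothetical helper illustrating the three storage modes ('none', 'cpu', 'gpu').
    for data_point in data_points:
        # assume each data point keeps its embeddings in a dict: name -> tensor
        for name, tensor in list(data_point.embeddings.items()):
            if storage_mode == "none":
                # drop the embedding entirely; it will be recomputed when needed
                del data_point.embeddings[name]
            elif storage_mode == "cpu":
                # keep the values, but detach them and move them off the GPU
                data_point.embeddings[name] = tensor.detach().cpu()
            # storage_mode == "gpu": keep the tensors where they are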
Example #2
    def evaluate(self, data_loader, embedding_storage_mode):
        with torch.no_grad():
            eval_loss = 0
            if self.use_crf:
                transitions = self.transitions.detach().cpu().numpy()
            else:
                transitions = None
            cm = utils.ConfusionMatrix()
            for batch in data_loader:
                sentences, tag_sequences = batch
                x = self._forward(sentences)
                loss = self._loss(x, tag_sequences)
                predicted_tag_sequences, confs = self._obtain_labels(x, sentences, transitions, self.tag_type_mode)
                for i in range(len(sentences)):
                    gold = tag_sequences[i].get_span()
                    pred = predicted_tag_sequences[i].get_span()
                    for pred_span in pred:
                        if pred_span in gold:
                            cm.add_tp(pred_span[0])
                        else:
                            cm.add_fp(pred_span[0])
                    for gold_span in gold:
                        if gold_span not in pred:
                            cm.add_fn(gold_span[0])
                eval_loss += loss.item()
                store_embeddings(sentences, embedding_storage_mode)

            eval_loss /= len(data_loader)
            if self.tag_type_mode == 'ner':
                res = utils.EvaluationResult(cm.micro_f_measure())
            else:
                res = utils.EvaluationResult(cm.micro_accuracy)
            res.add_metric('Confusion Matrix', cm)
            return eval_loss, res
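
In the snippet above, span-level true positives, false positives and false negatives are pooled in a confusion matrix and then turned into a micro-averaged F-measure. The exact utils.ConfusionMatrix API is not shown here, but the arithmetic behind a micro F1 over pooled counts is just the usual precision/recall combination, as in this small self-contained sketch:

def micro_f_measure(tp: int, fp: int, fn: int) -> float:
    # micro-averaged F1 computed from pooled counts over all classes
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
    if precision + recall == 0.0:
        return 0.0
    return 2 * precision * recall / (precision + recall)

# e.g. 90 correctly predicted spans, 10 spurious, 20 missed -> F1 is roughly 0.857
print(round(micro_f_measure(tp=90, fp=10, fn=20), 3))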
Example #3
 def predict(self,
             sentences: Union[List[Sentence], Sentence],
             mini_batch_size=32,
             embedding_storage_mode='none',
             verbose=False) -> List[Sentence]:
     with torch.no_grad():
         if isinstance(sentences, Sentence):
             sentences = [sentences]
         filtered_sentences = self._filter_empty_sentences(sentences)
         store_embeddings(filtered_sentences, 'none')
         filtered_sentences.sort(key=(lambda x: len(x)), reverse=True)
         batches = [
             filtered_sentences[x:(x + mini_batch_size)]
             for x in range(0, len(filtered_sentences), mini_batch_size)
         ]
         if verbose:
             batches = tqdm(batches)
         for (i, batch) in enumerate(batches):
             if verbose:
                 batches.set_description(f'Inferencing on batch {i}')
             with torch.no_grad():
                 feature = self.forward(batch)
                 (tags, all_tags) = self._obtain_labels(feature, batch)
             for (sentence, sent_tags,
                  sent_all_tags) in zip(batch, tags, all_tags):
                 for (token, tag,
                      token_all_tags) in zip(sentence.tokens, sent_tags,
                                             sent_all_tags):
                     token.add_tag_label(self.tag_type, tag)
                     token.add_tags_proba_dist(self.tag_type,
                                               token_all_tags)
             store_embeddings(batch, storage_mode=embedding_storage_mode)
         return sentences
Example #4
    def calculate_prototypes(
        self,
        data: FlairDataset,
        encoder: DefaultClassifier,
        exempt_labels: List[str] = [],
        mini_batch_size=32,
    ):
        """
        Function that calculates a prototype for each class based on the Euclidean average embedding over the whole dataset.
        :param data: dataset for which to calculate prototypes
        :param encoder: encoder to use
        :param exempt_labels: labels to exclude
        :param mini_batch_size: number of sentences to embed at the same time
        :return:
        """

        # gradients are not required for prototype computation
        with torch.no_grad():

            dataloader = DataLoader(data, batch_size=mini_batch_size)

            # reset prototypes for all classes
            new_prototypes = torch.zeros(self.num_prototypes,
                                         self.prototype_size,
                                         device=flair.device)

            counter: Counter = Counter()

            for batch in tqdm(dataloader):

                logits, labels = encoder.forward_pass(batch)  # type: ignore

                if len(labels) > 0:
                    # decode embeddings into prototype space
                    if self.metric_space_decoder is not None:
                        logits = self.metric_space_decoder(logits)

                    for logit, label in zip(logits, labels):
                        counter.update(label)

                        idx = encoder.label_dictionary.get_idx_for_item(
                            label[0])

                        new_prototypes[idx] += logit

                # embeddings need to be removed so that memory doesn't fill up
                store_embeddings(batch, storage_mode="none")

            # TODO: changes required
            for label, count in counter.most_common():
                average_prototype = new_prototypes[
                    encoder.label_dictionary.get_idx_for_item(label)] / count
                new_prototypes[encoder.label_dictionary.get_idx_for_item(
                    label)] = average_prototype

            for label in exempt_labels:
                label_idx = encoder.label_dictionary.get_idx_for_item(label)
                new_prototypes[label_idx] = self.prototype_vectors[label_idx]

            self.prototype_vectors.data = new_prototypes.to(flair.device)
Example #5
    def predict(
        self,
        sentences: Union[Sentence, List[Sentence]],
        mini_batch_size: int = 32,
        embedding_storage_mode="none",
    ) -> List[Sentence]:

        with torch.no_grad():
            if type(sentences) is Sentence:
                sentences = [sentences]

            filtered_sentences = self._filter_empty_sentences(sentences)

            # remove previous embeddings
            store_embeddings(filtered_sentences, "none")

            batches = [
                filtered_sentences[x : x + mini_batch_size]
                for x in range(0, len(filtered_sentences), mini_batch_size)
            ]

            for batch in batches:
                scores = self.forward(batch)

                for (sentence, score) in zip(batch, scores.tolist()):
                    sentence.labels = [Label(value=str(score[0]))]

                # clearing token embeddings to save memory
                store_embeddings(batch, storage_mode=embedding_storage_mode)

            return sentences
Example #6
    def train_batch(self, batch_no, batch):
        # process mini-batches
        start_time = time.time()

        self.model.set_output(self.model_mode)
        loss = self.model.forward_loss(batch)

        self.optimizer.zero_grad()
        # Backward
        loss.backward()

        torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5.0)
        self.optimizer.step()

        self.seen_batches += 1
        self.train_loss += loss.item()

        # depending on memory mode, embeddings are moved to CPU, GPU or deleted
        store_embeddings(batch, self.embedding_storage_mode)

        self.batch_time += time.time() - start_time
        if batch_no % self.modulo == 0:
            model_string = f'model: {self.display_name} - ' if self.display_name is not None else ''
            log.info(
                f"{model_string}epoch {self.cur_epoch + 1} - iter {batch_no}/{self.total_number_of_batches} - loss "
                f"{self.train_loss / self.seen_batches:.8f} - samples/sec: {self.mini_batch_size * self.modulo / self.batch_time:.2f}"
            )
            self.batch_time = 0
            iteration = self.cur_epoch * self.total_number_of_batches + batch_no
            if not self.param_selection_mode:
                self.weight_extractor.extract_weights(self.model.state_dict(),
                                                      iteration)
Example #7
    def predict(
        self,
        sentences: Union[List[Sentence], Sentence],
        mini_batch_size=32,
        embedding_storage_mode="none",
        all_tag_prob: bool = False,
        verbose=False,
    ) -> List[Sentence]:
        with torch.no_grad():
            if isinstance(sentences, Sentence):
                sentences = [sentences]

            filtered_sentences = self._filter_empty_sentences(sentences)

            # remove previous embeddings
            store_embeddings(filtered_sentences, "none")

            # reverse sort all sequences by their length
            filtered_sentences.sort(key=lambda x: len(x), reverse=True)

            if self.use_crf:
                transitions = self.transitions.detach().cpu()
            else:
                transitions = None

            # make mini-batches
            batches = [
                filtered_sentences[x : x + mini_batch_size]
                for x in range(0, len(filtered_sentences), mini_batch_size)
            ]

            # progress bar for verbosity
            if verbose:
                batches = tqdm(batches)

            for i, batch in enumerate(batches):

                if verbose:
                    batches.set_description(f"Inferencing on batch {i}")

                feature = self.forward(batch)
                tags, all_tags = self._obtain_labels(
                    feature, batch, transitions, get_all_tags=all_tag_prob
                )

                for (sentence, sent_tags) in zip(batch, tags):
                    for (token, tag) in zip(sentence.tokens, sent_tags):
                        token.add_tag_label(self.tag_type, tag)

                # all_tags will be empty if all_tag_prob is set to False, so the for loop will be avoided
                for (sentence, sent_all_tags) in zip(batch, all_tags):
                    for (token, token_all_tags) in zip(sentence.tokens, sent_all_tags):
                        token.add_tags_proba_dist(self.tag_type, token_all_tags)

                # clearing token embeddings to save memory
                store_embeddings(batch, storage_mode=embedding_storage_mode)

            return sentences
Example #8
    def evaluate(
        self,
        data_loader: DataLoader,
        out_path: Path = None,
        embeddings_storage_mode: str = "cpu",
    ) -> (Result, float):

        with torch.no_grad():
            eval_loss = 0

            batch_no: int = 0

            metric = Metric("Evaluation")

            lines: List[str] = []
            for batch in data_loader:
                batch_no += 1

                with torch.no_grad():
                    features = self.forward(batch)
                    loss = self.calculate_loss(features, batch)
                    tags, _ = self.obtain_labels(features, batch)

                eval_loss += loss

                metric = self.obtain_performance_metric(
                    batch, tags, lines, metric)

                store_embeddings(batch, embeddings_storage_mode)

            eval_loss /= batch_no

            if out_path is not None:
                with open(out_path, "w", encoding="utf-8") as outfile:
                    outfile.write("".join(lines))

            detailed_result = (
                f"\nMICRO_AVG: acc {metric.micro_avg_accuracy()} - f1-score {metric.micro_avg_f_score()}"
                f"\nMACRO_AVG: acc {metric.macro_avg_accuracy()} - f1-score {metric.macro_avg_f_score()}"
            )
            for class_name in metric.get_classes():
                detailed_result += (
                    f"\n{class_name:<10} tp: {metric.get_tp(class_name)} - fp: {metric.get_fp(class_name)} - "
                    f"fn: {metric.get_fn(class_name)} - tn: {metric.get_tn(class_name)} - precision: "
                    f"{metric.precision(class_name):.4f} - recall: {metric.recall(class_name):.4f} - "
                    f"accuracy: {metric.accuracy(class_name):.4f} - f1-score: "
                    f"{metric.f_score(class_name):.4f}")

            result = Result(
                main_score=metric.micro_avg_f_score(),
                log_line=
                f"{metric.precision()}\t{metric.recall()}\t{metric.micro_avg_f_score()}",
                log_header="PRECISION\tRECALL\tF1",
                detailed_results=detailed_result,
            )

            return result, eval_loss
Example #9
 def evaluate(self,
              data_loader: DataLoader,
              out_path: Path = None,
              embeddings_storage_mode: str = 'cpu') -> (Result, float):
     with torch.no_grad():
         eval_loss = 0
         metric = MetricRegression('Evaluation')
         lines = []
         total_count = 0
         for (batch_nr, batch) in enumerate(data_loader):
             if isinstance(batch, Sentence):
                 batch = [batch]
             (scores, loss) = self.forward_labels_and_loss(batch)
             true_values = []
             for sentence in batch:
                 total_count += 1
                 for label in sentence.labels:
                     true_values.append(float(label.value))
             results = []
             for score in scores:
                 if (type(score[0]) is Label):
                     results.append(float(score[0].score))
                 else:
                     results.append(float(score[0]))
             eval_loss += loss
             metric.true.extend(true_values)
             metric.pred.extend(results)
             for (sentence, prediction,
                  true_value) in zip(batch, results, true_values):
                 eval_line = '{}\t{}\t{}\n'.format(
                     sentence.to_original_text(), true_value, prediction)
                 lines.append(eval_line)
             store_embeddings(batch, embeddings_storage_mode)
         eval_loss /= total_count
         if (out_path is not None):
             with open(out_path, 'w', encoding='utf-8') as outfile:
                 outfile.write(''.join(lines))
         log_line = f'{metric.mean_squared_error()}\t{metric.spearmanr()}\t{metric.pearsonr()}'
         log_header = 'MSE\tSPEARMAN\tPEARSON'
         detailed_result = (
             f'AVG: mse: {metric.mean_squared_error():.4f}'
             f' - mae: {metric.mean_absolute_error():.4f}'
             f' - pearson: {metric.pearsonr():.4f}'
             f' - spearman: {metric.spearmanr():.4f}')
         result = Result(metric.pearsonr(), log_header, log_line,
                         detailed_result)
         return (result, eval_loss)
Example #10
    def evaluate(
        self,
        data_loader: DataLoader,
        out_path: Path = None,
        embeddings_storage_mode: str = "cpu",
    ) -> (Result, float):

        with torch.no_grad():
            eval_loss = 0

            metric = MetricRegression("Evaluation")

            lines: List[str] = []
            total_count = 0
            for batch_nr, batch in enumerate(data_loader):

                if isinstance(batch, Sentence):
                    batch = [batch]

                scores, loss = self.forward_labels_and_loss(batch)

                eval_loss += loss

                metric = self.obtain_performance_metric(batch, scores, lines, metric)

                total_count += len(batch)

                store_embeddings(batch, embeddings_storage_mode)

            eval_loss /= total_count

            ##TODO: not saving lines yet
            if out_path is not None:
                with open(out_path, "w", encoding="utf-8") as outfile:
                    outfile.write("".join(lines))

            log_line = f"{metric.mean_squared_error()}\t{metric.spearmanr()}\t{metric.pearsonr()}"
            log_header = "MSE\tSPEARMAN\tPEARSON"

            detailed_result = (
                f"AVG: mse: {metric.mean_squared_error():.4f} - "
                f"mae: {metric.mean_absolute_error():.4f} - "
                f"pearson: {metric.pearsonr():.4f} - "
                f"spearman: {metric.spearmanr():.4f}"
            )

            result: Result = Result(
                metric.pearsonr(), log_header, log_line, detailed_result
            )

            return result, eval_loss
Example #11
    def predict(
        self,
        sentences: Union[Sentence, List[Sentence]],
        mini_batch_size: int = 32,
        verbose: bool = False,
        label_name: Optional[str] = None,
        embedding_storage_mode="none",
    ) -> List[Sentence]:

        if label_name is None:
            label_name = self.label_name if self.label_name is not None else "label"

        with torch.no_grad():
            if not isinstance(sentences, list):
                sentences = [sentences]

            if not sentences:
                return sentences

            reordered_sentences = sorted(sentences,
                                         key=lambda s: len(s),
                                         reverse=True)

            if len(reordered_sentences) == 0:
                return sentences

            dataloader = DataLoader(
                dataset=FlairDatapointDataset(reordered_sentences),
                batch_size=mini_batch_size,
            )
            # progress bar for verbosity
            if verbose:
                progress_bar = tqdm(dataloader)
                progress_bar.set_description("Batch inference")
                dataloader = progress_bar

            for batch in dataloader:
                # skip the batch if all sentences in it are empty
                if not batch:
                    continue
                scores = self.forward(batch)

                for (sentence, score) in zip(batch, scores.tolist()):
                    sentence.set_label(label_name, value=str(score[0]))

                # clearing token embeddings to save memory
                store_embeddings(batch, storage_mode=embedding_storage_mode)

            return sentences
Example #12
 def _train_one_epoch_normal(self, train_loader, embedding_storage_mode,
                             print_every_batch):
     epoch_loss = 0
     for i, batch in enumerate(train_loader):
         self.optimizer.zero_grad()
         batch_loss = self.model.forward_loss(batch)
         batch_loss.backward()
         torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5)
         self.optimizer.step()
         store_embeddings(batch[0], embedding_storage_mode)
         epoch_loss += batch_loss.item()
         if (i % print_every_batch == 0 or i + 1 == len(train_loader)):
             logger.log(
                 f'  Batch ({i+1}/{len(train_loader)}), loss: {epoch_loss/(i+1):.4f}'
             )
     return epoch_loss / len(train_loader)
Example #13
    def predict(
        self,
        sentences: Union[List[Sentence], Sentence],
        mini_batch_size: int = 32,
        num_workers: int = 8,
        print_tree: bool = False,
        embedding_storage_mode="none",
    ) -> None:
        """
        Predict arcs and tags for Dependency Parser task
        :param sentences: a Sentence or a List of Sentence
        :param mini_batch_size: mini batch size to use
        :param print_tree: set to True to print dependency parser of sentence as tree shape
        :param embedding_storage_mode: default is 'none' which is always best. Only set to 'cpu' or 'gpu' if
        you wish to not only predict, but also keep the generated embeddings in CPU or GPU memory respectively.
        'gpu' to store embeddings in GPU memory.
        """
        if not isinstance(sentences, list):
            sentences = [sentences]
        sentence_dataset = FlairDatapointDataset(sentences)
        data_loader = DataLoader(sentence_dataset,
                                 batch_size=mini_batch_size,
                                 num_workers=num_workers)

        for batch in data_loader:
            with torch.no_grad():
                score_arc, score_rel = self.forward(batch)
                arc_prediction, relation_prediction = self._obtain_labels_(
                    score_arc, score_rel)

            for sentence_index, (sentence, sent_tags, sent_arcs) in enumerate(
                    zip(batch, relation_prediction, arc_prediction)):

                for token_index, (token, tag, head_id) in enumerate(
                        zip(sentence.tokens, sent_tags, sent_arcs)):
                    token.add_tag(self.tag_type, tag,
                                  score_rel[sentence_index][token_index])

                    token.head_id = int(head_id)

                if print_tree:
                    tree_printer(sentence, self.tag_type)
                    print("-" * 50)
            store_embeddings(batch, storage_mode=embedding_storage_mode)
Example #14
 def predict(self,
             sentences: Union[Sentence, List[Sentence]],
             mini_batch_size: int = 32,
             embedding_storage_mode='none') -> List[Sentence]:
     with torch.no_grad():
         if (type(sentences) is Sentence):
             sentences = [sentences]
         filtered_sentences = self._filter_empty_sentences(sentences)
         store_embeddings(filtered_sentences, 'none')
         batches = [
             filtered_sentences[x:(x + mini_batch_size)]
             for x in range(0, len(filtered_sentences), mini_batch_size)
         ]
         for batch in batches:
             scores = self.forward(batch)
             for (sentence, score) in zip(batch, scores.tolist()):
                 sentence.labels = [Label(value=str(score[0]))]
             store_embeddings(batch, storage_mode=embedding_storage_mode)
         return sentences
Example #15
    def predict(
        self,
        sentences: Union[Sentence, List[Sentence]],
        mini_batch_size: int = 32,
        embedding_storage_mode="none",
        multi_class_prob: bool = False,
    ) -> List[Sentence]:
        """
        Predicts the class labels for the given sentences. The labels are directly added to the sentences.
        :param sentences: list of sentences
        :param mini_batch_size: mini batch size to use
        :param embedding_storage_mode: 'none' for the minimum memory footprint, 'cpu' to store embeddings in RAM,
        'gpu' to store embeddings in GPU memory.
        :param multi_class_prob: return probability for all classes for multiclass
        :return: the list of sentences containing the labels
        """
        with torch.no_grad():
            if type(sentences) is Sentence:
                sentences = [sentences]

            filtered_sentences = self._filter_empty_sentences(sentences)

            # remove previous embeddings
            store_embeddings(filtered_sentences, "none")

            batches = [
                filtered_sentences[x : x + mini_batch_size]
                for x in range(0, len(filtered_sentences), mini_batch_size)
            ]

            for batch in batches:
                scores = self.forward(batch)
                predicted_labels = self._obtain_labels(
                    scores, predict_prob=multi_class_prob
                )

                for (sentence, labels) in zip(batch, predicted_labels):
                    sentence.labels = labels

                # clearing token embeddings to save memory
                store_embeddings(batch, storage_mode=embedding_storage_mode)

            return sentences
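
For context, a predict() method with this signature is normally called from user code along the following lines. The model path here is a placeholder and the surrounding setup is only a hedged usage sketch, not part of the example above:

from flair.data import Sentence
from flair.models import TextClassifier

# placeholder path to a trained classifier; substitute your own model file
classifier = TextClassifier.load("resources/classifiers/my-model/final-model.pt")

sentences = [
    Sentence("The movie was surprisingly good."),
    Sentence("I would not recommend this product."),
]

# labels are written directly onto the Sentence objects;
# embedding_storage_mode='none' frees the embeddings after each mini-batch
classifier.predict(sentences, mini_batch_size=32, embedding_storage_mode="none")

for sentence in sentences:
    print(sentence.to_original_text(), sentence.labels)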
Example #16
    def predict(self,
                sentence,
                all_tag_prob=False,
                embedding_storage_mode="none"):

        with torch.no_grad():

            tags = _predict_sentence(self.__models, sentence)

            for (token, tag) in zip(sentence.tokens, tags):
                token.add_tag_label(self.tag_type, tag)

            # clearing token embeddings to save memory
            store_embeddings(sentence, storage_mode=embedding_storage_mode)

            result = sentence
            assert len(sentence) == len(result)
            return result
Example #17
 def predict(self,
             sentences: Union[Sentence, List[Sentence]],
             mini_batch_size: int = 32,
             embedding_storage_mode='none',
             multi_class_prob: bool = False) -> List[Sentence]:
     """
     Predicts the class labels for the given sentences. The labels are directly added to the sentences.
     :param sentences: list of sentences
     :param mini_batch_size: mini batch size to use
     :param multi_class_prob: return probability for all classes for multiclass
     :return: the list of sentences containing the labels
     """
     with torch.no_grad():
         if (type(sentences) is Sentence):
             sentences = [sentences]
         filtered_sentences = self._filter_empty_sentences(sentences)
         store_embeddings(filtered_sentences, 'none')
         batches = [
             filtered_sentences[x:(x + mini_batch_size)]
             for x in range(0, len(filtered_sentences), mini_batch_size)
         ]
         for batch in batches:
             scores = self.forward(batch)
             predicted_labels = self._obtain_labels(
                 scores, predict_prob=multi_class_prob)
             for (sentence, labels) in zip(batch, predicted_labels):
                 sentence.labels = labels
             store_embeddings(batch, storage_mode=embedding_storage_mode)
         return sentences
Example #18
 def _train_one_epoch_mixup(self, train_loader, n_passes,
                            embedding_storage_mode, print_every_batch):
     epoch_loss = 0
     for j in range(n_passes):
         logger.log(f'  Pass ({j+1}/{n_passes})')
         pass_loss = 0
         for i, big_batch in enumerate(train_loader):
             sz = len(big_batch[0]) // 2
             batch1 = (big_batch[0][:sz], big_batch[1][:sz])
             batch2 = (big_batch[0][sz:sz + sz], big_batch[1][sz:sz + sz])
             self.optimizer.zero_grad()
             batch_loss = self.model.forward_loss(batch1, batch2)
             batch_loss.backward()
             torch.nn.utils.clip_grad_norm_(self.model.parameters(), 5)
             self.optimizer.step()
             store_embeddings(batch1[0], embedding_storage_mode)
             store_embeddings(batch2[0], embedding_storage_mode)
             pass_loss += batch_loss.item()
             if (i % print_every_batch == 0 or i + 1 == len(train_loader)):
                 logger.log(
                     f'    Batch ({i+1}/{len(train_loader)}), loss: {pass_loss/(i+1):.4f}'
                 )
         epoch_loss += pass_loss
     return epoch_loss / (len(train_loader) * n_passes)
Example #19
    def evaluate(
        self,
        data_loader: DataLoader,
        out_path: Path = None,
        embedding_storage_mode: str = "none",
    ) -> (Result, float):

        if type(out_path) == str:
            out_path = Path(out_path)

        with torch.no_grad():
            eval_loss = 0

            batch_no: int = 0

            metric = Metric("Evaluation", beta=self.beta)

            lines: List[str] = []

            if self.use_crf:
                transitions = self.transitions.detach().cpu().numpy()
            else:
                transitions = None

            for batch in data_loader:
                batch_no += 1

                with torch.no_grad():
                    features = self.forward(batch)
                    loss = self._calculate_loss(features, batch)
                    tags, _ = self._obtain_labels(
                        feature=features,
                        batch_sentences=batch,
                        transitions=transitions,
                        get_all_tags=False,
                    )

                eval_loss += loss

                for (sentence, sent_tags) in zip(batch, tags):
                    for (token, tag) in zip(sentence.tokens, sent_tags):
                        token: Token = token
                        token.add_tag("predicted", tag.value, tag.score)

                        # append both to file for evaluation
                        eval_line = "{} {} {} {}\n".format(
                            token.text,
                            token.get_tag(self.tag_type).value,
                            tag.value,
                            tag.score,
                        )
                        lines.append(eval_line)
                    lines.append("\n")

                for sentence in batch:
                    # make list of gold tags
                    gold_tags = [
                        (tag.tag, tag.text) for tag in sentence.get_spans(self.tag_type)
                    ]
                    # make list of predicted tags
                    predicted_tags = [
                        (tag.tag, tag.text) for tag in sentence.get_spans("predicted")
                    ]

                    # check for true positives, false positives and false negatives
                    for tag, prediction in predicted_tags:
                        if (tag, prediction) in gold_tags:
                            metric.add_tp(tag)
                        else:
                            metric.add_fp(tag)

                    for tag, gold in gold_tags:
                        if (tag, gold) not in predicted_tags:
                            metric.add_fn(tag)
                        else:
                            metric.add_tn(tag)

                store_embeddings(batch, embedding_storage_mode)

            eval_loss /= batch_no

            if out_path is not None:
                with open(out_path, "w", encoding="utf-8") as outfile:
                    outfile.write("".join(lines))

            detailed_result = (
                f"\nMICRO_AVG: acc {metric.micro_avg_accuracy():.4f} - f1-score {metric.micro_avg_f_score():.4f}"
                f"\nMACRO_AVG: acc {metric.macro_avg_accuracy():.4f} - f1-score {metric.macro_avg_f_score():.4f}"
            )
            for class_name in metric.get_classes():
                detailed_result += (
                    f"\n{class_name:<10} tp: {metric.get_tp(class_name)} - fp: {metric.get_fp(class_name)} - "
                    f"fn: {metric.get_fn(class_name)} - tn: {metric.get_tn(class_name)} - precision: "
                    f"{metric.precision(class_name):.4f} - recall: {metric.recall(class_name):.4f} - "
                    f"accuracy: {metric.accuracy(class_name):.4f} - f1-score: "
                    f"{metric.f_score(class_name):.4f}"
                )

            result = Result(
                main_score=metric.micro_avg_f_score(),
                log_line=f"{metric.precision():.4f}\t{metric.recall():.4f}\t{metric.micro_avg_f_score():.4f}",
                log_header="PRECISION\tRECALL\tF1",
                detailed_results=detailed_result,
            )

            return result, eval_loss
Example #20
    def train(
        self,
        base_path: Union[Path, str],
        learning_rate: float = 0.1,
        mini_batch_size: int = 32,
        mini_batch_chunk_size: int = None,
        max_epochs: int = 100,
        scheduler=AnnealOnPlateau,
        cycle_momentum: bool = False,
        anneal_factor: float = 0.5,
        patience: int = 3,
        initial_extra_patience=0,
        min_learning_rate: float = 0.0001,
        train_with_dev: bool = False,
        train_with_test: bool = False,
        monitor_train: bool = False,
        monitor_test: bool = False,
        embeddings_storage_mode: str = "cpu",
        checkpoint: bool = False,
        save_final_model: bool = True,
        anneal_with_restarts: bool = False,
        anneal_with_prestarts: bool = False,
        batch_growth_annealing: bool = False,
        shuffle: bool = True,
        param_selection_mode: bool = False,
        write_weights: bool = False,
        num_workers: int = 6,
        sampler=None,
        use_amp: bool = False,
        amp_opt_level: str = "O1",
        eval_on_train_fraction=0.0,
        eval_on_train_shuffle=False,
        save_model_at_each_epoch=False,
        **kwargs,
    ) -> dict:
        """
        Trains any class that implements the flair.nn.Model interface.
        :param base_path: Main path to which all output during training is logged and models are saved
        :param learning_rate: Initial learning rate (or max, if scheduler is OneCycleLR)
        :param mini_batch_size: Size of mini-batches during training
        :param mini_batch_chunk_size: If mini-batches are larger than this number, they get broken down into chunks of this size for processing purposes
        :param max_epochs: Maximum number of epochs to train. Terminates training if this number is surpassed.
        :param scheduler: The learning rate scheduler to use
        :param cycle_momentum: If scheduler is OneCycleLR, whether the scheduler should cycle also the momentum
        :param anneal_factor: The factor by which the learning rate is annealed
        :param patience: Patience is the number of epochs with no improvement the Trainer waits
         until annealing the learning rate
        :param min_learning_rate: If the learning rate falls below this threshold, training terminates
        :param train_with_dev: If True, training is performed using both train+dev data
        :param monitor_train: If True, training data is evaluated at end of each epoch
        :param monitor_test: If True, test data is evaluated at end of each epoch
        :param embeddings_storage_mode: One of 'none' (all embeddings are deleted and freshly recomputed),
        'cpu' (embeddings are stored on CPU) or 'gpu' (embeddings are stored on GPU)
        :param checkpoint: If True, a full checkpoint is saved at end of each epoch
        :param save_final_model: If True, final model is saved
        :param anneal_with_restarts: If True, the last best model is restored when annealing the learning rate
        :param shuffle: If True, data is shuffled during training
        :param param_selection_mode: If True, testing is performed against dev data. Use this mode when doing
        parameter selection.
        :param num_workers: Number of workers in your data loader.
        :param sampler: You can pass a data sampler here for special sampling of data.
        :param eval_on_train_fraction: the fraction of train data to do the evaluation on;
        if 0. the evaluation is not performed on a fraction of the training data,
        if 'dev' the size is determined from the dev set size
        :param eval_on_train_shuffle: if True the train data fraction is determined on the start of training
        and kept fixed during training, otherwise it's sampled at beginning of each epoch
        :param save_model_at_each_epoch: If True, at each epoch the thus far trained model will be saved
        :param kwargs: Other arguments for the Optimizer
        :return:
        """

        if self.use_tensorboard:
            try:
                from torch.utils.tensorboard import SummaryWriter

                writer = SummaryWriter()
            except:
                log_line(log)
                log.warning(
                    "ATTENTION! PyTorch >= 1.1.0 and pillow are required for TensorBoard support!"
                )
                log_line(log)
                self.use_tensorboard = False
                pass

        if use_amp:
            if sys.version_info < (3, 0):
                raise RuntimeError(
                    "Apex currently only supports Python 3. Aborting.")
            if amp is None:
                raise RuntimeError(
                    "Failed to import apex. Please install apex from https://www.github.com/nvidia/apex "
                    "to enable mixed-precision training.")

        if mini_batch_chunk_size is None:
            mini_batch_chunk_size = mini_batch_size
        if learning_rate < min_learning_rate:
            min_learning_rate = learning_rate / 10

        initial_learning_rate = learning_rate

        # cast string to Path
        if type(base_path) is str:
            base_path = Path(base_path)

        log_handler = add_file_handler(log, base_path / "training.log")

        log_line(log)
        log.info(f'Model: "{self.model}"')
        log_line(log)
        log.info(f'Corpus: "{self.corpus}"')
        log_line(log)
        log.info("Parameters:")
        log.info(f' - learning_rate: "{learning_rate}"')
        log.info(f' - mini_batch_size: "{mini_batch_size}"')
        log.info(f' - patience: "{patience}"')
        log.info(f' - anneal_factor: "{anneal_factor}"')
        log.info(f' - max_epochs: "{max_epochs}"')
        log.info(f' - shuffle: "{shuffle}"')
        log.info(f' - train_with_dev: "{train_with_dev}"')
        log.info(f' - batch_growth_annealing: "{batch_growth_annealing}"')
        log_line(log)
        log.info(f'Model training base path: "{base_path}"')
        log_line(log)
        log.info(f"Device: {flair.device}")
        log_line(log)
        log.info(f"Embeddings storage mode: {embeddings_storage_mode}")
        if isinstance(self.model, SequenceTagger
                      ) and self.model.weight_dict and self.model.use_crf:
            log_line(log)
            log.warning(
                f'WARNING: Specified class weights will not take effect when using CRF'
            )

        # determine what splits (train, dev, test) to evaluate and log
        log_train = True if monitor_train else False
        log_test = (True if (not param_selection_mode and self.corpus.test
                             and monitor_test) else False)
        log_dev = False if train_with_dev or not self.corpus.dev else True
        log_train_part = (True if (eval_on_train_fraction == "dev"
                                   or eval_on_train_fraction > 0.0) else False)

        if log_train_part:
            train_part_size = (len(
                self.corpus.dev) if eval_on_train_fraction == "dev" else int(
                    len(self.corpus.train) * eval_on_train_fraction))
            assert train_part_size > 0
            if not eval_on_train_shuffle:
                train_part_indices = list(range(train_part_size))
                train_part = torch.utils.data.dataset.Subset(
                    self.corpus.train, train_part_indices)

        # prepare loss logging file and set up header
        loss_txt = init_output_file(base_path, "loss.tsv")

        weight_extractor = WeightExtractor(base_path)

        optimizer: torch.optim.Optimizer = self.optimizer(
            self.model.parameters(), lr=learning_rate, **kwargs)

        if use_amp:
            self.model, optimizer = amp.initialize(self.model,
                                                   optimizer,
                                                   opt_level=amp_opt_level)

        # minimize training loss if training with dev data, else maximize dev score
        anneal_mode = "min" if train_with_dev else "max"

        if scheduler == OneCycleLR:
            dataset_size = len(self.corpus.train)
            if train_with_dev:
                dataset_size += len(self.corpus.dev)
            lr_scheduler = OneCycleLR(
                optimizer,
                max_lr=learning_rate,
                steps_per_epoch=dataset_size // mini_batch_size + 1,
                epochs=max_epochs - self.epoch,  # if we load a checkpoint, we have already trained for self.epoch
                pct_start=0.0,
                cycle_momentum=cycle_momentum)
        else:
            lr_scheduler = scheduler(
                optimizer,
                factor=anneal_factor,
                patience=patience,
                initial_extra_patience=initial_extra_patience,
                mode=anneal_mode,
                verbose=True,
            )

        if (isinstance(lr_scheduler, OneCycleLR) and batch_growth_annealing):
            raise ValueError(
                "Batch growth with OneCycle policy is not implemented.")

        train_data = self.corpus.train

        # if training also uses dev/train data, include in training set
        if train_with_dev or train_with_test:

            parts = [self.corpus.train]
            if train_with_dev: parts.append(self.corpus.dev)
            if train_with_test: parts.append(self.corpus.test)

            train_data = ConcatDataset(parts)

        # initialize sampler if provided
        if sampler is not None:
            # init with default values if only class is provided
            if inspect.isclass(sampler):
                sampler = sampler()
            # set dataset to sample from
            sampler.set_dataset(train_data)
            shuffle = False

        dev_score_history = []
        dev_loss_history = []
        train_loss_history = []

        micro_batch_size = mini_batch_chunk_size

        # At any point you can hit Ctrl + C to break out of training early.
        try:
            previous_learning_rate = learning_rate
            momentum = 0
            for group in optimizer.param_groups:
                if "momentum" in group:
                    momentum = group["momentum"]

            for self.epoch in range(self.epoch + 1, max_epochs + 1):
                log_line(log)

                if anneal_with_prestarts:
                    last_epoch_model_state_dict = copy.deepcopy(
                        self.model.state_dict())

                if eval_on_train_shuffle:
                    train_part_indices = list(range(len(self.corpus.train)))
                    random.shuffle(train_part_indices)
                    train_part_indices = train_part_indices[:train_part_size]
                    train_part = torch.utils.data.dataset.Subset(
                        self.corpus.train, train_part_indices)

                # get new learning rate
                for group in optimizer.param_groups:
                    learning_rate = group["lr"]

                if learning_rate != previous_learning_rate and batch_growth_annealing:
                    mini_batch_size *= 2

                # reload last best model if annealing with restarts is enabled
                if ((anneal_with_restarts or anneal_with_prestarts)
                        and learning_rate != previous_learning_rate
                        and (base_path / "best-model.pt").exists()):
                    if anneal_with_restarts:
                        log.info("resetting to best model")
                        self.model.load_state_dict(
                            self.model.load(base_path /
                                            "best-model.pt").state_dict())
                    if anneal_with_prestarts:
                        log.info("resetting to pre-best model")
                        self.model.load_state_dict(
                            self.model.load(base_path /
                                            "pre-best-model.pt").state_dict())

                previous_learning_rate = learning_rate

                # stop training if learning rate becomes too small
                if (not isinstance(lr_scheduler, OneCycleLR)
                    ) and learning_rate < min_learning_rate:
                    log_line(log)
                    log.info("learning rate too small - quitting training!")
                    log_line(log)
                    break

                batch_loader = DataLoader(
                    train_data,
                    batch_size=mini_batch_size,
                    shuffle=shuffle,
                    num_workers=num_workers,
                    sampler=sampler,
                )

                self.model.train()

                train_loss: float = 0

                seen_batches = 0
                total_number_of_batches = len(batch_loader)

                modulo = max(1, int(total_number_of_batches / 10))

                # process mini-batches
                batch_time = 0
                for batch_no, batch in enumerate(batch_loader):
                    start_time = time.time()

                    # zero the gradients on the model and optimizer
                    self.model.zero_grad()
                    optimizer.zero_grad()

                    # if necessary, make batch_steps
                    batch_steps = [batch]
                    if len(batch) > micro_batch_size:
                        batch_steps = [
                            batch[x:x + micro_batch_size]
                            for x in range(0, len(batch), micro_batch_size)
                        ]

                    # forward and backward for batch
                    for batch_step in batch_steps:

                        # forward pass
                        loss = self.model.forward_loss(batch_step)

                        # Backward
                        if use_amp:
                            with amp.scale_loss(loss,
                                                optimizer) as scaled_loss:
                                scaled_loss.backward()
                        else:
                            loss.backward()

                    # do the optimizer step
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   5.0)
                    optimizer.step()

                    # do the scheduler step if one-cycle
                    if isinstance(lr_scheduler, OneCycleLR):
                        lr_scheduler.step()
                        # get new learning rate
                        for group in optimizer.param_groups:
                            learning_rate = group["lr"]
                            if "momentum" in group:
                                momentum = group["momentum"]

                    seen_batches += 1
                    train_loss += loss.item()

                    # depending on memory mode, embeddings are moved to CPU, GPU or deleted
                    store_embeddings(batch, embeddings_storage_mode)

                    batch_time += time.time() - start_time
                    if seen_batches % modulo == 0:
                        momentum_info = f' - momentum: {momentum:.4f}' if cycle_momentum else ''
                        log.info(
                            f"epoch {self.epoch} - iter {seen_batches}/{total_number_of_batches} - loss "
                            f"{train_loss / seen_batches:.8f} - samples/sec: {mini_batch_size * modulo / batch_time:.2f}"
                            f" - lr: {learning_rate:.6f}{momentum_info}")
                        batch_time = 0
                        iteration = self.epoch * total_number_of_batches + batch_no
                        if not param_selection_mode and write_weights:
                            weight_extractor.extract_weights(
                                self.model.state_dict(), iteration)

                train_loss /= seen_batches

                self.model.eval()

                log_line(log)
                log.info(
                    f"EPOCH {self.epoch} done: loss {train_loss:.4f} - lr {learning_rate:.7f}"
                )

                if self.use_tensorboard:
                    writer.add_scalar("train_loss", train_loss, self.epoch)

                # anneal against train loss if training with dev, otherwise anneal against dev score
                current_score = train_loss

                # evaluate on train / dev / test split depending on training settings
                result_line: str = ""

                if log_train:
                    train_eval_result, train_loss = self.model.evaluate(
                        self.corpus.train,
                        mini_batch_size=mini_batch_chunk_size,
                        num_workers=num_workers,
                        embedding_storage_mode=embeddings_storage_mode,
                    )
                    result_line += f"\t{train_eval_result.log_line}"

                    # depending on memory mode, embeddings are moved to CPU, GPU or deleted
                    store_embeddings(self.corpus.train,
                                     embeddings_storage_mode)

                if log_train_part:
                    train_part_eval_result, train_part_loss = self.model.evaluate(
                        train_part,
                        mini_batch_size=mini_batch_chunk_size,
                        num_workers=num_workers,
                        embedding_storage_mode=embeddings_storage_mode,
                    )
                    result_line += (
                        f"\t{train_part_loss}\t{train_part_eval_result.log_line}"
                    )
                    log.info(
                        f"TRAIN_SPLIT : loss {train_part_loss} - score {round(train_part_eval_result.main_score, 4)}"
                    )

                if log_dev:
                    dev_eval_result, dev_loss = self.model.evaluate(
                        self.corpus.dev,
                        mini_batch_size=mini_batch_chunk_size,
                        num_workers=num_workers,
                        out_path=base_path / "dev.tsv",
                        embedding_storage_mode=embeddings_storage_mode,
                    )
                    result_line += f"\t{dev_loss}\t{dev_eval_result.log_line}"
                    log.info(
                        f"DEV : loss {dev_loss} - score {round(dev_eval_result.main_score, 4)}"
                    )
                    # calculate scores using dev data if available
                    # append dev score to score history
                    dev_score_history.append(dev_eval_result.main_score)
                    dev_loss_history.append(dev_loss.item())

                    current_score = dev_eval_result.main_score

                    # depending on memory mode, embeddings are moved to CPU, GPU or deleted
                    store_embeddings(self.corpus.dev, embeddings_storage_mode)

                    if self.use_tensorboard:
                        writer.add_scalar("dev_loss", dev_loss, self.epoch)
                        writer.add_scalar("dev_score",
                                          dev_eval_result.main_score,
                                          self.epoch)

                if log_test:
                    test_eval_result, test_loss = self.model.evaluate(
                        self.corpus.test,
                        mini_batch_size=mini_batch_chunk_size,
                        num_workers=num_workers,
                        out_path=base_path / "test.tsv",
                        embedding_storage_mode=embeddings_storage_mode,
                    )
                    result_line += f"\t{test_loss}\t{test_eval_result.log_line}"
                    log.info(
                        f"TEST : loss {test_loss} - score {round(test_eval_result.main_score, 4)}"
                    )

                    # depending on memory mode, embeddings are moved to CPU, GPU or deleted
                    store_embeddings(self.corpus.test, embeddings_storage_mode)

                    if self.use_tensorboard:
                        writer.add_scalar("test_loss", test_loss, self.epoch)
                        writer.add_scalar("test_score",
                                          test_eval_result.main_score,
                                          self.epoch)

                # determine learning rate annealing through scheduler. Use auxiliary metric for AnnealOnPlateau
                if log_dev and isinstance(lr_scheduler, AnnealOnPlateau):
                    lr_scheduler.step(current_score, dev_loss)
                elif not isinstance(lr_scheduler, OneCycleLR):
                    lr_scheduler.step(current_score)

                train_loss_history.append(train_loss)

                # determine bad epoch number
                try:
                    bad_epochs = lr_scheduler.num_bad_epochs
                except:
                    bad_epochs = 0
                for group in optimizer.param_groups:
                    new_learning_rate = group["lr"]
                if new_learning_rate != previous_learning_rate:
                    bad_epochs = patience + 1
                    if previous_learning_rate == initial_learning_rate:
                        bad_epochs += initial_extra_patience

                # log bad epochs
                log.info(f"BAD EPOCHS (no improvement): {bad_epochs}")

                # output log file
                with open(loss_txt, "a") as f:

                    # make headers on first epoch
                    if self.epoch == 1:
                        f.write(
                            f"EPOCH\tTIMESTAMP\tBAD_EPOCHS\tLEARNING_RATE\tTRAIN_LOSS"
                        )

                        if log_train:
                            f.write("\tTRAIN_" + "\tTRAIN_".join(
                                train_eval_result.log_header.split("\t")))
                        if log_train_part:
                            f.write("\tTRAIN_PART_LOSS\tTRAIN_PART_" +
                                    "\tTRAIN_PART_".join(
                                        train_part_eval_result.log_header.
                                        split("\t")))
                        if log_dev:
                            f.write("\tDEV_LOSS\tDEV_" + "\tDEV_".join(
                                dev_eval_result.log_header.split("\t")))
                        if log_test:
                            f.write("\tTEST_LOSS\tTEST_" + "\tTEST_".join(
                                test_eval_result.log_header.split("\t")))

                    f.write(
                        f"\n{self.epoch}\t{datetime.datetime.now():%H:%M:%S}\t{bad_epochs}\t{learning_rate:.4f}\t{train_loss}"
                    )
                    f.write(result_line)

                # if checkpoint is enabled, save model at each epoch
                if checkpoint and not param_selection_mode:
                    self.save_checkpoint(base_path / "checkpoint.pt")

                # if we use dev data, remember best model based on dev evaluation score
                if ((not train_with_dev or anneal_with_restarts
                     or anneal_with_prestarts) and not param_selection_mode
                        and not isinstance(lr_scheduler, OneCycleLR)
                        and current_score == lr_scheduler.best
                        and bad_epochs == 0):
                    print("saving best model")
                    self.model.save(base_path / "best-model.pt")

                    if anneal_with_prestarts:
                        current_state_dict = self.model.state_dict()
                        self.model.load_state_dict(last_epoch_model_state_dict)
                        self.model.save(base_path / "pre-best-model.pt")
                        self.model.load_state_dict(current_state_dict)

                if save_model_at_each_epoch:
                    print("saving model of current epoch")
                    model_name = "model_epoch_" + str(self.epoch) + ".pt"
                    self.model.save(base_path / model_name)

            # if we do not use dev data for model selection, save final model
            if save_final_model and not param_selection_mode:
                self.model.save(base_path / "final-model.pt")

        except KeyboardInterrupt:
            log_line(log)
            log.info("Exiting from training early.")

            if self.use_tensorboard:
                writer.close()

            if not param_selection_mode:
                log.info("Saving model ...")
                self.model.save(base_path / "final-model.pt")
                log.info("Done.")

        # test best model if test data is present
        if self.corpus.test and not train_with_test:
            final_score = self.final_test(base_path, mini_batch_chunk_size,
                                          num_workers)
        else:
            final_score = 0
            log.info("Test data not provided setting final score to 0")

        log.removeHandler(log_handler)

        if self.use_tensorboard:
            writer.close()

        return {
            "test_score": final_score,
            "dev_score_history": dev_score_history,
            "train_loss_history": train_loss_history,
            "dev_loss_history": dev_loss_history,
        }
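
The loss file written by the loop above is plain TSV, so the training history is easy to inspect afterwards. A minimal sketch, assuming a hypothetical base path "resources/taggers/example" and that pandas is available; the column names come from the header written on the first epoch:

# Hypothetical sketch: reading the loss.tsv produced by the training loop above.
# The first epoch writes a tab-separated header; every later epoch appends one row.
import pandas as pd

history = pd.read_csv("resources/taggers/example/loss.tsv", sep="\t")
print(history[["EPOCH", "BAD_EPOCHS", "LEARNING_RATE", "TRAIN_LOSS"]].tail())
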
Example #21
    def predict(
        self,
        sentences: Union[List[Sentence], Sentence, List[str], str],
        mini_batch_size=32,
        embedding_storage_mode="none",
        all_tag_prob: bool = False,
        verbose: bool = False,
        use_tokenizer: Union[bool, Callable[[str], List[Token]]] = space_tokenizer,
    ) -> List[Sentence]:
        """
        Predict sequence tags for the Named Entity Recognition task.
        :param sentences: a Sentence, a string, a list of Sentences, or a list of strings.
        :param mini_batch_size: size of the minibatch; usually bigger is faster but consumes more memory,
        up to a point where it has no further effect.
        :param embedding_storage_mode: 'none' for the minimum memory footprint, 'cpu' to store embeddings in RAM,
        'gpu' to store embeddings in GPU memory.
        :param all_tag_prob: True to compute the score for each tag on each token,
        otherwise only the score of the best tag is returned
        :param verbose: set to True to display a progress bar
        :param use_tokenizer: a custom tokenizer used when strings are provided (default is the space-based tokenizer).
        :return: List of Sentence enriched by the predicted tags
        """
        with torch.no_grad():
            if not sentences:
                return sentences

            if isinstance(sentences, Sentence) or isinstance(sentences, str):
                sentences = [sentences]

            if (flair.device.type == "cuda") and embedding_storage_mode == "cpu":
                log.warning(
                    "You are inferring on GPU with parameter 'embedding_storage_mode' set to 'cpu'."
                    "This option will slow down your inference, usually 'none' (default value) "
                    "is a better choice."
                )

            # reverse sort all sequences by their length
            rev_order_len_index = sorted(
                range(len(sentences)), key=lambda k: len(sentences[k]), reverse=True
            )
            original_order_index = sorted(
                range(len(rev_order_len_index)), key=lambda k: rev_order_len_index[k]
            )

            reordered_sentences: List[Union[Sentence, str]] = [
                sentences[index] for index in rev_order_len_index
            ]

            if isinstance(sentences[0], Sentence):
                # remove previous embeddings
                store_embeddings(reordered_sentences, "none")
                dataset = SentenceDataset(reordered_sentences)
            else:
                dataset = StringDataset(
                    reordered_sentences, use_tokenizer=use_tokenizer
                )
            dataloader = DataLoader(
                dataset=dataset, batch_size=mini_batch_size, collate_fn=lambda x: x
            )

            if self.use_crf:
                transitions = self.transitions.detach().cpu().numpy()
            else:
                transitions = None

            # progress bar for verbosity
            if verbose:
                dataloader = tqdm(dataloader)

            results: List[Sentence] = []
            for i, batch in enumerate(dataloader):

                if verbose:
                    dataloader.set_description(f"Inferencing on batch {i}")
                results += batch
                batch = self._filter_empty_sentences(batch)
                # stop if all sentences are empty
                if not batch:
                    continue

                feature: torch.Tensor = self.forward(batch)
                tags, all_tags = self._obtain_labels(
                    feature=feature,
                    batch_sentences=batch,
                    transitions=transitions,
                    get_all_tags=all_tag_prob,
                )

                for (sentence, sent_tags) in zip(batch, tags):
                    for (token, tag) in zip(sentence.tokens, sent_tags):
                        token.add_tag_label(self.tag_type, tag)

                # all_tags will be empty if all_tag_prob is set to False, so the for loop will be avoided
                for (sentence, sent_all_tags) in zip(batch, all_tags):
                    for (token, token_all_tags) in zip(sentence.tokens, sent_all_tags):
                        token.add_tags_proba_dist(self.tag_type, token_all_tags)

                # clearing token embeddings to save memory
                store_embeddings(batch, storage_mode=embedding_storage_mode)

            results: List[Union[Sentence, str]] = [
                results[index] for index in original_order_index
            ]
            assert len(sentences) == len(results)
            return results
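
A minimal usage sketch for the predict() above, assuming `tagger` is a trained Flair sequence tagger of the class shown here and that tokens expose get_tag() as in the same Flair version; the model and sentence text are illustrative only:

# Hypothetical usage sketch for the predict() shown above.
from flair.data import Sentence

# tagger is assumed to be a trained sequence tagging model exposing this predict().
sentence = Sentence("George Washington went to Washington .")
tagger.predict([sentence], mini_batch_size=32, embedding_storage_mode="none")

# Predicted tags are attached to each token under the model's tag_type.
for token in sentence:
    tag = token.get_tag(tagger.tag_type)
    print(token.text, tag.value, round(tag.score, 4))
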
Example #22
    def predict(
        self,
        sentences: Union[List[Sentence], Sentence],
        mini_batch_size: int = 32,
        return_probabilities_for_all_classes: bool = False,
        verbose: bool = False,
        label_name: Optional[str] = None,
        return_loss=False,
        embedding_storage_mode="none",
    ):
        """
        Predicts the class labels for the given sentences. The labels are directly added to the sentences.
        :param sentences: list of sentences
        :param mini_batch_size: mini batch size to use
        :param return_probabilities_for_all_classes: return probabilities for all classes instead of only the best prediction
        :param verbose: set to True to display a progress bar
        :param return_loss: set to True to return loss
        :param label_name: set this to change the name of the label type that is predicted
        :param embedding_storage_mode: default is 'none', which is usually best. Only set to 'cpu' or 'gpu' if
        you wish to not only predict, but also keep the generated embeddings in CPU or GPU memory respectively.
        """
        if label_name is None:
            label_name = self.label_type if self.label_type is not None else "label"

        with torch.no_grad():
            if not sentences:
                return sentences

            if isinstance(sentences, DataPoint):
                sentences = [sentences]

            # filter empty sentences
            if isinstance(sentences[0], DataPoint):
                sentences = [
                    sentence for sentence in sentences if len(sentence) > 0
                ]
            if len(sentences) == 0:
                return sentences

            # reverse sort all sequences by their length
            rev_order_len_index = sorted(range(len(sentences)),
                                         key=lambda k: len(sentences[k]),
                                         reverse=True)

            reordered_sentences: List[Union[DataPoint, str]] = [
                sentences[index] for index in rev_order_len_index
            ]

            dataloader = DataLoader(
                dataset=SentenceDataset(reordered_sentences),
                batch_size=mini_batch_size)
            # progress bar for verbosity
            if verbose:
                dataloader = tqdm(dataloader)

            overall_loss = 0
            batch_no = 0
            label_count = 0
            for batch in dataloader:

                batch_no += 1

                if verbose:
                    dataloader.set_description(
                        f"Inferencing on batch {batch_no}")

                # stop if all sentences are empty
                if not batch:
                    continue

                scores, gold_labels, data_points, label_candidates = self.forward_pass(
                    batch, return_label_candidates=True)
                # remove previously predicted labels of this type
                for sentence in data_points:
                    sentence.remove_labels(label_name)

                if return_loss:
                    overall_loss += self._calculate_loss(scores,
                                                         gold_labels)[0]
                    label_count += len(label_candidates)

                # if anything could possibly be predicted
                if len(label_candidates) > 0:
                    if self.multi_label:
                        sigmoided = torch.sigmoid(
                            scores)  # size: (n_sentences, n_classes)
                        n_labels = sigmoided.size(1)
                        for s_idx, (data_point, label_candidate) in enumerate(
                                zip(data_points, label_candidates)):
                            for l_idx in range(n_labels):
                                label_value = self.label_dictionary.get_item_for_index(
                                    l_idx)
                                if label_value == 'O': continue
                                label_threshold = self._get_label_threshold(
                                    label_value)
                                label_score = sigmoided[s_idx, l_idx].item()
                                if label_score > label_threshold or return_probabilities_for_all_classes:
                                    label = label_candidate.spawn(
                                        value=label_value, score=label_score)
                                    data_point.add_complex_label(
                                        label_name, label)
                    else:
                        softmax = torch.nn.functional.softmax(scores, dim=-1)

                        if return_probabilities_for_all_classes:
                            n_labels = softmax.size(1)
                            for s_idx, (data_point,
                                        label_candidate) in enumerate(
                                            zip(data_points,
                                                label_candidates)):
                                for l_idx in range(n_labels):
                                    label_value = self.label_dictionary.get_item_for_index(
                                        l_idx)
                                    if label_value == 'O': continue
                                    label_score = softmax[s_idx, l_idx].item()
                                    label = label_candidate.spawn(
                                        value=label_value, score=label_score)
                                    data_point.add_complex_label(
                                        label_name, label)
                        else:
                            conf, idx = torch.max(softmax, dim=-1)
                            for data_point, label_candidate, c, i in zip(
                                    data_points, label_candidates, conf, idx):
                                label_value = self.label_dictionary.get_item_for_index(
                                    i.item())
                                if label_value == 'O': continue
                                label = label_candidate.spawn(
                                    value=label_value, score=c.item())
                                data_point.add_complex_label(label_name, label)

                store_embeddings(batch, storage_mode=embedding_storage_mode)

            if return_loss:
                return overall_loss, label_count
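
A usage sketch for this classifier predict(), with hypothetical model and sentence objects; predictions end up on the sentences under the chosen label_name:

# Hypothetical usage sketch for the predict() shown above.
from flair.data import Sentence

# classifier is assumed to be a trained Flair text classification model of the class above.
sentences = [Sentence("the movie was great"), Sentence("terrible service")]
classifier.predict(
    sentences,
    mini_batch_size=32,
    label_name="predicted",   # predictions are stored under this label type
)

for sentence in sentences:
    for label in sentence.get_labels("predicted"):
        print(sentence.to_plain_string(), label.value, round(label.score, 4))
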
Example #23
    def predict(
        self,
        sentences: Union[List[Sentence], Sentence],
        mini_batch_size: int = 32,
        return_probabilities_for_all_classes: bool = False,
        verbose: bool = False,
        label_name: Optional[str] = None,
        return_loss=False,
        embedding_storage_mode="none",
    ):
        """
        Predicts labels for current batch with CRF or Softmax.
        :param sentences: List of sentences in batch
        :param mini_batch_size: batch size for test data
        :param return_probabilities_for_all_classes: Whether to return probabilities for all classes
        :param verbose: whether to use progress bar
        :param label_name: which label to predict
        :param return_loss: whether to return loss value
        :param embedding_storage_mode: determines where to store embeddings - can be "gpu", "cpu" or None.
        """
        if label_name is None:
            label_name = self.tag_type

        with torch.no_grad():
            if not sentences:
                return sentences

            # make sure its a list
            if not isinstance(sentences, list) and not isinstance(
                    sentences, flair.data.Dataset):
                sentences = [sentences]

            # filter empty sentences
            sentences = [
                sentence for sentence in sentences if len(sentence) > 0
            ]

            # reverse sort all sequences by their length
            reordered_sentences = sorted(sentences,
                                         key=lambda s: len(s),
                                         reverse=True)

            if len(reordered_sentences) == 0:
                return sentences

            dataloader = DataLoader(
                dataset=FlairDatapointDataset(reordered_sentences),
                batch_size=mini_batch_size,
            )
            # progress bar for verbosity
            if verbose:
                dataloader = tqdm(dataloader, desc="Batch inference")

            overall_loss = torch.zeros(1, device=flair.device)
            batch_no = 0
            label_count = 0
            for batch in dataloader:

                batch_no += 1

                # stop if all sentences are empty
                if not batch:
                    continue

                # get features from forward propagation
                features, gold_labels = self.forward(batch)

                # remove previously predicted labels of this type
                for sentence in batch:
                    sentence.remove_labels(label_name)

                # if return_loss, get loss value
                if return_loss:
                    loss = self._calculate_loss(features, gold_labels)
                    overall_loss += loss[0]
                    label_count += loss[1]

                # Sort batch in same way as forward propagation
                lengths = torch.LongTensor(
                    [len(sentence) for sentence in batch])
                _, sort_indices = lengths.sort(dim=0, descending=True)
                batch = [batch[i] for i in sort_indices]

                # make predictions
                if self.use_crf:
                    predictions, all_tags = self.viterbi_decoder.decode(
                        features, return_probabilities_for_all_classes)
                else:
                    predictions, all_tags = self._standard_inference(
                        features, batch, return_probabilities_for_all_classes)

                # add predictions to Sentence
                for sentence, sentence_predictions in zip(batch, predictions):

                    # BIOES-labels need to be converted to spans
                    if self.predict_spans:
                        sentence_tags = [
                            label[0] for label in sentence_predictions
                        ]
                        sentence_scores = [
                            label[1] for label in sentence_predictions
                        ]
                        predicted_spans = get_spans_from_bio(
                            sentence_tags, sentence_scores)
                        for predicted_span in predicted_spans:
                            span: Span = sentence[
                                predicted_span[0][0]:predicted_span[0][-1] + 1]
                            span.add_label(label_name,
                                           value=predicted_span[2],
                                           score=predicted_span[1])

                    # token-labels can be added directly
                    else:
                        for token, label in zip(sentence.tokens,
                                                sentence_predictions):
                            token.add_label(typename=label_name,
                                            value=label[0],
                                            score=label[1])

                # all_tags will be empty if return_probabilities_for_all_classes is False, so the for loop will be skipped
                for (sentence, sent_all_tags) in zip(batch, all_tags):
                    for (token, token_all_tags) in zip(sentence.tokens,
                                                       sent_all_tags):
                        token.add_tags_proba_dist(label_name, token_all_tags)

                store_embeddings(sentences,
                                 storage_mode=embedding_storage_mode)

            if return_loss:
                return overall_loss, label_count
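
For span prediction the results land on Span objects rather than individual tokens. A sketch under the assumption that `tagger` is a trained model of the class above with predict_spans enabled (model, label name and sentence are illustrative):

# Hypothetical usage sketch for the span-aware predict() shown above.
from flair.data import Sentence

# tagger is assumed to be a trained NER model of the class above with predict_spans=True.
sentence = Sentence("Berlin is the capital of Germany")
tagger.predict([sentence], label_name="ner")

# BIOES token predictions were converted to spans carrying a value and a score.
for span in sentence.get_spans("ner"):
    for label in span.get_labels("ner"):
        print(span.text, label.value, round(label.score, 4))
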
Example #24
    def evaluate(
        self,
        data_loader: DataLoader,
        out_path: Path = None,
        embeddings_storage_mode: str = "cpu",
    ) -> (Result, float):

        with torch.no_grad():
            eval_loss = 0

            metric = Metric("Evaluation")

            lines: List[str] = []
            batch_count: int = 0
            for batch in data_loader:

                batch_count += 1

                labels, loss = self.forward_labels_and_loss(batch)

                eval_loss += loss

                sentences_for_batch = [
                    sent.to_plain_string() for sent in batch
                ]
                confidences_for_batch = [[
                    label.score for label in sent_labels
                ] for sent_labels in labels]
                predictions_for_batch = [[
                    label.value for label in sent_labels
                ] for sent_labels in labels]
                true_values_for_batch = [
                    sentence.get_label_names() for sentence in batch
                ]
                available_labels = self.label_dictionary.get_items()

                for sentence, confidence, prediction, true_value in zip(
                        sentences_for_batch,
                        confidences_for_batch,
                        predictions_for_batch,
                        true_values_for_batch,
                ):
                    eval_line = "{}\t{}\t{}\t{}\n".format(
                        sentence, true_value, prediction, confidence)
                    lines.append(eval_line)

                for predictions_for_sentence, true_values_for_sentence in zip(
                        predictions_for_batch, true_values_for_batch):

                    for label in available_labels:
                        if (label in predictions_for_sentence
                                and label in true_values_for_sentence):
                            metric.add_tp(label)
                        elif (label in predictions_for_sentence
                              and label not in true_values_for_sentence):
                            metric.add_fp(label)
                        elif (label not in predictions_for_sentence
                              and label in true_values_for_sentence):
                            metric.add_fn(label)
                        elif (label not in predictions_for_sentence
                              and label not in true_values_for_sentence):
                            metric.add_tn(label)

                store_embeddings(batch, embeddings_storage_mode)

            eval_loss /= batch_count

            detailed_result = (
                f"\nMICRO_AVG: acc {metric.micro_avg_accuracy()} - f1-score {metric.micro_avg_f_score()}"
                f"\nMACRO_AVG: acc {metric.macro_avg_accuracy()} - f1-score {metric.macro_avg_f_score()}"
            )
            for class_name in metric.get_classes():
                detailed_result += (
                    f"\n{class_name:<10} tp: {metric.get_tp(class_name)} - fp: {metric.get_fp(class_name)} - "
                    f"fn: {metric.get_fn(class_name)} - tn: {metric.get_tn(class_name)} - precision: "
                    f"{metric.precision(class_name):.4f} - recall: {metric.recall(class_name):.4f} - "
                    f"accuracy: {metric.accuracy(class_name):.4f} - f1-score: "
                    f"{metric.f_score(class_name):.4f}")

            result = Result(
                main_score=metric.micro_avg_f_score(),
                log_line=
                f"{metric.precision()}\t{metric.recall()}\t{metric.micro_avg_f_score()}",
                log_header="PRECISION\tRECALL\tF1",
                detailed_results=detailed_result,
            )

            if out_path is not None:
                with open(out_path, "w", encoding="utf-8") as outfile:
                    outfile.write("".join(lines))

            return result, eval_loss
Example #25
    def evaluate(
        self,
        data_points: Union[List[DataPoint], Dataset],
        gold_label_type: str,
        out_path: Union[str, Path] = None,
        embedding_storage_mode: str = "none",
        mini_batch_size: int = 32,
        num_workers: int = 8,
        main_evaluation_metric: Tuple[str, str] = ("micro avg", "f1-score"),
        exclude_labels: List[str] = [],
        gold_label_dictionary: Optional[Dictionary] = None,
    ) -> Result:
        import numpy as np
        import sklearn

        # read Dataset into data loader (if list of sentences passed, make Dataset first)
        if not isinstance(data_points, Dataset):
            data_points = SentenceDataset(data_points)
        data_loader = DataLoader(data_points,
                                 batch_size=mini_batch_size,
                                 num_workers=num_workers)

        with torch.no_grad():

            # loss calculation
            eval_loss = 0
            average_over = 0

            # variables for printing
            lines: List[str] = []

            # variables for computing scores
            all_spans: List[str] = []
            all_true_values = {}
            all_predicted_values = {}

            sentence_id = 0
            for batch in data_loader:

                # remove any previously predicted labels
                for datapoint in batch:
                    datapoint.remove_labels('predicted')

                # predict for batch
                loss_and_count = self.predict(
                    batch,
                    embedding_storage_mode=embedding_storage_mode,
                    mini_batch_size=mini_batch_size,
                    label_name='predicted',
                    return_loss=True)

                if isinstance(loss_and_count, Tuple):
                    average_over += loss_and_count[1]
                    eval_loss += loss_and_count[0]
                else:
                    eval_loss += loss_and_count

                # get the gold labels
                for datapoint in batch:

                    for gold_label in datapoint.get_labels(gold_label_type):
                        representation = str(
                            sentence_id) + ': ' + gold_label.identifier

                        value = gold_label.value
                        if gold_label_dictionary and gold_label_dictionary.get_idx_for_item(
                                value) == 0:
                            value = '<unk>'

                        if representation not in all_true_values:
                            all_true_values[representation] = [value]
                        else:
                            all_true_values[representation].append(value)

                        if representation not in all_spans:
                            all_spans.append(representation)

                    for predicted_span in datapoint.get_labels("predicted"):
                        representation = str(
                            sentence_id) + ': ' + predicted_span.identifier

                        # add to all_predicted_values
                        if representation not in all_predicted_values:
                            all_predicted_values[representation] = [
                                predicted_span.value
                            ]
                        else:
                            all_predicted_values[representation].append(
                                predicted_span.value)

                        if representation not in all_spans:
                            all_spans.append(representation)

                    sentence_id += 1

                store_embeddings(batch, embedding_storage_mode)

                # make printout lines
                if out_path:
                    lines.extend(
                        self._print_predictions(batch, gold_label_type))

            # write all_predicted_values to out_file if set
            if out_path:
                with open(Path(out_path), "w", encoding="utf-8") as outfile:
                    outfile.write("".join(lines))

            # make the evaluation dictionary
            evaluation_label_dictionary = Dictionary(add_unk=False)
            evaluation_label_dictionary.add_item("O")
            for true_values in all_true_values.values():
                for label in true_values:
                    evaluation_label_dictionary.add_item(label)
            for predicted_values in all_predicted_values.values():
                for label in predicted_values:
                    evaluation_label_dictionary.add_item(label)

            # finally, compute numbers
            y_true = []
            y_pred = []

            for span in all_spans:

                true_values = all_true_values[
                    span] if span in all_true_values else ['O']
                predicted_values = all_predicted_values[
                    span] if span in all_predicted_values else ['O']

                y_true_instance = np.zeros(len(evaluation_label_dictionary),
                                           dtype=int)
                for true_value in true_values:
                    y_true_instance[evaluation_label_dictionary.
                                    get_idx_for_item(true_value)] = 1
                y_true.append(y_true_instance.tolist())

                y_pred_instance = np.zeros(len(evaluation_label_dictionary),
                                           dtype=int)
                for predicted_value in predicted_values:
                    y_pred_instance[evaluation_label_dictionary.
                                    get_idx_for_item(predicted_value)] = 1
                y_pred.append(y_pred_instance.tolist())

        # now, calculate evaluation numbers
        target_names = []
        labels = []

        counter = Counter()
        counter.update(
            list(itertools.chain.from_iterable(all_true_values.values())))
        counter.update(
            list(itertools.chain.from_iterable(all_predicted_values.values())))

        for label_name, count in counter.most_common():
            if label_name == 'O': continue
            if label_name in exclude_labels: continue
            target_names.append(label_name)
            labels.append(
                evaluation_label_dictionary.get_idx_for_item(label_name))

        # there is at least one gold label or one prediction (default)
        if len(all_true_values) + len(all_predicted_values) > 1:
            classification_report = sklearn.metrics.classification_report(
                y_true,
                y_pred,
                digits=4,
                target_names=target_names,
                zero_division=0,
                labels=labels,
            )

            classification_report_dict = sklearn.metrics.classification_report(
                y_true,
                y_pred,
                target_names=target_names,
                zero_division=0,
                output_dict=True,
                labels=labels,
            )

            accuracy_score = round(
                sklearn.metrics.accuracy_score(y_true, y_pred), 4)

            precision_score = round(
                classification_report_dict["micro avg"]["precision"], 4)
            recall_score = round(
                classification_report_dict["micro avg"]["recall"], 4)
            micro_f_score = round(
                classification_report_dict["micro avg"]["f1-score"], 4)
            macro_f_score = round(
                classification_report_dict["macro avg"]["f1-score"], 4)

            main_score = classification_report_dict[main_evaluation_metric[0]][
                main_evaluation_metric[1]]

        else:
            # issue error and default all evaluation numbers to 0.
            log.error(
                "ACHTUNG! No gold labels and no all_predicted_values found! Could be an error in your corpus or how you "
                "initialize the trainer!")
            accuracy_score = precision_score = recall_score = micro_f_score = macro_f_score = main_score = 0.
            classification_report = ""
            classification_report_dict = {}

        detailed_result = ("\nResults:"
                           f"\n- F-score (micro) {micro_f_score}"
                           f"\n- F-score (macro) {macro_f_score}"
                           f"\n- Accuracy {accuracy_score}"
                           "\n\nBy class:\n" + classification_report)

        # line for log file
        log_header = "PRECISION\tRECALL\tF1\tACCURACY"
        log_line = f"{precision_score}\t" f"{recall_score}\t" f"{micro_f_score}\t" f"{accuracy_score}"

        if average_over > 0:
            eval_loss /= average_over

        result = Result(main_score=main_score,
                        log_line=log_line,
                        log_header=log_header,
                        detailed_results=detailed_result,
                        classification_report=classification_report_dict,
                        loss=eval_loss)

        return result
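
The heart of the evaluation above is turning the gold and predicted labels of each span into multi-hot vectors for scikit-learn. A standalone sketch of just that step, on toy labels rather than real predictions:

# Standalone sketch of the multi-hot encoding fed into scikit-learn above (toy data).
import numpy as np
import sklearn.metrics

label_names = ["PER", "LOC", "ORG"]        # evaluation dictionary without 'O'
true_spans = [["PER"], ["LOC"], ["ORG"]]   # gold label(s) for each span
pred_spans = [["PER"], ["ORG"], ["ORG"]]   # predicted label(s) for each span

def to_multi_hot(values):
    vec = np.zeros(len(label_names), dtype=int)
    for value in values:
        vec[label_names.index(value)] = 1
    return vec

y_true = [to_multi_hot(values).tolist() for values in true_spans]
y_pred = [to_multi_hot(values).tolist() for values in pred_spans]

print(sklearn.metrics.classification_report(
    y_true, y_pred, digits=4, target_names=label_names, zero_division=0))
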
Example #26
    def predict(
        self,
        sentences: Union[List[Sentence], Sentence, List[str], str],
        mini_batch_size: int = 32,
        embedding_storage_mode="none",
        multi_class_prob: bool = False,
        verbose: bool = False,
        use_tokenizer: Union[bool, Callable[[str], List[Token]]] = space_tokenizer,
    ) -> List[Sentence]:
        """
        Predicts the class labels for the given sentences. The labels are directly added to the sentences.
        :param sentences: list of sentences
        :param mini_batch_size: mini batch size to use
        :param embedding_storage_mode: 'none' for the minimum memory footprint, 'cpu' to store embeddings in RAM,
        'gpu' to store embeddings in GPU memory.
        :param multi_class_prob: return probabilities for all classes in the multi-class setting
        :param verbose: set to True to display a progress bar
        :param use_tokenizer: a custom tokenizer used when strings are provided (default is the space-based tokenizer).
        :return: the list of sentences containing the labels
        """
        with torch.no_grad():
            if not sentences:
                return sentences

            if isinstance(sentences, Sentence) or isinstance(sentences, str):
                sentences = [sentences]

            if (flair.device.type == "cuda") and embedding_storage_mode == "cpu":
                log.warning(
                    "You are inferring on GPU with parameter 'embedding_storage_mode' set to 'cpu'."
                    "This option will slow down your inference, usually 'none' (default value) "
                    "is a better choice."
                )

            # reverse sort all sequences by their length
            rev_order_len_index = sorted(
                range(len(sentences)), key=lambda k: len(sentences[k]), reverse=True
            )
            original_order_index = sorted(
                range(len(rev_order_len_index)), key=lambda k: rev_order_len_index[k]
            )

            reordered_sentences: List[Union[Sentence, str]] = [
                sentences[index] for index in rev_order_len_index
            ]

            if isinstance(sentences[0], Sentence):
                # remove previous embeddings
                store_embeddings(reordered_sentences, "none")
                dataset = SentenceDataset(reordered_sentences)
            else:
                dataset = StringDataset(
                    reordered_sentences, use_tokenizer=use_tokenizer
                )
            dataloader = DataLoader(
                dataset=dataset, batch_size=mini_batch_size, collate_fn=lambda x: x
            )

            # progress bar for verbosity
            if verbose:
                dataloader = tqdm(dataloader)

            results: List[Sentence] = []
            for i, batch in enumerate(dataloader):
                if verbose:
                    dataloader.set_description(f"Inferencing on batch {i}")
                results += batch
                batch = self._filter_empty_sentences(batch)
                # stop if all sentences are empty
                if not batch:
                    continue

                scores = self.forward(batch)
                predicted_labels = self._obtain_labels(
                    scores, predict_prob=multi_class_prob
                )

                for (sentence, labels) in zip(batch, predicted_labels):
                    sentence.labels = labels

                # clearing token embeddings to save memory
                store_embeddings(batch, storage_mode=embedding_storage_mode)

            results: List[Union[Sentence, str]] = [
                results[index] for index in original_order_index
            ]
            assert len(sentences) == len(results)
            return results
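
A usage sketch for this older classification predict(), which assigns the predicted labels directly to sentence.labels; the model and inputs are illustrative only:

# Hypothetical usage sketch for the predict() shown above.
from flair.data import Sentence

# classifier is assumed to be a trained text classification model of the class above.
sentences = [Sentence("the plot was thin"), Sentence("great soundtrack")]
classifier.predict(sentences, mini_batch_size=32, multi_class_prob=False)

for sentence in sentences:
    # predict() overwrote sentence.labels with the predicted Label objects.
    print(sentence.to_plain_string(), [str(label) for label in sentence.labels])
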
Example #27
    def evaluate(
        self,
        data_loader: DataLoader,
        out_path: Path = None,
        embeddings_storage_mode="none",
    ) -> (Result, float):
        # assumes that for each data pair there's at least one embedding per modality

        with torch.no_grad():
            # pre-compute embeddings for all targets in evaluation dataset
            target_index = {}
            all_target_embeddings = []
            for data_points in data_loader:
                target_inputs = []
                for data_point in data_points:
                    if str(data_point.second) not in target_index:
                        target_index[str(data_point.second)] = len(target_index)
                        target_inputs.append(data_point)
                if target_inputs:
                    all_target_embeddings.append(
                        self._embed_target(target_inputs).to(self.eval_device)
                    )
                store_embeddings(data_points, embeddings_storage_mode)
            all_target_embeddings = torch.cat(all_target_embeddings, dim=0)  # [n0, d0]
            assert len(target_index) == all_target_embeddings.shape[0]

            ranks = []
            for data_points in data_loader:
                batch_embeddings = self._embed_source(data_points)

                batch_source_embeddings = batch_embeddings.to(self.eval_device)
                # compute the similarity
                batch_similarity_matrix = self.similarity_measure.forward(
                    [batch_source_embeddings, all_target_embeddings]
                )

                # sort the similarity matrix across modality 1
                batch_modality_1_argsort = torch.argsort(
                    batch_similarity_matrix, descending=True, dim=1
                )

                # get the ranks, so +1 to start counting ranks from 1
                batch_modality_1_ranks = (
                    torch.argsort(batch_modality_1_argsort, dim=1) + 1
                )

                batch_target_indices = [
                    target_index[str(data_point.second)] for data_point in data_points
                ]

                batch_gt_ranks = batch_modality_1_ranks[
                    torch.arange(batch_similarity_matrix.shape[0]),
                    torch.tensor(batch_target_indices),
                ]
                ranks.extend(batch_gt_ranks.tolist())

                store_embeddings(data_points, embeddings_storage_mode)

        ranks = np.array(ranks)
        median_rank = np.median(ranks)
        recall_at = {k: np.mean(ranks <= k) for k in self.recall_at_points}

        results_header = ["Median rank"] + [
            "Recall@top" + str(r) for r in self.recall_at_points
        ]
        results_header_str = "\t".join(results_header)
        epoch_results = [str(median_rank)] + [
            str(recall_at[k]) for k in self.recall_at_points
        ]
        epoch_results_str = "\t".join(epoch_results)
        detailed_results = ", ".join(
            [f"{h}={v}" for h, v in zip(results_header, epoch_results)]
        )

        validated_measure = sum(
            [
                recall_at[r] * w
                for r, w in zip(self.recall_at_points, self.recall_at_points_weights)
            ]
        )

        return (
            Result(
                validated_measure,
                results_header_str,
                epoch_results_str,
                detailed_results,
            ),
            0,
        )
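
The double argsort above is what turns a similarity matrix into per-source ranks of every candidate target. A toy sketch of that step in isolation, with random similarities instead of real embeddings:

# Toy sketch of the rank extraction used above (random similarities, not real embeddings).
import torch

similarity = torch.rand(4, 10)               # 4 source items vs. 10 candidate targets
gold_targets = torch.tensor([3, 0, 7, 2])    # index of the correct target per source

order = torch.argsort(similarity, descending=True, dim=1)  # best-scoring target first
ranks = torch.argsort(order, dim=1) + 1                    # 1-based rank of every target

gold_ranks = ranks[torch.arange(similarity.shape[0]), gold_targets]
print("median rank:", gold_ranks.float().median().item())
print("recall@5:", (gold_ranks <= 5).float().mean().item())
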
Example #28
    def evaluate(
        self,
        data_loader: DataLoader,
        out_path: Path = None,
        embedding_storage_mode="none",
    ) -> (Result, float):
        ranks_min = list()
        ranks_max = list()

        with torch.no_grad():
            targets = list()
            targets_y = list()
            target_sentences = list()
            for data_points in data_loader:
                targets.extend(
                    [
                        tensor
                        for tensor in self._embed_target(data_points).to(
                            self.eval_device
                        )
                    ]
                )
                targets_y.extend([sentence.second.person for sentence in data_points])
                target_sentences.extend(
                    [str(sentence.second) for sentence in data_points]
                )
                store_embeddings(data_points, embedding_storage_mode)

            for data_points in data_loader:
                sources = self._embed_source(data_points).to(self.eval_device)
                sources_y = [sentence.first.person for sentence in data_points]
                source_sentences = [str(sentence.first) for sentence in data_points]

                scores = list()
                agreement = list()
                for source, source_y, source_sentence in zip(
                    sources, sources_y, source_sentences
                ):
                    for target, target_y, target_sentence in zip(
                        targets, targets_y, target_sentences
                    ):
                        if source_sentence != target_sentence:
                            score = self.similarity_measure(source, target).item()
                            scores.append(score)
                            agreement.append(source_y == target_y)

                df = pd.DataFrame({"scores": scores, "agreement": agreement})
                df = df.sort_values("scores", ascending=False).reset_index(drop=True)
                df = df[df["agreement"] == True]
                ranks_min.append(1 - min(df.index) / len(agreement))
                ranks_max.append(1 - max(df.index) / len(agreement))

        results_header_str = "\t".join(
            ["Median max rank", "Median min rank", "Best", "Worst"]
        )
        epoch_results_str = "\t".join(
            [
                str(np.median(ranks_max)),
                str(np.median(ranks_min)),
                str(max(ranks_min)),
                str(min(ranks_max)),
            ]
        )
        return (
            Result(np.median(ranks_max), results_header_str, epoch_results_str, "",),
            0,
        )
Example #29
    def train(
        self,
        base_path: Union[Path, str],
        learning_rate: float = 0.1,
        mini_batch_size: int = 32,
        eval_mini_batch_size: int = None,
        max_epochs: int = 100,
        anneal_factor: float = 0.5,
        patience: int = 3,
        min_learning_rate: float = 0.0001,
        train_with_dev: bool = False,
        monitor_train: bool = False,
        monitor_test: bool = False,
        embeddings_storage_mode: str = "cpu",
        checkpoint: bool = False,
        save_final_model: bool = True,
        anneal_with_restarts: bool = False,
        shuffle: bool = True,
        param_selection_mode: bool = False,
        num_workers: int = 6,
        sampler=None,
        **kwargs,
    ) -> dict:
        """
        Trains any class that implements the flair.nn.Model interface.
        :param base_path: Main path to which all output during training is logged and models are saved
        :param learning_rate: Initial learning rate
        :param mini_batch_size: Size of mini-batches during training
        :param eval_mini_batch_size: Size of mini-batches during evaluation
        :param max_epochs: Maximum number of epochs to train. Terminates training if this number is surpassed.
        :param anneal_factor: The factor by which the learning rate is annealed
        :param patience: Patience is the number of epochs with no improvement the Trainer waits
         until annealing the learning rate
        :param min_learning_rate: If the learning rate falls below this threshold, training terminates
        :param train_with_dev: If True, training is performed using both train+dev data
        :param monitor_train: If True, training data is evaluated at end of each epoch
        :param monitor_test: If True, test data is evaluated at end of each epoch
        :param embeddings_storage_mode: One of 'none' (all embeddings are deleted and freshly recomputed),
        'cpu' (embeddings are stored on CPU) or 'gpu' (embeddings are stored on GPU)
        :param checkpoint: If True, a full checkpoint is saved at end of each epoch
        :param save_final_model: If True, final model is saved
        :param anneal_with_restarts: If True, the last best model is restored when annealing the learning rate
        :param shuffle: If True, data is shuffled during training
        :param param_selection_mode: If True, testing is performed against dev data. Use this mode when doing
        parameter selection.
        :param num_workers: Number of workers in your data loader.
        :param sampler: You can pass a data sampler here for special sampling of data.
        :param kwargs: Other arguments for the Optimizer
        :return:
        """

        if self.use_tensorboard:
            try:
                from torch.utils.tensorboard import SummaryWriter

                writer = SummaryWriter()
            except:
                log_line(log)
                log.warning(
                    "ATTENTION! PyTorch >= 1.1.0 and pillow are required for TensorBoard support!"
                )
                log_line(log)
                self.use_tensorboard = False
                pass

        if eval_mini_batch_size is None:
            eval_mini_batch_size = mini_batch_size

        # cast string to Path
        if type(base_path) is str:
            base_path = Path(base_path)

        log_handler = add_file_handler(log, base_path / "training.log")

        log_line(log)
        log.info(f'Model: "{self.model}"')
        log_line(log)
        log.info(f'Corpus: "{self.corpus}"')
        log_line(log)
        log.info("Parameters:")
        log.info(f' - learning_rate: "{learning_rate}"')
        log.info(f' - mini_batch_size: "{mini_batch_size}"')
        log.info(f' - patience: "{patience}"')
        log.info(f' - anneal_factor: "{anneal_factor}"')
        log.info(f' - max_epochs: "{max_epochs}"')
        log.info(f' - shuffle: "{shuffle}"')
        log.info(f' - train_with_dev: "{train_with_dev}"')
        log_line(log)
        log.info(f'Model training base path: "{base_path}"')
        log_line(log)
        log.info(f"Device: {flair.device}")
        log_line(log)
        log.info(f"Embeddings storage mode: {embeddings_storage_mode}")

        # determine what splits (train, dev, test) to evaluate and log
        log_train = True if monitor_train else False
        log_test = (True if (not param_selection_mode and self.corpus.test
                             and monitor_test) else False)
        log_dev = True if not train_with_dev else False

        # prepare loss logging file and set up header
        loss_txt = init_output_file(base_path, "loss.tsv")

        weight_extractor = WeightExtractor(base_path)

        optimizer: torch.optim.Optimizer = self.optimizer(
            self.model.parameters(), lr=learning_rate, **kwargs)
        if self.optimizer_state is not None:
            optimizer.load_state_dict(self.optimizer_state)

        # minimize training loss if training with dev data, else maximize dev score
        anneal_mode = "min" if train_with_dev else "max"

        scheduler: ReduceLROnPlateau = ReduceLROnPlateau(
            optimizer,
            factor=anneal_factor,
            patience=patience,
            mode=anneal_mode,
            verbose=True,
        )

        if self.scheduler_state is not None:
            scheduler.load_state_dict(self.scheduler_state)

        train_data = self.corpus.train

        # if training also uses dev data, include in training set
        if train_with_dev:
            train_data = ConcatDataset([self.corpus.train, self.corpus.dev])

        if sampler is not None:
            sampler = sampler(train_data)
            shuffle = False

        dev_score_history = []
        dev_loss_history = []
        train_loss_history = []

        # At any point you can hit Ctrl + C to break out of training early.
        try:
            previous_learning_rate = learning_rate

            for epoch in range(0 + self.epoch, max_epochs + self.epoch):
                log_line(log)

                # get new learning rate
                for group in optimizer.param_groups:
                    learning_rate = group["lr"]

                # reload last best model if annealing with restarts is enabled
                if (learning_rate != previous_learning_rate
                        and anneal_with_restarts
                        and (base_path / "best-model.pt").exists()):
                    log.info("resetting to best model")
                    self.model.load(base_path / "best-model.pt")

                previous_learning_rate = learning_rate

                # stop training if learning rate becomes too small
                if learning_rate < min_learning_rate:
                    log_line(log)
                    log.info("learning rate too small - quitting training!")
                    log_line(log)
                    break

                batch_loader = DataLoader(
                    train_data,
                    batch_size=mini_batch_size,
                    shuffle=shuffle,
                    num_workers=num_workers,
                    sampler=sampler,
                )

                self.model.train()

                train_loss: float = 0

                seen_batches = 0
                total_number_of_batches = len(batch_loader)

                modulo = max(1, int(total_number_of_batches / 10))

                # process mini-batches
                for batch_no, batch in enumerate(batch_loader):

                    loss = self.model.forward_loss(batch)

                    optimizer.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                   5.0)
                    optimizer.step()

                    seen_batches += 1
                    train_loss += loss.item()

                    # depending on memory mode, embeddings are moved to CPU, GPU or deleted
                    store_embeddings(batch, embeddings_storage_mode)

                    if batch_no % modulo == 0:
                        log.info(
                            f"epoch {epoch + 1} - iter {batch_no}/{total_number_of_batches} - loss "
                            f"{train_loss / seen_batches:.8f}")
                        iteration = epoch * total_number_of_batches + batch_no
                        if not param_selection_mode:
                            weight_extractor.extract_weights(
                                self.model.state_dict(), iteration)

                train_loss /= seen_batches

                self.model.eval()

                log_line(log)
                log.info(
                    f"EPOCH {epoch + 1} done: loss {train_loss:.4f} - lr {learning_rate:.4f}"
                )

                if self.use_tensorboard:
                    writer.add_scalar("train_loss", train_loss, epoch + 1)

                # anneal against train loss if training with dev, otherwise anneal against dev score
                current_score = train_loss

                # evaluate on train / dev / test split depending on training settings
                result_line: str = ""

                if log_train:
                    train_eval_result, train_loss = self.model.evaluate(
                        DataLoader(
                            self.corpus.train,
                            batch_size=eval_mini_batch_size,
                            num_workers=num_workers,
                        ),
                        embeddings_storage_mode=embeddings_storage_mode,
                    )
                    result_line += f"\t{train_eval_result.log_line}"

                    # depending on memory mode, embeddings are moved to CPU, GPU or deleted
                    store_embeddings(self.corpus.train,
                                     embeddings_storage_mode)

                if log_dev:
                    dev_eval_result, dev_loss = self.model.evaluate(
                        DataLoader(
                            self.corpus.dev,
                            batch_size=eval_mini_batch_size,
                            num_workers=num_workers,
                        ),
                        embeddings_storage_mode=embeddings_storage_mode,
                    )
                    result_line += f"\t{dev_loss}\t{dev_eval_result.log_line}"
                    log.info(
                        f"DEV : loss {dev_loss} - score {dev_eval_result.main_score}"
                    )
                    # calculate scores using dev data if available
                    # append dev score to score history
                    dev_score_history.append(dev_eval_result.main_score)
                    dev_loss_history.append(dev_loss)

                    current_score = dev_eval_result.main_score

                    # depending on memory mode, embeddings are moved to CPU, GPU or deleted
                    store_embeddings(self.corpus.dev, embeddings_storage_mode)

                    if self.use_tensorboard:
                        writer.add_scalar("dev_loss", dev_loss, epoch + 1)
                        writer.add_scalar("dev_score",
                                          dev_eval_result.main_score,
                                          epoch + 1)

                if log_test:
                    test_eval_result, test_loss = self.model.evaluate(
                        DataLoader(
                            self.corpus.test,
                            batch_size=eval_mini_batch_size,
                            num_workers=num_workers,
                        ),
                        base_path / "test.tsv",
                        embeddings_storage_mode=embeddings_storage_mode,
                    )
                    result_line += f"\t{test_loss}\t{test_eval_result.log_line}"
                    log.info(
                        f"TEST : loss {test_loss} - score {test_eval_result.main_score}"
                    )

                    # depending on memory mode, embeddings are moved to CPU, GPU or deleted
                    store_embeddings(self.corpus.test, embeddings_storage_mode)

                    if self.use_tensorboard:
                        writer.add_scalar("test_loss", test_loss, epoch + 1)
                        writer.add_scalar("test_score",
                                          test_eval_result.main_score,
                                          epoch + 1)

                # determine learning rate annealing through scheduler
                scheduler.step(current_score)

                train_loss_history.append(train_loss)

                # determine bad epoch number
                try:
                    bad_epochs = scheduler.num_bad_epochs
                except:
                    bad_epochs = 0
                for group in optimizer.param_groups:
                    new_learning_rate = group["lr"]
                if new_learning_rate != previous_learning_rate:
                    bad_epochs = patience + 1

                # log bad epochs
                log.info(f"BAD EPOCHS (no improvement): {bad_epochs}")

                # output log file
                with open(loss_txt, "a") as f:

                    # make headers on first epoch
                    if epoch == 0:
                        f.write(
                            f"EPOCH\tTIMESTAMP\tBAD_EPOCHS\tLEARNING_RATE\tTRAIN_LOSS"
                        )

                        if log_train:
                            f.write("\tTRAIN_" + "\tTRAIN_".join(
                                train_eval_result.log_header.split("\t")))
                        if log_dev:
                            f.write("\tDEV_LOSS\tDEV_" + "\tDEV_".join(
                                dev_eval_result.log_header.split("\t")))
                        if log_test:
                            f.write("\tTEST_LOSS\tTEST_" + "\tTEST_".join(
                                test_eval_result.log_header.split("\t")))

                    f.write(
                        f"\n{epoch}\t{datetime.datetime.now():%H:%M:%S}\t{bad_epochs}\t{learning_rate:.4f}\t{train_loss}"
                    )
                    f.write(result_line)

                # if checkpoint is enabled, save model at each epoch
                if checkpoint and not param_selection_mode:
                    self.model.save_checkpoint(
                        base_path / "checkpoint.pt",
                        optimizer.state_dict(),
                        scheduler.state_dict(),
                        epoch + 1,
                        train_loss,
                    )

                # if we use dev data, remember best model based on dev evaluation score
                if (not train_with_dev and not param_selection_mode
                        and current_score == scheduler.best):
                    self.model.save(base_path / "best-model.pt")

            # if we do not use dev data for model selection, save final model
            if save_final_model and not param_selection_mode:
                self.model.save(base_path / "final-model.pt")

        except KeyboardInterrupt:
            log_line(log)
            log.info("Exiting from training early.")

            if self.use_tensorboard:
                writer.close()

            if not param_selection_mode:
                log.info("Saving model ...")
                self.model.save(base_path / "final-model.pt")
                log.info("Done.")

        # test best model if test data is present
        if self.corpus.test:
            final_score = self.final_test(base_path, eval_mini_batch_size,
                                          num_workers)
        else:
            final_score = 0
            log.info("Test data not provided setting final score to 0")

        log.removeHandler(log_handler)

        if self.use_tensorboard:
            writer.close()

        return {
            "test_score": final_score,
            "dev_score_history": dev_score_history,
            "train_loss_history": train_loss_history,
            "dev_loss_history": dev_loss_history,
        }
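
A minimal invocation sketch for this train() method, assuming `trainer` is an instance of the trainer class above that already holds a model, a corpus and an optimizer; the base path and hyperparameters are illustrative:

# Hypothetical invocation sketch for the train() method shown above.
# trainer is assumed to already hold self.model, self.corpus and self.optimizer.
results = trainer.train(
    base_path="resources/taggers/example",
    learning_rate=0.1,
    mini_batch_size=32,
    max_epochs=50,
    patience=3,                     # epochs without improvement before annealing
    anneal_factor=0.5,              # learning rate is halved on each anneal step
    train_with_dev=False,           # keep dev data for best-model selection
    embeddings_storage_mode="cpu",  # keep computed embeddings in RAM between epochs
)

print("test score:", results["test_score"])
print("dev score history:", results["dev_score_history"])
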
Example #30
 def train(self,
           base_path: Union[Path, str],
           learning_rate: float = 0.1,
           mini_batch_size: int = 32,
           eval_mini_batch_size: int = None,
           max_epochs: int = 100,
           anneal_factor: float = 0.5,
           patience: int = 3,
           min_learning_rate: float = 0.0001,
           train_with_dev: bool = False,
           monitor_train: bool = False,
           monitor_test: bool = False,
           embeddings_storage_mode: str = 'cpu',
           checkpoint: bool = False,
           save_final_model: bool = True,
           anneal_with_restarts: bool = False,
           shuffle: bool = True,
           param_selection_mode: bool = False,
           num_workers: int = 6,
           sampler=None,
           use_amp: bool = False,
           amp_opt_level: str = 'O1',
           **kwargs) -> dict:
     "\n        Trains any class that implements the flair.nn.Model interface.\n        :param base_path: Main path to which all output during training is logged and models are saved\n        :param learning_rate: Initial learning rate\n        :param mini_batch_size: Size of mini-batches during training\n        :param eval_mini_batch_size: Size of mini-batches during evaluation\n        :param max_epochs: Maximum number of epochs to train. Terminates training if this number is surpassed.\n        :param anneal_factor: The factor by which the learning rate is annealed\n        :param patience: Patience is the number of epochs with no improvement the Trainer waits\n         until annealing the learning rate\n        :param min_learning_rate: If the learning rate falls below this threshold, training terminates\n        :param train_with_dev: If True, training is performed using both train+dev data\n        :param monitor_train: If True, training data is evaluated at end of each epoch\n        :param monitor_test: If True, test data is evaluated at end of each epoch\n        :param embeddings_storage_mode: One of 'none' (all embeddings are deleted and freshly recomputed),\n        'cpu' (embeddings are stored on CPU) or 'gpu' (embeddings are stored on GPU)\n        :param checkpoint: If True, a full checkpoint is saved at end of each epoch\n        :param save_final_model: If True, final model is saved\n        :param anneal_with_restarts: If True, the last best model is restored when annealing the learning rate\n        :param shuffle: If True, data is shuffled during training\n        :param param_selection_mode: If True, testing is performed against dev data. Use this mode when doing\n        parameter selection.\n        :param num_workers: Number of workers in your data loader.\n        :param sampler: You can pass a data sampler here for special sampling of data.\n        :param kwargs: Other arguments for the Optimizer\n        :return:\n        "
     if self.use_tensorboard:
         try:
             from torch.utils.tensorboard import SummaryWriter
             writer = SummaryWriter()
          except ImportError:
             log_line(log)
             log.warning(
                 'ATTENTION! PyTorch >= 1.1.0 and pillow are required for TensorBoard support!'
             )
             log_line(log)
             self.use_tensorboard = False
     if use_amp:
         if (sys.version_info < (3, 0)):
             raise RuntimeError(
                 'Apex currently only supports Python 3. Aborting.')
         if (amp is None):
             raise RuntimeError(
                 'Failed to import apex. Please install apex from https://www.github.com/nvidia/apex to enable mixed-precision training.'
             )
     if (eval_mini_batch_size is None):
         eval_mini_batch_size = mini_batch_size
      if isinstance(base_path, str):
         base_path = Path(base_path)
     log_handler = add_file_handler(log, (base_path / 'training.log'))
     log_line(log)
      log.info(f'Model: "{self.model}"')
      log_line(log)
      log.info(f'Corpus: "{self.corpus}"')
      log_line(log)
      log.info('Parameters:')
      log.info(f' - learning_rate: "{learning_rate}"')
      log.info(f' - mini_batch_size: "{mini_batch_size}"')
      log.info(f' - patience: "{patience}"')
      log.info(f' - anneal_factor: "{anneal_factor}"')
      log.info(f' - max_epochs: "{max_epochs}"')
      log.info(f' - shuffle: "{shuffle}"')
      log.info(f' - train_with_dev: "{train_with_dev}"')
      log_line(log)
      log.info(f'Model training base path: "{base_path}"')
      log_line(log)
      log.info(f'Device: {flair.device}')
      log_line(log)
      log.info(f'Embeddings storage mode: {embeddings_storage_mode}')
      log_train = bool(monitor_train)
      log_test = bool((not param_selection_mode) and self.corpus.test
                      and monitor_test)
      log_dev = not train_with_dev
     loss_txt = init_output_file(base_path, 'loss.tsv')
     weight_extractor = WeightExtractor(base_path)
     optimizer = self.optimizer(self.model.parameters(),
                                lr=learning_rate,
                                **kwargs)
     if (self.optimizer_state is not None):
         optimizer.load_state_dict(self.optimizer_state)
     if use_amp:
         (self.model, optimizer) = amp.initialize(self.model,
                                                  optimizer,
                                                  opt_level=amp_opt_level)
     anneal_mode = ('min' if train_with_dev else 'max')
     scheduler = ReduceLROnPlateau(optimizer,
                                   factor=anneal_factor,
                                   patience=patience,
                                   mode=anneal_mode,
                                   verbose=True)
     if (self.scheduler_state is not None):
         scheduler.load_state_dict(self.scheduler_state)
     train_data = self.corpus.train
     if train_with_dev:
         train_data = ConcatDataset([self.corpus.train, self.corpus.dev])
     if (sampler is not None):
         sampler = sampler(train_data)
         shuffle = False
     dev_score_history = []
     dev_loss_history = []
     train_loss_history = []
     try:
         previous_learning_rate = learning_rate
          for epoch in range(self.epoch, max_epochs + self.epoch):
             log_line(log)
             for group in optimizer.param_groups:
                 learning_rate = group['lr']
             if ((learning_rate != previous_learning_rate)
                     and anneal_with_restarts
                     and (base_path / 'best-model.pt').exists()):
                 log.info('resetting to best model')
                 self.model.load((base_path / 'best-model.pt'))
             previous_learning_rate = learning_rate
             if (learning_rate < min_learning_rate):
                 log_line(log)
                 log.info('learning rate too small - quitting training!')
                 log_line(log)
                 break
             batch_loader = DataLoader(train_data,
                                       batch_size=mini_batch_size,
                                       shuffle=shuffle,
                                       num_workers=num_workers,
                                       sampler=sampler)
             self.model.train()
             train_loss = 0
             seen_batches = 0
             total_number_of_batches = len(batch_loader)
             modulo = max(1, int((total_number_of_batches / 10)))
             batch_time = 0
             for (batch_no, batch) in enumerate(batch_loader):
                 start_time = time.time()
                 loss = self.model.forward_loss(batch)
                 optimizer.zero_grad()
                 if use_amp:
                     with amp.scale_loss(loss, optimizer) as scaled_loss:
                         scaled_loss.backward()
                 else:
                     loss.backward()
                 torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                                5.0)
                 optimizer.step()
                 seen_batches += 1
                 train_loss += loss.item()
                 store_embeddings(batch, embeddings_storage_mode)
                 batch_time += (time.time() - start_time)
                  if batch_no % modulo == 0:
                      log.info(
                          f'epoch {epoch + 1} - iter {batch_no}/{total_number_of_batches}'
                          f' - loss {train_loss / seen_batches:.8f}'
                          f' - samples/sec: {(mini_batch_size * modulo) / batch_time:.2f}')
                     batch_time = 0
                     iteration = ((epoch * total_number_of_batches) +
                                  batch_no)
                     if (not param_selection_mode):
                         weight_extractor.extract_weights(
                             self.model.state_dict(), iteration)
             train_loss /= seen_batches
             self.model.eval()
             log_line(log)
              log.info(
                  f'EPOCH {epoch + 1} done: loss {train_loss:.4f} - lr {learning_rate:.4f}')
             if self.use_tensorboard:
                 writer.add_scalar('train_loss', train_loss, (epoch + 1))
             current_score = train_loss
             result_line = ''
             if log_train:
                 (train_eval_result, train_loss) = self.model.evaluate(
                     DataLoader(self.corpus.train,
                                batch_size=eval_mini_batch_size,
                                num_workers=num_workers),
                     embeddings_storage_mode=embeddings_storage_mode)
                  result_line += f'\t{train_eval_result.log_line}'
                 store_embeddings(self.corpus.train,
                                  embeddings_storage_mode)
             if log_dev:
                 (dev_eval_result, dev_loss) = self.model.evaluate(
                     DataLoader(self.corpus.dev,
                                batch_size=eval_mini_batch_size,
                                num_workers=num_workers),
                     embeddings_storage_mode=embeddings_storage_mode)
                  result_line += f'\t{dev_loss}\t{dev_eval_result.log_line}'
                  log.info(
                      f'DEV : loss {dev_loss} - score {dev_eval_result.main_score}')
                 dev_score_history.append(dev_eval_result.main_score)
                 dev_loss_history.append(dev_loss)
                 current_score = dev_eval_result.main_score
                 store_embeddings(self.corpus.dev, embeddings_storage_mode)
                 if self.use_tensorboard:
                     writer.add_scalar('dev_loss', dev_loss, (epoch + 1))
                     writer.add_scalar('dev_score',
                                       dev_eval_result.main_score,
                                       (epoch + 1))
             if log_test:
                 (test_eval_result, test_loss) = self.model.evaluate(
                     DataLoader(self.corpus.test,
                                batch_size=eval_mini_batch_size,
                                num_workers=num_workers),
                     (base_path / 'test.tsv'),
                     embeddings_storage_mode=embeddings_storage_mode)
                  result_line += f'\t{test_loss}\t{test_eval_result.log_line}'
                  log.info(
                      f'TEST : loss {test_loss} - score {test_eval_result.main_score}')
                 store_embeddings(self.corpus.test, embeddings_storage_mode)
                 if self.use_tensorboard:
                     writer.add_scalar('test_loss', test_loss, (epoch + 1))
                     writer.add_scalar('test_score',
                                       test_eval_result.main_score,
                                       (epoch + 1))
             scheduler.step(current_score)
             train_loss_history.append(train_loss)
              try:
                  bad_epochs = scheduler.num_bad_epochs
              except AttributeError:
                  bad_epochs = 0
              for group in optimizer.param_groups:
                  new_learning_rate = group['lr']
              if new_learning_rate != previous_learning_rate:
                  bad_epochs = patience + 1
              log.info(f'BAD EPOCHS (no improvement): {bad_epochs}')
             with open(loss_txt, 'a') as f:
                 if (epoch == 0):
                     f.write(
                         'EPOCH\tTIMESTAMP\tBAD_EPOCHS\tLEARNING_RATE\tTRAIN_LOSS'
                     )
                     if log_train:
                         f.write(('\tTRAIN_' + '\tTRAIN_'.join(
                             train_eval_result.log_header.split('\t'))))
                     if log_dev:
                         f.write(('\tDEV_LOSS\tDEV_' + '\tDEV_'.join(
                             dev_eval_result.log_header.split('\t'))))
                     if log_test:
                         f.write(('\tTEST_LOSS\tTEST_' + '\tTEST_'.join(
                             test_eval_result.log_header.split('\t'))))
                  f.write(
                      f'\n{epoch}\t{datetime.datetime.now():%H:%M:%S}\t{bad_epochs}'
                      f'\t{learning_rate:.4f}\t{train_loss}')
                 f.write(result_line)
             if (checkpoint and (not param_selection_mode)):
                 self.model.save_checkpoint((base_path / 'checkpoint.pt'),
                                            optimizer.state_dict(),
                                            scheduler.state_dict(),
                                            (epoch + 1), train_loss)
             if ((not train_with_dev) and (not param_selection_mode)
                     and (current_score == scheduler.best)):
                 self.model.save((base_path / 'best-model.pt'))
         if (save_final_model and (not param_selection_mode)):
             self.model.save((base_path / 'final-model.pt'))
     except KeyboardInterrupt:
         log_line(log)
         log.info('Exiting from training early.')
         if self.use_tensorboard:
             writer.close()
         if (not param_selection_mode):
             log.info('Saving model ...')
             self.model.save((base_path / 'final-model.pt'))
             log.info('Done.')
     if self.corpus.test:
         final_score = self.final_test(base_path, eval_mini_batch_size,
                                       num_workers)
     else:
         final_score = 0
          log.info('Test data not provided, setting final score to 0')
     log.removeHandler(log_handler)
     if self.use_tensorboard:
         writer.close()
     return {
         'test_score': final_score,
         'dev_score_history': dev_score_history,
         'train_loss_history': train_loss_history,
         'dev_loss_history': dev_loss_history,
     }
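
Both train() variants in this collection expose essentially the same entry point and return the same result dictionary. Below is a hypothetical driver sketch, assuming a Flair-style ModelTrainer, SequenceTagger and corpus; the dataset, output path and hyperparameters are placeholders chosen for illustration, not taken from the examples above.

# Hypothetical usage sketch (assumptions: Flair-style ModelTrainer/SequenceTagger/corpus;
# dataset, output path and hyperparameters are placeholders for illustration only).
from flair.datasets import UD_ENGLISH
from flair.embeddings import WordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

corpus = UD_ENGLISH()
tag_dictionary = corpus.make_tag_dictionary(tag_type="upos")
tagger = SequenceTagger(hidden_size=256,
                        embeddings=WordEmbeddings("glove"),
                        tag_dictionary=tag_dictionary,
                        tag_type="upos")

trainer = ModelTrainer(tagger, corpus)
results = trainer.train("resources/taggers/example-upos",   # base_path
                        learning_rate=0.1,
                        mini_batch_size=32,
                        max_epochs=10,
                        embeddings_storage_mode="cpu")
print(results["test_score"])

The returned dictionary has the same structure as in the examples above: a final test_score plus the dev-score, dev-loss and train-loss histories.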