def _train(self, corpus: Corpus, params: dict, base_path: Path,
           max_epochs: int, optimization_value: str):
    """
    Train one model for a single parameter configuration and report its result.

    :param corpus: corpus used to build the label dictionary and to train on;
        embeddings of all its sentences are cleared before training.
    :param params: parameter configuration; forwarded to ``self._set_up_model``
        and split into trainer-constructor and ``train`` keyword arguments.
    :param base_path: path handed to ``ModelTrainer.train`` as its first argument.
    :param max_epochs: forwarded to ``ModelTrainer.train`` as ``max_epochs``.
    :param optimization_value: ``"score"`` selects ``results['test_score']``;
        any other value selects the last entry of ``results['dev_loss_history']``.
    :return: dict with the selected value under ``'result'`` and the
        unmodified ``params`` object under ``'params'``.
    """
    label_dictionary = corpus.make_label_dictionary()

    # Start from a clean slate: drop whatever embeddings the sentences carry.
    for sentence in corpus.get_all_sentences():
        sentence.clear_embeddings()

    model = self._set_up_model(params, label_dictionary)

    # Route each entry of params to the call that understands it. This is
    # done after the model is set up, exactly as the keyword dicts were
    # previously built at this point.
    train_kwargs = {}
    trainer_kwargs = {}
    for name, setting in params.items():
        if name in TRAINING_PARAMETERS:
            train_kwargs[name] = setting
        # 'model' is passed positionally to ModelTrainer, never as a keyword.
        if name in MODEL_TRAINER_PARAMETERS and name != 'model':
            trainer_kwargs[name] = setting

    trainer = ModelTrainer(model, corpus, **trainer_kwargs)
    history = trainer.train(base_path,
                            max_epochs=max_epochs,
                            param_selection_mode=True,
                            **train_kwargs)

    outcome = (history['test_score'] if optimization_value == "score"
               else history['dev_loss_history'][-1])
    return {'result': outcome, 'params': params}
示例#2
0
def test_tagged_corpus_get_all_sentences():
    """A corpus with one sentence per split must report three sentences in total."""
    train_split = [Sentence("I'm used in training.", use_tokenizer=True)]
    dev_split = [Sentence("I'm a dev sentence.", use_tokenizer=True)]
    test_split = [
        Sentence('I will be only used for testing.', use_tokenizer=True)
    ]

    corpus = Corpus(train_split, dev_split, test_split)

    # get_all_sentences() is expected to cover train, dev and test together.
    sentence_count = len(corpus.get_all_sentences())
    assert sentence_count == 3
示例#3
0
def make_relations_tag_dictionary(corpus: Corpus,
                                  tag_type='dependency',
                                  special_tags=[]) -> Dictionary:
    """
    Collect the tag values of every token in the corpus into a Dictionary.

    :param corpus: corpus whose sentences (as returned by
        ``get_all_sentences()``) are scanned token by token.
    :param tag_type: name of the token tag whose ``value`` is added.
    :param special_tags: accepted for interface compatibility; the current
        implementation does not read it.
    :return: a ``Dictionary`` created with ``add_unk=False`` to which the
        ``tag_type`` value of each token has been added.
    """
    relation_tags: Dictionary = Dictionary(add_unk=False)

    # NOTE: special_tags is intentionally not inserted into the dictionary.
    every_token = (token
                   for sentence in corpus.get_all_sentences()
                   for token in sentence.tokens)
    for token in every_token:
        tag_value = token.get_tag(tag_type).value
        relation_tags.add_item(tag_value)

    return relation_tags
示例#4
0
    def predict(self, corpus: Corpus):
        """
        Run the wrapped model on a corpus and tag every sentence with its prediction.

        The corpus is converted with ``self._convert_dataset`` and passed to
        ``self.model.predict``. Each sentence from ``corpus.get_all_sentences()``
        then receives a ``"cluster"`` label holding the string form of the
        prediction at the same position.

        :param corpus: the flair corpus this wrapper will use for predicting the labels.
        :return: the output of ``self.model.predict``, unchanged.
        """
        features = self._convert_dataset(corpus)

        model_description = str(self.model)
        datapoint_count = str(len(features))
        log.info("Start the prediction " + model_description + " with " + datapoint_count + " Datapoints.")

        cluster_ids = self.model.predict(features)

        # Predictions are looked up by position, so sentence order must match
        # the order of the converted data points.
        position = 0
        for sentence in corpus.get_all_sentences():
            sentence.set_label("cluster", str(cluster_ids[position]))
            position += 1

        log.info("Finished prediction and labeled all sentences.")
        return cluster_ids