Exemplo n.º 1
0
def test_train_load_use_classifier_multi_label(results_base_path, tasks_base_path):
    """Train a multi-label text classifier, predict with it, then reload it.

    The corpus is read from the ``multi_class`` folder below
    ``tasks_base_path``. A ``TextClassifier`` with ``multi_label=True`` is
    trained on it, the labels predicted for a short sentence are checked, and
    the saved ``final-model.pt`` is loaded back from ``results_base_path`` and
    used for prediction (including on a whitespace-only sentence). The results
    directory is removed as the last step.
    """

    def _check_labels(labelled_sentence):
        # Every predicted label must carry a value and a float score in [0, 1].
        for predicted_label in labelled_sentence.labels:
            print(predicted_label)
            assert predicted_label.value is not None
            assert 0.0 <= predicted_label.score <= 1.0
            assert type(predicted_label.score) is float

    multi_class_folder = tasks_base_path / "multi_class"
    corpus = NLPTaskDataFetcher.load_classification_corpus(
        data_folder=multi_class_folder
    )
    label_dict = corpus.make_label_dictionary()

    turian_embedding = WordEmbeddings("turian")
    rnn_settings = dict(hidden_size=32, reproject_words=False, bidirectional=False)
    document_embeddings = DocumentRNNEmbeddings(
        embeddings=[turian_embedding], **rnn_settings
    )

    model = TextClassifier(document_embeddings, label_dict, multi_label=True)

    trainer = ModelTrainer(model, corpus)
    training_options = dict(
        mini_batch_size=1,
        max_epochs=100,
        test_mode=True,
        checkpoint=False,
    )
    trainer.train(
        results_base_path, EvaluationMetric.MICRO_F1_SCORE, **training_options
    )

    # First pass: only the shape of the predicted labels is checked.
    sentence = Sentence("apple tv")
    for predicted in model.predict(sentence):
        _check_labels(predicted)

    # Second pass: the input sentence must also carry both expected labels.
    sentence = Sentence("apple tv")
    for predicted in model.predict(sentence):
        assert "apple" in sentence.get_label_names()
        assert "tv" in sentence.get_label_names()
        _check_labels(predicted)

    saved_model_file = results_base_path / "final-model.pt"
    loaded_model = TextClassifier.load_from_file(saved_model_file)

    sentence = Sentence("I love Berlin")
    sentence_empty = Sentence("       ")

    # The reloaded model is called with a single sentence, a mixed list and a
    # list holding only the whitespace-only sentence.
    for prediction_input in (sentence, [sentence, sentence_empty], [sentence_empty]):
        loaded_model.predict(prediction_input)

    # Delete the results directory used by this test.
    shutil.rmtree(results_base_path)
def test_train_load_use_classifier_multi_label(results_base_path,
                                               tasks_base_path):
    """Train, use and reload a multi-label classifier for the "topic" label type.

    The corpus comes from the ``multi_class`` folder below
    ``tasks_base_path``; the document embeddings are taken from the
    ``document_embeddings`` name defined outside this function. After
    training, predictions for a short sentence are checked, the training
    objects are deleted, and ``final-model.pt`` is loaded from
    ``results_base_path`` and used for prediction. The results directory is
    removed before the loaded model is deleted.
    """

    def _check_labels(labels, echo):
        # Each label needs a value and a float score in [0, 1]; ``echo``
        # controls whether the label is printed first.
        for label in labels:
            if echo:
                print(label)
            assert label.value is not None
            assert 0.0 <= label.score <= 1.0
            assert type(label.score) is float

    topic = "topic"
    corpus = flair.datasets.ClassificationCorpus(
        tasks_base_path / "multi_class", label_type=topic
    )
    label_dict = corpus.make_label_dictionary(label_type=topic)

    model: TextClassifier = TextClassifier(
        document_embeddings=document_embeddings,
        label_dictionary=label_dict,
        label_type=topic,
        multi_label=True,
    )

    trainer = ModelTrainer(model, corpus)
    training_options = {
        "mini_batch_size": 1,
        "max_epochs": 100,
        "shuffle": False,
        "checkpoint": False,
        "train_with_test": True,
        "train_with_dev": True,
    }
    trainer.train(results_base_path, **training_options)

    # First prediction: labels are printed and checked.
    sentence = Sentence("apple tv")
    model.predict(sentence)
    _check_labels(sentence.labels, True)

    # Second prediction: both expected label names must be present; labels
    # are checked again without printing.
    sentence = Sentence("apple tv")
    model.predict(sentence)

    predicted_names_expected = ("apple", "tv")
    for expected_name in predicted_names_expected:
        assert expected_name in sentence.get_label_names()

    _check_labels(sentence.labels, False)

    # Drop the training objects before the saved model is loaded.
    del trainer, model, corpus
    saved_model_file = results_base_path / "final-model.pt"
    loaded_model = TextClassifier.load(saved_model_file)

    sentence = Sentence("I love Berlin")
    sentence_empty = Sentence("       ")

    for prediction_input in (sentence, [sentence, sentence_empty], [sentence_empty]):
        loaded_model.predict(prediction_input)

    # Delete the results directory used by this test.
    shutil.rmtree(results_base_path)
    del loaded_model
Exemplo n.º 3
0
def test_train_load_use_classifier_multi_label(results_base_path,
                                               tasks_base_path):
    """Train a multi-label classifier on GloVe/LSTM embeddings and reload it.

    The corpus is loaded from the ``multi_class`` folder below
    ``tasks_base_path``. After training, the sentence "apple tv" is predicted
    twice: the first pass checks only the predicted labels, the second pass
    additionally requires the labels "apple" and "tv" on the sentence. The
    saved ``final-model.pt`` is then loaded from ``results_base_path`` and
    used for prediction, and the results directory is removed.
    """
    corpus_folder = tasks_base_path / 'multi_class'
    corpus = NLPTaskDataFetcher.load_classification_corpus(
        data_folder=corpus_folder)
    label_dict = corpus.make_label_dictionary()

    glove_embedding = WordEmbeddings('en-glove')
    document_embeddings = DocumentLSTMEmbeddings(
        embeddings=[glove_embedding],
        hidden_size=32,
        reproject_words=False,
        bidirectional=False,
    )

    model = TextClassifier(document_embeddings, label_dict, multi_label=True)

    trainer = ModelTrainer(model, corpus)
    trainer.train(
        results_base_path,
        EvaluationMetric.MICRO_F1_SCORE,
        max_epochs=100,
        test_mode=True,
        checkpoint=False,
    )

    # Two prediction passes over a fresh "apple tv" sentence; only the second
    # one checks the label names attached to the input sentence.
    for require_label_names in (False, True):
        sentence = Sentence('apple tv')

        for predicted in model.predict(sentence):
            if require_label_names:
                assert 'apple' in sentence.get_label_names()
                assert 'tv' in sentence.get_label_names()

            for label in predicted.labels:
                print(label)
                assert label.value is not None
                assert 0.0 <= label.score <= 1.0
                assert type(label.score) is float

    model_path = results_base_path / 'final-model.pt'
    loaded_model = TextClassifier.load_from_file(model_path)

    sentence = Sentence('I love Berlin')
    sentence_empty = Sentence('       ')

    # Single sentence, mixed list, then a list with only the blank sentence.
    prediction_inputs = (sentence, [sentence, sentence_empty], [sentence_empty])
    for prediction_input in prediction_inputs:
        loaded_model.predict(prediction_input)

    # Delete the results directory used by this test.
    shutil.rmtree(results_base_path)
Exemplo n.º 4
0
    def predict(self, x: str = 'test string'):
        """Classify ``x`` and map the first predicted label to a level.

        The text is wrapped in a ``Sentence`` and passed to
        ``self.sentiment_model.predict``. The first label name on the sentence
        is converted to ``int`` and used as the key/index into
        ``self.complaint_severity``.

        :param x: text to classify
        :return: tuple ``(level, contact_info)`` where ``level`` is the entry
            looked up in ``self.complaint_severity`` and ``contact_info`` is
            the result of ``self._get_contact_info(level)``
        :raises IndexError: if the sentence has no label names after prediction
        :raises ValueError: if the first label name cannot be converted to int
        """
        doc = Sentence(x)
        self.sentiment_model.predict(doc)

        first_label_name = doc.get_label_names()[0]
        severity = self.complaint_severity[int(first_label_name)]
        contact_info = self._get_contact_info(severity)
        return severity, contact_info
Exemplo n.º 5
0
def test_train_load_use_classifier_multi_label(results_base_path,
                                               tasks_base_path):
    """Train a multi-label classifier, check its predictions, then reload it.

    The corpus is built with ``flair.datasets.ClassificationCorpus`` from the
    ``multi_class`` folder below ``tasks_base_path``. After training, the
    sentence "apple tv" is predicted twice; the second time the labels
    "apple" and "tv" must be present on the sentence. The saved
    ``final-model.pt`` is loaded from ``results_base_path``, used for
    prediction, and the results directory is removed.
    """
    corpus_folder = tasks_base_path / 'multi_class'
    corpus = flair.datasets.ClassificationCorpus(corpus_folder)
    label_dict = corpus.make_label_dictionary()

    word_embedding = WordEmbeddings('turian')
    document_embeddings = DocumentRNNEmbeddings(
        embeddings=[word_embedding],
        hidden_size=32,
        reproject_words=False,
        bidirectional=False,
    )
    model = TextClassifier(document_embeddings, label_dict, multi_label=True)

    trainer = ModelTrainer(model, corpus)
    trainer.train(
        results_base_path,
        mini_batch_size=1,
        max_epochs=100,
        shuffle=False,
        checkpoint=False,
    )

    # Each pass is (input text, label names required on the input sentence).
    prediction_passes = (
        ('apple tv', ()),
        ('apple tv', ('apple', 'tv')),
    )
    for text, required_names in prediction_passes:
        sentence = Sentence(text)
        for predicted in model.predict(sentence):
            for required_name in required_names:
                assert required_name in sentence.get_label_names()
            for label in predicted.labels:
                print(label)
                assert label.value is not None
                assert 0.0 <= label.score <= 1.0
                assert type(label.score) is float

    model_path = results_base_path / 'final-model.pt'
    loaded_model = TextClassifier.load(model_path)

    sentence = Sentence('I love Berlin')
    sentence_empty = Sentence('       ')

    # Single sentence, mixed list, then a list with only the blank sentence.
    for prediction_input in (sentence, [sentence, sentence_empty], [sentence_empty]):
        loaded_model.predict(prediction_input)

    # Delete the results directory used by this test.
    shutil.rmtree(results_base_path)
def get_flair(text):
    """Classify ``text`` with the module-level flair ``classifier``.

    Args:
        text: Raw text to wrap in a ``Sentence`` and classify.

    Returns:
        ``[rating, valence]`` where ``rating`` is the value of the first
        predicted label and ``valence`` is that label's score as a float.
        The string ``'n/a'`` is returned instead when anything in the
        prediction fails, including the case where no label was predicted.
    """
    # this is for flair
    try:
        sentence = Sentence(text)
        classifier.predict(sentence)

        # Read the score directly from the label object. Extracting it from
        # str(label) with a regex returns the first number in the rendered
        # text, which is wrong when the label value or the sentence contains
        # digits or when the score is printed in scientific notation.
        top_label = sentence.labels[0]
        therating = top_label.value
        thevalence = float(top_label.score)
        return [therating, thevalence]

    except Exception:
        # Deliberate best-effort: callers get the 'n/a' sentinel rather than
        # an exception.
        print("An exception occurred. Text was not passed to get_valence")
        return 'n/a'