def test_train_load_use_classifier(results_base_path, tasks_base_path):
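    # end-to-end check: train a small classifier, predict, reload it from disk, predict again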

    corpus = NLPTaskDataFetcher.load_corpus(NLPTask.IMDB, base_path=tasks_base_path)
    label_dict = corpus.make_label_dictionary()

    glove_embedding: WordEmbeddings = WordEmbeddings('en-glove')
    document_embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([glove_embedding], 128, 1, False, 64, False,
                                                                         False)

    model = TextClassifier(document_embeddings, label_dict, False)

    trainer = ModelTrainer(model, corpus)
    trainer.train(results_base_path, EvaluationMetric.MICRO_F1_SCORE, max_epochs=2, test_mode=True)

    sentence = Sentence("Berlin is a really nice city.")

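    # each predicted label should carry a value and a confidence score in [0, 1]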
    for s in model.predict(sentence):
        for l in s.labels:
            assert (l.value is not None)
            assert (0.0 <= l.score <= 1.0)
            assert (type(l.score) is float)

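    # reload the serialized model and make sure prediction also copes with empty input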
    loaded_model = TextClassifier.load_from_file(results_base_path / 'final-model.pt')

    sentence = Sentence('I love Berlin')
    sentence_empty = Sentence('       ')

    loaded_model.predict(sentence)
    loaded_model.predict([sentence, sentence_empty])
    loaded_model.predict([sentence_empty])

    # clean up results directory
    shutil.rmtree(results_base_path)
Example #2
def test_train_load_use_classifier_multi_label(results_base_path, tasks_base_path):
    # corpus = NLPTaskDataFetcher.load_corpus('multi_class', base_path=tasks_base_path)
    corpus = NLPTaskDataFetcher.load_classification_corpus(
        data_folder=tasks_base_path / "multi_class"
    )
    label_dict = corpus.make_label_dictionary()

    word_embedding: WordEmbeddings = WordEmbeddings("turian")
    document_embeddings = DocumentRNNEmbeddings(
        embeddings=[word_embedding],
        hidden_size=32,
        reproject_words=False,
        bidirectional=False,
    )

    model = TextClassifier(document_embeddings, label_dict, multi_label=True)

    trainer = ModelTrainer(model, corpus)
    trainer.train(
        results_base_path,
        EvaluationMetric.MICRO_F1_SCORE,
        mini_batch_size=1,
        max_epochs=100,
        test_mode=True,
        checkpoint=False,
    )

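    # in multi-label mode a single sentence may receive several labels at once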
    sentence = Sentence("apple tv")

    for s in model.predict(sentence):
        for l in s.labels:
            print(l)
            assert l.value is not None
            assert 0.0 <= l.score <= 1.0
            assert type(l.score) is float

    sentence = Sentence("apple tv")

    for s in model.predict(sentence):

        assert "apple" in sentence.get_label_names()
        assert "tv" in sentence.get_label_names()

        for l in s.labels:
            print(l)
            assert l.value is not None
            assert 0.0 <= l.score <= 1.0
            assert type(l.score) is float

    loaded_model = TextClassifier.load_from_file(results_base_path / "final-model.pt")

    sentence = Sentence("I love Berlin")
    sentence_empty = Sentence("       ")

    loaded_model.predict(sentence)
    loaded_model.predict([sentence, sentence_empty])
    loaded_model.predict([sentence_empty])

    # clean up results directory
    shutil.rmtree(results_base_path)
Example #3
def test_train_charlm_nocache_load_use_classifier():
    corpus = NLPTaskDataFetcher.fetch_data(NLPTask.IMDB)
    label_dict = corpus.make_label_dictionary()

    charlm_embedding: TokenEmbeddings = CharLMEmbeddings('news-forward-fast', use_cache=False)
    document_embeddings: DocumentLSTMEmbeddings = DocumentLSTMEmbeddings([charlm_embedding], 128, 1, False, 64,
                                                                         False, False)

    model = TextClassifier(document_embeddings, label_dict, False)

    trainer = TextClassifierTrainer(model, corpus, label_dict, False)
    trainer.train('./results', max_epochs=2)

    sentence = Sentence("Berlin is a really nice city.")

    for s in model.predict(sentence):
        for l in s.labels:
            assert (l.value is not None)
            assert (0.0 <= l.score <= 1.0)
            assert (type(l.score) is float)

    loaded_model = TextClassifier.load_from_file('./results/final-model.pt')

    sentence = Sentence('I love Berlin')
    sentence_empty = Sentence('       ')

    loaded_model.predict(sentence)
    loaded_model.predict([sentence, sentence_empty])
    loaded_model.predict([sentence_empty])

    # clean up results directory
    shutil.rmtree('./results')
Example #4
def test_train_charlm_nocache_load_use_classifier(results_base_path, tasks_base_path):
    corpus = NLPTaskDataFetcher.load_corpus("imdb", base_path=tasks_base_path)
    label_dict = corpus.make_label_dictionary()

    embedding: TokenEmbeddings = FlairEmbeddings("news-forward-fast", use_cache=False)
    document_embeddings: DocumentRNNEmbeddings = DocumentRNNEmbeddings(
        [embedding], 128, 1, False, 64, False, False
    )

    model = TextClassifier(document_embeddings, label_dict, False)

    trainer = ModelTrainer(model, corpus)
    trainer.train(results_base_path, max_epochs=2, test_mode=True)

    sentence = Sentence("Berlin is a really nice city.")

    for s in model.predict(sentence):
        for l in s.labels:
            assert l.value is not None
            assert 0.0 <= l.score <= 1.0
            assert type(l.score) is float

    loaded_model = TextClassifier.load_from_file(results_base_path / "final-model.pt")

    sentence = Sentence("I love Berlin")
    sentence_empty = Sentence("       ")

    loaded_model.predict(sentence)
    loaded_model.predict([sentence, sentence_empty])
    loaded_model.predict([sentence_empty])

    # clean up results directory
    shutil.rmtree(results_base_path)
Example #5
    def final_test(self, base_path: Path, embeddings_in_memory: bool,
                   evaluation_metric: EvaluationMetric,
                   eval_mini_batch_size: int):

        log_line(log)
        log.info('Testing using best model ...')

        self.model.eval()

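        # prefer the best checkpoint over the final model state when one exists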
        if (base_path / 'best-model.pt').exists():
            if isinstance(self.model, TextClassifier):
                self.model = TextClassifier.load_from_file(base_path / 'best-model.pt')
            elif isinstance(self.model, SequenceTagger):
                self.model = SequenceTagger.load_from_file(base_path / 'best-model.pt')

        test_metric, test_loss = self.evaluate(
            self.model,
            self.corpus.test,
            eval_mini_batch_size=eval_mini_batch_size,
            embeddings_in_memory=embeddings_in_memory)

        log.info(
            f'MICRO_AVG: acc {test_metric.micro_avg_accuracy()} - f1-score {test_metric.micro_avg_f_score()}'
        )
        log.info(
            f'MACRO_AVG: acc {test_metric.macro_avg_accuracy()} - f1-score {test_metric.macro_avg_f_score()}'
        )
        for class_name in test_metric.get_classes():
            log.info(
                f'{class_name:<10} tp: {test_metric.get_tp(class_name)} - fp: {test_metric.get_fp(class_name)} - '
                f'fn: {test_metric.get_fn(class_name)} - tn: {test_metric.get_tn(class_name)} - precision: '
                f'{test_metric.precision(class_name):.4f} - recall: {test_metric.recall(class_name):.4f} - '
                f'accuracy: {test_metric.accuracy(class_name):.4f} - f1-score: '
                f'{test_metric.f_score(class_name):.4f}')
        log_line(log)

        # if we are training over multiple datasets, do evaluation for each
        if type(self.corpus) is MultiCorpus:
            for subcorpus in self.corpus.corpora:
                log_line(log)
                self._calculate_evaluation_results_for(subcorpus.name,
                                                       subcorpus.test,
                                                       evaluation_metric,
                                                       embeddings_in_memory,
                                                       eval_mini_batch_size,
                                                       base_path / 'test.tsv')

        # get and return the final test score of best model
        if evaluation_metric == EvaluationMetric.MACRO_ACCURACY:
            final_score = test_metric.macro_avg_accuracy()
        elif evaluation_metric == EvaluationMetric.MICRO_ACCURACY:
            final_score = test_metric.micro_avg_accuracy()
        elif evaluation_metric == EvaluationMetric.MACRO_F1_SCORE:
            final_score = test_metric.macro_avg_f_score()
        else:
            final_score = test_metric.micro_avg_f_score()

        return final_score
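
A minimal usage sketch for this method, assuming an already-trained ModelTrainer named trainer (the path and settings below are illustrative, not from the source):

from pathlib import Path

# hypothetical call; mirrors the final_test signature above
final_score = trainer.final_test(
    base_path=Path('resources/taggers/my_model'),
    embeddings_in_memory=True,
    evaluation_metric=EvaluationMetric.MICRO_F1_SCORE,
    eval_mini_batch_size=32)
print(final_score)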
Example #6
def test_train_charlm_load_use_classifier(results_base_path, tasks_base_path):
    corpus = NLPTaskDataFetcher.load_corpus('imdb', base_path=tasks_base_path)
    label_dict = corpus.make_label_dictionary()
    flair_embedding = FlairEmbeddings('news-forward-fast')
    document_embeddings = DocumentLSTMEmbeddings([flair_embedding], 128, 1,
                                                 False, 64, False, False)
    model = TextClassifier(document_embeddings, label_dict, False)
    trainer = ModelTrainer(model, corpus)
    trainer.train(results_base_path,
                  EvaluationMetric.MACRO_F1_SCORE,
                  max_epochs=2,
                  test_mode=True)
    sentence = Sentence('Berlin is a really nice city.')
    for s in model.predict(sentence):
        for l in s.labels:
            assert (l.value is not None)
            assert (0.0 <= l.score <= 1.0)
            assert (type(l.score) is float)
    loaded_model = TextClassifier.load_from_file(
        results_base_path / 'final-model.pt')
    sentence = Sentence('I love Berlin')
    sentence_empty = Sentence('       ')
    loaded_model.predict(sentence)
    loaded_model.predict([sentence, sentence_empty])
    loaded_model.predict([sentence_empty])
    shutil.rmtree(results_base_path)
Example #7
def test_train_load_use_classifier_multi_label(results_base_path,
                                               tasks_base_path):

    # corpus = NLPTaskDataFetcher.load_corpus('multi_class', base_path=tasks_base_path)
    corpus = NLPTaskDataFetcher.load_classification_corpus(
        data_folder=tasks_base_path / 'multi_class')
    label_dict = corpus.make_label_dictionary()

    glove_embedding: WordEmbeddings = WordEmbeddings('en-glove')
    document_embeddings = DocumentLSTMEmbeddings(embeddings=[glove_embedding],
                                                 hidden_size=32,
                                                 reproject_words=False,
                                                 bidirectional=False)

    model = TextClassifier(document_embeddings, label_dict, multi_label=True)

    trainer = ModelTrainer(model, corpus)
    trainer.train(results_base_path,
                  EvaluationMetric.MICRO_F1_SCORE,
                  max_epochs=100,
                  test_mode=True,
                  checkpoint=False)

    sentence = Sentence('apple tv')

    for s in model.predict(sentence):
        for l in s.labels:
            print(l)
            assert (l.value is not None)
            assert (0.0 <= l.score <= 1.0)
            assert (type(l.score) is float)

    sentence = Sentence("apple tv")

    for s in model.predict(sentence):

        assert ('apple' in sentence.get_label_names())
        assert ('tv' in sentence.get_label_names())

        for l in s.labels:
            print(l)
            assert (l.value is not None)
            assert (0.0 <= l.score <= 1.0)
            assert (type(l.score) is float)

    loaded_model = TextClassifier.load_from_file(results_base_path /
                                                 'final-model.pt')

    sentence = Sentence('I love Berlin')
    sentence_empty = Sentence('       ')

    loaded_model.predict(sentence)
    loaded_model.predict([sentence, sentence_empty])
    loaded_model.predict([sentence_empty])

    # clean up results directory
    shutil.rmtree(results_base_path)
Example #8
    def __init__(self, nlp, model_file):
        self.nlp = nlp
        self.clf = TextClassifier.load_from_file(model_file)
        for label in self.clf.label_dictionary.get_items():
            self.nlp.vocab.strings.add(label)
            # split = tag.split('-')
            # add tags without iob prefix to string store
            # if len(split) == 2:
            #    self.nlp.vocab.strings.add(split[1])

        Doc.set_extension('rels', default=[])

    def tagging(self, dataset):
        classifier = TextClassifier.load_from_file(os.path.join(self.model_path, "best-model.pt"))
        sentences = dataset.df['clean_text'].values
        results = []
        for sent in sentences:
            if sent:
                sentence = Sentence(sent)
                classifier.predict(sentence)
                result = sentence.labels[0].value
            else:
                result = 0
            results.append(result)
        return results
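
A hypothetical call site for tagging, assuming component is an instance of the surrounding class and dataset.df carries a clean_text column (all names beyond those in the snippet are illustrative):

predictions = component.tagging(dataset)
dataset.df['predicted'] = predictions  # predicted label values; 0 for empty texts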
Example #10
def classify(data, labels, test, train, validation):
    train_data = [k for k in data.keys() if k in train]
    train_labels = [labels[k] for k in train_data]
    train_data = [data[k] for k in train_data]

    test_data = [k for k in data.keys() if k in test]
    test_labels = [labels[k] for k in test_data]
    test_data = [data[k] for k in test_data]

    validation_data = [k for k in data.keys() if k in validation]
    validation_labels = [labels[k] for k in validation_data]
    validation_data = [data[k] for k in validation_data]

    save_training_files(train_data, train_labels, test_data, test_labels,
                        validation_data, validation_labels)
    corpus = NLPTaskDataFetcher.load_classification_corpus(
        Path('./'),
        test_file='test.txt',
        dev_file='dev.txt',
        train_file='train.txt')
    word_embeddings = [
        WordEmbeddings('pl'),
        FlairEmbeddings('polish-forward'),
        FlairEmbeddings('polish-backward')
    ]
    doc_embeddings = DocumentRNNEmbeddings(word_embeddings,
                                           hidden_size=512,
                                           reproject_words=True,
                                           reproject_words_dimension=256)
    classifier = TextClassifier(
        doc_embeddings,
        label_dictionary=corpus.make_label_dictionary(),
        multi_label=False)
    trainer = ModelTrainer(classifier, corpus)
    trainer.train('./', max_epochs=25)
    classifier = TextClassifier.load_from_file('./best-model.pt')

    validation_data = [Sentence(x) for x in validation_data]
    for x in validation_data:
        classifier.predict(x)
    predicted = [int(x.labels[0].value) for x in validation_data]
    remove_training_files()
    precision, recall, f1, _ = precision_recall_fscore_support(
        validation_labels, predicted, average='binary')
    return {
        'precision': float(round(precision, 3)),
        'recall': float(round(recall, 3)),
        'f1': float(round(f1, 3))
    }
Example #11
    def load_models(self) -> None:
        """Load intent classifier and named entity recognizers."""
        # pylint: disable=E0401
        from flair.models import TextClassifier, SequenceTagger

        # Load mapping from intent id to user intent name
        if self.intent_map is None:
            intent_map_path = self.profile.read_path(
                self.profile.get("training.intent.intent_map", "intent_map.json")
            )

            with open(intent_map_path, "r") as intent_map_file:
                self.intent_map = json.load(intent_map_file)

        data_dir = self.profile.read_path(
            self.profile.get("intent.flair.data_dir", "flair_data")
        )

        # Only load intent classifier if there is more than one intent
        if (self.class_model is None) and (len(self.intent_map) > 1):
            class_model_path = os.path.join(
                data_dir, "classification", "final-model.pt"
            )
            self._logger.debug("Loading classification model from %s", class_model_path)
            self.class_model = TextClassifier.load_from_file(class_model_path)
            self._logger.debug("Loaded classification model")

        if self.ner_models is None:
            ner_models = {}
            ner_data_dir = os.path.join(data_dir, "ner")
            for file_name in os.listdir(ner_data_dir):
                ner_model_dir = os.path.join(ner_data_dir, file_name)
                if os.path.isdir(ner_model_dir):
                    # Assume directory is intent name
                    intent_name = file_name
                    if intent_name not in self.intent_map:
                        self._logger.warning(
                            "%s was not found in intent map", intent_name
                        )

                    ner_model_path = os.path.join(ner_model_dir, "final-model.pt")
                    self._logger.debug("Loading NER model from %s", ner_model_path)
                    ner_models[intent_name] = SequenceTagger.load_from_file(
                        ner_model_path
                    )

            self._logger.debug("Loaded NER model(s)")
            self.ner_models = ner_models
Example #12
    def __init__(self, nlp, model_file):
        self.nlp = nlp
        self.clf = TextClassifier.load_from_file(model_file)
        # self.concept_map = pickle.load(open(dict_map_path, "rb"))
        self.concept_map = {'_UNK': [0], 'Dosing': [1], 'State_of_health': [2], 'Measurement': [3], 'Negation': [4],
                            'Treatment': [5], 'Medical_condition': [6], 'Process': [7], 'Medication': [8],
                            'Person': [9], 'Medical_device': [10], 'Time_information': [11], 'Body_part': [12],
                            'DiagLab_Procedure': [13], 'Local_specification': [14], 'Biological_chemistry': [15],
                            'Structure_element': [16], 'Biological_parameter': [17], 'Body_Fluid': [18], 'Type': [19],
                            'Speculation': [20], 'Tissue': [21], 'Degree': [22], 'Medical_specification': [23],
                            'Temporal_course': [24]}
        for label in self.clf.label_dictionary.get_items():
            self.nlp.vocab.strings.add(label)
            # split = tag.split('-')
            # add tags without iob prefix to string store
            # if len(split) == 2:
            #    self.nlp.vocab.strings.add(split[1])

        Doc.set_extension('rels', default=[], force=True)
Example #13
def evaluate(test_file, model_file, dataset_format='macss', semeval_scoring=False):
    if semeval_scoring:
        eval_script = cached_path(
            'https://raw.githubusercontent.com/vzhong/semeval/master/dataset/SemEval2010_task8_scorer-v1.2/semeval2010_task8_scorer-v1.2.pl',
            cache_dir='scripts')
        chmod(eval_script, 0o777)

    classifier: TextClassifier = TextClassifier.load_from_file(model_file)
    #sentences_test: List[Sentence] = load_sentences_jsonl(test_file, attach_id=True)
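    # idx2item: mapping from vocabulary index to item, consumed by the dataset loader below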
    idx2item = load_idx2item(join(dirname(test_file), 'vocabulary/embeddings.csv'))

    load_dataset = dataset_loader[dataset_format]

    sentences_test: List[Sentence] = load_dataset(test_file, idx2item, is_test=False, attach_id=True)
    sentences_pred: List[Sentence] = load_dataset(test_file, idx2item, is_test=True, attach_id=True)                                                      

    sentences_pred = classifier.predict(sentences_pred)

    if semeval_scoring:
        id_labels_true = [(sentence.id_, sentence.labels[0]) for sentence in sentences_test]
        id_labels_pred = [(sentence.id_, sentence.labels[0]) for sentence in sentences_pred]

        input_files = []
        for id_labels in [id_labels_true, id_labels_pred]:
            tmp_file = NamedTemporaryFile(delete=True)
            input_files.append(tmp_file)
            with open(tmp_file.name, 'w') as f:
                for id_, label in id_labels:
                    f.write('{}\t{}\n'.format(id_, label.name))
            tmp_file.file.close()

        p = run([eval_script, input_files[0].name, input_files[1].name], stdout=PIPE, encoding='utf-8')
        main_result = p.stdout
        print(main_result)

    else:
        y_true = [sentence.labels[0].name for sentence in sentences_test]
        y_pred = [sentence.labels[0].name for sentence in sentences_pred] 
        print(classification_report(y_true, y_pred))
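
A hypothetical invocation with placeholder paths (dataset_format must be a key of dataset_loader):

evaluate('data/test.txt', 'models/best-model.pt', dataset_format='macss', semeval_scoring=False)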
Example #14
def load_models(
    class_model_path: Optional[str] = None,
    ner_model_paths: List[str] = []
) -> Tuple[Optional[TextClassifier], Dict[str, SequenceTagger]]:

    # Only load intent classifier if there is more than one intent
    class_model = None
    if (class_model_path is not None) and os.path.exists(class_model_path):
        logger.debug(f"Loading classification model from {class_model_path}")
        class_model = TextClassifier.load_from_file(class_model_path)
        logger.debug("Loaded classification model")

    ner_models = {}
    for ner_model_path in ner_model_paths:
        # Assume directory name is intent name
        intent_name = os.path.split(os.path.split(ner_model_path)[0])[1]
        logger.debug(
            f"Loading NER model from {ner_model_path} for intent {intent_name}"
        )
        ner_models[intent_name] = SequenceTagger.load_from_file(ner_model_path)

    logger.debug("Loaded NER model(s)")

    return class_model, ner_models
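
A hypothetical call with placeholder model paths (none of these paths come from the source):

from flair.data import Sentence  # assumed, as in the other examples

class_model, ner_models = load_models(
    class_model_path='flair_data/classification/final-model.pt',
    ner_model_paths=['flair_data/ner/GetTime/final-model.pt'])
if class_model is not None:
    sentence = Sentence('what time is it')
    class_model.predict(sentence)
    print(sentence.labels)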
Example #15
from __future__ import absolute_import
Example #16
from bson.objectid import ObjectId
from flask_cors import CORS
from flair.models import TextClassifier
from flair.data import Sentence
from flask import Flask, jsonify, session
import os

model_path = '/root/flask_vue_ML'
expose = Flask(__name__)
expose.secret_key = "super_secret_key"
# expose.config['MONGO_DBNAME'] = 'exposeModel'
# expose.config['MONGO_URI'] = 'mongodb://*****:*****@'


@expose.route('/', methods=['GET'])
def index():
    return jsonify("welcome to Arafa API")


@expose.route('/api/tasks', methods=['GET'])
def get_result():
    result = []
    try:
        data_result = session['my_result']
        result.append({'title': data_result['title'], 'tag': data_result['tag']})
    except KeyError:
        result.append({'title': 'The txt you input', 'tag': 'spam or harm'})
    return jsonify(result)
Example #17
 def __init__(self, c_classifier_file, p_classifier_file):
     self.classifier_c = TextClassifier.load_from_file(c_classifier_file)
     self.classifier_p = TextClassifier.load_from_file(p_classifier_file)
Example #18
def main(args):
    args = parser.parse_args()

    # Loading classifier model:
    print("Loading classifier model")
    classifier = TextClassifier.load_from_file(join(args.model_dir, 'best-model.pt'))

    txt_files = glob.glob(join(args.data_dir, '*.txt'))
    
    sent_splitter = PunktSentenceTokenizer()
    tokenizer = TreebankWordTokenizer()
    sentence_lookahead = 0
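    # with a lookahead of 0, both relation arguments must fall within the same sentence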

    for txt_fn in txt_files:
        print("Processing %s" % (txt_fn))
        ann_input_fn = join(args.data_dir, basename(txt_fn)[:-3]+'ann')
        ents, _ = read_brat_file(ann_input_fn)

        ann_output_fn = join(args.output_dir, basename(txt_fn)[:-3]+'ann')
        with open(txt_fn, 'r') as myfile:
            text = myfile.read()

        ann_out = open(ann_output_fn, 'w')
        
        # Write entities right away:
        for ent_id in ents.keys():
            ent = ents[ent_id]
            ent_text = text[ent.start:ent.end].replace('\n', ' ')
            ann_out.write('%s\t%s %d %d\t%s\n' % (ent_id, ent.cat, ent.start, ent.end, ent_text))

        sent_spans = list(sent_splitter.span_tokenize(text))

        rel_ind = 0
        rel_attempts = 0
        for sent_ind in range(len(sent_spans)):
            primary_sent_span = sent_spans[sent_ind]
            end_window_ind = min(sent_ind+sentence_lookahead, len(sent_spans)-1)
            end_sent_span = sent_spans[end_window_ind]

            sent = text[primary_sent_span[0]:end_sent_span[1]].replace('\n', ' ')
            drug_ents, att_ents = get_span_ents(primary_sent_span, end_sent_span, ents)

            for att_ent in att_ents:
                for drug_ent in drug_ents:
                    ## Get index of ents into sent:
                    a1_start = att_ent.start - primary_sent_span[0]
                    a1_end = att_ent.end - primary_sent_span[0]
                    a1_text = sent[a1_start:a1_end]

                    a2_start = drug_ent.start - primary_sent_span[0]
                    a2_end = drug_ent.end - primary_sent_span[0]
                    a2_text = sent[a2_start:a2_end]

                    if a1_start < a2_start:
                        # arg1 occurs before arg2
                        rel_text = (sent[:a1_start] + 
                                    " %sStart %s %sEnd " % (att_ent.cat, a1_text, att_ent.cat) +
                                    sent[a1_end:a2_start] +
                                    " DrugStart %s DrugEnd" % (a2_text) +
                                    sent[a2_end:])
                    else:
                        rel_text = (sent[:a2_start] +
                                    " DrugStart %s DrugEnd " % (a2_text) +
                                    sent[a2_end:a1_start] +
                                    " %sStart %s %sEnd " % (att_ent.cat, a1_text, att_ent.cat) +
                                    sent[a1_end:])

                    # if att_ent.cat == 'Dosage':
                        # print("working with Dosage ent")
                    sentence = Sentence(rel_text, use_tokenizer=True)
                    labels = classifier.predict(sentence)[0].labels
                    if len(labels) > 1:
                        print('  This relation has more than one output label')
                    label = labels[0].value
                    # print("Comparing ent %s and ent %s and got %s" % (att_ent.id, drug_ent.id, label))
                    rel_attempts += 1
                    if not label == 'None':
                        # Make sure label corresponds to entity type:
                        if label.find(att_ent.cat) < 0:
                            # print("  Skipping found relation where label %s doesn't match arg type %s" % (label, att_ent.cat))
                            continue
                        ann_out.write('R%d\t%s Arg1:%s Arg2:%s\n' % (rel_ind, label, att_ent.id, drug_ent.id))
                        rel_ind += 1

        # print("Finished: Found %d relations while making %d classification attempts" % (rel_ind, rel_attempts))
        ann_out.close()
Example #19
import sys

from flair.data import Sentence
from flair.models import TextClassifier

tagger = TextClassifier.load_from_file(
    'resources/germeval_2018/results/final-model.pt')

test_filename = sys.argv[1]

with open(test_filename, 'rt') as f:
    lines = [line.rstrip() for line in f.readlines()]

for line in lines:
    sentence, label, x = line.split("\t")

    new_line = [
        token.replace('#', '') for token in sentence.split()
        if token[0] not in ('@', '&', '|')
    ]

    sentence = " ".join(new_line)

    s = Sentence(sentence, use_tokenizer=True)

    tagger.predict(s)

    label = str(s.labels[0]).split()[0]

    print(f"{sentence}\t{label}\tNOT USED")
Example #20
from flair.models import TextClassifier
from flair.data import Sentence

classifier = TextClassifier.load_from_file("model/best-model.pt")
sentence = Sentence("Hi. Yes mum, I will...")
classifier.predict(sentence)
print(sentence.labels)
Example #21
from pathlib import Path
from flair.models import TextClassifier
from flair.data import Sentence
import pandas as pd

classifier = TextClassifier.load_from_file('C:/Users/jeanc/Documents/reviews/model/final-model.pt')

# placeholder DataFrame; a real run expects a 'tweet' column of texts
test = pd.DataFrame()

# predict the label for a single text
def fun(x):
    sent = Sentence(x)
    classifier.predict(sent)
    for label in sent.labels:
        return label.value


test['label1'] = test['tweet'].apply(fun)
Example #23
trainer.train('resources/taggers/ag_news',
              learning_rate=0.1,
              mini_batch_size=32,
              anneal_factor=0.5,
              patience=5,
              max_epochs=150)

# 8. plot training curves (optional)
from flair.visual.training_curves import Plotter
plotter = Plotter()
plotter.plot_training_curves('resources/taggers/ag_news/loss.tsv')
plotter.plot_weights('resources/taggers/ag_news/weights.txt')

### MAIN

classifier = TextClassifier.load_from_file(
    'resources/taggers/ag_news/final-model.pt')

# create example sentence
sentence = Sentence('France is the current world cup winner.')

# predict tags and print
classifier.predict(sentence)

print(sentence.labels)

Example #24
 def __init__(self):
     try:
         self.classifier = TextClassifier.load_from_file(
             BASE_DIR.joinpath('./models/best-model.pt'))
     except FileNotFoundError as ex:
         print(ex)
Example #25
from flair.models import TextClassifier
from flair.data import Sentence
classifier = TextClassifier.load_from_file('./best-model.pt')
sentence = Sentence('add your text here for prediction')  # add your text here whose label is to be predicted
classifier.predict(sentence)
print(sentence.labels)