Example #1
def test_token_level():
    text = 'A B C D.'

    gold_a = [Annotation('B C', 2, 5, 'PER')]
    gold_b = [Annotation('A', 0, 1, 'ORG'), Annotation('B', 2, 3, 'PER')]

    pred_a = [Annotation('B', 2, 3, 'PER'), Annotation('C', 4, 5, 'PER')]
    pred_b = [Annotation('A', 0, 1, 'ORG'), Annotation('B', 2, 3, 'ORG')]

    gold = [
        Document(name='doc_a', text=text, annotations=gold_a),
        Document(name='doc_b', text=text, annotations=gold_b)
    ]

    predicted = [
        Document(name='doc_a', text=text, annotations=pred_a),
        Document(name='doc_b', text=text, annotations=pred_b)
    ]

    evaluator = Evaluator(gold, predicted)
    scores = evaluator.token_level()
    assert scores.precision('PER') == 1
    assert scores.recall('PER') == 0.6667
    assert scores.f_score('PER') == 0.8

    assert scores.precision('ORG') == 0.5
    assert scores.recall('ORG') == 1
    assert scores.f_score('ORG') == 0.6667
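
The expected PER and ORG figures follow directly from token counts: PER has 3 gold tokens (B and C in doc_a, B in doc_b) and 2 correct predictions; ORG has 1 gold token and 2 predicted tokens of which 1 is correct. A standalone sketch of that arithmetic (not part of the Evaluator API):

def prf(tp, fp, fn):
    # Standard precision/recall/F1 from true positive, false positive and false negative counts.
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * precision * recall / (precision + recall)
    return round(precision, 4), round(recall, 4), round(f1, 4)

assert prf(tp=2, fp=0, fn=1) == (1.0, 0.6667, 0.8)    # PER
assert prf(tp=1, fp=1, fn=0) == (0.5, 1.0, 0.6667)    # ORG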
Example #2
def test_entity_level():
    gold = [
        Document(name='doc_a',
                 text='',
                 annotations=[Annotation('', 3, 6, 'MISC')]),
        Document(name='doc_b',
                 text='',
                 annotations=[Annotation('', 0, 2, 'PER')])
    ]

    predicted = [
        Document(name='doc_a',
                 text='',
                 annotations=[Annotation('', 2, 6, 'MISC')]),
        Document(name='doc_b',
                 text='',
                 annotations=[Annotation('', 0, 2, 'PER')])
    ]

    evaluator = Evaluator(gold, predicted)
    scores = evaluator.entity_level()
    assert scores.micro_avg_f_score() == 0.5
    assert scores.macro_avg_f_score() == 0.5
    assert scores.f_score('PER') == 1
    assert scores.f_score('MISC') == 0
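
At the entity level a prediction only counts when both the span offsets and the tag match exactly, so the MISC prediction (2, 6) misses the gold span (3, 6) while PER matches. A sketch of how both averages come out to 0.5 (plain arithmetic, not the library API):

# Micro average: pool TP/FP/FN over all entity types, then compute a single F1.
tp, fp, fn = 1, 1, 1                      # PER matched; the MISC spans disagree
precision = tp / (tp + fp)                # 0.5
recall = tp / (tp + fn)                   # 0.5
micro_f1 = 2 * precision * recall / (precision + recall)

# Macro average: unweighted mean of the per-type F1 scores.
macro_f1 = (1.0 + 0.0) / 2                # F1(PER) = 1, F1(MISC) = 0

assert micro_f1 == 0.5 and macro_f1 == 0.5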
Example #3
def test_token_annotations():
    evaluator = Evaluator(gold=(), predicted=())
    doc = Document(name='doc_a',
                   text='A B C D.',
                   annotations=[
                       Annotation('B C', 2, 5, 'PER'),
                       Annotation('D.', 6, 8, 'ORG')
                   ])

    assert evaluator.token_annotations(doc) == ['O', 'PER', 'PER', 'ORG']
    assert evaluator.token_annotations(
        doc, tag_blind=True) == ['O', 'ENT', 'ENT', 'ENT']
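
The expected labels imply whitespace tokenization ('A B C D.' yields four tokens, with 'D.' kept as one token) and one label per token, taken from any annotation whose character span overlaps it, or 'ENT' for every entity token when tag_blind=True. A self-contained sketch of that mapping using plain (start, end, tag) tuples; it is an illustration, not the Evaluator's actual implementation:

import re

def label_tokens(text, spans, tag_blind=False):
    # Label each whitespace token with the tag of the first overlapping span, else 'O'.
    labels = []
    for match in re.finditer(r'\S+', text):
        tag = 'O'
        for start, end, span_tag in spans:
            if match.start() < end and match.end() > start:   # character overlap
                tag = 'ENT' if tag_blind else span_tag
                break
        labels.append(tag)
    return labels

assert label_tokens('A B C D.', [(2, 5, 'PER'), (6, 8, 'ORG')]) == ['O', 'PER', 'PER', 'ORG']
assert label_tokens('A B C D.', [(2, 5, 'PER'), (6, 8, 'ORG')],
                    tag_blind=True) == ['O', 'ENT', 'ENT', 'ENT']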
Example #4
def arg_parser():
    parser = argparse.ArgumentParser()
    parser.add_argument("language",
                        help="Language to use for tokenizer",
                        choices=Evaluator.supported_languages())
    parser.add_argument("documents_path", help="Path to *.txt files")
    parser.add_argument("gold_path", help="Path to gold *.ann files")
    parser.add_argument("pred_path", help="Path to predicted *.ann files")
    return parser.parse_args()
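
A hypothetical driver for this parser; load_documents is an assumed helper (not defined in the snippet above) that pairs each *.txt file with its *.ann annotations into Document objects:

def main():
    args = arg_parser()
    # load_documents is hypothetical: it should return a list of Document objects
    # built from the *.txt files and the given *.ann directory.
    gold = load_documents(args.documents_path, args.gold_path)
    predicted = load_documents(args.documents_path, args.pred_path)
    evaluator = Evaluator(gold, predicted, language=args.language)
    print(evaluator.entity_level())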
Example #5
def main(args, model_dir):
    logger.info('Args = {}'.format(args))
    corpus = CorpusLoader().load_corpus(CORPUS_PATH[args.corpus])
    tokenizer = TokenizerFactory().tokenizer(args.corpus)
    logger.info('Loaded corpus: {}'.format(corpus))

    logger.info('Get sentences...')
    train_sents, _ = flair_utils.standoff_to_flair_sents(corpus.train, tokenizer, verbose=True)
    dev_sents, _ = flair_utils.standoff_to_flair_sents(corpus.dev, tokenizer, verbose=True)
    test_sents, test_docs = flair_utils.standoff_to_flair_sents(corpus.test,
                                                                tokenizer, verbose=True)

    train_sents = train_sents + dev_sents
    train_sents_filtered = list(filter(lambda sent: not _ignore_sentence(sent), train_sents))

    sample_size = int(len(train_sents_filtered) * args.train_sample_frac)
    rs = RandomState(seed=args.random_seed)
    train_sents_sample = rs.choice(train_sents_filtered, replace=False, size=sample_size).tolist()
    logger.info('Train with fraction of training data: {} sents out of {} sentences (frac={})',
                sample_size, len(train_sents_filtered), args.train_sample_frac)

    # We need to pass some dev data, otherwise flair raises a ZeroDivisionError
    # See: https://github.com/zalandoresearch/flair/issues/1139
    # We just split the training sample into half and instruct Flair to train_with_dev (see below).
    half = len(train_sents_sample) // 2
    flair_corpus = flair_utils.FilteredCorpus(train=train_sents_sample[:half],
                                              dev=train_sents_sample[half:],
                                              test=test_sents,
                                              ignore_sentence=_ignore_sentence)
    logger.info(flair_corpus)

    logger.info('Train model...')
    tagger = run_bilstmcrf.get_model(flair_corpus,
                                     corpus_name=args.corpus,
                                     embedding_lang=args.embedding_lang,
                                     pooled_contextual_embeddings=True)

    trainer = ModelTrainer(tagger, flair_corpus)
    trainer.train(join(model_dir, 'flair'),
                  max_epochs=150,
                  monitor_train=False,
                  train_with_dev=True,
                  save_final_model=args.save_final_model)

    logger.info('Make predictions...')
    run_bilstmcrf.make_predictions(tagger, flair_corpus)

    logger.info('Start evaluation...')
    evaluator = Evaluator(gold=corpus.test,
                          predicted=flair_utils.flair_sents_to_standoff(test_sents, test_docs))

    entity_level_metric = evaluator.entity_level()
    logger.info('\n{}', entity_level_metric)
    entity_level_metric.to_csv(join(model_dir, 'scores_entity.csv'))
    evaluator.token_level().to_csv(join(model_dir, 'scores_token.csv'))
    evaluator.token_level_blind().to_csv(join(model_dir, 'scores_token_blind.csv'))
    logger.info('Done.')
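
_ignore_sentence is referenced above but not shown. One plausible shape, purely illustrative and not the repository's actual helper, assuming the goal is to drop markup/metadata-only sentences before sampling:

def _ignore_sentence(sent):
    # Illustrative only: treat a Flair Sentence as ignorable when every token
    # looks like markup (e.g. '<META>') rather than natural language.
    return all(token.text.startswith('<') and token.text.endswith('>') for token in sent)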
Example #6
def main(args, model_dir):
    logger.info('Args = {}'.format(args))
    corpus = CorpusLoader().load_corpus(CORPUS_PATH[args.corpus])
    tokenizer = TokenizerFactory().tokenizer(args.corpus)
    logger.info('Loaded corpus: {}'.format(corpus))

    logger.info('Get sentences...')
    train_sents, _ = tagging_utils.standoff_to_sents(corpus.train, tokenizer, verbose=True)
    dev_sents, _ = tagging_utils.standoff_to_sents(corpus.dev, tokenizer, verbose=True)
    test_sents, test_docs = tagging_utils.standoff_to_sents(corpus.test, tokenizer, verbose=True)

    train_sents = train_sents + dev_sents
    train_sents_filtered = list(filter(_is_not_meta_sentence, train_sents))

    sample_size = int(len(train_sents_filtered) * args.train_sample_frac)
    rs = RandomState(seed=args.random_seed)
    train_sents_sample = rs.choice(train_sents_filtered, replace=False, size=sample_size).tolist()
    logger.info('Train with fraction of training data: {} sents out of {} sentences (frac={})',
                sample_size, len(train_sents_filtered), args.train_sample_frac)

    logger.info('Compute features...')
    feature_extractor, meta_sentence_filter = crf_util.FEATURE_EXTRACTOR[args.feature_extractor]
    X_train, y_train = crf_labeler.sents_to_features_and_labels(train_sents_sample,
                                                                feature_extractor)
    X_test, _ = crf_labeler.sents_to_features_and_labels(test_sents, feature_extractor)

    logger.info('len(X_train) = {}'.format(len(X_train)))
    logger.info('len(y_train) = {}'.format(len(y_train)))
    logger.info('len(X_test) = {}'.format(len(X_test)))

    crf = crf_labeler.SentenceFilterCRF(
        ignore_sentence=meta_sentence_filter,
        ignored_label='O',
        algorithm='lbfgs',
        c1=0.1,
        c2=0.1,
        max_iterations=100,
        all_possible_transitions=True
    )
    logger.info('Start training... {}'.format(crf))
    crf.fit(X_train, y_train)

    logger.info('CRF classes: {}'.format(crf.classes_))

    logger.info('Make predictions...')
    y_pred_test = crf.predict(X_test)

    logger.info('Start evaluation...')
    evaluator = Evaluator(gold=corpus.test,
                          predicted=tagging_utils.sents_to_standoff(y_pred_test, test_docs))

    entity_level_metric = evaluator.entity_level()
    logger.info('\n{}', entity_level_metric)
    entity_level_metric.to_csv(join(model_dir, 'scores_entity.csv'))
    evaluator.token_level().to_csv(join(model_dir, 'scores_token.csv'))
    evaluator.token_level_blind().to_csv(join(model_dir, 'scores_token_blind.csv'))
    logger.info('Done.')
Example #7
def evaluate_documents(gold_docs, pred_docs, language='nl'):
    return Evaluator(gold_docs, pred_docs, language=language)
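
Building on the earlier examples, a minimal usage sketch for this wrapper (the Document and Annotation values are invented for illustration):

text = 'A B C D.'
gold_docs = [Document(name='doc_a', text=text,
                      annotations=[Annotation('B C', 2, 5, 'PER')])]
pred_docs = [Document(name='doc_a', text=text,
                      annotations=[Annotation('B C', 2, 5, 'PER')])]

scores = evaluate_documents(gold_docs, pred_docs, language='nl').entity_level()
assert scores.f_score('PER') == 1    # exact span and tag match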