Пример #1
0
def main(train_data_dir,
         test_data_dir=None,
         output_dir=None,
         lang='de',
         blank_model=False,
         verbose=False,
         epochs=100,
         init_batch_size=1,
         init_dropout_rate=0.5,
         batch_comp_rate=1.001):
    """Train the 'ner' pipe on annotated data, then optionally evaluate and save.

    Args:
        train_data_dir: Location handed to ``load_data`` for the training set.
        test_data_dir: Optional location of evaluation data; when it is not
            ``None`` the trained pipeline is run through ``test`` on it.
        output_dir: Optional destination; when truthy the pipeline is handed
            to ``save_model``.
        lang: Key used to look up the model in ``INSTALLED_MODELS``.
        blank_model: When true, start from ``spacy.blank`` and add a new
            'ner' pipe; otherwise start from ``spacy.load``.
        verbose: Forwarded to ``train``.
        epochs: Forwarded to ``train``.
        init_batch_size: Forwarded to ``train``.
        init_dropout_rate: Forwarded to ``train``.
        batch_comp_rate: Forwarded to ``train``.
    """
    model_name = INSTALLED_MODELS[lang]
    if not blank_model:
        nlp = spacy.load(model_name)
    else:
        # NOTE(review): spacy.blank is documented to take a language code,
        # while spacy.load takes a model name/path; confirm that the values
        # stored in INSTALLED_MODELS are valid for both calls.
        nlp = spacy.blank(model_name)
        add_pipe(nlp, 'ner')

    training_samples = load_data(train_data_dir)
    # Register the labels found in the training data on the 'ner' pipe
    # before training starts.
    ner_pipe = nlp.get_pipe('ner')
    add_labels(ner_pipe, get_ner_labels(training_samples))

    train(training_samples, 'ner', epochs, nlp, blank_model, verbose,
          init_batch_size, init_dropout_rate, batch_comp_rate)

    if test_data_dir is not None:
        held_out_samples = load_data(test_data_dir)
        print('\nPerformance on test data:')
        # The final report is always run with the third argument set to False.
        test(held_out_samples, nlp, False)

    if output_dir:
        save_model(nlp, output_dir)
Пример #2
0
def main(test_data_dir, model='de', verbose=False):
    """Evaluate a spaCy pipeline on annotated test data.

    Args:
        test_data_dir: Location handed to ``load_data`` for the test set.
        model: Either a key of ``INSTALLED_MODELS``, which is resolved to the
            value registered under that key, or any other value, which is
            passed to ``spacy.load`` unchanged.
        verbose: Forwarded to ``test``.
    """
    # Resolve the shortcut using the key that was actually requested.
    # Previously the lookup was hard-coded to 'de', so every other key in
    # INSTALLED_MODELS silently loaded the German model.
    if model in INSTALLED_MODELS:
        model = INSTALLED_MODELS[model]

    nlp = spacy.load(model)
    test_data = load_data(test_data_dir)
    test(test_data, nlp, verbose)
Пример #3
0
 def test_load_data(self):
     """Check what ``load_data`` returns for the annotation fixture.

     Verifies the number of loaded samples, the text of the first sample and
     the entity spans stored under its 'entities' key.
     """
     expected_text = (
         'Die Revision des Klägers gegen das Urteil des 6. Zivilsenats des Oberlandesgerichts Köln '
         'vom 16. Dezember 2016 wird zurückgewiesen.'
     )
     expected_entities = [(65, 88, 'ORG')]

     samples = load_data(self.ANNOTATIONS_PATH)
     self.assertEqual(2, len(samples))

     # Each sample is indexed as (text, annotations).
     first_sample = samples[0]
     self.assertEqual(expected_text, first_sample[0])
     self.assertEqual(expected_entities, first_sample[1]['entities'])
Пример #4
0
 def test_train_overfit(self):
     """Check that training on the fixture raises the score on that fixture.

     The pipeline is scored on the annotation data, trained on the same data
     for 10 rounds, and scored again; the second score must be greater.
     """
     samples = load_data(self.ANNOTATIONS_PATH)
     pipeline = spacy.load('de_core_news_sm')

     score_before = test(samples, pipeline, False)

     ner_pipe = pipeline.get_pipe('ner')
     labels = get_ner_labels(samples)
     add_labels(ner_pipe, labels)

     # NOTE(review): the first True sits in the position where the training
     # script passes its `blank_model` flag, yet this pipeline is loaded,
     # not blank. Confirm against train's signature that this is intended.
     train(samples, 'ner', 10, pipeline, True, True, 1, 0.0, 1.0)

     score_after = test(samples, pipeline, False)
     self.assertGreater(score_after, score_before)
Пример #5
0
 def test_train_blank(self):
     """Smoke test: train a blank 'de' pipeline on the fixture.

     There are no assertions; the test passes as long as nothing raises.
     """
     samples = load_data(self.ANNOTATIONS_PATH)

     pipeline = spacy.blank('de')
     add_pipe(pipeline, 'ner')

     ner_pipe = pipeline.get_pipe('ner')
     labels = get_ner_labels(samples)
     add_labels(ner_pipe, labels)

     # The third positional argument is 1, presumably the number of training
     # rounds; confirm against train's signature.
     train(samples, 'ner', 1, pipeline, True, True, 1, 0.0, 1.0)