Example #1
    def test_ner(self):
        model = txt.sequence_tagger('bilstm-crf', self.preproc)
        learner = ktrain.get_learner(model,
                                     train_data=self.trn,
                                     val_data=self.val)
        lr = 0.001
        hist = learner.fit(lr, 1)

        # test training results
        #self.assertAlmostEqual(max(hist.history['lr']), lr)
        self.assertGreater(learner.validate(), 0.65)

        # test top losses
        obs = learner.top_losses(n=1)
        self.assertIn(obs[0][0], list(range(len(self.val.x))))
        learner.view_top_losses(n=1)

        # test weight decay
        self.assertEqual(len(learner.get_weight_decay()), 2)
        self.assertEqual(learner.get_weight_decay()[0], None)
        learner.set_weight_decay(1e-4)
        self.assertAlmostEqual(learner.get_weight_decay()[0], 1e-4)

        # test load and save model
        learner.save_model('/tmp/test_model')
        learner.load_model('/tmp/test_model')

        # test predictor
        SENT = 'There is a man named John Smith.'
        p = ktrain.get_predictor(learner.model, self.preproc)
        self.assertEqual(p.predict(SENT)[-2][1], 'I-PER')
        p.save('/tmp/test_predictor')
        p = ktrain.load_predictor('/tmp/test_predictor')
        self.assertEqual(p.predict(SENT)[-2][1], 'I-PER')
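
These tests reference self.trn, self.val, and self.preproc fixtures built elsewhere in the test class. A minimal sketch of such a setUp, assuming a toy dataset built with txt.entities_from_array (the real suite may instead load a CoNLL-formatted file, e.g. via txt.entities_from_conll2003):

import ktrain
from ktrain import text as txt
from unittest import TestCase

class TestNER(TestCase):
    def setUp(self):
        # hypothetical fixture data; these sentences and labels are
        # illustrative only, not the dataset used by the real tests
        x = [['There', 'is', 'a', 'man', 'named', 'John', 'Smith', '.']] * 32
        y = [['O', 'O', 'O', 'O', 'O', 'B-PER', 'I-PER', 'O']] * 32
        (self.trn, self.val, self.preproc) = txt.entities_from_array(x, y)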
Example #2
    def test_ner(self):
        model = txt.sequence_tagger('bilstm-bert',
                                    self.preproc,
                                    bert_model='bert-base-cased')
        learner = ktrain.get_learner(model,
                                     train_data=self.trn,
                                     val_data=self.val,
                                     batch_size=128)
        lr = 0.01
        hist = learner.fit(lr, 1)

        # test training results
        #self.assertAlmostEqual(max(hist.history['lr']), lr)
        self.assertGreater(learner.validate(), 0.79)

        # test top losses
        obs = learner.top_losses(n=1)
        self.assertIn(obs[0][0], list(range(len(self.val.x))))
        learner.view_top_losses(n=1)

        # test weight decay
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model
        learner.save_model('/tmp/test_model')
        learner.load_model('/tmp/test_model')

        # test predictor
        SENT = 'There is a man named John Smith.'
        p = ktrain.get_predictor(learner.model, self.preproc)
        self.assertEqual(p.predict(SENT)[-2][1], 'I-PER')
        p.save('/tmp/test_predictor')
        p = ktrain.load_predictor('/tmp/test_predictor')
        self.assertEqual(p.predict(SENT)[-2][1], 'I-PER')
        merged_prediction = p.predict(SENT,
                                      merge_tokens=True,
                                      return_offsets=True)
        self.assertEqual(merged_prediction[0][0], 'John Smith')
        self.assertEqual(merged_prediction[0][1], 'PER')
        self.assertEqual(merged_prediction[0][2], (21, 31))
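
For orientation, p.predict(SENT) returns one (token, tag) pair per token, which is why the second-to-last pair is checked for 'I-PER' (the final pair is the closing period), while merge_tokens=True together with return_offsets=True collapses contiguous entity tokens into (text, label, (start, end)) tuples. An illustrative continuation using the predictor p from this example; the printed shapes are assumed, not verbatim library output:

sent = 'There is a man named John Smith.'

# token-level predictions: one (token, tag) pair per token
print(p.predict(sent))
# e.g. [('There', 'O'), ('is', 'O'), ('a', 'O'), ('man', 'O'), ('named', 'O'),
#       ('John', 'B-PER'), ('Smith', 'I-PER'), ('.', 'O')]

# merged predictions: entity tokens collapsed into one span with its label
# and character offsets, matching the assertions above
print(p.predict(sent, merge_tokens=True, return_offsets=True))
# e.g. [('John Smith', 'PER', (21, 31))]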
Example #3
    def test_ner(self):
        wv_url = (
            "https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.vec.gz"
        )
        model = txt.sequence_tagger("bilstm-crf",
                                    self.preproc,
                                    wv_path_or_url=wv_url)
        learner = ktrain.get_learner(model,
                                     train_data=self.trn,
                                     val_data=self.val,
                                     batch_size=128)
        lr = 0.01
        hist = learner.fit(lr, 1)

        # test training results
        # self.assertAlmostEqual(max(hist.history['lr']), lr)
        self.assertGreater(learner.validate(), 0.65)

        # test top losses
        obs = learner.top_losses(n=1)
        self.assertIn(obs[0][0], list(range(len(self.val.x))))
        learner.view_top_losses(n=1)

        # test weight decay
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model
        learner.save_model("/tmp/test_model")
        learner.load_model("/tmp/test_model")

        # test predictor
        SENT = "There is a man named John Smith."
        p = ktrain.get_predictor(learner.model, self.preproc)
        self.assertEqual(p.predict(SENT)[-2][1], "I-PER")
        p.save("/tmp/test_predictor")
        p = ktrain.load_predictor("/tmp/test_predictor")
        self.assertEqual(p.predict(SENT)[-2][1], "I-PER")
Example #4
import json

import ktrain
from ktrain import text

# load the training sentences and their token-level entity labels
with open('./x_train.json', 'r') as f:
    x_train = json.load(f)

print(f'Length of x_train : {len(x_train)}')

with open('./y_train.json', 'r') as f:
    y_train = json.load(f)

print(f'Length of y_train : {len(y_train)}')

# build training/validation sets and the NER preprocessor from the arrays
(trn, val, preproc) = text.entities_from_array(x_train, y_train)

# list the available sequence-tagging model names
text.print_sequence_taggers()

# pretrained Dutch fastText word vectors used alongside the BERT embeddings
WV_URL = 'https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nl.300.vec.gz'
model = text.sequence_tagger('bilstm-bert', preproc, bert_model='bert-large-cased', wv_path_or_url=WV_URL)

learner = ktrain.get_learner(model, train_data=trn, val_data=val, batch_size=64)

# find a good learning rate, then train for one cycle at that rate
learner.lr_find()

learner.lr_plot()

learner.fit(1e-2, 1, cycle_len=1)

# evaluate on the validation set
learner.validate()

# wrap the trained model and preprocessor in a predictor for new text
predictor = ktrain.get_predictor(learner.model, preproc)

predictor.predict('Cloud counselage seminar in Data Science')
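
As in the earlier examples, the trained predictor can be saved and reloaded later without retraining. A short sketch; the '/tmp/dutch_ner_predictor' path is illustrative:

# persist the predictor (model + preprocessor) to disk
predictor.save('/tmp/dutch_ner_predictor')

# reload it in a fresh session and run inference
reloaded = ktrain.load_predictor('/tmp/dutch_ner_predictor')
print(reloaded.predict('Cloud counselage seminar in Data Science'))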