def test_ner(self):
    """Train a 'bilstm-crf' sequence tagger for one epoch and exercise the learner API.

    Covers, in order: the value returned by ``learner.validate()`` (must
    exceed 0.65), top-loss inspection, getting/setting weight decay, a model
    save/load round trip, and a predictor save/load round trip.

    Model and predictor artifacts are written to a per-test temporary
    directory instead of fixed ``/tmp`` paths, so the test is portable, does
    not collide with other tests that use the same file names, and cleans up
    after itself.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    model = txt.sequence_tagger('bilstm-crf', self.preproc)
    learner = ktrain.get_learner(model, train_data=self.trn, val_data=self.val)
    lr = 0.001
    # The check on the recorded learning rate (max of history['lr'] == lr)
    # is disabled in this test, so the history returned by fit() is not kept.
    learner.fit(lr, 1)

    # test training results
    self.assertGreater(learner.validate(), 0.65)

    # test top losses: the reported id must be a valid validation-set index
    obs = learner.top_losses(n=1)
    self.assertIn(obs[0][0], range(len(self.val.x)))
    learner.view_top_losses(n=1)

    # test weight decay (this learner reports a two-element sequence)
    self.assertEqual(len(learner.get_weight_decay()), 2)
    self.assertIsNone(learner.get_weight_decay()[0])
    learner.set_weight_decay(1e-4)
    self.assertAlmostEqual(learner.get_weight_decay()[0], 1e-4)

    SENT = 'There is a man named John Smith.'
    with tempfile.TemporaryDirectory() as tmp_dir:
        model_path = os.path.join(tmp_dir, 'test_model')
        predictor_path = os.path.join(tmp_dir, 'test_predictor')

        # test load and save model
        learner.save_model(model_path)
        learner.load_model(model_path)

        # test predictor: [-2] is the second-to-last (token, tag) pair,
        # whose tag is expected to be 'I-PER'
        p = ktrain.get_predictor(learner.model, self.preproc)
        self.assertEqual(p.predict(SENT)[-2][1], 'I-PER')

        # the reloaded predictor must give the same tag
        p.save(predictor_path)
        p = ktrain.load_predictor(predictor_path)
        self.assertEqual(p.predict(SENT)[-2][1], 'I-PER')
def test_ner(self):
    """Train a 'bilstm-bert' sequence tagger for one epoch and exercise the learner API.

    Covers, in order: the value returned by ``learner.validate()`` (must
    exceed 0.79), top-loss inspection, getting/setting weight decay, a model
    save/load round trip, a predictor save/load round trip, and merged-token
    prediction with character offsets.

    Model and predictor artifacts are written to a per-test temporary
    directory instead of fixed ``/tmp`` paths, so the test is portable, does
    not collide with other tests that use the same file names, and cleans up
    after itself.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    model = txt.sequence_tagger(
        'bilstm-bert', self.preproc, bert_model='bert-base-cased'
    )
    learner = ktrain.get_learner(
        model, train_data=self.trn, val_data=self.val, batch_size=128
    )
    lr = 0.01
    # The check on the recorded learning rate (max of history['lr'] == lr)
    # is disabled in this test, so the history returned by fit() is not kept.
    learner.fit(lr, 1)

    # test training results
    self.assertGreater(learner.validate(), 0.79)

    # test top losses: the reported id must be a valid validation-set index
    obs = learner.top_losses(n=1)
    self.assertIn(obs[0][0], range(len(self.val.x)))
    learner.view_top_losses(n=1)

    # test weight decay (this learner reports a single value)
    self.assertIsNone(learner.get_weight_decay())
    learner.set_weight_decay(1e-2)
    self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

    SENT = 'There is a man named John Smith.'
    with tempfile.TemporaryDirectory() as tmp_dir:
        model_path = os.path.join(tmp_dir, 'test_model')
        predictor_path = os.path.join(tmp_dir, 'test_predictor')

        # test load and save model
        learner.save_model(model_path)
        learner.load_model(model_path)

        # test predictor: [-2] is the second-to-last (token, tag) pair,
        # whose tag is expected to be 'I-PER'
        p = ktrain.get_predictor(learner.model, self.preproc)
        self.assertEqual(p.predict(SENT)[-2][1], 'I-PER')

        # the reloaded predictor must give the same tag
        p.save(predictor_path)
        p = ktrain.load_predictor(predictor_path)
        self.assertEqual(p.predict(SENT)[-2][1], 'I-PER')

        # merged prediction: first entry is (text, label, (start, end));
        # 'John Smith' occupies characters 21..31 of SENT
        merged_prediction = p.predict(SENT, merge_tokens=True, return_offsets=True)
        self.assertEqual(merged_prediction[0][0], 'John Smith')
        self.assertEqual(merged_prediction[0][1], 'PER')
        self.assertEqual(merged_prediction[0][2], (21, 31))
def test_ner(self):
    """Train a 'bilstm-crf' tagger with fastText word vectors and exercise the learner API.

    Covers, in order: the value returned by ``learner.validate()`` (must
    exceed 0.65), top-loss inspection, getting/setting weight decay, a model
    save/load round trip, and a predictor save/load round trip.

    Model and predictor artifacts are written to a per-test temporary
    directory instead of fixed ``/tmp`` paths, so the test is portable, does
    not collide with other tests that use the same file names, and cleans up
    after itself.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    wv_url = (
        "https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.en.300.vec.gz"
    )
    model = txt.sequence_tagger("bilstm-crf", self.preproc, wv_path_or_url=wv_url)
    learner = ktrain.get_learner(
        model, train_data=self.trn, val_data=self.val, batch_size=128
    )
    lr = 0.01
    # The check on the recorded learning rate (max of history['lr'] == lr)
    # is disabled in this test, so the history returned by fit() is not kept.
    learner.fit(lr, 1)

    # test training results
    self.assertGreater(learner.validate(), 0.65)

    # test top losses: the reported id must be a valid validation-set index
    obs = learner.top_losses(n=1)
    self.assertIn(obs[0][0], range(len(self.val.x)))
    learner.view_top_losses(n=1)

    # test weight decay (this learner reports a single value)
    self.assertIsNone(learner.get_weight_decay())
    learner.set_weight_decay(1e-2)
    self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

    SENT = "There is a man named John Smith."
    with tempfile.TemporaryDirectory() as tmp_dir:
        model_path = os.path.join(tmp_dir, "test_model")
        predictor_path = os.path.join(tmp_dir, "test_predictor")

        # test load and save model
        learner.save_model(model_path)
        learner.load_model(model_path)

        # test predictor: [-2] is the second-to-last (token, tag) pair,
        # whose tag is expected to be "I-PER"
        p = ktrain.get_predictor(learner.model, self.preproc)
        self.assertEqual(p.predict(SENT)[-2][1], "I-PER")

        # the reloaded predictor must give the same tag
        p.save(predictor_path)
        p = ktrain.load_predictor(predictor_path)
        self.assertEqual(p.predict(SENT)[-2][1], "I-PER")
# Load the two parallel training arrays from JSON and report their sizes.
with open('./x_train.json', 'r') as fp:
    x_train = json.load(fp)
print(f'Length of x_train : {len(x_train)}')

with open('./y_train.json', 'r') as fp:
    y_train = json.load(fp)
print(f'Length of y_train : {len(y_train)}')

# Build the train/validation sets and the preprocessor from the raw arrays.
trn, val, preproc = text.entities_from_array(x_train, y_train)

# Show the sequence-tagger architectures that are available.
text.print_sequence_taggers()

# NOTE(review): the 'nl' in the file name suggests Dutch fastText vectors,
# while the BERT checkpoint and the sample sentence below look English --
# confirm this pairing is intended.
WV_URL = 'https://dl.fbaipublicfiles.com/fasttext/vectors-crawl/cc.nl.300.vec.gz'
model = text.sequence_tagger(
    'bilstm-bert',
    preproc,
    bert_model='bert-large-cased',
    wv_path_or_url=WV_URL,
)
learner = ktrain.get_learner(
    model,
    train_data=trn,
    val_data=val,
    batch_size=64,
)

# Learning-rate search and plot, then a single training pass at 1e-2.
learner.lr_find()
learner.lr_plot()
learner.fit(1e-2, 1, cycle_len=1)
learner.validate()

# Tag one sample sentence with the trained model.
# NOTE(review): the prediction is not stored or printed; presumably this came
# from a notebook cell where the value was displayed -- confirm.
predictor = ktrain.get_predictor(learner.model, preproc)
predictor.predict('Cloud counselage seminar in Data Science')