import torch
from nose.tools import eq_, assert_almost_equal

# Assumed project-local modules and setup: `bilstm` and `preproc` are the
# assignment's own modules, and `model`, `X_tr`, `Y_tr`, `word_to_ix`, and
# `tag_to_ix` are initialized by a setup function elsewhere in this file.
import bilstm
import preproc


def test_predict():
    global model, X_tr, Y_tr, word_to_ix, tag_to_ix
    torch.manual_seed(711)
    best_tags = model.predict(bilstm.prepare_sequence(X_tr[5], word_to_ix))
    eq_(best_tags[0:5], ['NOUN', 'ADJ', 'CONJ', 'ADV', 'ADJ'])
    best_tags = model.predict(bilstm.prepare_sequence(X_tr[0], word_to_ix))
    eq_(best_tags[0:5], ['X', 'NUM', 'INTJ', 'PART', 'AUX'])
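
# A minimal sketch of Viterbi decoding, the algorithm predict() is expected to
# implement. Assumptions: `feats` is a (seq_len, n_tags) tensor of emission
# scores, `transitions[i, j]` scores moving from tag j to tag i (the PyTorch
# CRF-tutorial convention), and START/STOP transitions are omitted for brevity.
# Names here are illustrative, not the actual model's attributes.
def viterbi_sketch(feats, transitions, ix_to_tag):
    # viterbi[i]: best score of any tag sequence ending in tag i so far
    viterbi = feats[0].clone()
    backpointers = []
    for feat in feats[1:]:
        scores = viterbi.unsqueeze(0) + transitions  # scores[i, j]: j -> i
        best_scores, best_prev = scores.max(dim=1)
        viterbi = best_scores + feat
        backpointers.append(best_prev)
    # Backtrack from the best final tag to recover the best path.
    path = [viterbi.argmax().item()]
    for bp in reversed(backpointers):
        path.append(bp[path[-1]].item())
    path.reverse()
    return [ix_to_tag[i] for i in path]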
def test_neg_log_likelihood():
    global model, X_tr, Y_tr, word_to_ix, tag_to_ix
    torch.manual_seed(711)
    lstm_feats = model.forward(bilstm.prepare_sequence(X_tr[5], word_to_ix))
    loss = model.neg_log_likelihood(lstm_feats, bilstm.prepare_sequence(Y_tr[5], tag_to_ix))
    assert_almost_equal(loss.item(), 50.326389, places=4)
    lstm_feats = model.forward(bilstm.prepare_sequence(X_tr[0], word_to_ix))
    loss = model.neg_log_likelihood(lstm_feats, bilstm.prepare_sequence(Y_tr[0], tag_to_ix))
    assert_almost_equal(loss.item(), 102.239616, places=4)
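
# For a linear-chain CRF the negative log-likelihood factors as
#     NLL(x, y) = log Z(x) - score(x, y),
# where log Z is what forward_alg computes (see test_forward_alg below) and
# score(x, y) is the gold-path score from score_sentence. A minimal sketch,
# assuming the model exposes exactly those two methods:
def nll_sketch(model, lstm_feats, gold_tags):
    return model.forward_alg(lstm_feats) - model.score_sentence(lstm_feats, gold_tags)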
def test_forward_alg():
    global model, X_tr, word_to_ix
    torch.manual_seed(711)
    lstm_feats = model.forward(bilstm.prepare_sequence(X_tr[0], word_to_ix))
    alpha = model.forward_alg(lstm_feats)
    assert_almost_equal(alpha.item(), 104.916992, places=4)
    lstm_feats = model.forward(bilstm.prepare_sequence(X_tr[1], word_to_ix))
    alpha = model.forward_alg(lstm_feats)
    assert_almost_equal(alpha.item(), 65.290924, places=4)
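
# A minimal sketch of the CRF forward algorithm that test_forward_alg
# exercises: alpha accumulates a log-sum-exp over all tag sequences, so
# exp(alpha) is the partition function Z. Same assumed `transitions[i, j]`
# convention as the Viterbi sketch above; START/STOP handling is omitted.
def forward_alg_sketch(feats, transitions):
    alpha = feats[0].clone()  # log-scores of all length-1 prefixes
    for feat in feats[1:]:
        # scores[i, j] = alpha[j] + transition(j -> i) + emission(i)
        scores = alpha.unsqueeze(0) + transitions + feat.unsqueeze(1)
        alpha = torch.logsumexp(scores, dim=1)
    return torch.logsumexp(alpha, dim=0)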
def test_score_sentence():
    global model, X_tr, Y_tr, word_to_ix, tag_to_ix
    torch.manual_seed(711)
    lstm_feats = model.forward(bilstm.prepare_sequence(X_tr[0], word_to_ix))
    score = model.score_sentence(lstm_feats, bilstm.prepare_sequence(Y_tr[0], tag_to_ix))
    assert_almost_equal(score.item(), 2.659940, places=4)
    lstm_feats = model.forward(bilstm.prepare_sequence(X_tr[1], word_to_ix))
    score = model.score_sentence(lstm_feats, bilstm.prepare_sequence(Y_tr[1], tag_to_ix))
    assert_almost_equal(score.item(), -2.397999, places=4)
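
# A minimal sketch of score_sentence: the unnormalized log-score of one gold
# tag path is the sum of its emission scores plus its transition scores
# (same assumed conventions as above, START/STOP omitted).
def score_sentence_sketch(feats, tags, transitions):
    score = feats[0][tags[0]]
    for t in range(1, len(tags)):
        score = score + transitions[tags[t], tags[t - 1]] + feats[t][tags[t]]
    return score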
def apply_model(model, outfilename, word_to_ix, all_tags=None,
                trainfile=TRAIN_FILE, testfile=DEV_FILE):
    """Apply the model to testfile and write the best tag sequence for each
    sentence to outfilename, one tag per line with a blank line between
    sentences."""
    if all_tags is None:
        all_tags = set()
        # Collecting the tag set takes a full pass over the training data,
        # which is slow; pass all_tags in to skip it.
        for words, tags in preproc.conll_seq_generator(trainfile):
            for tag in tags:
                all_tags.add(tag)
    with open(outfilename, 'w') as outfile:
        for words, _ in preproc.conll_seq_generator(testfile):
            seq_words = bilstm.prepare_sequence(words, word_to_ix)
            pred_tags = model.predict(seq_words)
            for tag in pred_tags:
                outfile.write(tag + '\n')
            outfile.write('\n')
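
# Example usage (a sketch: 'dev-predictions.preds' is an illustrative output
# path; `model` and `word_to_ix` are assumed to be set up as in the tests
# above):
#     apply_model(model, 'dev-predictions.preds', word_to_ix)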
def test_dlmodel_forward():
    global model, X_tr, word_to_ix
    torch.manual_seed(711)
    # forward is expected to return one emission-score vector per token;
    # spot-check the first three scores for the first token.
    lstm_feats = model(bilstm.prepare_sequence(X_tr[0], word_to_ix))
    assert_almost_equal(lstm_feats[0].data.numpy()[0], 0.044214, places=4)
    assert_almost_equal(lstm_feats[0].data.numpy()[1], -0.071416, places=4)
    assert_almost_equal(lstm_feats[0].data.numpy()[2], -0.125279, places=4)
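
# A minimal sketch of the feature extractor that test_dlmodel_forward checks:
# embed each token, run a bidirectional LSTM, and project each hidden state to
# tag space, giving one emission-score vector per token. Class name and sizes
# are illustrative assumptions, not the actual model definition.
import torch.nn as nn

class BiLSTMFeatsSketch(nn.Module):
    def __init__(self, vocab_size, n_tags, embedding_dim=30, hidden_dim=30):
        super().__init__()
        self.embed = nn.Embedding(vocab_size, embedding_dim)
        # hidden_dim // 2 per direction, so concatenated output is hidden_dim
        self.lstm = nn.LSTM(embedding_dim, hidden_dim // 2, bidirectional=True)
        self.hidden2tag = nn.Linear(hidden_dim, n_tags)

    def forward(self, sentence):
        embeds = self.embed(sentence).unsqueeze(1)   # (seq_len, 1, emb_dim)
        lstm_out, _ = self.lstm(embeds)              # (seq_len, 1, hidden_dim)
        return self.hidden2tag(lstm_out.squeeze(1))  # (seq_len, n_tags)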