def test_model_graph_dimensions(X_sequence, mod_attr, graph_attr):
    """A hyperparameter stored on the wrapper must equal the matching
    attribute on the underlying ``torch`` RNN module."""
    X_train, X_test, y_train, y_test, vocab = X_sequence
    model = TorchRNNClassifier(vocab, max_iter=1)
    model.fit(X_train, y_train)
    wrapper_value = getattr(model, mod_attr)
    graph_value = getattr(model.model.rnn.rnn, graph_attr)
    assert wrapper_value == graph_value
def fit_elmo_rnn(X, y):
    """Fit a TorchRNNClassifier directly on pre-computed feature
    sequences (ELMo representations), bypassing the embedding layer.

    Returns the fitted classifier.
    """
    classifier = TorchRNNClassifier(
        vocab=[],
        max_iter=50,
        use_embedding=False)
    classifier.fit(X, y)
    return classifier
def test_predict_functions_honor_device(X_sequence, func):
    """Passing an invalid device name to a prediction method must raise
    ``RuntimeError`` (i.e. the ``device`` argument is actually honored)."""
    X_train, X_test, y_train, y_test, vocab = X_sequence
    model = TorchRNNClassifier(vocab, max_iter=2)
    model.fit(X_train, y_train)
    predict = getattr(model, func)
    with pytest.raises(RuntimeError):
        predict(X_test, device="FAKE_DEVICE")
def fit_hf_rnn(X, y):
    """Fit a TorchRNNClassifier on pre-computed feature sequences.

    With ``use_embedding=False`` the BERT hidden states are passed in
    directly, so no vocabulary is needed. Returns the fitted classifier.
    """
    classifier = TorchRNNClassifier(
        vocab=[],
        max_iter=50,
        hidden_dim=50,
        use_embedding=False)
    classifier.fit(X, y)
    return classifier
def test_model(X_sequence):
    """Smoke test: fit, predict, and predict_proba all run without error.

    This makes no claim about model quality.
    """
    X_train, X_test, y_train, y_test, vocab = X_sequence
    model = TorchRNNClassifier(vocab=vocab, max_iter=100)
    model.fit(X_train, y_train)
    model.predict(X_test)
    model.predict_proba(X_test)
def test_predict_restores_device(X_sequence, func):
    """Predicting with an explicit ``device`` must not permanently change
    the model's own device setting."""
    X_train, X_test, y_train, y_test, vocab = X_sequence
    model = TorchRNNClassifier(vocab, max_iter=2)
    model.fit(X_train, y_train)
    original_device = model.device
    # Sanity check: the override below must differ from the current device.
    assert original_device != torch.device("cpu:0")
    predict = getattr(model, func)
    predict(X_test, device="cpu:0")
    assert model.device == original_device
def test_torch_rnn_classifier_save_load(X_sequence):
    """Round-trip a fitted model through pickle: the reloaded model must
    be able to both predict and be refit."""
    X_train, X_test, y_train, y_test, vocab = X_sequence
    model = TorchRNNClassifier(vocab=vocab, max_iter=2)
    model.fit(X_train, y_train)
    model.predict(X_test)
    # NOTE: reopening the named temp file while it is still open works on
    # POSIX but not on Windows (see tempfile.NamedTemporaryFile docs).
    with tempfile.NamedTemporaryFile(mode='wb') as f:
        name = f.name
        model.to_pickle(name)
        reloaded = TorchRNNClassifier.from_pickle(name)
        reloaded.predict(X_test)
        reloaded.fit(X_test, y_test)
def test_embedding_update_control(X_sequence, freeze, outcome):
    """``freeze_embedding`` controls whether training leaves the supplied
    embedding unchanged (`outcome` is the expected equality result)."""
    X_train, X_test, y_train, y_test, vocab = X_sequence
    embed_dim = 5
    initial_embedding = np.ones((len(vocab), embed_dim))
    model = TorchRNNClassifier(
        vocab,
        max_iter=10,
        embedding=initial_embedding,
        freeze_embedding=freeze)
    model.fit(X_train, y_train)
    trained_embedding = model.model.rnn.embedding.weight.detach().cpu().numpy()
    assert np.array_equal(initial_embedding, trained_embedding) == outcome
def test_pretrained_embedding(X_sequence):
    """A frozen pretrained embedding must be bit-identical after fitting."""
    X_train, X_test, y_train, y_test, vocab = X_sequence
    embed_dim = 5
    pretrained = np.ones((len(vocab), embed_dim))
    model = TorchRNNClassifier(
        vocab,
        max_iter=1,
        embedding=pretrained,
        freeze_embedding=True)
    model.fit(X_train, y_train)
    learned = model.model.rnn.embedding.weight.detach().cpu().numpy()
    assert np.array_equal(pretrained, learned)
def test_cheese_disease(cheese_disease_dataset):
    """Fit on the cheese/disease data and require accuracy above 0.80."""
    vocab = cheese_disease_dataset['vocab']
    X_train = cheese_disease_dataset['X_train']
    y_train = cheese_disease_dataset['y_train']
    model = TorchRNNClassifier(
        vocab=vocab,
        embed_dim=50,
        hidden_dim=50,
        max_iter=200)
    model.fit(X_train, y_train)
    # NOTE(review): despite the names, these reuse the *training* keys, so
    # the assertion below is on training accuracy — confirm this is intended
    # (the dataset may also carry 'X_test'/'y_test').
    X_test = cheese_disease_dataset['X_train']
    y_test = cheese_disease_dataset['y_train']
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)
    assert accuracy > 0.80
def test_simple_example_params(X_sequence, param, expected):
    """Fit a model with a single overridden hyperparameter and check that
    accuracy clears a modest floor.

    Parameters
    ----------
    X_sequence : fixture
        Tuple of (X_train, X_test, y_train, y_test, vocab).
    param : str
        Name of the TorchRNNClassifier keyword to override.
    expected : object
        Value to set for `param`.
    """
    X_train, X_test, y_train, y_test, vocab = X_sequence
    mod = TorchRNNClassifier(vocab, **{param: expected})
    # Fix: use identity comparison for the boolean sentinel (PEP 8 / E712)
    # instead of `expected == False`.
    if param == "use_embedding" and expected is False:
        # Without an embedding layer, the model consumes vectors directly,
        # so map each token to a random fixed vector.
        embedding = np.random.uniform(
            low=-1.0, high=1.0, size=(len(vocab), 60))
        X_train = [[embedding[vocab.index(w)] for w in ex] for ex in X_train]
        X_test = [[embedding[vocab.index(w)] for w in ex] for ex in X_test]
    mod.fit(X_train, y_train)
    preds = mod.predict(X_test)
    acc = accuracy_score(y_test, preds)
    # With max_iter=0 the model is untrained, so no accuracy floor applies.
    if not (param == "max_iter" and expected == 0):
        assert acc >= 0.60
def fit_rnn_classifier(X, y):
    """Fit a bidirectional TorchRNNClassifier on ``(X, y)``.

    The vocabulary is capped at 10 words to keep this fast; presumably a
    real run would use a much larger cap (e.g. 10000) — confirm with the
    experiment settings. Returns the fitted classifier.
    """
    vocab = get_vocab(X, n_words=10)
    classifier = TorchRNNClassifier(
        vocab,
        eta=0.05,
        embedding=None,
        batch_size=1000,
        embed_dim=50,
        hidden_dim=50,
        max_iter=5,
        l2_strength=0.001,
        bidirectional=True,
        hidden_activation=nn.ReLU())
    classifier.fit(X, y)
    return classifier
def fit_simple_chained_rnn(X, y):
    """Fit a small TorchRNNClassifier (10k-word vocab, 50-dim hidden layer)
    on ``(X, y)`` and return the fitted model."""
    vocabulary = utils.get_vocab(X, n_words=10000)
    classifier = TorchRNNClassifier(vocabulary, hidden_dim=50, max_iter=10)
    classifier.fit(X, y)
    return classifier
class RNN_Classifier:
    """Wrapper around TorchRNNClassifier for a transfer-learning experiment.

    Holds the sent140, emoji, and emojiless splits plus their GloVe
    embeddings/vocabularies, and exposes one ``run_*`` method per training
    regime. Each run trains a model and returns predictions on the emoji
    splits as ``(None, None, train_preds, dev_preds, test_preds)``, where
    ``test_preds`` is None unless ``testing`` was set at construction.
    """

    def __init__(self, sent140_train_X_list, sent140_dev_X_list,
                 sent140_train_Y, sent140_dev_Y, sent140_train_embedding,
                 sent140_train_glove_vocab, emoji_train_X_list,
                 emoji_dev_X_list, emoji_test_X_list, emoji_train_Y,
                 emoji_dev_Y, emoji_test_Y, sent140_emoji_train_embedding,
                 sent140_emoji_train_glove_vocab, emojiless_train_X_list,
                 emojiless_dev_X_list, emojiless_test_X_list,
                 emojiless_train_Y, emojiless_dev_Y, emojiless_test_Y,
                 sent140_emojiless_train_embedding,
                 sent140_emojiless_train_glove_vocab, testing):
        """Store all splits, embeddings, and vocabularies.

        Test splits are kept only when ``testing`` is truthy.
        """
        self.testing = testing
        # sent140 splits
        self.sent140_train_X = sent140_train_X_list
        self.sent140_train_Y = sent140_train_Y
        self.sent140_dev_X = sent140_dev_X_list
        self.sent140_dev_Y = sent140_dev_Y
        # emoji splits
        self.emoji_train_X = emoji_train_X_list
        self.emoji_train_Y = emoji_train_Y
        self.emoji_dev_X = emoji_dev_X_list
        self.emoji_dev_Y = emoji_dev_Y
        if self.testing:
            self.emoji_test_X = emoji_test_X_list
            self.emoji_test_Y = emoji_test_Y
        # emojiless splits
        self.emojiless_train_X = emojiless_train_X_list
        self.emojiless_train_Y = emojiless_train_Y
        self.emojiless_dev_X = emojiless_dev_X_list
        self.emojiless_dev_Y = emojiless_dev_Y
        if self.testing:
            self.emojiless_test_X = emojiless_test_X_list
            self.emojiless_test_Y = emojiless_test_Y
        # embeddings and vocabularies, one pair per training regime
        self.sent140_train_embedding = sent140_train_embedding
        self.sent140_train_glove_vocab = sent140_train_glove_vocab
        self.sent140_emoji_train_embedding = sent140_emoji_train_embedding
        self.sent140_emoji_train_glove_vocab = sent140_emoji_train_glove_vocab
        self.sent140_emojiless_train_embedding = sent140_emojiless_train_embedding
        self.sent140_emojiless_train_glove_vocab = sent140_emojiless_train_glove_vocab

    def _predict_on_emoji(self, model):
        """Predict on the emoji train/dev splits (and test when enabled).

        Returns ``(None, None, train_preds, dev_preds, test_preds)``; the
        two leading Nones are placeholders for sent140 predictions that
        earlier versions of this code computed.
        """
        train_preds = model.predict(self.emoji_train_X)
        dev_preds = model.predict(self.emoji_dev_X)
        if self.testing:
            test_preds = model.predict(self.emoji_test_X)
        else:
            test_preds = None
        return (None, None, train_preds, dev_preds, test_preds)

    def run_sent140(self):
        """Train on sent140 only, then predict on the emoji splits."""
        self.model_sent140 = TorchRNNClassifier(
            self.sent140_train_glove_vocab,
            embedding=self.sent140_train_embedding)
        self.model_sent140.fit(self.sent140_train_X, self.sent140_train_Y)
        return self._predict_on_emoji(self.model_sent140)

    def run_sent140_emojiless(self):
        """Train on sent140 + emojiless combined, then predict on emoji."""
        self.model_sent140_emojiless = TorchRNNClassifier(
            self.sent140_emojiless_train_glove_vocab,
            embedding=self.sent140_emojiless_train_embedding)
        combined_X = self.sent140_train_X + self.emojiless_train_X
        combined_Y = self.sent140_train_Y + self.emojiless_train_Y
        self.model_sent140_emojiless.fit(combined_X, combined_Y)
        return self._predict_on_emoji(self.model_sent140_emojiless)

    def run_sent140_emoji(self):
        """Train on sent140 + emoji combined, then predict on emoji."""
        self.model_sent140_emoji = TorchRNNClassifier(
            self.sent140_emoji_train_glove_vocab,
            embedding=self.sent140_emoji_train_embedding)
        combined_X = self.sent140_train_X + self.emoji_train_X
        combined_Y = self.sent140_train_Y + self.emoji_train_Y
        self.model_sent140_emoji.fit(combined_X, combined_Y)
        return self._predict_on_emoji(self.model_sent140_emoji)