def test_model(X_sequence):
    """Smoke test: fit a classifier and call both prediction methods.

    Nothing is asserted about the quality of the resulting model; the test
    passes as long as none of the calls raises.
    """
    train_seqs, test_seqs, train_labels, _test_labels, vocab = X_sequence
    classifier = TorchRNNClassifier(vocab=vocab, max_iter=100)
    classifier.fit(train_seqs, train_labels)
    # Return values are discarded on purpose: only "does it run" is checked.
    classifier.predict(test_seqs)
    classifier.predict_proba(test_seqs)
def test_torch_rnn_classifier_save_load(X_sequence):
    """Round-trip a fitted classifier through pickle and keep using it.

    The model is fitted briefly, written with `to_pickle`, restored with
    `TorchRNNClassifier.from_pickle`, and the restored model is then asked
    to predict and to fit again. Nothing is asserted about accuracy; the
    test passes as long as none of the calls raises.
    """
    import os

    X_train, X_test, y_train, y_test, vocab = X_sequence
    mod = TorchRNNClassifier(vocab=vocab, max_iter=2)
    mod.fit(X_train, y_train)
    mod.predict(X_test)
    # Use a path inside a temporary directory rather than the name of a
    # still-open NamedTemporaryFile: re-opening such a file by name while
    # the original handle is open is not portable (it fails on Windows).
    with tempfile.TemporaryDirectory() as tmp_dir:
        name = os.path.join(tmp_dir, "torch_rnn_classifier.pickle")
        mod.to_pickle(name)
        mod2 = TorchRNNClassifier.from_pickle(name)
        mod2.predict(X_test)
        mod2.fit(X_test, y_test)
def test_cheese_disease(cheese_disease_dataset):
    """Check that the classifier can fit the cheese/disease training data.

    Accuracy is measured on the same 'X_train' / 'y_train' entries that were
    used for fitting, so this checks that the model can learn the data, not
    that it generalizes. The test fails unless accuracy exceeds 0.80.
    """
    data = cheese_disease_dataset
    classifier = TorchRNNClassifier(
        vocab=data['vocab'],
        embed_dim=50,
        hidden_dim=50,
        max_iter=200)
    classifier.fit(data['X_train'], data['y_train'])
    # Evaluation deliberately reads the training split again.
    eval_inputs = data['X_train']
    eval_labels = data['y_train']
    predicted = classifier.predict(eval_inputs)
    assert accuracy_score(eval_labels, predicted) > 0.80
def test_simple_example_params(X_sequence, param, expected):
    """Fit with one keyword argument overridden and check accuracy.

    `param` is the name of a `TorchRNNClassifier` keyword argument and
    `expected` the value passed for it (presumably supplied by a pytest
    parametrization that is not visible in this block -- confirm).

    When `use_embedding` is falsy, every token in the train and test
    examples is replaced by a row of a random 60-dimensional matrix, so the
    model receives vectors instead of vocabulary items.

    The accuracy threshold (>= 0.60) is skipped when `max_iter` is 0.
    """
    X_train, X_test, y_train, y_test, vocab = X_sequence
    mod = TorchRNNClassifier(vocab, **{param: expected})
    if param == "use_embedding" and not expected:
        embedding = np.random.uniform(
            low=-1.0, high=1.0, size=(len(vocab), 60))
        # Build the word -> row lookup once instead of calling
        # `vocab.index` (a linear scan) for every token. `setdefault`
        # keeps the first occurrence, matching `list.index` semantics.
        # Assumes vocabulary items are hashable (e.g. strings).
        word_to_row = {}
        for row, word in enumerate(vocab):
            word_to_row.setdefault(word, row)
        X_train = [[embedding[word_to_row[w]] for w in ex] for ex in X_train]
        X_test = [[embedding[word_to_row[w]] for w in ex] for ex in X_test]
    mod.fit(X_train, y_train)
    preds = mod.predict(X_test)
    acc = accuracy_score(y_test, preds)
    # With zero training iterations no accuracy level is required.
    if not (param == "max_iter" and expected == 0):
        assert acc >= 0.60
# In[27]: torch_rnn = TorchRNNClassifier(sst_train_vocab, embed_dim=50, hidden_dim=50, max_iter=50, eta=0.05) # In[28]: get_ipython().run_line_magic('time', '_ = torch_rnn.fit(X_rnn_train, y_rnn_train)') # In[29]: torch_rnn_dev_predictions = torch_rnn.predict(X_rnn_dev) # In[30]: print(classification_report(y_rnn_dev, torch_rnn_dev_predictions)) # ### Pretrained embeddings # With `embedding=None`, `RNNClassifier`, `TorchRNNClassifier` and `TfRNNClassifier` create random embeddings in which the values are drawn from a uniform distribution with bounds `[-1, 1)`. You can also pass in an embedding, as long as you make sure it has the right vocabulary. The utility `utils.create_pretrained_embedding` will help with that: # In[31]: glove_embedding, sst_glove_vocab = utils.create_pretrained_embedding( glove_lookup, sst_train_vocab) # Here's an illustration using `TorchRNNClassifier`:
get_ipython().run_line_magic('time', '_ = elmo_rnn.fit(X_elmo_train, y_elmo_train)') # Evaluation proceeds in the usual way: # In[54]: X_elmo_dev = elmo_layer_reduce_top(X_elmo_dev_layers) # In[55]: elmo_rnn_preds = elmo_rnn.predict(X_elmo_dev) # In[56]: print(classification_report(y_elmo_dev, elmo_rnn_preds, digits=3)) # #### Using the SST experiment framework with ELMo # # To round things out, here's an example of how to use `sst.experiment` with ELMo, for more compact and maintainable experiment code: # In[57]:
class RNN_Classifier:
    '''
    Wrapper around TorchRNNClassifier for the sent140 / emoji experiments.

    Each run_* method builds a fresh TorchRNNClassifier, fits it once
    (on sent140 alone, or on sent140 concatenated with the emojiless or
    emoji training data) and returns predictions on the emoji splits.
    NOTE: as implemented there is no separate fine-tuning pass; the
    "fine tuning" variants train a single model on the combined data.
    '''

    def __init__(self, sent140_train_X_list, sent140_dev_X_list,
                 sent140_train_Y, sent140_dev_Y,
                 sent140_train_embedding, sent140_train_glove_vocab,
                 emoji_train_X_list, emoji_dev_X_list, emoji_test_X_list,
                 emoji_train_Y, emoji_dev_Y, emoji_test_Y,
                 sent140_emoji_train_embedding,
                 sent140_emoji_train_glove_vocab,
                 emojiless_train_X_list, emojiless_dev_X_list,
                 emojiless_test_X_list,
                 emojiless_train_Y, emojiless_dev_Y, emojiless_test_Y,
                 sent140_emojiless_train_embedding,
                 sent140_emojiless_train_glove_vocab,
                 testing):
        '''
        Store the data splits, embeddings and vocabularies used by the
        run_* methods.

        The *_test_X / *_test_Y attributes are only created when `testing`
        is truthy; otherwise the test arguments are ignored.

        TODO: later, pass logistic regression / model parameters into the
        constructor as well.
        '''
        self.testing = testing

        # sent140 data
        self.sent140_train_X = sent140_train_X_list
        self.sent140_train_Y = sent140_train_Y
        self.sent140_dev_X = sent140_dev_X_list
        self.sent140_dev_Y = sent140_dev_Y

        # emoji data
        self.emoji_train_X = emoji_train_X_list
        self.emoji_train_Y = emoji_train_Y
        self.emoji_dev_X = emoji_dev_X_list
        self.emoji_dev_Y = emoji_dev_Y
        if self.testing:
            self.emoji_test_X = emoji_test_X_list
            self.emoji_test_Y = emoji_test_Y

        # emojiless data
        self.emojiless_train_X = emojiless_train_X_list
        self.emojiless_train_Y = emojiless_train_Y
        self.emojiless_dev_X = emojiless_dev_X_list
        self.emojiless_dev_Y = emojiless_dev_Y
        if self.testing:
            self.emojiless_test_X = emojiless_test_X_list
            self.emojiless_test_Y = emojiless_test_Y

        # embeddings and vocabs
        self.sent140_train_embedding = sent140_train_embedding
        self.sent140_train_glove_vocab = sent140_train_glove_vocab
        self.sent140_emoji_train_embedding = sent140_emoji_train_embedding
        self.sent140_emoji_train_glove_vocab = sent140_emoji_train_glove_vocab
        self.sent140_emojiless_train_embedding = sent140_emojiless_train_embedding
        self.sent140_emojiless_train_glove_vocab = sent140_emojiless_train_glove_vocab

    def _fit_and_predict_emoji(self, model, train_X, train_Y):
        '''
        Fit `model` on the given training data and predict on the emoji
        train and dev splits (plus the emoji test split when
        `self.testing` is truthy).

        Returns a 5-tuple
        (None, None, emoji_train_preds, emoji_dev_preds, emoji_test_preds).
        The first two slots are placeholders for sent140 train/dev
        predictions, which are currently not computed; the last slot is
        None when `self.testing` is falsy.
        '''
        model.fit(train_X, train_Y)
        emoji_train_preds = model.predict(self.emoji_train_X)
        emoji_dev_preds = model.predict(self.emoji_dev_X)
        if self.testing:
            emoji_test_preds = model.predict(self.emoji_test_X)
        else:
            emoji_test_preds = None
        return (None, None, emoji_train_preds, emoji_dev_preds, emoji_test_preds)

    def run_sent140(self):
        '''
        Train on sent140, predict on emoji.

        The model is stored on `self.model_sent140`. See the return value
        layout below; sent140 predictions are not computed.

        Returns (None, None, emoji_train_preds, emoji_dev_preds,
        emoji_test_preds), with emoji_test_preds None unless testing.
        '''
        # The model is assigned to the attribute before fitting so that it
        # stays inspectable even if fitting fails.
        self.model_sent140 = TorchRNNClassifier(
            self.sent140_train_glove_vocab,
            embedding=self.sent140_train_embedding)
        return self._fit_and_predict_emoji(
            self.model_sent140, self.sent140_train_X, self.sent140_train_Y)

    def run_sent140_emojiless(self):
        '''
        Train on sent140 combined with emojiless data, predict on emoji.

        The model is stored on `self.model_sent140_emojiless`.

        Returns (None, None, emoji_train_preds, emoji_dev_preds,
        emoji_test_preds), with emoji_test_preds None unless testing.
        '''
        self.model_sent140_emojiless = TorchRNNClassifier(
            self.sent140_emojiless_train_glove_vocab,
            embedding=self.sent140_emojiless_train_embedding)
        # NOTE(review): `+` concatenates only if both operands are lists;
        # numpy arrays would be added element-wise -- confirm the inputs.
        combined_train_X = self.sent140_train_X + self.emojiless_train_X
        combined_train_Y = self.sent140_train_Y + self.emojiless_train_Y
        return self._fit_and_predict_emoji(
            self.model_sent140_emojiless, combined_train_X, combined_train_Y)

    def run_sent140_emoji(self):
        '''
        Train on sent140 combined with emoji data, predict on emoji.

        The model is stored on `self.model_sent140_emoji`.

        Returns (None, None, emoji_train_preds, emoji_dev_preds,
        emoji_test_preds), with emoji_test_preds None unless testing.
        '''
        self.model_sent140_emoji = TorchRNNClassifier(
            self.sent140_emoji_train_glove_vocab,
            embedding=self.sent140_emoji_train_embedding)
        # NOTE(review): `+` concatenates only if both operands are lists;
        # numpy arrays would be added element-wise -- confirm the inputs.
        combined_train_X = self.sent140_train_X + self.emoji_train_X
        combined_train_Y = self.sent140_train_Y + self.emoji_train_Y
        return self._fit_and_predict_emoji(
            self.model_sent140_emoji, combined_train_X, combined_train_Y)