# Imports these test snippets rely on; the module path for TorchRNNClassifier
# is assumed from the surrounding project.
import tempfile

import numpy as np
from sklearn.metrics import accuracy_score

from torch_rnn_classifier import TorchRNNClassifier


def test_model(X_sequence):
    """Just makes sure that this code will run; it doesn't check that
    it is creating good models.
    """
    X_train, X_test, y_train, y_test, vocab = X_sequence
    mod = TorchRNNClassifier(vocab=vocab, max_iter=100)
    mod.fit(X_train, y_train)
    mod.predict(X_test)
    mod.predict_proba(X_test)


def test_torch_rnn_classifier_save_load(X_sequence):
    X_train, X_test, y_train, y_test, vocab = X_sequence
    mod = TorchRNNClassifier(vocab=vocab, max_iter=2)
    mod.fit(X_train, y_train)
    mod.predict(X_test)
    with tempfile.NamedTemporaryFile(mode='wb') as f:
        name = f.name
        mod.to_pickle(name)
        mod2 = TorchRNNClassifier.from_pickle(name)
        mod2.predict(X_test)
        mod2.fit(X_test, y_test)


def test_cheese_disease(cheese_disease_dataset):
    vocab = cheese_disease_dataset['vocab']
    X_train = cheese_disease_dataset['X_train']
    y_train = cheese_disease_dataset['y_train']
    mod = TorchRNNClassifier(vocab=vocab,
                             embed_dim=50,
                             hidden_dim=50,
                             max_iter=200)
    mod.fit(X_train, y_train)
    X_test = cheese_disease_dataset['X_test']
    y_test = cheese_disease_dataset['y_test']
    pred = mod.predict(X_test)
    acc = accuracy_score(y_test, pred)
    assert acc > 0.80


# In the source test suite this is presumably driven by a
# @pytest.mark.parametrize decorator (not shown in this excerpt) that
# supplies the (param, expected) pairs.
def test_simple_example_params(X_sequence, param, expected):
    X_train, X_test, y_train, y_test, vocab = X_sequence
    mod = TorchRNNClassifier(vocab, **{param: expected})

    if param == "use_embedding" and expected == False:
        embedding = np.random.uniform(
            low=-1.0, high=1.0, size=(len(vocab), 60))
        X_train = [[embedding[vocab.index(w)] for w in ex] for ex in X_train]
        X_test = [[embedding[vocab.index(w)] for w in ex] for ex in X_test]

    mod.fit(X_train, y_train)
    preds = mod.predict(X_test)
    acc = accuracy_score(y_test, preds)
    if not (param == "max_iter" and expected == 0):
        assert acc >= 0.60
# In[27]:

torch_rnn = TorchRNNClassifier(sst_train_vocab,
                               embed_dim=50,
                               hidden_dim=50,
                               max_iter=50,
                               eta=0.05)

# In[28]:

get_ipython().run_line_magic('time',
                             '_ = torch_rnn.fit(X_rnn_train, y_rnn_train)')

# In[29]:

torch_rnn_dev_predictions = torch_rnn.predict(X_rnn_dev)

# In[30]:

print(classification_report(y_rnn_dev, torch_rnn_dev_predictions))

# ### Pretrained embeddings

# With `embedding=None`, `RNNClassifier`, `TorchRNNClassifier`, and `TfRNNClassifier` create random embeddings whose values are drawn from a uniform distribution on `[-1, 1)`. You can also pass in your own embedding, as long as you make sure it has the right vocabulary. The utility `utils.create_pretrained_embedding` will help with that:

# In[31]:

glove_embedding, sst_glove_vocab = utils.create_pretrained_embedding(
    glove_lookup, sst_train_vocab)
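
# As a quick sanity check (an illustrative snippet, not part of the original
# notebook), the returned embedding should be row-aligned with the returned
# vocabulary, so the two sizes should agree:

print(len(glove_embedding), len(sst_glove_vocab))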

# Here's an illustration using `TorchRNNClassifier`:
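
# The original cell is missing from this excerpt, so here is a minimal sketch of
# what it would look like. It reuses `glove_embedding` and `sst_glove_vocab` from
# above and assumes the same training data and hyperparameters as the earlier
# `torch_rnn` cell; the name `torch_rnn_glove` is illustrative.

torch_rnn_glove = TorchRNNClassifier(
    sst_glove_vocab,
    embedding=glove_embedding,
    hidden_dim=50,
    max_iter=50,
    eta=0.05)

_ = torch_rnn_glove.fit(X_rnn_train, y_rnn_train)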
get_ipython().run_line_magic('time', '_ = elmo_rnn.fit(X_elmo_train, y_elmo_train)')


# Evaluation proceeds in the usual way:

# In[54]:


X_elmo_dev = elmo_layer_reduce_top(X_elmo_dev_layers)


# In[55]:


elmo_rnn_preds = elmo_rnn.predict(X_elmo_dev)


# In[56]:


print(classification_report(y_elmo_dev, elmo_rnn_preds, digits=3))


# #### Using the SST experiment framework with ELMo
# 
# To round things out, here's an example of how to use `sst.experiment` with ELMo, for more compact and maintainable experiment code:

# In[57]:

class RNN_Classifier:
    '''
    Wrapper class around a torch RNN classifier for initial fitting and then
    fine-tuning of weights.
    '''

    def __init__(self,
                 sent140_train_X_list, sent140_dev_X_list,
                 sent140_train_Y, sent140_dev_Y,
                 sent140_train_embedding, sent140_train_glove_vocab,
                 emoji_train_X_list, emoji_dev_X_list, emoji_test_X_list,
                 emoji_train_Y, emoji_dev_Y, emoji_test_Y,
                 sent140_emoji_train_embedding, sent140_emoji_train_glove_vocab,
                 emojiless_train_X_list, emojiless_dev_X_list, emojiless_test_X_list,
                 emojiless_train_Y, emojiless_dev_Y, emojiless_test_Y,
                 sent140_emojiless_train_embedding, sent140_emojiless_train_glove_vocab,
                 testing):
        '''
        Pass the initial data for fitting to the constructor. Later: add
        support for passing logistic regression parameters into the
        constructor.
        '''
        self.testing = testing

        self.sent140_train_X = sent140_train_X_list
        self.sent140_train_Y = sent140_train_Y
        self.sent140_dev_X = sent140_dev_X_list
        self.sent140_dev_Y = sent140_dev_Y

        self.emoji_train_X = emoji_train_X_list
        self.emoji_train_Y = emoji_train_Y
        self.emoji_dev_X = emoji_dev_X_list
        self.emoji_dev_Y = emoji_dev_Y
        if self.testing:
            self.emoji_test_X = emoji_test_X_list
            self.emoji_test_Y = emoji_test_Y
        
        self.emojiless_train_X = emojiless_train_X_list
        self.emojiless_train_Y = emojiless_train_Y
        self.emojiless_dev_X = emojiless_dev_X_list
        self.emojiless_dev_Y = emojiless_dev_Y
        if self.testing:
            self.emojiless_test_X = emojiless_test_X_list
            self.emojiless_test_Y = emojiless_test_Y

        # embeddings and vocabs
        self.sent140_train_embedding = sent140_train_embedding
        self.sent140_train_glove_vocab = sent140_train_glove_vocab
        self.sent140_emoji_train_embedding = sent140_emoji_train_embedding
        self.sent140_emoji_train_glove_vocab = sent140_emoji_train_glove_vocab
        self.sent140_emojiless_train_embedding = sent140_emojiless_train_embedding
        self.sent140_emojiless_train_glove_vocab = sent140_emojiless_train_glove_vocab

        # TODO: pass model parameters into the constructor?
    

    def run_sent140(self):
        '''
        Train on sent140, predict on emoji.
        Scores on sent140 itself can also be reported (see the commented-out
        lines below), just because it's interesting.
        '''
        # model
        #self.model_sent140 = TorchRNNClassifier(self.sent140_train_glove_vocab, embedding=self.sent140_train_embedding, bidirectional=True)
        self.model_sent140 = TorchRNNClassifier(self.sent140_train_glove_vocab, embedding=self.sent140_train_embedding)
        
        # train
        self.model_sent140.fit(self.sent140_train_X, self.sent140_train_Y)

        # test on sent140
        #sent140_train_preds = self.model_sent140.predict(self.sent140_train_X)
        #sent140_dev_preds = self.model_sent140.predict(self.sent140_dev_X)

        # test on emoji
        emoji_train_preds = self.model_sent140.predict(self.emoji_train_X)
        emoji_dev_preds = self.model_sent140.predict(self.emoji_dev_X)
        if self.testing:
            emoji_test_preds = self.model_sent140.predict(self.emoji_test_X)
        else:
            emoji_test_preds = None

        #return (sent140_train_preds, sent140_dev_preds, emoji_train_preds, emoji_dev_preds, emoji_test_preds)
        return (None, None, emoji_train_preds, emoji_dev_preds, emoji_test_preds)
    

    def run_sent140_emojiless(self):
        '''
        Train on sent140 plus the emojiless data (fine-tuning is approximated
        here by fitting on the combined training set), predict on emoji.
        Scores on sent140 itself can also be reported (see the commented-out
        lines below), just because it's interesting.
        '''
        # model
        #self.model_sent140_emojiless = TorchRNNClassifier(self.sent140_emojiless_train_glove_vocab, embedding=self.sent140_emojiless_train_embedding, bidirectional=True)
        self.model_sent140_emojiless = TorchRNNClassifier(self.sent140_emojiless_train_glove_vocab, embedding=self.sent140_emojiless_train_embedding)
        
        # train
        # combine the sent140 and emojiless training sets
        combined_train_X = self.sent140_train_X + self.emojiless_train_X
        combined_train_Y = self.sent140_train_Y + self.emojiless_train_Y
        self.model_sent140_emojiless.fit(combined_train_X, combined_train_Y)
        
        # test on sent140
        #sent140_train_preds = self.model_sent140_emojiless.predict(self.sent140_train_X)
        #sent140_dev_preds = self.model_sent140_emojiless.predict(self.sent140_dev_X)
        
        # test on emoji
        emoji_train_preds = self.model_sent140_emojiless.predict(self.emoji_train_X)
        emoji_dev_preds = self.model_sent140_emojiless.predict(self.emoji_dev_X)        
        if self.testing:
            emoji_test_preds = self.model_sent140_emojiless.predict(self.emoji_test_X)
        else:
            emoji_test_preds = None
        
        #return (sent140_train_preds, sent140_dev_preds, emoji_train_preds, emoji_dev_preds, emoji_test_preds)
        return (None, None, emoji_train_preds, emoji_dev_preds, emoji_test_preds)

    
    def run_sent140_emoji(self):
        '''
        Train on sent140 plus the emoji data (fine-tuning is approximated here
        by fitting on the combined training set), predict on emoji.
        Scores on sent140 itself can also be reported (see the commented-out
        lines below), just because it's interesting.
        '''
        # model
        #self.model_sent140_emoji = TorchRNNClassifier(self.sent140_emoji_train_glove_vocab, embedding=self.sent140_emoji_train_embedding, bidirectional=True)
        self.model_sent140_emoji = TorchRNNClassifier(self.sent140_emoji_train_glove_vocab, embedding=self.sent140_emoji_train_embedding)
        
        # train
        # combine the sent140 and emoji training sets
        combined_train_X = self.sent140_train_X + self.emoji_train_X
        combined_train_Y = self.sent140_train_Y + self.emoji_train_Y
        self.model_sent140_emoji.fit(combined_train_X, combined_train_Y)

        # test on sent140
        #sent140_train_preds = self.model_sent140_emoji.predict(self.sent140_train_X)
        #sent140_dev_preds = self.model_sent140_emoji.predict(self.sent140_dev_X)
        
        # test on emoji
        emoji_train_preds = self.model_sent140_emoji.predict(self.emoji_train_X)
        emoji_dev_preds = self.model_sent140_emoji.predict(self.emoji_dev_X)
        if self.testing:
            emoji_test_preds = self.model_sent140_emoji.predict(self.emoji_test_X)
        else:
            emoji_test_preds = None
        
        #return (sent140_train_preds, sent140_dev_preds, emoji_train_preds, emoji_dev_preds, emoji_test_preds)
        return (None, None, emoji_train_preds, emoji_dev_preds, emoji_test_preds)