# These snippets look like excerpts from pytest test files and helper scripts
# built around TorchRNNClassifier (presumably the cs224u torch_rnn_classifier
# module); they rely on imports along these lines:
import tempfile

import numpy as np
import pytest
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score

import utils
from utils import get_vocab
from torch_rnn_classifier import TorchRNNClassifier


def test_model_graph_dimensions(X_sequence, mod_attr, graph_attr):
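    # `X_sequence` is presumably a pytest fixture providing a small train/test
    # split plus a vocab, and (`mod_attr`, `graph_attr`) presumably come from a
    # @pytest.mark.parametrize decorator dropped from this excerpt. The test
    # checks that a constructor argument (e.g. hidden_dim) is mirrored on the
    # underlying torch RNN module.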
    X_train, X_test, y_train, y_test, vocab = X_sequence
    mod = TorchRNNClassifier(vocab, max_iter=1)
    mod.fit(X_train, y_train)
    mod_attr_val = getattr(mod, mod_attr)
    graph_attr_val = getattr(mod.model.rnn.rnn, graph_attr)
    assert mod_attr_val == graph_attr_val
Example 2
def fit_elmo_rnn(X, y):
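    # With use_embedding=False the classifier expects each example to be a
    # sequence of pre-computed vectors (presumably ELMo representations here),
    # so no vocabulary lookup is performed and an empty vocab suffices.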
    mod = TorchRNNClassifier(
        vocab=[],
        max_iter=50,
        use_embedding=False)
    mod.fit(X, y)
    return mod
def test_predict_functions_honor_device(X_sequence, func):
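    # `func` is presumably parametrized over "predict" and "predict_proba";
    # passing a nonsense device string should make torch raise a RuntimeError.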
    X_train, X_test, y_train, y_test, vocab = X_sequence
    mod = TorchRNNClassifier(vocab, max_iter=2)
    mod.fit(X_train, y_train)
    prediction_func = getattr(mod, func)
    with pytest.raises(RuntimeError):
        prediction_func(X_test, device="FAKE_DEVICE")
Example 4
def fit_hf_rnn(X, y):
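    # As with the ELMo helper above, use_embedding=False means the inputs are
    # sequences of pre-computed vectors (here, presumably BERT hidden states
    # from a Hugging Face model), so the vocab can be empty.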
    mod = TorchRNNClassifier(
        vocab=[],
        max_iter=50, 
        hidden_dim=50,
        use_embedding=False)  # Pass in the BERT hidden states directly!
    mod.fit(X, y)
    return mod
def test_model(X_sequence):
    """Just makes sure that this code will run; it doesn't check that
    it is creating good models.
    """
    X_train, X_test, y_train, y_test, vocab = X_sequence
    mod = TorchRNNClassifier(vocab=vocab, max_iter=100)
    mod.fit(X_train, y_train)
    mod.predict(X_test)
    mod.predict_proba(X_test)
def test_predict_restores_device(X_sequence, func):
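    # Presumably run only when a GPU is available (otherwise mod.device would
    # be the CPU and the first assertion below would fail); checks that asking
    # for predictions on another device does not permanently move the model.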
    X_train, X_test, y_train, y_test, vocab = X_sequence
    mod = TorchRNNClassifier(vocab, max_iter=2)
    mod.fit(X_train, y_train)
    current_device = mod.device
    assert current_device != torch.device("cpu:0")
    prediction_func = getattr(mod, func)
    prediction_func(X_test, device="cpu:0")
    assert mod.device == current_device
def test_torch_rnn_classifier_save_load(X_sequence):
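    # Round-trips the fitted model through to_pickle / from_pickle via a named
    # temporary file and checks that the restored model can both predict and
    # continue training.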
    X_train, X_test, y_train, y_test, vocab = X_sequence
    mod = TorchRNNClassifier(vocab=vocab, max_iter=2)
    mod.fit(X_train, y_train)
    mod.predict(X_test)
    with tempfile.NamedTemporaryFile(mode='wb') as f:
        name = f.name
        mod.to_pickle(name)
        mod2 = TorchRNNClassifier.from_pickle(name)
        mod2.predict(X_test)
        mod2.fit(X_test, y_test)
Example 8
def test_embedding_update_control(X_sequence, freeze, outcome):
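    # `freeze` and `outcome` are presumably parametrized together, e.g.
    # (True, True) and (False, False): with a frozen embedding the learned
    # weights should still equal the matrix passed in; otherwise training
    # should have changed them.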
    X_train, X_test, y_train, y_test, vocab = X_sequence
    embed_dim = 5
    embedding = np.ones((len(vocab), embed_dim))
    mod = TorchRNNClassifier(vocab,
                             max_iter=10,
                             embedding=embedding,
                             freeze_embedding=freeze)
    mod.fit(X_train, y_train)
    graph_emb = mod.model.rnn.embedding.weight.detach().cpu().numpy()
    assert np.array_equal(embedding, graph_emb) == outcome
Example 9
def test_pretrained_embedding(X_sequence):
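    # A frozen pretrained embedding should be copied into the model verbatim
    # and remain unchanged after one training iteration.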
    X_train, X_test, y_train, y_test, vocab = X_sequence
    embed_dim = 5
    embedding = np.ones((len(vocab), embed_dim))
    mod = TorchRNNClassifier(vocab,
                             max_iter=1,
                             embedding=embedding,
                             freeze_embedding=True)
    mod.fit(X_train, y_train)
    graph_emb = mod.model.rnn.embedding.weight.detach().cpu().numpy()
    assert np.array_equal(embedding, graph_emb)
Example 10
def test_cheese_disease(cheese_disease_dataset):
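    # `cheese_disease_dataset` is presumably a fixture wrapping the
    # cheese-vs-disease name classification data. Note that the evaluation
    # below reuses the 'X_train'/'y_train' split, so the accuracy threshold is
    # a fit sanity check rather than held-out performance.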
    vocab = cheese_disease_dataset['vocab']
    X_train = cheese_disease_dataset['X_train']
    y_train = cheese_disease_dataset['y_train']
    mod = TorchRNNClassifier(vocab=vocab,
                             embed_dim=50,
                             hidden_dim=50,
                             max_iter=200)
    mod.fit(X_train, y_train)
    X_test = cheese_disease_dataset['X_train']
    y_test = cheese_disease_dataset['y_train']
    pred = mod.predict(X_test)
    acc = accuracy_score(y_test, pred)
    assert acc > 0.80
def test_simple_example_params(X_sequence, param, expected):
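    # `param` and `expected` are presumably parametrized over constructor
    # keyword/value pairs. When use_embedding=False, the token sequences are
    # replaced below with random vectors so the model receives pre-computed
    # representations instead of vocabulary indices.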
    X_train, X_test, y_train, y_test, vocab = X_sequence
    mod = TorchRNNClassifier(vocab, **{param: expected})

    if param == "use_embedding" and expected == False:
        embedding = np.random.uniform(
            low=-1.0, high=1.0, size=(len(vocab), 60))
        X_train = [[embedding[vocab.index(w)] for w in ex] for ex in X_train]
        X_test = [[embedding[vocab.index(w)] for w in ex] for ex in X_test]

    mod.fit(X_train, y_train)
    preds = mod.predict(X_test)
    acc = accuracy_score(y_test, preds)
    if not (param == "max_iter" and expected == 0):
        assert acc >= 0.60
Example 12
def fit_rnn_classifier(X, y):
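    # Fit a bidirectional RNN classifier over what is presumably SST data with
    # a GloVe-derived vocabulary (sst_glove_vocab); n_words=10 looks like a
    # quick-run cap, with the commented-out n_words=10000 call below being the
    # full setting.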
    sst_glove_vocab = get_vocab(X, n_words=10)
    #     sst_glove_vocab = get_vocab(X, n_words=10000)
    mod = TorchRNNClassifier(
                             sst_glove_vocab,
                             eta=0.05,
                             embedding=None,
                             batch_size=1000,
                             embed_dim=50,
                             hidden_dim=50,
                             max_iter=5,
                             l2_strength=0.001,
                             bidirectional=True,
                             hidden_activation=nn.ReLU())
    mod.fit(X, y)
    return mod
Example 13
def fit_simple_chained_rnn(X, y):
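    # Build a vocabulary of the 10,000 most frequent tokens via utils.get_vocab
    # and fit a small RNN classifier for 10 iterations.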
    vocab = utils.get_vocab(X, n_words=10000)
    mod = TorchRNNClassifier(vocab, hidden_dim=50, max_iter=10)
    mod.fit(X, y)
    return mod
Example 14
class RNN_Classifier:
    '''
    Wrapper around TorchRNNClassifier: fit an initial model on sent140, then
    fit further models on sent140 combined with the emoji / emojiless data,
    and predict on the emoji splits.
    '''

    def __init__(self,
                 sent140_train_X_list, sent140_dev_X_list,
                 sent140_train_Y, sent140_dev_Y,
                 sent140_train_embedding, sent140_train_glove_vocab,
                 emoji_train_X_list, emoji_dev_X_list, emoji_test_X_list,
                 emoji_train_Y, emoji_dev_Y, emoji_test_Y,
                 sent140_emoji_train_embedding, sent140_emoji_train_glove_vocab,
                 emojiless_train_X_list, emojiless_dev_X_list, emojiless_test_X_list,
                 emojiless_train_Y, emojiless_dev_Y, emojiless_test_Y,
                 sent140_emojiless_train_embedding, sent140_emojiless_train_glove_vocab,
                 testing):
        '''
        Pass the initial data for fitting into the constructor. Later, consider
        also passing the logistic regression parameters into the constructor.
        '''
        self.testing = testing

        self.sent140_train_X = sent140_train_X_list
        self.sent140_train_Y = sent140_train_Y
        self.sent140_dev_X = sent140_dev_X_list
        self.sent140_dev_Y = sent140_dev_Y

        self.emoji_train_X = emoji_train_X_list
        self.emoji_train_Y = emoji_train_Y
        self.emoji_dev_X = emoji_dev_X_list
        self.emoji_dev_Y = emoji_dev_Y
        if self.testing:
            self.emoji_test_X = emoji_test_X_list
            self.emoji_test_Y = emoji_test_Y
        
        self.emojiless_train_X = emojiless_train_X_list
        self.emojiless_train_Y = emojiless_train_Y
        self.emojiless_dev_X = emojiless_dev_X_list
        self.emojiless_dev_Y = emojiless_dev_Y
        if self.testing:
            self.emojiless_test_X = emojiless_test_X_list
            self.emojiless_test_Y = emojiless_test_Y

        # embeddings and vocabs
        self.sent140_train_embedding = sent140_train_embedding
        self.sent140_train_glove_vocab = sent140_train_glove_vocab
        self.sent140_emoji_train_embedding = sent140_emoji_train_embedding
        self.sent140_emoji_train_glove_vocab = sent140_emoji_train_glove_vocab
        self.sent140_emojiless_train_embedding = sent140_emojiless_train_embedding
        self.sent140_emojiless_train_glove_vocab = sent140_emojiless_train_glove_vocab

        # TODO: pass model parameters into the constructor?
    

    def run_sent140(self):
        '''
        Train on sent140, predict on emoji.
        Report the score on sent140 too, just because it's interesting.
        '''
        # model
        #self.model_sent140 = TorchRNNClassifier(self.sent140_train_glove_vocab, embedding=self.sent140_train_embedding, bidirectional=True)
        self.model_sent140 = TorchRNNClassifier(self.sent140_train_glove_vocab, embedding=self.sent140_train_embedding)
        
        # train
        self.model_sent140.fit(self.sent140_train_X, self.sent140_train_Y)

        # test on sent140
        #sent140_train_preds = self.model_sent140.predict(self.sent140_train_X)
        #sent140_dev_preds = self.model_sent140.predict(self.sent140_dev_X)

        # test on emoji
        emoji_train_preds = self.model_sent140.predict(self.emoji_train_X)
        emoji_dev_preds = self.model_sent140.predict(self.emoji_dev_X)
        if self.testing:
            emoji_test_preds = self.model_sent140.predict(self.emoji_test_X)
        else:
            emoji_test_preds = None

        #return (sent140_train_preds, sent140_dev_preds, emoji_train_preds, emoji_dev_preds, emoji_test_preds)
        return (None, None, emoji_train_preds, emoji_dev_preds, emoji_test_preds)
    

    def run_sent140_emojiless(self):
        '''
        Train on sent140 combined with the emojiless data, predict on emoji.
        Report the score on sent140 too, just because it's interesting.
        '''
        # model
        #self.model_sent140_emojiless = TorchRNNClassifier(self.sent140_emojiless_train_glove_vocab, embedding=self.sent140_emojiless_train_embedding, bidirectional=True)
        self.model_sent140_emojiless = TorchRNNClassifier(self.sent140_emojiless_train_glove_vocab, embedding=self.sent140_emojiless_train_embedding)
        
        # train
        # combine features
        combined_train_X = self.sent140_train_X + self.emojiless_train_X
        combined_train_Y = self.sent140_train_Y + self.emojiless_train_Y
        self.model_sent140_emojiless.fit(combined_train_X, combined_train_Y)
        
        # test on sent140
        #sent140_train_preds = self.model_sent140_emojiless.predict(self.sent140_train_X)
        #sent140_dev_preds = self.model_sent140_emojiless.predict(self.sent140_dev_X)
        
        # test on emoji
        emoji_train_preds = self.model_sent140_emojiless.predict(self.emoji_train_X)
        emoji_dev_preds = self.model_sent140_emojiless.predict(self.emoji_dev_X)        
        if self.testing:
            emoji_test_preds = self.model_sent140_emojiless.predict(self.emoji_test_X)
        else:
            emoji_test_preds = None
        
        #return (sent140_train_preds, sent140_dev_preds, emoji_train_preds, emoji_dev_preds, emoji_test_preds)
        return (None, None, emoji_train_preds, emoji_dev_preds, emoji_test_preds)

    
    def run_sent140_emoji(self):
        '''
        Train on sent140 combined with the emoji data, predict on emoji.
        Report the score on sent140 too, just because it's interesting.
        '''
        # model
        #self.model_sent140_emoji = TorchRNNClassifier(self.sent140_emoji_train_glove_vocab, embedding=self.sent140_emoji_train_embedding, bidirectional=True)
        self.model_sent140_emoji = TorchRNNClassifier(self.sent140_emoji_train_glove_vocab, embedding=self.sent140_emoji_train_embedding)
        
        # train
        combined_train_X = self.sent140_train_X + self.emoji_train_X
        combined_train_Y = self.sent140_train_Y + self.emoji_train_Y
        self.model_sent140_emoji.fit(combined_train_X, combined_train_Y)

        # test on sent140
        #sent140_train_preds = self.model_sent140_emoji.predict(self.sent140_train_X)
        #sent140_dev_preds = self.model_sent140_emoji.predict(self.sent140_dev_X)
        
        # test on emoji
        emoji_train_preds = self.model_sent140_emoji.predict(self.emoji_train_X)
        emoji_dev_preds = self.model_sent140_emoji.predict(self.emoji_dev_X)
        if self.testing:
            emoji_test_preds = self.model_sent140_emoji.predict(self.emoji_test_X)
        else:
            emoji_test_preds = None
        
        #return (sent140_train_preds, sent140_dev_preds, emoji_train_preds, emoji_dev_preds, emoji_test_preds)
        return (None, None, emoji_train_preds, emoji_dev_preds, emoji_test_preds)