예제 #1
0
def fit_shallow_neural_classifier_with_crossvalidation(X, y):
    """Cross-validate a shallow neural classifier over `hidden_dim`.

    A `TorchShallowNeuralClassifier` capped at 50 training iterations is
    handed to `utils.fit_classifier_with_crossvalidation` together with
    3 as the `cv` argument and three candidate hidden-layer sizes
    (25, 50, 100).

    Parameters
    ----------
    X :
        Feature representations, passed through unchanged to the
        cross-validation helper.

    y :
        Labels aligned with `X`, passed through unchanged.

    Returns
    -------
    The value returned by `utils.fit_classifier_with_crossvalidation`
    (presumably the best model found by the search -- confirm against
    that helper).

    """
    return utils.fit_classifier_with_crossvalidation(
        X,
        y,
        TorchShallowNeuralClassifier(max_iter=50),
        3,
        {'hidden_dim': [25, 50, 100]})
예제 #2
0
def fit_softmax_with_crossvalidation(X, y):
    """Fit a MaxEnt (softmax) classifier, selecting hyperparameters
    by cross-validation.

    The search covers the regularization weight `C` and the penalty
    type ('l1' or 'l2'), with 3 passed as the `cv` argument of
    `utils.fit_classifier_with_crossvalidation`.

    Parameters
    ----------
    X : 2d np.array
        The matrix of features, one example per row.

    y : list
        The list of labels for rows in `X`.

    Returns
    -------
    sklearn.linear_model.LogisticRegression
        A trained model instance, the best model found.

    """
    search_space = {
        'C': [0.4, 0.6, 0.8, 1.0],
        'penalty': ['l1', 'l2'],
    }
    estimator = LogisticRegression(
        fit_intercept=True, solver='liblinear', multi_class='auto')
    return utils.fit_classifier_with_crossvalidation(
        X, y, estimator, 3, search_space)
def fit_basic_sgd_classifier_with_crossvalidation(X, y):
    """Cross-validate a `BasicSGDClassifier` over its `eta` setting.

    Three values of `eta` (0.01, 0.1, 1.0) are tried, with `max_iter`
    pinned to 10 through the grid, and 5 passed as the `cv` argument of
    `utils.fit_classifier_with_crossvalidation`.

    Parameters
    ----------
    X :
        Feature representations, passed through unchanged to the
        cross-validation helper.

    y :
        Labels aligned with `X`, passed through unchanged.

    Returns
    -------
    The value returned by `utils.fit_classifier_with_crossvalidation`
    (presumably the best model found by the search -- confirm against
    that helper).

    """
    search_space = {'eta': [0.01, 0.1, 1.0], 'max_iter': [10]}
    return utils.fit_classifier_with_crossvalidation(
        X, y, BasicSGDClassifier(), 5, search_space)
def test_color_describer_cross_validation(color_describer_dataset):
    """Smoke-test cross-validation of `ContextualColorDescriber`.

    The test makes no assertions; it passes as long as the
    cross-validation call completes without raising. `scoring=None` is
    passed explicitly to `utils.fit_classifier_with_crossvalidation`.

    Parameters
    ----------
    color_describer_dataset :
        Fixture that unpacks into (color sequences, word sequences,
        vocabulary).

    """
    colors, words, vocab = color_describer_dataset
    describer = torch_color_describer.ContextualColorDescriber(
        vocab,
        embed_dim=10,
        hidden_dim=10,
        max_iter=100,
        embedding=None)
    search_space = {'hidden_dim': [10, 20]}
    utils.fit_classifier_with_crossvalidation(
        colors,
        words,
        describer,
        cv=2,
        scoring=None,
        param_grid=search_space)
def fit_softmax_with_crossvalidation(X, y):
    """Fit a MaxEnt (softmax) classifier, selecting hyperparameters
    by cross-validation.

    The grid searched here covers:

    * 'fit_intercept': whether to include the class bias feature.
    * 'C': weight for the regularization term (smaller is more
      regularized).
    * 'penalty': type of regularization -- roughly, 'l1' encourages
      small sparse models, and 'l2' encourages the weights to conform
      to a gaussian prior distribution.

    Other arguments can be cross-validated as well; see
    http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html

    Parameters
    ----------
    X : 2d np.array
        The matrix of features, one example per row.

    y : list
        The list of labels for rows in `X`.

    Returns
    -------
    sklearn.linear_model.LogisticRegression
        A trained model instance, the best model found.

    """
    search_space = {
        'fit_intercept': [True, False],
        'C': [0.4, 0.6, 0.8, 1.0, 2.0, 3.0],
        'penalty': ['l1', 'l2'],
    }
    estimator = LogisticRegression(
        fit_intercept=True, solver='liblinear', multi_class='auto')
    return utils.fit_classifier_with_crossvalidation(
        X, y, estimator, 5, search_space)
예제 #6
0
def test_rnn_classifier_cross_validation(model_class, X_sequence):
    """Smoke-test cross-validation of an RNN classifier class.

    The test makes no assertions; it passes as long as the
    cross-validation call completes without raising.

    Parameters
    ----------
    model_class :
        Classifier class; instantiated here as
        `model_class(vocab, max_iter=2)`.

    X_sequence :
        Fixture that unpacks into (train, test, vocab), where `train`
        is an iterable of (example, label) pairs. The test portion is
        not used.

    """
    train, _, vocab = X_sequence
    X, y = zip(*train)
    search_space = {'hidden_dim': [10, 20]}
    classifier = model_class(vocab, max_iter=2)
    utils.fit_classifier_with_crossvalidation(
        X, y, classifier, cv=2, param_grid=search_space)
예제 #7
0
# In[40]:

# Walk through the first entry of `toy_proba` one timestep at a time and
# print each timestep as a dict pairing every item of `toy_vocab` with the
# value at the same position.
# NOTE(review): presumably each timestep is a probability distribution over
# `toy_vocab` -- confirm against the cell that builds `toy_proba`.
for timestep in toy_proba[0]:
    print(dict(zip(toy_vocab, timestep)))

# ### Cross-validation

# You can use `utils.fit_classifier_with_crossvalidation` to cross-validate these models. Just be sure to set `scoring=None` so that the sklearn model selection methods use the `score` method of `ContextualColorDescriber`, which is an alias for `listener_accuracy`:

# In[41]:

# Search over two hidden sizes with `cv=2` on the toy training data; the
# helper's return value is kept in `best_mod`.
best_mod = utils.fit_classifier_with_crossvalidation(
    toy_color_seqs_train,
    toy_word_seqs_train,
    toy_mod,
    cv=2,
    scoring=None,
    param_grid={'hidden_dim': [10, 20]})

# ## Baseline SCC model

# Just to show how all the pieces come together, here's a very basic SCC experiment using the core code and very simplistic assumptions (which you will revisit in the assignment) about how to represent the examples.

# To facilitate quick development, we'll restrict attention to the two-word examples:

# In[42]:

# `word_count=2` is the reader option used here to select the two-word examples.
dev_corpus = ColorsCorpusReader(COLORS_SRC_FILENAME, word_count=2)

# In[43]: