def fit_shallow_neural_classifier_with_crossvalidation(X, y):
    """Shallow neural classifier with hyperparameter cross-validation."""
    basemod = TorchShallowNeuralClassifier(max_iter=50)
    cv = 3
    param_grid = {'hidden_dim': [25, 50, 100]}
    best_mod = utils.fit_classifier_with_crossvalidation(
        X, y, basemod, cv, param_grid)
    return best_mod
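
# All of the fitting functions in this section delegate to
# `utils.fit_classifier_with_crossvalidation`. For reference, here is a
# minimal sketch of what such a helper might look like, assuming it wraps
# sklearn's `GridSearchCV`; the real `utils` implementation may differ in
# its scoring and reporting details:

from sklearn.model_selection import GridSearchCV

def fit_classifier_with_crossvalidation(X, y, basemod, cv, param_grid,
                                        scoring='f1_macro'):
    # Exhaustively search `param_grid`, scoring each setting with
    # `cv`-fold cross-validation.
    crossvalidator = GridSearchCV(basemod, param_grid, cv=cv, scoring=scoring)
    crossvalidator.fit(X, y)
    # `best_estimator_` is refit on the full dataset using the best
    # hyperparameters found during the search.
    return crossvalidator.best_estimator_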
def fit_softmax_with_crossvalidation(X, y):
    """A MaxEnt model of the dataset with hyperparameter cross-validation.

    Parameters
    ----------
    X : 2d np.array
        The matrix of features, one example per row.

    y : list
        The list of labels for rows in `X`.

    Returns
    -------
    sklearn.linear_model.LogisticRegression
        A trained model instance, the best model found.

    """
    basemod = LogisticRegression(
        fit_intercept=True, solver='liblinear', multi_class='auto')
    cv = 3
    param_grid = {'C': [0.4, 0.6, 0.8, 1.0],
                  'penalty': ['l1', 'l2']}
    best_mod = utils.fit_classifier_with_crossvalidation(
        X, y, basemod, cv, param_grid)
    return best_mod
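
# Hypothetical usage, assuming a simple bag-of-words featurization; `texts`
# and `labels` are stand-in data, not from the course corpus:

from sklearn.feature_extraction.text import CountVectorizer

texts = ["great movie", "terrible plot", "great fun",
         "terrible acting", "great cast", "terrible pacing"]
labels = ["pos", "neg", "pos", "neg", "pos", "neg"]

vectorizer = CountVectorizer()
X_feats = vectorizer.fit_transform(texts)
best_softmax = fit_softmax_with_crossvalidation(X_feats, labels)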
def fit_basic_sgd_classifier_with_crossvalidation(X, y):
    basemod = BasicSGDClassifier()
    cv = 5
    # `eta` is the learning rate; smaller values take more cautious steps.
    param_grid = {'eta': [0.01, 0.1, 1.0], 'max_iter': [10]}
    best_mod = utils.fit_classifier_with_crossvalidation(
        X, y, basemod, cv, param_grid)
    return best_mod
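
# A rough sketch of the kind of model a name like `BasicSGDClassifier`
# suggests: a binary perceptron-style classifier whose `eta` (learning rate)
# and `max_iter` are exactly the hyperparameters tuned above. This is an
# illustration of the technique, not the course's actual implementation:

import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin

class BasicSGDClassifierSketch(BaseEstimator, ClassifierMixin):
    def __init__(self, eta=0.1, max_iter=10):
        self.eta = eta
        self.max_iter = max_iter

    def fit(self, X, y):
        # Assumes labels in {-1, 1}.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        self.w_ = np.zeros(X.shape[1])
        for _ in range(self.max_iter):
            for x_i, y_i in zip(X, y):
                # Perceptron update: change the weights only on a mistake,
                # with step size controlled by `eta`.
                if y_i * (self.w_ @ x_i) <= 0:
                    self.w_ = self.w_ + self.eta * y_i * x_i
        return self

    def predict(self, X):
        return np.where(np.asarray(X, dtype=float) @ self.w_ >= 0.0, 1, -1)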
def test_color_describer_cross_validation(color_describer_dataset):
    color_seqs, word_seqs, vocab = color_describer_dataset
    mod = torch_color_describer.ContextualColorDescriber(
        vocab,
        embed_dim=10,
        hidden_dim=10,
        max_iter=100,
        embedding=None)
    # `scoring=None` defers to the model's own `score` method during
    # model selection (see the cross-validation notes below).
    best_mod = utils.fit_classifier_with_crossvalidation(
        color_seqs, word_seqs, mod,
        cv=2,
        scoring=None,
        param_grid={'hidden_dim': [10, 20]})
def fit_softmax_with_crossvalidation(X, y):
    """A MaxEnt model of the dataset with hyperparameter cross-validation.

    Some notes:

    * 'fit_intercept': whether to include the class bias feature.
    * 'C': weight for the regularization term (smaller is more regularized).
    * 'penalty': type of regularization -- roughly, 'l1' encourages small
      sparse models, and 'l2' encourages the weights to conform to a
      Gaussian prior distribution.

    Other arguments can be cross-validated; see
    http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html

    Parameters
    ----------
    X : 2d np.array
        The matrix of features, one example per row.

    y : list
        The list of labels for rows in `X`.

    Returns
    -------
    sklearn.linear_model.LogisticRegression
        A trained model instance, the best model found.

    """
    basemod = LogisticRegression(
        fit_intercept=True, solver='liblinear', multi_class='auto')
    cv = 5
    param_grid = {
        'fit_intercept': [True, False],
        'C': [0.4, 0.6, 0.8, 1.0, 2.0, 3.0],
        'penalty': ['l1', 'l2']}
    best_mod = utils.fit_classifier_with_crossvalidation(
        X, y, basemod, cv, param_grid)
    return best_mod
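
# The l1/l2 contrast described in the docstring is easy to verify: fit with
# each penalty and count the zero-valued weights. A small demonstration using
# sklearn directly, with synthetic stand-in data from `make_classification`:

from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

X_demo, y_demo = make_classification(
    n_samples=200, n_features=50, n_informative=5, random_state=0)
for penalty in ('l1', 'l2'):
    demo_mod = LogisticRegression(
        penalty=penalty, C=0.4, solver='liblinear').fit(X_demo, y_demo)
    # l1 typically zeroes out many weights; l2 only shrinks them.
    print(penalty, (demo_mod.coef_ == 0).sum(),
          "zero weights out of", demo_mod.coef_.size)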
def test_rnn_classifier_cross_validation(model_class, X_sequence):
    train, test, vocab = X_sequence
    mod = model_class(vocab, max_iter=2)
    X, y = zip(*train)
    best_mod = utils.fit_classifier_with_crossvalidation(
        X, y, mod, cv=2, param_grid={'hidden_dim': [10, 20]})
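
# These tests follow the pytest fixture pattern: `model_class` and
# `X_sequence` are injected by fixtures defined elsewhere. A hypothetical
# sketch of what an `X_sequence`-style fixture could return, inferred from
# the `zip(*train)` unpacking above (the real fixture may differ):

import pytest

@pytest.fixture
def X_sequence():
    vocab = ['a', 'b', '$UNK']
    # (sequence, label) pairs; `zip(*train)` splits them into X and y.
    train = [(list('ab'), 'good'), (list('ba'), 'bad')] * 4
    test = [(list('ab'), 'good'), (list('ba'), 'bad')]
    return train, test, vocab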
# In[40]:

for timestep in toy_proba[0]:
    print(dict(zip(toy_vocab, timestep)))

# ### Cross-validation

# You can use `utils.fit_classifier_with_crossvalidation` to cross-validate
# these models. Just be sure to set `scoring=None` so that the sklearn model
# selection methods use the `score` method of `ContextualColorDescriber`,
# which is an alias for `listener_accuracy`:

# In[41]:

best_mod = utils.fit_classifier_with_crossvalidation(
    toy_color_seqs_train, toy_word_seqs_train, toy_mod,
    cv=2,
    scoring=None,
    param_grid={'hidden_dim': [10, 20]})

# ## Baseline SCC model

# Just to show how all the pieces come together, here's a very basic SCC
# experiment using the core code and very simplistic assumptions (which you
# will revisit in the assignment) about how to represent the examples:

# To facilitate quick development, we'll restrict attention to the two-word
# examples:

# In[42]:

dev_corpus = ColorsCorpusReader(COLORS_SRC_FILENAME, word_count=2)

# In[43]:
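
# A plausible continuation for the cell above, assuming
# `ColorsCorpusReader.read()` is the iterator over corpus examples (verify
# against your version of the corpus reader):

dev_examples = list(dev_corpus.read())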