def test_glmcv(distr):
    """Test GLMCV class.

    Checks that GLM rejects invalid constructor arguments, then, for each
    solver, fits a GLMCV model on simulated data and verifies that the
    recovered coefficients are close to the simulated ones and that
    predictions have one entry per sample. Finally checks that ``fit``
    rejects an unknown ``score_metric``.

    Parameters
    ----------
    distr : str
        Distribution name forwarded to ``GLMCV``.
    """
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    # ``np.float`` was removed from NumPy (deprecated in 1.20); plain float
    # arithmetic yields the same scaling factor.
    scale = 1. / (n_features + 1.)
    beta0 = scale * np.random.normal(0.0, 1.0)
    beta = scale * np.random.normal(0.0, 1.0, (n_features,))

    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:

        # this distribution/solver combination is skipped
        if distr == 'gamma' and solver == 'cdfast':
            continue

        glm = GLMCV(distr, learning_rate=learning_rate,
                    solver=solver, score_metric=score_metric, cv=2)

        assert repr(glm)

        # seed from the estimator so the simulated data is reproducible
        np.random.seed(glm.random_state)
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = simulate_glm(glm.distr, beta0, beta, X_train)

        X_train = scaler.fit_transform(X_train)
        glm.fit(X_train, y_train)

        beta_ = glm.beta_
        assert_allclose(beta, beta_, atol=0.5)  # check fit

        # X_train is already scaled at this point; only the output shape
        # is checked here.
        y_pred = glm.predict(scaler.transform(X_train))
        assert y_pred.shape[0] == X_train.shape[0]

    # test picky score_metric check within fit().
    glm.score_metric = 'bad_score_metric'  # reuse last glm
    raises(ValueError, glm.fit, X_train, y_train)
def test_glmcv():
    """Test GLMCV class.

    For every solver/distribution pair (except the skipped gamma + cdfast
    combination), fits a GLMCV model on simulated data and verifies that
    the recovered coefficients are close to the simulated ones and that
    predictions have one entry per sample.
    """
    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    # ``np.float`` was removed from NumPy (deprecated in 1.20); plain float
    # arithmetic yields the same scaling factor.
    scale = 1. / (n_features + 1.)
    beta0 = scale * np.random.normal(0.0, 1.0)
    beta = scale * np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit',
              'gamma']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            # this distribution/solver combination is skipped
            if distr == 'gamma' and solver == 'cdfast':
                continue

            glm = GLMCV(distr, learning_rate=learning_rate,
                        solver=solver, score_metric=score_metric)

            # plain asserts instead of the nose helpers
            assert repr(glm)

            # seed from the estimator so the simulated data is reproducible
            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            # X_train is already scaled at this point; only the output
            # shape is checked here.
            y_pred = glm.predict(scaler.transform(X_train))
            assert y_pred.shape[0] == X_train.shape[0]
# Split the data into training and test sets
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.33, random_state=0)

########################################################
# Fit a gaussian distributed GLM with elastic net regularization

# use the default value for reg_lambda
glm = GLMCV(distr='gaussian', alpha=0.05, score_metric='pseudo_R2')

# fit model
glm.fit(X_train, y_train)

# score the test set prediction
y_test_hat = glm.predict(X_test)
print("test set pseudo $R^2$ = %f" % glm.score(X_test, y_test))

########################################################
# Plot the true and predicted test set target values

# only the first 50 test samples are drawn
plt.plot(y_test[:50], 'ko-')
plt.plot(y_test_hat[:50], 'ro-')
plt.legend(['true', 'pred'], frameon=False)
plt.xlabel('Counties')
plt.ylabel('Per capita violent crime')

# Hide the right/top ticks. Booleans are required: current Matplotlib no
# longer interprets the string 'off' as False, so the non-empty string
# would be truthy and switch the ticks on instead.
plt.tick_params(axis='y', right=False)
plt.tick_params(axis='x', top=False)
ax = plt.gca()
ax.spines['top'].set_visible(False)
# Split the data into training and test sets
X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.33, random_state=0)

########################################################
# Fit a gaussian distributed GLM with elastic net regularization

# use the default value for reg_lambda
glm = GLMCV(distr='gaussian', alpha=0.05, score_metric='pseudo_R2')

# fit model
glm.fit(X_train, y_train)

# score the test set prediction
y_test_hat = glm.predict(X_test)
print("test set pseudo $R^2$ = %f" % glm.score(X_test, y_test))

########################################################
# Now use plain grid search cv to compare

import numpy as np  # noqa
from sklearn.model_selection import GridSearchCV  # noqa
# ``sklearn.cross_validation`` was removed in scikit-learn 0.20; the
# splitter now lives in ``sklearn.model_selection``.
from sklearn.model_selection import StratifiedKFold  # noqa

# The model_selection splitter takes the number of folds; the labels are
# supplied when the split is performed rather than at construction.
# NOTE(review): stratified splitting expects discrete class labels. If
# y_train is a continuous target, KFold may be what is intended - confirm.
cv = StratifiedKFold(n_splits=3)

# 10 log-spaced regularization strengths from 0.5 down to 0.01
reg_lambda = np.logspace(np.log(0.5), np.log(0.01), 10, base=np.exp(1))
param_grid = [{'reg_lambda': reg_lambda}]
# Ground-truth parameters for the simulation: a random scalar intercept and
# a sparse coefficient vector (density=0.2) flattened to a 1-D array of
# length n_features.
beta0 = np.random.rand()
beta = sp.sparse.random(1, n_features, density=0.2).toarray()[0]

# simulate data
# Train and test sets are drawn independently but share beta0 and beta.
# The order of these calls determines the random draws.
Xtrain = np.random.normal(0.0, 1.0, [n_samples, n_features])
ytrain = simulate_glm('poisson', beta0, beta, Xtrain)
Xtest = np.random.normal(0.0, 1.0, [n_samples, n_features])
ytest = simulate_glm('poisson', beta0, beta, Xtest)

# create an instance of the GLM class
glm = GLM(distr='poisson')

# fit the model on the training data
glm.fit(Xtrain, ytrain)

# predict using fitted model on the test data
yhat = glm.predict(Xtest)

# overlay the simulated test responses and the model predictions
plt.figure()
plt.plot(ytest)
plt.plot(yhat)

# %%
###############################################################################
# Ground truth with kernel-circuit, then infer the kernels
###############################################################################

# %%
### GLM network simulation
def GLM_net(allK, dcs, S):
    """
    Simulate a GLM network given all response and coupling kernels and the stimulus