Example #1
import numpy as np
from numpy.testing import assert_allclose
from pytest import raises
from sklearn.preprocessing import StandardScaler
from pyglmnet import GLM, GLMCV, simulate_glm


def test_glmcv(distr):
    """Test GLMCV class."""
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:

        if distr == 'gamma' and solver == 'cdfast':
            continue

        glm = GLMCV(distr,
                    learning_rate=learning_rate,
                    solver=solver,
                    score_metric=score_metric,
                    cv=2)

        assert repr(glm)

        np.random.seed(glm.random_state)
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = simulate_glm(glm.distr, beta0, beta, X_train)

        X_train = scaler.fit_transform(X_train)
        glm.fit(X_train, y_train)

        beta_ = glm.beta_
        assert_allclose(beta, beta_, atol=0.5)  # check fit

        y_pred = glm.predict(X_train)  # X_train was already scaled above
        assert y_pred.shape[0] == X_train.shape[0]

    # test that fit() rejects an invalid score_metric
    glm.score_metric = 'bad_score_metric'  # reuse last glm
    raises(ValueError, glm.fit, X_train, y_train)
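
# Note: this test takes ``distr`` as an argument, so it is presumably driven
# by test parametrization. A minimal sketch of the decorator that would
# supply it, assuming pytest; the distribution list mirrors Example #2:

import pytest

distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit', 'gamma']


@pytest.mark.parametrize("distr", distrs)
def test_glmcv(distr):
    ...  # body as in Example #1 above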
Example #2
import numpy as np
from numpy.testing import assert_allclose
from sklearn.preprocessing import StandardScaler
from pyglmnet import GLMCV, simulate_glm


def test_glmcv():
    """Test GLMCV class."""
    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit', 'gamma']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            if distr == 'gamma' and solver == 'cdfast':
                continue

            glm = GLMCV(distr,
                        learning_rate=learning_rate,
                        solver=solver,
                        score_metric=score_metric)

            assert repr(glm)

            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            y_pred = glm.predict(X_train)  # X_train was already scaled above
            assert y_pred.shape[0] == X_train.shape[0]
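
# For context, simulate_glm draws targets whose conditional mean is the
# inverse link applied to beta0 + X @ beta. A self-contained sketch of that
# recipe for the gaussian case (identity link); the unit noise scale is an
# assumption here, not a documented simulate_glm default:

import numpy as np

rng = np.random.default_rng(0)
n_samples, n_features = 100, 10
beta0 = rng.normal()
beta = rng.normal(size=n_features)
X = rng.normal(size=(n_samples, n_features))

mu = beta0 + X @ beta              # identity link: mean equals linear predictor
y = rng.normal(loc=mu, scale=1.0)  # gaussian observations around the mean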
Example #4
# Split the data into training and test sets

from sklearn.model_selection import train_test_split  # noqa
from pyglmnet import GLMCV  # noqa
import matplotlib.pyplot as plt  # noqa

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.33, random_state=0)

########################################################
# Fit a gaussian GLM with elastic-net regularization

# use the default value for reg_lambda
glm = GLMCV(distr='gaussian', alpha=0.05, score_metric='pseudo_R2')

# fit model
glm.fit(X_train, y_train)

# score the test set prediction
y_test_hat = glm.predict(X_test)
print ("test set pseudo $R^2$ = %f" % glm.score(X_test, y_test))

########################################################
# Plot the true and predicted test set target values

plt.plot(y_test[:50], 'ko-')
plt.plot(y_test_hat[:50], 'ro-')
plt.legend(['true', 'pred'], frameon=False)
plt.xlabel('Counties')
plt.ylabel('Per capita violent crime')

plt.tick_params(axis='y', right=False)
plt.tick_params(axis='x', top=False)
ax = plt.gca()
ax.spines['top'].set_visible(False)
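
# For a gaussian GLM a deviance-based pseudo-R^2 reduces to the ordinary
# coefficient of determination (up to the choice of null model), so the
# score above can be sanity-checked directly. A sketch reusing y_test and
# y_test_hat from the snippet above:

import numpy as np  # noqa

ss_res = np.sum((y_test - y_test_hat) ** 2)       # residual sum of squares
ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)  # total sum of squares
print("ordinary R^2 = %f" % (1.0 - ss_res / ss_tot))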
Example #5

# Split the data into training and test sets

from sklearn.model_selection import train_test_split  # noqa
from pyglmnet import GLMCV  # noqa

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.33, random_state=0)

########################################################
# Fit a gaussian GLM with elastic-net regularization

# use the default value for reg_lambda
glm = GLMCV(distr='gaussian', alpha=0.05, score_metric='pseudo_R2')

# fit model
glm.fit(X_train, y_train)

# score the test set prediction
y_test_hat = glm.predict(X_test)
print ("test set pseudo $R^2$ = %f" % glm.score(X_test, y_test))

########################################################
# Now use plain grid search cv to compare

import numpy as np  # noqa
from sklearn.model_selection import GridSearchCV  # noqa
from sklearn.model_selection import KFold  # noqa

# sklearn.cross_validation was removed from scikit-learn; its StratifiedKFold
# also applies only to classification targets, so plain K-fold is used here
# for this continuous target
cv = KFold(n_splits=3)

reg_lambda = np.logspace(np.log(0.5), np.log(0.01), 10,
                         base=np.exp(1))
param_grid = [{'reg_lambda': reg_lambda}]
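
# The excerpt stops before the search itself is run. A minimal sketch of the
# remaining wiring, assuming pyglmnet's GLM follows the scikit-learn
# estimator interface; the estimator arguments mirror the GLMCV call above
# and are illustrative:

from pyglmnet import GLM  # noqa

glmcv = GridSearchCV(GLM(distr='gaussian', alpha=0.05,
                         score_metric='pseudo_R2'),
                     param_grid, cv=cv)
glmcv.fit(X_train, y_train)
print("grid search test set pseudo R^2 = %f" % glmcv.score(X_test, y_test))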
Example #6
import numpy as np
import scipy as sp
import scipy.sparse  # ensure the sp.sparse namespace is loaded
import matplotlib.pyplot as plt
from pyglmnet import GLM, simulate_glm

n_samples, n_features = 1000, 100  # sizes assumed; not defined in the excerpt

beta0 = np.random.rand()
beta = sp.sparse.random(1, n_features, density=0.2).toarray()[0]
# simulate data
Xtrain = np.random.normal(0.0, 1.0, [n_samples, n_features])
ytrain = simulate_glm('poisson', beta0, beta, Xtrain)
Xtest = np.random.normal(0.0, 1.0, [n_samples, n_features])
ytest = simulate_glm('poisson', beta0, beta, Xtest)

# create an instance of the GLM class
glm = GLM(distr='poisson')

# fit the model on the training data
glm.fit(Xtrain, ytrain)

# predict using fitted model on the test data
yhat = glm.predict(Xtest)

plt.figure()
plt.plot(ytest)
plt.plot(yhat)
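
# A natural follow-up is to check recovery of the ground-truth weights
# rather than eyeballing the traces. A sketch, assuming the fitted
# attributes beta_ and beta0_ (beta_ is used the same way in Example #1):

print("max abs error in beta: %f" % np.max(np.abs(glm.beta_ - beta)))
print("intercept: true %.3f, fitted %.3f" % (beta0, glm.beta0_))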


# %%
###############################################################################
# Ground truth with kernel-circuit, then infer the kernels
###############################################################################
# %%
### GLM network simulation
def GLM_net(allK, dcs, S):
    """
    Simulate a GLM network given all response and coupling kernels and the stimulus