Пример #1
0
def test_group_lasso():
    """Fit a softplus GLM on simulated data whose group-2 weights are zero.

    This is a smoke test: nothing is asserted, so it passes as long as
    simulation, scaling and fitting run without raising.
    """
    n_samples, n_features = 100, 90

    # one label per feature; indices 29 and 59 keep the initial label 0
    groups = np.zeros(n_features)
    spans = (slice(0, 29), slice(30, 59), slice(60, None))
    for label, span in enumerate(spans, start=1):
        groups[span] = label

    # ground-truth parameters, with every group-2 coefficient zeroed
    beta0 = np.random.normal(0.0, 1.0, 1)
    beta = np.random.normal(0.0, 1.0, n_features)
    beta[groups == 2] = 0.

    model = GLM(distr='softplus', alpha=1.)

    # draw a design matrix and simulate the response from the true parameters
    X_raw = np.random.normal(0.0, 1.0, [n_samples, n_features])
    y_sim = simulate_glm(model.distr, beta0, beta, X_raw)

    # standardize the features, then fit
    X_scaled = StandardScaler().fit(X_raw).transform(X_raw)
    model.fit(X_scaled, y_sim)
Пример #2
0
def test_multinomial():
    """Check gradient, fit, predict_proba, simulate and score (multinomial)."""
    model = GLM(distr='multinomial',
                reg_lambda=np.array([0.0, 0.1, 0.2]),
                learning_rate=2e-1,
                tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])
    n_samples = X.shape[0]

    # gradient at all-zero parameters: row 0 is passed first, the remaining
    # rows second
    params = np.zeros([4, 2])
    grad_beta0, grad_beta = model._grad_L2loss(params[0], params[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])

    model.fit(X, y)
    y_pred_proba = model.predict_proba(X)
    # expected layout: n_lambdas x n_samples x n_classes
    assert_equal(y_pred_proba.shape, (3, n_samples, 2))

    # predictions for the first lambda and a uniform baseline of equal shape
    yhat = y_pred_proba[0]
    ynull = np.ones(yhat.shape) / yhat.shape[1]

    # the score of the estimator sliced at the last lambda must be positive
    assert_true(model[-1].score(X, y) > 0.)

    # simulate returns one entry per sample
    simulated = model.simulate(model.fit_[0]['beta0'],
                               model.fit_[0]['beta'],
                               X)
    assert_equal(len(simulated), n_samples)

    # a sliced estimator produces a single score
    sliced_scores = model[-1].score(X, y)
    assert_equal(sliced_scores.shape[0], 1)

    # the full estimator produces one score per lambda
    all_scores = model.score(X, y)
    assert_equal(all_scores.shape[0], y_pred_proba.shape[0])
Пример #3
0
def test_compare_sklearn(solver):
    """Compare GLM predictions with scikit-learn's ElasticNet.

    Parameters
    ----------
    solver : forwarded to ``GLM`` as its ``solver`` argument.
    """
    def rmse(a, b):
        """Root-mean-square difference between ``a`` and ``b``."""
        residual = a - b
        return np.sqrt(np.mean(residual ** 2))

    X, Y, coef_ = make_regression(
        n_samples=1000, n_features=500,
        noise=0.1, n_informative=10, coef=True,
        random_state=42)

    alpha, l1_ratio = 0.1, 0.5

    sk_model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, tol=1e-5)
    sk_model.fit(X, Y)

    # settings shared by both GLM instances; note that GLM's ``alpha`` takes
    # the l1 ratio and ``reg_lambda`` takes sklearn's alpha
    shared = dict(distr='gaussian', alpha=l1_ratio, reg_lambda=alpha,
                  solver=solver, tol=1e-6)

    glm = GLM(max_iter=500, **shared)
    glm.fit(X, Y)

    y_sk = sk_model.predict(X)
    y_pg = glm.predict(X)
    rmse_gap = abs(rmse(Y, y_sk) - rmse(Y, y_pg))
    assert rmse_gap < 0.5

    # without an intercept the fitted beta0_ must be exactly zero
    glm = GLM(max_iter=5, fit_intercept=False, **shared)
    glm.fit(X, Y)
    assert glm.beta0_ == 0.

    glm.predict(X)
Пример #4
0
def test_multinomial():
    """Check gradient, fit, predict, score and simulate (multinomial)."""
    model = GLM(distr='multinomial',
                reg_lambda=np.array([0.0, 0.1, 0.2]),
                learning_rate=2e-1,
                tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])
    n_samples = X.shape[0]

    # gradient at all-zero parameters: row 0 is passed first, the remaining
    # rows second
    params = np.zeros([4, 2])
    grad_beta0, grad_beta = model._grad_L2loss(params[0], params[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])

    model.fit(X, y)
    y_pred = model.predict(X)
    # expected layout: n_lambdas x n_samples x n_classes
    assert_equal(y_pred.shape, (3, n_samples, 2))

    # predictions for the first lambda and a uniform baseline of equal shape
    yhat = y_pred[0]
    ynull = np.ones(yhat.shape) / yhat.shape[1]

    # pseudo_R2 against the uniform baseline should be greater than 0
    pseudo_r2 = model.score(y, yhat, ynull, method='pseudo_R2')
    assert_true(pseudo_r2 > 0.)
    model.score(y, yhat)

    # simulate returns one entry per sample
    simulated = model.simulate(model.fit_[0]['beta0'],
                               model.fit_[0]['beta'],
                               X)
    assert_equal(len(simulated), n_samples)

    # these argument combinations should raise ValueError
    for bad_args in ((y, y, y, 'pseudo_R2'), (y, y, None, 'deviance')):
        assert_raises(ValueError, model.score, *bad_args)
Пример #5
0
def test_multinomial():
    """Test all multinomial functionality.

    Covers the L2-loss gradient, fit, predict, pseudo_R2, deviance and
    simulate, and verifies that pseudo_R2 and deviance reject labels passed
    in place of predicted probabilities.
    """
    glm = GLM(distr='multinomial', reg_lambda=np.array([0.0, 0.1, 0.2]),
              tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # test gradient
    beta = np.zeros([4, 2])
    grad_beta0, grad_beta = glm.grad_L2loss(beta[0], beta[1:], 0, X, y)
    assert grad_beta0[0] != grad_beta0[1]

    glm.fit(X, y)
    y_pred = glm.predict(X)
    # n_lambdas x n_samples x n_classes
    assert_equal(y_pred.shape, (3, X.shape[0], 2))

    # pick one as yhat
    yhat = y_pred[0]
    # uniform prediction
    ynull = np.ones(yhat.shape) / yhat.shape[1]
    # pseudo_R2 should be greater than 0
    assert_true(glm.pseudo_R2(y, yhat, ynull) > 0.)
    glm.deviance(y, yhat)
    assert_equal(len(glm.simulate(glm.fit_[0]['beta0'],
                                  glm.fit_[0]['beta'],
                                  X)),
                 X.shape[0])

    # These calls should raise an exception.  The failure is signalled from
    # the ``else`` clause: an ``assert False`` placed inside the ``try`` body
    # would itself be caught by ``except Exception`` and the check could
    # never fail.
    try:
        glm.pseudo_R2(y, y, y)
    except Exception:
        pass
    else:
        raise AssertionError("pseudo_R2 did not raise for invalid input")

    try:
        glm.deviance(y, y)
    except Exception:
        pass
    else:
        raise AssertionError("deviance did not raise for invalid input")
Пример #6
0
def glm_bernoulli_pyglmnet(Xr, Yr, Xt):
    """Fit a binomial GLM on (Xr, Yr) and return its prediction for Xt.

    The model is built with ``alpha=0.`` and a single ``reg_lambda`` of 0;
    the first entry of ``predict(Xt)`` is returned.
    """
    # NOTE: distr='poissonexp' was considered here, but it did not appear to
    # be listed as an option for distr, so 'binomial' is used.
    model = GLM(distr='binomial', alpha=0., reg_lambda=[0.], tol=1e-6)
    model.fit(Xr, Yr)
    return model.predict(Xt)[0]
Пример #7
0
def test_multinomial():
    """Run the multinomial GLM through gradient, fit, predict and scoring."""
    lambdas = np.array([0.0, 0.1, 0.2])
    glm_mn = GLM(distr='multinomial', reg_lambda=lambdas,
                 learning_rate=2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # gradient evaluated at an all-zero parameter matrix
    zeros = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(zeros[0], zeros[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])

    glm_mn.fit(X, y)
    y_pred = glm_mn.predict(X)
    expected_shape = (3, X.shape[0], 2)  # n_lambdas x n_samples x n_classes
    assert_equal(y_pred.shape, expected_shape)

    # first-lambda prediction and a uniform prediction of the same shape
    yhat = y_pred[0]
    ynull = np.ones(yhat.shape) / yhat.shape[1]

    # pseudo_R2 should be greater than 0
    assert_true(glm_mn.score(y, yhat, ynull, method='pseudo_R2') > 0.)
    glm_mn.score(y, yhat)

    # simulating from the first fit gives one value per row of X
    y_sim = glm_mn.simulate(glm_mn.fit_[0]['beta0'],
                            glm_mn.fit_[0]['beta'],
                            X)
    assert_equal(len(y_sim), X.shape[0])

    # passing labels where predictions are expected should raise
    invalid_calls = [(y, y, y, 'pseudo_R2'), (y, y, None, 'deviance')]
    for call_args in invalid_calls:
        assert_raises(ValueError, glm_mn.score, *call_args)
Пример #8
0
def test_group_lasso():
    """Smoke-test a softplus GLM fit on data with zeroed group-2 weights.

    Nothing is asserted; the test passes when simulation, scaling and
    fitting complete without raising.
    """
    n_samples, n_features = 100, 90

    # feature-to-group labels; indices 29 and 59 are left at label 0
    groups = np.zeros(n_features)
    groups[:29] = 1
    groups[30:59] = 2
    groups[60:n_features] = 3

    # true parameters, with the group-2 coefficients forced to zero
    beta0 = np.random.normal(0.0, 1.0, 1)
    beta = np.random.normal(0.0, 1.0, n_features)
    in_group_two = groups == 2
    beta[in_group_two] = 0.

    estimator = GLM(distr='softplus', alpha=1.)

    # simulated training data
    X_sim = np.random.normal(0.0, 1.0, [n_samples, n_features])
    y_sim = simulate_glm(estimator.distr, beta0, beta, X_sim)

    # fit on standardized features
    scaler = StandardScaler().fit(X_sim)
    X_std = scaler.transform(X_sim)
    estimator.fit(X_std, y_sim)
Пример #9
0
def test_tikhonov():
    """Tikhonov regularization test.

    Builds a prior covariance over the coefficients, simulates softplus
    data from coefficients sampled under that prior, derives a Tikhonov
    matrix from the covariance, and fits a GLM with each solver.  Scores
    are computed but not asserted, so this is a smoke test.
    """
    n_samples, n_features = 100, 10

    # design covariance matrix of parameters
    # (builtin ``float`` replaces the ``np.float`` alias, which was removed
    # in NumPy 1.24)
    Gam = 15.
    PriorCov = np.zeros([n_features, n_features])
    for i in np.arange(0, n_features):
        for j in np.arange(i, n_features):
            PriorCov[i, j] = np.exp(-Gam * 1. / (float(n_features) ** 2) *
                                    (float(i) - float(j)) ** 2)
            PriorCov[j, i] = PriorCov[i, j]
            if i == j:
                PriorCov[i, j] += 0.01
    PriorCov = 1. / np.max(PriorCov) * PriorCov

    # sample parameters as multivariate normal
    beta0 = np.random.randn()
    beta = np.random.multivariate_normal(np.zeros(n_features), PriorCov)

    # sample train and test data
    glm_sim = GLM(distr='softplus', score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)

    # sklearn.cross_validation no longer exists in current scikit-learn;
    # fall back to it only when model_selection is unavailable
    try:
        from sklearn.model_selection import train_test_split
    except ImportError:
        from sklearn.cross_validation import train_test_split
    Xtrain, Xtest, ytrain, ytest = \
        train_test_split(X, y, test_size=0.5, random_state=42)

    # design tikhonov matrix
    [U, S, V] = np.linalg.svd(PriorCov, full_matrices=False)
    Tau = np.dot(np.diag(1. / np.sqrt(S)), U)
    Tau = 1. / np.sqrt(float(n_samples)) * Tau / Tau.max()

    # fit and score the model once per solver; scores are kept per solver
    # so one fit does not overwrite the other
    R2_train, R2_test = dict(), dict()
    for solver in ('batch-gradient', 'cdfast'):
        glm_tikhonov = GLM(distr='softplus',
                           alpha=0.0,
                           Tau=Tau,
                           solver=solver,
                           tol=1e-5,
                           score_metric='pseudo_R2')
        glm_tikhonov.fit(Xtrain, ytrain)

        R2_train[solver] = glm_tikhonov.score(Xtrain, ytrain)
        R2_test[solver] = glm_tikhonov.score(Xtest, ytest)
Пример #10
0
def test_tikhonov():
    """Tikhonov regularization test.

    Builds a prior covariance over the coefficients, simulates softplus
    data from coefficients sampled under that prior, derives a Tikhonov
    matrix from the covariance, and fits a GLM with each solver.  Scores
    are computed but not asserted, so this is a smoke test.
    """
    n_samples, n_features = 100, 10

    # design covariance matrix of parameters
    # (builtin ``float`` replaces the ``np.float`` alias, which was removed
    # in NumPy 1.24)
    Gam = 15.
    PriorCov = np.zeros([n_features, n_features])
    for i in np.arange(0, n_features):
        for j in np.arange(i, n_features):
            PriorCov[i, j] = np.exp(-Gam * 1. / (float(n_features) ** 2) *
                                    (float(i) - float(j)) ** 2)
            PriorCov[j, i] = PriorCov[i, j]
            if i == j:
                PriorCov[i, j] += 0.01
    PriorCov = 1. / np.max(PriorCov) * PriorCov

    # sample parameters as multivariate normal
    beta0 = np.random.randn()
    beta = np.random.multivariate_normal(np.zeros(n_features), PriorCov)

    # sample train and test data
    glm_sim = GLM(distr='softplus', score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(glm_sim.distr, beta0, beta, X)

    from sklearn.model_selection import train_test_split
    Xtrain, Xtest, ytrain, ytest = \
        train_test_split(X, y, test_size=0.5, random_state=42)

    # design tikhonov matrix
    [U, S, V] = np.linalg.svd(PriorCov, full_matrices=False)
    Tau = np.dot(np.diag(1. / np.sqrt(S)), U)
    Tau = 1. / np.sqrt(float(n_samples)) * Tau / Tau.max()

    # fit and score the model once per solver; scores are kept per solver
    # so one fit does not overwrite the other
    R2_train, R2_test = dict(), dict()
    for solver in ('batch-gradient', 'cdfast'):
        glm_tikhonov = GLM(distr='softplus',
                           alpha=0.0,
                           Tau=Tau,
                           solver=solver,
                           tol=1e-3,
                           score_metric='pseudo_R2')
        glm_tikhonov.fit(Xtrain, ytrain)

        R2_train[solver] = glm_tikhonov.score(Xtrain, ytrain)
        R2_test[solver] = glm_tikhonov.score(Xtest, ytest)
Пример #11
0
def test_glmnet():
    """Test glmnet.

    For every solver/distribution pair: simulate data from known
    coefficients, fit on standardized features, and check the recovered
    coefficients and the prediction shape.  Afterwards exercise slicing,
    copying, scoring and ``fit_predict``.
    """
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    n_lambda = 10

    # coefficients, scaled down by 1 / (n_features + 1)
    # (builtin ``float`` replaces the ``np.float`` alias, which was removed
    # in NumPy 1.24)
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['softplus', 'poisson', 'gaussian', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)

            assert_true(repr(glm))

            # reseed so every configuration sees the same design matrix
            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing (uses the last model fitted in the loop above).
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(X_train, y_train)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='softplus')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict; an X with an extra leading axis must be rejected
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
Пример #12
0
def test_glmnet():
    """Test glmnet.

    For every solver/distribution pair: simulate data from known
    coefficients, fit on standardized features, and check the recovered
    coefficients and the prediction shape.  Afterwards exercise slicing,
    copying, scoring and ``fit_predict``.
    """
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    n_lambda = 10

    # coefficients, scaled down by 1 / (n_features + 1)
    # (builtin ``float`` replaces the ``np.float`` alias, which was removed
    # in NumPy 1.24)
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['poisson', 'poissonexp', 'normal', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate, solver=solver)

            assert_true(repr(glm))

            # reseed so every configuration sees the same design matrix
            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing (uses the last model fitted in the loop above).
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(y_train, y_pred)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='poisson')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict; an X with an extra leading axis must be rejected
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict, X_train[None, ...],
                  y_train)
Пример #13
0
def test_cv():
    """Check that cross-validating a GLM returns one score per fold."""
    X, y = make_regression()
    lambdas = np.array([0.0, 0.1, 0.2])
    model_mn = GLM(distr='normal', alpha=0.01, reg_lambda=lambdas)
    model_mn.fit(X, y)

    n_folds = 5
    cv = KFold(X.shape[0], n_folds)

    # one score is expected for each of the 5 folds
    fold_scores = cross_val_score(model_mn, X, y, cv=cv,
                                  scoring=simple_cv_scorer)
    assert_equal(len(fold_scores), n_folds)
Пример #14
0
def test_glmnet():
    """Fit each distribution on simulated sparse data, then test slicing.

    For every distribution the recovered coefficients, their density and
    the prediction shape are checked; slicing, copying, deviance and
    ``fit_predict`` are exercised on the last fitted model afterwards.
    """
    scaler = StandardScaler()
    n_samples, n_features = 10000, 100
    density = 0.1
    n_lambda = 10

    # true parameters: a random intercept and a sparse weight column
    beta0 = np.random.rand()
    beta = sps.rand(n_features, 1, density=density).toarray()

    for distr in ('poisson', 'poissonexp', 'normal', 'binomial'):
        # FIXME: why do we need such this learning rate for 'poissonexp'?
        if distr == 'poissonexp':
            learning_rate = 1e-5
        else:
            learning_rate = 1e-4
        glm = GLM(distr, learning_rate=learning_rate)

        assert_true(repr(glm))

        # reseed so every distribution sees the same design matrix
        np.random.seed(glm.random_state)
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = glm.simulate(beta0, beta, X_train)

        X_train = scaler.fit_transform(X_train)
        glm.fit(X_train, y_train)

        # compare the second-to-last fit against the true coefficients
        beta_ = glm.fit_[-2]['beta'][:]
        assert_allclose(beta[:], beta_, atol=0.1)

        # fraction of coefficients above 0.1 should match the true density
        density_ = np.sum(beta_ > 0.1) / float(n_features)
        assert_allclose(density_, density, atol=0.05)

        y_pred = glm.predict(scaler.transform(X_train))
        assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    n_rows = X_train.shape[0]

    # slicing checks on the last fitted model
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)

    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, n_rows))

    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (n_rows, ))

    assert_raises(IndexError, glm.__getitem__, [2])
    glm.deviance(y_train, y_pred)

    # slicing an estimator that has not been fit must fail
    glm = GLM(distr='poisson')
    assert_raises(ValueError, glm.__getitem__, 2)

    # fit_predict works on 2-D input and rejects an extra leading axis
    glm.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm.fit_predict, X_train[None, ...], y_train)
Пример #15
0
def test_cv():
    """Check that cross-validating a gaussian GLM gives one score per fold."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()

    lambdas = [0.0, 0.1, 0.2]
    glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=lambdas)
    glm_normal.fit(X, y)

    n_folds = 5
    cv = KFold(X.shape[0], n_folds)

    # one score is expected for each of the 5 folds
    fold_scores = cross_val_score(glm_normal, X, y, cv=cv,
                                  scoring=simple_cv_scorer)
    assert_equal(len(fold_scores), n_folds)
Пример #16
0
def test_api_input():
    """Check input validation in GLM construction and ``fit``.

    Covers a list passed as y, mismatched shapes, an unknown solver, an
    invalid ``fit_intercept`` and the max-iterations warning.
    """
    rng = np.random.RandomState(1)
    n_samples, n_features = 100, 5

    X = rng.normal(0, 1, (n_samples, n_features))
    y = rng.normal(0, 1, (n_samples, ))

    glm = GLM(distr='gaussian')

    # y given as a plain list is rejected - it has to be an ndarray
    with pytest.raises(ValueError):
        glm.fit(X, list(y))

    # X and y with different numbers of samples are rejected
    with pytest.raises(ValueError):
        GLM().fit(X, y[3:])

    # valid input: none of these calls should raise
    glm.fit(X, y)
    glm.predict(X)
    glm.score(X, y)

    # an unknown solver is only reported when fit is called
    glm = GLM(distr='gaussian', solver='test')
    with pytest.raises(ValueError, match="solver must be one of"):
        glm.fit(X, y)

    # an invalid fit_intercept is reported at construction time
    with pytest.raises(ValueError, match="fit_intercept must be"):
        glm = GLM(distr='gaussian', fit_intercept='blah')

    # two iterations are expected to trigger the max-iterations warning
    glm = GLM(distr='gaussian', max_iter=2)
    with pytest.warns(UserWarning, match='Reached max number of iterat'):
        glm.fit(X, y)
Пример #17
0
def test_cv():
    """Check that cross-validating a gaussian GLM gives one score per fold."""
    # XXX: don't use scikit-learn for tests.
    X, y = make_regression()

    glm_normal = GLM(distr='gaussian', alpha=0.01, reg_lambda=0.1)
    glm_normal.fit(X, y)

    n_folds = 5
    cv = KFold(X.shape[0], n_folds)

    # one score is expected for each of the 5 folds
    fold_scores = cross_val_score(glm_normal, X, y, cv=cv,
                                  scoring=simple_cv_scorer)
    assert_equal(len(fold_scores), n_folds)
Пример #18
0
def test_pseudoR2():
    """Check that scoring with the pseudo_R2 metric returns a float."""
    n_samples, n_features = 1000, 100

    # random ground-truth parameters
    beta0 = np.random.rand()
    beta = np.random.normal(0.0, 1.0, n_features)

    # simulate data using the distribution of the model under test
    model = GLM(score_metric='pseudo_R2')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(model.distr, beta0, beta, X)

    model.fit(X, y)
    result = model.score(X, y)

    assert isinstance(result, float)
Пример #19
0
def test_deviance():
    """Check that scoring with the deviance metric returns a float."""
    n_samples, n_features = 1000, 100

    # random ground-truth parameters
    beta0 = np.random.normal(0.0, 1.0, 1)
    beta = np.random.normal(0.0, 1.0, n_features)

    # simulate data using the distribution of the model under test
    model = GLM(score_metric='deviance')
    X = np.random.randn(n_samples, n_features)
    y = simulate_glm(model.distr, beta0, beta, X)

    model.fit(X, y)
    result = model.score(X, y)

    assert_true(isinstance(result, float))
Пример #20
0
def test_accuracy():
    """Check that scoring with the accuracy metric returns a float."""
    n_samples, n_features, n_classes = 1000, 100, 2

    # random ground-truth parameters, one weight column per class
    beta0 = np.random.normal(0.0, 1.0, 1)
    beta = np.random.normal(0.0, 1.0, (n_features, n_classes))

    # simulate data, then reduce each simulated row to the index of its
    # largest entry to obtain labels
    model = GLM(distr='binomial', score_metric='accuracy')
    X = np.random.randn(n_samples, n_features)
    y_sim = simulate_glm(model.distr, beta0, beta, X)
    y = np.argmax(y_sim, axis=1)

    model.fit(X, y)
    result = model.score(X, y)

    assert_true(isinstance(result, float))
Пример #21
0
def test_group_lasso():
    """Check that group lasso zeroes coefficients group by group.

    After fitting, every labelled group must have coefficients that are
    either all zero or all nonzero, and at least one group must sum to zero.
    """
    n_samples, n_features = 100, 90

    # one label per feature; indices 29 and 59 keep the initial label 0
    groups = np.zeros(n_features)
    spans = (slice(0, 29), slice(30, 59), slice(60, None))
    for label, span in enumerate(spans, start=1):
        groups[span] = label

    # ground-truth parameters, with every group-2 coefficient zeroed
    beta0 = np.random.normal(0.0, 1.0, 1)
    beta = np.random.normal(0.0, 1.0, n_features)
    beta[groups == 2] = 0.

    glm_group = GLM(distr='softplus', alpha=1., reg_lambda=0.2, group=groups)

    # simulate training data, seeded from the model's random_state
    np.random.seed(glm_group.random_state)
    X_raw = np.random.normal(0.0, 1.0, [n_samples, n_features])
    y_sim = simulate_glm(glm_group.distr, beta0, beta, X_raw)

    # standardize the features, then fit
    X_scaled = StandardScaler().fit(X_raw).transform(X_raw)
    glm_group.fit(X_scaled, y_sim)

    fitted = glm_group.beta_
    # label 0 is skipped in both checks below
    labelled_ids = [gid for gid in np.unique(groups) if gid != 0]

    # within each group the coefficients are all nonzero or all zero
    for gid in labelled_ids:
        members = fitted[groups == gid]
        n_nonzero = np.count_nonzero(members != 0.0)
        assert n_nonzero in (len(members), 0)

    # at least one group must have coefficients summing to zero
    sums_to_zero = [fitted[groups == gid].sum() == 0 for gid in labelled_ids]
    assert np.any(sums_to_zero)
Пример #22
0
def test_glmnet():
    """Test glmnet.

    For every solver/distribution pair: simulate data from known
    coefficients, fit on standardized features, and check the recovered
    coefficients and the number of predictions.  Finally exercise
    ``fit_predict``.
    """
    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients, scaled down by 1 / (n_features + 1)
    # (builtin ``float`` replaces the ``np.float`` alias, which was removed
    # in NumPy 1.24)
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr,
                      learning_rate=learning_rate,
                      solver=solver,
                      score_metric=score_metric)

            assert_true(repr(glm))

            # reseed so every configuration sees the same design matrix
            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape[0], X_train.shape[0])

    # test fit_predict; an X with an extra leading axis must be rejected
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict, X_train[None, ...],
                  y_train)
Пример #23
0
def test_glmnet():
    """Test glmnet.

    For every solver/distribution pair: simulate data from known
    coefficients, fit on standardized features, and check the recovered
    coefficients and the number of predictions.  Finally exercise
    ``fit_predict``.
    """
    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients, scaled down by 1 / (n_features + 1)
    # (builtin ``float`` replaces the ``np.float`` alias, which was removed
    # in NumPy 1.24)
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)

            assert_true(repr(glm))

            # reseed so every configuration sees the same design matrix
            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape[0], X_train.shape[0])

    # test fit_predict; an X with an extra leading axis must be rejected
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
Пример #24
0
def test_glmnet():
    """Fit a poisson GLM on simulated sparse data and check the recovery.

    The second-to-last fit must match the true coefficients and their
    density within tolerance.
    """
    glm = GLM(distr='poisson')
    scaler = StandardScaler()
    n_samples, n_features = 10000, 100
    density = 0.1

    # true parameters: a random intercept and a sparse weight column
    beta0 = np.random.rand()
    beta = sps.rand(n_features, 1, density=density).toarray()

    # simulate on the raw design matrix, fit on the standardized one
    X_raw = np.random.normal(0.0, 1.0, [n_samples, n_features])
    y_train = glm.simulate(beta0, beta, X_raw)
    X_train = scaler.fit_transform(X_raw)
    glm.fit(X_train, y_train)

    # coefficient recovery
    beta_ = glm.fit_params[-2]['beta'][:]
    assert_allclose(beta[:], beta_, atol=0.1)

    # fraction of coefficients above 0.1 should match the true density
    density_ = np.sum(beta_ > 0.1) / float(n_features)
    assert_allclose(density_, density, atol=0.02)
Пример #25
0
def test_multinomial():
    """Run the multinomial GLM through gradient, fit, proba and scoring."""
    lambdas = np.array([0.0, 0.1, 0.2])
    glm_mn = GLM(distr='multinomial', reg_lambda=lambdas,
                 learning_rate=2e-1, tol=1e-10)
    X = np.array([[-1, -2, -3], [4, 5, 6]])
    y = np.array([1, 0])

    # gradient evaluated at an all-zero parameter matrix
    zeros = np.zeros([4, 2])
    grad_beta0, grad_beta = glm_mn._grad_L2loss(zeros[0], zeros[1:], 0, X, y)
    assert_true(grad_beta0[0] != grad_beta0[1])

    glm_mn.fit(X, y)
    y_pred_proba = glm_mn.predict_proba(X)
    expected_shape = (3, X.shape[0], 2)  # n_lambdas x n_samples x n_classes
    assert_equal(y_pred_proba.shape, expected_shape)

    # first-lambda probabilities and a uniform prediction of the same shape
    yhat = y_pred_proba[0]
    ynull = np.ones(yhat.shape) / yhat.shape[1]

    # the score of the estimator sliced at the last lambda must be positive
    last_score = glm_mn[-1].score(X, y)
    assert_true(last_score > 0.)

    # simulating from the first fit gives one value per row of X
    y_sim = glm_mn.simulate(glm_mn.fit_[0]['beta0'],
                            glm_mn.fit_[0]['beta'],
                            X)
    assert_equal(len(y_sim), X.shape[0])

    # a sliced estimator produces a single score
    scorelist = glm_mn[-1].score(X, y)
    assert_equal(scorelist.shape[0], 1)

    # the full estimator produces one score per lambda
    scorelist = glm_mn.score(X, y)
    assert_equal(scorelist.shape[0], y_pred_proba.shape[0])
Пример #26
0
def test_api_input_types_y():
    """Check which kinds of ``y`` are accepted by ``GLM.fit``."""
    n_samples, n_features = 100, 5
    rng = np.random.RandomState(1)

    X = rng.normal(0, 1, (n_samples, n_features))
    y = rng.normal(0, 1, (n_samples, ))

    glm = GLM(distr='gaussian')

    # a plain list must be rejected: y has to be an ndarray
    with pytest.raises(ValueError):
        glm.fit(X, list(y))

    # a y whose length does not match X must be rejected as well
    y_short = y[3:]
    with pytest.raises(ValueError):
        GLM().fit(X, y_short)

    # matching ndarrays go through fit, predict and score without errors
    glm.fit(X, y)
    glm.predict(X)
    glm.score(X, y)
Пример #27
0
n_samples, n_features = X.shape

########################################################
# Split the data into training and test sets

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.33, random_state=0)

########################################################
# Fit a gaussian distributed GLM with elastic net regularization

# use the default value for reg_lambda
glm = GLM(distr='gaussian', alpha=0.05, score_metric='pseudo_R2')

# fit model
glm.fit(X_train, y_train)

# score the test set prediction, using the last model on the
# regularization path
y_test_hat = glm[-1].predict(X_test)
print("test set pseudo $R^2$ = %f" % glm[-1].score(X_test, y_test))

########################################################
# Plot the true and predicted test set target values

# only the first 50 test samples are drawn
plt.plot(y_test[:50], 'ko-')
plt.plot(y_test_hat[:50], 'ro-')
plt.legend(['true', 'pred'], frameon=False)
plt.xlabel('Counties')
plt.ylabel('Per capita violent crime')

# hide the right-hand y ticks; tick_params expects a bool here, and the
# legacy string 'off' is not a reliable way to switch ticks off
plt.tick_params(axis='y', right=False)
Пример #28
0
    def get_benchmarks(self, X_train, y_train, X_test, y_test):
        """Benchmark every environment in ``self.envs`` on one data split.

        For each handled environment a model is fit on the training data,
        scored on the test data, and then re-fit ``self.n_repeats`` times
        to time the fit.

        Parameters
        ----------
        X_train, y_train :
            Training design matrix and targets.
        X_test, y_test :
            Held-out design matrix and targets used for scoring.

        Returns
        -------
        res : dict
            Maps each environment name to a dict. For handled environments
            it holds ``'score'`` (``r2_score`` for 'gaussian'/'poisson',
            ``accuracy_score`` at a 0.5 threshold for 'binomial'; the
            sklearn branch uses ``model.score``) and ``'time'`` (fastest
            fit in milliseconds). Both are ``-999.`` when sklearn does not
            cover the distribution or when the R run fails. Unknown
            environment names map to an empty dict.
        """
        n_repeats = self.n_repeats
        distr = self.distr

        def record_score(entry, y_hat):
            # Store the distribution-appropriate test score; other
            # distributions leave 'score' unset.
            if distr in ['gaussian', 'poisson']:
                entry['score'] = r2_score(y_test, y_hat)
            elif distr == 'binomial':
                entry['score'] = \
                    accuracy_score(y_test, (y_hat > 0.5).astype(int))

        def fastest_fit_ms(fit_once):
            # Best wall-clock duration over n_repeats calls, in milliseconds.
            durations = list()
            for _ in range(n_repeats):
                start = time.time()
                fit_once()
                stop = time.time()
                durations.append(stop - start)
            return np.min(durations) * 1e3

        res = dict()
        for env in self.envs:
            res[env] = dict()
            if env == 'pyglmnet':
                # initialize model
                model = GLM(distr=distr,
                            reg_lambda=[self.reg_lambda],
                            alpha=self.alpha,
                            solver='batch-gradient',
                            score_metric='pseudo_R2')

                # fit-predict-score
                model.fit(X_train, y_train)
                y_test_hat = np.squeeze(model[-1].predict(X_test))
                record_score(res[env], y_test_hat)

                # time
                res[env]['time'] = fastest_fit_ms(
                    lambda: model.fit(X_train, y_train))

            if env == 'sklearn':
                if distr in ['gaussian', 'binomial']:
                    # initialize model
                    if distr == 'gaussian':
                        model = ElasticNet(alpha=self.reg_lambda,
                                           l1_ratio=self.alpha)
                    elif distr == 'binomial':
                        model = SGDClassifier(loss='log',
                                              penalty='elasticnet',
                                              alpha=self.reg_lambda,
                                              l1_ratio=self.alpha)

                    # fit-score (the estimator's own score method is used)
                    model.fit(X_train, y_train)
                    res[env]['score'] = model.score(X_test, y_test)

                    # time
                    res[env]['time'] = fastest_fit_ms(
                        lambda: model.fit(X_train, y_train))
                else:
                    # distribution not covered by the sklearn branch
                    res[env]['score'] = -999.
                    res[env]['time'] = -999.

            if env == 'statsmodels':
                # initialize model
                if distr == 'gaussian':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Gaussian())
                elif distr == 'binomial':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Binomial())
                elif distr == 'poisson':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Poisson())

                # fit-predict-score
                statsmodels_res = model.fit()
                y_test_hat = model.predict(statsmodels_res.params,
                                           exog=sm.add_constant(X_test))
                y_test_hat = np.array(y_test_hat)
                record_score(res[env], y_test_hat)

                # time
                res[env]['time'] = fastest_fit_ms(model.fit)

            if env == 'R':
                # initialize model
                glmnet = importr('glmnet')
                predict = robjects.r('predict')

                def fit_glmnet():
                    return glmnet.glmnet(X_train,
                                         y_train,
                                         family=distr,
                                         alpha=self.alpha,
                                         nlambda=1)

                # fit-predict-score
                try:
                    fit = fit_glmnet()
                    tmp = predict(fit, newx=X_test, s=0)

                    # copy the R predictions element by element into a
                    # flat float array
                    y_test_hat = np.zeros(y_test.shape[0])
                    for i in range(y_test.shape[0]):
                        y_test_hat[i] = tmp[i]

                    record_score(res[env], y_test_hat)

                    # time
                    res[env]['time'] = fastest_fit_ms(fit_glmnet)
                except Exception:
                    # Best-effort: a failing R run is recorded with the
                    # sentinel. Catching Exception (not a bare except)
                    # lets KeyboardInterrupt/SystemExit propagate.
                    res[env]['score'] = -999.
                    res[env]['time'] = -999.

        return res
Пример #29
0
    def get_benchmarks(self, X_train, y_train, X_test, y_test):
        """Benchmark every environment in ``self.envs`` on one data split.

        For each handled environment a model is fit on the training data,
        scored on the test data, and then re-fit ``self.n_repeats`` times
        to time the fit.

        Parameters
        ----------
        X_train, y_train :
            Training design matrix and targets.
        X_test, y_test :
            Held-out design matrix and targets used for scoring.

        Returns
        -------
        res : dict
            Maps each environment name to a dict. For handled environments
            it holds ``'score'`` (``r2_score`` for 'gaussian'/'poisson',
            ``accuracy_score`` at a 0.5 threshold for 'binomial'; the
            sklearn branch uses ``model.score``) and ``'time'`` (fastest
            fit in milliseconds). Both are ``-999.`` when sklearn does not
            cover the distribution or when the R run fails. Unknown
            environment names map to an empty dict.
        """
        n_repeats = self.n_repeats
        distr = self.distr

        def record_score(entry, y_hat):
            # Store the distribution-appropriate test score; other
            # distributions leave 'score' unset.
            if distr in ['gaussian', 'poisson']:
                entry['score'] = r2_score(y_test, y_hat)
            elif distr == 'binomial':
                entry['score'] = \
                    accuracy_score(y_test, (y_hat > 0.5).astype(int))

        def fastest_fit_ms(fit_once):
            # Best wall-clock duration over n_repeats calls, in milliseconds.
            durations = list()
            for _ in range(n_repeats):
                start = time.time()
                fit_once()
                stop = time.time()
                durations.append(stop - start)
            return np.min(durations) * 1e3

        res = dict()
        for env in self.envs:
            res[env] = dict()
            if env == 'pyglmnet':
                # initialize model
                model = GLM(distr=distr,
                            reg_lambda=[self.reg_lambda],
                            alpha=self.alpha,
                            solver='batch-gradient',
                            score_metric='pseudo_R2')

                # fit-predict-score
                model.fit(X_train, y_train)
                y_test_hat = np.squeeze(model[-1].predict(X_test))
                record_score(res[env], y_test_hat)

                # time
                res[env]['time'] = fastest_fit_ms(
                    lambda: model.fit(X_train, y_train))

            if env == 'sklearn':
                if distr in ['gaussian', 'binomial']:
                    # initialize model
                    if distr == 'gaussian':
                        model = ElasticNet(alpha=self.reg_lambda,
                                           l1_ratio=self.alpha)
                    elif distr == 'binomial':
                        model = SGDClassifier(loss='log',
                                              penalty='elasticnet',
                                              alpha=self.reg_lambda,
                                              l1_ratio=self.alpha)

                    # fit-score (the estimator's own score method is used)
                    model.fit(X_train, y_train)
                    res[env]['score'] = model.score(X_test, y_test)

                    # time
                    res[env]['time'] = fastest_fit_ms(
                        lambda: model.fit(X_train, y_train))
                else:
                    # distribution not covered by the sklearn branch
                    res[env]['score'] = -999.
                    res[env]['time'] = -999.

            if env == 'statsmodels':
                # initialize model
                if distr == 'gaussian':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Gaussian())
                elif distr == 'binomial':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Binomial())
                elif distr == 'poisson':
                    model = sm.GLM(y_train,
                                   sm.add_constant(X_train),
                                   family=sm.families.Poisson())

                # fit-predict-score
                statsmodels_res = model.fit()
                y_test_hat = model.predict(statsmodels_res.params,
                                           exog=sm.add_constant(X_test))
                y_test_hat = np.array(y_test_hat)
                record_score(res[env], y_test_hat)

                # time
                res[env]['time'] = fastest_fit_ms(model.fit)

            if env == 'R':
                # initialize model
                glmnet = importr('glmnet')
                predict = robjects.r('predict')

                def fit_glmnet():
                    return glmnet.glmnet(X_train,
                                         y_train,
                                         family=distr,
                                         alpha=self.alpha,
                                         nlambda=1)

                # fit-predict-score
                try:
                    fit = fit_glmnet()
                    tmp = predict(fit, newx=X_test, s=0)

                    # copy the R predictions element by element into a
                    # flat float array
                    y_test_hat = np.zeros(y_test.shape[0])
                    for i in range(y_test.shape[0]):
                        y_test_hat[i] = tmp[i]

                    record_score(res[env], y_test_hat)

                    # time
                    res[env]['time'] = fastest_fit_ms(fit_glmnet)
                except Exception:
                    # Best-effort: a failing R run is recorded with the
                    # sentinel instead of aborting the whole benchmark.
                    res[env]['score'] = -999.
                    res[env]['time'] = -999.

        return res
Пример #30
0
########################################################
# Generate a 5-class classification problem with 100 informative features
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=10000, n_classes=5,
                           n_informative=100, n_features=100, n_redundant=0)

########################################################

########################################################
# Fit the model

########################################################

########################################################
from pyglmnet import GLM
glm_mn = GLM(distr='multinomial', alpha=0.01,
               reg_lambda=np.array([0.02, 0.01]), verbose=False)
glm_mn.threshold = 1e-5
glm_mn.fit(X, y)

########################################################

########################################################
# Predict and score the output

########################################################

# predict with the model fit for the last reg_lambda value
y_pred = glm_mn[-1].predict(X)
# the mean of the matches is a fraction in [0, 1]; scale it by 100 so the
# printed number really is the percentage the message announces
print('Percentage correct = %f percent.' % (100. * (y_pred == y).mean()))

########################################################
Пример #31
0
n_samples, n_features = X.shape

########################################################
# Split the data into training and test sets

X_train, X_test, y_train, y_test = \
    train_test_split(X, y, test_size=0.33, random_state=0)

########################################################
# Fit a gaussian distributed GLM with elastic net regularization

# use the default value for reg_lambda
glm = GLM(distr='gaussian', alpha=0.05, score_metric='pseudo_R2')

# fit model
glm.fit(X_train, y_train)

# score the test set prediction, using the last model on the
# regularization path
y_test_hat = glm[-1].predict(X_test)
print("test set pseudo $R^2$ = %f" % glm[-1].score(X_test, y_test))

########################################################
# Plot the true and predicted test set target values

# only the first 50 test samples are drawn
plt.plot(y_test[:50], 'ko-')
plt.plot(y_test_hat[:50], 'ro-')
plt.legend(['true', 'pred'], frameon=False)
plt.xlabel('Counties')
plt.ylabel('Per capita violent crime')

# hide the right-hand y ticks; tick_params expects a bool here, and the
# legacy string 'off' is not a reliable way to switch ticks off
plt.tick_params(axis='y', right=False)
Пример #32
0
# training data
Xr = np.random.normal(loc=0.0, scale=1.0, size=[n_samples, n_features])
yr = glm_poisson.simulate(beta0, beta, Xr)

# testing data
Xt = np.random.normal(loc=0.0, scale=1.0, size=[n_samples, n_features])
yt = glm_poisson.simulate(beta0, beta, Xt)

##########################################################
# Fit the model
# ^^^^^^^^^^^^^
# A single GLM method, `fit()`, is all that is needed to fit the model.

##########################################################

# standardize the training features before fitting
scaler = StandardScaler()
scaler.fit(Xr)
Xr_scaled = scaler.transform(Xr)
glm_poisson.fit(Xr_scaled, yr)

##########################################################
# Slicing the model object
# ^^^^^^^^^^^^^^^^^^^^^^^^
# The model is fit to every value of reg_lambda on the regularization path,
# but further analysis is often only needed for one particular value of
# ``reg_lambda``. Slicing the object gives access to it.
#
# For instance ``model[0]`` returns an object identical to model but with
# ``.fit_`` as a dictionary corresponding to the estimated coefficients
# for ``reg_lambda[0]``.

##########################################################
# Visualize the fit coefficients
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# The estimated coefficients are stored in an instance variable called ``.fit_``
Пример #33
0
# Ground-truth parameters: a random intercept and a dense (density 1)
# random coefficient vector of length 10, both flattened to 1-D
beta0 = np.random.normal(0.0, 1.0, 1).flatten()
beta = sps.rand(10, 1, 1)
beta = np.array(beta.todense()).flatten()

# Generate random training data by using the previous betas
train_x = np.random.normal(0.0, 1.0, [10000, 10])
train_y = simulate_glm("neg-binomial", beta0, beta, train_x)

# plot the data distribution
sns.set(color_codes=True)
sns.distplot(train_y)
plt.show()

# Create the GLM and train it
glm = GLM(distr="neg-binomial", max_iter=10000)
glm.fit(train_x, train_y)

# Print the fitted betas and beta0, then the true ones, to check for
# correctness
print("")
print(glm.beta0_)
print(glm.beta_)
print("")
print(beta0)
print(beta)

# Generate test data
# simulate testing data from the same neg-binomial model that produced the
# training data (it was drawn from "poisson" before, which does not match
# the distribution the GLM is fit with)
X_test = np.random.normal(0.0, 1.0, [1000, 10])
y_test = simulate_glm("neg-binomial", beta0, beta, X_test)

# predict using fitted model on the test data
Пример #34
0
# %%
# Poisson GLM in statsmodels: the response is cnt / offset and the
# regressors are the columns labelled 0..9 of df
import statsmodels.api as sm


sm_response = df['cnt'] / df['offset']
sm_regressors = df[np.arange(10)]
mod = sm.GLM(sm_response, sm_regressors, family=sm.families.Poisson())

mod = mod.fit()

mod.summary()

# %%
# The same response and regressors, passed to pyglmnet as plain arrays
from pyglmnet import GLM

# create an instance of the GLM class
glm = GLM(distr='poisson')
glm_features = df[np.arange(10)].values
glm_target = df['cnt'].values/df['offset'].values
glm = glm.fit(glm_features, glm_target)
glm

# %%
glm.get_params()

# %%
# Keras model: one bias-free dense unit followed by an exponential,
# compiled with the 'poisson' loss
import keras

inl = keras.layers.Input((10,))
linear_out = keras.layers.Dense(1, use_bias=False)(inl)
out = keras.layers.Lambda(lambda x: keras.backend.exp(x))(linear_out)
model = keras.models.Model(inl, out)

optimizer = keras.optimizers.Adam(1e-3)
model.compile(optimizer, 'poisson')
model.summary()
Пример #35
0
# training data
Xr = np.random.normal(loc=0.0, scale=1.0, size=[n_samples, n_features])
yr = glm_poissonexp.simulate(beta0, beta, Xr)

# testing data
Xt = np.random.normal(loc=0.0, scale=1.0, size=[n_samples, n_features])
yt = glm_poissonexp.simulate(beta0, beta, Xt)

########################################################
# Fit model to training data

########################################################

# the scaler is fit on the training features only and reused below
scaler = StandardScaler()
scaler.fit(Xr)
glm_poissonexp.fit(scaler.transform(Xr), yr)

########################################################
# Use one model to predict

########################################################

# take the last model on the regularization path
m = glm_poissonexp[-1]
this_model_param = m.fit_
Xr_scaled = scaler.transform(Xr)
yrhat = m.predict(Xr_scaled)
Xt_scaled = scaler.transform(Xt)
ythat = m.predict(Xt_scaled)

########################################################
# Visualize predicted output

########################################################
Пример #36
0
                           n_features=100,
                           n_redundant=0)

########################################################

########################################################
# Fit the model

########################################################

########################################################
from pyglmnet import GLM
glm_mn = GLM(distr='multinomial',
             alpha=0.01,
             reg_lambda=np.array([0.02, 0.01]),
             verbose=False)
glm_mn.threshold = 1e-5
glm_mn.fit(X, y)

########################################################

########################################################
# Predict and score the output

########################################################

# take the highest-scoring column of the last model's prediction as the
# predicted class
y_pred = glm_mn[-1].predict(X).argmax(axis=1)
# the mean of the matches is a fraction in [0, 1]; scale it by 100 so the
# printed number really is the percentage the message announces
print('Percentage correct = %f percent.' % (100. * (y_pred == y).mean()))

########################################################
Пример #37
0
def test_glmnet():
    """Check unpenalized GLM fits across distributions and solvers.

    For every distribution/solver pair, data is simulated (with
    ``sample=False``) from known coefficients and the fit must keep the
    loss trace non-increasing (up to 1e-7), end at the loss of the true
    coefficients, and recover those coefficients. The solvers are then
    compared pairwise, and ``fit_predict`` is exercised at the end.
    """
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients, scaled by 1 / (n_features + 1); the builtin float is
    # used because the ``np.float`` alias no longer exists in NumPy
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']

    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    for distr in distrs:
        betas_ = list()
        for solver in solvers:

            # re-seed so that every solver sees the same training data
            np.random.seed(random_state)

            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(distr, beta0, beta, X_train, sample=False)

            alpha = 0.
            reg_lambda = 0.
            loss_trace = list()

            def callback(beta):
                # record the loss for the coefficients handed to the callback
                Tau = None
                eta = 2.0
                group = None

                loss_trace.append(
                    _loss(distr, alpha, Tau, reg_lambda, X_train, y_train, eta,
                          group, beta))

            glm = GLM(distr,
                      learning_rate=learning_rate,
                      reg_lambda=reg_lambda,
                      tol=1e-3,
                      max_iter=5000,
                      alpha=alpha,
                      solver=solver,
                      score_metric=score_metric,
                      random_state=random_state,
                      callback=callback)
            assert (repr(glm))

            glm.fit(X_train, y_train)

            # verify loss decreases
            assert (np.all(np.diff(loss_trace) <= 1e-7))

            # verify loss at convergence = loss when beta=beta_
            l_true = _loss(distr, 0., np.eye(beta.shape[0]), 0., X_train,
                           y_train, 2.0, None, np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)
            # beta=beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
            betas_.append(glm.beta_)

            y_pred = glm.predict(X_train)
            assert (y_pred.shape[0] == X_train.shape[0])

        # compare all solvers pairwise to make sure they're close
        for i, first_beta in enumerate(betas_[:-1]):
            for second_beta in betas_[i + 1:]:
                assert_allclose(first_beta, second_beta, rtol=0.05, atol=1e-2)

    # test fit_predict; X_train and y_train are the data left over from
    # the last loop iteration
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    # an X with an extra leading axis must be rejected
    raises(ValueError, glm_poisson.fit_predict, X_train[None, ...], y_train)
Пример #38
0
print(position_array.shape)

# scatter columns 0-3 against columns 4-7 for every frame
pl.figure()
for n in range(n_frames):
    frame_row = all_position[n]
    pl.scatter(frame_row[:4], frame_row[4:8], s=2, c='k')

pl.show()

# GLM
glm = GLM(distr='gaussian', alpha=0.05)

# column 0 is the target; the remaining columns are the features
y = all_position[:, 0]
X = np.delete(all_position, 0, axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42)

# standardize with statistics of the training features only
scaler = StandardScaler()
scaler.fit(X_train)
glm.fit(scaler.transform(X_train), y_train)

# predict for the full data set (training and test rows)
yhat = glm.predict(scaler.transform(X))
# print(glm.score(X_test, Y_test))
#
# plot
pl.figure()
pl.plot(y, marker='x', color='b', label='observed')
# row 9 of the prediction is drawn against the observed target
pl.plot(yhat[9, :], marker='o', color='r', label='trained')

pl.show()
Пример #39
0
# training data
Xr = np.random.normal(loc=0.0, scale=1.0, size=[n_samples, n_features])
yr = glm_poissonexp.simulate(beta0, beta, Xr)

# testing data
Xt = np.random.normal(loc=0.0, scale=1.0, size=[n_samples, n_features])
yt = glm_poissonexp.simulate(beta0, beta, Xt)

########################################################
# Fit model to training data

########################################################

# standardize the training features before fitting
scaler = StandardScaler()
scaler.fit(Xr)
glm_poissonexp.fit(scaler.transform(Xr), yr)

########################################################
# Gradient of loss function

########################################################

# evaluate the gradient at the last fitted parameters, on the unscaled
# training data, and show its first five coefficient entries
last_fit = glm_poissonexp.fit_[-1]
grad_beta0, grad_beta = glm_poissonexp._grad_L2loss(
    last_fit['beta0'], last_fit['beta'], 0.01, Xr, yr)
print(grad_beta[:5])

########################################################
# Use one model to predict

########################################################
plt.ylabel('time bin of response')
plt.title('Sample first 50 rows of design matrix created using Hankel')
plt.show()

########################################################
# **Fitting and predicting with a linear-Gaussian GLM**
#
# For a general linear model, the observed spikes can be
# thought of as being controlled by underlying parameters
# :math:`\beta_0, \beta`.
#
# You can simply use a linear-Gaussian GLM with no regularization
# to predict the spike counts.

glm_lg = GLM(
    distr='gaussian',
    reg_lambda=0.0,
    score_metric='pseudo_R2',
)
glm_lg.fit(Xdsgn, y)

# predict spike counts
ypred_lg = glm_lg.predict(Xdsgn)

########################################################
# **Fitting and predicting with a Poisson GLM**
#
# We can also assume that there is a non-linear function governing
# the underlying firing patterns.
# In pyglmnet, we use an exponential inverse link function
# for the Poisson distribution.

glm_poisson = GLM(distr='poisson',
                  alpha=0.05,
                  learning_rate=1.0,
Пример #41
0
def test_glmnet(distr, reg_lambda, fit_intercept, solver):
    """Check a GLM fit for one distribution/solver/penalty combination.

    Data is simulated (with ``sample=False``) from known coefficients. The
    loss trace recorded by the callback must be non-increasing (up to
    1e-7) and, when ``reg_lambda == 0``, the fit must end at the loss of
    the true coefficients and recover them. The gamma/cdfast combination
    is skipped entirely.

    Parameters
    ----------
    distr : distribution name passed to ``GLM`` and ``simulate_glm``
    reg_lambda : regularization strength passed to ``GLM`` and ``_loss``
    fit_intercept : whether an intercept is simulated and fit
    solver : solver name passed to ``GLM``
    """
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients; the builtin float is used because the ``np.float``
    # alias no longer exists in NumPy
    beta0 = 0.
    if fit_intercept:
        beta0 = 1. / (float(n_features) + 1.) * \
            np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + int(fit_intercept)) * \
        np.random.normal(0.0, 1.0, (n_features,))

    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    betas_ = list()

    if not (distr == 'gamma' and solver == 'cdfast'):

        np.random.seed(random_state)

        theta = 1.0
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = simulate_glm(distr, beta0, beta, X_train, theta=theta,
                               sample=False)

        alpha = 0.
        loss_trace = list()
        eta = 2.0
        group = None
        Tau = None

        def callback(beta):
            # record the loss for the coefficients handed to the callback
            loss_trace.append(
                _loss(distr, alpha, Tau, reg_lambda,
                      X_train, y_train, eta, theta, group, beta,
                      fit_intercept=fit_intercept))

        glm = GLM(distr, learning_rate=learning_rate,
                  reg_lambda=reg_lambda, tol=1e-5, max_iter=5000,
                  alpha=alpha, solver=solver, score_metric=score_metric,
                  random_state=random_state, callback=callback,
                  fit_intercept=fit_intercept, theta=theta)
        assert(repr(glm))

        glm.fit(X_train, y_train)

        # verify loss decreases
        assert(np.all(np.diff(loss_trace) <= 1e-7))

        # true loss and beta should be recovered when reg_lambda == 0
        if reg_lambda == 0.:
            # verify loss at convergence = loss when beta=beta_
            l_true = _loss(distr, alpha, Tau, reg_lambda,
                           X_train, y_train, eta, theta, group,
                           np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)
            # beta=beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
        betas_.append(glm.beta_)

        y_pred = glm.predict(X_train)
        assert(y_pred.shape[0] == X_train.shape[0])

        # compare all solvers pairwise to make sure they're close
        # (betas_ holds a single entry here, so there is nothing to compare)
        for i, first_beta in enumerate(betas_[:-1]):
            for second_beta in betas_[i + 1:]:
                assert_allclose(first_beta, second_beta, rtol=0.05, atol=1e-2)

        # test fit_predict; an X with an extra leading axis must be rejected
        glm_poisson = GLM(distr='softplus')
        glm_poisson.fit_predict(X_train, y_train)
        raises(ValueError, glm_poisson.fit_predict,
               X_train[None, ...], y_train)
Пример #42
0
                                    base=np.exp(1)))

# set up the lasso model (alpha=1.0) on a path of 5 log-spaced reg_lambda
# values running from 100 down to 0.01
glm = GLM(distr="binomial",
          tol=1e-2,
          score_metric="pseudo_R2",
          alpha=1.0,
          reg_lambda=np.logspace(np.log(100), np.log(0.01), 5, base=np.exp(1)))

print("gl_glm: ", gl_glm)
print("glm: ", glm)

##########################################################
# Fit models

gl_glm.fit(Xtrain, ytrain)
glm.fit(Xtrain, ytrain)

##########################################################
# Visualize model scores on test set

# green: group lasso, red: lasso; solid: test, dashed: train
plt.figure()
plt.semilogx(gl_glm.reg_lambda, gl_glm.score(Xtest, ytest), 'go-')
plt.semilogx(gl_glm.reg_lambda, gl_glm.score(Xtrain, ytrain), 'go--')
plt.semilogx(glm.reg_lambda, glm.score(Xtest, ytest), 'ro-')
plt.semilogx(glm.reg_lambda, glm.score(Xtrain, ytrain), 'ro--')
plt.legend(
    ['Group Lasso: test', 'Group Lasso: train', 'Lasso: test', 'Lasso: train'],
    frameon=False,
    loc='best')
# raw string: '\l' is not a valid escape sequence in a normal literal
plt.xlabel(r'$\lambda$')
               reg_lambda=np.array([0.02, 0.01]), learning_rate=1e-3 ,verbose=False,)


# initial values for the coefficients: a random intercept and a sparse
# (density 0.1) random coefficient column, converted to a dense array
beta0 = np.random.normal(0.0, 1.0, 1)
beta = np.array(sps.rand(n_features, 1, 0.1).todense())


model.threshold = 1e-5

# standardizing the features before fitting is currently disabled:
#scaler = StandardScaler().fit(X_train)
#model.fit(scaler.transform(X_train),y_train)

# Fitting the model
model.fit(X_train, y_train)


# plotting the fit coefficients
# TODO: fix this graph
fit_param = model[0].fit_
fitted_beta = fit_param['beta']
plt.plot(beta[:], 'bo', label='bo')
plt.plot(fitted_beta[:], 'ro', label='ro')
plt.xlabel('samples')
plt.ylabel('outputs')
plt.legend(
    bbox_to_anchor=(0., 1.02, 1., .102),
    loc=1,
    ncol=2,
    borderaxespad=0.,
)
plt.show()

# Making the predictions based on the fit model
last_model = model[-1]
yt_predicted = last_model.predict(X_test)
Пример #44
0
# Fit models

# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module is no longer shipped with scikit-learn
from sklearn.model_selection import train_test_split
Xtrain, Xtest, Ytrain, Ytest = train_test_split(features,
                                                spike_counts,
                                                test_size=0.2,
                                                random_state=42)

########################################################

# build the Tikhonov matrix from the prior covariance
from pyglmnet import utils
n_samples = Xtrain.shape[0]
Tau = utils.tikhonov_from_prior(prior_cov, n_samples)

glm = GLM(distr='poisson', alpha=0., Tau=Tau, score_metric='pseudo_R2')
glm.fit(Xtrain, Ytrain)
# pick the model on the regularization path with the best test score
cvopt_lambda = glm.score(Xtest, Ytest).argmax()
print("train score: %f" % glm[cvopt_lambda].score(Xtrain, Ytrain))
print("test score: %f" % glm[cvopt_lambda].score(Xtest, Ytest))
weights = glm[cvopt_lambda].fit_['beta']

########################################################
# Visualize

for time_bin_ in range(n_temporal_basis):
    RF = strf_model.make_image_from_spatial_basis(
        spatial_basis,
        weights[range(time_bin_, n_spatial_basis * n_temporal_basis,
                      n_temporal_basis)])

    plt.subplot(1, n_temporal_basis, time_bin_ + 1)
Пример #45
0
    def fit(self, X, Y, get_history_terms=True):
        """
        Fit the model selected by ``self.tunemodel`` to predict Y from X.

        For the built-in model types ('glm', 'feedforward_nn', 'xgboost',
        'random_forest', 'lstm') a new model instance is created from
        ``self.params``, fitted, and stored in ``self.model``. For any other
        value of ``self.tunemodel`` the already existing ``self.model`` is
        fitted directly.

        Parameters
        ----------
        X: float, n_samples x n_features, features of interest
        Y: float, n_samples x 1, population activity
        get_history_terms: bool. Whether to compute the temporal features.
                    Note that if spike_history and cov_history are False,
                    no history will be computed anyways and the flag does nothing.

        Returns
        -------
        None. The fitted model is available as ``self.model``.
        """
        if self.default_params:
            warnings.warn(
                '\n  Using default hyperparameters. Consider optimizing on' +
                ' a held-out dataset using, e.g. hyperopt or random search')

        # make the covariate matrix. Include spike or covariate history?
        # The different methods here are to satisfy the needs of recurrent keras
        # models
        if get_history_terms:
            if self.tunemodel == 'lstm':
                X, Y = self.get_all_with_history_keras(X, Y)
            else:
                X, Y = self.get_all_with_history(X, Y)

        if self.tunemodel == 'glm':
            model = GLM(**self.params)
            model.fit(X, Y)

            # we want the last of the regularization path
            self.model = model[-1]

        elif self.tunemodel == 'feedforward_nn':

            # A single 1-D feature is reshaped into an (n_samples, 1) column.
            if np.ndim(X) == 1:
                X = np.transpose(np.atleast_2d(X))

            params = self.params
            model = Sequential()
            model.add(
                Dense(params['n1'],
                      input_dim=np.shape(X)[1],
                      kernel_initializer='glorot_normal',
                      activation='relu',
                      kernel_regularizer=l2(params['l2'])))
            model.add(Dropout(params['dropout']))
            model.add(BatchNormalization())
            model.add(
                Dense(params['n2'],
                      kernel_initializer='glorot_normal',
                      activation='relu',
                      kernel_regularizer=l2(params['l2'])))
            model.add(BatchNormalization())
            # Softplus keeps the single output non-negative for the
            # Poisson loss used below.
            model.add(Dense(1, activation='softplus'))
            # Note: 'b1' and 'b2' are stored as (1 - beta), hence the
            # subtraction when handing them to the optimizer.
            optim = adam(lr=params['lr'],
                         clipnorm=params['clipnorm'],
                         decay=params['decay'],
                         beta_1=1 - params['b1'],
                         beta_2=1 - params['b2'])
            model.compile(
                loss='poisson',
                optimizer=optim,
            )
            # The training history returned by Keras is not used.
            model.fit(X,
                      Y,
                      batch_size=128,
                      epochs=30,
                      verbose=self.verbose)

            self.model = model

        elif self.tunemodel == 'xgboost':

            dtrain = xgb.DMatrix(X, label=Y)
            num_round = 200
            self.model = xgb.train(self.params, dtrain, num_round)

        elif self.tunemodel == 'random_forest':

            self.model = RandomForestRegressor(**self.params)
            self.model.fit(X, Y)

        elif self.tunemodel == 'lstm':

            if np.ndim(X) == 1:
                X = np.transpose(np.atleast_2d(X))

            params = self.params
            model = Sequential()  # Declare model
            # Add recurrent layer with dropout on both the input and the
            # recurrent connections. `dropout` / `recurrent_dropout` are the
            # Keras 2 names of the former `dropout_W` / `dropout_U`,
            # consistent with the Keras 2 API used elsewhere in this method.
            # NOTE(review): input_shape reads X.shape[2], so X is expected to
            # be 3-D here - confirm against get_all_with_history_keras.
            model.add(LSTM(int(params['n_units']),
                           input_shape=(X.shape[1], X.shape[2]),
                           dropout=params['dropout'],
                           recurrent_dropout=params['dropout']))
            # Dropout some units (recurrent layer output units)
            model.add(Dropout(params['dropout']))

            # Add dense connections to output layer
            model.add(Dense(1, activation='softplus'))

            # Fit model (and set fitting parameters)
            model.compile(loss='poisson',
                          optimizer='rmsprop',
                          metrics=['accuracy'])
            model.fit(X,
                      Y,
                      epochs=int(params['epochs']),
                      batch_size=int(params['batch_size']),
                      verbose=self.verbose)  # Fit the model

            self.model = model

        else:  # using predefined model
            self.model.fit(X, Y)
Пример #46
0
np.shape(prior_cov)

########################################################
# Fit models

# sklearn.cross_validation was removed in scikit-learn 0.20; use
# sklearn.model_selection and fall back only on old installations.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
# Hold out 20% of the samples; the fixed seed keeps the split reproducible.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(features, spike_counts, test_size=0.2, random_state=42)

########################################################

from pyglmnet import utils
# Build the Tau matrix from the prior covariance and the number of
# training samples.
n_samples = Xtrain.shape[0]
Tau = utils.tikhonov_from_prior(prior_cov, n_samples)

glm = GLM(distr='poisson', alpha=0., Tau=Tau, score_metric='pseudo_R2')
glm.fit(Xtrain, Ytrain)
# Index of the fitted model with the highest score on the held-out split.
cvopt_lambda = glm.score(Xtest, Ytest).argmax()
print("train score: %f" % glm[cvopt_lambda].score(Xtrain, Ytrain))
print("test score: %f" % glm[cvopt_lambda].score(Xtest, Ytest))
weights = glm[cvopt_lambda].fit_['beta']

########################################################
# Visualize

for time_bin_ in range(n_temporal_basis):
    # Take every n_temporal_basis-th weight starting at time_bin_ and turn
    # that subset into an image via the spatial basis.
    RF = strf_model.make_image_from_spatial_basis(spatial_basis,
                                             weights[range(time_bin_,
                                                           n_spatial_basis * n_temporal_basis,
                                                           n_temporal_basis)])

    # One subplot per time bin, laid out in a single row.
    plt.subplot(1, n_temporal_basis, time_bin_+1)