Example #1
def callback(beta):
    Tau = None
    eta = 2.0
    group = None

    loss_trace.append(
        _loss(distr, alpha, Tau, reg_lambda, X_train, y_train, eta,
              group, beta))
Example #2
def callback(beta):
    Tau = None
    loss_trace.append(
        _loss(distr, alpha, Tau, reg_lambda,
              X_train, y_train, eta, theta, group, beta,
              fit_intercept=fit_intercept))
Example #3
def test_glmnet(distr, reg_lambda, fit_intercept, solver):
    """Test glmnet."""
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 0.
    if fit_intercept:
        beta0 = 1. / (float(n_features) + 1.) * \
            np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + int(fit_intercept)) * \
        np.random.normal(0.0, 1.0, (n_features,))

    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    betas_ = list()

    if not (distr == 'gamma' and solver == 'cdfast'):

        np.random.seed(random_state)

        theta = 1.0
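        # simulate training data from the ground-truth coefficients (beta0, beta)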
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = simulate_glm(distr, beta0, beta, X_train, theta=theta,
                               sample=False)

        alpha = 0.
        loss_trace = list()
        eta = 2.0
        group = None
        Tau = None

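        # the callback records the loss after every iteration so the test can
        # later verify that the loss trace is non-increasing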
        def callback(beta):
            Tau = None
            loss_trace.append(
                _loss(distr, alpha, Tau, reg_lambda,
                      X_train, y_train, eta, theta, group, beta,
                      fit_intercept=fit_intercept))

        glm = GLM(distr, learning_rate=learning_rate,
                  reg_lambda=reg_lambda, tol=1e-5, max_iter=5000,
                  alpha=alpha, solver=solver, score_metric=score_metric,
                  random_state=random_state, callback=callback,
                  fit_intercept=fit_intercept, theta=theta)
        assert(repr(glm))

        glm.fit(X_train, y_train)

        # verify loss decreases
        assert(np.all(np.diff(loss_trace) <= 1e-7))

        # true loss and beta should be recovered when reg_lambda == 0
        if reg_lambda == 0.:
            # verify loss at convergence = loss when beta=beta_
            l_true = _loss(distr, alpha, Tau, reg_lambda,
                           X_train, y_train, eta, theta, group,
                           np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)
            # beta=beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
        betas_.append(glm.beta_)

        y_pred = glm.predict(X_train)
        assert(y_pred.shape[0] == X_train.shape[0])

        # compare all solvers pairwise to make sure they're close
        for i, first_beta in enumerate(betas_[:-1]):
            for second_beta in betas_[i + 1:]:
                assert_allclose(first_beta, second_beta, rtol=0.05, atol=1e-2)

        # test fit_predict
        glm_poisson = GLM(distr='softplus')
        glm_poisson.fit_predict(X_train, y_train)
        raises(ValueError, glm_poisson.fit_predict,
               X_train[None, ...], y_train)
Example #4
def test_glmnet():
    """Test glmnet."""
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']

    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    for distr in distrs:
        betas_ = list()
        for solver in solvers:

            np.random.seed(random_state)

            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(distr, beta0, beta, X_train, sample=False)

            alpha = 0.
            reg_lambda = 0.
            loss_trace = list()

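            # per-iteration callback: append the current loss to loss_trace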
            def callback(beta):
                Tau = None
                eta = 2.0
                group = None

                loss_trace.append(
                    _loss(distr, alpha, Tau, reg_lambda, X_train, y_train, eta,
                          group, beta))

            glm = GLM(distr,
                      learning_rate=learning_rate,
                      reg_lambda=reg_lambda,
                      tol=1e-3,
                      max_iter=5000,
                      alpha=alpha,
                      solver=solver,
                      score_metric=score_metric,
                      random_state=random_state,
                      callback=callback)
            assert (repr(glm))

            glm.fit(X_train, y_train)

            # verify loss decreases
            assert (np.all(np.diff(loss_trace) <= 1e-7))

            # verify loss at convergence = loss when beta=beta_
            l_true = _loss(distr, 0., np.eye(beta.shape[0]), 0., X_train,
                           y_train, 2.0, None, np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)
            # beta=beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
            betas_.append(glm.beta_)

            y_pred = glm.predict(X_train)
            assert (y_pred.shape[0] == X_train.shape[0])

        # compare all solvers pairwise to make sure they're close
        for i, first_beta in enumerate(betas_[:-1]):
            for second_beta in betas_[i + 1:]:
                assert_allclose(first_beta, second_beta, rtol=0.05, atol=1e-2)

    # test fit_predict
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    raises(ValueError, glm_poisson.fit_predict, X_train[None, ...], y_train)
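For orientation, here is a minimal fit/predict sketch assembled only from the calls exercised in the tests above (GLM, simulate_glm, fit, predict). It assumes these names are importable from pyglmnet, as in the library the tests come from; the parameter values are illustrative rather than taken from any particular test.

# minimal usage sketch (assumed imports: pyglmnet's GLM and simulate_glm)
import numpy as np
from pyglmnet import GLM, simulate_glm

n_samples, n_features = 100, 10
np.random.seed(0)

# ground-truth coefficients and a simulated design matrix / response
beta0 = np.random.normal(0.0, 1.0)
beta = np.random.normal(0.0, 1.0, (n_features,)) / (n_features + 1.)
X = np.random.normal(0.0, 1.0, [n_samples, n_features])
y = simulate_glm('poisson', beta0, beta, X, sample=False)

# fit an unregularized Poisson GLM and predict on the training data
glm = GLM(distr='poisson', alpha=0., reg_lambda=0.,
          learning_rate=2e-1, max_iter=5000, tol=1e-5,
          score_metric='pseudo_R2', random_state=0)
glm.fit(X, y)
y_hat = glm.predict(X)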