Пример #1
0
def test_random_state_consistency():
    """Test model's random_state.

    Two ``GLM`` models, and two ``GLMCV`` models, created with the same
    ``random_state`` are fit on the same data and must return exactly equal
    predictions.  Calling ``fit_predict`` a second time on the same ``GLMCV``
    instance must also reproduce its previous predictions.
    """
    # Generate the dataset
    n_samples, n_features = 1000, 10

    # The builtin ``float`` is used here: the ``np.float`` alias was removed
    # in NumPy 1.24 and raises AttributeError there.
    scale = 1. / (float(n_features) + 1.)
    beta0 = scale * np.random.normal(0.0, 1.0)
    beta = scale * np.random.normal(0.0, 1.0, (n_features,))
    Xtrain = np.random.normal(0.0, 1.0, [n_samples, n_features])

    ytrain = simulate_glm("gaussian", beta0, beta, Xtrain,
                          sample=False, random_state=42)

    # Test simple glm
    glm_a = GLM(distr="gaussian", random_state=1)
    ypred_a = glm_a.fit_predict(Xtrain, ytrain)
    glm_b = GLM(distr="gaussian", random_state=1)
    ypred_b = glm_b.fit_predict(Xtrain, ytrain)

    # Consistency between two different models
    assert_array_equal(ypred_a, ypred_b)

    # Test also cross-validation
    glm_cv_a = GLMCV(distr="gaussian", cv=3, random_state=1)
    ypred_a = glm_cv_a.fit_predict(Xtrain, ytrain)
    glm_cv_b = GLMCV(distr="gaussian", cv=3, random_state=1)
    ypred_b = glm_cv_b.fit_predict(Xtrain, ytrain)
    # refit the very same instance: the result must not drift between calls
    ypred_c = glm_cv_b.fit_predict(Xtrain, ytrain)

    assert_array_equal(ypred_a, ypred_b)
    assert_array_equal(ypred_b, ypred_c)
Пример #2
0
def test_glmnet():
    """Test glmnet.

    For every solver/distribution pair, data is simulated from known
    coefficients, a ``GLM`` is fit, and the test checks that the fitted
    coefficients are close to the true ones and that the predictions have
    the expected shape.  Afterwards slicing, copying, scoring and
    ``fit_predict`` are exercised.
    """
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    # expected first dimension of the prediction array
    # (presumably the default number of reg_lambda values -- TODO confirm)
    n_lambda = 10

    # coefficients
    # The builtin ``float`` is used here: the ``np.float`` alias was removed
    # in NumPy 1.24 and raises AttributeError there.
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['softplus', 'poisson', 'gaussian', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)

            assert_true(repr(glm))

            # seed with the model's own random_state before drawing the data
            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            # coefficients stored in the last entry of ``fit_``
            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            # NOTE(review): X_train has already been standardized above, so
            # the scaler is applied a second time here -- confirm intended.
            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    # (uses the model and data left over from the last loop iteration)
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    glm.score(X_train, y_train)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='softplus')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    # an input with an extra leading axis must be rejected
    assert_raises(ValueError, glm_poisson.fit_predict, X_train[None, ...],
                  y_train)
Пример #3
0
def test_glmnet():
    """Test glmnet.

    For every solver/distribution pair, data is simulated from known
    coefficients, a ``GLM`` is fit, and the test checks that the fitted
    coefficients are close to the true ones and that the predictions have
    the expected shape.  Afterwards slicing, copying, scoring and
    ``fit_predict`` are exercised.
    """
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    # expected first dimension of the prediction array
    # (presumably the default number of reg_lambda values -- TODO confirm)
    n_lambda = 10

    # coefficients
    # The builtin ``float`` is used here: the ``np.float`` alias was removed
    # in NumPy 1.24 and raises AttributeError there.
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, [n_features, 1])

    distrs = ['poisson', 'poissonexp', 'normal', 'binomial']
    solvers = ['batch-gradient', 'cdfast']
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate, solver=solver)

            assert_true(repr(glm))

            # seed with the model's own random_state before drawing the data
            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = glm.simulate(beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            # coefficients stored in the last entry of ``fit_``
            beta_ = glm.fit_[-1]['beta'][:]
            assert_allclose(beta[:], beta_, atol=0.5)  # check fit

            # NOTE(review): X_train has already been standardized above, so
            # the scaler is applied a second time here -- confirm intended.
            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape, (n_lambda, X_train.shape[0]))

    # checks for slicing.
    # (uses the model and data left over from the last loop iteration)
    glm = glm[:3]
    glm_copy = glm.copy()
    assert_true(glm_copy is not glm)
    assert_equal(len(glm.reg_lambda), 3)
    y_pred = glm[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, X_train.shape[0]))
    y_pred = glm[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (X_train.shape[0], ))
    assert_raises(IndexError, glm.__getitem__, [2])
    # NOTE(review): score receives (y_train, y_pred), with y_pred being the
    # single-index prediction above -- confirm the argument order against
    # the GLM API in use.
    glm.score(y_train, y_pred)

    # don't allow slicing if model has not been fit yet.
    glm_poisson = GLM(distr='poisson')
    assert_raises(ValueError, glm_poisson.__getitem__, 2)

    # test fit_predict
    glm_poisson.fit_predict(X_train, y_train)
    # an input with an extra leading axis must be rejected
    assert_raises(ValueError, glm_poisson.fit_predict, X_train[None, ...],
                  y_train)
Пример #4
0
def test_glmnet():
    """Fit a GLM per distribution and verify coefficients, density, slicing.

    Each distribution gets its own model, trained on data simulated from a
    sparse ground-truth coefficient vector.  The fitted coefficients and
    their density are compared with the ground truth, then the slicing,
    copy, deviance and ``fit_predict`` entry points are exercised.
    """
    n_samples, n_features = 10000, 100
    n_lambda = 10
    density = 0.1
    scaler = StandardScaler()

    # ground truth: random intercept and a sparse random weight column
    beta0 = np.random.rand()
    beta = sps.rand(n_features, 1, density=density).toarray()

    for distr in ('poisson', 'poissonexp', 'normal', 'binomial'):
        # FIXME: why do we need such this learning rate for 'poissonexp'?
        if distr == 'poissonexp':
            learning_rate = 1e-5
        else:
            learning_rate = 1e-4
        glm = GLM(distr, learning_rate=learning_rate)

        assert_true(repr(glm))

        # draw the design matrix after seeding with the model's random_state
        np.random.seed(glm.random_state)
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = glm.simulate(beta0, beta, X_train)

        X_train = scaler.fit_transform(X_train)
        glm.fit(X_train, y_train)

        # coefficients from the second-to-last entry of ``fit_``
        fitted_beta = glm.fit_[-2]['beta'][:]
        assert_allclose(beta, fitted_beta, atol=0.1)  # check fit
        n_large = np.sum(fitted_beta > 0.1)
        fitted_density = n_large / float(n_features)
        assert_allclose(fitted_density, density, atol=0.05)  # check density

        X_rescaled = scaler.transform(X_train)
        y_pred = glm.predict(X_rescaled)
        expected_shape = (n_lambda, X_train.shape[0])
        assert_equal(y_pred.shape, expected_shape)

    # slicing checks, run on the model left over from the last iteration
    glm_head = glm[:3]
    glm_copy = glm_head.copy()
    assert_true(glm_copy is not glm_head)
    assert_equal(len(glm_head.reg_lambda), 3)

    n_rows = X_train.shape[0]
    y_pred = glm_head[:2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (2, n_rows))
    y_pred = glm_head[2].predict(scaler.transform(X_train))
    assert_equal(y_pred.shape, (n_rows, ))
    assert_raises(IndexError, glm_head.__getitem__, [2])
    glm_head.deviance(y_train, y_pred)

    # an unfitted model must refuse to be sliced
    glm_unfit = GLM(distr='poisson')
    assert_raises(ValueError, glm_unfit.__getitem__, 2)

    # fit_predict works on 2-D input and rejects an extra leading axis
    glm_unfit.fit_predict(X_train, y_train)
    assert_raises(ValueError, glm_unfit.fit_predict, X_train[None, ...],
                  y_train)
Пример #5
0
def test_glmnet():
    """Test glmnet.

    For every solver/distribution pair, data is simulated from known
    coefficients with ``simulate_glm``, a ``GLM`` is fit, and the test
    checks that ``beta_`` is close to the true coefficients and that one
    prediction is returned per training row.  ``fit_predict`` is exercised
    at the end.
    """
    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    # The builtin ``float`` is used here: the ``np.float`` alias was removed
    # in NumPy 1.24 and raises AttributeError there.
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr,
                      learning_rate=learning_rate,
                      solver=solver,
                      score_metric=score_metric)

            assert_true(repr(glm))

            # seed with the model's own random_state before drawing the data
            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            # NOTE(review): X_train has already been standardized above, so
            # the scaler is applied a second time here -- confirm intended.
            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape[0], X_train.shape[0])

    # test fit_predict
    # (uses the data left over from the last loop iteration)
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    # an input with an extra leading axis must be rejected
    assert_raises(ValueError, glm_poisson.fit_predict, X_train[None, ...],
                  y_train)
Пример #6
0
def test_glmnet():
    """Test glmnet.

    For every solver/distribution pair, data is simulated from known
    coefficients with ``simulate_glm``, a ``GLM`` is fit, and the test
    checks that ``beta_`` is close to the true coefficients and that one
    prediction is returned per training row.  ``fit_predict`` is exercised
    at the end.
    """
    scaler = StandardScaler()
    n_samples, n_features = 100, 10

    # coefficients
    # The builtin ``float`` is used here: the ``np.float`` alias was removed
    # in NumPy 1.24 and raises AttributeError there.
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']
    score_metric = 'pseudo_R2'
    learning_rate = 2e-1

    for solver in solvers:
        for distr in distrs:

            glm = GLM(distr, learning_rate=learning_rate,
                      solver=solver, score_metric=score_metric)

            assert_true(repr(glm))

            # seed with the model's own random_state before drawing the data
            np.random.seed(glm.random_state)
            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(glm.distr, beta0, beta, X_train)

            X_train = scaler.fit_transform(X_train)
            glm.fit(X_train, y_train)

            beta_ = glm.beta_
            assert_allclose(beta, beta_, atol=0.5)  # check fit

            # NOTE(review): X_train has already been standardized above, so
            # the scaler is applied a second time here -- confirm intended.
            y_pred = glm.predict(scaler.transform(X_train))
            assert_equal(y_pred.shape[0], X_train.shape[0])

    # test fit_predict
    # (uses the data left over from the last loop iteration)
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    # an input with an extra leading axis must be rejected
    assert_raises(ValueError, glm_poisson.fit_predict,
                  X_train[None, ...], y_train)
Пример #7
0
def test_glmnet(distr, reg_lambda, fit_intercept, solver):
    """Test glmnet.

    Checks constructor validation, then (except for the 'gamma'/'cdfast'
    combination, which is skipped) fits a ``GLM`` on simulated data while
    recording the loss through a callback, and verifies that the loss does
    not increase, that the true coefficients are recovered when
    ``reg_lambda == 0``, and that predictions have one entry per sample.

    Parameters
    ----------
    distr : str
        Distribution name forwarded to ``simulate_glm``, ``_loss`` and
        ``GLM``.
    reg_lambda : float
        Regularization strength forwarded to ``_loss`` and ``GLM``.
    fit_intercept : bool
        Whether a non-zero intercept is simulated; also forwarded to
        ``GLM`` and to the ``_loss`` call inside the callback.
    solver : str
        Solver name forwarded to ``GLM``.

    Notes
    -----
    The arguments are presumably supplied by a pytest parametrization that
    is not visible here -- TODO confirm.
    """
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients
    # The builtin ``float`` is used here: the ``np.float`` alias was removed
    # in NumPy 1.24 and raises AttributeError there.
    beta0 = 0.
    if fit_intercept:
        beta0 = 1. / (float(n_features) + 1.) * \
            np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + int(fit_intercept)) * \
        np.random.normal(0.0, 1.0, (n_features,))

    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    betas_ = list()

    if not (distr == 'gamma' and solver == 'cdfast'):

        np.random.seed(random_state)

        theta = 1.0
        X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
        y_train = simulate_glm(distr, beta0, beta, X_train, theta=theta,
                               sample=False)

        alpha = 0.
        loss_trace = list()
        eta = 2.0
        group = None
        Tau = None

        def callback(beta):
            # record the loss for the coefficients handed to the callback;
            # Tau, eta, theta and group come from the enclosing scope
            loss_trace.append(
                _loss(distr, alpha, Tau, reg_lambda,
                      X_train, y_train, eta, theta, group, beta,
                      fit_intercept=fit_intercept))

        glm = GLM(distr, learning_rate=learning_rate,
                  reg_lambda=reg_lambda, tol=1e-5, max_iter=5000,
                  alpha=alpha, solver=solver, score_metric=score_metric,
                  random_state=random_state, callback=callback,
                  fit_intercept=fit_intercept, theta=theta)
        assert repr(glm)

        glm.fit(X_train, y_train)

        # verify loss decreases
        assert np.all(np.diff(loss_trace) <= 1e-7)

        # true loss and beta should be recovered when reg_lambda == 0
        if reg_lambda == 0.:
            # verify loss at convergence = loss when beta=beta_
            l_true = _loss(distr, alpha, Tau, reg_lambda,
                           X_train, y_train, eta, theta, group,
                           np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)
            # beta=beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
        betas_.append(glm.beta_)

        y_pred = glm.predict(X_train)
        assert y_pred.shape[0] == X_train.shape[0]

        # compare all solvers pairwise to make sure they're close
        # NOTE(review): betas_ holds a single entry per invocation here, so
        # this loop body never runs -- confirm whether that is intended.
        for i, first_beta in enumerate(betas_[:-1]):
            for second_beta in betas_[i + 1:]:
                assert_allclose(first_beta, second_beta, rtol=0.05, atol=1e-2)

        # test fit_predict
        glm_poisson = GLM(distr='softplus')
        glm_poisson.fit_predict(X_train, y_train)
        # an input with an extra leading axis must be rejected
        raises(ValueError, glm_poisson.fit_predict,
               X_train[None, ...], y_train)
Пример #8
0
def test_glmnet():
    """Test glmnet.

    Checks constructor validation, then for every distribution fits an
    unregularized ``GLM`` with each solver on simulated data while recording
    the loss through a callback.  Verifies that the loss does not increase,
    that the final loss and coefficients match the ground truth, that
    predictions have one entry per sample, and that the solvers agree with
    each other.  ``fit_predict`` is exercised at the end.
    """
    raises(ValueError, GLM, distr='blah')
    raises(ValueError, GLM, distr='gaussian', max_iter=1.8)

    n_samples, n_features = 100, 10

    # coefficients
    # The builtin ``float`` is used here: the ``np.float`` alias was removed
    # in NumPy 1.24 and raises AttributeError there.
    beta0 = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0)
    beta = 1. / (float(n_features) + 1.) * \
        np.random.normal(0.0, 1.0, (n_features,))

    distrs = ['softplus', 'gaussian', 'poisson', 'binomial', 'probit']
    solvers = ['batch-gradient', 'cdfast']

    score_metric = 'pseudo_R2'
    learning_rate = 2e-1
    random_state = 0

    for distr in distrs:
        # fitted coefficients of each solver for this distribution
        betas_ = list()
        for solver in solvers:

            # reseed so every solver sees the same simulated data
            np.random.seed(random_state)

            X_train = np.random.normal(0.0, 1.0, [n_samples, n_features])
            y_train = simulate_glm(distr, beta0, beta, X_train, sample=False)

            alpha = 0.
            reg_lambda = 0.
            loss_trace = list()

            def callback(beta):
                # record the loss for the coefficients handed to the callback
                Tau = None
                eta = 2.0
                group = None

                loss_trace.append(
                    _loss(distr, alpha, Tau, reg_lambda, X_train, y_train, eta,
                          group, beta))

            glm = GLM(distr,
                      learning_rate=learning_rate,
                      reg_lambda=reg_lambda,
                      tol=1e-3,
                      max_iter=5000,
                      alpha=alpha,
                      solver=solver,
                      score_metric=score_metric,
                      random_state=random_state,
                      callback=callback)
            assert repr(glm)

            glm.fit(X_train, y_train)

            # verify loss decreases
            assert np.all(np.diff(loss_trace) <= 1e-7)

            # verify loss at convergence = loss when beta=beta_
            l_true = _loss(distr, 0., np.eye(beta.shape[0]), 0., X_train,
                           y_train, 2.0, None, np.concatenate(([beta0], beta)))
            assert_allclose(loss_trace[-1], l_true, rtol=1e-4, atol=1e-5)
            # beta=beta_ when reg_lambda = 0.
            assert_allclose(beta, glm.beta_, rtol=0.05, atol=1e-2)
            betas_.append(glm.beta_)

            y_pred = glm.predict(X_train)
            assert y_pred.shape[0] == X_train.shape[0]

        # compare all solvers pairwise to make sure they're close
        for i, first_beta in enumerate(betas_[:-1]):
            for second_beta in betas_[i + 1:]:
                assert_allclose(first_beta, second_beta, rtol=0.05, atol=1e-2)

    # test fit_predict
    # (uses the data left over from the last loop iteration)
    glm_poisson = GLM(distr='softplus')
    glm_poisson.fit_predict(X_train, y_train)
    # an input with an extra leading axis must be rejected
    raises(ValueError, glm_poisson.fit_predict, X_train[None, ...], y_train)