Example #1
def check_regressors_pickle(name, Regressor):
    X, y = _boston_subset()
    y = StandardScaler().fit_transform(y)   # X is already scaled
    y = multioutput_estimator_convert_y_2d(name, y)
    if name == 'OrthogonalMatchingPursuitCV':
        # FIXME: This test is unstable on Travis, see issue #3190.
        check_skip_travis()
    rnd = np.random.RandomState(0)
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        regressor = Regressor()
    set_fast_parameters(regressor)
    if not hasattr(regressor, 'alphas') and hasattr(regressor, 'alpha'):
        # linear regressors need to set alpha, but not generalized CV ones
        regressor.alpha = 0.01

    if name in CROSS_DECOMPOSITION:
        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])
        y_ = y_.T
    else:
        y_ = y
    regressor.fit(X, y_)
    y_pred = regressor.predict(X)
    # store old predictions
    pickled_regressor = pickle.dumps(regressor)
    unpickled_regressor = pickle.loads(pickled_regressor)
    pickled_y_pred = unpickled_regressor.predict(X)
    assert_array_almost_equal(pickled_y_pred, y_pred)
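The round-trip pattern above (fit, predict, pickle.dumps, pickle.loads, predict again, compare) is self-contained enough to reproduce outside the test harness. A minimal sketch with a concrete estimator; Ridge and the synthetic data are illustrative choices, not part of the original suite:

import pickle

import numpy as np
from numpy.testing import assert_array_almost_equal
from sklearn.linear_model import Ridge

rng = np.random.RandomState(0)
X = rng.rand(50, 4)
y = X @ np.array([1.0, 2.0, 0.5, -1.0]) + 0.01 * rng.randn(50)

regressor = Ridge(alpha=0.01).fit(X, y)
y_pred = regressor.predict(X)

# pickle round-trip: the unpickled estimator must predict identically
unpickled = pickle.loads(pickle.dumps(regressor))
assert_array_almost_equal(unpickled.predict(X), y_pred)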
Example #2
def check_regressors_int(name, Regressor, X, y):
    if name == 'OrthogonalMatchingPursuitCV':
        # FIXME: This test is unstable on Travis, see issue #3190.
        check_skip_travis()
    rnd = np.random.RandomState(0)
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        # separate estimators to control random seeds
        regressor_1 = Regressor()
        regressor_2 = Regressor()
    set_random_state(regressor_1)
    set_random_state(regressor_2)

    if name in ('_PLS', 'PLSCanonical', 'PLSRegression'):
        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])
        y_ = y_.T
    else:
        y_ = y

    # fit
    regressor_1.fit(X, y_)
    pred1 = regressor_1.predict(X)
    regressor_2.fit(X, y_.astype(np.float64))  # np.float was removed in NumPy 1.24
    pred2 = regressor_2.predict(X)
    assert_array_almost_equal(pred1, pred2, 2, name)
Example #3
def check_regressors_train(name, Regressor, X, y):
    if name == 'OrthogonalMatchingPursuitCV':
        # FIXME: This test is unstable on Travis, see issue #3190.
        check_skip_travis()
    rnd = np.random.RandomState(0)
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        regressor = Regressor()
    if not hasattr(regressor, 'alphas') and hasattr(regressor, 'alpha'):
        # linear regressors need to set alpha, but not generalized CV ones
        regressor.alpha = 0.01

    # raises error on malformed input for fit
    assert_raises(ValueError, regressor.fit, X, y[:-1])
    # fit
    if name in ('PLSCanonical', 'PLSRegression', 'CCA'):
        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])
        y_ = y_.T
    else:
        y_ = y
    set_random_state(regressor)
    regressor.fit(X, y_)
    regressor.predict(X)

    # TODO: find out why PLS and CCA fail. RANSAC is random
    # and furthermore assumes the presence of outliers, hence
    # skipped
    if name not in ('PLSCanonical', 'CCA', 'RANSACRegressor'):
        assert_greater(regressor.score(X, y_), 0.5)
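The malformed-input assertion only verifies that a sample-count mismatch between X and y makes fit raise ValueError. A standalone sketch of the same check; LinearRegression and the random data are illustrative:

import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.RandomState(0)
X, y = rng.rand(20, 3), rng.rand(20)

# 20 rows in X but only 19 targets: fit must raise ValueError
try:
    LinearRegression().fit(X, y[:-1])
except ValueError:
    pass  # expected: inconsistent numbers of samples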
Example #4
def check_regressors_int(name, Regressor):
    X, _ = _boston_subset()
    X = X[:50]
    rnd = np.random.RandomState(0)
    y = rnd.randint(3, size=X.shape[0])
    y = multioutput_estimator_convert_y_2d(name, y)
    if name == 'OrthogonalMatchingPursuitCV':
        # FIXME: This test is unstable on Travis, see issue #3190.
        check_skip_travis()
    rnd = np.random.RandomState(0)
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        # separate estimators to control random seeds
        regressor_1 = Regressor()
        regressor_2 = Regressor()
    set_fast_parameters(regressor_1)
    set_fast_parameters(regressor_2)
    set_random_state(regressor_1)
    set_random_state(regressor_2)

    if name in CROSS_DECOMPOSITION:
        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])
        y_ = y_.T
    else:
        y_ = y

    # fit
    regressor_1.fit(X, y_)
    pred1 = regressor_1.predict(X)
    regressor_2.fit(X, y_.astype(np.float64))  # np.float was removed in NumPy 1.24
    pred2 = regressor_2.predict(X)
    assert_array_almost_equal(pred1, pred2, 2, name)
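The check passes when integer and float targets yield predictions that agree to two decimals. A self-contained sketch of the same comparison; DecisionTreeRegressor is an illustrative stand-in for the looped-over Regressor:

import numpy as np
from numpy.testing import assert_array_almost_equal
from sklearn.tree import DecisionTreeRegressor

rng = np.random.RandomState(0)
X = rng.rand(50, 3)
y = rng.randint(3, size=50)  # integer labels

reg_int = DecisionTreeRegressor(random_state=0).fit(X, y)
reg_float = DecisionTreeRegressor(random_state=0).fit(X, y.astype(np.float64))

# both fits should produce (nearly) identical predictions
assert_array_almost_equal(reg_int.predict(X), reg_float.predict(X), 2)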
Example #5
def check_regressors_train(name, Regressor):
    X, y = _boston_subset()
    y = StandardScaler().fit_transform(y)   # X is already scaled
    y = multioutput_estimator_convert_y_2d(name, y)
    if name == 'OrthogonalMatchingPursuitCV':
        # FIXME: This test is unstable on Travis, see issue #3190.
        check_skip_travis()
    rnd = np.random.RandomState(0)
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        regressor = Regressor()
    set_fast_parameters(regressor)
    if not hasattr(regressor, 'alphas') and hasattr(regressor, 'alpha'):
        # linear regressors need to set alpha, but not generalized CV ones
        regressor.alpha = 0.01

    # raises error on malformed input for fit
    assert_raises(ValueError, regressor.fit, X, y[:-1])
    # fit
    if name in CROSS_DECOMPOSITION:
        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])
        y_ = y_.T
    else:
        y_ = y
    set_random_state(regressor)
    regressor.fit(X, y_)
    regressor.fit(X.tolist(), y_.tolist())
    regressor.predict(X)

    # TODO: find out why PLS and CCA fail. RANSAC is random
    # and furthermore assumes the presence of outliers, hence
    # skipped
    if name not in ('PLSCanonical', 'CCA', 'RANSACRegressor'):
        assert_greater(regressor.score(X, y_), 0.5)
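The extra regressor.fit(X.tolist(), y_.tolist()) call checks that estimators accept plain Python lists, which their input validation converts to arrays. For instance:

from sklearn.linear_model import LinearRegression

# plain Python lists are accepted; fit converts them to ndarrays internally
reg = LinearRegression().fit([[0.0], [1.0], [2.0]], [0.0, 1.0, 2.0])
print(reg.predict([[3.0]]))  # approximately [3.0]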
Example #6
def test_omp_cv():
    # FIXME: This test is unstable on Travis, see issue #3190 for more detail.
    check_skip_travis()
    y_ = y[:, 0]
    gamma_ = gamma[:, 0]
    ompcv = OrthogonalMatchingPursuitCV(normalize=True, fit_intercept=False,
                                        max_iter=10, cv=5)
    ompcv.fit(X, y_)
    assert_equal(ompcv.n_nonzero_coefs_, n_nonzero_coefs)
    assert_array_almost_equal(ompcv.coef_, gamma_)
    omp = OrthogonalMatchingPursuit(normalize=True, fit_intercept=False,
                                    n_nonzero_coefs=ompcv.n_nonzero_coefs_)
    omp.fit(X, y_)
    assert_array_almost_equal(ompcv.coef_, omp.coef_)
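check_skip_travis itself lived in scikit-learn's testing utilities of that era. A plausible sketch of its behaviour, assuming Travis CI's convention of setting TRAVIS=true in the environment; the message and exact logic are assumptions, not the verified source:

import os
from unittest import SkipTest

def check_skip_travis():
    # Assumed behaviour: Travis CI sets TRAVIS=true in its environment,
    # and raising SkipTest makes the runner mark the test as skipped.
    if os.environ.get('TRAVIS') == 'true':
        raise SkipTest('Unstable on Travis, see issue #3190.')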
Example #7
def test_regressors_int():
    # test if regressors can cope with integer labels (by converting them to
    # float)
    regressors = all_estimators(type_filter='regressor')
    X, _ = _boston_subset()
    X = X[:50]
    rnd = np.random.RandomState(0)
    y = rnd.randint(3, size=X.shape[0])
    for name, Regressor in regressors:
        if name in dont_test or name in ('CCA',):  # one-element tuple, not a string
            continue
        elif name in ('OrthogonalMatchingPursuitCV',):
            # FIXME: This test is unstable on Travis, see issue #3190.
            check_skip_travis()
        yield (check_regressors_int, name, Regressor, X,
               multioutput_estimator_convert_y_2d(name, y))
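test_regressors_int is a nose-style generator test: the runner unpacks each yielded tuple and calls check_regressors_int(name, Regressor, X, y). A sketch that mimics, but does not reproduce, that dispatch:

# Minimal driver for a nose-style generator test: call each yielded
# tuple as check(*args).
def run_yield_tests(test_generator):
    for check, *args in test_generator():
        check(*args)

# e.g. run_yield_tests(test_regressors_int)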
Example #8
def set_fast_parameters(estimator):
    # speed up some estimators
    params = estimator.get_params()
    if estimator.__class__.__name__ == 'OrthogonalMatchingPursuitCV':
        # FIXME: This test is unstable on Travis, see issue #3190.
        check_skip_travis()
    if ("n_iter" in params
            and estimator.__class__.__name__ != "TSNE"):
        estimator.set_params(n_iter=5)
    if "max_iter" in params:
        # NMF
        if estimator.max_iter is not None:
            estimator.set_params(max_iter=min(5, estimator.max_iter))
        # LinearSVR
        if estimator.__class__.__name__ == 'LinearSVR':
            estimator.set_params(max_iter=20)
    if "n_resampling" in params:
        # randomized lasso
        estimator.set_params(n_resampling=5)
    if "n_estimators" in params:
        # especially gradient boosting with default 100
        estimator.set_params(n_estimators=min(5, estimator.n_estimators))
    if "max_trials" in params:
        # RANSAC
        estimator.set_params(max_trials=10)
    if "n_init" in params:
        # K-Means
        estimator.set_params(n_init=2)

    if estimator.__class__.__name__ == "SelectFdr":
        # be tolerant of noisy datasets (not actually speed)
        estimator.set_params(alpha=.5)

    if estimator.__class__.__name__ == "TheilSenRegressor":
        estimator.max_subpopulation = 100

    if isinstance(estimator, BaseRandomProjection):
        # Due to the Johnson-Lindenstrauss lemma and the often very small
        # number of samples, the number of components of the random
        # projection matrix will probably be greater than the number of
        # features, so we impose a smaller number (and avoid "auto" mode).
        estimator.set_params(n_components=1)

    if isinstance(estimator, SelectKBest):
        # SelectKBest has a default of k=10,
        # which is more features than we have in most cases.
        estimator.set_params(k=1)
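set_fast_parameters leans entirely on the scikit-learn estimator API: get_params() exposes the constructor parameters as a dict, and set_params(**kwargs) updates them in place. A standalone illustration with GradientBoostingRegressor, whose n_estimators defaults to 100:

from sklearn.ensemble import GradientBoostingRegressor

est = GradientBoostingRegressor()  # n_estimators defaults to 100
if "n_estimators" in est.get_params():
    est.set_params(n_estimators=min(5, est.n_estimators))
print(est.n_estimators)  # -> 5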
Example #9
def check_regressors_pickle(name, Regressor, X, y):
    if name == 'OrthogonalMatchingPursuitCV':
        # FIXME: This test is unstable on Travis, see issue #3190.
        check_skip_travis()
    rnd = np.random.RandomState(0)
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        regressor = Regressor()
    if not hasattr(regressor, 'alphas') and hasattr(regressor, 'alpha'):
        # linear regressors need to set alpha, but not generalized CV ones
        regressor.alpha = 0.01

    if name in ('PLSCanonical', 'PLSRegression', 'CCA'):
        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])
        y_ = y_.T
    else:
        y_ = y
    regressor.fit(X, y_)
    y_pred = regressor.predict(X)
    # store old predictions
    pickled_regressor = pickle.dumps(regressor)
    unpickled_regressor = pickle.loads(pickled_regressor)
    pickled_y_pred = unpickled_regressor.predict(X)
    assert_array_almost_equal(pickled_y_pred, y_pred)