def test_parameters_sampler_replacement():
    # raise error if n_iter too large
    params = {'first': [0, 1], 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params, n_iter=7)
    assert_raises(ValueError, list, sampler)
    # degenerates to GridSearchCV if n_iter the same as grid_size
    sampler = ParameterSampler(params, n_iter=6)
    samples = list(sampler)
    assert_equal(len(samples), 6)
    for values in ParameterGrid(params):
        assert_true(values in samples)

    # test sampling without replacement in a large grid
    params = {'a': range(10), 'b': range(10), 'c': range(10)}
    sampler = ParameterSampler(params, n_iter=99, random_state=42)
    samples = list(sampler)
    assert_equal(len(samples), 99)
    hashable_samples = ["a%db%dc%d" % (p['a'], p['b'], p['c'])
                        for p in samples]
    assert_equal(len(set(hashable_samples)), 99)

    # doesn't go into infinite loops
    params_distribution = {'first': bernoulli(.5), 'second': ['a', 'b', 'c']}
    sampler = ParameterSampler(params_distribution, n_iter=7)
    samples = list(sampler)
    assert_equal(len(samples), 7)
示例#2
0
def test_radius_neighbors_classifier_when_no_neighbors():
    """ Test radius-based classifier when no neighbors found.
    In this case it should rise an informative exception """

    X = np.array([[1.0, 1.0], [2.0, 2.0]])
    y = np.array([1, 2])
    radius = 0.1

    z1 = np.array([[1.01, 1.01], [2.01, 2.01]])  # no outliers
    z2 = np.array([[1.01, 1.01], [1.4, 1.4]])    # one outlier

    weight_func = _weight_func

    for outlier_label in [0, -1, None]:
        for algorithm in ALGORITHMS:
            for weights in ['uniform', 'distance', weight_func]:
                rnc = neighbors.RadiusNeighborsClassifier
                clf = rnc(radius=radius, weights=weights, algorithm=algorithm,
                          outlier_label=outlier_label)
                clf.fit(X, y)
                assert_array_equal(np.array([1, 2]),
                                   clf.predict(z1))
                if outlier_label is None:
                    assert_raises(ValueError, clf.predict, z2)
                elif False:
                    assert_array_equal(np.array([1, outlier_label]),
                                       clf.predict(z2))
def test_grid_search_precomputed_kernel():
    """Test that grid search works when the input features are given in the
    form of a precomputed kernel matrix """
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)

    # compute the training kernel matrix corresponding to the linear kernel
    K_train = np.dot(X_[:180], X_[:180].T)
    y_train = y_[:180]

    clf = SVC(kernel='precomputed')
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
    cv.fit(K_train, y_train)

    assert_true(cv.best_score_ >= 0)

    # compute the test kernel matrix
    K_test = np.dot(X_[180:], X_[:180].T)
    y_test = y_[180:]

    y_pred = cv.predict(K_test)

    assert_true(np.mean(y_pred == y_test) >= 0)

    # test error is raised when the precomputed kernel is not array-like
    # or sparse
    assert_raises(ValueError, cv.fit, K_train.tolist(), y_train)
示例#4
0
def test_k_means_function():
    # test calling the k_means function directly
    # catch output
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        cluster_centers, labels, inertia = k_means(X, n_clusters=n_clusters,
                                                   sample_weight=None,
                                                   verbose=True)
    finally:
        sys.stdout = old_stdout
    centers = cluster_centers
    assert_equal(centers.shape, (n_clusters, n_features))

    labels = labels
    assert_equal(np.unique(labels).shape[0], n_clusters)

    # check that the labels assignment are perfect (up to a permutation)
    assert_equal(v_measure_score(true_labels, labels), 1.0)
    assert_greater(inertia, 0.0)

    # check warning when centers are passed
    assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters,
                 sample_weight=None, init=centers)

    # to many clusters desired
    assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1,
                  sample_weight=None)

    # kmeans for algorithm='elkan' raises TypeError on sparse matrix
    assert_raise_message(TypeError, "algorithm='elkan' not supported for "
                         "sparse input X", k_means, X=X_csr, n_clusters=2,
                         sample_weight=None, algorithm="elkan")
def test_grid_search_precomputed_kernel_error_kernel_function():
    """Test that grid search returns an error when using a kernel_function"""
    X_, y_ = make_classification(n_samples=200, n_features=100, random_state=0)
    kernel_function = lambda x1, x2: np.dot(x1, x2.T)
    clf = SVC(kernel=kernel_function)
    cv = GridSearchCV(clf, {'C': [0.1, 1.0]})
    assert_raises(ValueError, cv.fit, X_, y_)
示例#6
0
def test_ovr_multilabel_predict_proba():
    base_clf = MultinomialNB(alpha=1)
    for au in (False, True):
        X, Y = datasets.make_multilabel_classification(n_samples=100,
                                                       n_features=20,
                                                       n_classes=5,
                                                       n_labels=3,
                                                       length=50,
                                                       allow_unlabeled=au,
                                                       return_indicator=True,
                                                       random_state=0)
        X_train, Y_train = X[:80], Y[:80]
        X_test, Y_test = X[80:], Y[80:]
        clf = OneVsRestClassifier(base_clf).fit(X_train, Y_train)

        # decision function only estimator. Fails in current implementation.
        decision_only = OneVsRestClassifier(svm.SVR()).fit(X_train, Y_train)
        assert_raises(AttributeError, decision_only.predict_proba, X_test)

        # Estimator with predict_proba disabled, depending on parameters.
        decision_only = OneVsRestClassifier(svm.SVC(probability=False))
        decision_only.fit(X_train, Y_train)
        assert_raises(AttributeError, decision_only.predict_proba, X_test)

        Y_pred = clf.predict(X_test)
        Y_proba = clf.predict_proba(X_test)

        # predict assigns a label if the probability that the
        # sample has the label is greater than 0.5.
        pred = Y_proba > .5
        assert_array_equal(pred, Y_pred)
def test_k_means_function():
    # test calling the k_means function directly
    # catch output
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        cluster_centers, labels, inertia = k_means(X, n_clusters=n_clusters,
                                                   verbose=True)
    finally:
        sys.stdout = old_stdout
    centers = cluster_centers
    assert_equal(centers.shape, (n_clusters, n_features))

    labels = labels
    assert_equal(np.unique(labels).shape[0], n_clusters)

    # check that the labels assignment are perfect (up to a permutation)
    assert_equal(v_measure_score(true_labels, labels), 1.0)
    assert_greater(inertia, 0.0)

    # check warning when centers are passed
    assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters,
                 init=centers)

    # to many clusters desired
    assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1)
def test_score_memmap():
    # Ensure a scalar score of memmap type is accepted
    iris = load_iris()
    X, y = iris.data, iris.target
    clf = MockClassifier()
    tf = tempfile.NamedTemporaryFile(mode='wb', delete=False)
    tf.write(b'Hello world!!!!!')
    tf.close()
    scores = np.memmap(tf.name, dtype=np.float64)
    score = np.memmap(tf.name, shape=(), mode='r', dtype=np.float64)
    try:
        cross_val_score(clf, X, y, scoring=lambda est, X, y: score)
        # non-scalar should still fail
        assert_raises(ValueError, cross_val_score, clf, X, y,
                      scoring=lambda est, X, y: scores)
    finally:
        # Best effort to release the mmap file handles before deleting the
        # backing file under Windows
        scores, score = None, None
        for _ in range(3):
            try:
                os.unlink(tf.name)
                break
            except WindowsError:
                sleep(1.)
def test_skewed_chi2_sampler():
    """test that RBFSampler approximates kernel on random data"""

    # compute exact kernel
    c = 0.03
    # appreviations for easier formular
    X_c = (X + c)[:, np.newaxis, :]
    Y_c = (Y + c)[np.newaxis, :, :]

    # we do it in log-space in the hope that it's more stable
    # this array is n_samples_x x n_samples_y big x n_features
    log_kernel = ((np.log(X_c) / 2.) + (np.log(Y_c) / 2.) + np.log(2.) -
                  np.log(X_c + Y_c))
    # reduce to n_samples_x x n_samples_y by summing over features in log-space
    kernel = np.exp(log_kernel.sum(axis=2))

    # approximate kernel mapping
    transform = SkewedChi2Sampler(skewedness=c, n_components=1000,
                                  random_state=42)
    X_trans = transform.fit_transform(X)
    Y_trans = transform.transform(Y)

    kernel_approx = np.dot(X_trans, Y_trans.T)
    assert_array_almost_equal(kernel, kernel_approx, 1)

    # test error is raised on negative input
    Y_neg = Y.copy()
    Y_neg[0, 0] = -1
    assert_raises(ValueError, transform.transform, Y_neg)
def check_transformers_unfitted(name, Transformer):
    X, y = _boston_subset()

    with warnings.catch_warnings(record=True):
        transformer = Transformer()

    assert_raises(NotFittedError, transformer.transform, X)
def check_regressors_train(name, Regressor):
    X, y = _boston_subset()
    y = StandardScaler().fit_transform(y)   # X is already scaled
    y = multioutput_estimator_convert_y_2d(name, y)
    rnd = np.random.RandomState(0)
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        regressor = Regressor()
    set_fast_parameters(regressor)
    if not hasattr(regressor, 'alphas') and hasattr(regressor, 'alpha'):
        # linear regressors need to set alpha, but not generalized CV ones
        regressor.alpha = 0.01
    if name == 'PassiveAggressiveRegressor':
        regressor.C = 0.01

    # raises error on malformed input for fit
    assert_raises(ValueError, regressor.fit, X, y[:-1])
    # fit
    if name in CROSS_DECOMPOSITION:
        y_ = np.vstack([y, 2 * y + rnd.randint(2, size=len(y))])
        y_ = y_.T
    else:
        y_ = y
    set_random_state(regressor)
    regressor.fit(X, y_)
    regressor.fit(X.tolist(), y_.tolist())
    regressor.predict(X)

    # TODO: find out why PLS and CCA fail. RANSAC is random
    # and furthermore assumes the presence of outliers, hence
    # skipped
    if name not in ('PLSCanonical', 'CCA', 'RANSACRegressor'):
        print(regressor)
        assert_greater(regressor.score(X, y_), 0.5)
示例#12
0
def check_min_samples_leaf(name):
    X, y = hastie_X, hastie_y

    # Test if leaves contain more than leaf_count training examples
    ForestEstimator = FOREST_ESTIMATORS[name]

    # test boundary value
    assert_raises(ValueError,
                  ForestEstimator(min_samples_leaf=-1).fit, X, y)
    assert_raises(ValueError,
                  ForestEstimator(min_samples_leaf=0).fit, X, y)

    est = ForestEstimator(min_samples_leaf=5, n_estimators=1, random_state=0)
    est.fit(X, y)
    out = est.estimators_[0].tree_.apply(X)
    node_counts = np.bincount(out)
    # drop inner nodes
    leaf_count = node_counts[node_counts != 0]
    assert_greater(np.min(leaf_count), 4,
                   "Failed with {0}".format(name))

    est = ForestEstimator(min_samples_leaf=0.25, n_estimators=1,
                          random_state=0)
    est.fit(X, y)
    out = est.estimators_[0].tree_.apply(X)
    node_counts = np.bincount(out)
    # drop inner nodes
    leaf_count = node_counts[node_counts != 0]
    assert_greater(np.min(leaf_count), len(X) * 0.25 - 1,
                   "Failed with {0}".format(name))
示例#13
0
def test_set_params():
    # test nested estimator parameter setting
    clf = Pipeline([("svc", SVC())])
    # non-existing parameter in svc
    assert_raises(ValueError, clf.set_params, svc__stupid_param=True)
    # non-existing parameter of pipeline
    assert_raises(ValueError, clf.set_params, svm__stupid_param=True)
示例#14
0
def test_multioutput_number_of_output_differ():
    y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    y_pred = np.array([[0, 0], [1, 0], [0, 0]])

    for name in MULTIOUTPUT_METRICS:
        metric = ALL_METRICS[name]
        assert_raises(ValueError, metric, y_true, y_pred)
示例#15
0
def test_scikit_vs_scipy():
    """Test scikit linkage with full connectivity (i.e. unstructured) vs scipy
    """
    n, p, k = 10, 5, 3
    rng = np.random.RandomState(0)

    # Not using a lil_matrix here, just to check that non sparse
    # matrices are well handled
    connectivity = np.ones((n, n))
    for linkage in _TREE_BUILDERS.keys():
        for i in range(5):
            X = .1 * rng.normal(size=(n, p))
            X -= 4. * np.arange(n)[:, np.newaxis]
            X -= X.mean(axis=1)[:, np.newaxis]

            out = hierarchy.linkage(X, method=linkage)

            children_ = out[:, :2].astype(np.int)
            children, _, n_leaves, _ = _TREE_BUILDERS[linkage](X, connectivity)

            cut = _hc_cut(k, children, n_leaves)
            cut_ = _hc_cut(k, children_, n_leaves)
            assess_same_labelling(cut, cut_)

    # Test error management in _hc_cut
    assert_raises(ValueError, _hc_cut, n_leaves + 1, children, n_leaves)
示例#16
0
def test_constant_size_multioutput_regressor():
    random_state = np.random.RandomState(seed=1)
    X = random_state.randn(10, 10)
    y = random_state.randn(10, 5)

    est = DummyRegressor(strategy='constant', constant=[1, 2, 3, 4])
    assert_raises(ValueError, est.fit, X, y)
示例#17
0
def test_cross_val_score():
    clf = MockClassifier()
    for a in range(-10, 10):
        clf.a = a
        # Smoke test
        scores = cval.cross_val_score(clf, X, y)
        assert_array_equal(scores, clf.score(X, y))

        # test with multioutput y
        scores = cval.cross_val_score(clf, X_sparse, X)
        assert_array_equal(scores, clf.score(X_sparse, X))

        scores = cval.cross_val_score(clf, X_sparse, y)
        assert_array_equal(scores, clf.score(X_sparse, y))

        # test with multioutput y
        scores = cval.cross_val_score(clf, X_sparse, X)
        assert_array_equal(scores, clf.score(X_sparse, X))

    # test with X as list
    clf = MockListClassifier()
    scores = cval.cross_val_score(clf, X.tolist(), y)

    assert_raises(ValueError, cval.cross_val_score, clf, X, y,
                  scoring="sklearn")
示例#18
0
def test_nan():
    # Test proper NaN handling.
    # Regression test for Issue #252: fit used to go into an infinite loop.
    Xnan = np.array(X, dtype=np.float64)
    Xnan[0, 1] = np.nan
    logistic = LogisticRegression(random_state=0)
    assert_raises(ValueError, logistic.fit, Xnan, Y1)
示例#19
0
def test_spectral_amg_mode():
    # Test the amg mode of SpectralClustering
    centers = np.array([
        [0., 0., 0.],
        [10., 10., 10.],
        [20., 20., 20.],
    ])
    X, true_labels = make_blobs(n_samples=100, centers=centers,
                                cluster_std=1., random_state=42)
    D = pairwise_distances(X)  # Distance matrix
    S = np.max(D) - D  # Similarity matrix
    S = sparse.coo_matrix(S)
    try:
        from pyamg import smoothed_aggregation_solver
        amg_loaded = True
    except ImportError:
        amg_loaded = False
    if amg_loaded:
        labels = spectral_clustering(S, n_clusters=len(centers),
                                     random_state=0, mode="amg")
        # We don't care too much that it's good, just that it *worked*.
        # There does have to be some lower limit on the performance though.
        assert_greater(np.mean(labels == true_labels), .3)
    else:
        assert_raises(ValueError, spectral_embedding, S,
                      n_components=len(centers), random_state=0, mode="amg")
def test_warm_start_smaller_n_estimators(Cls):
    # Test if warm start with smaller n_estimators raises error
    X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1)
    est = Cls(n_estimators=100, max_depth=1, warm_start=True)
    est.fit(X, y)
    est.set_params(n_estimators=99)
    assert_raises(ValueError, est.fit, X, y)
示例#21
0
def test_logistic_regressioncv_class_weights():
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               n_classes=3, random_state=0)

    # Test the liblinear fails when class_weight of type dict is
    # provided, when it is multiclass. However it can handle
    # binary problems.
    clf_lib = LogisticRegressionCV(class_weight={0: 0.1, 1: 0.2},
                                   solver='liblinear')
    assert_raises(ValueError, clf_lib.fit, X, y)
    y_ = y.copy()
    y_[y == 2] = 1
    clf_lib.fit(X, y_)
    assert_array_equal(clf_lib.classes_, [0, 1])

    # Test for class_weight=auto
    X, y = make_classification(n_samples=20, n_features=20, n_informative=10,
                               random_state=0)
    clf_lbf = LogisticRegressionCV(solver='lbfgs', fit_intercept=False,
                                   class_weight='auto')
    clf_lbf.fit(X, y)
    clf_lib = LogisticRegressionCV(solver='liblinear', fit_intercept=False,
                                   class_weight='auto')
    clf_lib.fit(X, y)
    assert_array_almost_equal(clf_lib.coef_, clf_lbf.coef_, decimal=4)
def check_boston(presort, loss, subsample):
    # Check consistency on dataset boston house prices with least squares
    # and least absolute deviation.
    ones = np.ones(len(boston.target))
    last_y_pred = None
    for sample_weight in None, ones, 2 * ones:
        clf = GradientBoostingRegressor(n_estimators=100,
                                        loss=loss,
                                        max_depth=4,
                                        subsample=subsample,
                                        min_samples_split=2,
                                        random_state=1,
                                        presort=presort)

        assert_raises(ValueError, clf.predict, boston.data)
        clf.fit(boston.data, boston.target,
                sample_weight=sample_weight)
        leaves = clf.apply(boston.data)
        assert_equal(leaves.shape, (506, 100))

        y_pred = clf.predict(boston.data)
        mse = mean_squared_error(boston.target, y_pred)
        assert_less(mse, 6.0)

        if last_y_pred is not None:
            assert_array_almost_equal(last_y_pred, y_pred)

        last_y_pred = y_pred
def test_multilabel_binarizer_non_integer_labels():
    tuple_classes = np.empty(3, dtype=object)
    tuple_classes[:] = [(1,), (2,), (3,)]
    inputs = [
        ([('2', '3'), ('1',), ('1', '2')], ['1', '2', '3']),
        ([('b', 'c'), ('a',), ('a', 'b')], ['a', 'b', 'c']),
        ([((2,), (3,)), ((1,),), ((1,), (2,))], tuple_classes),
    ]
    indicator_mat = np.array([[0, 1, 1],
                              [1, 0, 0],
                              [1, 1, 0]])
    for inp, classes in inputs:
        # fit_transform()
        mlb = MultiLabelBinarizer()
        assert_array_equal(mlb.fit_transform(inp), indicator_mat)
        assert_array_equal(mlb.classes_, classes)
        assert_array_equal(mlb.inverse_transform(indicator_mat), inp)

        # fit().transform()
        mlb = MultiLabelBinarizer()
        assert_array_equal(mlb.fit(inp).transform(inp), indicator_mat)
        assert_array_equal(mlb.classes_, classes)
        assert_array_equal(mlb.inverse_transform(indicator_mat), inp)

    mlb = MultiLabelBinarizer()
    assert_raises(TypeError, mlb.fit_transform, [({}), ({}, {'a': 'b'})])
def test_thresholded_scorers():
    """Test scorers that take thresholds."""
    X, y = make_blobs(random_state=0, centers=2)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf = LogisticRegression(random_state=0)
    clf.fit(X_train, y_train)
    score1 = SCORERS['roc_auc'](clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.decision_function(X_test))
    score3 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)
    assert_almost_equal(score1, score3)

    logscore = SCORERS['log_loss'](clf, X_test, y_test)
    logloss = log_loss(y_test, clf.predict_proba(X_test))
    assert_almost_equal(-logscore, logloss)

    # same for an estimator without decision_function
    clf = DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    score1 = SCORERS['roc_auc'](clf, X_test, y_test)
    score2 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1])
    assert_almost_equal(score1, score2)

    # Test that an exception is raised on more than two classes
    X, y = make_blobs(random_state=0, centers=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
    clf.fit(X_train, y_train)
    assert_raises(ValueError, SCORERS['roc_auc'], clf, X_test, y_test)
示例#25
0
def test_partial_fit():
    # Checks whether inserting array is consistent with fitted data.
    # `partial_fit` method should set all attribute values correctly.
    n_samples = 12
    n_samples_partial_fit = 3
    n_features = 2
    rng = np.random.RandomState(42)
    X = rng.rand(n_samples, n_features)
    X_partial_fit = rng.rand(n_samples_partial_fit, n_features)

    lshf = ignore_warnings(LSHForest, category=DeprecationWarning)()

    # Test unfitted estimator
    ignore_warnings(lshf.partial_fit)(X)
    assert_array_equal(X, lshf._fit_X)

    ignore_warnings(lshf.fit)(X)

    # Insert wrong dimension
    assert_raises(ValueError, lshf.partial_fit,
                  np.random.randn(n_samples_partial_fit, n_features - 1))

    ignore_warnings(lshf.partial_fit)(X_partial_fit)

    # size of _input_array = samples + 1 after insertion
    assert_equal(lshf._fit_X.shape[0],
                 n_samples + n_samples_partial_fit)
    # size of original_indices_[1] = samples + 1
    assert_equal(len(lshf.original_indices_[0]),
                 n_samples + n_samples_partial_fit)
    # size of trees_[1] = samples + 1
    assert_equal(len(lshf.trees_[1]),
                 n_samples + n_samples_partial_fit)
示例#26
0
def test_kernel_density_sampling(n_samples=100, n_features=3):
    rng = np.random.RandomState(0)
    X = rng.randn(n_samples, n_features)

    bandwidth = 0.2

    for kernel in ['gaussian', 'tophat']:
        # draw a tophat sample
        kde = KernelDensity(bandwidth, kernel=kernel).fit(X)
        samp = kde.sample(100)
        assert_equal(X.shape, samp.shape)

        # check that samples are in the right range
        nbrs = NearestNeighbors(n_neighbors=1).fit(X)
        dist, ind = nbrs.kneighbors(X, return_distance=True)

        if kernel == 'tophat':
            assert np.all(dist < bandwidth)
        elif kernel == 'gaussian':
            # 5 standard deviations is safe for 100 samples, but there's a
            # very small chance this test could fail.
            assert np.all(dist < 5 * bandwidth)

    # check unsupported kernels
    for kernel in ['epanechnikov', 'exponential', 'linear', 'cosine']:
        kde = KernelDensity(bandwidth, kernel=kernel).fit(X)
        assert_raises(NotImplementedError, kde.sample, 100)

    # non-regression test: used to return a scalar
    X = rng.randn(4, 1)
    kde = KernelDensity(kernel="gaussian").fit(X)
    assert_equal(kde.sample().shape, (1, 1))
示例#27
0
def test_pca_randomized_solver():
    # PCA on dense arrays
    X = iris.data

    # Loop excluding the 0, invalid for randomized
    for n_comp in np.arange(1, X.shape[1]):
        pca = PCA(n_components=n_comp, svd_solver='randomized', random_state=0)

        X_r = pca.fit(X).transform(X)
        np.testing.assert_equal(X_r.shape[1], n_comp)

        X_r2 = pca.fit_transform(X)
        assert_array_almost_equal(X_r, X_r2)

        X_r = pca.transform(X)
        assert_array_almost_equal(X_r, X_r2)

        # Test get_covariance and get_precision
        cov = pca.get_covariance()
        precision = pca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision),
                                  np.eye(X.shape[1]), 12)

    pca = PCA(n_components=0, svd_solver='randomized', random_state=0)
    assert_raises(ValueError, pca.fit, X)

    pca = PCA(n_components=0, svd_solver='randomized', random_state=0)
    assert_raises(ValueError, pca.fit, X)
    # Check internal state
    assert_equal(pca.n_components,
                 PCA(n_components=0,
                     svd_solver='randomized', random_state=0).n_components)
    assert_equal(pca.svd_solver,
                 PCA(n_components=0,
                     svd_solver='randomized', random_state=0).svd_solver)
def test_zero_estimator_clf():
    # Test if ZeroEstimator works for classification.
    X = iris.data
    y = np.array(iris.target)
    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init=ZeroEstimator())
    est.fit(X, y)

    assert_greater(est.score(X, y), 0.96)

    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='zero')
    est.fit(X, y)

    assert_greater(est.score(X, y), 0.96)

    # binary clf
    mask = y != 0
    y[mask] = 1
    y[~mask] = 0
    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='zero')
    est.fit(X, y)
    assert_greater(est.score(X, y), 0.96)

    est = GradientBoostingClassifier(n_estimators=20, max_depth=1,
                                     random_state=1, init='foobar')
    assert_raises(ValueError, est.fit, X, y)
def test_ward_clustering():
    """
    Check that we obtain the correct number of clusters with Ward clustering.
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    X = rnd.randn(100, 50)
    connectivity = grid_to_graph(*mask.shape)
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    clustering.fit(X)
    # test caching
    clustering = Ward(n_clusters=10, connectivity=connectivity,
                      memory=mkdtemp())
    clustering.fit(X)
    labels = clustering.labels_
    assert_true(np.size(np.unique(labels)) == 10)
    # Turn caching off now
    clustering = Ward(n_clusters=10, connectivity=connectivity)
    # Check that we obtain the same solution with early-stopping of the
    # tree building
    clustering.compute_full_tree = False
    clustering.fit(X)
    np.testing.assert_array_equal(clustering.labels_, labels)
    clustering.connectivity = None
    clustering.fit(X)
    assert_true(np.size(np.unique(clustering.labels_)) == 10)
    # Check that we raise a TypeError on dense matrices
    clustering = Ward(n_clusters=10,
                      connectivity=connectivity.todense())
    assert_raises(TypeError, clustering.fit, X)
    clustering = Ward(n_clusters=10,
                      connectivity=sparse.lil_matrix(
                          connectivity.todense()[:10, :10]))
    assert_raises(ValueError, clustering.fit, X)
def test_staged_predict_proba():
    # Test whether staged predict proba eventually gives
    # the same prediction.
    X, y = datasets.make_hastie_10_2(n_samples=1200,
                                     random_state=1)
    X_train, y_train = X[:200], y[:200]
    X_test, y_test = X[200:], y[200:]
    clf = GradientBoostingClassifier(n_estimators=20)
    # test raise NotFittedError if not fitted
    assert_raises(NotFittedError, lambda X: np.fromiter(
        clf.staged_predict_proba(X), dtype=np.float64), X_test)

    clf.fit(X_train, y_train)

    # test if prediction for last stage equals ``predict``
    for y_pred in clf.staged_predict(X_test):
        assert_equal(y_test.shape, y_pred.shape)

    assert_array_equal(clf.predict(X_test), y_pred)

    # test if prediction for last stage equals ``predict_proba``
    for staged_proba in clf.staged_predict_proba(X_test):
        assert_equal(y_test.shape[0], staged_proba.shape[0])
        assert_equal(2, staged_proba.shape[1])

    assert_array_almost_equal(clf.predict_proba(X_test), staged_proba)
示例#31
0
def test_bad_input():
    # Test that it gives proper exception on deficient input
    # impossible value of C
    assert_raises(ValueError, svm.SVC(C=-1).fit, X, Y)

    # impossible value of nu
    clf = svm.NuSVC(nu=0.0)
    assert_raises(ValueError, clf.fit, X, Y)

    Y2 = Y[:-1]  # wrong dimensions for labels
    assert_raises(ValueError, clf.fit, X, Y2)

    # Test with arrays that are non-contiguous.
    for clf in (svm.SVC(), svm.LinearSVC(random_state=0)):
        Xf = np.asfortranarray(X)
        assert not Xf.flags['C_CONTIGUOUS']
        yf = np.ascontiguousarray(np.tile(Y, (2, 1)).T)
        yf = yf[:, -1]
        assert not yf.flags['F_CONTIGUOUS']
        assert not yf.flags['C_CONTIGUOUS']
        clf.fit(Xf, yf)
        assert_array_equal(clf.predict(T), true_result)

    # error for precomputed kernelsx
    clf = svm.SVC(kernel='precomputed')
    assert_raises(ValueError, clf.fit, X, Y)

    # sample_weight bad dimensions
    clf = svm.SVC()
    assert_raises(ValueError, clf.fit, X, Y, sample_weight=range(len(X) - 1))

    # predict with sparse input when trained with dense
    clf = svm.SVC().fit(X, Y)
    assert_raises(ValueError, clf.predict, sparse.lil_matrix(X))

    Xt = np.array(X).T
    clf.fit(np.dot(X, Xt), Y)
    assert_raises(ValueError, clf.predict, X)

    clf = svm.SVC()
    clf.fit(X, Y)
    assert_raises(ValueError, clf.predict, Xt)
示例#32
0
def test_stratified_shuffle_split_init():
    y = np.asarray([0, 1, 1, 1, 2, 2, 2])
    # Check that error is raised if there is a class with only one sample
    assert_raises(ValueError, cval.StratifiedShuffleSplit, y, 3, 0.2)

    # Check that error is raised if the test set size is smaller than n_classes
    assert_raises(ValueError, cval.StratifiedShuffleSplit, y, 3, 2)
    # Check that error is raised if the train set size is smaller than
    # n_classes
    assert_raises(ValueError, cval.StratifiedShuffleSplit, y, 3, 3, 2)

    y = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2])
    # Check that errors are raised if there is not enough samples
    assert_raises(ValueError, cval.StratifiedShuffleSplit, y, 3, 0.5, 0.6)
    assert_raises(ValueError, cval.StratifiedShuffleSplit, y, 3, 8, 0.6)
    assert_raises(ValueError, cval.StratifiedShuffleSplit, y, 3, 0.6, 8)

    # Train size or test size too small
    assert_raises(ValueError, cval.StratifiedShuffleSplit, y, train_size=2)
    assert_raises(ValueError, cval.StratifiedShuffleSplit, y, test_size=2)
示例#33
0
def check_label_kfold(shuffle):
    rng = np.random.RandomState(0)

    # Parameters of the test
    n_labels = 15
    n_samples = 1000
    n_folds = 5

    # Construct the test data
    tolerance = 0.05 * n_samples  # 5 percent error allowed
    labels = rng.randint(0, n_labels, n_samples)
    folds = cval.LabelKFold(labels,
                            n_folds=n_folds,
                            shuffle=shuffle,
                            random_state=rng).idxs
    ideal_n_labels_per_fold = n_samples // n_folds

    # Check that folds have approximately the same size
    assert_equal(len(folds), len(labels))
    for i in np.unique(folds):
        assert_greater_equal(tolerance,
                             abs(sum(folds == i) - ideal_n_labels_per_fold))

    # Check that each label appears only in 1 fold
    for label in np.unique(labels):
        assert_equal(len(np.unique(folds[labels == label])), 1)

    # Check that no label is on both sides of the split
    labels = np.asarray(labels, dtype=object)
    for train, test in cval.LabelKFold(labels,
                                       n_folds=n_folds,
                                       shuffle=shuffle,
                                       random_state=rng):
        assert_equal(len(np.intersect1d(labels[train], labels[test])), 0)

    # Construct the test data
    labels = [
        'Albert', 'Jean', 'Bertrand', 'Michel', 'Jean', 'Francis', 'Robert',
        'Michel', 'Rachel', 'Lois', 'Michelle', 'Bernard', 'Marion', 'Laura',
        'Jean', 'Rachel', 'Franck', 'John', 'Gael', 'Anna', 'Alix', 'Robert',
        'Marion', 'David', 'Tony', 'Abel', 'Becky', 'Madmood', 'Cary', 'Mary',
        'Alexandre', 'David', 'Francis', 'Barack', 'Abdoul', 'Rasha', 'Xi',
        'Silvia'
    ]
    labels = np.asarray(labels, dtype=object)

    n_labels = len(np.unique(labels))
    n_samples = len(labels)
    n_folds = 5
    tolerance = 0.05 * n_samples  # 5 percent error allowed
    folds = cval.LabelKFold(labels,
                            n_folds=n_folds,
                            shuffle=shuffle,
                            random_state=rng).idxs
    ideal_n_labels_per_fold = n_samples // n_folds

    # Check that folds have approximately the same size
    assert_equal(len(folds), len(labels))
    for i in np.unique(folds):
        assert_greater_equal(tolerance,
                             abs(sum(folds == i) - ideal_n_labels_per_fold))

    # Check that each label appears only in 1 fold
    for label in np.unique(labels):
        assert_equal(len(np.unique(folds[labels == label])), 1)

    # Check that no label is on both sides of the split
    for train, test in cval.LabelKFold(labels,
                                       n_folds=n_folds,
                                       shuffle=shuffle,
                                       random_state=rng):
        assert_equal(len(np.intersect1d(labels[train], labels[test])), 0)

    # Should fail if there are more folds than labels
    labels = np.array([1, 1, 1, 2, 2])
    assert_raises(ValueError, cval.LabelKFold, labels, n_folds=3)
示例#34
0
def test_kfold_valueerrors():
    # Check that errors are raised if there is not enough samples
    assert_raises(ValueError, cval.KFold, 3, 4)

    # Check that a warning is raised if the least populated class has too few
    # members.
    y = [3, 3, -1, -1, 2]

    cv = assert_warns_message(Warning, "The least populated class",
                              cval.StratifiedKFold, y, 3)

    # Check that despite the warning the folds are still computed even
    # though all the classes are not necessarily represented at on each
    # side of the split at each split
    check_cv_coverage(cv, expected_n_iter=3, n_samples=len(y))

    # Error when number of folds is <= 1
    assert_raises(ValueError, cval.KFold, 2, 0)
    assert_raises(ValueError, cval.KFold, 2, 1)
    assert_raises(ValueError, cval.StratifiedKFold, y, 0)
    assert_raises(ValueError, cval.StratifiedKFold, y, 1)

    # When n is not integer:
    assert_raises(ValueError, cval.KFold, 2.5, 2)

    # When n_folds is not integer:
    assert_raises(ValueError, cval.KFold, 5, 1.5)
    assert_raises(ValueError, cval.StratifiedKFold, y, 1.5)
示例#35
0
def test_shufflesplit_errors():
    assert_raises(ValueError, cval.ShuffleSplit, 10, test_size=2.0)
    assert_raises(ValueError, cval.ShuffleSplit, 10, test_size=1.0)
    assert_raises(ValueError,
                  cval.ShuffleSplit,
                  10,
                  test_size=0.1,
                  train_size=0.95)
    assert_raises(ValueError, cval.ShuffleSplit, 10, test_size=11)
    assert_raises(ValueError, cval.ShuffleSplit, 10, test_size=10)
    assert_raises(ValueError, cval.ShuffleSplit, 10, test_size=8, train_size=3)
    assert_raises(ValueError, cval.ShuffleSplit, 10, train_size=1j)
    assert_raises(ValueError,
                  cval.ShuffleSplit,
                  10,
                  test_size=None,
                  train_size=None)
示例#36
0
def test_precomputed():
    # SVC with a precomputed kernel.
    # We test it with a toy dataset and with iris.
    clf = svm.SVC(kernel='precomputed')
    # Gram matrix for train data (square matrix)
    # (we use just a linear kernel)
    K = np.dot(X, np.array(X).T)
    clf.fit(K, Y)
    # Gram matrix for test data (rectangular matrix)
    KT = np.dot(T, np.array(X).T)
    pred = clf.predict(KT)
    assert_raises(ValueError, clf.predict, KT.T)

    assert_array_equal(clf.dual_coef_, [[-0.25, .25]])
    assert_array_equal(clf.support_, [1, 3])
    assert_array_equal(clf.intercept_, [0])
    assert_array_almost_equal(clf.support_, [1, 3])
    assert_array_equal(pred, true_result)

    # Gram matrix for test data but compute KT[i,j]
    # for support vectors j only.
    KT = np.zeros_like(KT)
    for i in range(len(T)):
        for j in clf.support_:
            KT[i, j] = np.dot(T[i], X[j])

    pred = clf.predict(KT)
    assert_array_equal(pred, true_result)

    # same as before, but using a callable function instead of the kernel
    # matrix. kernel is just a linear kernel

    kfunc = lambda x, y: np.dot(x, y.T)
    clf = svm.SVC(kernel=kfunc)
    clf.fit(X, Y)
    pred = clf.predict(T)

    assert_array_equal(clf.dual_coef_, [[-0.25, .25]])
    assert_array_equal(clf.intercept_, [0])
    assert_array_almost_equal(clf.support_, [1, 3])
    assert_array_equal(pred, true_result)

    # test a precomputed kernel with the iris dataset
    # and check parameters against a linear SVC
    clf = svm.SVC(kernel='precomputed')
    clf2 = svm.SVC(kernel='linear')
    K = np.dot(iris.data, iris.data.T)
    clf.fit(K, iris.target)
    clf2.fit(iris.data, iris.target)
    pred = clf.predict(K)
    assert_array_almost_equal(clf.support_, clf2.support_)
    assert_array_almost_equal(clf.dual_coef_, clf2.dual_coef_)
    assert_array_almost_equal(clf.intercept_, clf2.intercept_)
    assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)

    # Gram matrix for test data but compute KT[i,j]
    # for support vectors j only.
    K = np.zeros_like(K)
    for i in range(len(iris.data)):
        for j in clf.support_:
            K[i, j] = np.dot(iris.data[i], iris.data[j])

    pred = clf.predict(K)
    assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)

    clf = svm.SVC(kernel=kfunc)
    clf.fit(iris.data, iris.target)
    assert_almost_equal(np.mean(pred == iris.target), .99, decimal=2)
示例#37
0
def test_svc_bad_kernel():
    svc = svm.SVC(kernel=lambda x, y: x)
    assert_raises(ValueError, svc.fit, X, Y)
示例#38
0
def check_unfitted_feature_importances(name):
    assert_raises(ValueError, getattr, FOREST_ESTIMATORS[name](random_state=0),
                  "feature_importances_")
示例#39
0
 def test_prediction_proba_parameter(self):
     with assert_raises(ValueError):
         self.clf.predict_proba(self.X_test, method='something')
示例#40
0
def test_check_array():
    # accept_sparse == False
    # raise error on sparse inputs
    X = [[1, 2], [3, 4]]
    X_csr = sp.csr_matrix(X)
    assert_raises(TypeError, check_array, X_csr)
    # ensure_2d=False
    X_array = check_array([0, 1, 2], ensure_2d=False)
    assert X_array.ndim == 1
    # ensure_2d=True with 1d array
    assert_raise_message(ValueError,
                         'Expected 2D array, got 1D array instead',
                         check_array, [0, 1, 2],
                         ensure_2d=True)
    # ensure_2d=True with scalar array
    assert_raise_message(ValueError,
                         'Expected 2D array, got scalar array instead',
                         check_array,
                         10,
                         ensure_2d=True)
    # don't allow ndim > 3
    X_ndim = np.arange(8).reshape(2, 2, 2)
    assert_raises(ValueError, check_array, X_ndim)
    check_array(X_ndim, allow_nd=True)  # doesn't raise

    # dtype and order enforcement.
    X_C = np.arange(4).reshape(2, 2).copy("C")
    X_F = X_C.copy("F")
    X_int = X_C.astype(np.int)
    X_float = X_C.astype(np.float)
    Xs = [X_C, X_F, X_int, X_float]
    dtypes = [np.int32, np.int, np.float, np.float32, None, np.bool, object]
    orders = ['C', 'F', None]
    copys = [True, False]

    for X, dtype, order, copy in product(Xs, dtypes, orders, copys):
        X_checked = check_array(X, dtype=dtype, order=order, copy=copy)
        if dtype is not None:
            assert X_checked.dtype == dtype
        else:
            assert X_checked.dtype == X.dtype
        if order == 'C':
            assert X_checked.flags['C_CONTIGUOUS']
            assert not X_checked.flags['F_CONTIGUOUS']
        elif order == 'F':
            assert X_checked.flags['F_CONTIGUOUS']
            assert not X_checked.flags['C_CONTIGUOUS']
        if copy:
            assert X is not X_checked
        else:
            # doesn't copy if it was already good
            if (X.dtype == X_checked.dtype and X_checked.flags['C_CONTIGUOUS']
                    == X.flags['C_CONTIGUOUS']
                    and X_checked.flags['F_CONTIGUOUS']
                    == X.flags['F_CONTIGUOUS']):
                assert X is X_checked

    # allowed sparse != None
    X_csc = sp.csc_matrix(X_C)
    X_coo = X_csc.tocoo()
    X_dok = X_csc.todok()
    X_int = X_csc.astype(np.int)
    X_float = X_csc.astype(np.float)

    Xs = [X_csc, X_coo, X_dok, X_int, X_float]
    accept_sparses = [['csr', 'coo'], ['coo', 'dok']]
    for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses,
                                                 copys):
        with warnings.catch_warnings(record=True) as w:
            X_checked = check_array(X,
                                    dtype=dtype,
                                    accept_sparse=accept_sparse,
                                    copy=copy)
        if (dtype is object or sp.isspmatrix_dok(X)) and len(w):
            message = str(w[0].message)
            messages = [
                "object dtype is not supported by sparse matrices",
                "Can't check dok sparse matrix for nan or inf."
            ]
            assert message in messages
        else:
            assert len(w) == 0
        if dtype is not None:
            assert X_checked.dtype == dtype
        else:
            assert X_checked.dtype == X.dtype
        if X.format in accept_sparse:
            # no change if allowed
            assert X.format == X_checked.format
        else:
            # got converted
            assert X_checked.format == accept_sparse[0]
        if copy:
            assert X is not X_checked
        else:
            # doesn't copy if it was already good
            if X.dtype == X_checked.dtype and X.format == X_checked.format:
                assert X is X_checked

    # other input formats
    # convert lists to arrays
    X_dense = check_array([[1, 2], [3, 4]])
    assert isinstance(X_dense, np.ndarray)
    # raise on too deep lists
    assert_raises(ValueError, check_array, X_ndim.tolist())
    check_array(X_ndim.tolist(), allow_nd=True)  # doesn't raise
    # convert weird stuff to arrays
    X_no_array = _NotAnArray(X_dense)
    result = check_array(X_no_array)
    assert isinstance(result, np.ndarray)

    # deprecation warning if string-like array with dtype="numeric"
    expected_warn_regex = r"converted to decimal numbers if dtype='numeric'"
    X_str = [['11', '12'], ['13', 'xx']]
    for X in [X_str, np.array(X_str, dtype='U'), np.array(X_str, dtype='S')]:
        with pytest.warns(FutureWarning, match=expected_warn_regex):
            check_array(X, dtype="numeric")

    # deprecation warning if byte-like array with dtype="numeric"
    X_bytes = [[b'a', b'b'], [b'c', b'd']]
    for X in [X_bytes, np.array(X_bytes, dtype='V1')]:
        with pytest.warns(FutureWarning, match=expected_warn_regex):
            check_array(X, dtype="numeric")
示例#41
0
def test_pipeline_init():
    # Test the various init parameters of the pipeline.
    assert_raises(TypeError, Pipeline)
    # Check that we can't instantiate pipelines with objects without fit
    # method
    assert_raises_regex(
        TypeError, 'Last step of Pipeline should implement fit. '
        '.*NoFit.*', Pipeline, [('clf', NoFit())])
    # Smoke test with only an estimator
    clf = NoTrans()
    pipe = Pipeline([('svc', clf)])
    assert_equal(
        pipe.get_params(deep=True),
        dict(svc__a=None, svc__b=None, svc=clf, **pipe.get_params(deep=False)))

    # Check that params are set
    pipe.set_params(svc__a=0.1)
    assert_equal(clf.a, 0.1)
    assert_equal(clf.b, None)
    # Smoke test the repr:
    repr(pipe)

    # Test with two objects
    clf = SVC()
    filter1 = SelectKBest(f_classif)
    pipe = Pipeline([('anova', filter1), ('svc', clf)])

    # Check that we can't instantiate with non-transformers on the way
    # Note that NoTrans implements fit, but not transform
    assert_raises_regex(
        TypeError, 'All intermediate steps should be transformers'
        '.*\\bNoTrans\\b.*', Pipeline, [('t', NoTrans()), ('svc', clf)])

    # Check that params are set
    pipe.set_params(svc__C=0.1)
    assert_equal(clf.C, 0.1)
    # Smoke test the repr:
    repr(pipe)

    # Check that params are not set when naming them wrong
    assert_raises(ValueError, pipe.set_params, anova__C=0.1)

    # Test clone
    pipe2 = clone(pipe)
    assert_false(pipe.named_steps['svc'] is pipe2.named_steps['svc'])

    # Check that apart from estimators, the parameters are the same
    params = pipe.get_params(deep=True)
    params2 = pipe2.get_params(deep=True)

    for x in pipe.get_params(deep=False):
        params.pop(x)

    for x in pipe2.get_params(deep=False):
        params2.pop(x)

    # Remove estimators that where copied
    params.pop('svc')
    params.pop('anova')
    params2.pop('svc')
    params2.pop('anova')
    assert_equal(params, params2)
def test_iforest_error():
    """Test that it gives proper exception on deficient input."""
    X = iris.data

    # Test max_samples
    assert_raises(ValueError, IsolationForest(max_samples=-1).fit, X)
    assert_raises(ValueError, IsolationForest(max_samples=0.0).fit, X)
    assert_raises(ValueError, IsolationForest(max_samples=2.0).fit, X)
    # The dataset has less than 256 samples, explicitly setting
    # max_samples > n_samples should result in a warning. If not set
    # explicitly there should be no warning
    assert_warns_message(
        UserWarning, "max_samples will be set to n_samples for estimation",
        IsolationForest(max_samples=1000).fit, X)
    assert_no_warnings(IsolationForest(max_samples='auto').fit, X)
    assert_no_warnings(IsolationForest(max_samples=np.int64(2)).fit, X)
    assert_raises(ValueError, IsolationForest(max_samples='foobar').fit, X)
    assert_raises(ValueError, IsolationForest(max_samples=1.5).fit, X)

    # test X_test n_features match X_train one:
    assert_raises(ValueError, IsolationForest().fit(X).predict, X[:, 1:])
示例#43
0
def test_neighbors_badargs():
    """Test bad argument values: these should all raise ValueErrors"""
    assert_raises(ValueError,
                  neighbors.NearestNeighbors,
                  algorithm='blah')

    X = rng.random_sample((10, 2))
    Xsparse = csr_matrix(X)
    y = np.ones(10)

    for cls in (neighbors.KNeighborsClassifier,
                neighbors.RadiusNeighborsClassifier,
                neighbors.KNeighborsRegressor,
                neighbors.RadiusNeighborsRegressor):
        assert_raises(ValueError,
                      cls,
                      weights='blah')
        assert_raises(ValueError,
                      cls, p=-1)
        assert_raises(ValueError,
                      cls, algorithm='blah')
        nbrs = cls(algorithm='ball_tree', metric='haversine')
        assert_raises(ValueError,
                      nbrs.predict,
                      X)
        assert_raises(ValueError,
                      ignore_warnings(nbrs.fit),
                      Xsparse, y)
        nbrs = cls()
        assert_raises(ValueError,
                      nbrs.fit,
                      np.ones((0, 2)), np.ones(0))
        assert_raises(ValueError,
                      nbrs.fit,
                      X[:, :, None], y)
        nbrs.fit(X, y)
        assert_raises(ValueError,
                      nbrs.predict,
                      [])

    nbrs = neighbors.NearestNeighbors().fit(X)

    assert_raises(ValueError,
                  nbrs.kneighbors_graph,
                  X, mode='blah')
    assert_raises(ValueError,
                  nbrs.radius_neighbors_graph,
                  X, mode='blah')
示例#44
0
def test_weighted_dbscan():
    # ensure sample_weight is validated
    assert_raises(ValueError, dbscan, [[0], [1]], sample_weight=[2])
    assert_raises(ValueError, dbscan, [[0], [1]], sample_weight=[2, 3, 4])

    # ensure sample_weight has an effect
    assert_array_equal([],
                       dbscan([[0], [1]], sample_weight=None,
                              min_samples=6)[0])
    assert_array_equal([],
                       dbscan([[0], [1]], sample_weight=[5, 5],
                              min_samples=6)[0])
    assert_array_equal([0],
                       dbscan([[0], [1]], sample_weight=[6, 5],
                              min_samples=6)[0])
    assert_array_equal([0, 1],
                       dbscan([[0], [1]], sample_weight=[6, 6],
                              min_samples=6)[0])

    # points within eps of each other:
    assert_array_equal([0, 1],
                       dbscan([[0], [1]],
                              eps=1.5,
                              sample_weight=[5, 1],
                              min_samples=6)[0])
    # and effect of non-positive and non-integer sample_weight:
    assert_array_equal([],
                       dbscan([[0], [1]],
                              sample_weight=[5, 0],
                              eps=1.5,
                              min_samples=6)[0])
    assert_array_equal([0, 1],
                       dbscan([[0], [1]],
                              sample_weight=[5.9, 0.1],
                              eps=1.5,
                              min_samples=6)[0])
    assert_array_equal([0, 1],
                       dbscan([[0], [1]],
                              sample_weight=[6, 0],
                              eps=1.5,
                              min_samples=6)[0])
    assert_array_equal([],
                       dbscan([[0], [1]],
                              sample_weight=[6, -1],
                              eps=1.5,
                              min_samples=6)[0])

    # for non-negative sample_weight, cores should be identical to repetition
    rng = np.random.RandomState(42)
    sample_weight = rng.randint(0, 5, X.shape[0])
    core1, label1 = dbscan(X, sample_weight=sample_weight)
    assert_equal(len(label1), len(X))

    X_repeated = np.repeat(X, sample_weight, axis=0)
    core_repeated, label_repeated = dbscan(X_repeated)
    core_repeated_mask = np.zeros(X_repeated.shape[0], dtype=bool)
    core_repeated_mask[core_repeated] = True
    core_mask = np.zeros(X.shape[0], dtype=bool)
    core_mask[core1] = True
    assert_array_equal(np.repeat(core_mask, sample_weight), core_repeated_mask)

    # sample_weight should work with precomputed distance matrix
    D = pairwise_distances(X)
    core3, label3 = dbscan(D,
                           sample_weight=sample_weight,
                           metric='precomputed')
    assert_array_equal(core1, core3)
    assert_array_equal(label1, label3)

    # sample_weight should work with estimator
    est = DBSCAN().fit(X, sample_weight=sample_weight)
    core4 = est.core_sample_indices_
    label4 = est.labels_
    assert_array_equal(core1, core4)
    assert_array_equal(label1, label4)

    est = DBSCAN()
    label5 = est.fit_predict(X, sample_weight=sample_weight)
    core5 = est.core_sample_indices_
    assert_array_equal(core1, core5)
    assert_array_equal(label1, label5)
    assert_array_equal(label1, est.labels_)
def test_classifiers_train():
    # test if classifiers do something sensible on training set
    # also test all shapes / shape errors
    classifiers = all_estimators(type_filter='classifier')
    X_m, y_m = make_blobs(random_state=0)
    X_m, y_m = shuffle(X_m, y_m, random_state=7)
    X_m = StandardScaler().fit_transform(X_m)
    # generate binary problem from multi-class one
    y_b = y_m[y_m != 2]
    X_b = X_m[y_m != 2]
    for (X, y) in [(X_m, y_m), (X_b, y_b)]:
        # do it once with binary, once with multiclass
        classes = np.unique(y)
        n_classes = len(classes)
        n_samples, n_features = X.shape
        for name, Classifier in classifiers:
            if name in dont_test:
                continue
            if name in ['MultinomialNB', 'BernoulliNB']:
                # TODO also test these!
                continue
            # catch deprecation warnings
            with warnings.catch_warnings(record=True):
                classifier = Classifier()
            # raises error on malformed input for fit
            assert_raises(ValueError, classifier.fit, X, y[:-1])

            # fit
            classifier.fit(X, y)
            assert_true(hasattr(classifier, "classes_"))
            y_pred = classifier.predict(X)
            assert_equal(y_pred.shape, (n_samples, ))
            # training set performance
            assert_greater(accuracy_score(y, y_pred), 0.85)

            # raises error on malformed input for predict
            assert_raises(ValueError, classifier.predict, X.T)
            if hasattr(classifier, "decision_function"):
                try:
                    # decision_function agrees with predict:
                    decision = classifier.decision_function(X)
                    if n_classes is 2:
                        assert_equal(decision.ravel().shape, (n_samples, ))
                        dec_pred = (decision.ravel() > 0).astype(np.int)
                        assert_array_equal(dec_pred, y_pred)
                    if (n_classes is 3
                            and not isinstance(classifier, BaseLibSVM)):
                        # 1on1 of LibSVM works differently
                        assert_equal(decision.shape, (n_samples, n_classes))
                        assert_array_equal(np.argmax(decision, axis=1), y_pred)

                    # raises error on malformed input
                    assert_raises(ValueError, classifier.decision_function,
                                  X.T)
                    # raises error on malformed input for decision_function
                    assert_raises(ValueError, classifier.decision_function,
                                  X.T)
                except NotImplementedError:
                    pass
            if hasattr(classifier, "predict_proba"):
                try:
                    # predict_proba agrees with predict:
                    y_prob = classifier.predict_proba(X)
                    assert_equal(y_prob.shape, (n_samples, n_classes))
                    assert_array_equal(np.argmax(y_prob, axis=1), y_pred)
                    # check that probas for all classes sum to one
                    assert_array_almost_equal(np.sum(y_prob, axis=1),
                                              np.ones(n_samples))
                    # raises error on malformed input
                    assert_raises(ValueError, classifier.predict_proba, X.T)
                    # raises error on malformed input for predict_proba
                    assert_raises(ValueError, classifier.predict_proba, X.T)
                except NotImplementedError:
                    pass
def test_multioutput_enetcv_error():
    X = np.random.randn(10, 2)
    y = np.random.randn(10, 2)
    clf = ElasticNetCV()
    assert_raises(ValueError, clf.fit, X, y)
示例#47
0
def test_hdbscan_no_centroid_medoid_for_noise():
    clusterer = HDBSCAN().fit(X)
    assert_raises(ValueError, clusterer.weighted_cluster_centroid, -1)
    assert_raises(ValueError, clusterer.weighted_cluster_medoid, -1)
示例#48
0
def test_fast_dot():
    # Check fast dot blas wrapper function
    if fast_dot is np.dot:
        return

    rng = np.random.RandomState(42)
    A = rng.random_sample([2, 10])
    B = rng.random_sample([2, 10])

    try:
        linalg.get_blas_funcs(['gemm'])[0]
        has_blas = True
    except (AttributeError, ValueError):
        has_blas = False

    if has_blas:
        # Test _fast_dot for invalid input.

        # Maltyped data.
        for dt1, dt2 in [['f8', 'f4'], ['i4', 'i4']]:
            assert_raises(ValueError, _fast_dot, A.astype(dt1),
                          B.astype(dt2).T)

        # Malformed data.

        # ndim == 0
        E = np.empty(0)
        assert_raises(ValueError, _fast_dot, E, E)

        # ndim == 1
        assert_raises(ValueError, _fast_dot, A, A[0])

        # ndim > 2
        assert_raises(ValueError, _fast_dot, A.T, np.array([A, A]))

        # min(shape) == 1
        assert_raises(ValueError, _fast_dot, A, A[0, :][None, :])

        # test for matrix mismatch error
        assert_raises(ValueError, _fast_dot, A, A)

    # Test cov-like use case + dtypes.
    for dtype in ['f8', 'f4']:
        A = A.astype(dtype)
        B = B.astype(dtype)

        #  col < row
        C = np.dot(A.T, A)
        C_ = fast_dot(A.T, A)
        assert_almost_equal(C, C_, decimal=5)

        C = np.dot(A.T, B)
        C_ = fast_dot(A.T, B)
        assert_almost_equal(C, C_, decimal=5)

        C = np.dot(A, B.T)
        C_ = fast_dot(A, B.T)
        assert_almost_equal(C, C_, decimal=5)

    # Test square matrix * rectangular use case.
    A = rng.random_sample([2, 2])
    for dtype in ['f8', 'f4']:
        A = A.astype(dtype)
        B = B.astype(dtype)

        C = np.dot(A, B)
        C_ = fast_dot(A, B)
        assert_almost_equal(C, C_, decimal=5)

        C = np.dot(A.T, B)
        C_ = fast_dot(A.T, B)
        assert_almost_equal(C, C_, decimal=5)

    if has_blas:
        for x in [np.array([[d] * 10] * 2) for d in [np.inf, np.nan]]:
            assert_raises(ValueError, _fast_dot, x, x.T)
示例#49
0
def test_save_filepath_condition():
    r = BaseRecommender()
    invalid_filepath = 'no suffix'
    assert_raises(ValueError, r.save, invalid_filepath)
def test_transformers():
    # test if transformers do something sensible on training set
    # also test all shapes / shape errors
    transformers = all_estimators(type_filter='transformer')
    X, y = make_blobs(n_samples=30,
                      centers=[[0, 0, 0], [1, 1, 1]],
                      random_state=0,
                      n_features=2,
                      cluster_std=0.1)
    n_samples, n_features = X.shape
    X = StandardScaler().fit_transform(X)
    X -= X.min()

    succeeded = True

    for name, Transformer in transformers:
        if name in dont_test:
            continue
        # these don't actually fit the data:
        if name in ['AdditiveChi2Sampler', 'Binarizer', 'Normalizer']:
            continue
        # catch deprecation warnings
        with warnings.catch_warnings(record=True):
            transformer = Transformer()
        set_random_state(transformer)
        if hasattr(transformer, 'compute_importances'):
            transformer.compute_importances = True

        if name == 'SelectKBest':
            # SelectKBest has a default of k=10
            # which is more feature than we have.
            transformer.k = 1
        elif name in ['GaussianRandomProjection', 'SparseRandomProjection']:
            # Due to the jl lemma and very few samples, the number
            # of components of the random matrix projection will be greater
            # than the number of features.
            # So we impose a smaller number (avoid "auto" mode)
            transformer.n_components = 1
        elif name == "MiniBatchDictionaryLearning":
            transformer.set_params(n_iter=5)  # default = 1000

        elif name == "KernelPCA":
            transformer.remove_zero_eig = False

        # fit

        if name in ('_PLS', 'PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD'):
            random_state = np.random.RandomState(seed=12345)
            y_ = np.vstack([y, 2 * y + random_state.randint(2, size=len(y))])
            y_ = y_.T
        else:
            y_ = y

        try:
            transformer.fit(X, y_)
            X_pred = transformer.fit_transform(X, y=y_)
            if isinstance(X_pred, tuple):
                for x_pred in X_pred:
                    assert_equal(x_pred.shape[0], n_samples)
            else:
                assert_equal(X_pred.shape[0], n_samples)
        except Exception as e:
            print(transformer)
            print(e)
            print()
            succeeded = False
            continue

        if hasattr(transformer, 'transform'):
            if name in ('_PLS', 'PLSCanonical', 'PLSRegression', 'CCA',
                        'PLSSVD'):
                X_pred2 = transformer.transform(X, y_)
                X_pred3 = transformer.fit_transform(X, y=y_)
            else:
                X_pred2 = transformer.transform(X)
                X_pred3 = transformer.fit_transform(X, y=y_)
            if isinstance(X_pred, tuple) and isinstance(X_pred2, tuple):
                for x_pred, x_pred2, x_pred3 in zip(X_pred, X_pred2, X_pred3):
                    assert_array_almost_equal(
                        x_pred, x_pred2, 2,
                        "fit_transform not correct in %s" % Transformer)
                    assert_array_almost_equal(
                        x_pred3, x_pred2, 2,
                        "fit_transform not correct in %s" % Transformer)
            else:
                assert_array_almost_equal(
                    X_pred, X_pred2, 2,
                    "fit_transform not correct in %s" % Transformer)
                assert_array_almost_equal(
                    X_pred3, X_pred2, 2,
                    "fit_transform not correct in %s" % Transformer)

            # raises error on malformed input for transform
            assert_raises(ValueError, transformer.transform, X.T)
    assert_true(succeeded)
示例#51
0
def test_agglomerative_clustering():
    """
    Check that we obtain the correct number of clusters with
    agglomerative clustering.
    """
    rnd = np.random.RandomState(0)
    mask = np.ones([10, 10], dtype=np.bool)
    n_samples = 100
    X = rnd.randn(n_samples, 50)
    connectivity = grid_to_graph(*mask.shape)
    for linkage in ("ward", "complete", "average"):
        clustering = AgglomerativeClustering(n_clusters=10,
                                             connectivity=connectivity,
                                             linkage=linkage)
        clustering.fit(X)
        # test caching
        clustering = AgglomerativeClustering(n_clusters=10,
                                             connectivity=connectivity,
                                             memory=mkdtemp(),
                                             linkage=linkage)
        clustering.fit(X)
        labels = clustering.labels_
        assert_true(np.size(np.unique(labels)) == 10)
        # Turn caching off now
        clustering = AgglomerativeClustering(n_clusters=10,
                                             connectivity=connectivity,
                                             linkage=linkage)
        # Check that we obtain the same solution with early-stopping of the
        # tree building
        clustering.compute_full_tree = False
        clustering.fit(X)
        np.testing.assert_array_equal(clustering.labels_, labels)
        clustering.connectivity = None
        clustering.fit(X)
        assert_true(np.size(np.unique(clustering.labels_)) == 10)
        # Check that we raise a TypeError on dense matrices
        clustering = AgglomerativeClustering(
            n_clusters=10,
            connectivity=sparse.lil_matrix(connectivity.toarray()[:10, :10]),
            linkage=linkage)
        assert_raises(ValueError, clustering.fit, X)

    # Test that using ward with another metric than euclidean raises an
    # exception
    clustering = AgglomerativeClustering(n_clusters=10,
                                         connectivity=connectivity.toarray(),
                                         affinity="manhattan",
                                         linkage="ward")
    assert_raises(ValueError, clustering.fit, X)

    # Test using another metric than euclidean works with linkage complete
    for affinity in PAIRED_DISTANCES.keys():
        # Compare our (structured) implementation to scipy
        clustering = AgglomerativeClustering(n_clusters=10,
                                             connectivity=np.ones(
                                                 (n_samples, n_samples)),
                                             affinity=affinity,
                                             linkage="complete")
        clustering.fit(X)
        clustering2 = AgglomerativeClustering(n_clusters=10,
                                              connectivity=None,
                                              affinity=affinity,
                                              linkage="complete")
        clustering2.fit(X)
        assert_almost_equal(
            normalized_mutual_info_score(clustering2.labels_,
                                         clustering.labels_), 1)
示例#52
0
def test_hdbscan_badargs():
    assert_raises(ValueError, hdbscan, X='fail')
    assert_raises(ValueError, hdbscan, X=None)
    assert_raises(ValueError, hdbscan, X, min_cluster_size='fail')
    assert_raises(ValueError, hdbscan, X, min_samples='fail')
    assert_raises(ValueError, hdbscan, X, min_samples=-1)
    assert_raises(ValueError, hdbscan, X, metric='imperial')
    assert_raises(ValueError, hdbscan, X, metric=None)
    assert_raises(ValueError, hdbscan, X, metric='minkowski', p=-1)
    assert_raises(ValueError,
                  hdbscan,
                  X,
                  metric='minkowski',
                  p=-1,
                  algorithm='prims_kdtree')
    assert_raises(ValueError,
                  hdbscan,
                  X,
                  metric='minkowski',
                  p=-1,
                  algorithm='prims_balltree')
    assert_raises(ValueError,
                  hdbscan,
                  X,
                  metric='minkowski',
                  p=-1,
                  algorithm='boruvka_balltree')
    assert_raises(ValueError,
                  hdbscan,
                  X,
                  metric='precomputed',
                  algorithm='boruvka_kdtree')
    assert_raises(ValueError,
                  hdbscan,
                  X,
                  metric='precomputed',
                  algorithm='prims_kdtree')
    assert_raises(ValueError,
                  hdbscan,
                  X,
                  metric='precomputed',
                  algorithm='prims_balltree')
    assert_raises(ValueError,
                  hdbscan,
                  X,
                  metric='precomputed',
                  algorithm='boruvka_balltree')
    assert_raises(ValueError, hdbscan, X, alpha=-1)
    assert_raises(ValueError, hdbscan, X, alpha='fail')
    assert_raises(Exception, hdbscan, X, algorithm='something_else')
    assert_raises(TypeError, hdbscan, X, metric='minkowski', p=None)
    assert_raises(ValueError, hdbscan, X, leaf_size=0)
def test_format_invariance_with_1d_vectors(name):
    random_state = check_random_state(0)
    y1 = random_state.randint(0, 2, size=(20, ))
    y2 = random_state.randint(0, 2, size=(20, ))

    y1_list = list(y1)
    y2_list = list(y2)

    y1_1d, y2_1d = np.array(y1), np.array(y2)
    assert_array_equal(y1_1d.ndim, 1)
    assert_array_equal(y2_1d.ndim, 1)
    y1_column = np.reshape(y1_1d, (-1, 1))
    y2_column = np.reshape(y2_1d, (-1, 1))
    y1_row = np.reshape(y1_1d, (1, -1))
    y2_row = np.reshape(y2_1d, (1, -1))

    with ignore_warnings():
        metric = ALL_METRICS[name]

        measure = metric(y1, y2)

        assert_allclose(metric(y1_list, y2_list),
                        measure,
                        err_msg="%s is not representation invariant with list"
                        "" % name)

        assert_allclose(metric(y1_1d, y2_1d),
                        measure,
                        err_msg="%s is not representation invariant with "
                        "np-array-1d" % name)

        assert_allclose(metric(y1_column, y2_column),
                        measure,
                        err_msg="%s is not representation invariant with "
                        "np-array-column" % name)

        # Mix format support
        assert_allclose(metric(y1_1d, y2_list),
                        measure,
                        err_msg="%s is not representation invariant with mix "
                        "np-array-1d and list" % name)

        assert_allclose(metric(y1_list, y2_1d),
                        measure,
                        err_msg="%s is not representation invariant with mix "
                        "np-array-1d and list" % name)

        assert_allclose(metric(y1_1d, y2_column),
                        measure,
                        err_msg="%s is not representation invariant with mix "
                        "np-array-1d and np-array-column" % name)

        assert_allclose(metric(y1_column, y2_1d),
                        measure,
                        err_msg="%s is not representation invariant with mix "
                        "np-array-1d and np-array-column" % name)

        assert_allclose(metric(y1_list, y2_column),
                        measure,
                        err_msg="%s is not representation invariant with mix "
                        "list and np-array-column" % name)

        assert_allclose(metric(y1_column, y2_list),
                        measure,
                        err_msg="%s is not representation invariant with mix "
                        "list and np-array-column" % name)

        # These mix representations aren't allowed
        assert_raises(ValueError, metric, y1_1d, y2_row)
        assert_raises(ValueError, metric, y1_row, y2_1d)
        assert_raises(ValueError, metric, y1_list, y2_row)
        assert_raises(ValueError, metric, y1_row, y2_list)
        assert_raises(ValueError, metric, y1_column, y2_row)
        assert_raises(ValueError, metric, y1_row, y2_column)

        # NB: We do not test for y1_row, y2_row as these may be
        # interpreted as multilabel or multioutput data.
        if (name not in (MULTIOUTPUT_METRICS | THRESHOLDED_MULTILABEL_METRICS
                         | MULTILABELS_METRICS)):
            assert_raises(ValueError, metric, y1_row, y2_row)
def test_multioutput_enetcv_error():
    rng = np.random.RandomState(0)
    X = rng.randn(10, 2)
    y = rng.randn(10, 2)
    clf = ElasticNetCV()
    assert_raises(ValueError, clf.fit, X, y)
示例#55
0
def test_validate_function():
    """Check that valid functions are accepted & invalid ones raise error"""

    # Check arity tests
    fun = make_function(function=_protected_sqrt, name='sqrt', arity=1)
    # non-integer arity
    assert_raises(ValueError, make_function, _protected_sqrt, 'sqrt', '1')
    assert_raises(ValueError, make_function, _protected_sqrt, 'sqrt', 1.0)
    # non-matching arity
    assert_raises(ValueError, make_function, _protected_sqrt, 'sqrt', 2)
    assert_raises(ValueError, make_function, maximum, 'max', 1)

    # Check name test
    assert_raises(ValueError, make_function, _protected_sqrt, 2, 1)

    # Check return type tests
    def bad_fun1(x1, x2):
        return 'ni'

    assert_raises(ValueError, make_function, bad_fun1, 'ni', 2)

    # Check return shape tests
    def bad_fun2(x1):
        return np.ones((2, 1))

    assert_raises(ValueError, make_function, bad_fun2, 'ni', 1)

    # Check closure for negatives test
    def _unprotected_sqrt(x1):
        with np.errstate(divide='ignore', invalid='ignore'):
            return np.sqrt(x1)

    assert_raises(ValueError, make_function, _unprotected_sqrt, 'sqrt', 1)

    # Check closure for zeros test
    def _unprotected_div(x1, x2):
        with np.errstate(divide='ignore', invalid='ignore'):
            return np.divide(x1, x2)

    assert_raises(ValueError, make_function, _unprotected_div, 'div', 2)
def test_multioutput_number_of_output_differ(name):
    y_true = np.array([[1, 0, 0, 1], [0, 1, 1, 1], [1, 1, 0, 1]])
    y_pred = np.array([[0, 0], [1, 0], [0, 0]])

    metric = ALL_METRICS[name]
    assert_raises(ValueError, metric, y_true, y_pred)
示例#57
0
def test_unknown_strategey_regressor():
    X = [[0]] * 5
    y = [1, 2, 4, 6, 8]

    est = DummyRegressor(strategy='gona')
    assert_raises(ValueError, est.fit, X, y)
def check_sample_weight_invariance(name, metric, y1, y2):
    rng = np.random.RandomState(0)
    sample_weight = rng.randint(1, 10, size=len(y1))

    # check that unit weights gives the same score as no weight
    unweighted_score = metric(y1, y2, sample_weight=None)

    assert_allclose(unweighted_score,
                    metric(y1, y2, sample_weight=np.ones(shape=len(y1))),
                    err_msg="For %s sample_weight=None is not equivalent to "
                    "sample_weight=ones" % name)

    # check that the weighted and unweighted scores are unequal
    weighted_score = metric(y1, y2, sample_weight=sample_weight)

    # use context manager to supply custom error message
    with assert_raises(AssertionError) as cm:
        assert_allclose(unweighted_score, weighted_score)
        cm.msg = ("Unweighted and weighted scores are unexpectedly almost "
                  "equal (%s) and (%s) for %s" %
                  (unweighted_score, weighted_score, name))

    # check that sample_weight can be a list
    weighted_score_list = metric(y1, y2, sample_weight=sample_weight.tolist())
    assert_allclose(
        weighted_score,
        weighted_score_list,
        err_msg=("Weighted scores for array and list "
                 "sample_weight input are not equal (%s != %s) for %s") %
        (weighted_score, weighted_score_list, name))

    # check that integer weights is the same as repeated samples
    repeat_weighted_score = metric(np.repeat(y1, sample_weight, axis=0),
                                   np.repeat(y2, sample_weight, axis=0),
                                   sample_weight=None)
    assert_allclose(weighted_score,
                    repeat_weighted_score,
                    err_msg="Weighting %s is not equal to repeating samples" %
                    name)

    # check that ignoring a fraction of the samples is equivalent to setting
    # the corresponding weights to zero
    sample_weight_subset = sample_weight[1::2]
    sample_weight_zeroed = np.copy(sample_weight)
    sample_weight_zeroed[::2] = 0
    y1_subset = y1[1::2]
    y2_subset = y2[1::2]
    weighted_score_subset = metric(y1_subset,
                                   y2_subset,
                                   sample_weight=sample_weight_subset)
    weighted_score_zeroed = metric(y1, y2, sample_weight=sample_weight_zeroed)
    assert_allclose(
        weighted_score_subset,
        weighted_score_zeroed,
        err_msg=("Zeroing weights does not give the same result as "
                 "removing the corresponding samples (%s != %s) for %s" %
                 (weighted_score_zeroed, weighted_score_subset, name)))

    if not name.startswith('unnormalized'):
        # check that the score is invariant under scaling of the weights by a
        # common factor
        for scaling in [2, 0.3]:
            assert_allclose(weighted_score,
                            metric(y1,
                                   y2,
                                   sample_weight=sample_weight * scaling),
                            err_msg="%s sample_weight is not invariant "
                            "under scaling" % name)

    # Check that if number of samples in y_true and sample_weight are not
    # equal, meaningful error is raised.
    error_message = ("Found input variables with inconsistent numbers of "
                     "samples: [{}, {}, {}]".format(
                         _num_samples(y1), _num_samples(y2),
                         _num_samples(sample_weight) * 2))
    assert_raise_message(ValueError,
                         error_message,
                         metric,
                         y1,
                         y2,
                         sample_weight=np.hstack(
                             [sample_weight, sample_weight]))
示例#59
0
def test_quantile_strategy_empty_train():
    est = DummyRegressor(strategy="quantile", quantile=0.4)
    assert_raises(ValueError, est.fit, [], [])
示例#60
0
def test_constants_not_specified_regressor():
    X = [[0]] * 5
    y = [1, 2, 4, 6, 8]

    est = DummyRegressor(strategy='constant')
    assert_raises(TypeError, est.fit, X, y)