def check_multioutput(name): # Check estimators on multi-output problems. X_train = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [-2, 1], [-1, 1], [-1, 2], [2, -1], [1, -1], [1, -2]] y_train = [[-1, 0], [-1, 0], [-1, 0], [1, 1], [1, 1], [1, 1], [-1, 2], [-1, 2], [-1, 2], [1, 3], [1, 3], [1, 3]] X_test = [[-1, -1], [1, 1], [-1, 1], [1, -1]] y_test = [[-1, 0], [1, 1], [-1, 2], [1, 3]] est = FOREST_ESTIMATORS[name](random_state=0, bootstrap=False) y_pred = est.fit(X_train, y_train).predict(X_test) assert_array_almost_equal(y_pred, y_test) if name in FOREST_CLASSIFIERS: with np.errstate(divide="ignore"): proba = est.predict_proba(X_test) assert_equal(len(proba), 2) assert_equal(proba[0].shape, (4, 2)) assert_equal(proba[1].shape, (4, 4)) log_proba = est.predict_log_proba(X_test) assert_equal(len(log_proba), 2) assert_equal(log_proba[0].shape, (4, 2)) assert_equal(log_proba[1].shape, (4, 4))
def check_boston(presort, loss, subsample): # Check consistency on dataset boston house prices with least squares # and least absolute deviation. ones = np.ones(len(boston.target)) last_y_pred = None for sample_weight in None, ones, 2 * ones: clf = GradientBoostingRegressor(n_estimators=100, loss=loss, max_depth=4, subsample=subsample, min_samples_split=2, random_state=1, presort=presort) assert_raises(ValueError, clf.predict, boston.data) clf.fit(boston.data, boston.target, sample_weight=sample_weight) leaves = clf.apply(boston.data) assert_equal(leaves.shape, (506, 100)) y_pred = clf.predict(boston.data) mse = mean_squared_error(boston.target, y_pred) assert_less(mse, 6.0) if last_y_pred is not None: assert_array_almost_equal(last_y_pred, y_pred) last_y_pred = y_pred
def test_compute_class_weight_invariance(): # Test that results with class_weight="balanced" is invariant wrt # class imbalance if the number of samples is identical. # The test uses a balanced two class dataset with 100 datapoints. # It creates three versions, one where class 1 is duplicated # resulting in 150 points of class 1 and 50 of class 0, # one where there are 50 points in class 1 and 150 in class 0, # and one where there are 100 points of each class (this one is balanced # again). # With balancing class weights, all three should give the same model. X, y = make_blobs(centers=2, random_state=0) # create dataset where class 1 is duplicated twice X_1 = np.vstack([X] + [X[y == 1]] * 2) y_1 = np.hstack([y] + [y[y == 1]] * 2) # create dataset where class 0 is duplicated twice X_0 = np.vstack([X] + [X[y == 0]] * 2) y_0 = np.hstack([y] + [y[y == 0]] * 2) # duplicate everything X_ = np.vstack([X] * 2) y_ = np.hstack([y] * 2) # results should be identical logreg1 = LogisticRegression(class_weight="balanced").fit(X_1, y_1) logreg0 = LogisticRegression(class_weight="balanced").fit(X_0, y_0) logreg = LogisticRegression(class_weight="balanced").fit(X_, y_) assert_array_almost_equal(logreg1.coef_, logreg0.coef_) assert_array_almost_equal(logreg.coef_, logreg0.coef_)
def test_logsumexp(): # Try to add some smallish numbers in logspace x = np.array([1e-40] * 1000000) logx = np.log(x) assert_almost_equal(np.exp(logsumexp(logx)), x.sum()) X = np.vstack([x, x]) logX = np.vstack([logx, logx]) assert_array_almost_equal(np.exp(logsumexp(logX, axis=0)), X.sum(axis=0)) assert_array_almost_equal(np.exp(logsumexp(logX, axis=1)), X.sum(axis=1))
def test_logistic_sigmoid(): # Check correctness and robustness of logistic sigmoid implementation def naive_log_logistic(x): return np.log(1 / (1 + np.exp(-x))) x = np.linspace(-2, 2, 50) assert_array_almost_equal(log_logistic(x), naive_log_logistic(x)) extreme_x = np.array([-100., 100.]) assert_array_almost_equal(log_logistic(extreme_x), [-100, 0])
def test_oob_improvement(): # Test if oob improvement has correct shape and regression test. clf = GradientBoostingClassifier(n_estimators=100, random_state=1, subsample=0.5) clf.fit(X, y) assert_equal(clf.oob_improvement_.shape[0], 100) # hard-coded regression test - change if modification in OOB computation assert_array_almost_equal(clf.oob_improvement_[:5], np.array([0.19, 0.15, 0.12, -0.12, -0.11]), decimal=2)
def test_warm_start_equal_n_estimators(): # Test if warm start with equal n_estimators does nothing X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1) est.fit(X, y) est2 = clone(est) est2.set_params(n_estimators=est.n_estimators, warm_start=True) est2.fit(X, y) assert_array_almost_equal(est2.predict(X), est.predict(X))
def check_probability(name): # Predict probabilities. ForestClassifier = FOREST_CLASSIFIERS[name] with np.errstate(divide="ignore"): clf = ForestClassifier(n_estimators=10, random_state=1, max_features=1, max_depth=1) clf.fit(iris.data, iris.target) assert_array_almost_equal(np.sum(clf.predict_proba(iris.data), axis=1), np.ones(iris.data.shape[0])) assert_array_almost_equal(clf.predict_proba(iris.data), np.exp(clf.predict_log_proba(iris.data)))
def test_warm_start_n_estimators(): # Test if warm start equals fit - set n_estimators. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=300, max_depth=1) est.fit(X, y) est_ws = Cls(n_estimators=100, max_depth=1, warm_start=True) est_ws.fit(X, y) est_ws.set_params(n_estimators=300) est_ws.fit(X, y) assert_array_almost_equal(est_ws.predict(X), est.predict(X))
def test_warm_start_clear(): # Test if fit clears state. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=100, max_depth=1) est.fit(X, y) est_2 = Cls(n_estimators=100, max_depth=1, warm_start=True) est_2.fit(X, y) # inits state est_2.set_params(warm_start=False) est_2.fit(X, y) # clears old state and equals est assert_array_almost_equal(est_2.predict(X), est.predict(X))
def check_parallel(name, X, y): """Check parallel computations in classification""" ForestEstimator = FOREST_ESTIMATORS[name] forest = ForestEstimator(n_estimators=10, n_jobs=3, random_state=0) forest.fit(X, y) assert_equal(len(forest), 10) forest.set_params(n_jobs=1) y1 = forest.predict(X) forest.set_params(n_jobs=2) y2 = forest.predict(X) assert_array_almost_equal(y1, y2, 3)
def test_compute_class_weight_auto_unordered(): # Test compute_class_weight when classes are unordered classes = np.array([1, 0, 3]) y = np.asarray([1, 0, 0, 3, 3, 3]) cw = assert_warns(DeprecationWarning, compute_class_weight, "auto", classes, y) assert_almost_equal(cw.sum(), classes.shape) assert_equal(len(cw), len(classes)) assert_array_almost_equal(cw, np.array([1.636, 0.818, 0.545]), decimal=3) cw = compute_class_weight("balanced", classes, y) class_counts = np.bincount(y)[classes] assert_almost_equal(np.dot(cw, class_counts), y.shape[0]) assert_array_almost_equal(cw, [2., 1., 2. / 3])
def test_compute_class_weight_auto_negative(): # Test compute_class_weight when labels are negative # Test with balanced class labels. classes = np.array([-2, -1, 0]) y = np.asarray([-1, -1, 0, 0, -2, -2]) cw = assert_warns(DeprecationWarning, compute_class_weight, "auto", classes, y) assert_almost_equal(cw.sum(), classes.shape) assert_equal(len(cw), len(classes)) assert_array_almost_equal(cw, np.array([1., 1., 1.])) cw = compute_class_weight("balanced", classes, y) assert_equal(len(cw), len(classes)) assert_array_almost_equal(cw, np.array([1., 1., 1.])) # Test with unbalanced class labels. y = np.asarray([-1, 0, 0, -2, -2, -2]) cw = assert_warns(DeprecationWarning, compute_class_weight, "auto", classes, y) assert_almost_equal(cw.sum(), classes.shape) assert_equal(len(cw), len(classes)) assert_array_almost_equal(cw, np.array([0.545, 1.636, 0.818]), decimal=3) cw = compute_class_weight("balanced", classes, y) assert_equal(len(cw), len(classes)) class_counts = np.bincount(y + 2) assert_almost_equal(np.dot(cw, class_counts), y.shape[0]) assert_array_almost_equal(cw, [2. / 3, 2., 1.])
def test_random_weights(): # set this up so that each row should have a weighted mode of 6, # with a score that is easily reproduced mode_result = 6 rng = np.random.RandomState(0) x = rng.randint(mode_result, size=(100, 10)) w = rng.random_sample(x.shape) x[:, :5] = mode_result w[:, :5] += 1 mode, score = weighted_mode(x, w, axis=1) assert_array_equal(mode, mode_result) assert_array_almost_equal(score.ravel(), w[:, :5].sum(1))
def test_parallel_train(): rng = check_random_state(12321) n_samples, n_features = 80, 30 X_train = rng.randn(n_samples, n_features) y_train = rng.randint(0, 2, n_samples) clfs = [ RandomForestClassifier(n_estimators=20, n_jobs=n_jobs, random_state=12345).fit(X_train, y_train) for n_jobs in [1, 2, 3, 8, 16, 32] ] X_test = rng.randn(n_samples, n_features) probas = [clf.predict_proba(X_test) for clf in clfs] for proba1, proba2 in zip(probas, probas[1:]): assert_array_almost_equal(proba1, proba2)
def test_warm_start_oob(): # Test if warm start OOB equals fit. X, y = datasets.make_hastie_10_2(n_samples=100, random_state=1) for Cls in [GradientBoostingRegressor, GradientBoostingClassifier]: est = Cls(n_estimators=200, max_depth=1, subsample=0.5, random_state=1) est.fit(X, y) est_ws = Cls(n_estimators=100, max_depth=1, subsample=0.5, random_state=1, warm_start=True) est_ws.fit(X, y) est_ws.set_params(n_estimators=200) est_ws.fit(X, y) assert_array_almost_equal(est_ws.oob_improvement_[:100], est.oob_improvement_[:100])
def check_warm_start_clear(name): # Test if fit clears state and grows a new forest when warm_start==False. X, y = hastie_X, hastie_y ForestEstimator = FOREST_ESTIMATORS[name] clf = ForestEstimator(n_estimators=5, max_depth=1, warm_start=False, random_state=1) clf.fit(X, y) clf_2 = ForestEstimator(n_estimators=5, max_depth=1, warm_start=True, random_state=2) clf_2.fit(X, y) # inits state clf_2.set_params(warm_start=False, random_state=1) clf_2.fit(X, y) # clears old state and equals clf assert_array_almost_equal(clf_2.apply(X), clf.apply(X))
def test_quantile_loss(): # Check if quantile loss with alpha=0.5 equals lad. clf_quantile = GradientBoostingRegressor(n_estimators=100, loss='quantile', max_depth=4, alpha=0.5, random_state=7) clf_quantile.fit(boston.data, boston.target) y_quantile = clf_quantile.predict(boston.data) clf_lad = GradientBoostingRegressor(n_estimators=100, loss='lad', max_depth=4, random_state=7) clf_lad.fit(boston.data, boston.target) y_lad = clf_lad.predict(boston.data) assert_array_almost_equal(y_quantile, y_lad, decimal=4)
def test_compute_class_weight_dict(): classes = np.arange(3) class_weights = {0: 1.0, 1: 2.0, 2: 3.0} y = np.asarray([0, 0, 1, 2]) cw = compute_class_weight(class_weights, classes, y) # When the user specifies class weights, compute_class_weights should just # return them. assert_array_almost_equal(np.asarray([1.0, 2.0, 3.0]), cw) # When a class weight is specified that isn't in classes, a ValueError # should get raised msg = 'Class label 4 not present.' class_weights = {0: 1.0, 1: 2.0, 2: 3.0, 4: 1.5} assert_raise_message(ValueError, msg, compute_class_weight, class_weights, classes, y) msg = 'Class label -1 not present.' class_weights = {-1: 5.0, 0: 1.0, 1: 2.0, 2: 3.0} assert_raise_message(ValueError, msg, compute_class_weight, class_weights, classes, y)
def test_probability_exponential(): # Predict probabilities. clf = GradientBoostingClassifier(loss='exponential', n_estimators=100, random_state=1) assert_raises(ValueError, clf.predict_proba, T) clf.fit(X, y) assert_array_equal(clf.predict(T), true_result) # check if probabilities are in [0, 1]. y_proba = clf.predict_proba(T) assert_true(np.all(y_proba >= 0.0)) assert_true(np.all(y_proba <= 1.0)) score = clf.decision_function(T).ravel() assert_array_almost_equal(y_proba[:, 1], 1.0 / (1.0 + np.exp(-2 * score))) # derive predictions from probabilities y_pred = clf.classes_.take(y_proba.argmax(axis=1), axis=0) assert_array_equal(y_pred, true_result)
def check_importances(name, criterion, X, y): ForestEstimator = FOREST_ESTIMATORS[name] est = ForestEstimator(n_estimators=20, criterion=criterion, random_state=0) est.fit(X, y) importances = est.feature_importances_ n_important = np.sum(importances > 0.1) assert_equal(importances.shape[0], 10) assert_equal(n_important, 3) # XXX: Remove this test in 0.19 after transform support to estimators # is removed. X_new = assert_warns(DeprecationWarning, est.transform, X, threshold="mean") assert_less(0 < X_new.shape[1], X.shape[1]) # Check with parallel importances = est.feature_importances_ est.set_params(n_jobs=2) importances_parrallel = est.feature_importances_ assert_array_almost_equal(importances, importances_parrallel) # Check with sample weights sample_weight = check_random_state(0).randint(1, 10, len(X)) est = ForestEstimator(n_estimators=20, random_state=0, criterion=criterion) est.fit(X, y, sample_weight=sample_weight) importances = est.feature_importances_ assert_true(np.all(importances >= 0.0)) for scale in [0.5, 10, 100]: est = ForestEstimator(n_estimators=20, random_state=0, criterion=criterion) est.fit(X, y, sample_weight=scale * sample_weight) importances_bis = est.feature_importances_ assert_less(np.abs(importances - importances_bis).mean(), 0.001)
def test_newton_cg(): # Test that newton_cg gives same result as scipy's fmin_ncg rng = np.random.RandomState(0) A = rng.normal(size=(10, 10)) x0 = np.ones(10) def func(x): Ax = A.dot(x) return .5 * (Ax).dot(Ax) def grad(x): return A.T.dot(A.dot(x)) def hess(x, p): return p.dot(A.T.dot(A.dot(x.all()))) def grad_hess(x): return grad(x), lambda x: A.T.dot(A.dot(x)) assert_array_almost_equal( newton_cg(grad_hess, func, grad, x0, tol=1e-10)[0], fmin_ncg(f=func, x0=x0, fprime=grad, fhess_p=hess))
def test_feature_importances(): X = np.array(boston.data, dtype=np.float32) y = np.array(boston.target, dtype=np.float32) for presort in True, False: clf = GradientBoostingRegressor(n_estimators=100, max_depth=5, min_samples_split=2, random_state=1, presort=presort) clf.fit(X, y) assert_true(hasattr(clf, 'feature_importances_')) # XXX: Remove this test in 0.19 after transform support to estimators # is removed. X_new = assert_warns(DeprecationWarning, clf.transform, X, threshold="mean") assert_less(X_new.shape[1], X.shape[1]) feature_mask = (clf.feature_importances_ > clf.feature_importances_.mean()) assert_array_almost_equal(X_new, X[:, feature_mask])
def check_decision_path(name): X, y = hastie_X, hastie_y n_samples = X.shape[0] ForestEstimator = FOREST_ESTIMATORS[name] est = ForestEstimator(n_estimators=5, max_depth=1, warm_start=False, random_state=1) est.fit(X, y) indicator, n_nodes_ptr = est.decision_path(X) assert_equal(indicator.shape[1], n_nodes_ptr[-1]) assert_equal(indicator.shape[0], n_samples) assert_array_equal(np.diff(n_nodes_ptr), [e.tree_.node_count for e in est.estimators_]) # Assert that leaves index are correct leaves = est.apply(X) for est_id in range(leaves.shape[1]): leave_indicator = [ indicator[i, n_nodes_ptr[est_id] + j] for i, j in enumerate(leaves[:, est_id]) ] assert_array_almost_equal(leave_indicator, np.ones(shape=n_samples))
def test_row_norms(): X = np.random.RandomState(42).randn(100, 100) for dtype in (np.float32, np.float64): if dtype is np.float32: precision = 4 else: precision = 5 X = X.astype(dtype) sq_norm = (X**2).sum(axis=1) assert_array_almost_equal(sq_norm, row_norms(X, squared=True), precision) assert_array_almost_equal(np.sqrt(sq_norm), row_norms(X), precision) Xcsr = sparse.csr_matrix(X, dtype=dtype) assert_array_almost_equal(sq_norm, row_norms(Xcsr, squared=True), precision) assert_array_almost_equal(np.sqrt(sq_norm), row_norms(Xcsr), precision)
def check_sparse_input(name, X, X_sparse, y): ForestEstimator = FOREST_ESTIMATORS[name] dense = ForestEstimator(random_state=0, max_depth=2).fit(X, y) sparse = ForestEstimator(random_state=0, max_depth=2).fit(X_sparse, y) assert_array_almost_equal(sparse.apply(X), dense.apply(X)) if name in FOREST_CLASSIFIERS or name in FOREST_REGRESSORS: assert_array_almost_equal(sparse.predict(X), dense.predict(X)) assert_array_almost_equal(sparse.feature_importances_, dense.feature_importances_) if name in FOREST_CLASSIFIERS: assert_array_almost_equal(sparse.predict_proba(X), dense.predict_proba(X)) assert_array_almost_equal(sparse.predict_log_proba(X), dense.predict_log_proba(X)) if name in FOREST_TRANSFORMERS: assert_array_almost_equal( sparse.transform(X).toarray(), dense.transform(X).toarray()) assert_array_almost_equal( sparse.fit_transform(X).toarray(), dense.fit_transform(X).toarray())
def test_softmax(): rng = np.random.RandomState(0) X = rng.randn(3, 5) exp_X = np.exp(X) sum_exp_X = np.sum(exp_X, axis=1).reshape((-1, 1)) assert_array_almost_equal(softmax(X), exp_X / sum_exp_X)
def test_compute_sample_weight_with_subsample(): # Test compute_sample_weight with subsamples specified. # Test with balanced classes and all samples present y = np.asarray([1, 1, 1, 2, 2, 2]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) sample_weight = compute_sample_weight("balanced", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with column vector of balanced classes and all samples present y = np.asarray([[1], [1], [1], [2], [2], [2]]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) sample_weight = compute_sample_weight("balanced", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with a subsample y = np.asarray([1, 1, 1, 2, 2, 2]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y, range(4)) assert_array_almost_equal(sample_weight, [.5, .5, .5, 1.5, 1.5, 1.5]) sample_weight = compute_sample_weight("balanced", y, range(4)) assert_array_almost_equal(sample_weight, [2. / 3, 2. / 3, 2. / 3, 2., 2., 2.]) # Test with a bootstrap subsample y = np.asarray([1, 1, 1, 2, 2, 2]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y, [0, 1, 1, 2, 2, 3]) expected_auto = np.asarray( [1 / 3., 1 / 3., 1 / 3., 5 / 3., 5 / 3., 5 / 3.]) assert_array_almost_equal(sample_weight, expected_auto) sample_weight = compute_sample_weight("balanced", y, [0, 1, 1, 2, 2, 3]) expected_balanced = np.asarray([0.6, 0.6, 0.6, 3., 3., 3.]) assert_array_almost_equal(sample_weight, expected_balanced) # Test with a bootstrap subsample for multi-output y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y, [0, 1, 1, 2, 2, 3]) assert_array_almost_equal(sample_weight, expected_auto**2) sample_weight = compute_sample_weight("balanced", y, [0, 1, 1, 2, 2, 3]) assert_array_almost_equal(sample_weight, expected_balanced**2) # Test with a missing class y = np.asarray([1, 1, 1, 2, 2, 2, 3]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.]) sample_weight = compute_sample_weight("balanced", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.]) # Test with a missing class for multi-output y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [2, 2]]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.]) sample_weight = compute_sample_weight("balanced", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])
def test_compute_sample_weight(): # Test (and demo) compute_sample_weight. # Test with balanced classes y = np.asarray([1, 1, 1, 2, 2, 2]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) sample_weight = compute_sample_weight("balanced", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with user-defined weights sample_weight = compute_sample_weight({1: 2, 2: 1}, y) assert_array_almost_equal(sample_weight, [2., 2., 2., 1., 1., 1.]) # Test with column vector of balanced classes y = np.asarray([[1], [1], [1], [2], [2], [2]]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) sample_weight = compute_sample_weight("balanced", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with unbalanced classes y = np.asarray([1, 1, 1, 2, 2, 2, 3]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y) expected_auto = np.asarray([.6, .6, .6, .6, .6, .6, 1.8]) assert_array_almost_equal(sample_weight, expected_auto) sample_weight = compute_sample_weight("balanced", y) expected_balanced = np.array( [0.7777, 0.7777, 0.7777, 0.7777, 0.7777, 0.7777, 2.3333]) assert_array_almost_equal(sample_weight, expected_balanced, decimal=4) # Test with `None` weights sample_weight = compute_sample_weight(None, y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 1.]) # Test with multi-output of balanced classes y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) sample_weight = compute_sample_weight("balanced", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with multi-output with user-defined weights y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]]) sample_weight = compute_sample_weight([{1: 2, 2: 1}, {0: 1, 1: 2}], y) assert_array_almost_equal(sample_weight, [2., 2., 2., 2., 2., 2.]) # Test with multi-output of unbalanced classes y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [3, -1]]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y) assert_array_almost_equal(sample_weight, expected_auto**2) sample_weight = compute_sample_weight("balanced", y) assert_array_almost_equal(sample_weight, expected_balanced**2, decimal=3)
def check_sparse_input(EstimatorClass, X, X_sparse, y): dense = EstimatorClass(n_estimators=10, random_state=0, max_depth=2).fit(X, y) sparse = EstimatorClass(n_estimators=10, random_state=0, max_depth=2, presort=False).fit(X_sparse, y) auto = EstimatorClass(n_estimators=10, random_state=0, max_depth=2, presort='auto').fit(X_sparse, y) assert_array_almost_equal(sparse.apply(X), dense.apply(X)) assert_array_almost_equal(sparse.predict(X), dense.predict(X)) assert_array_almost_equal(sparse.feature_importances_, dense.feature_importances_) assert_array_almost_equal(sparse.apply(X), auto.apply(X)) assert_array_almost_equal(sparse.predict(X), auto.predict(X)) assert_array_almost_equal(sparse.feature_importances_, auto.feature_importances_) if isinstance(EstimatorClass, GradientBoostingClassifier): assert_array_almost_equal(sparse.predict_proba(X), dense.predict_proba(X)) assert_array_almost_equal(sparse.predict_log_proba(X), dense.predict_log_proba(X)) assert_array_almost_equal(sparse.predict_proba(X), auto.predict_proba(X)) assert_array_almost_equal(sparse.predict_log_proba(X), auto.predict_log_proba(X))