def test_thresholded_scorers(): # Test scorers that take thresholds. X, y = make_blobs(random_state=0, centers=2) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) clf = LogisticRegression(random_state=0) clf.fit(X_train, y_train) score1 = get_scorer('roc_auc')(clf, X_test, y_test) score2 = roc_auc_score(y_test, clf.decision_function(X_test)) score3 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1]) assert_almost_equal(score1, score2) assert_almost_equal(score1, score3) logscore = get_scorer('log_loss')(clf, X_test, y_test) logloss = log_loss(y_test, clf.predict_proba(X_test)) assert_almost_equal(-logscore, logloss) # same for an estimator without decision_function clf = DecisionTreeClassifier() clf.fit(X_train, y_train) score1 = get_scorer('roc_auc')(clf, X_test, y_test) score2 = roc_auc_score(y_test, clf.predict_proba(X_test)[:, 1]) assert_almost_equal(score1, score2) # test with a regressor (no decision_function) reg = DecisionTreeRegressor() reg.fit(X_train, y_train) score1 = get_scorer('roc_auc')(reg, X_test, y_test) score2 = roc_auc_score(y_test, reg.predict(X_test)) assert_almost_equal(score1, score2) # Test that an exception is raised on more than two classes X, y = make_blobs(random_state=0, centers=3) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) clf.fit(X_train, y_train) assert_raises(ValueError, get_scorer('roc_auc'), clf, X_test, y_test)
def test_pca(): """PCA on dense arrays""" pca = PCA(n_components=2) X = iris.data X_r = pca.fit(X).transform(X) np.testing.assert_equal(X_r.shape[1], 2) X_r2 = pca.fit_transform(X) assert_array_almost_equal(X_r, X_r2) pca = PCA() pca.fit(X) assert_almost_equal(pca.explained_variance_ratio_.sum(), 1.0, 3) X_r = pca.transform(X) X_r2 = pca.fit_transform(X) assert_array_almost_equal(X_r, X_r2) # Test get_covariance and get_precision with n_components == n_features # with n_components < n_features and with n_components == 0 for n_components in [0, 2, X.shape[1]]: pca.n_components = n_components pca.fit(X) cov = pca.get_covariance() precision = pca.get_precision() assert_array_almost_equal(np.dot(cov, precision), np.eye(X.shape[1]), 12)
def check_alternative_lrap_implementation(lrap_score, n_classes=5, n_samples=20, random_state=0): _, y_true = make_multilabel_classification(n_features=1, allow_unlabeled=False, random_state=random_state, n_classes=n_classes, n_samples=n_samples) # Score with ties y_score = sparse_random_matrix(n_components=y_true.shape[0], n_features=y_true.shape[1], random_state=random_state) if hasattr(y_score, "toarray"): y_score = y_score.toarray() score_lrap = label_ranking_average_precision_score(y_true, y_score) score_my_lrap = _my_lrap(y_true, y_score) assert_almost_equal(score_lrap, score_my_lrap) # Uniform score random_state = check_random_state(random_state) y_score = random_state.uniform(size=(n_samples, n_classes)) score_lrap = label_ranking_average_precision_score(y_true, y_score) score_my_lrap = _my_lrap(y_true, y_score) assert_almost_equal(score_lrap, score_my_lrap)
def test_check_scoring(): """Test all branches of check_scoring""" estimator = EstimatorWithoutFit() pattern = (r"estimator should a be an estimator implementing 'fit' method," r" .* was passed") assert_raises_regexp(TypeError, pattern, check_scoring, estimator) estimator = EstimatorWithFitAndScore() estimator.fit([[1]], [1]) scorer = check_scoring(estimator) assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0) estimator = EstimatorWithFitAndPredict() estimator.fit([[1]], [1]) pattern = (r"If no scoring is specified, the estimator passed should have" r" a 'score' method\. The estimator .* does not\.") assert_raises_regexp(TypeError, pattern, check_scoring, estimator) scorer = check_scoring(estimator, "accuracy") assert_almost_equal(scorer(estimator, [[1]], [1]), 1.0) estimator = EstimatorWithFit() pattern = (r"The estimator passed should have a 'score'" r" or a 'predict' method\. The estimator .* does not\.") assert_raises_regexp(TypeError, pattern, check_scoring, estimator, "accuracy") estimator = EstimatorWithFit() scorer = check_scoring(estimator, allow_none=True) assert_true(scorer is None)
def test_randomized_pca_check_list(): """Test that the projection by RandomizedPCA on list data is correct""" X = [[1.0, 0.0], [0.0, 1.0]] X_transformed = RandomizedPCA(n_components=1, random_state=0).fit(X).transform(X) assert_equal(X_transformed.shape, (2, 1)) assert_almost_equal(X_transformed.mean(), 0.00, 2) assert_almost_equal(X_transformed.std(), 0.71, 2)
def test_gradient(): # Test gradient of Kullback-Leibler divergence. random_state = check_random_state(0) n_samples = 50 n_features = 2 n_components = 2 alpha = 1.0 distances = random_state.randn(n_samples, n_features).astype(np.float32) distances = np.abs(distances.dot(distances.T)) np.fill_diagonal(distances, 0.0) X_embedded = random_state.randn(n_samples, n_components).astype(np.float32) P = _joint_probabilities(distances, desired_perplexity=25.0, verbose=0) def fun(params): return _kl_divergence(params, P, alpha, n_samples, n_components)[0] def grad(params): return _kl_divergence(params, P, alpha, n_samples, n_components)[1] assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0, decimal=5)
def test_accessible_kl_divergence(): # Ensures that the accessible kl_divergence matches the computed value random_state = check_random_state(0) X = random_state.randn(100, 2) tsne = TSNE(n_iter_without_progress=2, verbose=2, random_state=0, method='exact') old_stdout = sys.stdout sys.stdout = StringIO() try: tsne.fit_transform(X) finally: out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout # The output needs to contain the accessible kl_divergence as the error at # the last iteration for line in out.split('\n')[::-1]: if 'Iteration' in line: _, _, error = line.partition('error = ') if error: error, _, _ = error.partition(',') break assert_almost_equal(tsne.kl_divergence_, float(error), decimal=5)
def test_randomized_svd_infinite_rank(): """Check that extmath.randomized_svd can handle noisy matrices""" n_samples = 100 n_features = 500 rank = 5 k = 10 # let us try again without 'low_rank component': just regularly but slowly # decreasing singular values: the rank of the data matrix is infinite X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, effective_rank=rank, tail_strength=1.0, random_state=0) assert_equal(X.shape, (n_samples, n_features)) # compute the singular values of X using the slow exact method _, s, _ = linalg.svd(X, full_matrices=False) # compute the singular values of X using the fast approximate method # without the iterated power method _, sa, _ = randomized_svd(X, k, n_iter=0) # the approximation does not tolerate the noise: assert_greater(np.abs(s[:k] - sa).max(), 0.1) # compute the singular values of X using the fast approximate method with # iterated power method _, sap, _ = randomized_svd(X, k, n_iter=5) # the iterated power method is still managing to get most of the structure # at the requested rank assert_almost_equal(s[:k], sap, decimal=3)
def test_symmetry(): """Test the symmetry of score and loss functions""" random_state = check_random_state(0) y_true = random_state.randint(0, 2, size=(20, )) y_pred = random_state.randint(0, 2, size=(20, )) # We shouldn't forget any metrics assert_equal(set(SYMMETRIC_METRICS).union(NOT_SYMMETRIC_METRICS, THRESHOLDED_METRICS, METRIC_UNDEFINED_MULTICLASS), set(ALL_METRICS)) assert_equal( set(SYMMETRIC_METRICS).intersection(set(NOT_SYMMETRIC_METRICS)), set([])) # Symmetric metric for name in SYMMETRIC_METRICS: metric = ALL_METRICS[name] assert_almost_equal(metric(y_true, y_pred), metric(y_pred, y_true), err_msg="%s is not symmetric" % name) # Not symmetric metrics for name in NOT_SYMMETRIC_METRICS: metric = ALL_METRICS[name] assert_true(np.any(metric(y_true, y_pred) != metric(y_pred, y_true)), msg="%s seems to be symmetric" % name)
def test_dtype_match(): # Test that np.float32 input data is not cast to np.float64 when possible X_32 = np.array(X).astype(np.float32) y_32 = np.array(Y1).astype(np.float32) X_64 = np.array(X).astype(np.float64) y_64 = np.array(Y1).astype(np.float64) X_sparse_32 = sp.csr_matrix(X, dtype=np.float32) for solver in ['newton-cg']: for multi_class in ['ovr', 'multinomial']: # Check type consistency lr_32 = LogisticRegression(solver=solver, multi_class=multi_class) lr_32.fit(X_32, y_32) assert_equal(lr_32.coef_.dtype, X_32.dtype) # check consistency with sparsity lr_32_sparse = LogisticRegression(solver=solver, multi_class=multi_class) lr_32_sparse.fit(X_sparse_32, y_32) assert_equal(lr_32_sparse.coef_.dtype, X_sparse_32.dtype) # Check accuracy consistency lr_64 = LogisticRegression(solver=solver, multi_class=multi_class) lr_64.fit(X_64, y_64) assert_equal(lr_64.coef_.dtype, X_64.dtype) assert_almost_equal(lr_32.coef_, lr_64.coef_.astype(np.float32))
def test_randomized_svd_low_rank_with_noise(): """Check that extmath.randomized_svd can handle noisy matrices""" n_samples = 100 n_features = 500 rank = 5 k = 10 # generate a matrix X wity structure approximate rank `rank` and an # important noisy component X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, effective_rank=rank, tail_strength=0.5, random_state=0) assert_equal(X.shape, (n_samples, n_features)) # compute the singular values of X using the slow exact method _, s, _ = linalg.svd(X, full_matrices=False) # compute the singular values of X using the fast approximate method # without the iterated power method _, sa, _ = randomized_svd(X, k, n_iter=0) # the approximation does not tolerate the noise: assert_greater(np.abs(s[:k] - sa).max(), 0.05) # compute the singular values of X using the fast approximate method with # iterated power method _, sap, _ = randomized_svd(X, k, n_iter=5) # the iterated power method is helping getting rid of the noise: assert_almost_equal(s[:k], sap, decimal=3)
def test_precision_recall_f1_score_binary(): """Test Precision Recall and F1 Score for binary classification task""" y_true, y_pred, _ = make_prediction(binary=True) # detailed measures for each class p, r, f, s = precision_recall_fscore_support(y_true, y_pred, average=None) assert_array_almost_equal(p, [0.73, 0.85], 2) assert_array_almost_equal(r, [0.88, 0.68], 2) assert_array_almost_equal(f, [0.80, 0.76], 2) assert_array_equal(s, [25, 25]) # individual scoring function that can be used for grid search: in the # binary class case the score is the value of the measure for the positive # class (e.g. label == 1) ps = precision_score(y_true, y_pred) assert_array_almost_equal(ps, 0.85, 2) rs = recall_score(y_true, y_pred) assert_array_almost_equal(rs, 0.68, 2) fs = f1_score(y_true, y_pred) assert_array_almost_equal(fs, 0.76, 2) assert_almost_equal(fbeta_score(y_true, y_pred, beta=2), (1 + 2 ** 2) * ps * rs / (2 ** 2 * ps + rs), 2)
def test_multilabel_hamming_loss(): # Dense label indicator matrix format y1 = np.array([[0, 1, 1], [1, 0, 1]]) y2 = np.array([[0, 0, 1], [1, 0, 1]]) assert_equal(hamming_loss(y1, y2), 1 / 6) assert_equal(hamming_loss(y1, y1), 0) assert_equal(hamming_loss(y2, y2), 0) assert_equal(hamming_loss(y2, np.logical_not(y2)), 1) assert_equal(hamming_loss(y1, np.logical_not(y1)), 1) assert_equal(hamming_loss(y1, np.zeros(y1.shape)), 4 / 6) assert_equal(hamming_loss(y2, np.zeros(y1.shape)), 0.5) with ignore_warnings(): # sequence of sequences is deprecated # List of tuple of label y1 = [(1, 2,), (0, 2,)] y2 = [(2,), (0, 2,)] assert_equal(hamming_loss(y1, y2), 1 / 6) assert_equal(hamming_loss(y1, y1), 0) assert_equal(hamming_loss(y2, y2), 0) assert_equal(hamming_loss(y2, [(), ()]), 0.75) assert_equal(hamming_loss(y1, [tuple(), (10, )]), 0.625) assert_almost_equal(hamming_loss(y2, [tuple(), (10, )], classes=np.arange(11)), 0.1818, 2)
def test_enet_path(): # We use a large number of samples and of informative features so that # the l1_ratio selected is more toward ridge than lasso X, y, X_test, y_test = build_dataset(n_samples=200, n_features=100, n_informative_features=100) max_iter = 150 with warnings.catch_warnings(): # Here we have a small number of iterations, and thus the # ElasticNet might not converge. This is to speed up tests warnings.simplefilter("ignore", UserWarning) clf = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7], cv=3, max_iter=max_iter) clf.fit(X, y) # Well-conditionned settings, we should have selected our # smallest penalty assert_almost_equal(clf.alpha_, min(clf.alphas_)) # Non-sparse ground truth: we should have seleted an elastic-net # that is closer to ridge than to lasso assert_equal(clf.l1_ratio_, min(clf.l1_ratio)) clf = ElasticNetCV(n_alphas=5, eps=2e-3, l1_ratio=[0.5, 0.7], cv=3, max_iter=max_iter, precompute=True) clf.fit(X, y) # Well-conditionned settings, we should have selected our # smallest penalty assert_almost_equal(clf.alpha_, min(clf.alphas_)) # Non-sparse ground truth: we should have seleted an elastic-net # that is closer to ridge than to lasso assert_equal(clf.l1_ratio_, min(clf.l1_ratio)) # We are in well-conditionned settings with low noise: we should # have a good test-set performance assert_greater(clf.score(X_test, y_test), 0.99)
def test_fetch_rcv1(): try: data1 = fetch_rcv1(shuffle=False, download_if_missing=False) except IOError as e: if e.errno == errno.ENOENT: raise SkipTest("Download RCV1 dataset to run this test.") X1, Y1 = data1.data, data1.target cat_list, s1 = data1.target_names.tolist(), data1.sample_id # test sparsity assert_true(sp.issparse(X1)) assert_true(sp.issparse(Y1)) assert_equal(60915113, X1.data.size) assert_equal(2606875, Y1.data.size) # test shapes assert_equal((804414, 47236), X1.shape) assert_equal((804414, 103), Y1.shape) assert_equal((804414,), s1.shape) assert_equal(103, len(cat_list)) # test ordering of categories first_categories = [u'C11', u'C12', u'C13', u'C14', u'C15', u'C151'] assert_array_equal(first_categories, cat_list[:6]) # test number of sample for some categories some_categories = ('GMIL', 'E143', 'CCAT') number_non_zero_in_cat = (5, 1206, 381327) for num, cat in zip(number_non_zero_in_cat, some_categories): j = cat_list.index(cat) assert_equal(num, Y1[:, j].data.size) # test shuffling and subset data2 = fetch_rcv1(shuffle=True, subset='train', random_state=77, download_if_missing=False) X2, Y2 = data2.data, data2.target s2 = data2.sample_id # test return_X_y option fetch_func = partial(fetch_rcv1, shuffle=False, subset='train', download_if_missing=False) check_return_X_y(data2, fetch_func) # The first 23149 samples are the training samples assert_array_equal(np.sort(s1[:23149]), np.sort(s2)) # test some precise values some_sample_ids = (2286, 3274, 14042) for sample_id in some_sample_ids: idx1 = s1.tolist().index(sample_id) idx2 = s2.tolist().index(sample_id) feature_values_1 = X1[idx1, :].toarray() feature_values_2 = X2[idx2, :].toarray() assert_almost_equal(feature_values_1, feature_values_2) target_values_1 = Y1[idx1, :].toarray() target_values_2 = Y2[idx2, :].toarray() assert_almost_equal(target_values_1, target_values_2)
def test_check_sample_weight(): from sklearn.cluster.k_means_ import _check_sample_weight sample_weight = None checked_sample_weight = _check_sample_weight(X, sample_weight) assert_equal(_num_samples(X), _num_samples(checked_sample_weight)) assert_almost_equal(checked_sample_weight.sum(), _num_samples(X)) assert_equal(X.dtype, checked_sample_weight.dtype)
def test_pca(): # PCA on dense arrays X = iris.data for n_comp in np.arange(X.shape[1]): pca = PCA(n_components=n_comp, svd_solver='full') X_r = pca.fit(X).transform(X) np.testing.assert_equal(X_r.shape[1], n_comp) X_r2 = pca.fit_transform(X) assert_array_almost_equal(X_r, X_r2) X_r = pca.transform(X) X_r2 = pca.fit_transform(X) assert_array_almost_equal(X_r, X_r2) # Test get_covariance and get_precision cov = pca.get_covariance() precision = pca.get_precision() assert_array_almost_equal(np.dot(cov, precision), np.eye(X.shape[1]), 12) # test explained_variance_ratio_ == 1 with all components pca = PCA(svd_solver='full') pca.fit(X) assert_almost_equal(pca.explained_variance_ratio_.sum(), 1.0, 3)
def test_compute_mi_cd(): # To test define a joint distribution as follows: # p(x, y) = p(x) p(y | x) # X ~ Bernoulli(p) # (Y | x = 0) ~ Uniform(-1, 1) # (Y | x = 1) ~ Uniform(0, 2) # Use the following formula for mutual information: # I(X; Y) = H(Y) - H(Y | X) # Two entropies can be computed by hand: # H(Y) = -(1-p)/2 * ln((1-p)/2) - p/2*log(p/2) - 1/2*log(1/2) # H(Y | X) = ln(2) # Now we need to implement sampling from out distribution, which is # done easily using conditional distribution logic. n_samples = 1000 rng = check_random_state(0) for p in [0.3, 0.5, 0.7]: x = rng.uniform(size=n_samples) > p y = np.empty(n_samples) mask = x == 0 y[mask] = rng.uniform(-1, 1, size=np.sum(mask)) y[~mask] = rng.uniform(0, 2, size=np.sum(~mask)) I_theory = -0.5 * ((1 - p) * np.log(0.5 * (1 - p)) + p * np.log(0.5 * p) + np.log(0.5)) - np.log(2) # Assert the same tolerance. for n_neighbors in [3, 5, 7]: I_computed = _compute_mi(x, y, True, False, n_neighbors) assert_almost_equal(I_computed, I_theory, 1)
def test_grid_search_score_method(): X, y = make_classification(n_samples=100, n_classes=2, flip_y=.2, random_state=0) clf = LinearSVC(random_state=0) grid = {'C': [.1]} search_no_scoring = GridSearchCV(clf, grid, scoring=None).fit(X, y) search_accuracy = GridSearchCV(clf, grid, scoring='accuracy').fit(X, y) search_no_score_method_auc = GridSearchCV(LinearSVCNoScore(), grid, scoring='roc_auc').fit(X, y) search_auc = GridSearchCV(clf, grid, scoring='roc_auc').fit(X, y) # ChangedBehaviourWarning occurred previously (prior to #9005) score_no_scoring = assert_no_warnings(search_no_scoring.score, X, y) score_accuracy = assert_no_warnings(search_accuracy.score, X, y) score_no_score_auc = assert_no_warnings(search_no_score_method_auc.score, X, y) score_auc = assert_no_warnings(search_auc.score, X, y) # ensure the test is sane assert_true(score_auc < 1.0) assert_true(score_accuracy < 1.0) assert_not_equal(score_auc, score_accuracy) assert_almost_equal(score_accuracy, score_no_scoring) assert_almost_equal(score_auc, score_no_score_auc)
def test_multiclass_multioutput_estimator_predict_proba(): seed = 542 # make test deterministic rng = np.random.RandomState(seed) # random features X = rng.normal(size=(5, 5)) # random labels y1 = np.array(['b', 'a', 'a', 'b', 'a']).reshape(5, 1) # 2 classes y2 = np.array(['d', 'e', 'f', 'e', 'd']).reshape(5, 1) # 3 classes Y = np.concatenate([y1, y2], axis=1) clf = MultiOutputClassifier(LogisticRegression(random_state=seed)) clf.fit(X, Y) y_result = clf.predict_proba(X) y_actual = [np.array([[0.23481764, 0.76518236], [0.67196072, 0.32803928], [0.54681448, 0.45318552], [0.34883923, 0.65116077], [0.73687069, 0.26312931]]), np.array([[0.5171785, 0.23878628, 0.24403522], [0.22141451, 0.64102704, 0.13755846], [0.16751315, 0.18256843, 0.64991843], [0.27357372, 0.55201592, 0.17441036], [0.65745193, 0.26062899, 0.08191907]])] for i in range(len(y_actual)): assert_almost_equal(y_result[i], y_actual[i])
def test_compute_mi_cc(): # For two continuous variables a good approach is to test on bivariate # normal distribution, where mutual information is known. # Mean of the distribution, irrelevant for mutual information. mean = np.zeros(2) # Setup covariance matrix with correlation coeff. equal 0.5. sigma_1 = 1 sigma_2 = 10 corr = 0.5 cov = np.array([ [sigma_1**2, corr * sigma_1 * sigma_2], [corr * sigma_1 * sigma_2, sigma_2**2] ]) # True theoretical mutual information. I_theory = (np.log(sigma_1) + np.log(sigma_2) - 0.5 * np.log(np.linalg.det(cov))) rng = check_random_state(0) Z = rng.multivariate_normal(mean, cov, size=1000) x, y = Z[:, 0], Z[:, 1] # Theory and computed values won't be very close, assert that the # first figures after decimal point match. for n_neighbors in [3, 5, 7]: I_computed = _compute_mi(x, y, False, False, n_neighbors) assert_almost_equal(I_computed, I_theory, 1)
def test_sgd_averaged_computed_correctly(self): """Tests the average regressor matches the naive implementation""" eta = 0.001 alpha = 0.01 n_samples = 20 n_features = 10 rng = np.random.RandomState(0) X = rng.normal(size=(n_samples, n_features)) w = rng.normal(size=n_features) # simple linear function without noise y = np.dot(X, w) clf = self.factory( loss="squared_loss", learning_rate="constant", eta0=eta, alpha=alpha, fit_intercept=True, n_iter=1, average=True, shuffle=False, ) clf.fit(X, y) average_weights, average_intercept = self.asgd(X, y, eta, alpha) assert_array_almost_equal(clf.coef_, average_weights, decimal=16) assert_almost_equal(clf.intercept_, average_intercept, decimal=16)
def test_average_binary_computed_correctly(self): """Checks the SGDClassifier correctly computes the average weights""" eta = 0.1 alpha = 2.0 n_samples = 20 n_features = 10 rng = np.random.RandomState(0) X = rng.normal(size=(n_samples, n_features)) w = rng.normal(size=n_features) clf = self.factory( loss="squared_loss", learning_rate="constant", eta0=eta, alpha=alpha, fit_intercept=True, n_iter=1, average=True, shuffle=False, ) # simple linear function without noise y = np.dot(X, w) y = np.sign(y) clf.fit(X, y) average_weights, average_intercept = self.asgd(X, y, eta, alpha) average_weights = average_weights.reshape(1, -1) assert_array_almost_equal(clf.coef_, average_weights, decimal=14) assert_almost_equal(clf.intercept_, average_intercept, decimal=14)
def test_dtype_match_cholesky(): # Test different alphas in cholesky solver to ensure full coverage. # This test is separated from test_dtype_match for clarity. rng = np.random.RandomState(0) alpha = (1.0, 0.5) n_samples, n_features, n_target = 6, 7, 2 X_64 = rng.randn(n_samples, n_features) y_64 = rng.randn(n_samples, n_target) X_32 = X_64.astype(np.float32) y_32 = y_64.astype(np.float32) # Check type consistency 32bits ridge_32 = Ridge(alpha=alpha, solver='cholesky') ridge_32.fit(X_32, y_32) coef_32 = ridge_32.coef_ # Check type consistency 64 bits ridge_64 = Ridge(alpha=alpha, solver='cholesky') ridge_64.fit(X_64, y_64) coef_64 = ridge_64.coef_ # Do all the checks at once, like this is easier to debug assert coef_32.dtype == X_32.dtype assert coef_64.dtype == X_64.dtype assert ridge_32.predict(X_32).dtype == X_32.dtype assert ridge_64.predict(X_64).dtype == X_64.dtype assert_almost_equal(ridge_32.coef_, ridge_64.coef_, decimal=5)
def test_lasso_cv(): X, y, X_test, y_test = build_dataset() max_iter = 150 clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter).fit(X, y) assert_almost_equal(clf.alpha_, 0.056, 2) clf = LassoCV(n_alphas=10, eps=1e-3, max_iter=max_iter, precompute=True) clf.fit(X, y) assert_almost_equal(clf.alpha_, 0.056, 2) # Check that the lars and the coordinate descent implementation # select a similar alpha lars = LassoLarsCV(normalize=False, max_iter=30).fit(X, y) # for this we check that they don't fall in the grid of # clf.alphas further than 1 assert_true(np.abs( np.searchsorted(clf.alphas_[::-1], lars.alpha_) - np.searchsorted(clf.alphas_[::-1], clf.alpha_)) <= 1) # check that they also give a similar MSE mse_lars = interpolate.interp1d(lars.cv_alphas_, lars.cv_mse_path_.T) np.testing.assert_approx_equal(mse_lars(clf.alphas_[5]).mean(), clf.mse_path_[5].mean(), significant=2) # test set assert_greater(clf.score(X_test, y_test), 0.99)
def test_ovr_partial_fit(): # Test if partial_fit is working as intented X, y = shuffle(iris.data, iris.target, random_state=0) ovr = OneVsRestClassifier(MultinomialNB()) ovr.partial_fit(X[:100], y[:100], np.unique(y)) ovr.partial_fit(X[100:], y[100:]) pred = ovr.predict(X) ovr2 = OneVsRestClassifier(MultinomialNB()) pred2 = ovr2.fit(X, y).predict(X) assert_almost_equal(pred, pred2) assert_equal(len(ovr.estimators_), len(np.unique(y))) assert_greater(np.mean(y == pred), 0.65) # Test when mini batches doesn't have all classes ovr = OneVsRestClassifier(MultinomialNB()) ovr.partial_fit(iris.data[:60], iris.target[:60], np.unique(iris.target)) ovr.partial_fit(iris.data[60:], iris.target[60:]) pred = ovr.predict(iris.data) ovr2 = OneVsRestClassifier(MultinomialNB()) pred2 = ovr2.fit(iris.data, iris.target).predict(iris.data) assert_almost_equal(pred, pred2) assert_equal(len(ovr.estimators_), len(np.unique(iris.target))) assert_greater(np.mean(iris.target == pred), 0.65)
def test_dtype_match(): rng = np.random.RandomState(0) alpha = 1.0 n_samples, n_features = 6, 5 X_64 = rng.randn(n_samples, n_features) y_64 = rng.randn(n_samples) X_32 = X_64.astype(np.float32) y_32 = y_64.astype(np.float32) solvers = ["svd", "sparse_cg", "cholesky", "lsqr"] for solver in solvers: # Check type consistency 32bits ridge_32 = Ridge(alpha=alpha, solver=solver) ridge_32.fit(X_32, y_32) coef_32 = ridge_32.coef_ # Check type consistency 64 bits ridge_64 = Ridge(alpha=alpha, solver=solver) ridge_64.fit(X_64, y_64) coef_64 = ridge_64.coef_ # Do the actual checks at once for easier debug assert coef_32.dtype == X_32.dtype assert coef_64.dtype == X_64.dtype assert ridge_32.predict(X_32).dtype == X_32.dtype assert ridge_64.predict(X_64).dtype == X_64.dtype assert_almost_equal(ridge_32.coef_, ridge_64.coef_, decimal=5)
def test_sgd_multiclass_average(self): eta = 0.001 alpha = 0.01 """Multi-class average test case""" clf = self.factory( loss="squared_loss", learning_rate="constant", eta0=eta, alpha=alpha, fit_intercept=True, n_iter=1, average=True, shuffle=False, ) np_Y2 = np.array(Y2) clf.fit(X2, np_Y2) classes = np.unique(np_Y2) for i, cl in enumerate(classes): y_i = np.ones(np_Y2.shape[0]) y_i[np_Y2 != cl] = -1 average_coef, average_intercept = self.asgd(X2, y_i, eta, alpha) assert_array_almost_equal(average_coef, clf.coef_[i], decimal=16) assert_almost_equal(average_intercept, clf.intercept_[i], decimal=16)
def test_ovo_partial_fit_predict(): X, y = shuffle(iris.data, iris.target) ovo1 = OneVsOneClassifier(MultinomialNB()) ovo1.partial_fit(X[:100], y[:100], np.unique(y)) ovo1.partial_fit(X[100:], y[100:]) pred1 = ovo1.predict(X) ovo2 = OneVsOneClassifier(MultinomialNB()) ovo2.fit(X, y) pred2 = ovo2.predict(X) assert_equal(len(ovo1.estimators_), n_classes * (n_classes - 1) / 2) assert_greater(np.mean(y == pred1), 0.65) assert_almost_equal(pred1, pred2) # Test when mini-batches don't have all target classes ovo1 = OneVsOneClassifier(MultinomialNB()) ovo1.partial_fit(iris.data[:60], iris.target[:60], np.unique(iris.target)) ovo1.partial_fit(iris.data[60:], iris.target[60:]) pred1 = ovo1.predict(iris.data) ovo2 = OneVsOneClassifier(MultinomialNB()) pred2 = ovo2.fit(iris.data, iris.target).predict(iris.data) assert_almost_equal(pred1, pred2) assert_equal(len(ovo1.estimators_), len(np.unique(iris.target))) assert_greater(np.mean(iris.target == pred1), 0.65)
def test_sgd_averaged_partial_fit(self): """Tests whether the partial fit yields the same average as the fit""" eta = 0.001 alpha = 0.01 n_samples = 20 n_features = 10 rng = np.random.RandomState(0) X = rng.normal(size=(n_samples, n_features)) w = rng.normal(size=n_features) # simple linear function without noise y = np.dot(X, w) clf = self.factory( loss="squared_loss", learning_rate="constant", eta0=eta, alpha=alpha, fit_intercept=True, n_iter=1, average=True, shuffle=False, ) clf.partial_fit(X[: int(n_samples / 2)][:], y[: int(n_samples / 2)]) clf.partial_fit(X[int(n_samples / 2) :][:], y[int(n_samples / 2) :]) average_weights, average_intercept = self.asgd(X, y, eta, alpha) assert_array_almost_equal(clf.coef_, average_weights, decimal=16) assert_almost_equal(clf.intercept_[0], average_intercept, decimal=16)
def test_minibatch_update_consistency(): # Check that dense and sparse minibatch update give the same results rng = np.random.RandomState(42) old_centers = centers + rng.normal(size=centers.shape) new_centers = old_centers.copy() new_centers_csr = old_centers.copy() weight_sums = np.zeros(new_centers.shape[0], dtype=np.double) weight_sums_csr = np.zeros(new_centers.shape[0], dtype=np.double) x_squared_norms = (X**2).sum(axis=1) x_squared_norms_csr = row_norms(X_csr, squared=True) buffer = np.zeros(centers.shape[1], dtype=np.double) buffer_csr = np.zeros(centers.shape[1], dtype=np.double) # extract a small minibatch X_mb = X[:10] X_mb_csr = X_csr[:10] x_mb_squared_norms = x_squared_norms[:10] x_mb_squared_norms_csr = x_squared_norms_csr[:10] sample_weight_mb = np.ones(X_mb.shape[0], dtype=np.double) # step 1: compute the dense minibatch update old_inertia, incremental_diff = _mini_batch_step(X_mb, sample_weight_mb, x_mb_squared_norms, new_centers, weight_sums, buffer, 1, None, random_reassign=False) assert old_inertia > 0.0 # compute the new inertia on the same batch to check that it decreased labels, new_inertia = _labels_inertia(X_mb, sample_weight_mb, x_mb_squared_norms, new_centers) assert new_inertia > 0.0 assert new_inertia < old_inertia # check that the incremental difference computation is matching the # final observed value effective_diff = np.sum((new_centers - old_centers)**2) assert_almost_equal(incremental_diff, effective_diff) # step 2: compute the sparse minibatch update old_inertia_csr, incremental_diff_csr = _mini_batch_step( X_mb_csr, sample_weight_mb, x_mb_squared_norms_csr, new_centers_csr, weight_sums_csr, buffer_csr, 1, None, random_reassign=False) assert old_inertia_csr > 0.0 # compute the new inertia on the same batch to check that it decreased labels_csr, new_inertia_csr = _labels_inertia(X_mb_csr, sample_weight_mb, x_mb_squared_norms_csr, new_centers_csr) assert new_inertia_csr > 0.0 assert new_inertia_csr < old_inertia_csr # check that the incremental difference computation is matching the # final observed value effective_diff = np.sum((new_centers_csr - old_centers)**2) assert_almost_equal(incremental_diff_csr, effective_diff) # step 3: check that sparse and dense updates lead to the same results assert_array_equal(labels, labels_csr) assert_array_almost_equal(new_centers, new_centers_csr) assert_almost_equal(incremental_diff, incremental_diff_csr) assert_almost_equal(old_inertia, old_inertia_csr) assert_almost_equal(new_inertia, new_inertia_csr)
def test_fast_dot(): # Check fast dot blas wrapper function if fast_dot is np.dot: return rng = np.random.RandomState(42) A = rng.random_sample([2, 10]) B = rng.random_sample([2, 10]) try: linalg.get_blas_funcs(['gemm'])[0] has_blas = True except (AttributeError, ValueError): has_blas = False if has_blas: # Test _fast_dot for invalid input. # Maltyped data. for dt1, dt2 in [['f8', 'f4'], ['i4', 'i4']]: assert_raises(ValueError, _fast_dot, A.astype(dt1), B.astype(dt2).T) # Malformed data. # ndim == 0 E = np.empty(0) assert_raises(ValueError, _fast_dot, E, E) # ndim == 1 assert_raises(ValueError, _fast_dot, A, A[0]) # ndim > 2 assert_raises(ValueError, _fast_dot, A.T, np.array([A, A])) # min(shape) == 1 assert_raises(ValueError, _fast_dot, A, A[0, :][None, :]) # test for matrix mismatch error assert_raises(ValueError, _fast_dot, A, A) # Test cov-like use case + dtypes. for dtype in ['f8', 'f4']: A = A.astype(dtype) B = B.astype(dtype) # col < row C = np.dot(A.T, A) C_ = fast_dot(A.T, A) assert_almost_equal(C, C_, decimal=5) C = np.dot(A.T, B) C_ = fast_dot(A.T, B) assert_almost_equal(C, C_, decimal=5) C = np.dot(A, B.T) C_ = fast_dot(A, B.T) assert_almost_equal(C, C_, decimal=5) # Test square matrix * rectangular use case. A = rng.random_sample([2, 2]) for dtype in ['f8', 'f4']: A = A.astype(dtype) B = B.astype(dtype) C = np.dot(A, B) C_ = fast_dot(A, B) assert_almost_equal(C, C_, decimal=5) C = np.dot(A.T, B) C_ = fast_dot(A.T, B) assert_almost_equal(C, C_, decimal=5) if has_blas: for x in [np.array([[d] * 10] * 2) for d in [np.inf, np.nan]]: assert_raises(ValueError, _fast_dot, x, x.T)
def test_randomized_svd_transpose_consistency(): # Check that transposing the design matrix has limited impact n_samples = 100 n_features = 500 rank = 4 k = 10 X = make_low_rank_matrix(n_samples=n_samples, n_features=n_features, effective_rank=rank, tail_strength=0.5, random_state=0) assert_equal(X.shape, (n_samples, n_features)) U1, s1, V1 = randomized_svd(X, k, n_iter=3, transpose=False, random_state=0) U2, s2, V2 = randomized_svd(X, k, n_iter=3, transpose=True, random_state=0) U3, s3, V3 = randomized_svd(X, k, n_iter=3, transpose='auto', random_state=0) U4, s4, V4 = linalg.svd(X, full_matrices=False) assert_almost_equal(s1, s4[:k], decimal=3) assert_almost_equal(s2, s4[:k], decimal=3) assert_almost_equal(s3, s4[:k], decimal=3) assert_almost_equal(np.dot(U1, V1), np.dot(U4[:, :k], V4[:k, :]), decimal=2) assert_almost_equal(np.dot(U2, V2), np.dot(U4[:, :k], V4[:k, :]), decimal=2) # in this case 'auto' is equivalent to transpose assert_almost_equal(s2, s3)
def test_stratified_kfold_ratios(): # Check that stratified kfold preserves label ratios in individual splits # Repeat with shuffling turned off and on n_samples = 1000 labels = np.array([4] * int(0.10 * n_samples) + [0] * int(0.89 * n_samples) + [1] * int(0.01 * n_samples)) for shuffle in [False, True]: for train, test in cval.StratifiedKFold(labels, 5, shuffle=shuffle): assert_almost_equal(np.sum(labels[train] == 4) / len(train), 0.10, 2) assert_almost_equal(np.sum(labels[train] == 0) / len(train), 0.89, 2) assert_almost_equal(np.sum(labels[train] == 1) / len(train), 0.01, 2) assert_almost_equal(np.sum(labels[test] == 4) / len(test), 0.10, 2) assert_almost_equal(np.sum(labels[test] == 0) / len(test), 0.89, 2) assert_almost_equal(np.sum(labels[test] == 1) / len(test), 0.01, 2)
def test_stratified_kfold_ratios(): # Check that stratified kfold preserves class ratios in individual splits # Repeat with shuffling turned off and on n_samples = 1000 X = np.ones(n_samples) y = np.array([4] * int(0.10 * n_samples) + [0] * int(0.89 * n_samples) + [1] * int(0.01 * n_samples)) for shuffle in (False, True): for train, test in StratifiedKFold(5, shuffle=shuffle).split(X, y): assert_almost_equal(np.sum(y[train] == 4) / len(train), 0.10, 2) assert_almost_equal(np.sum(y[train] == 0) / len(train), 0.89, 2) assert_almost_equal(np.sum(y[train] == 1) / len(train), 0.01, 2) assert_almost_equal(np.sum(y[test] == 4) / len(test), 0.10, 2) assert_almost_equal(np.sum(y[test] == 0) / len(test), 0.89, 2) assert_almost_equal(np.sum(y[test] == 1) / len(test), 0.01, 2)
def test_fastica_simple(add_noise=False): """ Test the FastICA algorithm on very simple data. """ rng = np.random.RandomState(0) # scipy.stats uses the global RNG: np.random.seed(0) n_samples = 1000 # Generate two sources: s1 = (2 * np.sin(np.linspace(0, 100, n_samples)) > 0) - 1 s2 = stats.t.rvs(1, size=n_samples) s = np.c_[s1, s2].T center_and_norm(s) s1, s2 = s # Mixing angle phi = 0.6 mixing = np.array([[np.cos(phi), np.sin(phi)], [np.sin(phi), -np.cos(phi)]]) m = np.dot(mixing, s) if add_noise: m += 0.1 * rng.randn(2, 1000) center_and_norm(m) # function as fun arg def g_test(x): return x ** 3, (3 * x ** 2).mean(axis=-1) algos = ['parallel', 'deflation'] nls = ['logcosh', 'exp', 'cube', g_test] whitening = [True, False] for algo, nl, whiten in itertools.product(algos, nls, whitening): if whiten: k_, mixing_, s_ = fastica(m.T, fun=nl, algorithm=algo) assert_raises(ValueError, fastica, m.T, fun=np.tanh, algorithm=algo) else: X = PCA(n_components=2, whiten=True).fit_transform(m.T) k_, mixing_, s_ = fastica(X, fun=nl, algorithm=algo, whiten=False) assert_raises(ValueError, fastica, X, fun=np.tanh, algorithm=algo) s_ = s_.T # Check that the mixing model described in the docstring holds: if whiten: assert_almost_equal(s_, np.dot(np.dot(mixing_, k_), m)) center_and_norm(s_) s1_, s2_ = s_ # Check to see if the sources have been estimated # in the wrong order if abs(np.dot(s1_, s2)) > abs(np.dot(s1_, s1)): s2_, s1_ = s_ s1_ *= np.sign(np.dot(s1_, s1)) s2_ *= np.sign(np.dot(s2_, s2)) # Check that we have estimated the original sources if not add_noise: assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=2) assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=2) else: assert_almost_equal(np.dot(s1_, s1) / n_samples, 1, decimal=1) assert_almost_equal(np.dot(s2_, s2) / n_samples, 1, decimal=1) # Test FastICA class _, _, sources_fun = fastica(m.T, fun=nl, algorithm=algo, random_state=0) ica = FastICA(fun=nl, algorithm=algo, random_state=0) sources = ica.fit_transform(m.T) assert_equal(ica.components_.shape, (2, 2)) assert_equal(sources.shape, (1000, 2)) assert_array_almost_equal(sources_fun, sources) assert_array_almost_equal(sources, ica.transform(m.T)) assert_equal(ica.mixing_.shape, (2, 2)) for fn in [np.tanh, "exp(-.5(x^2))"]: ica = FastICA(fun=fn, algorithm=algo, random_state=0) assert_raises(ValueError, ica.fit, m.T) assert_raises(TypeError, FastICA(fun=moves.xrange(10)).fit, m.T)
def test_classification_scores(): # Test classification scorers. X, y = make_blobs(random_state=0, centers=2) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0) clf = LinearSVC(random_state=0) clf.fit(X_train, y_train) for prefix, metric in [('f1', f1_score), ('precision', precision_score), ('recall', recall_score)]: score1 = get_scorer('%s_weighted' % prefix)(clf, X_test, y_test) score2 = metric(y_test, clf.predict(X_test), pos_label=None, average='weighted') assert_almost_equal(score1, score2) score1 = get_scorer('%s_macro' % prefix)(clf, X_test, y_test) score2 = metric(y_test, clf.predict(X_test), pos_label=None, average='macro') assert_almost_equal(score1, score2) score1 = get_scorer('%s_micro' % prefix)(clf, X_test, y_test) score2 = metric(y_test, clf.predict(X_test), pos_label=None, average='micro') assert_almost_equal(score1, score2) score1 = get_scorer('%s' % prefix)(clf, X_test, y_test) score2 = metric(y_test, clf.predict(X_test), pos_label=1) assert_almost_equal(score1, score2) # test fbeta score that takes an argument scorer = make_scorer(fbeta_score, beta=2) score1 = scorer(clf, X_test, y_test) score2 = fbeta_score(y_test, clf.predict(X_test), beta=2) assert_almost_equal(score1, score2) # test that custom scorer can be pickled unpickled_scorer = pickle.loads(pickle.dumps(scorer)) score3 = unpickled_scorer(clf, X_test, y_test) assert_almost_equal(score1, score3) # smoke test the repr: repr(fbeta_score)
def _test_ridge_loo(filter_): # test that can work with both dense or sparse matrices n_samples = X_diabetes.shape[0] ret = [] ridge_gcv = _RidgeGCV(fit_intercept=False) ridge = Ridge(alpha=1.0, fit_intercept=False) # generalized cross-validation (efficient leave-one-out) decomp = ridge_gcv._pre_compute(X_diabetes, y_diabetes) errors, c = ridge_gcv._errors(1.0, y_diabetes, *decomp) values, c = ridge_gcv._values(1.0, y_diabetes, *decomp) # brute-force leave-one-out: remove one example at a time errors2 = [] values2 = [] for i in range(n_samples): sel = np.arange(n_samples) != i X_new = X_diabetes[sel] y_new = y_diabetes[sel] ridge.fit(X_new, y_new) value = ridge.predict([X_diabetes[i]])[0] error = (y_diabetes[i] - value)**2 errors2.append(error) values2.append(value) # check that efficient and brute-force LOO give same results assert_almost_equal(errors, errors2) assert_almost_equal(values, values2) # generalized cross-validation (efficient leave-one-out, # SVD variation) decomp = ridge_gcv._pre_compute_svd(X_diabetes, y_diabetes) errors3, c = ridge_gcv._errors_svd(ridge.alpha, y_diabetes, *decomp) values3, c = ridge_gcv._values_svd(ridge.alpha, y_diabetes, *decomp) # check that efficient and SVD efficient LOO give same results assert_almost_equal(errors, errors3) assert_almost_equal(values, values3) # check best alpha ridge_gcv.fit(filter_(X_diabetes), y_diabetes) alpha_ = ridge_gcv.alpha_ ret.append(alpha_) # check that we get same best alpha with custom loss_func ridge_gcv2 = RidgeCV(fit_intercept=False, loss_func=mean_squared_error) ridge_gcv2.fit(filter_(X_diabetes), y_diabetes) assert_equal(ridge_gcv2.alpha_, alpha_) # check that we get same best alpha with custom score_func func = lambda x, y: -mean_squared_error(x, y) ridge_gcv3 = RidgeCV(fit_intercept=False, score_func=func) ridge_gcv3.fit(filter_(X_diabetes), y_diabetes) assert_equal(ridge_gcv3.alpha_, alpha_) # check that we get same best alpha with sample weights ridge_gcv.fit(filter_(X_diabetes), y_diabetes, sample_weight=np.ones(n_samples)) assert_equal(ridge_gcv.alpha_, alpha_) # simulate several responses Y = np.vstack((y_diabetes, y_diabetes)).T ridge_gcv.fit(filter_(X_diabetes), Y) Y_pred = ridge_gcv.predict(filter_(X_diabetes)) ridge_gcv.fit(filter_(X_diabetes), y_diabetes) y_pred = ridge_gcv.predict(filter_(X_diabetes)) assert_array_almost_equal(np.vstack((y_pred, y_pred)).T, Y_pred, decimal=5) return ret
def test_sgd_proba(self): # Check SGD.predict_proba # Hinge loss does not allow for conditional prob estimate. # We cannot use the factory here, because it defines predict_proba # anyway. clf = SGDClassifier(loss="hinge", alpha=0.01, n_iter=10).fit(X, Y) assert_false(hasattr(clf, "predict_proba")) assert_false(hasattr(clf, "predict_log_proba")) # log and modified_huber losses can output probability estimates # binary case for loss in ["log", "modified_huber"]: clf = self.factory(loss="modified_huber", alpha=0.01, n_iter=10) clf.fit(X, Y) p = clf.predict_proba([3, 2]) assert_true(p[0, 1] > 0.5) p = clf.predict_proba([-1, -1]) assert_true(p[0, 1] < 0.5) p = clf.predict_log_proba([3, 2]) assert_true(p[0, 1] > p[0, 0]) p = clf.predict_log_proba([-1, -1]) assert_true(p[0, 1] < p[0, 0]) # log loss multiclass probability estimates clf = self.factory(loss="log", alpha=0.01, n_iter=10).fit(X2, Y2) d = clf.decision_function([[.1, -.1], [.3, .2]]) p = clf.predict_proba([[.1, -.1], [.3, .2]]) assert_array_equal(np.argmax(p, axis=1), np.argmax(d, axis=1)) assert_almost_equal(p[0].sum(), 1) assert_true(np.all(p[0] >= 0)) p = clf.predict_proba([-1, -1]) d = clf.decision_function([-1, -1]) assert_array_equal(np.argsort(p[0]), np.argsort(d[0])) l = clf.predict_log_proba([3, 2]) p = clf.predict_proba([3, 2]) assert_array_almost_equal(np.log(p), l) l = clf.predict_log_proba([-1, -1]) p = clf.predict_proba([-1, -1]) assert_array_almost_equal(np.log(p), l) # Modified Huber multiclass probability estimates; requires a separate # test because the hard zero/one probabilities may destroy the # ordering present in decision_function output. clf = self.factory(loss="modified_huber", alpha=0.01, n_iter=10) clf.fit(X2, Y2) d = clf.decision_function([3, 2]) p = clf.predict_proba([3, 2]) if not isinstance(self, SparseSGDClassifierTestCase): assert_equal(np.argmax(d, axis=1), np.argmax(p, axis=1)) else: # XXX the sparse test gets a different X2 (?) assert_equal(np.argmin(d, axis=1), np.argmin(p, axis=1)) # the following sample produces decision_function values < -1, # which would cause naive normalization to fail (see comment # in SGDClassifier.predict_proba) x = X.mean(axis=0) d = clf.decision_function(x) if np.all(d < -1): # XXX not true in sparse test case (why?) p = clf.predict_proba(x) assert_array_almost_equal(p[0], [1 / 3.] * 3)
def test_kernel_diag(): """ Test that diag method of kernel returns consistent results. """ for kernel in kernels: K_call_diag = np.diag(kernel(X)) K_diag = kernel.diag(X) assert_almost_equal(K_call_diag, K_diag, 5)
def test_oas(): """Tests OAS module on a simple dataset. """ # test shrinkage coeff on a simple data set X_centered = X - X.mean(axis=0) oa = OAS(assume_centered=True) oa.fit(X_centered) shrinkage_ = oa.shrinkage_ score_ = oa.score(X_centered) # compare shrunk covariance obtained from data and from MLE estimate oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_centered, assume_centered=True) assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4) assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_) # compare estimates given by OAS and ShrunkCovariance scov = ShrunkCovariance(shrinkage=oa.shrinkage_, assume_centered=True) scov.fit(X_centered) assert_array_almost_equal(scov.covariance_, oa.covariance_, 4) # test with n_features = 1 X_1d = X[:, 0].reshape((-1, 1)) oa = OAS(assume_centered=True) oa.fit(X_1d) oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d, assume_centered=True) assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4) assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_) assert_array_almost_equal((X_1d**2).sum() / n_samples, oa.covariance_, 4) # test shrinkage coeff on a simple data set (without saving precision) oa = OAS(store_precision=False, assume_centered=True) oa.fit(X_centered) assert_almost_equal(oa.score(X_centered), score_, 4) assert (oa.precision_ is None) ### Same tests without assuming centered data # test shrinkage coeff on a simple data set oa = OAS() oa.fit(X) assert_almost_equal(oa.shrinkage_, shrinkage_, 4) assert_almost_equal(oa.score(X), score_, 4) # compare shrunk covariance obtained from data and from MLE estimate oa_cov_from_mle, oa_shinkrage_from_mle = oas(X) assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4) assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_) # compare estimates given by OAS and ShrunkCovariance scov = ShrunkCovariance(shrinkage=oa.shrinkage_) scov.fit(X) assert_array_almost_equal(scov.covariance_, oa.covariance_, 4) # test with n_features = 1 X_1d = X[:, 0].reshape((-1, 1)) oa = OAS() oa.fit(X_1d) oa_cov_from_mle, oa_shinkrage_from_mle = oas(X_1d) assert_array_almost_equal(oa_cov_from_mle, oa.covariance_, 4) assert_almost_equal(oa_shinkrage_from_mle, oa.shrinkage_) assert_array_almost_equal(empirical_covariance(X_1d), oa.covariance_, 4) # test with one sample X_1sample = np.arange(5) oa = OAS() with warnings.catch_warnings(record=True): oa.fit(X_1sample) # test shrinkage coeff on a simple data set (without saving precision) oa = OAS(store_precision=False) oa.fit(X) assert_almost_equal(oa.score(X), score_, 4) assert (oa.precision_ is None)
def test_calibration_curve(): """Check calibration_curve function""" y_true = np.array([0, 0, 0, 1, 1, 1]) y_pred = np.array([0., 0.1, 0.2, 0.8, 0.9, 1.]) prob_true, prob_pred = calibration_curve(y_true, y_pred, n_bins=2) prob_true_unnormalized, prob_pred_unnormalized = \ calibration_curve(y_true, y_pred * 2, n_bins=2, normalize=True) assert len(prob_true) == len(prob_pred) assert len(prob_true) == 2 assert_almost_equal(prob_true, [0, 1]) assert_almost_equal(prob_pred, [0.1, 0.9]) assert_almost_equal(prob_true, prob_true_unnormalized) assert_almost_equal(prob_pred, prob_pred_unnormalized) # probabilities outside [0, 1] should not be accepted when normalize # is set to False assert_raises(ValueError, calibration_curve, [1.1], [-0.1], normalize=False) # test that quantiles work as expected y_true2 = np.array([0, 0, 0, 0, 1, 1]) y_pred2 = np.array([0., 0.1, 0.2, 0.5, 0.9, 1.]) prob_true_quantile, prob_pred_quantile = calibration_curve( y_true2, y_pred2, n_bins=2, strategy='quantile') assert len(prob_true_quantile) == len(prob_pred_quantile) assert len(prob_true_quantile) == 2 assert_almost_equal(prob_true_quantile, [0, 2 / 3]) assert_almost_equal(prob_pred_quantile, [0.1, 0.8]) # Check that error is raised when invalid strategy is selected assert_raises(ValueError, calibration_curve, y_true2, y_pred2, strategy='percentile')
def check_sample_weight_invariance(name, metric, y1, y2): rng = np.random.RandomState(0) sample_weight = rng.randint(1, 10, size=len(y1)) # check that unit weights gives the same score as no weight unweighted_score = metric(y1, y2, sample_weight=None) assert_almost_equal( unweighted_score, metric(y1, y2, sample_weight=np.ones(shape=len(y1))), err_msg="For %s sample_weight=None is not equivalent to " "sample_weight=ones" % name) # check that the weighted and unweighted scores are unequal weighted_score = metric(y1, y2, sample_weight=sample_weight) assert_not_equal(unweighted_score, weighted_score, msg="Unweighted and weighted scores are unexpectedly " "equal (%f) for %s" % (weighted_score, name)) # check that sample_weight can be a list weighted_score_list = metric(y1, y2, sample_weight=sample_weight.tolist()) assert_almost_equal( weighted_score, weighted_score_list, err_msg=("Weighted scores for array and list " "sample_weight input are not equal (%f != %f) for %s") % (weighted_score, weighted_score_list, name)) # check that integer weights is the same as repeated samples repeat_weighted_score = metric(np.repeat(y1, sample_weight, axis=0), np.repeat(y2, sample_weight, axis=0), sample_weight=None) assert_almost_equal( weighted_score, repeat_weighted_score, err_msg="Weighting %s is not equal to repeating samples" % name) # check that ignoring a fraction of the samples is equivalent to setting # the corresponding weights to zero sample_weight_subset = sample_weight[1::2] sample_weight_zeroed = np.copy(sample_weight) sample_weight_zeroed[::2] = 0 y1_subset = y1[1::2] y2_subset = y2[1::2] weighted_score_subset = metric(y1_subset, y2_subset, sample_weight=sample_weight_subset) weighted_score_zeroed = metric(y1, y2, sample_weight=sample_weight_zeroed) assert_almost_equal( weighted_score_subset, weighted_score_zeroed, err_msg=("Zeroing weights does not give the same result as " "removing the corresponding samples (%f != %f) for %s" % (weighted_score_zeroed, weighted_score_subset, name))) if not name.startswith('unnormalized'): # check that the score is invariant under scaling of the weights by a # common factor for scaling in [2, 0.3]: assert_almost_equal(weighted_score, metric(y1, y2, sample_weight=sample_weight * scaling), err_msg="%s sample_weight is not invariant " "under scaling" % name) # Check that if sample_weight.shape[0] != y_true.shape[0], it raised an # error assert_raises(Exception, metric, y1, y2, sample_weight=np.hstack([sample_weight, sample_weight]))
def test_covariance(): """Tests Covariance module on a simple dataset. """ # test covariance fit from data cov = EmpiricalCovariance() cov.fit(X) emp_cov = empirical_covariance(X) assert_array_almost_equal(emp_cov, cov.covariance_, 4) assert_almost_equal(cov.error_norm(emp_cov), 0) assert_almost_equal(cov.error_norm(emp_cov, norm='spectral'), 0) assert_almost_equal(cov.error_norm(emp_cov, norm='frobenius'), 0) assert_almost_equal(cov.error_norm(emp_cov, scaling=False), 0) assert_almost_equal(cov.error_norm(emp_cov, squared=False), 0) assert_raises(NotImplementedError, cov.error_norm, emp_cov, norm='foo') # Mahalanobis distances computation test mahal_dist = cov.mahalanobis(X) print np.amin(mahal_dist), np.amax(mahal_dist) assert (np.amin(mahal_dist) > 0) # test with n_features = 1 X_1d = X[:, 0].reshape((-1, 1)) cov = EmpiricalCovariance() cov.fit(X_1d) assert_array_almost_equal(empirical_covariance(X_1d), cov.covariance_, 4) assert_almost_equal(cov.error_norm(empirical_covariance(X_1d)), 0) assert_almost_equal( cov.error_norm(empirical_covariance(X_1d), norm='spectral'), 0) # test with one sample X_1sample = np.arange(5) cov = EmpiricalCovariance() with warnings.catch_warnings(record=True): cov.fit(X_1sample) # test integer type X_integer = np.asarray([[0, 1], [1, 0]]) result = np.asarray([[0.25, -0.25], [-0.25, 0.25]]) assert_array_almost_equal(empirical_covariance(X_integer), result) # test centered case cov = EmpiricalCovariance(assume_centered=True) cov.fit(X) assert_array_equal(cov.location_, np.zeros(X.shape[1]))
def test_label_ranking_loss(): assert_almost_equal(label_ranking_loss([[0, 1]], [[0.25, 0.75]]), 0) assert_almost_equal(label_ranking_loss([[0, 1]], [[0.75, 0.25]]), 1) assert_almost_equal(label_ranking_loss([[0, 0, 1]], [[0.25, 0.5, 0.75]]), 0) assert_almost_equal(label_ranking_loss([[0, 1, 0]], [[0.25, 0.5, 0.75]]), 1 / 2) assert_almost_equal(label_ranking_loss([[0, 1, 1]], [[0.25, 0.5, 0.75]]), 0) assert_almost_equal(label_ranking_loss([[1, 0, 0]], [[0.25, 0.5, 0.75]]), 2 / 2) assert_almost_equal(label_ranking_loss([[1, 0, 1]], [[0.25, 0.5, 0.75]]), 1 / 2) assert_almost_equal(label_ranking_loss([[1, 1, 0]], [[0.25, 0.5, 0.75]]), 2 / 2) # Undefined metrics - the ranking doesn't matter assert_almost_equal(label_ranking_loss([[0, 0]], [[0.75, 0.25]]), 0) assert_almost_equal(label_ranking_loss([[1, 1]], [[0.75, 0.25]]), 0) assert_almost_equal(label_ranking_loss([[0, 0]], [[0.5, 0.5]]), 0) assert_almost_equal(label_ranking_loss([[1, 1]], [[0.5, 0.5]]), 0) assert_almost_equal(label_ranking_loss([[0, 0, 0]], [[0.5, 0.75, 0.25]]), 0) assert_almost_equal(label_ranking_loss([[1, 1, 1]], [[0.5, 0.75, 0.25]]), 0) assert_almost_equal(label_ranking_loss([[0, 0, 0]], [[0.25, 0.5, 0.5]]), 0) assert_almost_equal(label_ranking_loss([[1, 1, 1]], [[0.25, 0.5, 0.5]]), 0) # Non trival case assert_almost_equal( label_ranking_loss([[0, 1, 0], [1, 1, 0]], [[0.1, 10., -3], [0, 1, 3]]), (0 + 2 / 2) / 2.) assert_almost_equal( label_ranking_loss([[0, 1, 0], [1, 1, 0], [0, 1, 1]], [[0.1, 10, -3], [0, 1, 3], [0, 2, 0]]), (0 + 2 / 2 + 1 / 2) / 3.) assert_almost_equal( label_ranking_loss([[0, 1, 0], [1, 1, 0], [0, 1, 1]], [[0.1, 10, -3], [3, 1, 3], [0, 2, 0]]), (0 + 2 / 2 + 1 / 2) / 3.) # Sparse csr matrices assert_almost_equal( label_ranking_loss(csr_matrix(np.array([[0, 1, 0], [1, 1, 0]])), [[0.1, 10, -3], [3, 1, 3]]), (0 + 2 / 2) / 2.)
def test_ledoit_wolf(): """Tests LedoitWolf module on a simple dataset. """ # test shrinkage coeff on a simple data set X_centered = X - X.mean(axis=0) lw = LedoitWolf(assume_centered=True) lw.fit(X_centered) shrinkage_ = lw.shrinkage_ score_ = lw.score(X_centered) assert_almost_equal( ledoit_wolf_shrinkage(X_centered, assume_centered=True), shrinkage_) assert_almost_equal( ledoit_wolf_shrinkage(X_centered, assume_centered=True, block_size=6), shrinkage_) # compare shrunk covariance obtained from data and from MLE estimate lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_centered, assume_centered=True) assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4) assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_) # compare estimates given by LW and ShrunkCovariance scov = ShrunkCovariance(shrinkage=lw.shrinkage_, assume_centered=True) scov.fit(X_centered) assert_array_almost_equal(scov.covariance_, lw.covariance_, 4) # test with n_features = 1 X_1d = X[:, 0].reshape((-1, 1)) lw = LedoitWolf(assume_centered=True) lw.fit(X_1d) lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d, assume_centered=True) assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4) assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_) assert_array_almost_equal((X_1d**2).sum() / n_samples, lw.covariance_, 4) # test shrinkage coeff on a simple data set (without saving precision) lw = LedoitWolf(store_precision=False, assume_centered=True) lw.fit(X_centered) assert_almost_equal(lw.score(X_centered), score_, 4) assert (lw.precision_ is None) # (too) large data set X_large = np.ones((20, 200)) assert_raises(MemoryError, ledoit_wolf, X_large, block_size=100) # Same tests without assuming centered data # test shrinkage coeff on a simple data set lw = LedoitWolf() lw.fit(X) assert_almost_equal(lw.shrinkage_, shrinkage_, 4) assert_almost_equal(lw.shrinkage_, ledoit_wolf_shrinkage(X)) assert_almost_equal(lw.shrinkage_, ledoit_wolf(X)[1]) assert_almost_equal(lw.score(X), score_, 4) # compare shrunk covariance obtained from data and from MLE estimate lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X) assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4) assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_) # compare estimates given by LW and ShrunkCovariance scov = ShrunkCovariance(shrinkage=lw.shrinkage_) scov.fit(X) assert_array_almost_equal(scov.covariance_, lw.covariance_, 4) # test with n_features = 1 X_1d = X[:, 0].reshape((-1, 1)) lw = LedoitWolf() lw.fit(X_1d) lw_cov_from_mle, lw_shinkrage_from_mle = ledoit_wolf(X_1d) assert_array_almost_equal(lw_cov_from_mle, lw.covariance_, 4) assert_almost_equal(lw_shinkrage_from_mle, lw.shrinkage_) assert_array_almost_equal(empirical_covariance(X_1d), lw.covariance_, 4) # test with one sample X_1sample = np.arange(5) lw = LedoitWolf() with warnings.catch_warnings(record=True): lw.fit(X_1sample) # test shrinkage coeff on a simple data set (without saving precision) lw = LedoitWolf(store_precision=False) lw.fit(X) assert_almost_equal(lw.score(X), score_, 4) assert (lw.precision_ is None)
def test_coverage_error(): # Toy case assert_almost_equal(coverage_error([[0, 1]], [[0.25, 0.75]]), 1) assert_almost_equal(coverage_error([[0, 1]], [[0.75, 0.25]]), 2) assert_almost_equal(coverage_error([[1, 1]], [[0.75, 0.25]]), 2) assert_almost_equal(coverage_error([[0, 0]], [[0.75, 0.25]]), 0) assert_almost_equal(coverage_error([[0, 0, 0]], [[0.25, 0.5, 0.75]]), 0) assert_almost_equal(coverage_error([[0, 0, 1]], [[0.25, 0.5, 0.75]]), 1) assert_almost_equal(coverage_error([[0, 1, 0]], [[0.25, 0.5, 0.75]]), 2) assert_almost_equal(coverage_error([[0, 1, 1]], [[0.25, 0.5, 0.75]]), 2) assert_almost_equal(coverage_error([[1, 0, 0]], [[0.25, 0.5, 0.75]]), 3) assert_almost_equal(coverage_error([[1, 0, 1]], [[0.25, 0.5, 0.75]]), 3) assert_almost_equal(coverage_error([[1, 1, 0]], [[0.25, 0.5, 0.75]]), 3) assert_almost_equal(coverage_error([[1, 1, 1]], [[0.25, 0.5, 0.75]]), 3) assert_almost_equal(coverage_error([[0, 0, 0]], [[0.75, 0.5, 0.25]]), 0) assert_almost_equal(coverage_error([[0, 0, 1]], [[0.75, 0.5, 0.25]]), 3) assert_almost_equal(coverage_error([[0, 1, 0]], [[0.75, 0.5, 0.25]]), 2) assert_almost_equal(coverage_error([[0, 1, 1]], [[0.75, 0.5, 0.25]]), 3) assert_almost_equal(coverage_error([[1, 0, 0]], [[0.75, 0.5, 0.25]]), 1) assert_almost_equal(coverage_error([[1, 0, 1]], [[0.75, 0.5, 0.25]]), 3) assert_almost_equal(coverage_error([[1, 1, 0]], [[0.75, 0.5, 0.25]]), 2) assert_almost_equal(coverage_error([[1, 1, 1]], [[0.75, 0.5, 0.25]]), 3) assert_almost_equal(coverage_error([[0, 0, 0]], [[0.5, 0.75, 0.25]]), 0) assert_almost_equal(coverage_error([[0, 0, 1]], [[0.5, 0.75, 0.25]]), 3) assert_almost_equal(coverage_error([[0, 1, 0]], [[0.5, 0.75, 0.25]]), 1) assert_almost_equal(coverage_error([[0, 1, 1]], [[0.5, 0.75, 0.25]]), 3) assert_almost_equal(coverage_error([[1, 0, 0]], [[0.5, 0.75, 0.25]]), 2) assert_almost_equal(coverage_error([[1, 0, 1]], [[0.5, 0.75, 0.25]]), 3) assert_almost_equal(coverage_error([[1, 1, 0]], [[0.5, 0.75, 0.25]]), 2) assert_almost_equal(coverage_error([[1, 1, 1]], [[0.5, 0.75, 0.25]]), 3) # Non trival case assert_almost_equal( coverage_error([[0, 1, 0], [1, 1, 0]], [[0.1, 10., -3], [0, 1, 3]]), (1 + 3) / 2.) assert_almost_equal( coverage_error([[0, 1, 0], [1, 1, 0], [0, 1, 1]], [[0.1, 10, -3], [0, 1, 3], [0, 2, 0]]), (1 + 3 + 3) / 3.) assert_almost_equal( coverage_error([[0, 1, 0], [1, 1, 0], [0, 1, 1]], [[0.1, 10, -3], [3, 1, 3], [0, 2, 0]]), (1 + 3 + 3) / 3.)
def test_format_invariance_with_1d_vectors(): random_state = check_random_state(0) y1 = random_state.randint(0, 2, size=(20, )) y2 = random_state.randint(0, 2, size=(20, )) y1_list = list(y1) y2_list = list(y2) y1_1d, y2_1d = np.array(y1), np.array(y2) assert_equal(y1_1d.ndim, 1) assert_equal(y2_1d.ndim, 1) y1_column = np.reshape(y1_1d, (-1, 1)) y2_column = np.reshape(y2_1d, (-1, 1)) y1_row = np.reshape(y1_1d, (1, -1)) y2_row = np.reshape(y2_1d, (1, -1)) for name, metric in ALL_METRICS.items(): if name in METRIC_UNDEFINED_BINARY_MULTICLASS: continue measure = metric(y1, y2) assert_almost_equal(metric(y1_list, y2_list), measure, err_msg="%s is not representation invariant " "with list" % name) assert_almost_equal(metric(y1_1d, y2_1d), measure, err_msg="%s is not representation invariant " "with np-array-1d" % name) assert_almost_equal(metric(y1_column, y2_column), measure, err_msg="%s is not representation invariant " "with np-array-column" % name) # Mix format support assert_almost_equal(metric(y1_1d, y2_list), measure, err_msg="%s is not representation invariant " "with mix np-array-1d and list" % name) assert_almost_equal(metric(y1_list, y2_1d), measure, err_msg="%s is not representation invariant " "with mix np-array-1d and list" % name) assert_almost_equal(metric(y1_1d, y2_column), measure, err_msg="%s is not representation invariant " "with mix np-array-1d and np-array-column" % name) assert_almost_equal(metric(y1_column, y2_1d), measure, err_msg="%s is not representation invariant " "with mix np-array-1d and np-array-column" % name) assert_almost_equal(metric(y1_list, y2_column), measure, err_msg="%s is not representation invariant " "with mix list and np-array-column" % name) assert_almost_equal(metric(y1_column, y2_list), measure, err_msg="%s is not representation invariant " "with mix list and np-array-column" % name) # These mix representations aren't allowed assert_raises(ValueError, metric, y1_1d, y2_row) assert_raises(ValueError, metric, y1_row, y2_1d) assert_raises(ValueError, metric, y1_list, y2_row) assert_raises(ValueError, metric, y1_row, y2_list) assert_raises(ValueError, metric, y1_column, y2_row) assert_raises(ValueError, metric, y1_row, y2_column) # NB: We do not test for y1_row, y2_row as these may be # interpreted as multilabel or multioutput data. if (name not in (MULTIOUTPUT_METRICS + THRESHOLDED_MULTILABEL_METRICS + MULTILABELS_METRICS)): assert_raises(ValueError, metric, y1_row, y2_row)
def test_precision_recall_curve_toydata(): with np.errstate(all="raise"): # Binary classification y_true = [0, 1] y_score = [0, 1] p, r, _ = precision_recall_curve(y_true, y_score) auc_prc = average_precision_score(y_true, y_score) assert_array_almost_equal(p, [1, 1]) assert_array_almost_equal(r, [1, 0]) assert_almost_equal(auc_prc, 1.) y_true = [0, 1] y_score = [1, 0] p, r, _ = precision_recall_curve(y_true, y_score) auc_prc = average_precision_score(y_true, y_score) assert_array_almost_equal(p, [0.5, 0., 1.]) assert_array_almost_equal(r, [1., 0., 0.]) assert_almost_equal(auc_prc, 0.25) y_true = [1, 0] y_score = [1, 1] p, r, _ = precision_recall_curve(y_true, y_score) auc_prc = average_precision_score(y_true, y_score) assert_array_almost_equal(p, [0.5, 1]) assert_array_almost_equal(r, [1., 0]) assert_almost_equal(auc_prc, .75) y_true = [1, 0] y_score = [1, 0] p, r, _ = precision_recall_curve(y_true, y_score) auc_prc = average_precision_score(y_true, y_score) assert_array_almost_equal(p, [1, 1]) assert_array_almost_equal(r, [1, 0]) assert_almost_equal(auc_prc, 1.) y_true = [1, 0] y_score = [0.5, 0.5] p, r, _ = precision_recall_curve(y_true, y_score) auc_prc = average_precision_score(y_true, y_score) assert_array_almost_equal(p, [0.5, 1]) assert_array_almost_equal(r, [1, 0.]) assert_almost_equal(auc_prc, .75) y_true = [0, 0] y_score = [0.25, 0.75] assert_raises(Exception, precision_recall_curve, y_true, y_score) assert_raises(Exception, average_precision_score, y_true, y_score) y_true = [1, 1] y_score = [0.25, 0.75] p, r, _ = precision_recall_curve(y_true, y_score) assert_almost_equal(average_precision_score(y_true, y_score), 1.) assert_array_almost_equal(p, [1., 1., 1.]) assert_array_almost_equal(r, [1, 0.5, 0.]) # Multi-label classification task y_true = np.array([[0, 1], [0, 1]]) y_score = np.array([[0, 1], [0, 1]]) assert_raises(Exception, average_precision_score, y_true, y_score, average="macro") assert_raises(Exception, average_precision_score, y_true, y_score, average="weighted") assert_almost_equal( average_precision_score(y_true, y_score, average="samples"), 1.) assert_almost_equal( average_precision_score(y_true, y_score, average="micro"), 1.) y_true = np.array([[0, 1], [0, 1]]) y_score = np.array([[0, 1], [1, 0]]) assert_raises(Exception, average_precision_score, y_true, y_score, average="macro") assert_raises(Exception, average_precision_score, y_true, y_score, average="weighted") assert_almost_equal( average_precision_score(y_true, y_score, average="samples"), 0.625) assert_almost_equal( average_precision_score(y_true, y_score, average="micro"), 0.625) y_true = np.array([[1, 0], [0, 1]]) y_score = np.array([[0, 1], [1, 0]]) assert_almost_equal( average_precision_score(y_true, y_score, average="macro"), 0.25) assert_almost_equal( average_precision_score(y_true, y_score, average="weighted"), 0.25) assert_almost_equal( average_precision_score(y_true, y_score, average="samples"), 0.25) assert_almost_equal( average_precision_score(y_true, y_score, average="micro"), 0.25) y_true = np.array([[1, 0], [0, 1]]) y_score = np.array([[0.5, 0.5], [0.5, 0.5]]) assert_almost_equal( average_precision_score(y_true, y_score, average="macro"), 0.75) assert_almost_equal( average_precision_score(y_true, y_score, average="weighted"), 0.75) assert_almost_equal( average_precision_score(y_true, y_score, average="samples"), 0.75) assert_almost_equal( average_precision_score(y_true, y_score, average="micro"), 0.75)
def test_coverage_tie_handling(): assert_almost_equal(coverage_error([[0, 0]], [[0.5, 0.5]]), 0) assert_almost_equal(coverage_error([[1, 0]], [[0.5, 0.5]]), 2) assert_almost_equal(coverage_error([[0, 1]], [[0.5, 0.5]]), 2) assert_almost_equal(coverage_error([[1, 1]], [[0.5, 0.5]]), 2) assert_almost_equal(coverage_error([[0, 0, 0]], [[0.25, 0.5, 0.5]]), 0) assert_almost_equal(coverage_error([[0, 0, 1]], [[0.25, 0.5, 0.5]]), 2) assert_almost_equal(coverage_error([[0, 1, 0]], [[0.25, 0.5, 0.5]]), 2) assert_almost_equal(coverage_error([[0, 1, 1]], [[0.25, 0.5, 0.5]]), 2) assert_almost_equal(coverage_error([[1, 0, 0]], [[0.25, 0.5, 0.5]]), 3) assert_almost_equal(coverage_error([[1, 0, 1]], [[0.25, 0.5, 0.5]]), 3) assert_almost_equal(coverage_error([[1, 1, 0]], [[0.25, 0.5, 0.5]]), 3) assert_almost_equal(coverage_error([[1, 1, 1]], [[0.25, 0.5, 0.5]]), 3)
def test_ranking_loss_ties_handling(): # Tie handling assert_almost_equal(label_ranking_loss([[1, 0]], [[0.5, 0.5]]), 1) assert_almost_equal(label_ranking_loss([[0, 1]], [[0.5, 0.5]]), 1) assert_almost_equal(label_ranking_loss([[0, 0, 1]], [[0.25, 0.5, 0.5]]), 1 / 2) assert_almost_equal(label_ranking_loss([[0, 1, 0]], [[0.25, 0.5, 0.5]]), 1 / 2) assert_almost_equal(label_ranking_loss([[0, 1, 1]], [[0.25, 0.5, 0.5]]), 0) assert_almost_equal(label_ranking_loss([[1, 0, 0]], [[0.25, 0.5, 0.5]]), 1) assert_almost_equal(label_ranking_loss([[1, 0, 1]], [[0.25, 0.5, 0.5]]), 1) assert_almost_equal(label_ranking_loss([[1, 1, 0]], [[0.25, 0.5, 0.5]]), 1)
def check_lrap_toy(lrap_score): # Check on several small example that it works assert_almost_equal(lrap_score([[0, 1]], [[0.25, 0.75]]), 1) assert_almost_equal(lrap_score([[0, 1]], [[0.75, 0.25]]), 1 / 2) assert_almost_equal(lrap_score([[1, 1]], [[0.75, 0.25]]), 1) assert_almost_equal(lrap_score([[0, 0, 1]], [[0.25, 0.5, 0.75]]), 1) assert_almost_equal(lrap_score([[0, 1, 0]], [[0.25, 0.5, 0.75]]), 1 / 2) assert_almost_equal(lrap_score([[0, 1, 1]], [[0.25, 0.5, 0.75]]), 1) assert_almost_equal(lrap_score([[1, 0, 0]], [[0.25, 0.5, 0.75]]), 1 / 3) assert_almost_equal(lrap_score([[1, 0, 1]], [[0.25, 0.5, 0.75]]), (2 / 3 + 1 / 1) / 2) assert_almost_equal(lrap_score([[1, 1, 0]], [[0.25, 0.5, 0.75]]), (2 / 3 + 1 / 2) / 2) assert_almost_equal(lrap_score([[0, 0, 1]], [[0.75, 0.5, 0.25]]), 1 / 3) assert_almost_equal(lrap_score([[0, 1, 0]], [[0.75, 0.5, 0.25]]), 1 / 2) assert_almost_equal(lrap_score([[0, 1, 1]], [[0.75, 0.5, 0.25]]), (1 / 2 + 2 / 3) / 2) assert_almost_equal(lrap_score([[1, 0, 0]], [[0.75, 0.5, 0.25]]), 1) assert_almost_equal(lrap_score([[1, 0, 1]], [[0.75, 0.5, 0.25]]), (1 + 2 / 3) / 2) assert_almost_equal(lrap_score([[1, 1, 0]], [[0.75, 0.5, 0.25]]), 1) assert_almost_equal(lrap_score([[1, 1, 1]], [[0.75, 0.5, 0.25]]), 1) assert_almost_equal(lrap_score([[0, 0, 1]], [[0.5, 0.75, 0.25]]), 1 / 3) assert_almost_equal(lrap_score([[0, 1, 0]], [[0.5, 0.75, 0.25]]), 1) assert_almost_equal(lrap_score([[0, 1, 1]], [[0.5, 0.75, 0.25]]), (1 + 2 / 3) / 2) assert_almost_equal(lrap_score([[1, 0, 0]], [[0.5, 0.75, 0.25]]), 1 / 2) assert_almost_equal(lrap_score([[1, 0, 1]], [[0.5, 0.75, 0.25]]), (1 / 2 + 2 / 3) / 2) assert_almost_equal(lrap_score([[1, 1, 0]], [[0.5, 0.75, 0.25]]), 1) assert_almost_equal(lrap_score([[1, 1, 1]], [[0.5, 0.75, 0.25]]), 1) # Tie handling assert_almost_equal(lrap_score([[1, 0]], [[0.5, 0.5]]), 0.5) assert_almost_equal(lrap_score([[0, 1]], [[0.5, 0.5]]), 0.5) assert_almost_equal(lrap_score([[1, 1]], [[0.5, 0.5]]), 1) assert_almost_equal(lrap_score([[0, 0, 1]], [[0.25, 0.5, 0.5]]), 0.5) assert_almost_equal(lrap_score([[0, 1, 0]], [[0.25, 0.5, 0.5]]), 0.5) assert_almost_equal(lrap_score([[0, 1, 1]], [[0.25, 0.5, 0.5]]), 1) assert_almost_equal(lrap_score([[1, 0, 0]], [[0.25, 0.5, 0.5]]), 1 / 3) assert_almost_equal(lrap_score([[1, 0, 1]], [[0.25, 0.5, 0.5]]), (2 / 3 + 1 / 2) / 2) assert_almost_equal(lrap_score([[1, 1, 0]], [[0.25, 0.5, 0.5]]), (2 / 3 + 1 / 2) / 2) assert_almost_equal(lrap_score([[1, 1, 1]], [[0.25, 0.5, 0.5]]), 1) assert_almost_equal(lrap_score([[1, 1, 0]], [[0.5, 0.5, 0.5]]), 2 / 3) assert_almost_equal(lrap_score([[1, 1, 1, 0]], [[0.5, 0.5, 0.5, 0.5]]), 3 / 4)
def test_importances_asymptotic(): # Check whether variable importances of totally randomized trees # converge towards their theoretical values (See Louppe et al, # Understanding variable importances in forests of randomized trees, 2013). def binomial(k, n): return 0 if k < 0 or k > n else comb(int(n), int(k), exact=True) def entropy(samples): n_samples = len(samples) entropy = 0. for count in np.bincount(samples): p = 1. * count / n_samples if p > 0: entropy -= p * np.log2(p) return entropy def mdi_importance(X_m, X, y): n_samples, n_features = X.shape features = list(range(n_features)) features.pop(X_m) values = [np.unique(X[:, i]) for i in range(n_features)] imp = 0. for k in range(n_features): # Weight of each B of size k coef = 1. / (binomial(k, n_features) * (n_features - k)) # For all B of size k for B in combinations(features, k): # For all values B=b for b in product(*[values[B[j]] for j in range(k)]): mask_b = np.ones(n_samples, dtype=np.bool) for j in range(k): mask_b &= X[:, B[j]] == b[j] X_, y_ = X[mask_b, :], y[mask_b] n_samples_b = len(X_) if n_samples_b > 0: children = [] for xi in values[X_m]: mask_xi = X_[:, X_m] == xi children.append(y_[mask_xi]) imp += ( coef * (1. * n_samples_b / n_samples) # P(B=b) * (entropy(y_) - sum([ entropy(c) * len(c) / n_samples_b for c in children ]))) return imp data = np.array([[0, 0, 1, 0, 0, 1, 0, 1], [1, 0, 1, 1, 1, 0, 1, 2], [1, 0, 1, 1, 0, 1, 1, 3], [0, 1, 1, 1, 0, 1, 0, 4], [1, 1, 0, 1, 0, 1, 1, 5], [1, 1, 0, 1, 1, 1, 1, 6], [1, 0, 1, 0, 0, 1, 0, 7], [1, 1, 1, 1, 1, 1, 1, 8], [1, 1, 1, 1, 0, 1, 1, 9], [1, 1, 1, 0, 1, 1, 1, 0]]) X, y = np.array(data[:, :7], dtype=np.bool), data[:, 7] n_features = X.shape[1] # Compute true importances true_importances = np.zeros(n_features) for i in range(n_features): true_importances[i] = mdi_importance(i, X, y) # Estimate importances with totally randomized trees clf = ExtraTreesClassifier(n_estimators=500, max_features=1, criterion="entropy", random_state=0).fit(X, y) importances = sum( tree.tree_.compute_feature_importances(normalize=False) for tree in clf.estimators_) / clf.n_estimators # Check correctness assert_almost_equal(entropy(y), sum(importances)) assert_less(np.abs(true_importances - importances).mean(), 0.01)
def test_roc_curve_toydata(): # Binary classification y_true = [0, 1] y_score = [0, 1] tpr, fpr, _ = roc_curve(y_true, y_score) roc_auc = roc_auc_score(y_true, y_score) assert_array_almost_equal(tpr, [0, 1]) assert_array_almost_equal(fpr, [1, 1]) assert_almost_equal(roc_auc, 1.) y_true = [0, 1] y_score = [1, 0] tpr, fpr, _ = roc_curve(y_true, y_score) roc_auc = roc_auc_score(y_true, y_score) assert_array_almost_equal(tpr, [0, 1, 1]) assert_array_almost_equal(fpr, [0, 0, 1]) assert_almost_equal(roc_auc, 0.) y_true = [1, 0] y_score = [1, 1] tpr, fpr, _ = roc_curve(y_true, y_score) roc_auc = roc_auc_score(y_true, y_score) assert_array_almost_equal(tpr, [0, 1]) assert_array_almost_equal(fpr, [0, 1]) assert_almost_equal(roc_auc, 0.5) y_true = [1, 0] y_score = [1, 0] tpr, fpr, _ = roc_curve(y_true, y_score) roc_auc = roc_auc_score(y_true, y_score) assert_array_almost_equal(tpr, [0, 1]) assert_array_almost_equal(fpr, [1, 1]) assert_almost_equal(roc_auc, 1.) y_true = [1, 0] y_score = [0.5, 0.5] tpr, fpr, _ = roc_curve(y_true, y_score) roc_auc = roc_auc_score(y_true, y_score) assert_array_almost_equal(tpr, [0, 1]) assert_array_almost_equal(fpr, [0, 1]) assert_almost_equal(roc_auc, .5) y_true = [0, 0] y_score = [0.25, 0.75] # assert UndefinedMetricWarning because of no positive sample in y_true tpr, fpr, _ = assert_warns(UndefinedMetricWarning, roc_curve, y_true, y_score) assert_raises(ValueError, roc_auc_score, y_true, y_score) assert_array_almost_equal(tpr, [0., 0.5, 1.]) assert_array_almost_equal(fpr, [np.nan, np.nan, np.nan]) y_true = [1, 1] y_score = [0.25, 0.75] # assert UndefinedMetricWarning because of no negative sample in y_true tpr, fpr, _ = assert_warns(UndefinedMetricWarning, roc_curve, y_true, y_score) assert_raises(ValueError, roc_auc_score, y_true, y_score) assert_array_almost_equal(tpr, [np.nan, np.nan]) assert_array_almost_equal(fpr, [0.5, 1.]) # Multi-label classification task y_true = np.array([[0, 1], [0, 1]]) y_score = np.array([[0, 1], [0, 1]]) assert_raises(ValueError, roc_auc_score, y_true, y_score, average="macro") assert_raises(ValueError, roc_auc_score, y_true, y_score, average="weighted") assert_almost_equal(roc_auc_score(y_true, y_score, average="samples"), 1.) assert_almost_equal(roc_auc_score(y_true, y_score, average="micro"), 1.) y_true = np.array([[0, 1], [0, 1]]) y_score = np.array([[0, 1], [1, 0]]) assert_raises(ValueError, roc_auc_score, y_true, y_score, average="macro") assert_raises(ValueError, roc_auc_score, y_true, y_score, average="weighted") assert_almost_equal(roc_auc_score(y_true, y_score, average="samples"), 0.5) assert_almost_equal(roc_auc_score(y_true, y_score, average="micro"), 0.5) y_true = np.array([[1, 0], [0, 1]]) y_score = np.array([[0, 1], [1, 0]]) assert_almost_equal(roc_auc_score(y_true, y_score, average="macro"), 0) assert_almost_equal(roc_auc_score(y_true, y_score, average="weighted"), 0) assert_almost_equal(roc_auc_score(y_true, y_score, average="samples"), 0) assert_almost_equal(roc_auc_score(y_true, y_score, average="micro"), 0) y_true = np.array([[1, 0], [0, 1]]) y_score = np.array([[0.5, 0.5], [0.5, 0.5]]) assert_almost_equal(roc_auc_score(y_true, y_score, average="macro"), .5) assert_almost_equal(roc_auc_score(y_true, y_score, average="weighted"), .5) assert_almost_equal(roc_auc_score(y_true, y_score, average="samples"), .5) assert_almost_equal(roc_auc_score(y_true, y_score, average="micro"), .5)
def test_sparsa_multiclass_l1l2(): for data in (mult_dense, mult_csr): clf = SparsaClassifier(max_iter=500, penalty="l1/l2", multiclass=True) clf.fit(data, mult_target) assert_almost_equal(clf.score(data, mult_target), 0.97)
def test_search_iid_param(): # Test the IID parameter # noise-free simple 2d-data X, y = make_blobs(centers=[[0, 0], [1, 0], [0, 1], [1, 1]], random_state=0, cluster_std=0.1, shuffle=False, n_samples=80) # split dataset into two folds that are not iid # first one contains data of all 4 blobs, second only from two. mask = np.ones(X.shape[0], dtype=np.bool) mask[np.where(y == 1)[0][::2]] = 0 mask[np.where(y == 2)[0][::2]] = 0 # this leads to perfect classification on one fold and a score of 1/3 on # the other # create "cv" for splits cv = [[mask, ~mask], [~mask, mask]] # once with iid=True (default) grid_search = GridSearchCV(SVC(), param_grid={'C': [1, 10]}, cv=cv) random_search = RandomizedSearchCV(SVC(), n_iter=2, param_distributions={'C': [1, 10]}, cv=cv) for search in (grid_search, random_search): search.fit(X, y) assert_true(search.iid) test_cv_scores = np.array( list(search.cv_results_['split%d_test_score' % s_i][0] for s_i in range(search.n_splits_))) train_cv_scores = np.array( list(search.cv_results_['split%d_train_' 'score' % s_i][0] for s_i in range(search.n_splits_))) test_mean = search.cv_results_['mean_test_score'][0] test_std = search.cv_results_['std_test_score'][0] train_cv_scores = np.array( list(search.cv_results_['split%d_train_' 'score' % s_i][0] for s_i in range(search.n_splits_))) train_mean = search.cv_results_['mean_train_score'][0] train_std = search.cv_results_['std_train_score'][0] # Test the first candidate assert_equal(search.cv_results_['param_C'][0], 1) assert_array_almost_equal(test_cv_scores, [1, 1. / 3.]) assert_array_almost_equal(train_cv_scores, [1, 1]) # for first split, 1/4 of dataset is in test, for second 3/4. # take weighted average and weighted std expected_test_mean = 1 * 1. / 4. + 1. / 3. * 3. / 4. expected_test_std = np.sqrt(1. / 4 * (expected_test_mean - 1)**2 + 3. / 4 * (expected_test_mean - 1. / 3.)**2) assert_almost_equal(test_mean, expected_test_mean) assert_almost_equal(test_std, expected_test_std) # For the train scores, we do not take a weighted mean irrespective of # i.i.d. or not assert_almost_equal(train_mean, 1) assert_almost_equal(train_std, 0) # once with iid=False grid_search = GridSearchCV(SVC(), param_grid={'C': [1, 10]}, cv=cv, iid=False) random_search = RandomizedSearchCV(SVC(), n_iter=2, param_distributions={'C': [1, 10]}, cv=cv, iid=False) for search in (grid_search, random_search): search.fit(X, y) assert_false(search.iid) test_cv_scores = np.array( list(search.cv_results_['split%d_test_score' % s][0] for s in range(search.n_splits_))) test_mean = search.cv_results_['mean_test_score'][0] test_std = search.cv_results_['std_test_score'][0] train_cv_scores = np.array( list(search.cv_results_['split%d_train_' 'score' % s][0] for s in range(search.n_splits_))) train_mean = search.cv_results_['mean_train_score'][0] train_std = search.cv_results_['std_train_score'][0] assert_equal(search.cv_results_['param_C'][0], 1) # scores are the same as above assert_array_almost_equal(test_cv_scores, [1, 1. / 3.]) # Unweighted mean/std is used assert_almost_equal(test_mean, np.mean(test_cv_scores)) assert_almost_equal(test_std, np.std(test_cv_scores)) # For the train scores, we do not take a weighted mean irrespective of # i.i.d. or not assert_almost_equal(train_mean, 1) assert_almost_equal(train_std, 0)
def test_adjusted_mutual_info_score(): # Compute the Adjusted Mutual Information and test against known values labels_a = np.array([1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3]) labels_b = np.array([1, 1, 1, 1, 2, 1, 2, 2, 2, 2, 3, 1, 3, 3, 3, 2, 2]) # Mutual information mi = mutual_info_score(labels_a, labels_b) assert_almost_equal(mi, 0.41022, 5) # with provided sparse contingency C = contingency_matrix(labels_a, labels_b, sparse=True) mi = mutual_info_score(labels_a, labels_b, contingency=C) assert_almost_equal(mi, 0.41022, 5) # with provided dense contingency C = contingency_matrix(labels_a, labels_b) mi = mutual_info_score(labels_a, labels_b, contingency=C) assert_almost_equal(mi, 0.41022, 5) # Expected mutual information n_samples = C.sum() emi = expected_mutual_information(C, n_samples) assert_almost_equal(emi, 0.15042, 5) # Adjusted mutual information ami = adjusted_mutual_info_score(labels_a, labels_b) assert_almost_equal(ami, 0.27502, 5) ami = adjusted_mutual_info_score([1, 1, 2, 2], [2, 2, 3, 3]) assert_equal(ami, 1.0) # Test with a very large array a110 = np.array([list(labels_a) * 110]).flatten() b110 = np.array([list(labels_b) * 110]).flatten() ami = adjusted_mutual_info_score(a110, b110) # This is not accurate to more than 2 places assert_almost_equal(ami, 0.37, 2)
def test_sparsa_multitask_l1(): for data in (mult_dense, mult_csr): clf = SparsaClassifier(max_iter=500, penalty="l1", multiclass=False) clf.fit(data, mult_target) assert_almost_equal(clf.score(data, mult_target), 0.87, 2)
def test_non_consecutive_labels(): # regression tests for labels with gaps h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 2, 2, 2], [0, 1, 0, 1, 2, 2]) assert_almost_equal(h, 0.67, 2) assert_almost_equal(c, 0.42, 2) assert_almost_equal(v, 0.52, 2) h, c, v = homogeneity_completeness_v_measure([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2]) assert_almost_equal(h, 0.67, 2) assert_almost_equal(c, 0.42, 2) assert_almost_equal(v, 0.52, 2) ari_1 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 1, 0, 1, 2, 2]) ari_2 = adjusted_rand_score([0, 0, 0, 1, 1, 1], [0, 4, 0, 4, 2, 2]) assert_almost_equal(ari_1, 0.24, 2) assert_almost_equal(ari_2, 0.24, 2)
def test_entropy(): ent = entropy([0, 0, 42.]) assert_almost_equal(ent, 0.6365141, 5) assert_almost_equal(entropy([]), 1)