def test_kernel_density_sampling(n_samples=100, n_features=3): rng = np.random.RandomState(0) X = rng.randn(n_samples, n_features) bandwidth = 0.2 for kernel in ['gaussian', 'tophat']: # draw a tophat sample kde = KernelDensity(bandwidth, kernel=kernel).fit(X) samp = kde.sample(100) assert X.shape == samp.shape # check that samples are in the right range nbrs = NearestNeighbors(n_neighbors=1).fit(X) dist, ind = nbrs.kneighbors(X, return_distance=True) if kernel == 'tophat': assert np.all(dist < bandwidth) elif kernel == 'gaussian': # 5 standard deviations is safe for 100 samples, but there's a # very small chance this test could fail. assert np.all(dist < 5 * bandwidth) # check unsupported kernels for kernel in ['epanechnikov', 'exponential', 'linear', 'cosine']: kde = KernelDensity(bandwidth, kernel=kernel).fit(X) assert_raises(NotImplementedError, kde.sample, 100) # non-regression test: used to return a scalar X = rng.randn(4, 1) kde = KernelDensity(kernel="gaussian").fit(X) assert kde.sample().shape == (1, 1)
def test_random_projection_transformer_invalid_input(): for RandomProjection in all_RandomProjection: assert_raises(ValueError, RandomProjection(n_components='auto').fit, [[0, 1, 2]]) assert_raises(ValueError, RandomProjection(n_components=-10).fit, data)
def test_csc_row_median(): # Test csc_row_median actually calculates the median. # Test that it gives the same output when X is dense. rng = np.random.RandomState(0) X = rng.rand(100, 50) dense_median = np.median(X, axis=0) csc = sp.csc_matrix(X) sparse_median = csc_median_axis_0(csc) assert_array_equal(sparse_median, dense_median) # Test that it gives the same output when X is sparse X = rng.rand(51, 100) X[X < 0.7] = 0.0 ind = rng.randint(0, 50, 10) X[ind] = -X[ind] csc = sp.csc_matrix(X) dense_median = np.median(X, axis=0) sparse_median = csc_median_axis_0(csc) assert_array_equal(sparse_median, dense_median) # Test for toy data. X = [[0, -2], [-1, -1], [1, 0], [2, 1]] csc = sp.csc_matrix(X) assert_array_equal(csc_median_axis_0(csc), np.array([0.5, -0.5])) X = [[0, -2], [-1, -5], [1, -3]] csc = sp.csc_matrix(X) assert_array_equal(csc_median_axis_0(csc), np.array([0., -3])) # Test that it raises an Error for non-csc matrices. assert_raises(TypeError, csc_median_axis_0, sp.csr_matrix(X))
def test_mean_variance_axis1(): X, _ = make_classification(5, 4, random_state=0) # Sparsify the array a little bit X[0, 0] = 0 X[2, 1] = 0 X[4, 3] = 0 X_lil = sp.lil_matrix(X) X_lil[1, 0] = 0 X[1, 0] = 0 assert_raises(TypeError, mean_variance_axis, X_lil, axis=1) X_csr = sp.csr_matrix(X_lil) X_csc = sp.csc_matrix(X_lil) expected_dtypes = [(np.float32, np.float32), (np.float64, np.float64), (np.int32, np.float64), (np.int64, np.float64)] for input_dtype, output_dtype in expected_dtypes: X_test = X.astype(input_dtype) for X_sparse in (X_csr, X_csc): X_sparse = X_sparse.astype(input_dtype) X_means, X_vars = mean_variance_axis(X_sparse, axis=0) assert X_means.dtype == output_dtype assert X_vars.dtype == output_dtype assert_array_almost_equal(X_means, np.mean(X_test, axis=0)) assert_array_almost_equal(X_vars, np.var(X_test, axis=0))
def test_inplace_row_scale(): rng = np.random.RandomState(0) X = sp.rand(100, 200, 0.05) Xr = X.tocsr() Xc = X.tocsc() XA = X.toarray() scale = rng.rand(100) XA *= scale.reshape(-1, 1) inplace_row_scale(Xc, scale) inplace_row_scale(Xr, scale) assert_array_almost_equal(Xr.toarray(), Xc.toarray()) assert_array_almost_equal(XA, Xc.toarray()) assert_array_almost_equal(XA, Xr.toarray()) assert_raises(TypeError, inplace_column_scale, X.tolil(), scale) X = X.astype(np.float32) scale = scale.astype(np.float32) Xr = X.tocsr() Xc = X.tocsc() XA = X.toarray() XA *= scale.reshape(-1, 1) inplace_row_scale(Xc, scale) inplace_row_scale(Xr, scale) assert_array_almost_equal(Xr.toarray(), Xc.toarray()) assert_array_almost_equal(XA, Xc.toarray()) assert_array_almost_equal(XA, Xr.toarray()) assert_raises(TypeError, inplace_column_scale, X.tolil(), scale)
def test__check_reg_targets(): # All of length 3 EXAMPLES = [ ("continuous", [1, 2, 3], 1), ("continuous", [[1], [2], [3]], 1), ("continuous-multioutput", [[1, 1], [2, 2], [3, 1]], 2), ("continuous-multioutput", [[5, 1], [4, 2], [3, 1]], 2), ("continuous-multioutput", [[1, 3, 4], [2, 2, 2], [3, 1, 1]], 3), ] for (type1, y1, n_out1), (type2, y2, n_out2) in product(EXAMPLES, repeat=2): if type1 == type2 and n_out1 == n_out2: y_type, y_check1, y_check2, multioutput = _check_reg_targets( y1, y2, None) assert type1 == y_type if type1 == 'continuous': assert_array_equal(y_check1, np.reshape(y1, (-1, 1))) assert_array_equal(y_check2, np.reshape(y2, (-1, 1))) else: assert_array_equal(y_check1, y1) assert_array_equal(y_check2, y2) else: assert_raises(ValueError, _check_reg_targets, y1, y2, None)
def test_checksubparams_n_subsamples_if_less_samples_than_features(): random_state = np.random.RandomState(0) n_samples, n_features = 10, 20 X = random_state.normal(size=(n_samples, n_features)) y = random_state.normal(size=n_samples) theil_sen = TheilSenRegressor(n_subsamples=9, random_state=0) assert_raises(ValueError, theil_sen.fit, X, y)
def test_check_outlier_corruption(): # should raise AssertionError decision = np.array([0., 1., 1.5, 2.]) assert_raises(AssertionError, check_outlier_corruption, 1, 2, decision) # should pass decision = np.array([0., 1., 1., 2.]) check_outlier_corruption(1, 2, decision)
def test_ridge_individual_penalties(): # Tests the ridge object using individual penalties rng = np.random.RandomState(42) n_samples, n_features, n_targets = 20, 10, 5 X = rng.randn(n_samples, n_features) y = rng.randn(n_samples, n_targets) penalties = np.arange(n_targets) coef_cholesky = np.array([ Ridge(alpha=alpha, solver="cholesky").fit(X, target).coef_ for alpha, target in zip(penalties, y.T) ]) coefs_indiv_pen = [ Ridge(alpha=penalties, solver=solver, tol=1e-8).fit(X, y).coef_ for solver in ['svd', 'sparse_cg', 'lsqr', 'cholesky', 'sag', 'saga'] ] for coef_indiv_pen in coefs_indiv_pen: assert_array_almost_equal(coef_cholesky, coef_indiv_pen) # Test error is raised when number of targets and penalties do not match. ridge = Ridge(alpha=penalties[:-1]) assert_raises(ValueError, ridge.fit, X, y)
def test_check_different_dimensions(): # Ensure an error is raised if the dimensions are different. XA = np.resize(np.arange(45), (5, 9)) XB = np.resize(np.arange(32), (4, 8)) assert_raises(ValueError, check_pairwise_arrays, XA, XB) XB = np.resize(np.arange(4 * 9), (4, 9)) assert_raises(ValueError, check_paired_arrays, XA, XB)
def test_wrong_class_weight_label(): # ValueError due to wrong class_weight label. X2 = np.array([[-1.0, -1.0], [-1.0, 0], [-.8, -1.0], [1.0, 1.0], [1.0, 0.0]]) y2 = [1, 1, 1, -1, -1] clf = PassiveAggressiveClassifier(class_weight={0: 0.5}, max_iter=100) assert_raises(ValueError, clf.fit, X2, y2)
def test_gnb_pfit_wrong_nb_features(): """Test whether an error is raised when the number of feature changes between two partial fit""" clf = GaussianNB() # Fit for the first time the GNB clf.fit(X, y) # Partial fit a second time with an incoherent X assert_raises(ValueError, clf.partial_fit, np.hstack((X, X)), y)
def test_sample_weight_missing(): from mrex.cluster import KMeans clf = AdaBoostClassifier(KMeans(), algorithm="SAMME") assert_raises(ValueError, clf.fit, X, y_regr) clf = AdaBoostRegressor(KMeans()) assert_raises(ValueError, clf.fit, X, y_regr)
def test_constant_strategy_exceptions(): X = [[0], [0], [0], [0]] # ignored y = [2, 1, 2, 2] clf = DummyClassifier(strategy="constant", random_state=0) assert_raises(ValueError, clf.fit, X, y) clf = DummyClassifier(strategy="constant", random_state=0, constant=[2, 0]) assert_raises(ValueError, clf.fit, X, y)
def test_make_scorer(): # Sanity check on the make_scorer factory function. f = lambda *args: 0 assert_raises(ValueError, make_scorer, f, needs_threshold=True, needs_proba=True)
def check_warm_start_smaller_n_estimators(name): # Test if warm start second fit with smaller n_estimators raises error. X, y = hastie_X, hastie_y ForestEstimator = FOREST_ESTIMATORS[name] clf = ForestEstimator(n_estimators=5, max_depth=1, warm_start=True) clf.fit(X, y) clf.set_params(n_estimators=4) assert_raises(ValueError, clf.fit, X, y)
def test_input_size_jl_min_dim(): assert_raises(ValueError, johnson_lindenstrauss_min_dim, 3 * [100], 2 * [0.9]) assert_raises(ValueError, johnson_lindenstrauss_min_dim, 3 * [100], 2 * [0.9]) johnson_lindenstrauss_min_dim(np.random.randint(1, 10, size=(10, 10)), np.full((10, 10), 0.5))
def test_get_params(): test = T(K(), K()) assert 'a__d' in test.get_params(deep=True) assert 'a__d' not in test.get_params(deep=False) test.set_params(a__d=2) assert test.a.d == 2 assert_raises(ValueError, test.set_params, a__a=2)
def test_discretenb_input_check_fit(cls): # Test input checks for the fit method # check shape consistency for number of samples at fit time assert_raises(ValueError, cls().fit, X2, y2[:-1]) # check shape consistency for number of input features at predict time clf = cls().fit(X2, y2) assert_raises(ValueError, clf.predict, X2[:, :-1])
def test_pipeline_slice(): pipe = Pipeline([('transf1', Transf()), ('transf2', Transf()), ('clf', FitParamT())]) pipe2 = pipe[:-1] assert isinstance(pipe2, Pipeline) assert pipe2.steps == pipe.steps[:-1] assert 2 == len(pipe2.named_steps) assert_raises(ValueError, lambda: pipe[::-1])
def test_x_none_gram_none_raises_value_error(): # Test that lars_path with no X and Gram raises exception Xy = np.dot(X.T, y) assert_raises(ValueError, linear_model.lars_path, None, y, Gram=None, Xy=Xy)
def test_additive_chi2_sampler(): # test that AdditiveChi2Sampler approximates kernel on random data # compute exact kernel # abbreviations for easier formula X_ = X[:, np.newaxis, :] Y_ = Y[np.newaxis, :, :] large_kernel = 2 * X_ * Y_ / (X_ + Y_) # reduce to n_samples_x x n_samples_y by summing over features kernel = (large_kernel.sum(axis=2)) # approximate kernel mapping transform = AdditiveChi2Sampler(sample_steps=3) X_trans = transform.fit_transform(X) Y_trans = transform.transform(Y) kernel_approx = np.dot(X_trans, Y_trans.T) assert_array_almost_equal(kernel, kernel_approx, 1) X_sp_trans = transform.fit_transform(csr_matrix(X)) Y_sp_trans = transform.transform(csr_matrix(Y)) assert_array_equal(X_trans, X_sp_trans.A) assert_array_equal(Y_trans, Y_sp_trans.A) # test error is raised on negative input Y_neg = Y.copy() Y_neg[0, 0] = -1 assert_raises(ValueError, transform.transform, Y_neg) # test error on invalid sample_steps transform = AdditiveChi2Sampler(sample_steps=4) assert_raises(ValueError, transform.fit, X) # test that the sample interval is set correctly sample_steps_available = [1, 2, 3] for sample_steps in sample_steps_available: # test that the sample_interval is initialized correctly transform = AdditiveChi2Sampler(sample_steps=sample_steps) assert transform.sample_interval is None # test that the sample_interval is changed in the fit method transform.fit(X) assert transform.sample_interval_ is not None # test that the sample_interval is set correctly sample_interval = 0.3 transform = AdditiveChi2Sampler(sample_steps=4, sample_interval=sample_interval) assert transform.sample_interval == sample_interval transform.fit(X) assert transform.sample_interval_ == sample_interval
def test_check_invalid_dimensions(): # Ensure an error is raised on 1D input arrays. # The modified tests are not 1D. In the old test, the array was internally # converted to 2D anyways XA = np.arange(45).reshape(9, 5) XB = np.arange(32).reshape(4, 8) assert_raises(ValueError, check_pairwise_arrays, XA, XB) XA = np.arange(45).reshape(9, 5) XB = np.arange(32).reshape(4, 8) assert_raises(ValueError, check_pairwise_arrays, XA, XB)
def test_isotonic_regression_oob_bad(): # Set y and x y = np.array([3, 7, 5, 9, 8, 7, 10]) x = np.arange(len(y)) # Create model and fit ir = IsotonicRegression(increasing='auto', out_of_bounds="xyz") # Make sure that we throw an error for bad out_of_bounds value assert_raises(ValueError, ir.fit, x, y)
def check_1d_input(name, X, X_2d, y): ForestEstimator = FOREST_ESTIMATORS[name] assert_raises(ValueError, ForestEstimator(n_estimators=1, random_state=0).fit, X, y) est = ForestEstimator(random_state=0) est.fit(X_2d, y) if name in FOREST_CLASSIFIERS or name in FOREST_REGRESSORS: assert_raises(ValueError, est.predict, X)
def test_pairwise_kernels_filter_param(): rng = np.random.RandomState(0) X = rng.random_sample((5, 4)) Y = rng.random_sample((2, 4)) K = rbf_kernel(X, Y, gamma=0.1) params = {"gamma": 0.1, "blabla": ":)"} K2 = pairwise_kernels(X, Y, metric="rbf", filter_params=True, **params) assert_array_almost_equal(K, K2) assert_raises(TypeError, pairwise_kernels, X, Y, "rbf", **params)
def test_ovr_coef_exceptions(): # Not fitted exception! ovr = OneVsRestClassifier(LinearSVC(random_state=0)) # lambda is needed because we don't want coef_ to be evaluated right away assert_raises(ValueError, lambda x: ovr.coef_, None) # Doesn't have coef_ exception! ovr = OneVsRestClassifier(DecisionTreeClassifier()) ovr.fit(iris.data, iris.target) assert_raises(AttributeError, lambda x: ovr.coef_, None)
def test_pipeline_index(): transf = Transf() clf = FitParamT() pipe = Pipeline([('transf', transf), ('clf', clf)]) assert pipe[0] == transf assert pipe['transf'] == transf assert pipe[-1] == clf assert pipe['clf'] == clf assert_raises(IndexError, lambda: pipe[3]) assert_raises(KeyError, lambda: pipe['foobar'])
def test_valid_alpha(): n_classes = 2 X, y = make_classification(n_classes=n_classes, n_samples=200, random_state=0) for alpha in [-0.1, 0, 1, 1.1, None]: assert_raises(ValueError, lambda **kwargs: label_propagation.LabelSpreading( **kwargs).fit(X, y), alpha=alpha)
def test_isotonic_regression_oob_raise(): # Set y and x y = np.array([3, 7, 5, 9, 8, 7, 10]) x = np.arange(len(y)) # Create model and fit ir = IsotonicRegression(increasing='auto', out_of_bounds="raise") ir.fit(x, y) # Check that an exception is thrown assert_raises(ValueError, ir.predict, [min(x) - 10, max(x) + 10])