def test_compute_class_weight_auto_negative(): # Test compute_class_weight when labels are negative # Test with balanced class labels. classes = np.array([-2, -1, 0]) y = np.asarray([-1, -1, 0, 0, -2, -2]) cw = assert_warns(DeprecationWarning, compute_class_weight, "auto", classes, y) assert_almost_equal(cw.sum(), classes.shape) assert_equal(len(cw), len(classes)) assert_array_almost_equal(cw, np.array([1., 1., 1.])) cw = compute_class_weight("balanced", classes, y) assert_equal(len(cw), len(classes)) assert_array_almost_equal(cw, np.array([1., 1., 1.])) # Test with unbalanced class labels. y = np.asarray([-1, 0, 0, -2, -2, -2]) cw = assert_warns(DeprecationWarning, compute_class_weight, "auto", classes, y) assert_almost_equal(cw.sum(), classes.shape) assert_equal(len(cw), len(classes)) assert_array_almost_equal(cw, np.array([0.545, 1.636, 0.818]), decimal=3) cw = compute_class_weight("balanced", classes, y) assert_equal(len(cw), len(classes)) class_counts = np.bincount(y + 2) assert_almost_equal(np.dot(cw, class_counts), y.shape[0]) assert_array_almost_equal(cw, [2. / 3, 2., 1.])
def check_oob_score(name, X, y, n_estimators=20): # Check that oob prediction is a good estimation of the generalization # error. # Proper behavior est = FOREST_ESTIMATORS[name](oob_score=True, random_state=0, n_estimators=n_estimators, bootstrap=True) n_samples = X.shape[0] est.fit(X[:n_samples // 2, :], y[:n_samples // 2]) test_score = est.score(X[n_samples // 2:, :], y[n_samples // 2:]) if name in FOREST_CLASSIFIERS: assert_less(abs(test_score - est.oob_score_), 0.1) else: assert_greater(test_score, est.oob_score_) assert_greater(est.oob_score_, .8) # Check warning if not enough estimators with np.errstate(divide="ignore", invalid="ignore"): est = FOREST_ESTIMATORS[name](oob_score=True, random_state=0, n_estimators=1, bootstrap=True) assert_warns(UserWarning, est.fit, X, y)
def test_check_symmetric(): arr_sym = np.array([[0, 1], [1, 2]]) arr_bad = np.ones(2) arr_asym = np.array([[0, 2], [0, 2]]) test_arrays = { 'dense': arr_asym, 'dok': sp.dok_matrix(arr_asym), 'csr': sp.csr_matrix(arr_asym), 'csc': sp.csc_matrix(arr_asym), 'coo': sp.coo_matrix(arr_asym), 'lil': sp.lil_matrix(arr_asym), 'bsr': sp.bsr_matrix(arr_asym) } # check error for bad inputs assert_raises(ValueError, check_symmetric, arr_bad) # check that asymmetric arrays are properly symmetrized for arr_format, arr in test_arrays.items(): # Check for warnings and errors assert_warns(UserWarning, check_symmetric, arr) assert_raises(ValueError, check_symmetric, arr, raise_exception=True) output = check_symmetric(arr, raise_warning=False) if sp.issparse(output): assert_equal(output.format, arr_format) assert_array_equal(output.toarray(), arr_sym) else: assert_array_equal(output, arr_sym)
def test_compute_sample_weight(): # Test (and demo) compute_sample_weight. # Test with balanced classes y = np.asarray([1, 1, 1, 2, 2, 2]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) sample_weight = compute_sample_weight("balanced", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with user-defined weights sample_weight = compute_sample_weight({1: 2, 2: 1}, y) assert_array_almost_equal(sample_weight, [2., 2., 2., 1., 1., 1.]) # Test with column vector of balanced classes y = np.asarray([[1], [1], [1], [2], [2], [2]]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) sample_weight = compute_sample_weight("balanced", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with unbalanced classes y = np.asarray([1, 1, 1, 2, 2, 2, 3]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y) expected_auto = np.asarray([.6, .6, .6, .6, .6, .6, 1.8]) assert_array_almost_equal(sample_weight, expected_auto) sample_weight = compute_sample_weight("balanced", y) expected_balanced = np.array( [0.7777, 0.7777, 0.7777, 0.7777, 0.7777, 0.7777, 2.3333]) assert_array_almost_equal(sample_weight, expected_balanced, decimal=4) # Test with `None` weights sample_weight = compute_sample_weight(None, y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 1.]) # Test with multi-output of balanced classes y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) sample_weight = compute_sample_weight("balanced", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with multi-output with user-defined weights y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]]) sample_weight = compute_sample_weight([{1: 2, 2: 1}, {0: 1, 1: 2}], y) assert_array_almost_equal(sample_weight, [2., 2., 2., 2., 2., 2.]) # Test with multi-output of unbalanced classes y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [3, -1]]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y) assert_array_almost_equal(sample_weight, expected_auto**2) sample_weight = compute_sample_weight("balanced", y) assert_array_almost_equal(sample_weight, expected_balanced**2, decimal=3)
def test_shape_y(): # Test with float class labels. clf = GradientBoostingClassifier(n_estimators=100, random_state=1) y_ = np.asarray(y, dtype=np.int32) y_ = y_[:, np.newaxis] # This will raise a DataConversionWarning that we want to # "always" raise, elsewhere the warnings gets ignored in the # later tests, and the tests that check for this warning fail assert_warns(DataConversionWarning, clf.fit, X, y_) assert_array_equal(clf.predict(T), true_result) assert_equal(100, len(clf.estimators_))
def test_warn_wrong_warning(self): def f(): warnings.warn("yo", DeprecationWarning) failed = False filters = sys.modules['warnings'].filters[:] try: try: # Should raise an AssertionError assert_warns(UserWarning, f) failed = True except AssertionError: pass finally: sys.modules['warnings'].filters = filters if failed: raise AssertionError("wrong warning caught by assert_warn")
def check_warm_start_equal_n_estimators(name): # Test if warm start with equal n_estimators does nothing and returns the # same forest and raises a warning. X, y = hastie_X, hastie_y ForestEstimator = FOREST_ESTIMATORS[name] clf = ForestEstimator(n_estimators=5, max_depth=3, warm_start=True, random_state=1) clf.fit(X, y) clf_2 = ForestEstimator(n_estimators=5, max_depth=3, warm_start=True, random_state=1) clf_2.fit(X, y) # Now clf_2 equals clf. clf_2.set_params(random_state=2) assert_warns(UserWarning, clf_2.fit, X, y) # If we had fit the trees again we would have got a different forest as we # changed the random state. assert_array_equal(clf.apply(X), clf_2.apply(X))
def test_compute_class_weight(): # Test (and demo) compute_class_weight. y = np.asarray([2, 2, 2, 3, 3, 4]) classes = np.unique(y) cw = assert_warns(DeprecationWarning, compute_class_weight, "auto", classes, y) assert_almost_equal(cw.sum(), classes.shape) assert_true(cw[0] < cw[1] < cw[2]) cw = compute_class_weight("balanced", classes, y) # total effect of samples is preserved class_counts = np.bincount(y)[2:] assert_almost_equal(np.dot(cw, class_counts), y.shape[0]) assert_true(cw[0] < cw[1] < cw[2])
def test_compute_class_weight_auto_unordered(): # Test compute_class_weight when classes are unordered classes = np.array([1, 0, 3]) y = np.asarray([1, 0, 0, 3, 3, 3]) cw = assert_warns(DeprecationWarning, compute_class_weight, "auto", classes, y) assert_almost_equal(cw.sum(), classes.shape) assert_equal(len(cw), len(classes)) assert_array_almost_equal(cw, np.array([1.636, 0.818, 0.545]), decimal=3) cw = compute_class_weight("balanced", classes, y) class_counts = np.bincount(y)[classes] assert_almost_equal(np.dot(cw, class_counts), y.shape[0]) assert_array_almost_equal(cw, [2., 1., 2. / 3])
def check_class_weight_errors(name): # Test if class_weight raises errors and warnings when expected. ForestClassifier = FOREST_CLASSIFIERS[name] _y = np.vstack((y, np.array(y) * 2)).T # Invalid preset string clf = ForestClassifier(class_weight='the larch', random_state=0) assert_raises(ValueError, clf.fit, X, y) assert_raises(ValueError, clf.fit, X, _y) # Warning warm_start with preset clf = ForestClassifier(class_weight='auto', warm_start=True, random_state=0) assert_warns(UserWarning, clf.fit, X, y) assert_warns(UserWarning, clf.fit, X, _y) # Not a list or preset for multi-output clf = ForestClassifier(class_weight=1, random_state=0) assert_raises(ValueError, clf.fit, X, _y) # Incorrect length list for multi-output clf = ForestClassifier(class_weight=[{-1: 0.5, 1: 1.}], random_state=0) assert_raises(ValueError, clf.fit, X, _y)
def test_warn(self): def f(): warnings.warn("yo") return 3 # Test that assert_warns is not impacted by externally set # filters and is reset internally. # This is because `clean_warning_registry()` is called internally by # assert_warns and clears all previous filters. warnings.simplefilter("ignore", UserWarning) assert_equal(assert_warns(UserWarning, f), 3) # Test that the warning registry is empty after assert_warns assert_equal(sys.modules['warnings'].filters, []) assert_raises(AssertionError, assert_no_warnings, f) assert_equal(assert_no_warnings(lambda x: x, 1), 1)
def check_importances(name, criterion, X, y): ForestEstimator = FOREST_ESTIMATORS[name] est = ForestEstimator(n_estimators=20, criterion=criterion, random_state=0) est.fit(X, y) importances = est.feature_importances_ n_important = np.sum(importances > 0.1) assert_equal(importances.shape[0], 10) assert_equal(n_important, 3) # XXX: Remove this test in 0.19 after transform support to estimators # is removed. X_new = assert_warns(DeprecationWarning, est.transform, X, threshold="mean") assert_less(0 < X_new.shape[1], X.shape[1]) # Check with parallel importances = est.feature_importances_ est.set_params(n_jobs=2) importances_parrallel = est.feature_importances_ assert_array_almost_equal(importances, importances_parrallel) # Check with sample weights sample_weight = check_random_state(0).randint(1, 10, len(X)) est = ForestEstimator(n_estimators=20, random_state=0, criterion=criterion) est.fit(X, y, sample_weight=sample_weight) importances = est.feature_importances_ assert_true(np.all(importances >= 0.0)) for scale in [0.5, 10, 100]: est = ForestEstimator(n_estimators=20, random_state=0, criterion=criterion) est.fit(X, y, sample_weight=scale * sample_weight) importances_bis = est.feature_importances_ assert_less(np.abs(importances - importances_bis).mean(), 0.001)
def test_feature_importances(): X = np.array(boston.data, dtype=np.float32) y = np.array(boston.target, dtype=np.float32) for presort in True, False: clf = GradientBoostingRegressor(n_estimators=100, max_depth=5, min_samples_split=2, random_state=1, presort=presort) clf.fit(X, y) assert_true(hasattr(clf, 'feature_importances_')) # XXX: Remove this test in 0.19 after transform support to estimators # is removed. X_new = assert_warns(DeprecationWarning, clf.transform, X, threshold="mean") assert_less(X_new.shape[1], X.shape[1]) feature_mask = (clf.feature_importances_ > clf.feature_importances_.mean()) assert_array_almost_equal(X_new, X[:, feature_mask])
def test_check_array_dtype_warning(): X_int_list = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] X_float64 = np.asarray(X_int_list, dtype=np.float64) X_float32 = np.asarray(X_int_list, dtype=np.float32) X_int64 = np.asarray(X_int_list, dtype=np.int64) X_csr_float64 = sp.csr_matrix(X_float64) X_csr_float32 = sp.csr_matrix(X_float32) X_csc_float32 = sp.csc_matrix(X_float32) X_csc_int32 = sp.csc_matrix(X_int64, dtype=np.int32) y = [0, 0, 1] integer_data = [X_int64, X_csc_int32] float64_data = [X_float64, X_csr_float64] float32_data = [X_float32, X_csr_float32, X_csc_float32] for X in integer_data: X_checked = assert_no_warnings(check_array, X, dtype=np.float64, accept_sparse=True) assert_equal(X_checked.dtype, np.float64) X_checked = assert_warns(DataConversionWarning, check_array, X, dtype=np.float64, accept_sparse=True, warn_on_dtype=True) assert_equal(X_checked.dtype, np.float64) # Check that the warning message includes the name of the Estimator X_checked = assert_warns_message(DataConversionWarning, 'SomeEstimator', check_array, X, dtype=[np.float64, np.float32], accept_sparse=True, warn_on_dtype=True, estimator='SomeEstimator') assert_equal(X_checked.dtype, np.float64) X_checked, y_checked = assert_warns_message( DataConversionWarning, 'KNeighborsClassifier', check_X_y, X, y, dtype=np.float64, accept_sparse=True, warn_on_dtype=True, estimator=KNeighborsClassifier()) assert_equal(X_checked.dtype, np.float64) for X in float64_data: X_checked = assert_no_warnings(check_array, X, dtype=np.float64, accept_sparse=True, warn_on_dtype=True) assert_equal(X_checked.dtype, np.float64) X_checked = assert_no_warnings(check_array, X, dtype=np.float64, accept_sparse=True, warn_on_dtype=False) assert_equal(X_checked.dtype, np.float64) for X in float32_data: X_checked = assert_no_warnings(check_array, X, dtype=[np.float64, np.float32], accept_sparse=True) assert_equal(X_checked.dtype, np.float32) assert_true(X_checked is X) X_checked = assert_no_warnings(check_array, X, dtype=[np.float64, np.float32], accept_sparse=['csr', 'dok'], copy=True) assert_equal(X_checked.dtype, np.float32) assert_false(X_checked is X) X_checked = assert_no_warnings(check_array, X_csc_float32, dtype=[np.float64, np.float32], accept_sparse=['csr', 'dok'], copy=False) assert_equal(X_checked.dtype, np.float32) assert_false(X_checked is X_csc_float32) assert_equal(X_checked.format, 'csr')
def test_ignore_warning(): # This check that ignore_warning decorateur and context manager are working # as expected def _warning_function(): warnings.warn("deprecation warning", DeprecationWarning) def _multiple_warning_function(): warnings.warn("deprecation warning", DeprecationWarning) warnings.warn("deprecation warning") # Check the function directly assert_no_warnings(ignore_warnings(_warning_function)) assert_no_warnings( ignore_warnings(_warning_function, category=DeprecationWarning)) assert_warns(DeprecationWarning, ignore_warnings(_warning_function, category=UserWarning)) assert_warns( UserWarning, ignore_warnings(_multiple_warning_function, category=DeprecationWarning)) assert_warns( DeprecationWarning, ignore_warnings(_multiple_warning_function, category=UserWarning)) assert_no_warnings( ignore_warnings(_warning_function, category=(DeprecationWarning, UserWarning))) # Check the decorator @ignore_warnings def decorator_no_warning(): _warning_function() _multiple_warning_function() @ignore_warnings(category=(DeprecationWarning, UserWarning)) def decorator_no_warning_multiple(): _multiple_warning_function() @ignore_warnings(category=DeprecationWarning) def decorator_no_deprecation_warning(): _warning_function() @ignore_warnings(category=UserWarning) def decorator_no_user_warning(): _warning_function() @ignore_warnings(category=DeprecationWarning) def decorator_no_deprecation_multiple_warning(): _multiple_warning_function() @ignore_warnings(category=UserWarning) def decorator_no_user_multiple_warning(): _multiple_warning_function() assert_no_warnings(decorator_no_warning) assert_no_warnings(decorator_no_warning_multiple) assert_no_warnings(decorator_no_deprecation_warning) assert_warns(DeprecationWarning, decorator_no_user_warning) assert_warns(UserWarning, decorator_no_deprecation_multiple_warning) assert_warns(DeprecationWarning, decorator_no_user_multiple_warning) # Check the context manager def context_manager_no_warning(): with ignore_warnings(): _warning_function() def context_manager_no_warning_multiple(): with ignore_warnings(category=(DeprecationWarning, UserWarning)): _multiple_warning_function() def context_manager_no_deprecation_warning(): with ignore_warnings(category=DeprecationWarning): _warning_function() def context_manager_no_user_warning(): with ignore_warnings(category=UserWarning): _warning_function() def context_manager_no_deprecation_multiple_warning(): with ignore_warnings(category=DeprecationWarning): _multiple_warning_function() def context_manager_no_user_multiple_warning(): with ignore_warnings(category=UserWarning): _multiple_warning_function() assert_no_warnings(context_manager_no_warning) assert_no_warnings(context_manager_no_warning_multiple) assert_no_warnings(context_manager_no_deprecation_warning) assert_warns(DeprecationWarning, context_manager_no_user_warning) assert_warns(UserWarning, context_manager_no_deprecation_multiple_warning) assert_warns(DeprecationWarning, context_manager_no_user_multiple_warning)
def test_compute_sample_weight_with_subsample(): # Test compute_sample_weight with subsamples specified. # Test with balanced classes and all samples present y = np.asarray([1, 1, 1, 2, 2, 2]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) sample_weight = compute_sample_weight("balanced", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with column vector of balanced classes and all samples present y = np.asarray([[1], [1], [1], [2], [2], [2]]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) sample_weight = compute_sample_weight("balanced", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1.]) # Test with a subsample y = np.asarray([1, 1, 1, 2, 2, 2]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y, range(4)) assert_array_almost_equal(sample_weight, [.5, .5, .5, 1.5, 1.5, 1.5]) sample_weight = compute_sample_weight("balanced", y, range(4)) assert_array_almost_equal(sample_weight, [2. / 3, 2. / 3, 2. / 3, 2., 2., 2.]) # Test with a bootstrap subsample y = np.asarray([1, 1, 1, 2, 2, 2]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y, [0, 1, 1, 2, 2, 3]) expected_auto = np.asarray( [1 / 3., 1 / 3., 1 / 3., 5 / 3., 5 / 3., 5 / 3.]) assert_array_almost_equal(sample_weight, expected_auto) sample_weight = compute_sample_weight("balanced", y, [0, 1, 1, 2, 2, 3]) expected_balanced = np.asarray([0.6, 0.6, 0.6, 3., 3., 3.]) assert_array_almost_equal(sample_weight, expected_balanced) # Test with a bootstrap subsample for multi-output y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1]]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y, [0, 1, 1, 2, 2, 3]) assert_array_almost_equal(sample_weight, expected_auto**2) sample_weight = compute_sample_weight("balanced", y, [0, 1, 1, 2, 2, 3]) assert_array_almost_equal(sample_weight, expected_balanced**2) # Test with a missing class y = np.asarray([1, 1, 1, 2, 2, 2, 3]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.]) sample_weight = compute_sample_weight("balanced", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.]) # Test with a missing class for multi-output y = np.asarray([[1, 0], [1, 0], [1, 0], [2, 1], [2, 1], [2, 1], [2, 2]]) sample_weight = assert_warns(DeprecationWarning, compute_sample_weight, "auto", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.]) sample_weight = compute_sample_weight("balanced", y, range(6)) assert_array_almost_equal(sample_weight, [1., 1., 1., 1., 1., 1., 0.])
def test_check_array(): # accept_sparse == None # raise error on sparse inputs X = [[1, 2], [3, 4]] X_csr = sp.csr_matrix(X) assert_raises(TypeError, check_array, X_csr) # ensure_2d assert_warns(DeprecationWarning, check_array, [0, 1, 2]) X_array = check_array([0, 1, 2]) assert_equal(X_array.ndim, 2) X_array = check_array([0, 1, 2], ensure_2d=False) assert_equal(X_array.ndim, 1) # don't allow ndim > 3 X_ndim = np.arange(8).reshape(2, 2, 2) assert_raises(ValueError, check_array, X_ndim) check_array(X_ndim, allow_nd=True) # doesn't raise # force_all_finite X_inf = np.arange(4).reshape(2, 2).astype(np.float) X_inf[0, 0] = np.inf assert_raises(ValueError, check_array, X_inf) check_array(X_inf, force_all_finite=False) # no raise # nan check X_nan = np.arange(4).reshape(2, 2).astype(np.float) X_nan[0, 0] = np.nan assert_raises(ValueError, check_array, X_nan) check_array(X_inf, force_all_finite=False) # no raise # dtype and order enforcement. X_C = np.arange(4).reshape(2, 2).copy("C") X_F = X_C.copy("F") X_int = X_C.astype(np.int) X_float = X_C.astype(np.float) Xs = [X_C, X_F, X_int, X_float] dtypes = [np.int32, np.int, np.float, np.float32, None, np.bool, object] orders = ['C', 'F', None] copys = [True, False] for X, dtype, order, copy in product(Xs, dtypes, orders, copys): X_checked = check_array(X, dtype=dtype, order=order, copy=copy) if dtype is not None: assert_equal(X_checked.dtype, dtype) else: assert_equal(X_checked.dtype, X.dtype) if order == 'C': assert_true(X_checked.flags['C_CONTIGUOUS']) assert_false(X_checked.flags['F_CONTIGUOUS']) elif order == 'F': assert_true(X_checked.flags['F_CONTIGUOUS']) assert_false(X_checked.flags['C_CONTIGUOUS']) if copy: assert_false(X is X_checked) else: # doesn't copy if it was already good if (X.dtype == X_checked.dtype and X_checked.flags['C_CONTIGUOUS'] == X.flags['C_CONTIGUOUS'] and X_checked.flags['F_CONTIGUOUS'] == X.flags['F_CONTIGUOUS']): assert_true(X is X_checked) # allowed sparse != None X_csc = sp.csc_matrix(X_C) X_coo = X_csc.tocoo() X_dok = X_csc.todok() X_int = X_csc.astype(np.int) X_float = X_csc.astype(np.float) Xs = [X_csc, X_coo, X_dok, X_int, X_float] accept_sparses = [['csr', 'coo'], ['coo', 'dok']] for X, dtype, accept_sparse, copy in product(Xs, dtypes, accept_sparses, copys): with warnings.catch_warnings(record=True) as w: X_checked = check_array(X, dtype=dtype, accept_sparse=accept_sparse, copy=copy) if (dtype is object or sp.isspmatrix_dok(X)) and len(w): message = str(w[0].message) messages = [ "object dtype is not supported by sparse matrices", "Can't check dok sparse matrix for nan or inf." ] assert_true(message in messages) else: assert_equal(len(w), 0) if dtype is not None: assert_equal(X_checked.dtype, dtype) else: assert_equal(X_checked.dtype, X.dtype) if X.format in accept_sparse: # no change if allowed assert_equal(X.format, X_checked.format) else: # got converted assert_equal(X_checked.format, accept_sparse[0]) if copy: assert_false(X is X_checked) else: # doesn't copy if it was already good if (X.dtype == X_checked.dtype and X.format == X_checked.format): assert_true(X is X_checked) # other input formats # convert lists to arrays X_dense = check_array([[1, 2], [3, 4]]) assert_true(isinstance(X_dense, np.ndarray)) # raise on too deep lists assert_raises(ValueError, check_array, X_ndim.tolist()) check_array(X_ndim.tolist(), allow_nd=True) # doesn't raise # convert weird stuff to arrays X_no_array = NotAnArray(X_dense) result = check_array(X_no_array) assert_true(isinstance(result, np.ndarray))
def test_check_array_min_samples_and_features_messages(): # empty list is considered 2D by default: msg = "0 feature(s) (shape=(1, 0)) while a minimum of 1 is required." assert_raise_message(ValueError, msg, check_array, [[]]) # If considered a 1D collection when ensure_2d=False, then the minimum # number of samples will break: msg = "0 sample(s) (shape=(0,)) while a minimum of 1 is required." assert_raise_message(ValueError, msg, check_array, [], ensure_2d=False) # Invalid edge case when checking the default minimum sample of a scalar msg = "Singleton array array(42) cannot be considered a valid collection." assert_raise_message(TypeError, msg, check_array, 42, ensure_2d=False) # But this works if the input data is forced to look like a 2 array with # one sample and one feature: X_checked = assert_warns(DeprecationWarning, check_array, [42], ensure_2d=True) assert_array_equal(np.array([[42]]), X_checked) # Simulate a model that would need at least 2 samples to be well defined X = np.ones((1, 10)) y = np.ones(1) msg = "1 sample(s) (shape=(1, 10)) while a minimum of 2 is required." assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_samples=2) # The same message is raised if the data has 2 dimensions even if this is # not mandatory assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_samples=2, ensure_2d=False) # Simulate a model that would require at least 3 features (e.g. SelectKBest # with k=3) X = np.ones((10, 2)) y = np.ones(2) msg = "2 feature(s) (shape=(10, 2)) while a minimum of 3 is required." assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_features=3) # Only the feature check is enabled whenever the number of dimensions is 2 # even if allow_nd is enabled: assert_raise_message(ValueError, msg, check_X_y, X, y, ensure_min_features=3, allow_nd=True) # Simulate a case where a pipeline stage as trimmed all the features of a # 2D dataset. X = np.empty(0).reshape(10, 0) y = np.ones(10) msg = "0 feature(s) (shape=(10, 0)) while a minimum of 1 is required." assert_raise_message(ValueError, msg, check_X_y, X, y) # nd-data is not checked for any minimum number of features by default: X = np.ones((10, 0, 28, 28)) y = np.ones(10) X_checked, y_checked = check_X_y(X, y, allow_nd=True) assert_array_equal(X, X_checked) assert_array_equal(y, y_checked)