def test_non_transformer_estimators_n_iter(): # Test that all estimators of type which are non-transformer # and which have an attribute of max_iter, return the attribute # of n_iter atleast 1. for est_type in ['regressor', 'classifier', 'cluster']: regressors = all_estimators(type_filter=est_type) for name, Estimator in regressors: # LassoLars stops early for the default alpha=1.0 for # the iris dataset. if name == 'LassoLars': estimator = Estimator(alpha=0.) else: estimator = Estimator() if hasattr(estimator, "max_iter"): # These models are dependent on external solvers like # libsvm and accessing the iter parameter is non-trivial. if name in (['Ridge', 'SVR', 'NuSVR', 'NuSVC', 'RidgeClassifier', 'SVC', 'RandomizedLasso', 'LogisticRegressionCV']): continue # Tested in test_transformer_n_iter below elif (name in CROSS_DECOMPOSITION or name in ['LinearSVC', 'LogisticRegression']): continue else: # Multitask models related to ENet cannot handle # if y is mono-output. yield (check_non_transformer_estimators_n_iter, name, estimator, 'Multi' in name)
def test_classifiers(): # test if classifiers can cope with non-consecutive classes classifiers = all_estimators(type_filter='classifier') for name, Classifier in classifiers: # test classfiers can handle non-array data yield check_classifier_data_not_an_array, name, Classifier # test classifiers trained on a single label always return this label yield check_classifiers_one_label, name, Classifier yield check_classifiers_classes, name, Classifier yield check_classifiers_pickle, name, Classifier yield check_estimators_partial_fit_n_features, name, Classifier # basic consistency testing yield check_classifiers_train, name, Classifier if (name not in ["MultinomialNB", "LabelPropagation", "LabelSpreading"] # TODO some complication with -1 label and name not in ["DecisionTreeClassifier", "ExtraTreeClassifier"]): # We don't raise a warning in these classifiers, as # the column y interface is used by the forests. # test if classifiers can cope with y.shape = (n_samples, 1) yield check_classifiers_input_shapes, name, Classifier # test if NotFittedError is raised yield check_estimators_unfitted, name, Classifier
def test_non_transformer_estimators_n_iter(): # Test that all estimators of type which are non-transformer # and which have an attribute of max_iter, return the attribute # of n_iter atleast 1. for est_type in ['regressor', 'classifier', 'cluster']: regressors = all_estimators(type_filter=est_type) for name, Estimator in regressors: # LassoLars stops early for the default alpha=1.0 for # the iris dataset. if name == 'LassoLars': estimator = Estimator(alpha=0.) else: estimator = Estimator() if hasattr(estimator, "max_iter"): # These models are dependent on external solvers like # libsvm and accessing the iter parameter is non-trivial. if name in ([ 'Ridge', 'SVR', 'NuSVR', 'NuSVC', 'RidgeClassifier', 'SVC', 'RandomizedLasso', 'LogisticRegressionCV' ]): continue # Tested in test_transformer_n_iter below elif (name in CROSS_DECOMPOSITION or name in ['LinearSVC', 'LogisticRegression']): continue else: # Multitask models related to ENet cannot handle # if y is mono-output. yield (check_non_transformer_estimators_n_iter, name, estimator, 'Multi' in name)
def test_non_meta_estimators(): # input validation etc for non-meta estimators estimators = all_estimators() for name, Estimator in estimators: if name.endswith("HMM") or name.startswith("_"): continue if name not in CROSS_DECOMPOSITION: yield check_estimators_dtypes, name, Estimator yield check_fit_score_takes_y, name, Estimator yield check_dtype_object, name, Estimator # Check that all estimator yield informative messages when # trained on empty datasets yield check_estimators_empty_data_messages, name, Estimator if name not in CROSS_DECOMPOSITION + ['SpectralEmbedding']: # SpectralEmbedding is non-deterministic, # see issue #4236 yield check_pipeline_consistency, name, Estimator if name not in CROSS_DECOMPOSITION + ['Imputer']: # Test that all estimators check their input for NaN's and infs yield check_estimators_nan_inf, name, Estimator if name not in CROSS_DECOMPOSITION + ['GaussianProcess']: # FIXME! # in particular GaussianProcess! yield check_estimators_overwrite_params, name, Estimator if hasattr(Estimator, 'sparsify'): yield check_sparsify_coefficients, name, Estimator yield check_estimator_sparse_data, name, Estimator
def test_transformer_n_iter(): transformers = all_estimators(type_filter='transformer') for name, Estimator in transformers: estimator = Estimator() # Dependent on external solvers and hence accessing the iter # param is non-trivial. external_solver = ['Isomap', 'KernelPCA', 'LocallyLinearEmbedding', 'RandomizedLasso', 'LogisticRegressionCV'] if hasattr(estimator, "max_iter") and name not in external_solver: yield check_transformer_n_iter, name, estimator
def test_all_estimators(): # Test that estimators are default-constructible, clonable # and have working repr. estimators = all_estimators(include_meta_estimators=True) # Meta sanity-check to make sure that the estimator introspection runs # properly assert_greater(len(estimators), 0) for name, Estimator in estimators: # some can just not be sensibly default constructed yield check_parameters_default_constructible, name, Estimator
def test_clustering(): # test if clustering algorithms do something sensible # also test all shapes / shape errors clustering = all_estimators(type_filter='cluster') for name, Alg in clustering: # test whether any classifier overwrites his init parameters during fit yield check_clusterer_compute_labels_predict, name, Alg if name not in ('WardAgglomeration', "FeatureAgglomeration"): # this is clustering on the features # let's not test that here. yield check_clustering, name, Alg yield check_estimators_partial_fit_n_features, name, Alg
def test_transformer_n_iter(): transformers = all_estimators(type_filter='transformer') for name, Estimator in transformers: estimator = Estimator() # Dependent on external solvers and hence accessing the iter # param is non-trivial. external_solver = [ 'Isomap', 'KernelPCA', 'LocallyLinearEmbedding', 'RandomizedLasso', 'LogisticRegressionCV' ] if hasattr(estimator, "max_iter") and name not in external_solver: yield check_transformer_n_iter, name, estimator
def test_transformers(): # test if transformers do something sensible on training set # also test all shapes / shape errors transformers = all_estimators(type_filter='transformer') for name, Transformer in transformers: # All transformers should either deal with sparse data or raise an # exception with type TypeError and an intelligible error message yield check_transformer_pickle, name, Transformer if name not in ['AdditiveChi2Sampler', 'Binarizer', 'Normalizer', 'PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD']: yield check_transformer_data_not_an_array, name, Transformer # these don't actually fit the data, so don't raise errors if name not in ['AdditiveChi2Sampler', 'Binarizer', 'Normalizer']: # basic tests yield check_transformer, name, Transformer yield check_transformers_unfitted, name, Transformer
def test_regressors(): regressors = all_estimators(type_filter='regressor') # TODO: test with intercept # TODO: test with multiple responses for name, Regressor in regressors: # basic testing yield check_regressors_train, name, Regressor yield check_regressor_data_not_an_array, name, Regressor yield check_estimators_partial_fit_n_features, name, Regressor # Test that estimators can be pickled, and once pickled # give the same answer as before. yield check_regressors_pickle, name, Regressor if name != 'CCA': # check that the regressor handles int input yield check_regressors_int, name, Regressor # Test if NotFittedError is raised yield check_estimators_unfitted, name, Regressor
def test_class_weight_auto_linear_classifiers(): classifiers = all_estimators(type_filter='classifier') clean_warning_registry() with warnings.catch_warnings(record=True): linear_classifiers = [(name, clazz) for name, clazz in classifiers if 'class_weight' in clazz().get_params().keys() and issubclass(clazz, LinearClassifierMixin)] for name, Classifier in linear_classifiers: if name == "LogisticRegressionCV": # Contrary to RidgeClassifierCV, LogisticRegressionCV use actual # CV folds and fit a model for each CV iteration before averaging # the coef. Therefore it is expected to not behave exactly as the # other linear model. continue yield check_class_weight_auto_linear_classifier, name, Classifier
def test_transformers(): # test if transformers do something sensible on training set # also test all shapes / shape errors transformers = all_estimators(type_filter='transformer') for name, Transformer in transformers: # All transformers should either deal with sparse data or raise an # exception with type TypeError and an intelligible error message yield check_transformer_pickle, name, Transformer if name not in [ 'AdditiveChi2Sampler', 'Binarizer', 'Normalizer', 'PLSCanonical', 'PLSRegression', 'CCA', 'PLSSVD' ]: yield check_transformer_data_not_an_array, name, Transformer # these don't actually fit the data, so don't raise errors if name not in ['AdditiveChi2Sampler', 'Binarizer', 'Normalizer']: # basic tests yield check_transformer, name, Transformer yield check_transformers_unfitted, name, Transformer
def test_class_weight_classifiers(): # test that class_weight works and that the semantics are consistent classifiers = all_estimators(type_filter='classifier') clean_warning_registry() with warnings.catch_warnings(record=True): classifiers = [c for c in classifiers if 'class_weight' in c[1]().get_params().keys()] for name, Classifier in classifiers: if name == "NuSVC": # the sparse version has a parameter that doesn't do anything continue if name.endswith("NB"): # NaiveBayes classifiers have a somewhat different interface. # FIXME SOON! continue yield check_class_weight_classifiers, name, Classifier
def test_class_weight_auto_classifiers(): # Test that class_weight="auto" improves f1-score # This test is broken; its success depends on: # * a rare fortuitous RNG seed for make_classification; and # * the use of binary F1 over a seemingly arbitrary positive class for two # datasets, and weighted average F1 for the third. # Its expectations need to be clarified and reimplemented. raise SkipTest('This test requires redefinition') classifiers = all_estimators(type_filter='classifier') clean_warning_registry() with warnings.catch_warnings(record=True): classifiers = [ c for c in classifiers if 'class_weight' in c[1]().get_params().keys() ] for n_classes, weights in zip([2, 3], [[.8, .2], [.8, .1, .1]]): # create unbalanced dataset X, y = make_classification(n_classes=n_classes, n_samples=200, n_features=10, weights=weights, random_state=0, n_informative=n_classes) X = StandardScaler().fit_transform(X) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, random_state=0) for name, Classifier in classifiers: if (name != "NuSVC" # the sparse version has a parameter that doesn't do anything and not name.startswith("RidgeClassifier") # RidgeClassifier behaves unexpected # FIXME! and not name.endswith("NB")): # NaiveBayes classifiers have a somewhat different interface. # FIXME SOON! yield (check_class_weight_auto_classifiers, name, Classifier, X_train, y_train, X_test, y_test, weights)
def test_class_weight_auto_linear_classifiers(): classifiers = all_estimators(type_filter='classifier') clean_warning_registry() with warnings.catch_warnings(record=True): linear_classifiers = [ (name, clazz) for name, clazz in classifiers if 'class_weight' in clazz().get_params().keys() and issubclass(clazz, LinearClassifierMixin)] for name, Classifier in linear_classifiers: if name == "LogisticRegressionCV": # Contrary to RidgeClassifierCV, LogisticRegressionCV use actual # CV folds and fit a model for each CV iteration before averaging # the coef. Therefore it is expected to not behave exactly as the # other linear model. continue yield check_class_weight_auto_linear_classifier, name, Classifier
def test_class_weight_classifiers(): # test that class_weight works and that the semantics are consistent classifiers = all_estimators(type_filter='classifier') clean_warning_registry() with warnings.catch_warnings(record=True): classifiers = [ c for c in classifiers if 'class_weight' in c[1]().get_params().keys() ] for name, Classifier in classifiers: if name == "NuSVC": # the sparse version has a parameter that doesn't do anything continue if name.endswith("NB"): # NaiveBayes classifiers have a somewhat different interface. # FIXME SOON! continue yield check_class_weight_classifiers, name, Classifier
def test_class_weight_auto_classifiers(): # Test that class_weight="auto" improves f1-score # This test is broken; its success depends on: # * a rare fortuitous RNG seed for make_classification; and # * the use of binary F1 over a seemingly arbitrary positive class for two # datasets, and weighted average F1 for the third. # Its expectations need to be clarified and reimplemented. raise SkipTest('This test requires redefinition') classifiers = all_estimators(type_filter='classifier') clean_warning_registry() with warnings.catch_warnings(record=True): classifiers = [c for c in classifiers if 'class_weight' in c[1]().get_params().keys()] for n_classes, weights in zip([2, 3], [[.8, .2], [.8, .1, .1]]): # create unbalanced dataset X, y = make_classification(n_classes=n_classes, n_samples=200, n_features=10, weights=weights, random_state=0, n_informative=n_classes) X = StandardScaler().fit_transform(X) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.5, random_state=0) for name, Classifier in classifiers: if (name != "NuSVC" # the sparse version has a parameter that doesn't do anything and not name.startswith("RidgeClassifier") # RidgeClassifier behaves unexpected # FIXME! and not name.endswith("NB")): # NaiveBayes classifiers have a somewhat different interface. # FIXME SOON! yield (check_class_weight_auto_classifiers, name, Classifier, X_train, y_train, X_test, y_test, weights)
def test_all_estimator_no_base_class(): # test that all_estimators doesn't find abstract classes. for name, Estimator in all_estimators(): msg = ("Base estimators such as {0} should not be included" " in all_estimators").format(name) assert_false(name.lower().startswith('base'), msg=msg)