def test_non_deterministic_estimator_skip_tests():
    # check estimators with non_deterministic tag set to True
    # will skip certain tests, refer to issue #22313 for details
    for est in [MinimalTransformer, MinimalRegressor, MinimalClassifier]:
        all_tests = list(_yield_all_checks(est()))
        assert check_methods_sample_order_invariance in all_tests
        assert check_methods_subset_invariance in all_tests

        class Estimator(est):
            def _more_tags(self):
                return {"non_deterministic": True}

        all_tests = list(_yield_all_checks(Estimator()))
        assert check_methods_sample_order_invariance not in all_tests
        assert check_methods_subset_invariance not in all_tests
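A minimal sketch of the estimator side of this mechanism, assuming the `_more_tags` convention used in the test above; the class and its trivial fit/predict are illustrative, not from any library:

import numpy as np
from sklearn.base import BaseEstimator, RegressorMixin

class NoisyRegressor(RegressorMixin, BaseEstimator):
    # Illustrative estimator whose predictions are deliberately
    # non-deterministic, so invariance checks cannot be expected to pass.

    def fit(self, X, y):
        return self

    def predict(self, X):
        # random output: order/subset invariance would fail by design
        return np.random.rand(len(X))

    def _more_tags(self):
        # declaring the tag is all that is needed for _yield_all_checks
        # to drop the two invariance checks asserted on above
        return {"non_deterministic": True}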
def custom_check_estimator(Estimator):
    # Same as sklearn.check_estimator, skipping tests that can't succeed.
    import warnings

    from sklearn.utils.estimator_checks import _yield_all_checks
    from sklearn.utils.testing import SkipTest
    from sklearn.exceptions import SkipTestWarning
    from sklearn.utils import estimator_checks

    estimator = Estimator
    name = type(estimator).__name__

    for check in _yield_all_checks(name, estimator):
        if (check is estimator_checks.check_fit2d_1feature
                or check is estimator_checks.check_fit2d_1sample):
            # X is both Fortran and C aligned and numba can't compile.
            # Opened numba issue 3569
            continue
        if check is estimator_checks.check_classifiers_train:
            continue  # probas don't exactly sum to 1 (very close though)
        if (hasattr(check, 'func')
                and check.func is estimator_checks.check_classifiers_train):
            continue  # same check, wrapped in a functools.partial object
        try:
            check(name, estimator)
        except SkipTest as exception:
            # the only SkipTest thrown currently results from not
            # being able to import pandas.
            warnings.warn(str(exception), SkipTestWarning)
def custom_check_estimator(Estimator):
    # Same as sklearn.check_estimator, skipping tests that can't succeed.
    if isinstance(Estimator, type):
        # got a class
        name = Estimator.__name__
        estimator = Estimator()
        check_parameters_default_constructible(name, Estimator)
        check_no_attributes_set_in_init(name, estimator)
    else:
        # got an instance
        estimator = Estimator
        name = type(estimator).__name__

    for check in _yield_all_checks(name, estimator):
        if (check is estimator_checks.check_estimators_dtypes
                or check is estimator_checks.check_fit_score_takes_y
                or check is estimator_checks.check_dtype_object
                or check is estimator_checks.check_sample_weights_list
                or check is estimator_checks.check_estimators_overwrite_params
                or check is estimator_checks.check_classifiers_classes
                or check is estimator_checks.check_supervised_y_2d
                or check is estimator_checks.check_fit2d_predict1d
                or check is estimator_checks.check_class_weight_classifiers
                or check is estimator_checks.check_methods_subset_invariance
                or check is estimator_checks.check_dont_overwrite_parameters
                or "check_estimators_fit_returns_self" in check.__repr__()
                or "check_classifiers_train" in check.__repr__()):
            continue
        try:
            check(name, estimator)
        except SkipTest as exception:
            # the only SkipTest thrown currently results from not
            # being able to import pandas.
            warnings.warn(str(exception), SkipTestWarning)
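Both wrappers above share one pattern: iterate `_yield_all_checks`, skip unwanted checks by identity or name, and downgrade SkipTest to a warning. A condensed sketch of that pattern, assuming the older two-argument `_yield_all_checks(name, estimator)` signature these snippets use; SKIPPED_CHECKS and run_filtered_checks are illustrative names, not part of any library:

import warnings
from unittest import SkipTest

from sklearn.exceptions import SkipTestWarning
from sklearn.utils.estimator_checks import _yield_all_checks

SKIPPED_CHECKS = frozenset({'check_classifiers_train'})  # assumed contents

def run_filtered_checks(estimator):
    name = type(estimator).__name__
    for check in _yield_all_checks(name, estimator):
        # unwrap functools.partial so name-based matching also catches
        # wrapped variants of a check
        check_name = getattr(check, 'func', check).__name__
        if check_name in SKIPPED_CHECKS:
            continue
        try:
            check(name, estimator)
        except SkipTest as exc:
            warnings.warn(str(exc), SkipTestWarning)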
def test_all_estimators(Estimator):
    estimator = Estimator(model_func=f)
    name = type(estimator).__name__
    # NOTE: many of the check_estimator checks fail with a shape mismatch in
    # scipy.optimize.minpack. This is most likely because the checks generate
    # random test data with specific shapes; since the model function itself
    # is a fitting parameter, the test data cannot be known in advance, and
    # there is no way to specify it because the checks are essentially a
    # private API. For now we skip these tests, but this should be resolved
    # to ensure the estimator actually meets sklearn's standards.
    checks = {'passed': 0, 'failed': 0}
    for check in _yield_all_checks(name, estimator):
        fname = str(check)
        try:
            check(name, estimator)
            print(f'PASSED: {fname}')
            checks['passed'] += 1
        except Exception as e:
            print(f'FAILED: {fname} ... {e}')
            checks['failed'] += 1
            # raise
    print('===========SKLEARN ESTIMATOR CHECKS==========\n')
    print(f"PASSED: {checks['passed']} FAILED: {checks['failed']}")
    print('=============================================\n')
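The `model_func=f` argument above suggests a curve-fit-style estimator parameterized by a user-supplied model function (as in scipy.optimize.curve_fit). A plausible, assumed example of such a function; neither `f` nor its signature is defined in the snippet itself:

def f(x, a, b):
    # simple linear model; the estimator would fit a and b to data,
    # which is why the checks' randomly shaped X can break the fit
    return a * x + b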
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        if name == 'kmodes':
            for check in _yield_all_checks(name, Estimator):
                # Skip these
                if check.__name__ not in ('check_clustering',
                                          'check_dtype_object'):
                    yield _named_check(check, name), name, Estimator
        elif name == 'kprototypes':
            for check in _yield_all_checks(name, Estimator):
                # Only do these
                if check.__name__ in (
                        'check_estimator_sparse_data',
                        'check_clusterer_compute_labels_predict',
                        'check_estimators_partial_fit_n_features'):
                    yield _named_check(check, name), name, Estimator
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.endswith("HMM") or name.startswith("_"):
            continue
        for check in _yield_all_checks(name, Estimator):
            yield check, name, Estimator
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.startswith("_"):
            continue
        for check in _yield_all_checks(name, Estimator):
            yield _named_check(check, name), name, Estimator
def test_non_meta_estimators():
    for name, Estimator in all_estimators():
        estimator = Estimator()
        if name == 'kmodes':
            for check in _yield_all_checks(name, Estimator):
                # Skip these
                if hasattr(check, '__name__'):
                    if check.__name__ not in ('check_clustering',
                                              'check_dtype_object'):
                        yield _named_check(check, name), name, estimator
                else:
                    yield check, name, estimator
        elif name == 'kprototypes':
            for check in _yield_all_checks(name, Estimator):
                # Only do these
                if hasattr(check, '__name__') and check.__name__ in (
                        'check_estimator_sparse_data',
                        'check_clusterer_compute_labels_predict',
                        'check_estimators_partial_fit_n_features'):
                    yield _named_check(check, name), name, estimator
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        estimator = Estimator()
        if name == 'kmodes':
            for check in _yield_all_checks(name, Estimator):
                # Skip these
                if check.__name__ not in ('check_clustering',
                                          'check_dtype_object'):
                    yield _named_check(check, name), name, estimator
def test_non_meta_estimators():
    for name, Estimator in all_estimators():
        if name == 'kmodes':
            relevant_checks = KMODES_INCLUDE_CHECKS
        elif name == 'kprototypes':
            relevant_checks = KPROTOTYPES_INCLUDE_CHECKS
        else:
            raise NotImplementedError
        estimator = Estimator()
        for check in _yield_all_checks(name, estimator):
            if hasattr(check, '__name__') and check.__name__ in relevant_checks:
                yield _named_check(check, name), name, estimator
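The two include lists consulted above are module-level constants not shown in the snippet; their shape can be inferred from the membership test, though the contents below are illustrative assumptions rather than the kmodes test suite's actual lists:

KMODES_INCLUDE_CHECKS = (
    'check_clusterer_compute_labels_predict',   # assumed entry
)
KPROTOTYPES_INCLUDE_CHECKS = (
    'check_estimators_partial_fit_n_features',  # assumed entry
)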
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.startswith("_"):
            continue
        for check in _yield_all_checks(name, Estimator):
            if issubclass(Estimator, ProjectedGradientNMF):
                # The ProjectedGradientNMF class is deprecated
                with ignore_warnings():
                    yield check, name, Estimator
            else:
                yield check, name, Estimator
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.startswith("_"):
            continue
        estimator = Estimator()
        # check this on class
        yield check_no_attributes_set_in_init, name, estimator
        for check in _yield_all_checks(name, estimator):
            set_checking_parameters(estimator)
            yield check, name, estimator
def test_sklearn_integration(self):
    # we cannot use `check_estimator` directly since there is no skip test mechanism
    for name, estimator in ((lgb.sklearn.LGBMClassifier.__name__, lgb.sklearn.LGBMClassifier),
                            (lgb.sklearn.LGBMRegressor.__name__, lgb.sklearn.LGBMRegressor)):
        check_parameters_default_constructible(name, estimator)
        # we cannot leave default params (see https://github.com/microsoft/LightGBM/issues/833)
        estimator = estimator(min_child_samples=1, min_data_in_bin=1)
        for check in _yield_all_checks(name, estimator):
            check_name = check.func.__name__ if hasattr(check, 'func') else check.__name__
            if check_name == 'check_estimators_nan_inf':
                continue  # skip test because LightGBM deals with nan
            try:
                check(name, estimator)
            except SkipTest as message:
                warnings.warn(message, SkipTestWarning)
def test_sklearn_integration(self):
    # we cannot use `check_estimator` directly since there is no skip test mechanism
    for name, estimator in ((lgb.sklearn.LGBMClassifier.__name__, lgb.sklearn.LGBMClassifier),
                            (lgb.sklearn.LGBMRegressor.__name__, lgb.sklearn.LGBMRegressor)):
        check_parameters_default_constructible(name, estimator)
        check_no_fit_attributes_set_in_init(name, estimator)
        # we cannot leave default params (see https://github.com/Microsoft/LightGBM/issues/833)
        estimator = estimator(min_child_samples=1, min_data_in_bin=1)
        for check in _yield_all_checks(name, estimator):
            if check.__name__ == 'check_estimators_nan_inf':
                continue  # skip test because LightGBM deals with nan
            try:
                check(name, estimator)
            except SkipTest as message:
                warnings.warn(message, SkipTestWarning)
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.startswith("_"):
            continue
        estimator = Estimator()
        # check this on class
        yield check_no_fit_attributes_set_in_init, name, Estimator
        for check in _yield_all_checks(name, estimator):
            set_checking_parameters(estimator)
            yield check, name, estimator
def test_sklearn_integration(self):
    # sklearn <0.19 cannot accept an instance, and many tests only pass
    # with min_data=1 and min_data_in_bin=1
    if sklearn_at_least_019:
        # we cannot use `check_estimator` directly since there is no skip test mechanism
        for name, estimator in ((lgb.sklearn.LGBMClassifier.__name__, lgb.sklearn.LGBMClassifier),
                                (lgb.sklearn.LGBMRegressor.__name__, lgb.sklearn.LGBMRegressor)):
            check_parameters_default_constructible(name, estimator)
            check_no_fit_attributes_set_in_init(name, estimator)
            # we cannot leave default params (see https://github.com/Microsoft/LightGBM/issues/833)
            estimator = estimator(min_child_samples=1, min_data_in_bin=1)
            for check in _yield_all_checks(name, estimator):
                if check.__name__ == 'check_estimators_nan_inf':
                    continue  # skip test because LightGBM deals with nan
                try:
                    check(name, estimator)
                except SkipTest as message:
                    warnings.warn(message, SkipTestWarning)
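The three test_sklearn_integration variants above differ mainly in how they name a check: on newer scikit-learn releases some checks arrive wrapped in functools.partial, where `check.__name__` raises AttributeError, which is why the first variant guards with `hasattr(check, 'func')`. A sketch of the general form of that guard (the helper name is illustrative):

def _check_name(check):
    # works for plain check functions and functools.partial wrappers alike
    return getattr(check, 'func', check).__name__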
def method(self):
    failed_checks = set()
    passed_checks = set()
    class_name = epoint[1]
    print("\n======== now Estimator is %s =========== " % class_name)
    mod = __import__('nimbusml.' + epoint[0], fromlist=[str(class_name)])
    the_class = getattr(mod, class_name)
    if class_name in INSTANCES:
        estimator = INSTANCES[class_name]
    else:
        estimator = the_class()

    if estimator._use_single_input_as_string():
        estimator = estimator << 'F0'

    for check in _yield_all_checks(class_name, estimator):
        # Skip check_dict_unchanged for estimators which
        # update the classes_ attribute. For more details
        # see https://github.com/microsoft/NimbusML/pull/200
        if (check.__name__ == 'check_dict_unchanged') and \
                (hasattr(estimator, 'predict_proba') or
                 hasattr(estimator, 'decision_function')):
            continue

        if check.__name__ in OMITTED_CHECKS_ALWAYS:
            continue
        if 'Binary' in class_name and check.__name__ in NOBINARY_CHECKS:
            continue
        if class_name in OMITTED_CHECKS and \
                check.__name__ in OMITTED_CHECKS[class_name]:
            continue
        if class_name in OMITTED_CHECKS_TUPLE[0] and \
                check.__name__ in OMITTED_CHECKS_TUPLE[1]:
            continue

        try:
            check(class_name, estimator.clone())
            passed_checks.add(check.__name__)
        except Exception as e:
            failed_checks.add(check.__name__)

    if len(failed_checks) > 0:
        self.fail(msg=str(failed_checks))
        continue
    # skip SymSgdBinaryClassifier for now, because of crashes.
    if 'SymSgdBinaryClassifier' in class_name:
        continue
    mod = __import__('nimbusml.' + e[0], fromlist=[str(class_name)])
    the_class = getattr(mod, class_name)
    if class_name in INSTANCES:
        estimator = INSTANCES[class_name]
    else:
        estimator = the_class()

    if estimator._use_single_input_as_string():
        estimator = estimator << 'F0'

    for check in _yield_all_checks(class_name, estimator):
        # Skip check_dict_unchanged for estimators which
        # update the classes_ attribute. For more details
        # see https://github.com/microsoft/NimbusML/pull/200
        if (check.__name__ == 'check_dict_unchanged') and \
                (hasattr(estimator, 'predict_proba') or
                 hasattr(estimator, 'decision_function')):
            continue
        if check.__name__ in OMITTED_CHECKS_ALWAYS:
            continue
        if 'Binary' in class_name and check.__name__ in NOBINARY_CHECKS:
            continue
        if class_name in OMITTED_CHECKS and \
                check.__name__ in OMITTED_CHECKS[class_name]:
            continue
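The skip tables consulted by the two NimbusML snippets above are module-level constants; their shapes can be inferred from how they are indexed, though the contents below are illustrative assumptions rather than NimbusML's actual lists:

OMITTED_CHECKS_ALWAYS = {'check_estimators_nan_inf'}  # check names skipped for every estimator
NOBINARY_CHECKS = {'check_classifiers_classes'}  # checks skipped for 'Binary' learners
OMITTED_CHECKS = {
    'AveragedPerceptronBinaryClassifier': {'check_dict_unchanged'},  # per-class skips
}
OMITTED_CHECKS_TUPLE = (
    ['EnsembleClassifier'],        # [0]: class names the skips apply to
    {'check_fit2d_predict1d'},     # [1]: check names skipped for those classes
)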