Example #1
def test_non_deterministic_estimator_skip_tests():
    # check estimators with non_deterministic tag set to True
    # will skip certain tests, refer to issue #22313 for details
    for est in [MinimalTransformer, MinimalRegressor, MinimalClassifier]:
        all_tests = list(_yield_all_checks(est()))
        assert check_methods_sample_order_invariance in all_tests
        assert check_methods_subset_invariance in all_tests

        class Estimator(est):
            def _more_tags(self):
                return {"non_deterministic": True}

        all_tests = list(_yield_all_checks(Estimator()))
        assert check_methods_sample_order_invariance not in all_tests
        assert check_methods_subset_invariance not in all_tests
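Example #1 targets a newer scikit-learn in which _yield_all_checks takes a single estimator instance; the remaining examples below use the older (name, estimator) signature. As a minimal sketch (an assumption about the installed version, not part of the original test), the checks yielded for an arbitrary estimator can be listed like this:

# Sketch only: assumes a scikit-learn version where _yield_all_checks takes a
# single estimator instance, as in Example #1; older versions expect
# _yield_all_checks(name, estimator) instead.
from sklearn.linear_model import LinearRegression
from sklearn.utils.estimator_checks import _yield_all_checks

for check in _yield_all_checks(LinearRegression()):
    # some checks are functools.partial objects, so unwrap them before
    # reading __name__
    print(getattr(check, "func", check).__name__)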
Example #2
def custom_check_estimator(Estimator):
    # Same as sklearn.check_estimator, skipping tests that can't succeed.

    import warnings

    from sklearn.utils.estimator_checks import _yield_all_checks
    from sklearn.utils.testing import SkipTest
    from sklearn.exceptions import SkipTestWarning
    from sklearn.utils import estimator_checks

    estimator = Estimator
    name = type(estimator).__name__

    for check in _yield_all_checks(name, estimator):
        if (check is estimator_checks.check_fit2d_1feature
                or check is estimator_checks.check_fit2d_1sample):
            # X is both Fortran and C aligned and numba can't compile.
            # Opened numba issue 3569
            continue
        if check is estimator_checks.check_classifiers_train:
            continue  # probas don't exactly sum to 1 (very close though)
        if (hasattr(check, 'func')
                and check.func is estimator_checks.check_classifiers_train):
            continue  # same, wrapped in a functools.partial object.

        try:
            check(name, estimator)
        except SkipTest as exception:
            # the only SkipTest thrown currently results from not
            # being able to import pandas.
            warnings.warn(str(exception), SkipTestWarning)
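Examples #2 and #3 re-implement check_estimator by iterating the private _yield_all_checks and skipping checks that cannot pass. On scikit-learn >= 0.22 the same pattern can be written with the public parametrize_with_checks helper; the sketch below illustrates that alternative (the estimator and the skip list are placeholders, not taken from the snippet above):

import pytest
from sklearn.linear_model import LogisticRegression
from sklearn.utils.estimator_checks import parametrize_with_checks

# hypothetical set of checks known to fail for the estimator under test
KNOWN_FAILING = {"check_classifiers_train"}


@parametrize_with_checks([LogisticRegression()])
def test_sklearn_compatible_estimator(estimator, check):
    name = getattr(check, "func", check).__name__  # checks may be partials
    if name in KNOWN_FAILING:
        pytest.skip(f"{name} is known to fail for this estimator")
    check(estimator)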
Example #3
def custom_check_estimator(Estimator):
    # Same as sklearn.check_estimator, skipping tests that can't succeed.
    if isinstance(Estimator, type):
        # got a class
        name = Estimator.__name__
        estimator = Estimator()
        check_parameters_default_constructible(name, Estimator)
        check_no_attributes_set_in_init(name, estimator)
    else:
        # got an instance
        estimator = Estimator
        name = type(estimator).__name__

    for check in _yield_all_checks(name, estimator):
        if (check is estimator_checks.check_estimators_dtypes
                or check is estimator_checks.check_fit_score_takes_y
                or check is estimator_checks.check_dtype_object
                or check is estimator_checks.check_sample_weights_list
                or check is estimator_checks.check_estimators_overwrite_params
                or check is estimator_checks.check_classifiers_classes
                or check is estimator_checks.check_supervised_y_2d
                or check is estimator_checks.check_fit2d_predict1d
                or check is estimator_checks.check_class_weight_classifiers
                or check is estimator_checks.check_methods_subset_invariance
                or check is estimator_checks.check_dont_overwrite_parameters
                or "check_estimators_fit_returns_self" in check.__repr__()
                or "check_classifiers_train" in check.__repr__()):
            continue
        try:
            check(name, estimator)
        except SkipTest as exception:
            # the only SkipTest thrown currently results from not
            # being able to import pandas.
            warnings.warn(str(exception), SkipTestWarning)
Example #4
def test_all_estimators(Estimator):

    estimator = Estimator(model_func=f)
    name = type(estimator).__name__

    # NOTE: many of the check_estimator checks fail due to shape mismatches in
    # scipy.optimize.minpack.py. This is most likely because the checks generate
    # random test data with specific shapes. Since the model function itself is a
    # fitting parameter, the test data cannot be known in advance, and there is
    # no way to specify it because the checks are essentially a private API. For
    # now we skip these tests, but this should be resolved to ensure the
    # Estimator actually meets sklearn's standards.
    checks = {'passed': 0, 'failed': 0}
    for check in _yield_all_checks(name, estimator):
        fname = str(check)
        try:
            check(name, estimator)
            print(f'PASSED: {fname}')
            checks['passed'] += 1
        except Exception as e:
            print(f'FAILED: {fname} ... {e}')
            checks['failed'] += 1
            # raise

    print('===========SKLEARN ESTIMATOR CHECKS==========\n')
    print(f"PASSED: {checks['passed']}    FAILED: {checks['failed']}")
    print('=============================================\n')
Example #5
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        if name == 'kmodes':
            for check in _yield_all_checks(name, Estimator):
                # Skip these
                if check.__name__ not in ('check_clustering',
                                          'check_dtype_object'):
                    yield _named_check(check, name), name, Estimator
        elif name == 'kprototypes':
            for check in _yield_all_checks(name, Estimator):
                # Only do these
                if check.__name__ in ('check_estimator_sparse_data',
                                      'check_clusterer_compute_labels_predict',
                                      'check_estimators_partial_fit_n_features'):
                    yield _named_check(check, name), name, Estimator
Example #6
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.endswith("HMM") or name.startswith("_"):
            continue
        for check in _yield_all_checks(name, Estimator):
            yield check, name, Estimator
Example #7
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.startswith("_"):
            continue
        for check in _yield_all_checks(name, Estimator):
            yield _named_check(check, name), name, Estimator
Example #8
def test_non_meta_estimators():
    for name, Estimator in all_estimators():
        estimator = Estimator()
        if name == 'kmodes':
            for check in _yield_all_checks(name, Estimator):
                # Skip these
                if hasattr(check, '__name__'):
                    if check.__name__ not in ('check_clustering',
                                              'check_dtype_object'):
                        yield _named_check(check, name), name, estimator
                else:
                    yield check, name, estimator
        elif name == 'kprototypes':
            for check in _yield_all_checks(name, Estimator):
                # Only do these
                if hasattr(check, '__name__') and check.__name__ in (
                        'check_estimator_sparse_data',
                        'check_clusterer_compute_labels_predict',
                        'check_estimators_partial_fit_n_features'):
                    yield _named_check(check, name), name, estimator
Example #9
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        estimator = Estimator()
        if name == 'kmodes':
            for check in _yield_all_checks(name, Estimator):
                # Skip these
                if check.__name__ not in ('check_clustering',
                                          'check_dtype_object'):
                    yield _named_check(check, name), name, estimator
Example #10
def test_non_meta_estimators():
    for name, Estimator in all_estimators():
        if name == 'kmodes':
            relevant_checks = KMODES_INCLUDE_CHECKS
        elif name == 'kprototypes':
            relevant_checks = KPROTOTYPES_INCLUDE_CHECKS
        else:
            raise NotImplementedError
        estimator = Estimator()
        for check in _yield_all_checks(name, estimator):
            if hasattr(check, '__name__') and check.__name__ in relevant_checks:
                yield _named_check(check, name), name, estimator
Example #11
def test_non_meta_estimators():
    for name, Estimator in all_estimators():
        estimator = Estimator()
        if name == 'kmodes':
            for check in _yield_all_checks(name, Estimator):
                # Skip these
                if hasattr(check, '__name__'):
                    if check.__name__ not in ('check_clustering',
                                              'check_dtype_object'):
                        yield _named_check(check, name), name, estimator
                else:
                    yield check, name, estimator
        elif name == 'kprototypes':
            for check in _yield_all_checks(name, Estimator):
                # Only do these
                if hasattr(check, '__name__') and check.__name__ in (
                        'check_estimator_sparse_data',
                        'check_clusterer_compute_labels_predict',
                        'check_estimators_partial_fit_n_features'
                ):
                    yield _named_check(check, name), name, estimator
Example #12
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.startswith("_"):
            continue
        for check in _yield_all_checks(name, Estimator):
            if issubclass(Estimator, ProjectedGradientNMF):
                # The ProjectedGradientNMF class is deprecated
                with ignore_warnings():
                    yield check, name, Estimator
            else:
                yield check, name, Estimator
Example #13
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.startswith("_"):
            continue
        estimator = Estimator()
        # check this on class
        yield check_no_attributes_set_in_init, name, estimator

        for check in _yield_all_checks(name, estimator):
            set_checking_parameters(estimator)
            yield check, name, estimator
Example #14
 def test_sklearn_integration(self):
     # we cannot use `check_estimator` directly since there is no skip test mechanism
     for name, estimator in ((lgb.sklearn.LGBMClassifier.__name__, lgb.sklearn.LGBMClassifier),
                             (lgb.sklearn.LGBMRegressor.__name__, lgb.sklearn.LGBMRegressor)):
         check_parameters_default_constructible(name, estimator)
         # we cannot leave default params (see https://github.com/microsoft/LightGBM/issues/833)
         estimator = estimator(min_child_samples=1, min_data_in_bin=1)
         for check in _yield_all_checks(name, estimator):
             check_name = check.func.__name__ if hasattr(check, 'func') else check.__name__
             if check_name == 'check_estimators_nan_inf':
                 continue  # skip test because LightGBM deals with nan
             try:
                 check(name, estimator)
             except SkipTest as message:
                 warnings.warn(message, SkipTestWarning)
Example #15
 def test_sklearn_integration(self):
     # we cannot use `check_estimator` directly since there is no skip test mechanism
     for name, estimator in ((lgb.sklearn.LGBMClassifier.__name__, lgb.sklearn.LGBMClassifier),
                             (lgb.sklearn.LGBMRegressor.__name__, lgb.sklearn.LGBMRegressor)):
         check_parameters_default_constructible(name, estimator)
         check_no_fit_attributes_set_in_init(name, estimator)
         # we cannot leave default params (see https://github.com/Microsoft/LightGBM/issues/833)
         estimator = estimator(min_child_samples=1, min_data_in_bin=1)
         for check in _yield_all_checks(name, estimator):
             if check.__name__ == 'check_estimators_nan_inf':
                 continue  # skip test because LightGBM deals with nan
             try:
                 check(name, estimator)
             except SkipTest as message:
                 warnings.warn(message, SkipTestWarning)
Example #17
def test_non_meta_estimators():
    # input validation etc for non-meta estimators
    estimators = all_estimators()
    for name, Estimator in estimators:
        if issubclass(Estimator, BiclusterMixin):
            continue
        if name.startswith("_"):
            continue
        estimator = Estimator()
        # check this on class
        yield check_no_fit_attributes_set_in_init, name, Estimator

        for check in _yield_all_checks(name, estimator):
            set_checking_parameters(estimator)
            yield check, name, estimator
Example #18
 def test_sklearn_integration(self):
     # sklearn < 0.19 cannot accept an estimator instance, but many tests can only pass with min_data=1 and min_data_in_bin=1
     if sklearn_at_least_019:
         # we cannot use `check_estimator` directly since there is no skip test mechanism
         for name, estimator in ((lgb.sklearn.LGBMClassifier.__name__, lgb.sklearn.LGBMClassifier),
                                 (lgb.sklearn.LGBMRegressor.__name__, lgb.sklearn.LGBMRegressor)):
             check_parameters_default_constructible(name, estimator)
             check_no_fit_attributes_set_in_init(name, estimator)
             # we cannot leave default params (see https://github.com/Microsoft/LightGBM/issues/833)
             estimator = estimator(min_child_samples=1, min_data_in_bin=1)
             for check in _yield_all_checks(name, estimator):
                 if check.__name__ == 'check_estimators_nan_inf':
                     continue  # skip test because LightGBM deals with nan
                 try:
                     check(name, estimator)
                 except SkipTest as message:
                     warnings.warn(message, SkipTestWarning)
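The sklearn_at_least_019 flag used in Example #18 is defined outside the snippet; a typical definition (an assumption, not the project's actual code) compares the installed scikit-learn version:

# assumed definition of the version flag used above; the original project
# defines it elsewhere
import sklearn
from packaging.version import Version

sklearn_at_least_019 = Version(sklearn.__version__) >= Version("0.19")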
Example #19
        def method(self):
            failed_checks = set()
            passed_checks = set()
            class_name = epoint[1]
            print("\n======== now Estimator is %s =========== " % class_name)

            mod = __import__('nimbusml.' + epoint[0],
                             fromlist=[str(class_name)])
            the_class = getattr(mod, class_name)
            if class_name in INSTANCES:
                estimator = INSTANCES[class_name]
            else:
                estimator = the_class()

            if estimator._use_single_input_as_string():
                estimator = estimator << 'F0'

            for check in _yield_all_checks(class_name, estimator):
                # Skip check_dict_unchanged for estimators which
                # update the classes_ attribute. For more details
                # see https://github.com/microsoft/NimbusML/pull/200
                if (check.__name__ == 'check_dict_unchanged') and \
                    (hasattr(estimator, 'predict_proba') or
                     hasattr(estimator, 'decision_function')):
                    continue

                if check.__name__ in OMITTED_CHECKS_ALWAYS:
                    continue
                if 'Binary' in class_name and check.__name__ in NOBINARY_CHECKS:
                    continue
                if class_name in OMITTED_CHECKS and check.__name__ in \
                        OMITTED_CHECKS[class_name]:
                    continue
                if class_name in OMITTED_CHECKS_TUPLE[0] and check.__name__ in \
                        OMITTED_CHECKS_TUPLE[1]:
                    continue

                try:
                    check(class_name, estimator.clone())
                    passed_checks.add(check.__name__)
                except Exception as e:
                    failed_checks.add(check.__name__)

            if len(failed_checks) > 0:
                self.fail(msg=str(failed_checks))
Example #20
        continue
    # skip SymSgdBinaryClassifier for now, because of crashes.
    if 'SymSgdBinaryClassifier' in class_name:
        continue

    mod = __import__('nimbusml.' + e[0], fromlist=[str(class_name)])
    the_class = getattr(mod, class_name)
    if class_name in INSTANCES:
        estimator = INSTANCES[class_name]
    else:
        estimator = the_class()

    if estimator._use_single_input_as_string():
        estimator = estimator << 'F0'

    for check in _yield_all_checks(class_name, estimator):
        # Skip check_dict_unchanged for estimators which
        # update the classes_ attribute. For more details
        # see https://github.com/microsoft/NimbusML/pull/200
        if (check.__name__ == 'check_dict_unchanged') and \
            (hasattr(estimator, 'predict_proba') or
             hasattr(estimator, 'decision_function')):
            continue

        if check.__name__ in OMITTED_CHECKS_ALWAYS:
            continue
        if 'Binary' in class_name and check.__name__ in NOBINARY_CHECKS:
            continue
        if class_name in OMITTED_CHECKS and check.__name__ in \
                OMITTED_CHECKS[class_name]:
            continue