def test_more_verbose_output():
    """Check verbose=2 does not cause error"""
    old_stdout = sys.stdout
    old_stderr = sys.stderr
    sys.stdout = StringIO()
    sys.stderr = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=2)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    joblib_output = sys.stderr
    sys.stdout = old_stdout
    sys.stderr = old_stderr

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    header2 = verbose_output.readline().rstrip()
    header3 = verbose_output.readline().rstrip()
    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(10, n_lines)

    joblib_output.seek(0)
    n_lines = sum(1 for l in joblib_output.readlines())
    assert_equal(20, n_lines)
def test_get_subtree():
    """Check that get subtree does the same thing for self and new programs"""
    params = {'function_set': [add2, sub2, mul2, div2],
              'arities': {2: [add2, sub2, mul2, div2]},
              'init_depth': (2, 6),
              'init_method': 'half and half',
              'n_features': 10,
              'const_range': (-1.0, 1.0),
              'metric': 'mean absolute error',
              'p_point_replace': 0.05,
              'parsimony_coefficient': 0.1}
    random_state = check_random_state(415)

    # Test for a small program
    test_gp = [mul2, div2, 8, 1, sub2, 9, .5]
    gp = _Program(random_state=random_state, program=test_gp, **params)

    self_test = gp.get_subtree(check_random_state(0))
    external_test = gp.get_subtree(check_random_state(0), test_gp)

    assert_equal(self_test, external_test)
def check_classifiers_input_shapes(name, Classifier):
    iris = load_iris()
    X, y = iris.data, iris.target
    X, y = shuffle(X, y, random_state=1)
    X = StandardScaler().fit_transform(X)
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        classifier = Classifier()
    set_fast_parameters(classifier)
    set_random_state(classifier)
    # fit
    classifier.fit(X, y)
    y_pred = classifier.predict(X)

    set_random_state(classifier)
    # Check that when a 2D y is given, a DataConversionWarning is
    # raised
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always", DataConversionWarning)
        warnings.simplefilter("ignore", RuntimeWarning)
        classifier.fit(X, y[:, np.newaxis])
    msg = "expected 1 DataConversionWarning, got: %s" % (
        ", ".join([str(w_x) for w_x in w]))
    assert_equal(len(w), 1, msg)
    assert_array_equal(y_pred, classifier.predict(X))
def test_set_random_state():
    lda = LDA()
    tree = DecisionTreeClassifier()
    # LDA doesn't have random state: smoke test
    set_random_state(lda, 3)
    set_random_state(tree, 3)
    assert_equal(tree.random_state, 3)
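# The helper exercised above is simple to reason about: it only touches
# estimators that actually expose a ``random_state`` parameter, which is why
# calling it on LDA is just a smoke test. A minimal sketch of that behavior
# (a hypothetical re-implementation for illustration, not the library source):
def _set_random_state_sketch(estimator, random_state=0):
    """Set ``random_state`` only if the estimator accepts it."""
    if 'random_state' in estimator.get_params():
        estimator.set_params(random_state=random_state)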
def check_clustering(name, Alg):
    X, y = make_blobs(n_samples=50, random_state=1)
    X, y = shuffle(X, y, random_state=7)
    X = StandardScaler().fit_transform(X)
    n_samples, n_features = X.shape
    # catch deprecation and neighbors warnings
    with warnings.catch_warnings(record=True):
        alg = Alg()
    set_fast_parameters(alg)
    if hasattr(alg, "n_clusters"):
        alg.set_params(n_clusters=3)
    set_random_state(alg)
    if name == 'AffinityPropagation':
        alg.set_params(preference=-100)
        alg.set_params(max_iter=100)

    # fit
    alg.fit(X)
    # with lists
    alg.fit(X.tolist())

    assert_equal(alg.labels_.shape, (n_samples,))
    pred = alg.labels_
    assert_greater(adjusted_rand_score(pred, y), 0.4)
    # fit another time with ``fit_predict`` and compare results
    if name == 'SpectralClustering':
        # there is no way to make Spectral clustering deterministic :(
        return
    set_random_state(alg)
    with warnings.catch_warnings(record=True):
        pred2 = alg.fit_predict(X)
    assert_array_equal(pred, pred2)
def test_more_verbose_output():
    """Check verbose=2 does not cause error"""
    old_stdout = sys.stdout
    old_stderr = sys.stderr
    sys.stdout = StringIO()
    sys.stderr = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=2)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    joblib_output = sys.stderr
    sys.stdout = old_stdout
    sys.stderr = old_stderr

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    header2 = verbose_output.readline().rstrip()
    header3 = verbose_output.readline().rstrip()
    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(20, n_lines)

    joblib_output.seek(0)
    n_lines = sum(1 for l in joblib_output.readlines())
    # New version of joblib appears to output to sys.stderr
    assert_equal(0, n_lines % 10)
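# Aside: the save/replace/restore dance around ``sys.stdout`` above predates
# ``contextlib.redirect_stdout`` (Python 3.4+). A sketch of the same capture
# with the context manager, assuming the same ``est`` and ``boston`` fixtures:
def _example_capture_with_redirect_stdout(est):
    from contextlib import redirect_stdout
    from io import StringIO

    buf = StringIO()
    with redirect_stdout(buf):
        est.fit(boston.data, boston.target)
    # everything printed during fit is now in ``buf``
    return len(buf.getvalue().splitlines())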
def test_compute_class_weight_auto_unordered():
    # Test compute_class_weight when classes are unordered
    classes = np.array([1, 0, 3])
    y = np.asarray([1, 0, 0, 3, 3, 3])
    cw = compute_class_weight("auto", classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1.636, 0.818, 0.545]), decimal=3)
def test_shuffle_on_ndim_equals_three():
    def to_tuple(A):  # to make the inner arrays hashable
        return tuple(tuple(tuple(C) for C in B) for B in A)

    A = np.array([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])  # A.shape = (2, 2, 2)
    S = set(to_tuple(A))
    shuffle(A)  # shouldn't raise a ValueError for dim = 3
    assert_equal(set(to_tuple(A)), S)
def test_safe_mask():
    random_state = check_random_state(0)
    X = random_state.rand(5, 4)
    X_csr = sp.csr_matrix(X)
    mask = [False, False, True, True, True]

    mask = safe_mask(X, mask)
    assert_equal(X[mask].shape[0], 3)

    mask = safe_mask(X_csr, mask)
    assert_equal(X_csr[mask].shape[0], 3)
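# What ``safe_mask`` guards against: a scipy sparse matrix cannot always be
# indexed with a boolean mask directly, so the mask is converted to integer
# indices when the input is sparse. A hedged sketch of the idea (it mirrors,
# but is not copied from, the library helper):
def _safe_mask_sketch(X, mask):
    mask = np.asarray(mask)
    if hasattr(X, 'toarray'):  # sparse input: boolean masks may not work
        return np.arange(mask.shape[0])[mask]  # integer indices instead
    return mask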
def test_gridsearch():
    """Check that SymbolicRegressor can be grid-searched"""
    # Grid search parsimony_coefficient
    parameters = {'parsimony_coefficient': [0.001, 0.1, 'auto']}
    clf = SymbolicRegressor(population_size=50, generations=5,
                            tournament_size=5, random_state=0)
    grid = GridSearchCV(clf, parameters, scoring='mean_absolute_error')
    grid.fit(boston.data, boston.target)
    expected = {'parsimony_coefficient': 'auto'}
    assert_equal(grid.best_params_, expected)
def test_gridsearch():
    """Check that SymbolicRegressor can be grid-searched"""
    # Grid search parsimony_coefficient
    parameters = {'parsimony_coefficient': [0.001, 0.1, 'auto']}
    clf = SymbolicRegressor(population_size=50, generations=5,
                            tournament_size=5, random_state=0)
    grid = GridSearchCV(clf, parameters, scoring='mean_absolute_error')
    grid.fit(boston.data, boston.target)
    expected = {'parsimony_coefficient': 0.001}
    assert_equal(grid.best_params_, expected)
def test_verbose_output():
    """Check verbose=1 does not cause error"""
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    true_header = '%4s|%-25s|%-42s|' % (' ', 'Population Average'.center(25),
                                        'Best Individual'.center(42))
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()
    header_fields = ('Gen', 'Length', 'Fitness', 'Length', 'Fitness',
                     'OOB Fitness', 'Time Left')
    true_header = '%4s %8s %16s %8s %16s %16s %10s' % header_fields
    assert_equal(true_header, header3)

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(10, n_lines)
def test_verbose_output():
    """Check verbose=1 does not cause error"""
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    true_header = '%4s|%-25s|%-42s|' % (' ', 'Population Average'.center(25),
                                        'Best Individual'.center(42))
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()
    header_fields = ('Gen', 'Length', 'Fitness', 'Length', 'Fitness',
                     'OOB Fitness', 'Time Left')
    true_header = '%4s %8s %16s %8s %16s %16s %10s' % header_fields
    assert_equal(true_header, header3)

    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(20, n_lines)
def test_compute_class_weight_auto_negative():
    # Test compute_class_weight when labels are negative
    # Test with balanced class labels.
    classes = np.array([-2, -1, 0])
    y = np.asarray([-1, -1, 0, 0, -2, -2])
    cw = compute_class_weight("auto", classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([1., 1., 1.]))

    # Test with unbalanced class labels.
    y = np.asarray([-1, 0, 0, -2, -2, -2])
    cw = compute_class_weight("auto", classes, y)
    assert_almost_equal(cw.sum(), classes.shape)
    assert_equal(len(cw), len(classes))
    assert_array_almost_equal(cw, np.array([0.545, 1.636, 0.818]), decimal=3)
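# The values asserted in the two "auto" tests above are consistent with an
# inverse-frequency heuristic rescaled so the weights average to 1 (which is
# why ``cw.sum()`` equals ``len(classes)``). A sketch inferred from the
# expected values, not copied from the library source:
def _auto_class_weight_sketch(classes, y):
    counts = np.array([np.sum(y == c) for c in classes], dtype=float)
    recip_freq = 1.0 / counts
    return recip_freq / recip_freq.mean()

# e.g. classes=[1, 0, 3], y=[1, 0, 0, 3, 3, 3] gives counts [1, 2, 3] and
# weights [1.636, 0.818, 0.545], matching the unordered test above.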
def test_verbose_with_oob():
    """Check oob scoring for subsample does not cause error"""
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(max_samples=0.9, random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    header2 = verbose_output.readline().rstrip()
    header3 = verbose_output.readline().rstrip()
    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(20, n_lines)
def test_verbose_with_oob():
    """Check oob scoring for subsample does not cause error"""
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    est = SymbolicRegressor(max_samples=0.9, random_state=0, verbose=1)
    est.fit(boston.data, boston.target)
    verbose_output = sys.stdout
    sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    header2 = verbose_output.readline().rstrip()
    header3 = verbose_output.readline().rstrip()
    n_lines = sum(1 for l in verbose_output.readlines())
    assert_equal(10, n_lines)
def check_parameters_default_constructible(name, Estimator):
    # test default-constructibility
    # get rid of deprecation warnings
    with warnings.catch_warnings(record=True):
        estimator = Estimator()
        # test cloning
        clone(estimator)
        # test __repr__
        repr(estimator)
        # test that set_params returns self
        assert_true(isinstance(estimator.set_params(), Estimator))

        # test if init does nothing but set parameters
        # this is important for grid_search etc.
        # We get the default parameters from init and then
        # compare these against the actual values of the attributes.

        # this comes from getattr. Gets rid of deprecation decorator.
        init = getattr(estimator.__init__, 'deprecated_original',
                       estimator.__init__)
        try:
            args, varargs, kws, defaults = inspect.getargspec(init)
        except TypeError:
            # init is not a python function.
            # true for mixins
            return
        params = estimator.get_params()
        args = args[1:]  # drop 'self'
        if args:
            # non-empty list
            assert_equal(len(args), len(defaults))
        else:
            return
        for arg, default in zip(args, defaults):
            if arg not in params.keys():
                # deprecated parameter, not in get_params
                assert_true(default is None)
                continue
            if isinstance(params[arg], np.ndarray):
                assert_array_equal(params[arg], default)
            else:
                assert_equal(params[arg], default)
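# Portability note: ``inspect.getargspec`` used above was removed in
# Python 3.11. A sketch of equivalent introspection with
# ``inspect.signature`` (illustrative, not the library's code):
def _init_args_and_defaults_sketch(init):
    import inspect
    sig = inspect.signature(init)
    args = [p.name for p in sig.parameters.values()
            if p.kind in (p.POSITIONAL_OR_KEYWORD, p.KEYWORD_ONLY)]
    defaults = [p.default for p in sig.parameters.values()
                if p.default is not p.empty]
    return args, defaults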
def test_deprecated():
    # Test whether the deprecated decorator issues appropriate warnings
    # Copied almost verbatim from http://docs.python.org/library/warnings.html

    # First a function...
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")

        @deprecated()
        def ham():
            return "spam"

        spam = ham()

        assert_equal(spam, "spam")  # function must remain usable

        assert_equal(len(w), 1)
        assert_true(issubclass(w[0].category, DeprecationWarning))
        assert_true("deprecated" in str(w[0].message).lower())

    # ... then a class.
    with warnings.catch_warnings(record=True) as w:
        warnings.simplefilter("always")

        @deprecated("don't use this")
        class Ham(object):
            SPAM = 1

        ham = Ham()

        assert_true(hasattr(ham, "SPAM"))

        assert_equal(len(w), 1)
        assert_true(issubclass(w[0].category, DeprecationWarning))
        assert_true("deprecated" in str(w[0].message).lower())
def check_estimators_overwrite_params(name, Estimator):
    X, y = make_blobs(random_state=0, n_samples=9)
    y = multioutput_estimator_convert_y_2d(name, y)
    # some want non-negative input
    X -= X.min()
    with warnings.catch_warnings(record=True):
        # catch deprecation warnings
        estimator = Estimator()

    if name == 'MiniBatchDictLearning' or name == 'MiniBatchSparsePCA':
        # FIXME
        # for MiniBatchDictLearning and MiniBatchSparsePCA
        estimator.batch_size = 1

    set_fast_parameters(estimator)
    set_random_state(estimator)

    # Make a physical copy of the original estimator parameters before
    # fitting.
    params = estimator.get_params()
    original_params = deepcopy(params)

    # Fit the model
    estimator.fit(X, y)

    # Compare the state of the model parameters with the original parameters
    new_params = estimator.get_params()
    for param_name, original_value in original_params.items():
        new_value = new_params[param_name]

        # We should never change or mutate the internal state of input
        # parameters by default. To check this we use the joblib.hash function
        # that introspects recursively any subobjects to compute a checksum.
        # The only exception to this rule of immutable constructor parameters
        # is possible RandomState instance but in this check we explicitly
        # fixed the random_state params recursively to be integer seeds.
        assert_equal(joblib.hash(new_value), joblib.hash(original_value),
                     "Estimator %s should not change or mutate "
                     " the parameter %s from %s to %s during fit."
                     % (name, param_name, original_value, new_value))
def test_get_subtree():
    """Check that get subtree does the same thing for self and new programs"""
    params = {'function_set': ['add2', 'sub2', 'mul2', 'div2'],
              'arities': {2: ['add2', 'sub2', 'mul2', 'div2']},
              'init_depth': (2, 6),
              'init_method': 'half and half',
              'n_features': 10,
              'const_range': (-1.0, 1.0),
              'metric': 'mean absolute error',
              'p_point_replace': 0.05,
              'parsimony_coefficient': 0.1}
    random_state = check_random_state(415)

    # Test for a small program
    test_gp = ['mul2', 'div2', 8, 1, 'sub2', 9, .5]
    gp = _Program(random_state=random_state, program=test_gp, **params)

    self_test = gp.get_subtree(check_random_state(0))
    external_test = gp.get_subtree(check_random_state(0), test_gp)

    assert_equal(self_test, external_test)
def test_warn(self):
    def f():
        warnings.warn("yo")
        return 3

    # Test that assert_warns is not impacted by externally set
    # filters and is reset internally.
    # This is because `clean_warning_registry()` is called internally by
    # assert_warns and clears all previous filters.
    warnings.simplefilter("ignore", UserWarning)
    assert_equal(assert_warns(UserWarning, f), 3)

    # Test that the warning registry is empty after assert_warns
    assert_equal(sys.modules['warnings'].filters, [])

    assert_raises(AssertionError, assert_no_warnings, f)
    assert_equal(assert_no_warnings(lambda x: x, 1), 1)
def test_pickle():
    """Check picklability"""
    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    score = est.score(boston.data[500:, :], boston.target[500:])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    score2 = est2.score(boston.data[500:, :], boston.target[500:])
    assert_equal(score, score2)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)
    est.fit(boston.data[:100, :], boston.target[:100])
    X_new = est.transform(boston.data[500:, :])
    pickle_object = pickle.dumps(est)

    est2 = pickle.loads(pickle_object)
    assert_equal(type(est2), est.__class__)
    X_new2 = est2.transform(boston.data[500:, :])
    assert_array_almost_equal(X_new, X_new2)
def check_classifiers_train(name, Classifier):
    X_m, y_m = make_blobs(random_state=0)
    X_m, y_m = shuffle(X_m, y_m, random_state=7)
    X_m = StandardScaler().fit_transform(X_m)
    # generate binary problem from multi-class one
    y_b = y_m[y_m != 2]
    X_b = X_m[y_m != 2]
    for (X, y) in [(X_m, y_m), (X_b, y_b)]:
        # catch deprecation warnings
        classes = np.unique(y)
        n_classes = len(classes)
        n_samples, n_features = X.shape
        with warnings.catch_warnings(record=True):
            classifier = Classifier()
        if name in ['BernoulliNB', 'MultinomialNB']:
            X -= X.min()
        set_fast_parameters(classifier)
        set_random_state(classifier)
        # raises error on malformed input for fit
        assert_raises(ValueError, classifier.fit, X, y[:-1])

        # fit
        classifier.fit(X, y)
        # with lists
        classifier.fit(X.tolist(), y.tolist())
        assert_true(hasattr(classifier, "classes_"))
        y_pred = classifier.predict(X)
        assert_equal(y_pred.shape, (n_samples,))
        # training set performance
        if name not in ['BernoulliNB', 'MultinomialNB']:
            assert_greater(accuracy_score(y, y_pred), 0.83)

        # raises error on malformed input for predict
        assert_raises(ValueError, classifier.predict, X.T)
        if hasattr(classifier, "decision_function"):
            try:
                # decision_function agrees with predict
                decision = classifier.decision_function(X)
                if n_classes == 2:
                    assert_equal(decision.shape, (n_samples,))
                    dec_pred = (decision.ravel() > 0).astype(np.int)
                    assert_array_equal(dec_pred, y_pred)
                if n_classes == 3:
                    assert_equal(decision.shape, (n_samples, n_classes))
                    assert_array_equal(np.argmax(decision, axis=1), y_pred)
                # raises error on malformed input
                assert_raises(ValueError,
                              classifier.decision_function, X.T)
                # raises error on malformed input for decision_function
                assert_raises(ValueError,
                              classifier.decision_function, X.T)
            except NotImplementedError:
                pass
        if hasattr(classifier, "predict_proba"):
            # predict_proba agrees with predict
            y_prob = classifier.predict_proba(X)
            assert_equal(y_prob.shape, (n_samples, n_classes))
            assert_array_equal(np.argmax(y_prob, axis=1), y_pred)
            # check that probas for all classes sum to one
            assert_array_almost_equal(np.sum(y_prob, axis=1),
                                      np.ones(n_samples))
            # raises error on malformed input
            assert_raises(ValueError, classifier.predict_proba, X.T)
            # raises error on malformed input for predict_proba
            assert_raises(ValueError, classifier.predict_proba, X.T)
def _check_transformer(name, Transformer, X, y):
    if name in ('CCA', 'LocallyLinearEmbedding', 'KernelPCA') and _is_32bit():
        # Those transformers yield non-deterministic output when executed on
        # a 32bit Python. The same transformers are stable on 64bit Python.
        # FIXME: try to isolate a minimalistic reproduction case only
        # depending on numpy & scipy and/or maybe generate a test dataset
        # that does not cause such unstable behaviors.
        msg = name + ' is non deterministic on 32bit Python'
        raise SkipTest(msg)
    n_samples, n_features = np.asarray(X).shape
    # catch deprecation warnings
    with warnings.catch_warnings(record=True):
        transformer = Transformer()
    set_random_state(transformer)
    set_fast_parameters(transformer)

    # fit
    if name in CROSS_DECOMPOSITION:
        y_ = np.c_[y, y]
        y_[::2, 1] *= 2
    else:
        y_ = y

    transformer.fit(X, y_)
    X_pred = transformer.fit_transform(X, y=y_)
    if isinstance(X_pred, tuple):
        for x_pred in X_pred:
            assert_equal(x_pred.shape[0], n_samples)
    else:
        assert_equal(X_pred.shape[0], n_samples)

    if hasattr(transformer, 'transform'):
        if name in CROSS_DECOMPOSITION:
            X_pred2 = transformer.transform(X, y_)
            X_pred3 = transformer.fit_transform(X, y=y_)
        else:
            X_pred2 = transformer.transform(X)
            X_pred3 = transformer.fit_transform(X, y=y_)
        if isinstance(X_pred, tuple) and isinstance(X_pred2, tuple):
            for x_pred, x_pred2, x_pred3 in zip(X_pred, X_pred2, X_pred3):
                assert_array_almost_equal(
                    x_pred, x_pred2, 2,
                    "fit_transform and transform outcomes not consistent"
                    " in %s" % Transformer)
                assert_array_almost_equal(
                    x_pred, x_pred3, 2,
                    "consecutive fit_transform outcomes not consistent"
                    " in %s" % Transformer)
        else:
            assert_array_almost_equal(
                X_pred, X_pred2, 2,
                "fit_transform and transform outcomes not consistent in %s"
                % Transformer)
            assert_array_almost_equal(
                X_pred, X_pred3, 2,
                "consecutive fit_transform outcomes not consistent in %s"
                % Transformer)

        # raises error on malformed input for transform
        if hasattr(X, 'T'):
            # If it's not an array, it does not have a 'T' property
            assert_raises(ValueError, transformer.transform, X.T)
def test_genetic_operations():
    """Check all genetic operations are stable and don't change programs"""
    params = {'function_set': ['add2', 'sub2', 'mul2', 'div2'],
              'arities': {2: ['add2', 'sub2', 'mul2', 'div2']},
              'init_depth': (2, 6),
              'init_method': 'half and half',
              'n_features': 10,
              'const_range': (-1.0, 1.0),
              'metric': 'mean absolute error',
              'p_point_replace': 0.05,
              'parsimony_coefficient': 0.1}
    random_state = check_random_state(415)

    # Test for a small program
    test_gp = ['mul2', 'div2', 8, 1, 'sub2', 9, .5]
    donor = ['add2', 0.1, 'sub2', 2, 7]

    gp = _Program(random_state=random_state, program=test_gp, **params)

    assert_equal(gp.reproduce(),
                 ['mul2', 'div2', 8, 1, 'sub2', 9, 0.5])
    assert_equal(gp.program, test_gp)
    assert_equal(gp.crossover(donor, random_state)[0],
                 ['sub2', 2, 7])
    assert_equal(gp.program, test_gp)
    assert_equal(gp.subtree_mutation(random_state)[0],
                 ['mul2', 'div2', 8, 1, 'sub2', 'sub2', 3, 5, 'add2', 6, 3])
    assert_equal(gp.program, test_gp)
    assert_equal(gp.hoist_mutation(random_state)[0],
                 ['div2', 8, 1])
    assert_equal(gp.program, test_gp)
    assert_equal(gp.point_mutation(random_state)[0],
                 ['mul2', 'div2', 8, 1, 'sub2', 9, 0.5])
    assert_equal(gp.program, test_gp)
def test_genetic_operations():
    """Check all genetic operations are stable and don't change programs"""
    params = {'function_set': [add2, sub2, mul2, div2],
              'arities': {2: [add2, sub2, mul2, div2]},
              'init_depth': (2, 6),
              'init_method': 'half and half',
              'n_features': 10,
              'const_range': (-1.0, 1.0),
              'metric': 'mean absolute error',
              'p_point_replace': 0.05,
              'parsimony_coefficient': 0.1}
    random_state = check_random_state(415)

    # Test for a small program
    test_gp = [mul2, div2, 8, 1, sub2, 9, .5]
    donor = [add2, 0.1, sub2, 2, 7]

    gp = _Program(random_state=random_state, program=test_gp, **params)

    assert_equal([f.name if isinstance(f, _Function) else f
                  for f in gp.reproduce()],
                 ['mul', 'div', 8, 1, 'sub', 9, 0.5])
    assert_equal(gp.program, test_gp)
    assert_equal([f.name if isinstance(f, _Function) else f
                  for f in gp.crossover(donor, random_state)[0]],
                 ['sub', 2, 7])
    assert_equal(gp.program, test_gp)
    assert_equal([f.name if isinstance(f, _Function) else f
                  for f in gp.subtree_mutation(random_state)[0]],
                 ['mul', 'div', 8, 1, 'sub', 'sub', 3, 5, 'add', 6, 3])
    assert_equal(gp.program, test_gp)
    assert_equal([f.name if isinstance(f, _Function) else f
                  for f in gp.hoist_mutation(random_state)[0]],
                 ['div', 8, 1])
    assert_equal(gp.program, test_gp)
    assert_equal([f.name if isinstance(f, _Function) else f
                  for f in gp.point_mutation(random_state)[0]],
                 ['mul', 'div', 8, 1, 'sub', 9, 0.5])
    assert_equal(gp.program, test_gp)
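# How to read the flat programs used in these tests: a ``_Program`` stores a
# prefix-order list in which functions consume their operands, integers are
# feature indices and floats are constants, so ``test_gp`` above denotes
# mul(div(X8, X1), sub(X9, 0.5)). A sketch of a decoder, assuming every
# function in the program is binary (true for the test programs here):
def _prefix_to_infix_sketch(program, arity=2):
    it = iter(program)

    def build():
        node = next(it)
        if isinstance(node, str) or isinstance(node, _Function):
            name = node if isinstance(node, str) else node.name
            args = ', '.join(build() for _ in range(arity))
            return '%s(%s)' % (name, args)
        if isinstance(node, int):
            return 'X%d' % node  # feature index
        return repr(float(node))  # constant
    return build()

# _prefix_to_infix_sketch([mul2, div2, 8, 1, sub2, 9, .5])
# -> 'mul(div(X8, X1), sub(X9, 0.5))'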