def test_print_overloading(): """Check that printing a program object results in 'pretty' output""" params = {'function_set': ['add2', 'sub2', 'mul2', 'div2'], 'arities': {2: ['add2', 'sub2', 'mul2', 'div2']}, 'init_depth': (2, 6), 'init_method': 'half and half', 'n_features': 10, 'const_range': (-1.0, 1.0), 'metric': 'mean absolute error', 'p_point_replace': 0.05, 'parsimony_coefficient': 0.1} random_state = check_random_state(415) test_gp = ['mul2', 'div2', 8, 1, 'sub2', 9, .5] gp = _Program(random_state=random_state, program=test_gp, **params) orig_stdout = sys.stdout try: out = StringIO() sys.stdout = out print(gp) output = out.getvalue().strip() finally: sys.stdout = orig_stdout lisp = "mul(div(X8, X1), sub(X9, 0.500))" assert_true(output == lisp)
def test_n_iter_without_progress(): # Make sure that the parameter n_iter_without_progress is used correctly random_state = check_random_state(0) X = random_state.randn(100, 2) tsne = TSNE(n_iter_without_progress=2, verbose=2, random_state=0, method='exact') old_stdout = sys.stdout sys.stdout = StringIO() try: tsne.fit_transform(X) finally: out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout # The output needs to contain the value of n_iter_without_progress assert_in( "did not make any progress during the " "last 2 episodes. Finished.", out)
def test_sparse_and_verbose(): # Make sure RBM works with sparse input when verbose=True old_stdout = sys.stdout sys.stdout = StringIO() from scipy.sparse import csc_matrix X = csc_matrix([[0.], [1.]]) rbm = BernoulliRBM(n_components=2, batch_size=2, n_iter=1, random_state=42, verbose=True) try: rbm.fit(X) s = sys.stdout.getvalue() # make sure output is sound assert_true( re.match( r"\[BernoulliRBM\] Iteration 1," r" pseudo-likelihood = -?(\d)+(\.\d+)?," r" time = (\d|\.)+s", s)) finally: sys.stdout = old_stdout
def test_verbose(): # Verbose options write to stdout. random_state = check_random_state(0) tsne = TSNE(verbose=2) X = random_state.randn(5, 2) old_stdout = sys.stdout sys.stdout = StringIO() try: tsne.fit_transform(X) finally: out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout assert ("[t-SNE]" in out) assert ("Computing pairwise distances" in out) assert ("Computed conditional probabilities" in out) assert ("Mean sigma" in out) assert ("Finished" in out) assert ("early exaggeration" in out) assert ("Finished" in out)
def test_n_iter_without_progress(): # Use a dummy negative n_iter_without_progress and check output on stdout random_state = check_random_state(0) X = random_state.randn(100, 10) for method in ["barnes_hut", "exact"]: tsne = TSNE(n_iter_without_progress=-1, verbose=2, learning_rate=1e8, random_state=0, method=method, n_iter=351, init="random") tsne._N_ITER_CHECK = 1 tsne._EXPLORATION_N_ITER = 0 old_stdout = sys.stdout sys.stdout = StringIO() try: tsne.fit_transform(X) finally: out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout # The output needs to contain the value of n_iter_without_progress assert_in("did not make any progress during the " "last -1 episodes. Finished.", out)
def test_grid_search(): # Test that the best estimator contains the right value for foo_param clf = MockClassifier() grid_search = GridSearchCV(clf, {'foo_param': [1, 2, 3]}, verbose=3) # make sure it selects the smallest parameter in case of ties old_stdout = sys.stdout sys.stdout = StringIO() grid_search.fit(X, y) sys.stdout = old_stdout assert_equal(grid_search.best_estimator_.foo_param, 2) for i, foo_i in enumerate([1, 2, 3]): assert_true(grid_search.grid_scores_[i][0] == {'foo_param': foo_i}) # Smoke test the score etc: grid_search.score(X, y) grid_search.predict_proba(X) grid_search.decision_function(X) grid_search.transform(X) # Test exception handling on scoring grid_search.scoring = 'sklearn' assert_raises(ValueError, grid_search.fit, X, y)
def test_n_iter_without_progress(): # Use a dummy negative n_iter_without_progress and check output on stdout random_state = check_random_state(0) X = random_state.randn(100, 2) tsne = TSNE(n_iter_without_progress=-1, verbose=2, random_state=1, method='exact') old_stdout = sys.stdout sys.stdout = StringIO() try: tsne.fit_transform(X) finally: out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout # The output needs to contain the value of n_iter_without_progress assert_in( "did not make any progress during the " "last -1 episodes. Finished.", out)
def test_k_means_function(): # test calling the k_means function directly # catch output old_stdout = sys.stdout sys.stdout = StringIO() try: cluster_centers, labels, inertia = k_means(X, n_clusters=n_clusters, verbose=True) finally: sys.stdout = old_stdout centers = cluster_centers assert_equal(centers.shape, (n_clusters, n_features)) labels = labels assert_equal(np.unique(labels).shape[0], n_clusters) # check that the labels assignment are perfect (up to a permutation) assert_equal(v_measure_score(true_labels, labels), 1.0) assert_greater(inertia, 0.0) # check warning when centers are passed assert_warns(RuntimeWarning, k_means, X, n_clusters=n_clusters, init=centers) # to many clusters desired assert_raises(ValueError, k_means, X, n_clusters=X.shape[0] + 1) # kmeans for algorithm='elkan' raises TypeError on sparse matrix assert_raise_message(TypeError, "algorithm='elkan' not supported for " "sparse input X", k_means, X=X_csr, n_clusters=2, algorithm="elkan")
def test_more_verbose_output(): # Check verbose=2 does not cause error. from sklearn.externals.six.moves import cStringIO as StringIO import sys old_stdout = sys.stdout sys.stdout = StringIO() clf = GradientBoostingClassifier(n_estimators=100, random_state=1, verbose=2) clf.fit(X, y) verbose_output = sys.stdout sys.stdout = old_stdout # check output verbose_output.seek(0) header = verbose_output.readline().rstrip() # no OOB true_header = ' '.join(['%10s'] + ['%16s'] * 2) % ( 'Iter', 'Train Loss', 'Remaining Time') assert_equal(true_header, header) n_lines = sum(1 for l in verbose_output.readlines()) # 100 lines for n_estimators==100 assert_equal(100, n_lines)
def test_dict_learning_online_verbosity(): n_components = 5 # test verbosity from sklearn.externals.six.moves import cStringIO as StringIO import sys old_stdout = sys.stdout try: sys.stdout = StringIO() dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=1, random_state=0) dico.fit(X) dico = MiniBatchDictionaryLearning(n_components, n_iter=20, verbose=2, random_state=0) dico.fit(X) dict_learning_online(X, n_components=n_components, alpha=1, verbose=1, random_state=0) dict_learning_online(X, n_components=n_components, alpha=1, verbose=2, random_state=0) finally: sys.stdout = old_stdout assert_true(dico.components_.shape == (n_components, n_features))
def test_verbose_output(): # Check verbose=1 does not cause error. from sklearn.externals.six.moves import cStringIO as StringIO import sys old_stdout = sys.stdout sys.stdout = StringIO() clf = GradientBoostingClassifier(n_estimators=100, random_state=1, verbose=1, subsample=0.8) clf.fit(X, y) verbose_output = sys.stdout sys.stdout = old_stdout # check output verbose_output.seek(0) header = verbose_output.readline().rstrip() # with OOB true_header = ' '.join(['%10s'] + ['%16s'] * 3) % ( 'Iter', 'Train Loss', 'OOB Improve', 'Remaining Time') assert_equal(true_header, header) n_lines = sum(1 for l in verbose_output.readlines()) # one for 1-10 and then 9 for 20-100 assert_equal(10 + 9, n_lines)
def test_check_estimator(): # tests that the estimator actually fails on "bad" estimators. # not a complete test of all checks, which are very extensive. # check that we have a set_params and can clone msg = "it does not implement a 'get_params' methods" assert_raises_regex(TypeError, msg, check_estimator, object) # check that we have a fit method msg = "object has no attribute 'fit'" assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator) # check that fit does input validation msg = "TypeError not raised by fit" assert_raises_regex(AssertionError, msg, check_estimator, BaseBadClassifier) # check that predict does input validation (doesn't accept dicts in input) msg = "Estimator doesn't check for NaN and inf in predict" assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict) # check for sparse matrix input handling name = NoSparseClassifier.__name__ msg = "Estimator " + name + " doesn't seem to fail gracefully on sparse data" # the check for sparse input handling prints to the stdout, # instead of raising an error, so as not to remove the original traceback. # that means we need to jump through some hoops to catch it. old_stdout = sys.stdout string_buffer = StringIO() sys.stdout = string_buffer try: check_estimator(NoSparseClassifier) except: pass finally: sys.stdout = old_stdout assert_true(msg in string_buffer.getvalue()) # doesn't error on actual estimator check_estimator(AdaBoostClassifier) check_estimator(MultiTaskElasticNet)
def test_simple(): # Principle of Lars is to keep covariances tied and decreasing # also test verbose output from sklearn.externals.six.moves import cStringIO as StringIO import sys old_stdout = sys.stdout try: sys.stdout = StringIO() alphas_, active, coef_path_ = linear_model.lars_path( diabetes.data, diabetes.target, method="lar", verbose=10) sys.stdout = old_stdout for (i, coef_) in enumerate(coef_path_.T): res = y - np.dot(X, coef_) cov = np.dot(X.T, res) C = np.max(abs(cov)) eps = 1e-3 ocur = len(cov[C - eps < abs(cov)]) if i < X.shape[1]: assert_true(ocur == i + 1) else: # no more than max_pred variables can go into the active set assert_true(ocur == X.shape[1]) finally: sys.stdout = old_stdout
def test_check_estimator(): # tests that the estimator actually fails on "bad" estimators. # not a complete test of all checks, which are very extensive. # check that we have a set_params and can clone msg = "it does not implement a 'get_params' methods" assert_raises_regex(TypeError, msg, check_estimator, object) assert_raises_regex(TypeError, msg, check_estimator, object()) # check that we have a fit method msg = "object has no attribute 'fit'" assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator) assert_raises_regex(AttributeError, msg, check_estimator, BaseEstimator()) # check that fit does input validation msg = "TypeError not raised" assert_raises_regex(AssertionError, msg, check_estimator, BaseBadClassifier) assert_raises_regex(AssertionError, msg, check_estimator, BaseBadClassifier()) # check that sample_weights in fit accepts pandas.Series type try: from pandas import Series # noqa msg = ("Estimator NoSampleWeightPandasSeriesType raises error if " "'sample_weight' parameter is of type pandas.Series") assert_raises_regex( ValueError, msg, check_estimator, NoSampleWeightPandasSeriesType) except ImportError: pass # check that predict does input validation (doesn't accept dicts in input) msg = "Estimator doesn't check for NaN and inf in predict" assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict) assert_raises_regex(AssertionError, msg, check_estimator, NoCheckinPredict()) # check that estimator state does not change # at transform/predict/predict_proba time msg = 'Estimator changes __dict__ during predict' assert_raises_regex(AssertionError, msg, check_estimator, ChangesDict) # check that `fit` only changes attribures that # are private (start with an _ or end with a _). msg = ('Estimator ChangesWrongAttribute should not change or mutate ' 'the parameter wrong_attribute from 0 to 1 during fit.') assert_raises_regex(AssertionError, msg, check_estimator, ChangesWrongAttribute) check_estimator(ChangesUnderscoreAttribute) # check that `fit` doesn't add any public attribute msg = ('Estimator adds public attribute\(s\) during the fit method.' ' Estimators are only allowed to add private attributes' ' either started with _ or ended' ' with _ but wrong_attribute added') assert_raises_regex(AssertionError, msg, check_estimator, SetsWrongAttribute) # check for invariant method name = NotInvariantPredict.__name__ method = 'predict' msg = ("{method} of {name} is not invariant when applied " "to a subset.").format(method=method, name=name) assert_raises_regex(AssertionError, msg, check_estimator, NotInvariantPredict) # check for sparse matrix input handling name = NoSparseClassifier.__name__ msg = "Estimator %s doesn't seem to fail gracefully on sparse data" % name # the check for sparse input handling prints to the stdout, # instead of raising an error, so as not to remove the original traceback. # that means we need to jump through some hoops to catch it. old_stdout = sys.stdout string_buffer = StringIO() sys.stdout = string_buffer try: check_estimator(NoSparseClassifier) except: pass finally: sys.stdout = old_stdout assert_true(msg in string_buffer.getvalue()) # doesn't error on actual estimator check_estimator(AdaBoostClassifier) check_estimator(AdaBoostClassifier()) check_estimator(MultiTaskElasticNet) check_estimator(MultiTaskElasticNet())
def test_gradient_descent_stops(): # Test stopping conditions of gradient descent. class ObjectiveSmallGradient: def __init__(self): self.it = -1 def __call__(self, _): self.it += 1 return (10 - self.it) / 10.0, np.array([1e-5]) def flat_function(_): return 0.0, np.ones(1) # Gradient norm old_stdout = sys.stdout sys.stdout = StringIO() try: _, error, it = _gradient_descent( ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=100, n_iter_without_progress=100, momentum=0.0, learning_rate=0.0, min_gain=0.0, min_grad_norm=1e-5, min_error_diff=0.0, verbose=2) finally: out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout assert_equal(error, 1.0) assert_equal(it, 0) assert("gradient norm" in out) # Error difference old_stdout = sys.stdout sys.stdout = StringIO() try: _, error, it = _gradient_descent( ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=100, n_iter_without_progress=100, momentum=0.0, learning_rate=0.0, min_gain=0.0, min_grad_norm=0.0, min_error_diff=0.2, verbose=2) finally: out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout assert_equal(error, 0.9) assert_equal(it, 1) assert("error difference" in out) # Maximum number of iterations without improvement old_stdout = sys.stdout sys.stdout = StringIO() try: _, error, it = _gradient_descent( flat_function, np.zeros(1), 0, n_iter=100, n_iter_without_progress=10, momentum=0.0, learning_rate=0.0, min_gain=0.0, min_grad_norm=0.0, min_error_diff=-1.0, verbose=2) finally: out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout assert_equal(error, 0.0) assert_equal(it, 11) assert("did not make any progress" in out) # Maximum number of iterations old_stdout = sys.stdout sys.stdout = StringIO() try: _, error, it = _gradient_descent( ObjectiveSmallGradient(), np.zeros(1), 0, n_iter=11, n_iter_without_progress=100, momentum=0.0, learning_rate=0.0, min_gain=0.0, min_grad_norm=0.0, min_error_diff=0.0, verbose=2) finally: out = sys.stdout.getvalue() sys.stdout.close() sys.stdout = old_stdout assert_equal(error, 0.0) assert_equal(it, 10) assert("Iteration 10" in out)
def test_print_overloading_estimator(): """Check that printing a fitted estimator results in 'pretty' output""" random_state = check_random_state(415) X = np.reshape(random_state.uniform(size=50), (5, 10)) y = random_state.uniform(size=5) # Check the regressor est = SymbolicRegressor(generations=2, random_state=0) # Unfitted orig_stdout = sys.stdout try: out = StringIO() sys.stdout = out print(est) output_unfitted = out.getvalue().strip() finally: sys.stdout = orig_stdout # Fitted est.fit(X, y) orig_stdout = sys.stdout try: out = StringIO() sys.stdout = out print(est) output_fitted = out.getvalue().strip() finally: sys.stdout = orig_stdout orig_stdout = sys.stdout try: out = StringIO() sys.stdout = out print(est._program) output_program = out.getvalue().strip() finally: sys.stdout = orig_stdout assert_true(output_unfitted != output_fitted) assert_true(output_unfitted == est.__repr__()) assert_true(output_fitted == output_program) # Check the transformer est = SymbolicTransformer(generations=2, random_state=0) # Unfitted orig_stdout = sys.stdout try: out = StringIO() sys.stdout = out print(est) output_unfitted = out.getvalue().strip() finally: sys.stdout = orig_stdout # Fitted est.fit(X, y) orig_stdout = sys.stdout try: out = StringIO() sys.stdout = out print(est) output_fitted = out.getvalue().strip() finally: sys.stdout = orig_stdout orig_stdout = sys.stdout try: out = StringIO() sys.stdout = out output = str([gp.__str__() for gp in est]) print(output.replace("',", ",\n").replace("'", "")) output_program = out.getvalue().strip() finally: sys.stdout = orig_stdout assert_true(output_unfitted != output_fitted) assert_true(output_unfitted == est.__repr__()) assert_true(output_fitted == output_program)