# Assumed module-level imports for this section (in the full test module
# they appear once at the top of the file; note that ``load_boston`` was
# removed in scikit-learn >= 1.2, so newer environments need a different
# dataset):
import pickle

import numpy as np
from numpy.testing import assert_equal, assert_raises
from sklearn.datasets import load_boston
from sklearn.metrics import mean_absolute_error
from sklearn.utils import check_random_state

from gplearn.fitness import make_fitness, _mean_square_error
from gplearn.genetic import (SymbolicClassifier, SymbolicRegressor,
                             SymbolicTransformer)

boston = load_boston()


def test_parallel_custom_metric():
    """Regression test for running parallel training with custom metric"""
    def _custom_metric(y, y_pred, w):
        """Calculate the root mean square error."""
        return np.sqrt(np.average(((y_pred - y) ** 2), weights=w))

    custom_metric = make_fitness(function=_custom_metric,
                                 greater_is_better=True)
    est = SymbolicRegressor(generations=2,
                            metric=custom_metric,
                            random_state=0,
                            n_jobs=2)
    est.fit(boston.data, boston.target)
    _ = pickle.dumps(est)

    # Unwrapped functions should fail
    custom_metric = make_fitness(function=_custom_metric,
                                 greater_is_better=True,
                                 wrap=False)
    est = SymbolicRegressor(generations=2,
                            metric=custom_metric,
                            random_state=0,
                            n_jobs=2)
    est.fit(boston.data, boston.target)
    assert_raises(AttributeError, pickle.dumps, est)

    # Single threaded will also fail in non-interactive sessions
    est = SymbolicRegressor(generations=2,
                            metric=custom_metric,
                            random_state=0)
    est.fit(boston.data, boston.target)
    assert_raises(AttributeError, pickle.dumps, est)
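
# A note on the wrapping behaviour exercised above: ``_custom_metric`` is
# defined inside the test, so the stdlib pickle module cannot resolve it by
# qualified name. With the default ``wrap=True``, make_fitness wraps the
# callable for serialization (gplearn uses joblib's wrapping of
# non-picklable objects), keeping the fitted estimator picklable; with
# ``wrap=False`` the raw closure is stored and pickling raises
# AttributeError. A minimal standalone sketch of the same failure mode,
# independent of gplearn (this helper is illustrative, not part of the
# original suite):
def _sketch_local_pickle_failure():
    def local_metric(y, y_pred, w):  # locally defined, like _custom_metric
        return 0.0
    try:
        pickle.dumps(local_metric)
    except AttributeError:
        pass  # expected: "Can't pickle local object ..."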
def test_validate_fitness():
    """Check that valid fitness measures are accepted & invalid raise error"""
    # A valid fitness measure should be accepted
    _ = make_fitness(function=_mean_square_error, greater_is_better=True)

    # non-bool greater_is_better
    assert_raises(ValueError, make_fitness, _mean_square_error, 'Sure')
    assert_raises(ValueError, make_fitness, _mean_square_error, 1)

    # non-bool wrap
    assert_raises(ValueError, make_fitness, _mean_square_error, True, 'f')

    # Check arg count tests
    def bad_fun1(x1, x2):
        return 1.0
    assert_raises(ValueError, make_fitness, bad_fun1, True)

    # Check return type tests
    def bad_fun2(x1, x2, w):
        return 'ni'
    assert_raises(ValueError, make_fitness, bad_fun2, True)

    def _custom_metric(y, y_pred, w):
        """Calculate the root mean square error."""
        return np.sqrt(np.average(((y_pred - y) ** 2), weights=w))

    custom_metric = make_fitness(function=_custom_metric,
                                 greater_is_better=True)
    for Symbolic in (SymbolicRegressor, SymbolicTransformer):
        # These should be fine
        est = Symbolic(generations=2, random_state=0, metric=custom_metric)
        est.fit(boston.data, boston.target)
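
# Taken together, the checks above pin down the contract that make_fitness
# validates: the function must accept exactly three arguments
# (y, y_pred, sample_weight) and return a number, while greater_is_better
# and wrap must both be bools.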
def test_custom_regressor_metrics():
    """Check whether greater_is_better works for SymbolicRegressor."""
    x_data = check_random_state(0).uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0] ** 2 + x_data[:, 1] ** 2

    est_gp = SymbolicRegressor(metric='mean absolute error',
                               stopping_criteria=0.000001,
                               random_state=415,
                               parsimony_coefficient=0.001,
                               init_method='full',
                               init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    formula = str(est_gp)
    expected_formula = 'add(mul(X0, X0), mul(X1, X1))'
    assert_equal(expected_formula, formula)

    def neg_mean_absolute_error(y, y_pred, sample_weight):
        return -1 * mean_absolute_error(y, y_pred,
                                        sample_weight=sample_weight)

    customized_fitness = make_fitness(neg_mean_absolute_error,
                                      greater_is_better=True)
    c_est_gp = SymbolicRegressor(metric=customized_fitness,
                                 stopping_criteria=-0.000001,
                                 random_state=415,
                                 parsimony_coefficient=0.001,
                                 verbose=0,
                                 init_method='full',
                                 init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    c_formula = str(c_est_gp)
    assert_equal(expected_formula, c_formula)
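
# The symmetry under test: the built-in 'mean absolute error' metric is
# minimized, while its negation with greater_is_better=True is maximized.
# Both orderings rank programs identically, so the evolved formula must
# match, and stopping_criteria flips sign along with the metric
# (0.000001 vs. -0.000001).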
def test_custom_transformer_metrics():
    """Check whether greater_is_better works for SymbolicTransformer."""
    est_gp = SymbolicTransformer(generations=2,
                                 population_size=100,
                                 hall_of_fame=10,
                                 n_components=1,
                                 metric='pearson',
                                 random_state=415)
    est_gp.fit(boston.data, boston.target)
    expected_formula = ('sub(div(mul(X4, X12), div(X9, X9)), '
                        'sub(div(X11, X12), add(X12, X0)))')
    for program in est_gp:
        formula = str(program)
        assert_equal(expected_formula, formula)

    def _neg_weighted_pearson(y, y_pred, w):
        """Calculate the negative weighted Pearson correlation coefficient."""
        with np.errstate(divide='ignore', invalid='ignore'):
            y_pred_demean = y_pred - np.average(y_pred, weights=w)
            y_demean = y - np.average(y, weights=w)
            corr = ((np.sum(w * y_pred_demean * y_demean) / np.sum(w)) /
                    np.sqrt((np.sum(w * y_pred_demean ** 2) *
                             np.sum(w * y_demean ** 2)) /
                            (np.sum(w) ** 2)))
        if np.isfinite(corr):
            return -1 * np.abs(corr)
        return 0.

    neg_weighted_pearson = make_fitness(function=_neg_weighted_pearson,
                                        greater_is_better=False)
    c_est_gp = SymbolicTransformer(generations=2,
                                   population_size=100,
                                   hall_of_fame=10,
                                   n_components=1,
                                   stopping_criteria=-1,
                                   metric=neg_weighted_pearson,
                                   random_state=415)
    c_est_gp.fit(boston.data, boston.target)
    for program in c_est_gp:
        c_formula = str(program)
        assert_equal(expected_formula, c_formula)
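
# For reference, _neg_weighted_pearson computes the weighted Pearson
# correlation (the np.sum(w) normalization factors in the code cancel):
#
#     r_w = sum(w * (y_pred - m_p) * (y - m_y))
#           / sqrt(sum(w * (y_pred - m_p)**2) * sum(w * (y - m_y)**2))
#
# where m_p and m_y are the w-weighted means. Returning -abs(r_w) means a
# minimized metric (greater_is_better=False) rewards strong correlation of
# either sign, which is why the test expects the same evolved program as
# the built-in 'pearson' metric.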
def test_custom_classifier_metrics():
    """Check whether greater_is_better works for SymbolicClassifier."""
    x_data = check_random_state(0).uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0] ** 2 + x_data[:, 1] ** 2
    y_true = (y_true < y_true.mean()).astype(int)

    est_gp = SymbolicClassifier(metric='log loss',
                                stopping_criteria=0.000001,
                                random_state=415,
                                parsimony_coefficient=0.01,
                                init_method='full',
                                init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    formula = str(est_gp)
    expected_formula = 'sub(0.364, mul(add(X0, X0), add(X0, X0)))'
    assert_equal(expected_formula, formula)

    def negative_log_loss(y, y_pred, w):
        """Calculate the negative log loss."""
        eps = 1e-15
        y_pred = np.clip(y_pred, eps, 1 - eps)
        score = y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)
        return np.average(score, weights=w)

    customized_fitness = make_fitness(negative_log_loss,
                                      greater_is_better=True)
    c_est_gp = SymbolicClassifier(metric=customized_fitness,
                                  stopping_criteria=0.000001,
                                  random_state=415,
                                  parsimony_coefficient=0.01,
                                  init_method='full',
                                  init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    c_formula = str(c_est_gp)
    assert_equal(expected_formula, c_formula)
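
# negative_log_loss returns the weighted mean log-likelihood, i.e. the
# negative of the usual log loss, so maximizing it
# (greater_is_better=True) is equivalent to minimizing the built-in
# 'log loss' metric -- hence the identical expected program.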