def test_low_memory():
    """Verify that ``low_memory=True`` leaves no parent generation behind."""
    regressor = SymbolicRegressor(generations=10, random_state=56,
                                  low_memory=True)
    regressor.fit(boston.data, boston.target)

    # The slot of the second-to-last generation must have been cleared.
    parent_generation = regressor._programs[-2]
    assert_true(parent_generation is None)
def main():
    """Fit a symbolic regressor to a two-column data file and print it.

    The data file name is read from ``sys.argv[1]``.  Column 0 is used as
    the single input feature and column 1 as the target.  When no file name
    is supplied, a hint is printed and the process exits with status 1.
    """
    if len(sys.argv) < 2:
        print("Provide data file name!")
        # ``sys.exit`` is always available; the bare ``exit`` helper is only
        # injected by the ``site`` module for interactive use.
        sys.exit(1)
    filename = sys.argv[1]

    # Training samples: one feature column shaped (n_samples, 1).
    x = read_nth_column(0, filename)
    x_train = np.array(x, dtype=float).reshape(-1, 1)
    y = read_nth_column(1, filename)
    y_train = np.array(y, dtype=float)

    # NOTE: the former X_test / y_test re-read the same columns and were
    # never used, so they have been dropped.

    est_gp = SymbolicRegressor(population_size=5000, generations=30,
                               p_crossover=0.7, p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               verbose=1, parsimony_coefficient=0.01,
                               random_state=0,
                               function_set=('add', 'sub', 'mul', 'div',
                                             'sin', 'cos', 'sqrt', 'log'))
    est_gp.fit(x_train, y_train)
    print(est_gp._program)
def gp_model(self, gp_model):
    """Store the GP model(s) on ``self._gp_model``.

    For a regression problem, or when there are exactly two labels, a
    single model is stored.  Otherwise a dict mapping every label in
    ``self.labels`` to a model is stored.  Whenever ``gp_model`` is ``None``
    a new ``SymbolicRegressor`` is built from ``self.gp_hyper_parameters``
    (filling in ``feature_names`` from ``self.feature_names`` if unset);
    otherwise the supplied ``gp_model`` object is used as-is.
    """

    def _default_model():
        # Fall back to the instance's feature names when none were given.
        if self.gp_hyper_parameters.get('feature_names') is None:
            self.gp_hyper_parameters['feature_names'] = self.feature_names
        return SymbolicRegressor(**self.gp_hyper_parameters)

    needs_single_model = (self.problem == 'regression'
                          or len(self.labels) == 2)
    if needs_single_model:
        self._gp_model = _default_model() if gp_model is None else gp_model
        return

    # Multi-class case: one entry per label.  A user-supplied model is
    # stored under every label (the same object each time).
    self._gp_model = {
        label: (_default_model() if gp_model is None else gp_model)
        for label in self.labels
    }
def test_customized_regressor_metrics():
    """Check whether greater_is_better works for SymbolicRegressor.

    The same problem is solved twice: once with the built-in
    'mean absolute error' metric and once with a custom negated metric
    declared as ``greater_is_better=True``.  Both runs are expected to
    produce the same formula.
    """
    x_data = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_true = x_data[:, 0] ** 2 + x_data[:, 1] ** 2

    est_gp = SymbolicRegressor(metric='mean absolute error',
                               stopping_criteria=0.000001, random_state=415,
                               parsimony_coefficient=0.001,
                               init_method='full', init_depth=(2, 4))
    est_gp.fit(x_data, y_true)
    formula = str(est_gp)
    assert_equal('add(mul(X1, X1), mul(X0, X0))', formula, True)

    def neg_mean_absolute_error(y, y_pred, sample_weight):
        # Pass sample_weight by keyword: newer scikit-learn releases make
        # it keyword-only, older ones accept the keyword as well.
        return -1 * mean_absolute_error(y, y_pred,
                                        sample_weight=sample_weight)

    customized_fitness = make_fitness(neg_mean_absolute_error,
                                      greater_is_better=True)

    # The stopping criterion is negated to match the negated metric.
    c_est_gp = SymbolicRegressor(metric=customized_fitness,
                                 stopping_criteria=-0.000001,
                                 random_state=415,
                                 parsimony_coefficient=0.001, verbose=0,
                                 init_method='full', init_depth=(2, 4))
    c_est_gp.fit(x_data, y_true)
    c_formula = str(c_est_gp)
    assert_equal('add(mul(X1, X1), mul(X0, X0))', c_formula, True)
def test_verbose_output():
    """Check verbose=1 does not cause error.

    Captures stdout during fitting, then checks the three header rows and
    the number of progress lines that follow them.
    """
    verbose_output = StringIO()
    old_stdout = sys.stdout
    sys.stdout = verbose_output
    try:
        est = SymbolicRegressor(random_state=0, verbose=1)
        est.fit(boston.data, boston.target)
    finally:
        # Restore stdout even when fitting raises, so that a failure here
        # does not silence the rest of the test run.
        sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    true_header = '%4s|%-25s|%-42s|' % (' ',
                                        'Population Average'.center(25),
                                        'Best Individual'.center(42))
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()
    header_fields = ('Gen', 'Length', 'Fitness', 'Length', 'Fitness',
                     'OOB Fitness', 'Time Left')
    true_header = '%4s %8s %16s %8s %16s %16s %10s' % header_fields
    assert_equal(true_header, header3)

    # Everything after the headers is one line per reported generation.
    n_lines = len(verbose_output.readlines())
    assert_equal(10, n_lines)
def test_verbose_output():
    """Check verbose=1 does not cause error.

    Captures stdout during fitting, then checks the three header rows and
    the number of progress lines that follow them.
    """
    verbose_output = StringIO()
    old_stdout = sys.stdout
    sys.stdout = verbose_output
    try:
        est = SymbolicRegressor(random_state=0, verbose=1)
        est.fit(boston.data, boston.target)
    finally:
        # Restore stdout even when fitting raises, so that a failure here
        # does not silence the rest of the test run.
        sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    # The leading pad spans the 4-character 'Gen' column used by the
    # separator and field rows below; spelling it as ``' ' * 4`` keeps the
    # width explicit.
    true_header = (' ' * 4 + '|{:^25}|{:^42}|').format('Population Average',
                                                       'Best Individual')
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()
    line_format = '{:>4} {:>8} {:>16} {:>8} {:>16} {:>16} {:>10}'
    true_header = line_format.format('Gen', 'Length', 'Fitness', 'Length',
                                     'Fitness', 'OOB Fitness', 'Time Left')
    assert_equal(true_header, header3)

    # Everything after the headers is one line per reported generation.
    n_lines = len(verbose_output.readlines())
    assert_equal(20, n_lines)
def symbolic_regressor(f, npoints, xrange):
    """Fit a symbolic regressor to ``f`` sampled on a grid and score it.

    Parameters
    ----------
    f : callable
        Applied to the sample grid, an array of shape ``(npoints, 1)``, to
        produce the regression targets.
    npoints : int
        Number of evenly spaced sample points.
    xrange : sequence
        ``xrange[0]`` and ``xrange[1]`` are the grid's end points.

    Returns
    -------
    tuple
        ``(sym_reg, R2_perf)``: the simplified sympy expression in the
        symbol ``x`` and the value returned by ``compute_Rsquared`` for the
        expression evaluated on the grid.
    """
    X = np.linspace(xrange[0], xrange[1], npoints).reshape((-1, 1))
    y = f(X)

    est_gp = SymbolicRegressor(population_size=5000, generations=20,
                               stopping_criteria=0.01, p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05, p_point_mutation=0.1,
                               max_samples=0.9, verbose=1,
                               parsimony_coefficient=0.01, random_state=0)
    est_gp.fit(X, y)
    sym_expr = str(est_gp._program)

    # Map the function names appearing in the program string to operators
    # sympy understands.
    converter = {
        'sub': lambda x, y: x - y,
        'div': lambda x, y: x / y,
        'mul': lambda x, y: x * y,
        'add': lambda x, y: x + y,
        'neg': lambda x: -x,
        'pow': lambda x, y: x ** y
    }

    x, X0 = symbols('x X0')
    sym_reg = simplify(sympify(sym_expr, locals=converter))
    sym_reg = sym_reg.subs(X0, x)

    Y_true = y.reshape((-1, 1))
    # Parse the expression once instead of once per sample; only the
    # substitution depends on the sample.
    parsed = sympify(str(sym_reg))
    Y_est = np.array([parsed.subs(x, sample)
                      for sample in X]).reshape((-1, 1))

    R2_perf = compute_Rsquared(Y_true, Y_est)
    return sym_reg, R2_perf
def test_verbose_output():
    """Check verbose=1 does not cause error.

    Captures stdout during fitting, then checks the three header rows and
    the number of progress lines that follow them.
    """
    verbose_output = StringIO()
    old_stdout = sys.stdout
    sys.stdout = verbose_output
    try:
        est = SymbolicRegressor(random_state=0, verbose=1)
        est.fit(boston.data, boston.target)
    finally:
        # Restore stdout even when fitting raises, so that a failure here
        # does not silence the rest of the test run.
        sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    header1 = verbose_output.readline().rstrip()
    true_header = '%4s|%-25s|%-42s|' % (' ',
                                        'Population Average'.center(25),
                                        'Best Individual'.center(42))
    assert_equal(true_header, header1)

    header2 = verbose_output.readline().rstrip()
    true_header = '-' * 4 + ' ' + '-' * 25 + ' ' + '-' * 42 + ' ' + '-' * 10
    assert_equal(true_header, header2)

    header3 = verbose_output.readline().rstrip()
    header_fields = ('Gen', 'Length', 'Fitness', 'Length', 'Fitness',
                     'OOB Fitness', 'Time Left')
    true_header = '%4s %8s %16s %8s %16s %16s %10s' % header_fields
    assert_equal(true_header, header3)

    # Everything after the headers is one line per reported generation.
    n_lines = len(verbose_output.readlines())
    assert_equal(20, n_lines)
def test_more_verbose_output():
    """Check verbose=2 does not cause error.

    Both stdout and stderr are captured while fitting; the number of
    progress lines on stdout and the amount of stderr output are checked.
    """
    verbose_output = StringIO()
    joblib_output = StringIO()
    old_stdout, old_stderr = sys.stdout, sys.stderr
    sys.stdout, sys.stderr = verbose_output, joblib_output
    try:
        est = SymbolicRegressor(random_state=0, verbose=2)
        est.fit(boston.data, boston.target)
    finally:
        # Restore both streams even when fitting raises, so that a failure
        # here does not swallow the output of later tests.
        sys.stdout, sys.stderr = old_stdout, old_stderr

    # check output
    verbose_output.seek(0)
    # Skip the three header rows.
    for _ in range(3):
        verbose_output.readline()
    n_lines = len(verbose_output.readlines())
    assert_equal(20, n_lines)

    joblib_output.seek(0)
    n_lines = len(joblib_output.readlines())
    # New version of joblib appears to output sys.stderr
    assert_equal(0, n_lines % 10)
def getSymbolicRegressorModel():
    """Fit a SymbolicRegressor on synthetic data and return its program.

    Fifty two-feature samples are drawn uniformly from [-1, 1) with a fixed
    seed; the target is ``x0**2 - x1**2 + x1 - 1``.

    Returns
    -------
    The fitted estimator's ``_program`` attribute.
    """
    rng = check_random_state(0)

    # Training samples
    X_train = rng.uniform(-1, 1, 100).reshape(50, 2)
    y_train = X_train[:, 0]**2 - X_train[:, 1]**2 + X_train[:, 1] - 1

    # NOTE: a held-out test set used to be generated here but was never
    # used; it has been removed.  ``rng`` is local and the estimator seeds
    # itself through ``random_state=0``, so the fit is unaffected.

    est_gp = SymbolicRegressor(
        population_size=5000,
        generations=20,
        stopping_criteria=0.01,
        p_crossover=0.7,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        max_samples=0.9,
        verbose=1,
        parsimony_coefficient=0.01,
        random_state=0,
    )
    est_gp.fit(X_train, y_train)
    return est_gp._program
def test_more_verbose_output():
    """Check verbose=2 does not cause error.

    Both stdout and stderr are captured while fitting; the number of
    progress lines on stdout and of lines on stderr are checked.
    """
    verbose_output = StringIO()
    joblib_output = StringIO()
    old_stdout, old_stderr = sys.stdout, sys.stderr
    sys.stdout, sys.stderr = verbose_output, joblib_output
    try:
        est = SymbolicRegressor(random_state=0, verbose=2)
        est.fit(boston.data, boston.target)
    finally:
        # Restore both streams even when fitting raises, so that a failure
        # here does not swallow the output of later tests.
        sys.stdout, sys.stderr = old_stdout, old_stderr

    # check output
    verbose_output.seek(0)
    # Skip the three header rows.
    for _ in range(3):
        verbose_output.readline()
    n_lines = len(verbose_output.readlines())
    assert_equal(10, n_lines)

    joblib_output.seek(0)
    n_lines = len(joblib_output.readlines())
    assert_equal(20, n_lines)
def _gp_fit(arg):
    """Fit one SymbolicRegressor configuration and report how it did.

    ``arg`` is indexed as ``(param, X, Y)``.  ``param`` supplies, by
    position: the population size, the parsimony coefficient, a
    space-separated list of function names, and the bound ``c`` of the
    symmetric constant range ``(-c, c)``.

    Returns a 4-tuple ``(param, program string, raw fitness, validation
    measures)`` on success.  If anything raises during fitting or scoring,
    a 3-tuple ``(param, "Exception: ...", 999999999)`` is returned instead.
    """
    param, X, Y = arg[0], arg[1], arg[2]

    est_gp = SymbolicRegressor(
        population_size=param[0],
        generations=450,
        parsimony_coefficient=param[1],
        function_set=param[2].split(" "),
        const_range=(-param[3], param[3]),
    )

    # Split the sample indices into a training and a validation part.
    training, validation = splitidx_srs(len(Y))
    X_train, Y_train = X[training], Y[training]
    X_validation, Y_validation = X[validation], Y[validation]

    try:
        est_gp.fit(X_train, Y_train)
        program_text = str(est_gp._program)
        train_fitness = est_gp._program.raw_fitness_
        validation_measures = regression_measures(
            est_gp.predict(X_validation), Y_validation)
        return (param, program_text, train_fitness, validation_measures)
    except Exception as e:
        # Failures are reported as data so one bad configuration does not
        # abort the caller.
        return (param, "Exception: {}".format(str(e)), 999999999)
def best_approximate(L,points):
    """Fit a symbolic regressor to the interior items of ``L`` and predict.

    Parameters
    ----------
    L : sequence
        Items indexed as ``item[0]`` (target value) and ``item[1]`` (input
        value).  The first and the last item are excluded from the fit.
    points
        Inputs passed to ``predict`` of the fitted regressor.

    Returns
    -------
    The regressor's predictions for ``points``.
    """
    # Drop the first and the last item; slicing avoids the O(n) ``pop(0)``.
    datax = [item[1] for item in L][1:-1]
    datay = [item[0] for item in L][1:-1]
    print(datax)
    print(datay)

    # No random_state is passed, so the split (and therefore the result)
    # differs from run to run.  Only the training part is used.
    X_train, _, y_train, _ = train_test_split(datax, datay, test_size=0.33)

    sr = SymbolicRegressor(
        population_size=500,
        generations=20,
        # stop if the mean squared error of the best solution is lower
        # than this
        stopping_criteria=0.01,
        # functions that the symbolic regression can use
        function_set=('add', 'sub', 'mul', 'div'),
        # probabilities of activation of different genetic operators
        p_crossover=0.54,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.3,
        verbose=1,  # print a lot of stuff to screen
    )

    # launch the evolution
    sr.fit(X_train, y_train)
    Ypred = sr.predict(points)
    return Ypred
def experiment(seed, i):
    """Run one seeded evolution, save its progress plot, return the log.

    The regressor is configured from module-level settings and fitted on
    the module-level ``x`` / ``y``.  A two-panel figure (best fitness and
    best program length per generation) titled ``Run <i>`` is saved to
    ``plot_<seed>.eps``.

    Returns the estimator's ``run_details_``.
    """
    est_gp = SymbolicRegressor(population_size=pop_size,
                               generations=num_generations,
                               stopping_criteria=0.01,
                               p_crossover=crossover_prob,
                               p_subtree_mutation=mutation_prob,
                               p_hoist_mutation=mutation_prob,
                               p_point_mutation=mutation_prob,
                               function_set=function_set,
                               max_samples=0.9, verbose=1, metric=fit,
                               random_state=seed)
    est_gp.fit(x, y)

    # (subplot index, y-axis label, run_details_ key, extra plot kwargs)
    panels = (
        (1, 'Best fitness', 'best_fitness', {}),
        (2, 'Best size', 'best_length', {'color': 'red'}),
    )

    plt.figure(figsize=(14, 5))
    for position, y_label, detail_key, extra_style in panels:
        plt.subplot(1, 2, position)
        plt.xlabel('Generations', fontsize=24)
        plt.ylabel(y_label, fontsize=24)
        plt.plot(est_gp.run_details_[detail_key], linewidth=3.0,
                 **extra_style)
        plt.grid()

    plt.suptitle('Run {}'.format(i), fontsize=24)
    plt.savefig('plot_{}.eps'.format(seed))
    return est_gp.run_details_
def train():
    """Fit a SymbolicRegressor on the module-level training data.

    Prints the resulting program followed by the estimator's score on that
    same training data.
    """
    settings = dict(
        population_size=150,
        generations=20,
        stopping_criteria=0.001,
        p_crossover=0.8,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.05,
        max_samples=0.9,
        verbose=1,
        metric='mean absolute error',
        parsimony_coefficient=0.01,
    )
    model = SymbolicRegressor(**settings)
    model.fit(X_train, y_train)

    print(model._program)
    print(model.score(X_train, y_train))
def test_none_const_range():
    """Check that const_range=None produces no constants.

    Counts the float nodes over every retained program of every generation:
    there must be none with ``const_range=None`` and more than one with the
    default constant range.
    """

    def count_float_nodes(estimator):
        # Slots holding ``None`` instead of a program are skipped.
        return sum(isinstance(element, float)
                   for generation in estimator._programs
                   for program in generation
                   if program is not None
                   for element in program.program)

    # Check with None as const_range
    est = SymbolicRegressor(const_range=None, generations=2)
    est.fit(boston.data, boston.target)
    assert_true(count_float_nodes(est) == 0)

    # Check with default const_range
    est = SymbolicRegressor(generations=2)
    est.fit(boston.data, boston.target)
    assert_true(count_float_nodes(est) > 1)
def test_none_const_range():
    """Verify ``const_range=None`` keeps float constants out of programs.

    With the default constant range, more than one float node is expected
    across the retained programs.
    """

    def tally_floats(fitted):
        # Walk every retained program; ``None`` slots are skipped.
        total = 0
        for generation in fitted._programs:
            for program in generation:
                if program is not None:
                    total += sum(1 for node in program.program
                                 if isinstance(node, float))
        return total

    # Check with None as const_range
    without_constants = SymbolicRegressor(population_size=100,
                                          generations=2, const_range=None)
    without_constants.fit(boston.data, boston.target)
    assert tally_floats(without_constants) == 0

    # Check with default const_range
    with_constants = SymbolicRegressor(population_size=100, generations=2)
    with_constants.fit(boston.data, boston.target)
    assert tally_floats(with_constants) > 1
def test_subsample():
    """Verify that sub-sampling runs and changes the hold-out error."""
    train_X, train_y = boston.data[:400, :], boston.target[:400]
    test_X, test_y = boston.data[400:, :], boston.target[400:]

    # Fit on all training rows.
    full = SymbolicRegressor(max_samples=1.0, random_state=0)
    full.fit(train_X, train_y)
    full_error = mean_absolute_error(full.predict(test_X), test_y)

    # Fit with 70% of the rows drawn per program.
    sampled = SymbolicRegressor(max_samples=0.7, random_state=0)
    sampled.fit(train_X, train_y)
    sampled_error = mean_absolute_error(sampled.predict(test_X), test_y)

    assert_true(abs(full_error - sampled_error) > 0.01)
def test_early_stopping():
    """Verify that each estimator stops after a single generation."""
    # (estimator class, stopping criterion, dataset)
    cases = (
        (SymbolicRegressor, 10, boston),
        (SymbolicTransformer, 0.5, boston),
        (SymbolicClassifier, .9, cancer),
    )
    for estimator_cls, criterion, dataset in cases:
        est = estimator_cls(stopping_criteria=criterion, random_state=0)
        est.fit(dataset.data[:400, :], dataset.target[:400])
        # Only the first generation should have been recorded.
        assert_true(len(est._programs) == 1)
def test_pipeline():
    """Verify the regressor, classifier and transformer work in pipelines."""
    shared = dict(population_size=50, generations=5, tournament_size=5,
                  random_state=0)

    # Check the regressor
    regression_pipe = make_pipeline(StandardScaler(),
                                    SymbolicRegressor(**shared))
    regression_pipe.fit(boston.data, boston.target)
    assert_almost_equal(regression_pipe.score(boston.data, boston.target),
                        -4.00270923)

    # Check the classifier
    classification_pipe = make_pipeline(StandardScaler(),
                                        SymbolicClassifier(**shared))
    classification_pipe.fit(cancer.data, cancer.target)
    assert_almost_equal(
        classification_pipe.score(cancer.data, cancer.target),
        0.934973637961)

    # Check the transformer
    transformer_pipe = make_pipeline(
        SymbolicTransformer(hall_of_fame=20, **shared),
        DecisionTreeRegressor())
    transformer_pipe.fit(boston.data, boston.target)
    assert_almost_equal(transformer_pipe.score(boston.data, boston.target),
                        1.0)
def test_parsimony_coefficient():
    """Verify that different parsimony settings give different errors."""
    train_X, train_y = boston.data[:400, :], boston.target[:400]
    test_X, test_y = boston.data[400:, :], boston.target[400:]

    def holdout_error(parsimony):
        # Fit with the given coefficient and score on the held-out rows.
        est = SymbolicRegressor(population_size=100, generations=2,
                                parsimony_coefficient=parsimony,
                                random_state=0)
        est.fit(train_X, train_y)
        return mean_absolute_error(est.predict(test_X), test_y)

    fixed_error = holdout_error(0.001)
    auto_error = holdout_error('auto')

    assert abs(fixed_error - auto_error) > 0.01
def test_trigonometric():
    """Verify that adding trig functions runs and changes the error."""
    train_X, train_y = boston.data[:400, :], boston.target[:400]
    test_X, test_y = boston.data[400:, :], boston.target[400:]

    # Default function set.
    plain = SymbolicRegressor(random_state=0)
    plain.fit(train_X, train_y)
    plain_error = mean_absolute_error(plain.predict(test_X), test_y)

    # Arithmetic plus trigonometric functions.
    trig = SymbolicRegressor(
        function_set=['add', 'sub', 'mul', 'div', 'sin', 'cos', 'tan'],
        random_state=0)
    trig.fit(train_X, train_y)
    trig_error = mean_absolute_error(trig.predict(test_X), test_y)

    assert_true(abs(plain_error - trig_error) > 0.01)
def test_parallel_train():
    """Verify results do not depend on ``n_jobs``.

    For both the regressor and the transformer, outputs on held-out rows
    and the program lengths of the final generation must agree between
    consecutive ``n_jobs`` settings.
    """
    job_counts = [1, 2, 3, 8, 16]

    def assert_runs_agree(fitted, outputs):
        # Compare each run with its successor: outputs first, then the
        # final-generation program lengths.
        for left, right in zip(outputs, outputs[1:]):
            assert_array_almost_equal(left, right)
        lengths = np.array([[gp.length_ for gp in e._programs[-1]]
                            for e in fitted])
        for left, right in zip(lengths, lengths[1:]):
            assert_array_almost_equal(left, right)

    # Check the regressor
    regressors = []
    for n_jobs in job_counts:
        est = SymbolicRegressor(population_size=100, generations=4,
                                n_jobs=n_jobs, random_state=0)
        regressors.append(est.fit(boston.data[:100, :],
                                  boston.target[:100]))
    predictions = [e.predict(boston.data[500:, :]) for e in regressors]
    assert_runs_agree(regressors, predictions)

    # Check the transformer
    transformers = []
    for n_jobs in job_counts:
        est = SymbolicTransformer(population_size=100, hall_of_fame=50,
                                  generations=4, n_jobs=n_jobs,
                                  random_state=0)
        transformers.append(est.fit(boston.data[:100, :],
                                    boston.target[:100]))
    transformed = [e.transform(boston.data[500:, :]) for e in transformers]
    assert_runs_agree(transformers, transformed)
def runProgram(X_train,y_train,w):
    """Construct a SymbolicRegressor with a fixed configuration.

    NOTE(review): the estimator is neither fitted nor returned, and the
    arguments ``X_train``, ``y_train`` and ``w`` are not used, so the
    function currently returns ``None``.  Confirm the intended behaviour
    with the caller.
    """
    settings = dict(
        population_size=5000,
        generations=25,
        stopping_criteria=0.01,
        p_crossover=0.65,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        max_samples=0.9,
        verbose=1,
        function_set=['add', 'sub', 'mul', 'div', 'sqrt'],
        parsimony_coefficient=0.01,
        random_state=1,
        init_depth=(3, 6),
        tournament_size=10,
        metric='mean absolute error',
    )
    SymbolicRegressor(**settings)
def pca_gp(rows, features, function):
    """Run repeated GP regressions on PCA-reduced random data.

    For each of ``NUMBER_OF_RUNS`` runs, ``rows`` uniform random values are
    reshaped to ``(rows // features, features)``, ``function`` produces the
    targets, the training split is projected onto one principal component,
    and a SymbolicRegressor is fitted on the projection.

    Parameters
    ----------
    rows : int
        Total number of random values drawn per run.
    features : int
        Number of columns of the generated data.
    function : callable
        Maps the generated data to the target values.

    Returns
    -------
    dict
        Maps the run number to a list with one ``{generation index: lowest
        raw fitness}`` dict per generation (``math.inf`` when a generation
        holds no program).
    """
    run_results = {}
    for run_number in range(NUMBER_OF_RUNS):
        # Generating random data (seeded by the run number)
        rng = check_random_state(run_number)
        X = rng.uniform(-1, 1, rows).reshape(rows // features, features)
        Y = function(X)

        # Dividing it into training and test set; only the training part
        # is used below.
        X_train, _X_test, y_train, _y_test = train_test_split(
            X, Y, test_size=TEST_SIZE, random_state=0)

        # Convert it to PCA
        pca = PCA(n_components=1)
        X_train_pca = pca.fit_transform(X_train)

        # Training the system
        est_gp = SymbolicRegressor(population_size=POPULATION_SIZE,
                                   generations=NUMBER_OF_GENERATION,
                                   stopping_criteria=0.01, p_crossover=0.7,
                                   p_subtree_mutation=0.1,
                                   p_hoist_mutation=0.05,
                                   p_point_mutation=0.1, max_samples=0.9,
                                   verbose=1, parsimony_coefficient=0.01,
                                   random_state=0, n_jobs=-1)
        est_gp.fit(X_train_pca, y_train)

        generation_results = []
        for idGen, generation in enumerate(est_gp._programs):
            best_fitness = math.inf
            for program in generation:
                # Slots may hold ``None`` instead of a program.
                if (program is not None
                        and program.raw_fitness_ < best_fitness):
                    best_fitness = program.raw_fitness_
            generation_results.append({idGen: best_fitness})
        run_results[run_number] = generation_results
    return run_results
def test_verbose_with_oob():
    """Check oob scoring for subsample does not cause error.

    Captures stdout while fitting with ``max_samples=0.9`` and checks the
    number of progress lines printed after the three header rows.
    """
    verbose_output = StringIO()
    old_stdout = sys.stdout
    sys.stdout = verbose_output
    try:
        est = SymbolicRegressor(max_samples=0.9, random_state=0, verbose=1)
        est.fit(boston.data, boston.target)
    finally:
        # Restore stdout even when fitting raises, so that a failure here
        # does not silence the rest of the test run.
        sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    # Skip the three header rows.
    for _ in range(3):
        verbose_output.readline()
    n_lines = len(verbose_output.readlines())
    assert_equal(10, n_lines)
def main():
    """Fit a SymbolicRegressor to the loaded data and print the program.

    Column 0 of the array returned by ``read_data`` is the single input
    feature and column 1 is the target.
    """
    data = read_data()

    settings = dict(
        population_size=1000,
        generations=100,
        const_range=(.0, .0),
        init_depth=(2, 10),
        init_method='grow',
        function_set=('add', 'sub', 'mul', 'div', 'log', 'sin', 'cos'),
        p_crossover=0.7,
        p_subtree_mutation=0.0,
        p_hoist_mutation=0.0,
        p_point_mutation=0.0,
        verbose=1,
        n_jobs=-1,
    )
    regressor = SymbolicRegressor(**settings)

    # The unpacking also insists that the data is two-dimensional.
    n_rows, _ = data.shape
    inputs = data[:, 0].reshape(n_rows, 1)
    targets = data[:, 1]
    regressor.fit(inputs, targets)

    print(regressor._program)
def fit(self, x_data):
    """Fit a SymbolicRegressor on ``x_data`` with a custom fitness.

    The fitness is built from ``self.make_explict_func()`` and declared as
    lower-is-better.  The row indices ``0 .. n-1`` of ``x_data`` are passed
    as the target vector.

    Returns the fitted estimator.
    """
    # Second positional argument ``False``: smaller metric values win.
    fitness = make_fitness(self.make_explict_func(), False)

    regressor = SymbolicRegressor(population_size=500, generations=10,
                                  stopping_criteria=0.0001,
                                  p_crossover=0.7,
                                  p_subtree_mutation=0.1,
                                  p_hoist_mutation=0.05,
                                  p_point_mutation=0.1, metric=fitness,
                                  function_set=self.function_set,
                                  verbose=1, parsimony_coefficient=0.01)

    row_indices = np.arange(x_data.shape[0])
    regressor.fit(x_data, row_indices)
    return regressor
def test_verbose_with_oob():
    """Check oob scoring for subsample does not cause error.

    Captures stdout while fitting with ``max_samples=0.9`` and checks the
    number of progress lines printed after the three header rows.
    """
    verbose_output = StringIO()
    old_stdout = sys.stdout
    sys.stdout = verbose_output
    try:
        est = SymbolicRegressor(max_samples=0.9, random_state=0, verbose=1)
        est.fit(boston.data, boston.target)
    finally:
        # Restore stdout even when fitting raises, so that a failure here
        # does not silence the rest of the test run.
        sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    # Skip the three header rows.
    for _ in range(3):
        verbose_output.readline()
    n_lines = len(verbose_output.readlines())
    assert_equal(20, n_lines)
def test_gridsearch():
    """Verify that a SymbolicRegressor can be tuned with GridSearchCV."""
    # Grid search parsimony_coefficient
    search_space = {'parsimony_coefficient': [0.001, 0.1, 'auto']}
    base_estimator = SymbolicRegressor(population_size=50, generations=5,
                                       tournament_size=5, random_state=0)

    search = GridSearchCV(base_estimator, search_space,
                          scoring='neg_mean_absolute_error')
    search.fit(boston.data, boston.target)

    assert_equal(search.best_params_, {'parsimony_coefficient': 0.001})
def result(train_data: tuple, test_data: tuple, verbose: bool):
    """Fit a SymbolicRegressor and count its exactly matching predictions.

    Parameters
    ----------
    train_data : tuple
        ``(attributes, targets)`` used for fitting.
    test_data : tuple
        ``(attributes, targets)`` used for evaluation.
    verbose : bool
        Forwarded to the regressor; also enables the banner and the final
        program print-out.

    Returns
    -------
    int
        Number of test predictions that compare equal (``==``) to their
        target.
    """
    if verbose:
        print(">>>>>>>>>>>>>>>>>>>>>>>EVOLUTIONIST<<<<<<<<<<<<<<<<<<<<<<<<<<")

    train_atts, train_targets = train_data
    test_atts, test_targets = test_data

    clf = SymbolicRegressor(verbose=verbose)
    fitted = clf.fit(train_atts, train_targets)
    y_predict = fitted.predict(test_atts)

    hits = sum(1 for predict, target in zip(y_predict, test_targets)
               if predict == target)

    if verbose:
        print(F"\tFINAL PROGRAM: {clf._program}")
    return hits
def test_verbose_with_oob():
    """Check oob scoring for subsample does not cause error.

    Captures stdout while fitting with ``max_samples=0.9`` and checks that
    one progress line per generation follows the three header rows.
    """
    verbose_output = StringIO()
    old_stdout = sys.stdout
    sys.stdout = verbose_output
    try:
        est = SymbolicRegressor(population_size=100, generations=10,
                                max_samples=0.9, random_state=0, verbose=1)
        est.fit(boston.data, boston.target)
    finally:
        # Restore stdout even when fitting raises, so that a failure here
        # does not silence the rest of the test run.
        sys.stdout = old_stdout

    # check output
    verbose_output.seek(0)
    # Ignore header rows
    for _ in range(3):
        verbose_output.readline()
    n_lines = len(verbose_output.readlines())
    assert_equal(10, n_lines)
def regressionOfFailureRate(coords, seed=None, population_size=None, generations=None):
    """
    Try to fit the points ``coords``, which express the failure frequency,
    as well as possible.

    The fit also tries to keep the result integrable, although
    integrability is not guaranteed.

    ``population_size`` defaults to 1000 and ``generations`` to 20 when
    they are ``None``; ``seed`` is passed on as the estimator's
    ``random_state``.

    Returns the fitted estimator together with the expression extracted
    from the program of its best individual.
    """
    if population_size is None:
        population_size = 1000
    if generations is None:
        generations = 20
    # Split the x and y coordinates for gplearn
    X_train, y_train = zip(*(([x], y) for (x, y) in coords))
    from gplearn.genetic import SymbolicRegressor
    # How many random numbers does gplearn generate? The number is not
    # limited. Either a function, a variable, or a random number from the
    # given interval is substituted.
    # Only one generation is requested here so that the initial population
    # can be patched before evolution continues via warm_start below.
    est_gp = SymbolicRegressor(  # Estimator Genetic Programming
        population_size=population_size,
        generations=1,
        tournament_size=20,
        stopping_criteria=0.0,
        const_range=(0.0, 5.0),
        init_depth=(2, 6),
        init_method='half and half',
        function_set=('add', 'mul'),
        metric='mean absolute error',  # metric=sum_absolute_error
        parsimony_coefficient=0.001,
        p_crossover=0.9,
        p_subtree_mutation=0.01,
        p_hoist_mutation=0.01,
        p_point_mutation=0.01,
        p_point_replace=0.05,
        max_samples=1.0,
        warm_start=False,
        n_jobs=-1,
        verbose=VERBOSITY,
        random_state=seed)
    est_gp.fit(X_train, y_train)
    for p in est_gp._programs[0]:
        p.program[0] = gplearn.functions.div2  # Rewrite every root into a division
    # Evolve one generation at a time, patching the newest generation
    # before each warm-started refit.
    for i in range(1, generations):
        for p in est_gp._programs[i - 1]:
            p.get_subtree = functools.partial(
                get_subtree, p)  # Forbid crossover from the root for all offspring
        est_gp.set_params(generations=i + 1, warm_start=True)
        est_gp.fit(X_train, y_train)
    best_individual = est_gp._program
    return est_gp, extractExprFromGplearn(best_individual.program)
def test_trigonometric():
    """Verify that enabling trig functions runs and changes the error."""
    train_X, train_y = boston.data[:400, :], boston.target[:400]
    test_X, test_y = boston.data[400:, :], boston.target[400:]

    # Default settings.
    plain = SymbolicRegressor(random_state=0)
    plain.fit(train_X, train_y)
    plain_error = mean_absolute_error(plain.predict(test_X), test_y)

    # Same seed with the trigonometric option switched on.
    trig = SymbolicRegressor(trigonometric=True, random_state=0)
    trig.fit(train_X, train_y)
    trig_error = mean_absolute_error(trig.predict(test_X), test_y)

    assert_true(abs(plain_error - trig_error) > 0.01)
def test_early_stopping():
    """Verify that both estimators stop after a single generation."""
    train_X, train_y = boston.data[:400, :], boston.target[:400]

    # (estimator class, stopping criterion)
    cases = (
        (SymbolicRegressor, 10),
        (SymbolicTransformer, 0.5),
    )
    for estimator_cls, criterion in cases:
        est = estimator_cls(stopping_criteria=criterion, random_state=0)
        est.fit(train_X, train_y)
        # Only the first generation should have been recorded.
        assert_true(len(est._programs) == 1)
def test_input_shape():
    """Verify that a changed feature count raises ``ValueError``."""
    random_state = check_random_state(415)
    # Fitted on 10 features, queried with 9.
    X = random_state.uniform(size=50).reshape(5, 10)
    y = random_state.uniform(size=5)
    X2 = random_state.uniform(size=45).reshape(5, 9)

    # (estimator class, name of the method fed the mismatching data)
    cases = (
        (SymbolicRegressor, 'predict'),      # Check the regressor
        (SymbolicTransformer, 'transform'),  # Check the transformer
    )
    for estimator_cls, method_name in cases:
        est = estimator_cls(generations=2, random_state=0)
        est.fit(X, y)
        assert_raises(ValueError, getattr(est, method_name), X2)
def test_pickle():
    """Verify that fitted estimators survive a pickle round trip."""
    train_X, train_y = boston.data[:100, :], boston.target[:100]
    holdout_X, holdout_y = boston.data[500:, :], boston.target[500:]

    # Check the regressor
    regressor = SymbolicRegressor(generations=2, random_state=0)
    regressor.fit(train_X, train_y)
    score_before = regressor.score(holdout_X, holdout_y)
    restored = pickle.loads(pickle.dumps(regressor))
    assert_equal(type(restored), regressor.__class__)
    score_after = restored.score(holdout_X, holdout_y)
    assert_equal(score_before, score_after)

    # Check the transformer
    transformer = SymbolicTransformer(generations=2, random_state=0)
    transformer.fit(train_X, train_y)
    features_before = transformer.transform(holdout_X)
    restored = pickle.loads(pickle.dumps(transformer))
    assert_equal(type(restored), transformer.__class__)
    features_after = restored.transform(holdout_X)
    assert_array_almost_equal(features_before, features_after)
def main():
    """Train a regressor on athlete score rows and print a ranked CSV.

    Score rows are left-padded with zeros to a common length; the last
    column is the regression target and the remaining columns are the
    features.  Athletes are printed by descending prediction.  An athlete
    is starred while his status is ``Status.Probable`` and his position
    still has free slots in the ``escalation`` quota.
    """
    # Remaining number of starred athletes allowed per position.
    escalation = {
        Position.GOALKEEPER: 1,
        Position.DEFENDER: 2,
        Position.SIDE: 2,
        Position.MIDFIELD: 4,
        Position.ATTACKER: 2,
        Position.COACH: 1
    }

    print("Getting auth")
    auth = get_auth()
    print("Getting teams")
    teams = get_teams()
    print("Getting athletes")
    athletes = get_athletes(teams)
    print("Getting scores")
    scores = [athlete.get_row(auth) for athlete in athletes]

    # Left-pad every row with zeros up to the longest row.
    max_length = max((len(row) for row in scores), default=0)
    fixed_score = [[0.0] * (max_length - len(row)) + row for row in scores]

    generations = 2000
    print("Training using " + str(generations) + " generations. It can take a long time to end")
    est_gp = SymbolicRegressor(
        population_size=5000,
        generations=generations,
        stopping_criteria=0.01,
        p_crossover=0.7,
        p_subtree_mutation=0.1,
        p_hoist_mutation=0.05,
        p_point_mutation=0.1,
        max_samples=0.9,
        verbose=1,
        parsimony_coefficient=0.01,
        random_state=0,
        const_range=(-50., 50.),
        function_set=(
            'add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv',
            'max', 'min', 'sin', 'cos', 'tan'))

    features = [row[:-1] for row in fixed_score]
    targets = [row[-1] for row in fixed_score]
    est_gp.fit(features, targets)
    predictions = est_gp.predict(features)

    print("Getting results")
    # Highest prediction first; the sort is stable, so ties keep the
    # athletes' original order.
    ranked = sorted(([athlete, prediction]
                     for athlete, prediction in zip(athletes, predictions)),
                    key=lambda entry: -entry[1])

    print("\"Scale\",\"Name\",\"Team\",\"Position\",\"Status\",\"Price\",\"Prediction\"")
    for athlete, prediction in ranked:
        scale = athlete.status == Status.Probable and escalation[athlete.position] > 0
        if scale:
            # Use up one slot of this position's quota.
            escalation[athlete.position] -= 1
        print("\"" + ("*" if scale else " ") + "\",\"" + athlete.nick + "\",\"" + athlete.club.name + "\",\"" + str(athlete.position.name) + "\",\"" + str(athlete.status.name) + "\"," + str(athlete.price) + "," + str(prediction))
    print("Done")
def test_sample_weight():
    """Verify that constant sample weights leave the results unchanged."""
    unit_weights = np.ones(boston.target.shape[0])

    def fitted_regressor(**fit_kwargs):
        est = SymbolicRegressor(generations=2, random_state=0)
        est.fit(boston.data, boston.target, **fit_kwargs)
        return est

    # Check constant sample_weight has no effect
    unweighted = fitted_regressor()
    weighted = fitted_regressor(sample_weight=unit_weights)
    # And again with a scaled sample_weight
    scaled = fitted_regressor(sample_weight=unit_weights * 1.1)

    assert_almost_equal(unweighted._program.fitness_,
                        weighted._program.fitness_)
    assert_almost_equal(unweighted._program.fitness_,
                        scaled._program.fitness_)

    # And again for the transformer
    unit_weights = np.ones(boston.target.shape[0])

    def transformed(**fit_kwargs):
        est = SymbolicTransformer(generations=2, random_state=0)
        return est.fit_transform(boston.data, boston.target, **fit_kwargs)

    unweighted = transformed()
    weighted = transformed(sample_weight=unit_weights)
    # And again with a scaled sample_weight
    scaled = transformed(sample_weight=unit_weights * 1.1)

    assert_array_almost_equal(unweighted, weighted)
    assert_array_almost_equal(unweighted, scaled)
def test_parsimony_coefficient():
    """Verify that three parsimony settings give pairwise different errors."""
    train_X, train_y = boston.data[:400, :], boston.target[:400]
    test_X, test_y = boston.data[400:, :], boston.target[400:]

    def holdout_error(parsimony):
        # Fit with the given coefficient and score on the held-out rows.
        est = SymbolicRegressor(parsimony_coefficient=parsimony,
                                random_state=0)
        est.fit(train_X, train_y)
        return mean_absolute_error(est.predict(test_X), test_y)

    small_error = holdout_error(0.001)
    large_error = holdout_error(0.1)
    auto_error = holdout_error('auto')

    assert_true(abs(small_error - large_error) > 0.01)
    assert_true(abs(small_error - auto_error) > 0.01)
    assert_true(abs(large_error - auto_error) > 0.01)
def test_print_overloading_estimator():
    """Verify that printing a fitted estimator shows its program(s).

    An unfitted estimator must print as its ``repr``; once fitted, the
    printed text must change and equal the printed program (regressor) or
    the printed list of programs (transformer).
    """

    def printed(value):
        # Return what ``print(value)`` writes to stdout, stripped.
        saved_stdout = sys.stdout
        try:
            buffer = StringIO()
            sys.stdout = buffer
            print(value)
            return buffer.getvalue().strip()
        finally:
            sys.stdout = saved_stdout

    random_state = check_random_state(415)
    X = np.reshape(random_state.uniform(size=50), (5, 10))
    y = random_state.uniform(size=5)

    # Check the regressor
    est = SymbolicRegressor(generations=2, random_state=0)

    # Unfitted
    output_unfitted = printed(est)

    # Fitted
    est.fit(X, y)
    output_fitted = printed(est)
    output_program = printed(est._program)

    assert_true(output_unfitted != output_fitted)
    assert_true(output_unfitted == est.__repr__())
    assert_true(output_fitted == output_program)

    # Check the transformer
    est = SymbolicTransformer(generations=2, random_state=0)

    # Unfitted
    output_unfitted = printed(est)

    # Fitted
    est.fit(X, y)
    output_fitted = printed(est)

    # Expected layout: the list of program strings with the quotes removed
    # and one program per line.
    output = str([gp.__str__() for gp in est])
    output_program = printed(output.replace("',", ",\n").replace("'", ""))

    assert_true(output_unfitted != output_fitted)
    assert_true(output_unfitted == est.__repr__())
    assert_true(output_fitted == output_program)