def regressionOfFailureRate(coords, seed=None, population_size=None, generations=None):
    """Fit a symbolic expression to the failure-rate points in ``coords``.

    The search tries to produce an expression that can be integrated, but
    integrability of the result is not guaranteed.

    Args:
        coords: Iterable of ``(x, y)`` pairs to fit.
        seed: Forwarded to gplearn as ``random_state``.
        population_size: Programs per generation; ``None`` selects 1000.
        generations: Target number of generations; ``None`` selects 20.
            One generation is always evolved before the warm-start loop.

    Returns:
        A tuple ``(estimator, expression)`` holding the fitted
        ``SymbolicRegressor`` and the result of ``extractExprFromGplearn``
        applied to the node list of the estimator's ``_program``.
    """
    pop_size = 1000 if population_size is None else population_size
    total_generations = 20 if generations is None else generations

    # Split the points into x samples (each wrapped in a one-element list)
    # and y targets for gplearn.
    features, targets = zip(*(([x], y) for (x, y) in coords))

    from gplearn.genetic import SymbolicRegressor

    settings = dict(
        population_size=pop_size,
        # Only the first generation is evolved here; the remaining ones are
        # added one at a time through warm_start in the loop below.
        generations=1,
        tournament_size=20,
        stopping_criteria=0.0,
        # How many random numbers will gplearn generate? There is no limit:
        # each slot receives a function, a variable, or a random number drawn
        # from this interval.
        const_range=(0.0, 5.0),
        init_depth=(2, 6),
        init_method='half and half',
        function_set=('add', 'mul'),
        # A custom ``sum_absolute_error`` metric was left commented out here
        # in the original code.
        metric='mean absolute error',
        parsimony_coefficient=0.001,
        p_crossover=0.9,
        p_subtree_mutation=0.01,
        p_hoist_mutation=0.01,
        p_point_mutation=0.01,
        p_point_replace=0.05,
        max_samples=1.0,
        warm_start=False,
        n_jobs=-1,
        verbose=VERBOSITY,
        random_state=seed,
    )
    regressor = SymbolicRegressor(**settings)  # genetic-programming estimator
    regressor.fit(features, targets)

    # Overwrite the root node of every first-generation program with division.
    for individual in regressor._programs[0]:
        individual.program[0] = gplearn.functions.div2

    for finished in range(1, total_generations):
        # Forbid crossover at the root for all offspring by replacing each
        # parent's subtree selector before the next generation is bred.
        for individual in regressor._programs[finished - 1]:
            individual.get_subtree = functools.partial(get_subtree, individual)
        regressor.set_params(generations=finished + 1, warm_start=True)
        regressor.fit(features, targets)

    return regressor, extractExprFromGplearn(regressor._program.program)
def test_run_details():
    """Verify that ``run_details_`` holds one entry per evolved generation."""
    regressor = SymbolicRegressor(generations=5, random_state=415)
    regressor.fit(boston.data, boston.target)

    # A cold fit of 5 generations must be numbered 0..4.
    expected_cold = list(range(5))
    assert_equal(regressor.run_details_['generation'], expected_cold)

    regressor.set_params(generations=10, warm_start=True)
    regressor.fit(boston.data, boston.target)

    # Continuing with warm_start must extend the numbering to 0..9.
    details = regressor.run_details_
    expected_warm = list(range(10))
    assert_equal(details['generation'], expected_warm)

    # Every recorded detail must contain exactly one value per generation.
    for key in details:
        assert_equal(len(details[key]), 10)
def test_low_memory_warm_start():
    """Verify that warm starting with ``low_memory=True`` matches a cold run."""
    data, target = boston.data, boston.target

    # Reference: evolve all 20 generations in a single fit.
    cold = SymbolicRegressor(generations=20, random_state=415,
                             low_memory=True)
    cold.fit(data, target)
    cold_fitness = cold._program.fitness_
    cold_program = str(cold._program)

    # Same seed, but evolve 10 generations and then continue up to 20.
    warm = SymbolicRegressor(generations=10, random_state=415,
                             low_memory=True)
    warm.fit(data, target)
    warm.set_params(generations=20, warm_start=True)
    warm.fit(data, target)
    warm_fitness = warm._program.fitness_
    warm_program = str(warm._program)

    # Both routes must end with the same final program and fitness.
    assert_almost_equal(cold_fitness, warm_fitness)
    assert_equal(cold_program, warm_program)
def test_warm_start():
    """Verify warm-start validation and that it reproduces a cold run."""
    data, target = boston.data, boston.target

    # Reference: evolve all 20 generations in a single fit.
    cold = SymbolicRegressor(generations=20, random_state=415)
    cold.fit(data, target)
    cold_fitness = cold._program.fitness_
    cold_program = str(cold._program)

    # Asking a warm start for fewer generations than already run is an error.
    cold.set_params(generations=5, warm_start=True)
    assert_raises(ValueError, cold.fit, data, target)

    # Asking for the same number of generations only emits a warning.
    cold.set_params(generations=20, warm_start=True)
    assert_warns(UserWarning, cold.fit, data, target)

    # Same seed, but evolve 10 generations and then continue up to 20.
    warm = SymbolicRegressor(generations=10, random_state=415)
    warm.fit(data, target)
    warm.set_params(generations=20, warm_start=True)
    warm.fit(data, target)
    warm_fitness = warm._program.fitness_
    warm_program = str(warm._program)

    # Both routes must end with the same final program and fitness.
    assert_almost_equal(cold_fitness, warm_fitness)
    assert_equal(cold_program, warm_program)
metric='rmse', max_samples=1.0, low_memory=True, n_jobs=4, verbose=1, random_state=seed, p_hoist_mutation=0.0) est_gp.set_params( paretogp_lengths=(5, 250), paretogp=False, complexity='length', selection='tournament', elitism_size=1, tournament_size=toursize, parsimony_coefficient=length_coefficients, # = 0.0, p_crossover=0.1, #p_crossover, p_subtree_mutation=0.5, #p_mutations, p_point_mutation=0.3, #p_mutations, p_point_replace=0.05, p_gs_crossover=0.05, p_gs_mutation=0.05, gs_mutationstep=0.001) if pgp or est_gp.get_params()['paretogp']: est_gp.set_params(parsimony_coefficient=0.0) if len(sys.argv) > 3: est_gp.set_params(**args1) est = None