def test_parallel_custom_transformer():
    """Regression test for parallel training with a custom transformer.

    A wrapped transformer must leave the fitted classifier picklable. An
    unwrapped one must make ``pickle.dumps`` raise ``AttributeError``, both
    with ``n_jobs=2`` and with the default single-threaded setting.
    """

    def _sigmoid(x1):
        with np.errstate(over='ignore', under='ignore'):
            return 1 / (1 + np.exp(-x1))

    def _fitted(transformer, **extra):
        # Build and fit a small classifier around the given transformer.
        model = SymbolicClassifier(generations=2,
                                   transformer=transformer,
                                   random_state=0,
                                   **extra)
        model.fit(cancer.data, cancer.target)
        return model

    wrapped = make_function(function=_sigmoid, name='sig', arity=1)
    _ = pickle.dumps(_fitted(wrapped, n_jobs=2))

    # Unwrapped functions should fail
    unwrapped = make_function(function=_sigmoid, name='sig', arity=1,
                              wrap=False)
    assert_raises(AttributeError, pickle.dumps, _fitted(unwrapped, n_jobs=2))

    # Single threaded will also fail in non-interactive sessions
    assert_raises(AttributeError, pickle.dumps, _fitted(unwrapped))
def test_parallel_custom_function():
    """Regression test for parallel training with custom functions.

    A wrapped custom function must leave the fitted regressor picklable. An
    unwrapped one must make ``pickle.dumps`` raise ``AttributeError``, both
    with ``n_jobs=2`` and with the default single-threaded setting.
    """

    def _logical(x1, x2, x3, x4):
        return np.where(x1 > x2, x3, x4)

    def _fitted(custom, **extra):
        # Build and fit a small regressor whose function set ends with `custom`.
        model = SymbolicRegressor(
            generations=2,
            function_set=['add', 'sub', 'mul', 'div', custom],
            random_state=0,
            **extra)
        model.fit(boston.data, boston.target)
        return model

    wrapped = make_function(function=_logical, name='logical', arity=4)
    _ = pickle.dumps(_fitted(wrapped, n_jobs=2))

    # Unwrapped functions should fail
    unwrapped = make_function(function=_logical, name='logical', arity=4,
                              wrap=False)
    assert_raises(AttributeError, pickle.dumps, _fitted(unwrapped, n_jobs=2))

    # Single threaded will also fail in non-interactive sessions
    assert_raises(AttributeError, pickle.dumps, _fitted(unwrapped))
def fit(self, X, y=None, state=None):
    """Fit a ``SymbolicTransformer`` on ``(X, y)`` and record the result.

    The transformer is configured from ``self.generations``,
    ``self.population``, ``self.hall_of_fame``, ``self.components`` and
    ``self.metric``; the remaining settings are fixed in this method.

    Parameters
    ----------
    X, y
        Passed unchanged to ``SymbolicTransformer.fit``.
    state
        Not used by this method. The default is ``None`` rather than a
        shared mutable ``{}``.

    Returns
    -------
    self
        ``self.state['genetic']`` is reset to a new dict whose ``'fit'``
        entry holds the value returned by ``gp.fit(X, y)``.
    """
    exponential = make_function(function=exponent, name='exp', arity=1)
    function_set = ['add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg',
                    'inv', 'max', 'min', 'tan', 'sin', 'cos', exponential]
    gp = SymbolicTransformer(generations=self.generations,
                             population_size=self.population,
                             hall_of_fame=self.hall_of_fame,
                             n_components=self.components,
                             function_set=function_set,
                             parsimony_coefficient='auto',
                             max_samples=0.6,
                             verbose=1,
                             metric=self.metric,
                             random_state=0,
                             n_jobs=7)
    # Any previous 'genetic' entry is discarded before storing the new fit.
    self.state['genetic'] = {'fit': gp.fit(X, y)}
    return self
def test_validate_function():
    """Check that valid functions are accepted and invalid ones raise."""

    # A correctly specified function must be accepted.
    _ = make_function(function=_protected_sqrt, name='sqrt', arity=1)

    def bad_fun1(x1, x2):
        return 'ni'

    def bad_fun2(x1):
        return np.ones((2, 1))

    def _unprotected_sqrt(x1):
        with np.errstate(divide='ignore', invalid='ignore'):
            return np.sqrt(x1)

    def _unprotected_div(x1, x2):
        with np.errstate(divide='ignore', invalid='ignore'):
            return np.divide(x1, x2)

    # Positional argument tuples that make_function must reject, in the
    # same order the checks were originally written.
    rejected = [
        (_protected_sqrt, 'sqrt', '1'),     # non-integer arity
        (_protected_sqrt, 'sqrt', 1.0),     # non-integer arity
        (_protected_sqrt, 'sqrt', 1, 'f'),  # non-bool wrap
        (_protected_sqrt, 'sqrt', 2),       # non-matching arity
        (maximum, 'max', 1),                # non-matching arity
        (_protected_sqrt, 2, 1),            # name test
        (bad_fun1, 'ni', 2),                # return type test
        (bad_fun2, 'ni', 1),                # return shape test
        (_unprotected_sqrt, 'sqrt', 1),     # closure for negatives test
        (_unprotected_div, 'div', 2),       # closure for zeros test
    ]
    for bad_args in rejected:
        assert_raises(ValueError, make_function, *bad_args)
def test_function_in_program():
    """Check that a custom function can be used inside evolved programs."""

    def logic(x1, x2, x3, x4):
        return np.where(x1 > x2, x3, x4)

    logical = make_function(function=logic, name='logical', arity=4)
    transformer = SymbolicTransformer(
        generations=2,
        population_size=2000,
        hall_of_fame=100,
        n_components=10,
        function_set=['add', 'sub', 'mul', 'div', logical],
        parsimony_coefficient=0.0005,
        max_samples=0.9,
        random_state=0)
    transformer.fit(boston.data[:300, :], boston.target[:300])

    # Program 906 of generation 0 is the one pinned by this test.
    formula = str(transformer._programs[0][906])
    expected_formula = 'sub(logical(X6, add(X11, 0.898), X10, X2), X5)'
    assert_equal(expected_formula, formula, True)
def test_custom_functions():
    """Test that the custom functions example works.

    Checks both the string form and the graphviz export of program 906 of
    generation 0 after fitting on the first 300 shuffled Boston rows.
    """
    rng = check_random_state(0)
    boston = load_boston()
    shuffled = rng.permutation(boston.target.size)
    boston.data = boston.data[shuffled]
    boston.target = boston.target[shuffled]

    def logic(x1, x2, x3, x4):
        return np.where(x1 > x2, x3, x4)

    logical = make_function(function=logic, name='logical', arity=4)
    transformer = SymbolicTransformer(
        generations=2,
        population_size=2000,
        hall_of_fame=100,
        n_components=10,
        function_set=['add', 'sub', 'mul', 'div', logical],
        parsimony_coefficient=0.0005,
        max_samples=0.9,
        random_state=0)
    transformer.fit(boston.data[:300, :], boston.target[:300])

    program = transformer._programs[0][906]
    assert_equal(str(program),
                 'sub(logical(X6, add(X11, 0.898), X10, X2), X5)')

    dot_data = program.export_graphviz()
    expected = ('digraph program {\nnode [style=filled]\n0 [label="sub", '
                'fillcolor="#136ed4"] ;\n1 [label="logical", '
                'fillcolor="#136ed4"] ;\n2 [label="X6", fillcolor="#60a6f6"] '
                ';\n3 [label="add", fillcolor="#136ed4"] ;\n4 [label="X11", '
                'fillcolor="#60a6f6"] ;\n5 [label="0.898", '
                'fillcolor="#60a6f6"] ;\n3 -> 5 ;\n3 -> 4 ;\n6 [label="X10", '
                'fillcolor="#60a6f6"] ;\n7 [label="X2", fillcolor="#60a6f6"] '
                ';\n1 -> 7 ;\n1 -> 6 ;\n1 -> 3 ;\n1 -> 2 ;\n8 [label="X5", '
                'fillcolor="#60a6f6"] ;\n0 -> 8 ;\n0 -> 1 ;\n}')
    assert_equal(dot_data, expected)
def projection_generator_function(max_arity, projection='np.mean'):
    """Build projection functions for every arity in ``[3, max_arity)``.

    For each arity a function taking that many arrays is compiled from
    source text; it stacks its arguments with ``np.vstack``, transposes and
    applies ``projection`` along ``axis=1``. Each compiled function is
    wrapped with ``make_function`` under the name
    ``'<projection>_<arity>'``.

    Parameters
    ----------
    max_arity : int
        Exclusive upper bound on the generated arities.
    projection : str
        Source text of the callable to apply, inserted verbatim into the
        generated code.

    Returns
    -------
    list
        One wrapped function per arity; empty when ``max_arity <= 3``.
    """
    base_arity = 3
    function_list = []
    for current_arity in range(base_arity, max_arity):
        # Argument names start at x<base_arity>, e.g. "x3,x4,x5" for arity 3.
        arg_names = ','.join(
            'x%d' % i for i in range(base_arity, base_arity + current_arity))
        source = ("def experiment_file(%s):\n\treturn "
                  "%s(np.vstack([%s]).T,axis = 1)"
                  % (arg_names, projection, arg_names))
        module_code = compile(source, "<string>", "exec")
        # The first constant of the module code is the function's code object.
        projector = FunctionType(module_code.co_consts[0], globals(),
                                 "base_code")
        function_list.append(
            make_function(projector,
                          '%s_%d' % (projection, current_arity),
                          arity=current_arity))
    return function_list
#return a or b # for i in range(tam): # x[i]=a[i]|b[i] # return x def logi_or(a, b): return a.astype(bool) | b.astype(bool) def logi_not(a): #return not a return ~a.astype(bool) def logi_xor(a,b): return a != b logic_and = make_function(function=logi_and, name='op_and',arity=2) logic_or = make_function(function=logi_or, name='op_or',arity=2) logic_xor = make_function(function=logi_xor, name='op_xor',arity=2) logic_not = make_function(function=logi_not, name='op_not',arity=1) #function_set = [logic_and, logic_not,logic_or] function_set = [logic_and,logic_or,logic_xor,logic_not] est_gp = SymbolicRegressor(population_size=100, generations=500, #stopping_criteria=0.01, tournament_size=2, function_set= function_set, parsimony_coefficient=0.009, max_samples=1.0, verbose=1, p_crossover=0.9, p_subtree_mutation=0.1,
) #Scale the X_train,y_train and X_test mmx = MinMaxScaler() X_train = mmx.fit_transform(X_train) X_test = mmx.fit_transform(X_test) mmy = MinMaxScaler() y_train = mmy.fit_transform(y_train) #Save the scalers dump(mmx,"./mmx.bin") dump(mmy,"./mmy.bin") #Make custom function power = make_function(function=power, name="power", arity=2) power_2 = make_function(function=power_2, name="power_2", arity=1) power_3 = make_function(function=power_3, name="power_3", arity=1) power_4 = make_function(function=power_4, name="power_4", arity=1) #Form the function set function_set = [ "add", "sub", "mul", "div", "inv", "sqrt", #Default-function power_2 #Custom-function ] #Converter: from function to string (*args to sympify) converter = { 'add': lambda x, y : x + y, 'sub': lambda x, y : x - y, 'mul': lambda x, y : x*y,
value = gb.apply(lambda x: x / x.sum()) return np.nan_to_num(value.values) def _sec_demean(df1): # 截面去均值 df = pd.DataFrame({'0': df1}) df['time'] = trade_date df['code'] = stock_code gb = df.groupby('time')['0'] value = gb.apply(lambda x: x - x.mean()) return np.nan_to_num(value.values) exp = make_function(function=_exp, name='exp', arity=1) square = make_function(function=_square, name='square', arity=1) ts_max = make_function(function=_ts_max, name='ts_max', arity=2) ts_min = make_function(function=_ts_min, name='ts_min', arity=2) ts_mid = make_function(function=_ts_mid, name='ts_mid', arity=2) ts_mean = make_function(function=_ts_mean, name='ts_mean', arity=2) ts_wma = make_function(function=_ts_wma, name='ts_wma', arity=2) ts_std = make_function(function=_ts_std, name='ts_std', arity=2) ts_skew = make_function(function=_ts_skew, name='ts_skew', arity=2) ts_kurt = make_function(function=_ts_kurt, name='ts_kurt', arity=2) ts_norm = make_function(function=_ts_norm, name='tsnorm', arity=2) ts_normMaxMin = make_function(function=_ts_normMaxMin, name='ts_normMaxMin', arity=2) ts_rank = make_function(function=_ts_rank, name='ts_rank', arity=2) ts_argmax = make_function(function=_ts_argmax, name='ts_argmax', arity=2)
if __name__ == '__main__': f = "/home/philgun/Documents/coolstuff/coolstuff/ML/script/script/RBF/data/data.mat" data = Data() df = data.generate_data(f) df = df[ ["der_hf_2","T_f_2","T_f_1","T_f_3","T_s_2","u_flow"] ] print(df) X = df[df.columns[1:]].to_numpy() y = df[df.columns[0]].to_numpy().reshape(-1,1) power = make_function(function=power, name="power", arity=2) power_2 = make_function(function=power_2, name="power_2", arity=1) function_set = [ "add", "sub", "mul", "div", "inv", "sqrt","log", power_2 ] converter = { 'add': lambda x, y : x + y, 'sub': lambda x, y : x - y, 'mul': lambda x, y : x*y, 'div': lambda x, y : x/y, 'neg': lambda x : -x, 'inv': lambda x: 1/x, 'sqrt': lambda x: x**0.5,
popSize = 1000 noGens = 50 crossoverProb = 0.7 mutationProb = 0.0 # https://gplearn.readthedocs.io/en/stable/advanced.html#custom-functions # Custom safe exponent function def _protected_exponent(x1): with np.errstate(over='ignore'): return np.where(np.abs(x1) < 100, np.exp(x1), 0.) exp = functions.make_function(function=_protected_exponent, name='exp', arity=1) # https://gplearn.readthedocs.io/en/stable/advanced.html#custom-fitness # Custom fitness function for - sum of absolute errors def _nsae(true_y, pred_y, w): diffs = np.abs(true_y - pred_y) return -sum(diffs) nsae = fitness.make_fitness(_nsae, greater_is_better=True) # Run symbolic regression def symbolicRegr(funcs):
0, -.1629, -.2624, -.3129, -.3264, -.3125, -.2784, -.2289, -.1664, -.0909, 0, .1111, .2496, .4251, .6496, .9375, 1.3056, 1.7731, 2.3616, 3.0951, 4. ]) x = x.reshape(-1, 1) y = y.reshape(-1, 1) #Define exp def _exp(x): y = np.exp(x) #protext for infinites y[np.isinf(y)] = 10**6 return y exp = functions.make_function(_exp, 'exp', 1) function_set = ['add', 'sub', 'mul', 'div', 'log', 'sin', 'cos', exp] #create summed absolute error as metric _sae = lambda y, t, w: np.sum(np.abs(y - t)) sae = fitness.make_fitness(_sae, False) n_generations = 50 #Initialize genetic programm regressor est_gp = genetic.SymbolicRegressor(population_size=1000, generations=1, stopping_criteria=0.01, p_crossover=0.7, p_subtree_mutation=0, p_hoist_mutation=0,
compare_htm.append([fidelity_train, fidelity_test]) ---------------------------------------- # # Genetic Programming with gplearn # # to add an exponential function to the function set, # I needed to create one from "scratch", and to prevent overflow # errors from the gplearn method a "protected" exponential function # is needed def _protected_exponent(x): with np.errstate(over='ignore'): return np.where(np.abs(x) < 100, np.exp(x), 0.) e_function = make_function(function = _protected_exponent, name = 'e_func', arity = 1) # the function set must be specified, otherwise the gplearn method # will only look at ['add', 'sub', 'mul', 'div'] f_set = ['add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv', 'max', 'min', 'sin', 'cos', e_function] time_step = 3 fidelity = [] for j in rand_indx: seqn = data[j] train_size = int(np.ceil(len(seqn)*0.70)) x = np.reshape(np.array([i for i in range(len(seqn))]), (len(seqn), 1)) # population_size = initial number of random programs to examine # generations = max number of times programs will continue on to earn a chance at becoming fit # tournament_size = number of programs that will compete
z = np.tanh(x) z = np.where(np.abs(z) < 1e+1000, z, np.sign(z)*1e+1000) z[~np.isfinite(z)]=0 return z def pexp(x): z = np.where(np.exp(x) <= np.exp(100), np.exp(x), np.exp(100)) z = np.where(np.abs(z) < 1e+1000, z, np.sign(z)*1e+1000) z[~np.isfinite(z)]=0 return z myExp = make_function(pexp, "exp", 1) #myTanh = make_function(np.tanh, "tanh", 1) mySqrt = make_function(lambda x: np.sqrt(np.abs(x)), "sqrtabs", 1) myLog = make_function(plog, "log",1) myDiv = make_function(pdiv, "pdiv", 2) myTanh = make_function(ptan, "tan", 1) def RMSE(yhat, y): return np.sqrt(np.square(yhat - y).mean()) # gridsearch_configurations is a dictionary, where each key is a parameter # and its value can be one of two options: # - list (python native):
def _rank(data):
    """Rank the flattened input with pandas, then pass it through ``np.nan_to_num``."""
    ranked = pd.Series(data.flatten()).rank()
    return np.nan_to_num(np.array(ranked.tolist()))


def _scale(data):
    """Divide the flattened input by the sum of its absolute values.

    The numerator is multiplied by the constant ``k = 1``; the result is
    passed through ``np.nan_to_num`` before being returned.
    """
    k = 1
    series = pd.Series(data.flatten())
    scaled = series.mul(k).div(np.abs(series).sum())
    return np.nan_to_num(scaled)


# Wrap the helper functions so they can be placed in a gplearn function set.
exp = make_function(function=_exp, name='exp', arity=1)
square = make_function(function=_square, name='square', arity=1)

ts_max = make_function(function=_ts_max, name='ts_max', arity=2)
ts_min = make_function(function=_ts_min, name='ts_min', arity=2)
ts_mid = make_function(function=_ts_mid, name='ts_mid', arity=2)

sma = make_function(function=_sma, name='sma', arity=2)
wma = make_function(function=_wma, name='wma', arity=2)
stddev = make_function(function=_stddev, name='stddev', arity=2)
skew = make_function(function=_skew, name='skew', arity=2)
kurt = make_function(function=_kurt, name='kurt', arity=2)

norm = make_function(function=_norm, name='norm', arity=2)
normMaxMin = make_function(function=_normMaxMin, name='norm_MaxMin', arity=2)

ts_rank = make_function(function=_ts_rank, name='ts_rank', arity=2)
ts_argmax = make_function(function=_ts_argmax, name='ts_argmax', arity=2)
ts_argmin = make_function(function=_ts_argmin, name='ts_argmin', arity=2)

corr = make_function(function=_corr, name='corr', arity=3)
import numpy as np import matplotlib.pyplot as plt from gplearn.functions import make_function from gplearn.fitness import make_fitness from gplearn.genetic import SymbolicRegressor import graphviz def exp_func(x): with np.errstate(over='ignore'): return np.where(np.abs(x) < 100, np.exp(x), 0.) exp = make_function(function=exp_func, name='expo', arity=1) def _fitness(y, y_pred, sample_weight): return np.sum(np.abs(y-y_pred)) fit = make_fitness(function=_fitness, greater_is_better=False, wrap=False) def get_data(): x = np.linspace(-1, 1, 21).reshape(-1,1) y = np.array([0, -0.1629, -0.2624, -0.3129, -0.3264, -0.3125, -0.2784, -0.2289, -0.1664, -0.0909, 0.0, 0.1111, 0.2496, 0.4251, 0.6496, 0.9375, 1.3056, 1.7731, 2.3616, 3.0951, 4.0000] ) return x, y pop_size = 1000 function_set = ['add', 'sub', 'mul', 'log', exp, 'sin', 'cos', 'div'] num_generations = 50 crossover_prob = 0.7 mutation_prob = 0.1 def experiment(seed, i): est_gp = SymbolicRegressor(population_size = pop_size, generations=num_generations, stopping_criteria=0.01, p_crossover=crossover_prob, p_subtree_mutation=mutation_prob,
# return x def logi_or(a, b): return a.astype(bool) | b.astype(bool) def logi_not(a): #return not a return ~a.astype(bool) def logi_xor(a, b): return a != b logic_and = make_function(function=logi_and, name='AND', arity=2) logic_or = make_function(function=logi_or, name='OR', arity=2) logic_xor = make_function(function=logi_xor, name='XOR', arity=2) logic_not = make_function(function=logi_not, name='NOT', arity=1) #function_set = [logic_and, logic_not,logic_or] function_set = [logic_and, logic_or, logic_not] est_gp = SymbolicRegressor( population_size=100, generations=150, #stopping_criteria=0.01, tournament_size=2, function_set=function_set, parsimony_coefficient=0.009, max_samples=1.0, verbose=1,
def GeneticPrograming():
    """Train a gplearn ``SymbolicRegressor`` in an endless checkpointed loop.

    Each iteration builds a fresh regressor, replaces it with the pickled
    checkpoint if one exists (adding 10 generations and dividing the
    mutation probabilities and parsimony coefficient by 10), fits it on the
    scaled training frame, saves the checkpoint, prints the training MAE
    and writes a submission CSV.

    The loop has no exit condition; it runs until the process is stopped.
    Reads the module-level ``df_train_sum``, ``df_test_sum``, ``submission``,
    ``PICKLE_PATH`` and ``DATA_PATH``.
    """
    gp_tanh = make_function(tanh, "tanh", 1)
    # NOTE(review): gp_sinh and gp_cosh are created but not part of the
    # function set below; kept because make_function is still called on them.
    gp_sinh = make_function(sinh, "sinh", 1)
    gp_cosh = make_function(cosh, "cosh", 1)

    X_test = df_test_sum.drop('time', axis=1).fillna(0)
    X_tr = df_train_sum.drop('time', axis=1).fillna(0)
    y_tr = df_train_sum['time']

    checkpoint_path = f'{PICKLE_PATH}\\EQS_gp.pickle'

    while True:
        est_gp = SymbolicRegressor(
            population_size=200000,
            tournament_size=5000,
            generations=10,
            stopping_criteria=0.0,
            p_crossover=0.9,
            p_subtree_mutation=0.001,
            p_hoist_mutation=0.001,
            p_point_mutation=0.001,
            max_samples=1.0,
            verbose=1,
            function_set=('add', 'sub', 'mul', 'div', gp_tanh, 'sqrt', 'log',
                          'abs', 'neg', 'inv', 'max', 'min', 'tan', 'cos',
                          'sin'),
            metric='mean absolute error',
            warm_start=True,
            n_jobs=1,
            parsimony_coefficient=0.001,
            random_state=11)

        if os.path.exists(checkpoint_path):
            # The handle is closed as soon as loading finishes.
            # NOTE(review): pickle.load executes arbitrary code from the
            # file; only safe while the checkpoint is written by this script.
            with open(checkpoint_path, 'rb') as pickle_in:
                est_gp = pickle.load(pickle_in)
            print("Model Loaded")
            # Continue the warm-started run for 10 more generations with
            # smaller mutation rates and parsimony penalty.
            est_gp.generations += 10
            est_gp.p_subtree_mutation /= 10
            est_gp.p_hoist_mutation /= 10
            est_gp.p_point_mutation /= 10
            est_gp.parsimony_coefficient /= 10

        # The scaler is fitted on train and test rows together, then the
        # scaled frame is split back at the original boundary.
        alldata = pd.concat([X_tr, X_test])
        scaler = StandardScaler()
        alldata = pd.DataFrame(scaler.fit_transform(alldata),
                               columns=alldata.columns)
        X_tr_scaled = alldata[:X_tr.shape[0]]
        X_test_scaled = alldata[X_tr.shape[0]:]

        est_gp.fit(X_tr_scaled, y_tr)

        with open(checkpoint_path, 'wb') as f:
            pickle.dump(est_gp, f)
        print('Model Saved')

        y_gp = est_gp.predict(X_tr_scaled)
        gpLearn_MAE = mean_absolute_error(y_tr, y_gp)
        print("gpLearn MAE:", gpLearn_MAE)

        submission.time_to_failure = est_gp.predict(X_test_scaled)
        submission.to_csv(f'{DATA_PATH}\\gplearnEQS_submission.csv',
                          index=True)
        print(submission.head())
domaingrid = np.linspace(xmin, xmax, Ng) outTrain = np.ravel(sigma(inTrain)) def _protected_exponent(x): with np.errstate(over='ignore'): return np.where(np.abs(x) < 100, np.exp(x), 0.) def _protected_negexponent(x): with np.errstate(over='ignore'): return np.where(np.abs(x) < 100, np.exp(-x), 0.) pexp = gf.make_function(_protected_exponent, 'exp', 1) pnexp = gf.make_function(_protected_negexponent, 'nexp', 1) f_s = ['add', 'sub', 'mul', 'div', pexp, pnexp, 'neg'] est_gp = gl.SymbolicRegressor(init_depth=(2, 4), population_size=3000, tournament_size=20, const_range=(-40, 40), generations=20, stopping_criteria=0.01, p_crossover=0.7, p_subtree_mutation=0.1, warm_start=True, p_hoist_mutation=0.05, p_point_mutation=0.1, max_samples=0.9,
def gplearn_procedure(equation_id,
                      no_samples=1000,
                      input_range=(-1, 1),
                      save_path=None,
                      save=True,
                      load=True,
                      func_set=[
                          'add', 'sub', 'mul', 'div', 'log', 'sqrt', 'cos',
                          'tan', 'sin', 'pow', 'exp'
                      ],
                      verbose=1):
    """
    Uses gplearn to attempt to predict the equation form of 'equation_id'.

    Renders a graphviz image to images/gplearn/ and returns the fitted
    program, the estimator's score on the full dataset and the time taken
    by the fit.

    Parameters
    ----------
    equation_id : string
        The ID of an equation in the dataset. Must be a valid one.
    no_samples : int
        The number of samples you want fed in to the algorithm. Capped at
        the number of rows left after dropping NaNs.
    input_range : tuple(float, float)
        The minimum and maximum values of all input parameters.
    save_path : string path
        The path to where you wish to save this dataframe.
    save : boolean
        Saves file to save_path iff True.
    load : boolean
        If true then looks for file in save_path and loads it preemptively
        if it is there.
    func_set : list
        List of strings, i.e. names of functions to include / operations
        to consider. The list is only read, never modified. Current
        options include:

        'add' : addition, arity=2.
        'sub' : subtraction, arity=2.
        'mul' : multiplication, arity=2.
        'div' : protected division where a denominator near-zero
            returns 1., arity=2.
        'sqrt' : protected square root where the absolute value of the
            argument is used, arity=1.
        'log' : protected log where the absolute value of the argument is
            used and a near-zero argument returns 0., arity=1.
        'abs' : absolute value, arity=1.
        'neg' : negative, arity=1.
        'inv' : protected inverse where a near-zero argument
            returns 0., arity=1.
        'max' : maximum, arity=2.
        'min' : minimum, arity=2.
        'sin' : sine (radians), arity=1.
        'cos' : cosine (radians), arity=1.
        'tan' : tangent (radians), arity=1.
        'exp' : exponential (self defined), arity=1
        'pow' : power (self defined), arity=2
        'pi' : wrapped from the module-level ``pi`` with arity 0

        Any other name triggers a warning and is skipped.
    verbose : int
        Controls how much is printed, 0 is quietest.

    Returns
    -------
    program, float, float
        ``est_gp._program``, ``est_gp.score(X, y)`` and the elapsed fit
        time in seconds. If building the dataset fails, the traceback is
        printed and ``('', 0, 0)`` is returned instead.
    """
    try:
        df = create_dataset(equation_id,
                            no_samples=no_samples,
                            input_range=input_range,
                            save_path=save_path,
                            save=save,
                            load=load).dropna()
        X = df.drop('target', axis=1)
        y = df['target']
    except Exception:
        # Best-effort: report and skip this equation. Catching Exception
        # (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        traceback.print_exc()
        print(f"Error on equation {equation_id} skipping")
        return '', 0, 0

    no_samples = min(no_samples, len(y))

    default_func_set = ('add', 'sub', 'mul', 'div', 'log', 'sqrt', 'cos',
                        'tan', 'sin', 'abs', 'neg', 'inv', 'max', 'min')
    final_func_set = []
    for func in func_set:
        if func in default_func_set:
            final_func_set.append(func)
        # The self-defined helpers are looked up only when requested, so a
        # helper that is not asked for is never referenced.
        elif func == "pow":
            final_func_set.append(make_function(power, func, 2))
        elif func == "exp":
            final_func_set.append(make_function(exponent, func, 1))
        elif func == "pi":
            final_func_set.append(make_function(pi, func, 0))
        else:
            warnings.warn(
                f"{func} is an unrecognized function, skipping it")

    est_gp = SymbolicRegressor(population_size=5000,
                               generations=10,
                               stopping_criteria=0.01,
                               p_crossover=0.7,
                               p_subtree_mutation=0.1,
                               p_hoist_mutation=0.05,
                               p_point_mutation=0.1,
                               max_samples=0.9,
                               function_set=final_func_set,
                               verbose=verbose,
                               parsimony_coefficient=0.01,
                               random_state=0)

    # Only the fit itself is timed.
    start = time.time()
    est_gp.fit(X[:no_samples], y[:no_samples])
    end = time.time()

    dot_data = est_gp._program.export_graphviz()
    graph = graphviz.Source(dot_data)
    graph.render(f'images/gplearn/{equation_id}_estimate',
                 format='png',
                 cleanup=True)

    # The score is computed on all rows, not just the first no_samples.
    return est_gp._program, est_gp.score(X, y), end - start
import warnings

import numpy as np
from gplearn.genetic import SymbolicRegressor
from gplearn.functions import make_function
from sklearn.model_selection import train_test_split

# `warnings` is imported above so this call does not raise NameError.
warnings.filterwarnings("ignore")


def is_less_than_zero(x):
    """Return an int array that is 1 where ``x < 0`` and 0 elsewhere."""
    result = (x < 0)
    return result.astype(int)


def is_greater_than_or_equal_to_zero(x):
    """Return an int array that is 1 where ``x >= 0`` and 0 elsewhere."""
    result = (x >= 0)
    return result.astype(int)


# Wrap the sign indicators so they can be used in the function set.
is_lt_zero = make_function(is_less_than_zero, "is_lt_zero", arity=1)
is_gte_zero = make_function(is_greater_than_or_equal_to_zero, "is_gte_zero",
                            arity=1)
function_set = [is_lt_zero, is_gte_zero, "mul", "add", "neg"]

# Target is |x| on the integers -10..10.
X = np.arange(-10, 11).reshape(-1, 1)
y = np.abs(X).reshape(-1, 1)
X_train, X_test, y_train, y_test = train_test_split(X.tolist(), y.tolist())

my_abs_gp = SymbolicRegressor(function_set=function_set,
                              init_method="grow",
                              parsimony_coefficient=0.0625,
                              verbose=True)
my_abs_gp.fit(X_train, y_train)
xmax = max(X_train)
# xmin = 1
# xmax = 3
X_int = np.linspace(xmin, xmax, 200)
N = 20
# X_train = rng.uniform(xmin, xmax, N).reshape(N, 1)
# y_train = np.ravel(X_train)


def protexp(x):
    """Return ``exp(-|x|)``; the exponent is never positive."""
    return np.exp(-np.abs(x))


nexp = gf.make_function(protexp, 'negabsexp', 1)
f_s = ['add', 'sub', 'mul', 'div', 'inv', 'abs', nexp, 'log']

est_gp = gl.SymbolicRegressor(init_depth=(3, 6),
                              population_size=4000,
                              tournament_size=20,
                              generations=30,
                              stopping_criteria=0.01,
                              p_crossover=0.7,
                              p_subtree_mutation=0.1,
                              p_hoist_mutation=0.05,
                              p_point_mutation=0.1,
                              max_samples=0.9,
                              verbose=1,
                              parsimony_coefficient=0.01,
                              random_state=0,
                              function_set=f_s)
est_gp.fit(X_train, y_train)

y_gp = est_gp.predict(np.c_[X_traintemp.ravel()]).reshape(X_traintemp.shape)

# gplearn stores the best evolved program on `_program`; the estimator has
# no public `program` attribute.
print(est_gp._program)
'log': (_log, 1), 'inv': (_inverse, 1), 'exp': (_exp, 1), 'sig': (_sigmoid, 1), 'square': (_square, 1), 'cube': (_cube, 1), 'compare': (_compare, 2), 'scale': (_scale, 1), 'talib_HT_DCPHASE': (_talib_HT_DCPHASE, 1), } base_function_dict = {} for fn in _base_function_params1: base_function_dict[fn] = _function_map[fn] for fn, (f, a) in _base_function_params2.items(): base_function_dict[fn] = deepcopy(make_function(function=f, name=fn, arity=a)) # Make time series functions rolling_periods = { 'minute': np.array([1, 5, 15, 30, 60]), 'day': np.array([1, 3, 5, 10, 20]), } # annualized_factor = { # 'minute': np.sqrt(240 * 252), # 'day': np.sqrt(252) # } ts_function_params = { # function_name: (function, arity, window_iterator)
def get_custom_function_list():
    """Return the four arithmetic function names plus the wrapped ``_logical``.

    ``_logical`` is wrapped with ``gp_func.make_function`` under the name
    ``'logical'`` with arity 4 and placed last in the list.
    """
    logical = gp_func.make_function(function=_logical,
                                    name='logical',
                                    arity=4)
    return ['add', 'sub', 'mul', 'div', logical]
def function(x):
    """Target function: ``3 * x**3.5 + 2``."""
    return 3*x**(3.5) + 2


def _pow(x1, x2):
    """Protected power, computed as ``exp(x2 * log(x1))`` with the protected helpers."""
    with np.errstate(over='ignore'):
        return _protected_exponent(x2*_protected_log(x1))


def _protected_exponent(x):
    """Return ``exp(x)`` where ``x < 100`` and ``2e20`` elsewhere."""
    # np.exp is evaluated on every element before np.where selects, so
    # overflow warnings for large x are silenced here.
    with np.errstate(over='ignore'):
        return np.where(x < 100, np.exp(x), 2e20)


def _protected_log(x):
    """Return ``log(x)`` where ``x > 1e-5`` and ``-100.0`` elsewhere."""
    # np.log is evaluated on every element before np.where selects; zero
    # and negative inputs raise 'divide'/'invalid' warnings, not 'over'.
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.where(x > 1e-5, np.log(x), -100.0)


# NOTE(review): `pow` shadows the builtin of the same name from here on.
exp = make_function(function=_protected_exponent, name='exp', arity=1)
log = make_function(function=_protected_log, name='log', arity=1)
pow = make_function(function=_pow, name='pow', arity=2)

################################### INPUT ###########################################
points_training = 1000
points_test = 150

# symbolic regressor parameters
population_size = 10000
generations = 30
tournament_size = 100
function_set = ('add', 'sub', 'mul', 'div', exp, log, pow)
metric = 'mse'
init_depth = (2, 8)
n_jobs = 1
def _ts_argmax(data):
    """Rolling 10-element ``np.argmax`` of the flattened input, plus one.

    The result is passed through ``np.nan_to_num`` before being returned.
    """
    window = 10
    rolled = pd.Series(data.flatten()).rolling(window)
    return np.nan_to_num(rolled.apply(np.argmax) + 1)


def _ts_argmin(data):
    """Rolling 10-element ``np.argmin`` of the flattened input, plus one.

    The result is passed through ``np.nan_to_num`` before being returned.
    """
    window = 10
    rolled = pd.Series(data.flatten()).rolling(window)
    return np.nan_to_num(rolled.apply(np.argmin) + 1)


# make_function group: wrap every helper as an arity-1 gplearn function.
delta = make_function(function=_delta, name='delta', arity=1)
delay = make_function(function=_delay, name='delay', arity=1)
rank = make_function(function=_rank, name='rank', arity=1)
scale = make_function(function=_scale, name='scale', arity=1)
sma = make_function(function=_sma, name='sma', arity=1)
stddev = make_function(function=_stddev, name='stddev', arity=1)
product = make_function(function=_product, name='product', arity=1)
ts_rank = make_function(function=_ts_rank, name='ts_rank', arity=1)
ts_min = make_function(function=_ts_min, name='ts_min', arity=1)
ts_max = make_function(function=_ts_max, name='ts_max', arity=1)
ts_argmax = make_function(function=_ts_argmax, name='ts_argmax', arity=1)
ts_argmin = make_function(function=_ts_argmin, name='ts_argmin', arity=1)
ts_sum = make_function(function=_ts_sum, name='ts_sum', arity=1)

# Built-in function names and the custom functions wrapped above.
init_function = [
    'add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv', 'max',
    'min', 'sin', 'cos', 'tan',
]
user_function = [
    delta, delay, rank, scale, sma, stddev, product, ts_rank, ts_min,
    ts_max, ts_argmax, ts_argmin, ts_sum,
]
def pd_col_genetic_transform(df=None, col=None, pars=None):
    """Find symbolic formulae for feature engineering.

    Fits (training) or loads (inference) a gplearn ``SymbolicTransformer``
    on the forward-filled columns ``col`` of ``df`` and returns the
    generated features.

    Parameters
    ----------
    df : DataFrame
        Input data; only the columns in ``col`` are used.
    col : list
        Column names used as inputs and as gplearn ``feature_names``.
    pars : dict
        Keys read here: ``'function_set'`` and ``'pars_genetic'``
        (optional overrides), ``'path_pipeline'`` (if present, the model
        and parameters are loaded instead of fitted), ``'coly'`` and
        ``'dfy'`` (required when fitting), ``'path_features_store'`` and
        ``'path_pipeline_export'`` (if both present, artefacts are saved).

    Returns
    -------
    (DataFrame, dict)
        The generated features indexed like the input, and a dict with
        ``'prefix'``, ``'path'`` and ``'cols_new'``.
    """
    prefix = 'col_genetic'
    ######################################################################################
    from gplearn.genetic import SymbolicTransformer
    from gplearn.functions import make_function
    import random

    colX = col  # [col_ for col_ in col if col_ not in coly]
    # .ffill() replaces the deprecated fillna(method='ffill') spelling.
    train_X = df[colX].ffill()
    feature_name_ = colX

    def squaree(x):
        return x * x

    square_ = make_function(function=squaree, name='square_', arity=1)

    function_set = pars.get('function_set', [
        'add', 'sub', 'mul', 'div', 'sqrt', 'log', 'abs', 'neg', 'inv',
        'tan', square_
    ])

    pars_genetic = pars.get(
        'pars_genetic',
        {
            'generations': 5,
            'population_size': 10,  ### Higher than nb_features
            'metric': 'spearman',
            'tournament_size': 20,
            'stopping_criteria': 1.0,
            'const_range': (-1., 1.),
            'p_crossover': 0.9,
            'p_subtree_mutation': 0.01,
            'p_hoist_mutation': 0.01,
            'p_point_mutation': 0.01,
            'p_point_replace': 0.05,
            'parsimony_coefficient': 0.005,  #### 0.00005 Control Complexity
            'max_samples': 0.9,
            'verbose': 1,
            #'n_components'      ### Control number of output features : n_components
            'random_state': 0,
            'n_jobs': 4,
        })

    if 'path_pipeline' in pars:  #### Inference time
        gp = load(pars['path_pipeline'] + f"/{prefix}_model.pkl")
        # NOTE(review): from here on `pars` is the loaded dict, so the
        # caller's 'path_*' keys are no longer visible below -- confirm
        # that this is intended.
        pars = load(pars['path_pipeline'] + f"/{prefix}_pars.pkl")
    else:  ### Training time
        # Unused below; the lookup is kept so a missing key still fails.
        coly = pars['coly']
        train_y = pars['dfy']

        # 'n_components' is taken out of the forwarded kwargs so it is not
        # passed twice (explicitly and through **), which raised TypeError
        # whenever the caller supplied it in pars_genetic.
        gp_kwargs = dict(pars_genetic)
        n_components = gp_kwargs.pop('n_components', train_X.shape[1])

        gp = SymbolicTransformer(
            hall_of_fame=train_X.shape[1] + 1,  ### Buggy
            n_components=n_components,
            feature_names=feature_name_,
            function_set=function_set,
            **gp_kwargs)
        gp.fit(train_X, train_y)

    ##### Transform Data  #########################################
    df_genetic = gp.transform(train_X)
    # NOTE(review): labelled a unique tag, but randint(0, 10) has only 11
    # possible values, so names can collide between calls.
    tag = random.randint(0, 10)  #### UNIQUE TAG
    col_genetic = [f"gen_{tag}_{i}" for i in range(df_genetic.shape[1])]
    df_genetic = pd.DataFrame(df_genetic,
                              columns=col_genetic,
                              index=train_X.index)
    pars_gen_all = {'pars_genetic': pars_genetic, 'function_set': function_set}

    ##### Formulae Extraction #####################################
    # str(gp) is split on ",\n" after removing the square brackets; the
    # pieces are paired with the new column names by position.
    formula = str(gp).replace("[", "").replace("]", "")
    flist = formula.split(",\n")
    form_dict = {x: flist[i] for i, x in enumerate(col_genetic)}
    pars_gen_all['formulae_dict'] = form_dict
    log("########## Formulae ", form_dict)
    # col_pars['map_dict'] = dict(zip(train_X.columns.to_list(), feature_name_))

    col_new = col_genetic

    ###################################################################################
    if 'path_features_store' in pars and 'path_pipeline_export' in pars:
        save_features(df_genetic, 'df_genetic', pars['path_features_store'])
        save(gp, pars['path_pipeline_export'] + f"/{prefix}_model.pkl")
        save(col_genetic, pars['path_pipeline_export'] + f"/{prefix}.pkl")
        save(pars_gen_all, pars['path_pipeline_export'] + f"/{prefix}_pars.pkl")
        # save(form_dict,      pars['path_pipeline_export'] + f"/{prefix}_formula.pkl")
        save_json(form_dict, pars['path_pipeline_export'] +
                  f"/{prefix}_formula.json")  ### Human readable

    col_pars = {
        'prefix': prefix,
        'path': pars.get('path_pipeline_export',
                         pars.get('path_pipeline', None))
    }
    col_pars['cols_new'] = {
        prefix: col_new  ### list
    }
    return df_genetic, col_pars
##TRAINING AND PREDICTING WITH GPLEARN def tanh(x): return np.tanh(x) def sinh(x): return np.sinh(x) def cosh(x): return np.cosh(x) gp_tanh = make_function(tanh, "tanh", 1) gp_sinh = make_function(sinh, "sinh", 1) gp_cosh = make_function(cosh, "cosh", 1) est_gp = SymbolicRegressor( population_size=20000, tournament_size=500, generations=1, stopping_criteria=0.0, p_crossover=0.9, p_subtree_mutation=0.0001, p_hoist_mutation=0.0001, p_point_mutation=0.0001, max_samples=1.0, verbose=1, function_set=('add', 'sub', 'mul', 'div', gp_tanh, 'sqrt', 'log', 'abs',