# NOTE(review): the statements down to the second `return` are the tail of a
# fitness-evaluation function whose `def` line (and the code that produces
# `Yp`) precede this fragment. The single nesting level used below is an
# assumption; the "above special cases" remark hints that part of this code may
# sit inside an enclosing conditional -- confirm against the full definition.
    # Least-squares fit pi ~ a * Yp + b: the design matrix holds the
    # predictions in the first column and ones in the second.
    Q = np.hstack((np.reshape(Yp, (-1, 1)), np.ones((len(Yp), 1))))
    (individual.a, individual.b), residuals, _, _ = np.linalg.lstsq(Q, pi, rcond=-1)
    # residuals is the sum of squared errors; numpy returns an empty array
    # when the system is rank deficient or not over-determined
    if residuals.size > 0:
        return residuals[0] / len(pi),  # MSE, as a one-element tuple

    # regarding the above special cases, the optimal linear scaling w.r.t. LSM
    # is just the mean of the true target values
    individual.a = 0
    individual.b = np.mean(pi)
    return np.mean((pi - individual.b)**2),


# In[51]:

# Input terminals of the primitive set, one per listed name.
pset = gep.PrimitiveSet('Main', input_names=['wxx', 'wyy', 'sa', 'dw', 'wa', 'k'])

h = 2  # head length; tail length is t = h(n-1) + 1
n_genes = 2  # number of genes in a chromosome
r = 3  # length of the RNC array
enable_ls = True  # whether to apply the linear scaling technique

# size of population and number of generations
n_pop = 20
n_gen = 500
champs = 3


# In[52]:# In[50]:

# NOTE(review): only the header of protected_div is part of this fragment; its
# body follows elsewhere.
def protected_div(x1, x2):
# Input terminals
# ---------------
# Each entry of `input_names` becomes a terminal symbol of the primitive set;
# geppy/geppy/core/symbol.py shows how terminals are named to match the input
# data. Only the input columns are registered here. The target is not part of
# the primitive set -- the original note suggests keeping it in the variable
# "Y" when the data is read in.
pset = gep.PrimitiveSet(
    'Main',
    input_names=['P', 'T'] + ['x_{}'.format(i) for i in range(1, 49)] + ['x_at'],
)

# Function set
# ------------
# Operators the evolved symbolic-regression expression may be built from,
# registered in this order.
for _func, _arity in (
    (operator.add, 2),
    (operator.sub, 2),
    (operator.mul, 2),
    (protected_div, 2),
):
    pset.add_function(_func, _arity)
# Currently disabled: operator.truediv (arity 2), protected_exp (arity 1),
# root (arity 1).
# NOTE(review): the statements down to the second `return` are the tail of a
# fitness-evaluation function whose `def` line (and the code that produces
# `Yp`) precede this fragment. The single nesting level used below is an
# assumption; the "above special cases" remark hints that part of this code may
# sit inside an enclosing conditional -- confirm against the full definition.
    # Least-squares fit star ~ a * Yp + b: the design matrix holds the
    # predictions in the first column and ones in the second.
    Q = np.hstack((np.reshape(Yp, (-1, 1)), np.ones((len(Yp), 1))))
    (individual.a, individual.b), residuals, _, _ = np.linalg.lstsq(Q, star, rcond=-1)
    # residuals is the sum of squared errors; numpy returns an empty array
    # when the system is rank deficient or not over-determined
    if residuals.size > 0:
        return residuals[0] / len(star),  # MSE, as a one-element tuple

    # regarding the above special cases, the optimal linear scaling w.r.t. LSM
    # is just the mean of the true target values
    individual.a = 0
    individual.b = np.mean(star)
    return np.mean((star - individual.b)**2),


# In[51]:

# Input terminals of the primitive set, one per listed name.
pset = gep.PrimitiveSet('Main', input_names=['x', 'y', 't'])

h = 5  # head length; tail length is t = h(n-1) + 1
n_genes = 3  # number of genes in a chromosome
r = 8  # length of the RNC array
enable_ls = True  # whether to apply the linear scaling technique

# size of population and number of generations
n_pop = 50
n_gen = 500
champs = 3


# In[52]:# In[50]:

# NOTE(review): only the header of protected_div is part of this fragment; its
# body follows elsewhere.
def protected_div(x1, x2):
# NOTE(review): the statements down to the second `return` are the tail of a
# fitness-evaluation function whose `def` line (and the code that produces
# `Yp`) precede this fragment. The single nesting level used below is an
# assumption; the "above special cases" remark hints that part of this code may
# sit inside an enclosing conditional -- confirm against the full definition.
    # Least-squares fit ut ~ a * Yp + b: the design matrix holds the
    # predictions in the first column and ones in the second.
    Q = np.hstack((np.reshape(Yp, (-1, 1)), np.ones((len(Yp), 1))))
    (individual.a, individual.b), residuals, _, _ = np.linalg.lstsq(Q, ut, rcond=-1)
    # residuals is the sum of squared errors; numpy returns an empty array
    # when the system is rank deficient or not over-determined
    if residuals.size > 0:
        return residuals[0] / len(ut),  # MSE, as a one-element tuple

    # regarding the above special cases, the optimal linear scaling w.r.t. LSM
    # is just the mean of the true target values
    individual.a = 0
    individual.b = np.mean(ut)
    return np.mean((ut - individual.b)**2),


# In[51]:

# Input terminals of the primitive set, one per listed name.
pset = gep.PrimitiveSet('Main', input_names=['u', 'ux', 'u2x', 'u3x', 'u4x', 'u5x'])

h = 2  # head length; tail length is t = h(n-1) + 1
n_genes = 2  # number of genes in a chromosome
r = 5  # length of the RNC array
enable_ls = True  # whether to apply the linear scaling technique

# size of population and number of generations
n_pop = 50
n_gen = 300
champs = 3


# NOTE(review): this definition is cut off after its guard clause; the
# statement(s) handling a non-negligible divisor follow elsewhere.
def protected_div(x1, x2):
    # Divisors with magnitude below 1e-6 short-circuit to the constant 1.
    if abs(x2) < 1e-6:
        return 1
# # Map our input data to the GEP variables
# The input columns are handed to GEP by listing their field names as
# `input_names`; geppy/geppy/core/symbol.py shows how the gene terminals are
# named so that they match the input data.
#
# Only the input columns are mapped below. The target "PE" (held in `Y`) is not
# part of the primitive set -- the original note suggests forcing the target
# variable to "Y" when the data is read in.

# In[28]:

pset = gep.PrimitiveSet('Main', input_names=['AT', 'V', 'AP', 'RH'])

# # Define the operators
# Building blocks the final symbolic-regression function is assembled from,
# registered in this order. The trigonometric functions were added as an
# experiment with custom functions.

# In[29]:

for _func, _arity in (
    (operator.add, 2),
    (operator.sub, 2),
    (operator.mul, 2),
    (protected_div, 2),
    (math.sin, 1),
    (math.cos, 1),
    (math.tan, 1),
):
    pset.add_function(_func, _arity)
pset.add_rnc_terminal()
def pulse(x):
    """Return 1 when -1 <= x <= 1 and 0 otherwise.

    Any value outside the closed interval, including NaN, yields 0. The
    previous three-branch form fell through for NaN and returned None.
    """
    return 1 if -1 <= x <= 1 else 0


# Rect function
def rect(x):
    """Return 1 when |x| <= 0.5 and 0 otherwise (unit-width rectangle).

    The previous expression ``(x + 0.5) - (x - 0.5)`` evaluated to the
    constant 1 for every input, so it never produced a rectangle.
    NOTE(review): the edges are treated as inside the rectangle, mirroring
    ``pulse``; confirm if a different edge convention is wanted.
    """
    return 1 if abs(x) <= 0.5 else 0


# Input data
pset = gep.PrimitiveSet('Main', input_names=['sma','wma','macd','rsi','mom'])

# Defining the operators
pset.add_function(operator.add, 2)
pset.add_function(operator.sub, 2)
pset.add_function(operator.mul, 2)
pset.add_function(protected_div, 2)
# Currently disabled: rect (arity 1), math.sin (arity 1), math.cos (arity 1).

# Constant terminals available to the evolved expressions.
pset.add_constant_terminal(1)
pset.add_constant_terminal(-1)
# if math.isinf(result) == True: # print(x1,x2,'inf!') return np.min([result, 2**20]) def protected_div(x1, x2): if abs(x2) < 1e-6: return 1 return x1 / x2 import operator pset = gep.PrimitiveSet('Main', input_names=[ 'adep', 'r', 'rater', 'dc', 'ep', 'ep2', 'vpm', 'vem', 'minr', 'maxr', 'avgr', 'stdr' ]) pset.add_rnc_terminal() pset.add_function(operator.add, 2) pset.add_function(operator.sub, 2) pset.add_function(operator.mul, 2) pset.add_function(protected_div, 2) pset.add_function(protected_pow, 2) # pset.add_function(operator.abs, 1) pset.add_function(math.sin, 1) # pset.add_function(math.cos, 1) pset.add_constant_terminal(np.pi) pset.add_constant_terminal(np.e) from deap import creator, base, tools
# Input terminals
# ---------------
# The input data is handed to GEP by listing the field names as `input_names`;
# geppy/geppy/core/symbol.py shows how the gene terminals are named so that
# they match the input data.
#
# Only the input columns are mapped here. The target "PE" (held in `Y`) is not
# part of the primitive set -- the original note suggests forcing the target
# variable to "Y" when the data is read in.
pset = gep.PrimitiveSet(
    'Main',
    input_names=[
        'P', 'T', 'TvCO2', 'TvO2', 'TvCO',
        'x_CO2', 'x_O2', 'x_CO', 'x_O', 'x_C',
    ],
)

# Function set
# ------------
# Operators the final symbolic-regression function is built from, registered
# in this order.
# NOTE(review): confirm that operator.add is meant to be enabled here.
for _func, _arity in (
    (operator.add, 2),
    (operator.sub, 2),
    (operator.mul, 2),
    (protected_div, 2),
    (math.sin, 1),
    (math.cos, 1),
    (math.tan, 1),
):
    pset.add_function(_func, _arity)
# Currently disabled: operator.truediv (arity 2).

pset.add_rnc_terminal()
# Currently disabled: pset.add_pow_terminal('X') -- its argument must be the
# same as an input name of the primitive set.
# Training set: every combination of the four Boolean inputs (16 samples),
# paired with the value of the target function `f` on that combination.
X = list(itertools.product([True, False], repeat=4))
Y = [f(*inputs) for inputs in X]

# # Creating the primitive set
# The primitive set holds the elementary building blocks a model is formed
# from. This Boolean problem needs no constant terminals: the three logic
# operators and the four input terminals are enough.

# In[3]:

import geppy as gep
import operator

pset = gep.PrimitiveSet('Main', input_names=['a', 'b', 'c', 'd'])
for _gate, _arity in (
    (operator.and_, 2),
    (operator.or_, 2),
    (operator.not_, 1),
):
    pset.add_function(_gate, _arity)

# # Create the individual and population
# The objective is to **maximize** the number of samples the evolved model
# predicts correctly, i.e. fitness is the *number of hits*.
# ## Define the individual class, a subclass of *gep.Chromosome*

# In[4]:

from deap import creator, base, tools

# A positive weight makes DEAP maximize the single objective.
creator.create("FitnessMax", base.Fitness, weights=(1, ))
creator.create("Individual", gep.Chromosome, fitness=creator.FitnessMax)
###############################################################################
### Synthetic dataset of Goody
###############################################################################
from Dataset import Goody_data

# 2000 log-spaced samples from 10**-3.5 to 10**2, and 5 log-spaced samples
# from 10**0 to 10**2.5; both grids are passed to the dataset generator.
f = np.logspace(-3.5, 2, num=2000)
Rt = np.logspace(0, 2.5, num=5)
df = Goody_data(f, Rt)

###############################################################################
### GEP
###############################################################################
# Build the set of functions and terminals.
pset = gep.PrimitiveSet('Main', input_names=['X', 'Y'])
for _func in (operator.add, operator.mul, operator.truediv):
    pset.add_function(_func, 2)
pset.add_rnc_terminal()
# Attention: each power terminal must use the same name as an input of the
# primitive set.
for _input_name in ('X', 'Y'):
    pset.add_pow_terminal(_input_name)
pset.add_constant_terminal(1.0)

# A negative weight makes DEAP minimize the single objective.
creator.create("FitnessMin", base.Fitness, weights=(-1, ))
creator.create("Individual", gep.Chromosome, fitness=creator.FitnessMin)

h = 4  # head length
n_genes = 3  # number of genes in a chromosome
Y = syntheticData.y1.values  # the regression target, mapped to Y

# Creating the primitive set
import operator


def protected_div(x, y):
    """Return x / y, or NaN when y equals 0.0 (guards against dividing by zero)."""
    return np.nan if y == 0.0 else operator.truediv(x, y)


# Map our input data to the GEP variables
import geppy as gep

pset = gep.PrimitiveSet('Main', input_names=['x1', 'x2'])

# Define the operators: four binary arithmetic functions plus a random
# numerical constant (RNC) terminal.
for _func in (operator.add, operator.sub, operator.mul, protected_div):
    pset.add_function(_func, 2)
pset.add_rnc_terminal()

# Create the individual and population
from deap import creator, base, tools

# A positive weight makes DEAP maximize the single objective.
creator.create("FitnessMax", base.Fitness, weights=(1.0, ))
creator.create("Individual", gep.Chromosome, fitness=creator.FitnessMax)

# Register the individual and population creation operations
# NOTE(review): the next two statements are the end of a function whose `def`
# line precedes this fragment (presumably the `protected_sqrt` registered
# further down -- confirm). The indentation shown is an assumption. As written
# the guard also maps x == 0 to NaN.
    if x <= 0.0:
        return np.nan
    return math.sqrt(x)


# define a protected arcsine that avoids math-domain errors
def protected_asin(x):
    """Return asin(x), or NaN when |x| > 1.0."""
    if abs(x) > 1.0:
        return np.nan
    return math.asin(x)


# Map our input data to the GEP variables
import geppy as gep

pset = gep.PrimitiveSet('Main', input_names=['A', 'B', 'C', 'D', 'E'])

# Define the operators
# F1: binary arithmetic
pset.add_function(operator.add, 2)
pset.add_function(operator.sub, 2)
pset.add_function(operator.mul, 2)
pset.add_function(protected_div, 2)
# F2: protected power, logarithm, exponential and square root
pset.add_function(protected_pow, 2)
pset.add_function(protected_log, 1)
pset.add_function(protected_exp, 1)
pset.add_function(protected_sqrt, 1)
# F3: trigonometric functions
pset.add_function(math.sin, 1)
pset.add_function(math.cos, 1)