def train(self, feature_size, regex_size, num_generations, pop_size, cx_prob=0.25, mut_prob=0.1, mut_ind_char_prob=0.1, tourn_size=3):
    """
    Configure DEAP and evolve a population with ``algorithms.eaSimple``.

    A population of ``pop_size`` individuals, each a list of ``regex_size``
    values drawn from ``self.init_set``, is evolved for ``num_generations``
    generations. The hall of fame (created with size ``feature_size``) is
    converted through ``self.to_regex`` and stored in ``self.best_regexes``;
    one summary line per hall-of-fame member is then printed.
    """
    def fitness_of(ind):
        return ind.fitness.values

    # One positive weight: a single objective that is maximised.
    creator.create('FitnessMax', base.Fitness, weights=(1.0, ))
    creator.create('Individual', list, fitness=creator.FitnessMax)

    # Individuals are regex_size draws from self.init_set; the population
    # is a plain list of such individuals.
    tb = base.Toolbox()
    tb.register('attr_int', random.choice, self.init_set)
    tb.register('individual', tools.initRepeat, creator.Individual, tb.attr_int, regex_size)
    tb.register('population', tools.initRepeat, list, tb.individual)
    population = tb.population(pop_size)

    # Variation, selection and evaluation operators.
    tb.register('mate', tools.cxTwoPoint)
    tb.register('mutate', tools.mutUniformInt, low=0, up=self.max_valid_char, indpb=mut_ind_char_prob)
    tb.register('select', tools.selTournament, tournsize=tourn_size)
    tb.register('evaluate', self._score)

    # Per-generation statistics over the fitness values.
    tracker = tools.Statistics(key=fitness_of)
    for label, reducer in (('median score', np.median), ('best score', np.max)):
        tracker.register(label, reducer)

    # Best individuals seen over the whole run.
    hall = tools.HallOfFame(feature_size)

    algorithms.eaSimple(
        population,
        tb,
        cxpb=cx_prob,
        mutpb=mut_prob,
        ngen=num_generations,
        stats=tracker,
        halloffame=hall,
        verbose=True,
    )
    self.best_regexes = list(map(self.to_regex, hall))

    # Training summary: pattern, score and counts for each kept individual.
    print('-' * 79)
    for member in hall:
        pattern = self.to_regex(member).pattern
        score, counts = self._score(member, with_counts=True)
        print('{}, {:.3f}, {}'.format(pattern, score, counts))
def getHof():
    """Run ``algorithms.eaSimple`` on the module-level ``toolbox`` and return the hall of fame.

    Uses a population of 300 individuals evolved for 30 generations with
    cxpb=0.5 and mutpb=0.2. The hall of fame is sized to
    ``population size * generations`` entries.
    """
    pop_size, generations = 300, 30

    population = toolbox.population(n=pop_size)
    hall = tools.HallOfFame(pop_size * generations)

    # Fitness statistics reported for every generation.
    tracker = tools.Statistics(lambda ind: ind.fitness.values)
    for label, reducer in (
        ("avg", numpy.mean),
        ("std", numpy.std),
        ("min", numpy.min),
        ("max", numpy.max),
    ):
        tracker.register(label, reducer)

    # Launch the genetic algorithm; only the hall of fame is of interest.
    algorithms.eaSimple(
        population,
        toolbox,
        cxpb=0.5,
        mutpb=0.2,
        ngen=generations,
        stats=tracker,
        halloffame=hall,
        verbose=True,
    )
    return hall
def main():
    """Seed ``random`` with 64, run ``algorithms.eaSimple`` and return its results.

    Returns:
        The population and logbook returned by ``eaSimple``, followed by
        the hall of fame (size 5).
    """
    random.seed(64)

    population = toolbox.population(n=30)
    hall = tools.HallOfFame(5)

    # Fitness statistics reported for every generation.
    tracker = tools.Statistics(lambda ind: ind.fitness.values)
    for label, reducer in (
        ("avg", np.mean),
        ("std", np.std),
        ("min", np.min),
        ("max", np.max),
    ):
        tracker.register(label, reducer)

    final_population, logbook = algorithms.eaSimple(
        population,
        toolbox,
        cxpb=0.9,
        mutpb=0.01,
        ngen=30000,
        stats=tracker,
        halloffame=hall,
        verbose=True,
    )
    return final_population, logbook, hall
def main():
    """Seed ``random`` with 63, run ``algorithms.eaSimple`` and return the run objects.

    Returns:
        The population list created here (the return value of ``eaSimple``
        is not captured), the ``tools.Statistics`` object, and the hall of
        fame (size 1).
    """
    random.seed(63)

    population = toolbox.population(n=600)
    hall = tools.HallOfFame(1)

    # NOTE(review): tools.mean / tools.std are looked up on the DEAP
    # ``tools`` module; confirm the installed DEAP release provides them.
    tracker = tools.Statistics(lambda ind: ind.fitness.values)
    for label, reducer in (
        ("avg", tools.mean),
        ("std", tools.std),
        ("min", min),
        ("max", max),
    ):
        tracker.register(label, reducer)

    algorithms.eaSimple(
        population,
        toolbox,
        cxpb=0.5,
        mutpb=0.1,
        ngen=10000,
        stats=tracker,
        halloffame=hall,
        verbose=True,
    )
    return population, tracker, hall
def GeneticAlgorithm(prices_std_list, prices_mean_list, price_columns, rules, regressors, population, generation, costs=None, penalty_hard_constant=1000000, penalty_soft_constant=1000, step=0.05, random_seed=1):
    """Search for a price vector with a DEAP genetic algorithm under pricing rules.

    The function turns the rules into constraint matrices, asks ``solve_cvx``
    for two starting price vectors that satisfy the hard constraints, and then
    runs ``algorithms.eaSimple`` with a fitness equal to revenue (or profit)
    minus penalties for violated constraints.

    Args:
        prices_std_list: Per-item values used to scale the random initial
            prices (multiplied by 2 and by a standard-normal draw).
        prices_mean_list: Per-item values added to the scaled random draw.
        price_columns: Column names; the product id is taken as
            ``name.split('_')[1]``.
        rules: 4-sequence unpacked as ``(rule list, price ranges, costs,
            revenue_obj)``. Each rule is a dict read through the keys
            ``'products'``, ``'penalty'`` and ``'equality'``; price ranges
            are dicts with ``'item_id'``, ``'max'``, ``'min'``; costs are
            dicts with ``'item_id'``, ``'cost'``.
        regressors: Mapping from product code to an object with a
            ``predict(X)`` method; the keys index ``product_to_idx``.
        population: Number of random individuals to create (two more are
            appended from the ``solve_cvx`` solutions).
        generation: Number of generations passed to ``eaSimple``.
        costs: NOTE(review): overwritten by the ``costs`` element unpacked
            from ``rules`` before it is ever read.
        penalty_hard_constant: Penalty passed to ``list_to_matrix`` for hard
            rules and used for the price-range rows.
        penalty_soft_constant: Penalty passed to ``list_to_matrix`` for soft
            rules.
        step: NOTE(review): not referenced in the body; the rounding grid in
            ``evalObjective`` is a literal 0.05.
        random_seed: NOTE(review): not referenced in the body; ``random`` is
            seeded with the literal 64.

    Returns:
        One of several tuple shapes:
        ``(False, 1)`` when the profit objective is requested but no costs
        are given; ``(False, 2, missing_c)`` when some products lack a cost;
        ``(False, 0)`` when either ``solve_cvx`` call reports
        ``'infeasible'``; otherwise
        ``(True, pop, stats, hof, report)`` where ``report`` is
        ``evalObjective(hof[0], report=True)``.
    """
    # 1. Preprocess rules and price limits
    num_item = len(price_columns)
    product_to_idx = {column.split('_')[1]: i for i, column in enumerate(price_columns)}
    # NOTE: this unpacking rebinds ``costs`` and hides the keyword argument.
    rule_list_old, price_range, costs, revenue_obj = rules
    costs_dic = {}
    for item in costs:
        costs_dic[item['item_id']] = item['cost']
    # Costs are only validated when the objective is profit (revenue_obj False).
    if revenue_obj == False:
        if len(costs) == 0 :
            return False, 1
        missing_c = []
        for p in price_columns:
            prod = p.split('_')[1]
            if int(prod) not in costs_dic:
                missing_c.append(prod)
        if len(missing_c) != 0:
            return False, 2, missing_c
    rule_list = [rule for rule in rule_list_old if set(rule['products']).issubset(set(product_to_idx.keys()))] # filter out the ones not in price_columns
    print('{} out of {} rules contain products not in price_columns.'.format(len(rule_list_old)-len(rule_list), len(rule_list_old)))
    # Rules with penalty == -1 are treated as hard, all others as soft.
    # The 'equality' code selects the relation: 0 eq, 1 small, 2 large,
    # 3 smalleq, 4 largeeq.
    hard_rule_eq_list = [i for i in rule_list if (i['penalty'] == -1 and i['equality'] == 0)]
    hard_rule_small_list = [i for i in rule_list if (i['penalty'] == -1 and i['equality'] == 1)]
    hard_rule_large_list = [i for i in rule_list if (i['penalty'] == -1 and i['equality'] == 2)]
    hard_rule_smalleq_list = [i for i in rule_list if (i['penalty'] == -1 and i['equality'] == 3)]
    hard_rule_largeeq_list = [i for i in rule_list if (i['penalty'] == -1 and i['equality'] == 4)]
    soft_rule_eq_list = [i for i in rule_list if (i['penalty'] != -1 and i['equality'] == 0)]
    soft_rule_small_list = [i for i in rule_list if (i['penalty'] != -1 and i['equality'] == 1)]
    soft_rule_large_list = [i for i in rule_list if (i['penalty'] != -1 and i['equality'] == 2)]
    soft_rule_smalleq_list = [i for i in rule_list if (i['penalty'] != -1 and i['equality'] == 3)]
    soft_rule_largeeq_list = [i for i in rule_list if (i['penalty'] != -1 and i['equality'] == 4)]
    # item_id -> [max, min]; index 0 is the cap, index 1 is the floor.
    price_range_dic = {}
    for item in price_range:
        price_range_dic[item['item_id']] = [item['max'], item['min']]

    # 2. Find valid price vectors to start
    # 2.1. put hard equalities into matrix form
    matrix1, shifts1, penalty1 = list_to_matrix(hard_rule_eq_list, product_to_idx, penalty_hard_constant)
    # 2.2. put hard inequalities into matrix form
    # Every inequality row is later checked as ``matrix.dot(x) - shift < 0``
    # meaning "violated", so "small" relations are negated. The 0.0001
    # offset is added for the strict ("small"/"large") relations only.
    matrix2_1, shifts2_1, penalty2_1 = list_to_matrix(hard_rule_small_list, product_to_idx, penalty_hard_constant)
    matrix2_1 = matrix2_1*(-1)
    shifts2_1 = shifts2_1*(-1)+0.0001
    matrix2_2, shifts2_2, penalty2_2 = list_to_matrix(hard_rule_large_list, product_to_idx, penalty_hard_constant)
    shifts2_2 = shifts2_2+0.0001
    matrix2_3, shifts2_3, penalty2_3 = list_to_matrix(hard_rule_smalleq_list, product_to_idx, penalty_hard_constant)
    matrix2_3 = matrix2_3*(-1)
    shifts2_3 = shifts2_3*(-1)
    matrix2_4, shifts2_4, penalty2_4 = list_to_matrix(hard_rule_largeeq_list, product_to_idx, penalty_hard_constant)
    # 2.2.2. adding price ranges
    prices = [i.split('_')[1] for i in price_columns]
    # 2.2.2.1 adding price floor (first len(prices) rows)
    matrix2_5 = np.zeros((2*len(prices), len(prices)))
    shifts2_5 = np.zeros((2*len(prices), 1))
    for i, product in enumerate(prices):
        matrix2_5[i, i] = 1.
        if int(product) not in price_range_dic.keys():
            print('product {} is not given price range, assumed to be within [0.5, 20].'.format(product))
            shifts2_5[i,0] = 0.5
        else:
            shifts2_5[i,0] = price_range_dic[int(product)][1]
    # 2.2.2.2 adding price cap (last len(prices) rows, negated)
    for i, product in enumerate(prices):
        matrix2_5[len(prices)+i, i] = -1.
        if int(product) not in price_range_dic.keys():
            shifts2_5[len(prices)+i,0] = -20.
        else:
            shifts2_5[len(prices)+i,0] = -price_range_dic[int(product)][0]
    penalty2_5 = np.full((matrix2_5.shape[0],1), penalty_hard_constant)
    # 2.2.3. Put together hard inequality and price range
    matrix2 = np.vstack([matrix2_1, matrix2_2, matrix2_3, matrix2_4, matrix2_5])
    shifts2 = np.vstack([shifts2_1, shifts2_2, shifts2_3, shifts2_4, shifts2_5])
    penalty2 = np.vstack([penalty2_1, penalty2_2, penalty2_3, penalty2_4, penalty2_5])
    # 2.3. get 2 valid individuals from solve_cvx, one per objective name
    val_ind1, status1 = solve_cvx(matrix1, shifts1, matrix2, shifts2, 'sum')
    val_ind2, status2 = solve_cvx(matrix1, shifts1, matrix2, shifts2, 'sum_squares')
    print('status 1: {}'.format(status1))
    print('status 2: {}'.format(status2))
    if status1 == 'infeasible' or status2 == 'infeasible':
        return False, 0 # an integer code for hard constraint infeasibility
    print('val_ind1 shape: {}'.format(val_ind1.shape))
    print('val_ind2 shape: {}'.format(val_ind2.shape))

    # 3. Put soft constraints into matrix form
    # 3.1. soft equality
    matrix3, shifts3, penalty3 = list_to_matrix(soft_rule_eq_list, product_to_idx, penalty_soft_constant)
    # 3.2. soft inequality (same sign conventions as the hard inequalities)
    matrix4_1, shifts4_1, penalty4_1 = list_to_matrix(soft_rule_small_list, product_to_idx, penalty_soft_constant)
    matrix4_1 = matrix4_1*(-1)
    shifts4_1 = shifts4_1*(-1)+0.0001
    matrix4_2, shifts4_2, penalty4_2 = list_to_matrix(soft_rule_large_list, product_to_idx, penalty_soft_constant)
    shifts4_2 = shifts4_2+0.0001
    matrix4_3, shifts4_3, penalty4_3 = list_to_matrix(soft_rule_smalleq_list, product_to_idx, penalty_soft_constant)
    matrix4_3 = matrix4_3*(-1)
    shifts4_3 = shifts4_3*(-1)
    matrix4_4, shifts4_4, penalty4_4 = list_to_matrix(soft_rule_largeeq_list, product_to_idx, penalty_soft_constant)
    matrix4 = np.vstack([matrix4_1, matrix4_2, matrix4_3, matrix4_4])
    shifts4 = np.vstack([shifts4_1, shifts4_2, shifts4_3, shifts4_4])
    penalty4 = np.vstack([penalty4_1, penalty4_2, penalty4_3, penalty4_4])

    # 4. Run GA using DEAP library
    # 4.1. Define fitness function
    def evalObjective(individual, report=False):
        """Score one price vector.

        Prices are snapped to a 0.05 grid and rounded to 2 decimals before
        use. With ``report=False`` the result is the 1-tuple ``(output,)``
        where ``output`` is revenue (or profit when ``revenue_obj`` is
        False) minus the summed penalties of violated constraints. With
        ``report=True`` a list is returned instead, without penalties
        applied: ``[output, hard violations, soft violations]`` for the
        revenue objective, or ``[output, output_rev, hard violations,
        soft violations]`` for the profit objective.
        """
        # Calculating revenue
        quantity = np.zeros((num_item))
        f = lambda x: 0.05 * np.round(x/0.05)
        individual = f(individual)
        individual = individual.round(2)
        for code in regressors: # TODO: use multiple workers here to speedup the optimization process
            # The same one-row frame is rebuilt for every regressor.
            X = pd.DataFrame(individual.reshape(1, -1), columns=price_columns)
            X = X.reindex(sorted(X.columns), axis=1)
            quantity[product_to_idx[code]] = regressors[code].predict(X)
        # Objective value before penalties
        if revenue_obj == False: # True for revenue, False for profit
            costs_list = [costs_dic[int(product.split('_')[1])] for product in price_columns]
            costs_np = np.array(costs_list)
            output = (individual-costs_np).dot(quantity)
            if report:
                output_rev = individual.dot(quantity)
        else:
            output = individual.dot(quantity)
        # Calculating constraint violation penalty.
        # Equalities are violated when the rounded residual is non-zero,
        # inequalities when it is negative.
        # NOTE(review): rounding to 2 decimals appears to absorb the 0.0001
        # strictness margin added above - confirm that is acceptable.
        temp1 = (matrix1.dot(individual.reshape(-1, 1)) - shifts1).round(2)
        mask1 = temp1 != 0
        penalty_1 = mask1.T.dot(penalty1)
        temp2 = (matrix2.dot(individual.reshape(-1, 1)) - shifts2).round(2)
        mask2 = temp2 < 0
        penalty_2 = mask2.T.dot(penalty2)
        temp3 = (matrix3.dot(individual.reshape(-1, 1)) - shifts3).round(2)
        mask3 = temp3 != 0
        penalty_3 = mask3.T.dot(penalty3)
        temp4 = (matrix4.dot(individual.reshape(-1, 1)) - shifts4).round(2)
        mask4 = temp4 < 0
        penalty_4 = mask4.T.dot(penalty4)
        if report:
            if revenue_obj == False:
                return [output, output_rev, np.sum(mask1)+np.sum(mask2), np.sum(mask3)+np.sum(mask4)]
            else:
                return [output, np.sum(mask1)+np.sum(mask2), np.sum(mask3)+np.sum(mask4)]
        # The shape guards skip penalty groups whose product is empty.
        if penalty_1.shape[0] > 0 and penalty_1.shape[1] > 0:
            output -= penalty_1[0,0]
        if penalty_2.shape[0] > 0 and penalty_2.shape[1] > 0:
            output -= penalty_2[0,0]
        if penalty_3.shape[0] > 0 and penalty_3.shape[1] > 0:
            output -= penalty_3[0,0]
        if penalty_4.shape[0] > 0 and penalty_4.shape[1] > 0:
            output -= penalty_4[0,0]
        return (output,)

    # 4.2. Initialize individuals and operations
    creator.create("RevenuePenalty", base.Fitness, weights=(1.,))
    creator.create("Individual", np.ndarray, fitness=creator.RevenuePenalty)
    toolbox = base.Toolbox()
    def get_individual(num_item, price_std, price_mean):
        # Random start: mean + 2 * std * standard-normal noise, per item.
        return creator.Individual(np.random.standard_normal(num_item)*price_std*2 + price_mean)
    toolbox.register("individual", get_individual, num_item, np.array(prices_std_list), np.array(prices_mean_list))
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    toolbox.register("evaluate", evalObjective)
    toolbox.register("mate", cxTwoPointCopy)
    # NOTE(review): mutFlipBit is DEAP's bit-flip operator; on float price
    # genes it presumably replaces a value with 0.0 or 1.0 - confirm intended.
    toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
    toolbox.register("select", tools.selTournament, tournsize=3)

    # 4.3. Run the algorithm
    # Only Python's ``random`` is seeded here; get_individual draws from
    # ``np.random``, which this function does not seed.
    random.seed(64)
    pop = toolbox.population(n=population)
    # Add the two solve_cvx solutions to the random population.
    pop.append(creator.Individual(val_ind1.round(2).flatten()))
    pop.append(creator.Individual(val_ind2.round(2).flatten()))
    print('fitess of ind1: ',evalObjective(val_ind1.round(2).flatten()))
    print('fitess of ind2: ',evalObjective(val_ind2.round(2).flatten()))
    # hof = tools.ParetoFront(similar=np.array_equal)
    hof = tools.HallOfFame(2, similar=np.array_equal)
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)
    print('GA started running...')
    algorithms.eaSimple(pop, toolbox, cxpb=0.5, mutpb=0.2, ngen=generation, stats=stats, halloffame=hof)
    return True, pop, stats, hof, evalObjective(hof[0], report=True)
def nsga2_pareto_K(KK, propvec, pDB, sen=None, ref=None, seed=None):
    """Run an NSGA-II search with DEAP and return the Pareto-front fitnesses.

    Individuals are ``array.array('d')`` vectors with one entry per element
    of ``propvec['COST']``, bounded to [0, 1], and are scored by ``eval_mo``.

    Args:
        KK: Forwarded to ``eval_mo`` as ``Kinp``.
        propvec: Forwarded to ``eval_mo``; ``len(propvec['COST'])`` sets the
            number of decision variables.
        pDB: Not used by the current body (kept for interface compatibility).
        sen: Not used by the current body.
        ref: Not used by the current body.
        seed: Not used by the current body; no RNG is seeded here.

    Returns:
        ``numpy.ndarray`` built from the ``fitness.values`` of every
        individual in the final ``tools.ParetoFront``.

    Raises:
        Whatever ``ParetoFront.update`` raises is re-raised after the
        offspring fitness values have been printed for debugging.
    """
    NDIM = len(propvec['COST'])  # Number of parameters

    # Weight signs follow the DEAP convention: the first objective is
    # maximised (+1.0) and the second is minimised (-1.0).
    creator.create("FitnessMin", base.Fitness, weights=(1.0, -1.0))
    # Individuals in the generation
    creator.create("Individual", array.array, typecode='d', fitness=creator.FitnessMin)

    toolbox = base.Toolbox()
    BOUND_LOW, BOUND_UP = 0, 1  # Lower and upper variable bounds

    # A local worker pool is used for evaluation only when the NSGA runs
    # themselves are not already parallelised.
    pool = None
    if not cooptimizer_input.parallel_nsgaruns:
        pool = Pool()
        toolbox.register("map", pool.map)

    try:
        toolbox.register("attr_float", uniform, BOUND_LOW, BOUND_UP, NDIM)
        toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.attr_float)
        toolbox.register("population", tools.initRepeat, list, toolbox.individual)
        # Other objective functions (eval_MMF_gp_opt, eval_MMF_gp, eval_gp,
        # eval_mo2, eval_mean_var) were previously swapped in here.
        toolbox.register("evaluate", eval_mo, propvec=propvec, Kinp=KK)
        toolbox.register("mate", tools.cxSimulatedBinaryBounded,
                         low=BOUND_LOW, up=BOUND_UP, eta=20.0)
        toolbox.register("mutate", tools.mutPolynomialBounded,
                         low=BOUND_LOW, up=BOUND_UP, eta=20.0, indpb=1.0 / NDIM)
        toolbox.decorate("mate", scale_2())
        toolbox.decorate("mutate", scale_2())
        toolbox.register("select", tools.selNSGA2)

        # These are parameters that can be adjusted and may change
        # the algorithm's performance
        NGEN = 300  # Generation counter limit: the loop runs gen = 1 .. NGEN-1
        MU = 100    # Number of individuals
        CXPB = 0.75  # Cross-over probability, [0,1]

        stats = tools.Statistics(lambda ind: ind.fitness.values)
        stats.register("avg", numpy.mean, axis=0)
        stats.register("std", numpy.std, axis=0)
        stats.register("min", numpy.min, axis=0)
        stats.register("max", numpy.max, axis=0)

        pf = tools.ParetoFront()
        hof = tools.HallOfFame(100)

        logbook = tools.Logbook()
        logbook.header = "gen", "evals", "std", "min", "avg", "max"

        pop = toolbox.population(n=MU)

        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in pop if not ind.fitness.valid]
        fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
        for ind, fit in zip(invalid_ind, fitnesses):
            ind.fitness.values = fit
        pf.update(pop)
        hof.update(pop)

        # This is just to assign the crowding distance to the individuals;
        # no actual selection is done
        pop = toolbox.select(pop, len(pop))

        record = stats.compile(pop)
        logbook.record(gen=0, evals=len(invalid_ind), **record)
        print(logbook.stream)

        # Begin the generational process
        for gen in range(1, NGEN):
            # Vary the population. The selNSGA2 result is discarded, but the
            # call is kept because it presumably refreshes the crowding
            # distances that selTournamentDCD reads.
            tools.selNSGA2(pop, len(pop))
            offspring = tools.selTournamentDCD(pop, len(pop))
            offspring = [toolbox.clone(ind) for ind in offspring]

            for ind1, ind2 in zip(offspring[::2], offspring[1::2]):
                if random.random() <= CXPB:
                    toolbox.mate(ind1, ind2)

                toolbox.mutate(ind1)
                toolbox.mutate(ind2)
                del ind1.fitness.values, ind2.fitness.values

            # Evaluate the individuals with an invalid fitness
            invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
            fitnesses = toolbox.map(toolbox.evaluate, invalid_ind)
            for ind, fit in zip(invalid_ind, fitnesses):
                ind.fitness.values = fit

            try:
                pf.update(offspring)
            except Exception:
                # Dump the fitness values that broke the update, then let
                # the real error propagate with its own traceback.
                print([ind.fitness.values for ind in offspring])
                raise
            hof.update(offspring)

            # Select the next generation population
            pop = toolbox.select(pop + offspring, MU)
            record = stats.compile(pop)
            logbook.record(gen=gen, evals=len(invalid_ind), **record)
            print(logbook.stream)

        pop.sort(key=lambda x: x.fitness.values)
    finally:
        # Release the worker processes even when the run fails.
        if pool is not None:
            pool.close()
            pool.join()

    front = numpy.array([ind.fitness.values for ind in pf])
    print("NSGA done; hof: {}".format(pf[0]))
    print('paretofront:', front)

    return front
the Pareto front """ return np.allclose(ind1.fitness.values, ind2.fitness.values) pop_size = 500 pop = toolbox.population(n=pop_size) pareto_front = tools.ParetoFront(similar=pareto_eq) #stats = tools.Statistics(lambda ind: ind.fitness.values[0:3]) stats = tools.Statistics(lambda x: pareto_front) #stats.register('avg', np.mean, axis=0) #stats.register('std', np.std, axis=0) #stats.register('min', np.min, axis=0) #stats.register('max', np.max, axis=0) stats.register('num skipped', lambda x: total_skipped) stats.register('cache lookups', lambda x: total_cache_lookups) stats.register('pf size', lambda x: len(x[0])) stats.register('best fitness', lambda x: x[0][0].fitness.values) stats.register('best ind', lambda x: str(x[0][0]).strip()) t_test = creator.Individual.from_string( 'float_div(float_sub(array_mean(x1), array_mean(x2)), float_sqrt(float_add(float_div(array_var(x1), array_size(x1)), float_div(array_var(x2), array_size(x2)))))', pset) print('t-test fitness: {}'.format(evaluate_individual(t_test))) t_test = creator.Individual.from_string( 'float_div(float_sub(array_mean(x1), array_mean(x2)), float_add(array_stderr(x1), array_stderr(x2)))', pset) print('t-test fitness: {}'.format(evaluate_individual(t_test)))