from time import time
from copy import deepcopy
from math import sqrt, exp
from random import randint, random, gauss

from cgp import CGP, Operation

# NOTE: create_random_gene, write_2_file, acceptance_prob, root_finders and
# starting_point_approximation_symbolic_regression are assumed to be defined
# elsewhere in this repo (hedged sketches of some of them are given below).


def es(f_vals, pnts, dims, nr_of_funcs, nr_of_nodes, error_func, op_table, max_iter=500, nr_of_pars=0, reheat_iter=100, remaining_time=None, nr_of_children=4):
    """
    Evolutionary Strategy: keeps the best of nr_of_children mutants each
    iteration ((1, lambda)-style selection).
    """
    start_time = time()
    assert nr_of_funcs == len(op_table)

    # Create a starting function (solution) at random.
    current_sol = create_random_gene(dims + nr_of_pars, nr_of_funcs, nr_of_nodes)
    current_cgp = CGP(dims, op_table, current_sol, nr_of_parameters=nr_of_pars)
    (current_error, best_pars) = error_func(f_vals, pnts, dims, current_cgp, nr_of_pars, op_table)
    print(current_error)
    best_cgp = deepcopy(current_cgp)
    best_error = current_error

    for itr in range(max_iter):
        if itr % 50 == 0:
            print("iter:", itr, "of", max_iter)

        # Do a small mutation to create a few new functions (aka solutions).
        children = [mutate(current_cgp, dims + nr_of_pars, nr_of_funcs) for _ in range(nr_of_children)]
        children_results = [error_func(f_vals, pnts, dims, child, nr_of_pars, op_table) for child in children]
        children_errors = [children_results[i][0] for i in range(nr_of_children)]
        children_pars = [children_results[i][1] for i in range(nr_of_children)]

        # Move to the best child, even if it is worse than the current solution.
        best_children_idx = children_errors.index(min(children_errors))
        current_cgp = children[best_children_idx]
        current_error = children_errors[best_children_idx]

        if current_error < best_error:
            new_pars = children_pars[best_children_idx]
            print("best yet:", current_error)
            write_2_file(new_pars, itr, current_error, 'es', current_cgp.convert2str(parameters=new_pars))
            current_cgp.print_function(parameters=new_pars)
            best_cgp = deepcopy(current_cgp)
            best_error = current_error
            best_pars = list(new_pars)
        if remaining_time is not None and time() - start_time >= remaining_time:
            break
    return (best_cgp, best_error, best_pars)
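# `create_random_gene` is used above but not shown in this snippet. The sketch
# below is an assumption, not the repo's implementation; it follows the gene
# layout that mutate() relies on: three integers per node (operation index,
# two input indices) plus one trailing integer selecting the output node.
def create_random_gene(dims, nr_of_funcs, nr_of_nodes):
    gene = []
    for i in range(nr_of_nodes):
        gene.append(randint(0, nr_of_funcs - 1))  # operation index
        gene.append(randint(0, i + dims - 1))     # first input: an input variable or an earlier node
        gene.append(randint(0, i + dims - 1))     # second input (ignored by unary ops)
    gene.append(randint(0, nr_of_nodes - 1 + dims))  # which node (or raw input) is the output
    return gene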
def err_func(outs, inps, dims, gene, nr_of_pars, op_table):
    """
    Example of an error function, without parameters or anything fancy.
    Just a minimization of the L2-norm (i.e. the root-mean-square error).
    """
    dims = len(inps[0])  # NOTE: overrides the dims argument with the dimensionality of the data.
    cgp = CGP(dims, op_table, gene)
    n = len(inps)
    assert len(outs) == n
    s = 0.0
    for i in range(n):
        tmp = outs[i] - cgp.eval(inps[i])
        s += tmp * tmp
    s /= float(n)
    return sqrt(s)
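# es() and sa() unpack an (error, parameters) tuple from their error_func, and
# pass a CGP object, while the example above returns a bare float and takes a
# gene. A hypothetical adapter to the tuple interface could look like this:
def err_func_no_pars(outs, inps, dims, cgp, nr_of_pars, op_table):
    assert nr_of_pars == 0  # this error function fits no parameters
    return (err_func(outs, inps, dims, cgp.gene, nr_of_pars, op_table), [])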
def err_func(gene):
    # Closure variant: the sample data and settings are taken from the
    # enclosing scope, so an optimizer only has to pass a gene.
    cgp = CGP(dims, op_table, gene)
    (err, pars) = direct_error_func_curry(root_samples, parameter_in_inp_samples, dims, cgp, nr_of_parameters_in_inp, op_table)
    return err
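# A hedged sketch of how such a closure is presumably built; the factory name
# and argument order below are assumptions for illustration, not the repo's API.
def make_err_func(direct_error_func_curry, root_samples, parameter_in_inp_samples, dims, nr_of_parameters_in_inp, op_table):
    def err_func(gene):
        cgp = CGP(dims, op_table, gene)
        (err, pars) = direct_error_func_curry(root_samples, parameter_in_inp_samples, dims, cgp, nr_of_parameters_in_inp, op_table)
        return err
    return err_func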
def mutate(cgp_in, dims, nr_of_funcs, mute_rate=0.4):
    """
    Mutates the CGP. Doesn't affect the input CGP.
    One of the USED parts of the gene is always mutated.
    """
    gene = list(cgp_in.gene)
    nodes = int((len(gene) - 1) / 3)
    # The +1 is for the last number in the gene, which decides which node is
    # the output; the -1 in the randint below is for 0-indexing.
    nr_of_used_parts = sum(cgp_in.used_genes) + 1
    used_part_2_mutate = 0 if nr_of_used_parts <= 0 else randint(0, nr_of_used_parts - 1)
    used_part_counter = 0
    counter = 0
    has_forced_mutated = False
    for i in range(nodes):
        # Make sure at least some part of the USED gene is mutated...
        # TODO: remove the whole thing where it mutates into something new;
        # it can mutate into itself.
        if cgp_in.used_genes[i]:
            if used_part_counter == used_part_2_mutate:
                assert has_forced_mutated == False
                has_forced_mutated = True
                is_binary = cgp_in.op_table[gene[counter]].is_binary
                # Pick which field of this node to mutate: 0 = the operation,
                # 1 = the first input, 2 = the second input (binary ops only).
                random_node = randint(0, 2 + (1 if is_binary else 0) - 1)
                if random_node == 0:
                    has_changed = False
                    while not has_changed:
                        old_val = gene[counter]
                        gene[counter] = randint(0, nr_of_funcs - 1)
                        if gene[counter] != old_val:
                            has_changed = True
                else:
                    assert random_node == 1 or random_node == 2
                    has_changed = False
                    while not has_changed:
                        old_val = gene[counter + random_node]
                        gene[counter + random_node] = randint(0, i + dims - 1)
                        if gene[counter + random_node] != old_val:
                            has_changed = True
            used_part_counter += 1
        # ... the other parts don't have to mutate.
        if random() < mute_rate:
            gene[counter] = randint(0, nr_of_funcs - 1)
        counter += 1
        for _ in range(2):
            if random() < mute_rate:
                gene[counter] = randint(0, i + dims - 1)
            counter += 1
    if random() < mute_rate or nr_of_used_parts - 1 == used_part_2_mutate:
        assert counter == len(gene) - 1
        if nr_of_used_parts - 1 == used_part_2_mutate:
            assert has_forced_mutated == False
            has_forced_mutated = True
        gene[counter] = randint(0, nodes - 1 + dims)
    assert has_forced_mutated
    # This lookup is just to make sure the output index does not point out of bounds.
    start_node = gene[-1]
    tmp = gene[3 * (start_node - dims)]
    assert counter == len(gene) - 1
    assert dims == cgp_in.dims + cgp_in.nr_of_parameters
    return CGP(cgp_in.dims, cgp_in.op_table, gene, nr_of_parameters=cgp_in.nr_of_parameters)
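# Minimal usage sketch for mutate(): greedy (1+1) hill climbing. The `err`
# callable (CGP -> float) is hypothetical, for illustration only.
def hill_climb(cgp, err, steps=1000):
    best, best_err = cgp, err(cgp)
    for _ in range(steps):
        child = mutate(best, best.dims + best.nr_of_parameters, len(best.op_table))
        child_err = err(child)
        if child_err <= best_err:
            best, best_err = child, child_err
    return best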
def sa(f_vals, pnts, dims, nr_of_funcs, nr_of_nodes, error_func, op_table, max_iter=500, nr_of_pars=0, reheat_iter=100, remaining_time=None):
    """
    Simulated annealing is a simple way of doing combinatorial optimization
    without getting stuck in local minima. It basically works like this:

    1) Take the current solution and apply a small change to it.
    2) If this new solution is better, keep it.
    3) There is a chance that the new solution is kept even if it is worse.
       This chance decreases as the iteration number grows, and the chance is
       small if the new solution is much worse than the old solution.
    4) Repeat the process a bit, and return the best solution.

    The wiki page is rather good as well.
    """
    start_time = time()
    assert nr_of_funcs == len(op_table)

    # Create a starting function (solution) at random.
    current_sol = create_random_gene(dims + nr_of_pars, nr_of_funcs, nr_of_nodes)
    current_cgp = CGP(dims, op_table, current_sol, nr_of_parameters=nr_of_pars)
    (current_error, best_pars) = error_func(f_vals, pnts, dims, current_cgp, nr_of_pars, op_table)
    print(current_error)
    best_cgp = deepcopy(current_cgp)
    best_error = current_error

    iterations_since_update = 0
    temperature_itr = 0
    for itr in range(max_iter):
        if itr % 50 == 0:
            print("iter:", itr, "of", max_iter)
        temp = float(max_iter - temperature_itr) / max_iter

        # Do a small mutation to create a new function (aka solution).
        new_cgp = mutate(current_cgp, dims + nr_of_pars, nr_of_funcs)
        (new_error, new_pars) = error_func(f_vals, pnts, dims, new_cgp, nr_of_pars, op_table)
        temperature_itr += 1

        # Keep an improvement outright; keep a worse solution with
        # probability acceptance_prob(...), as described in the docstring.
        if new_error < current_error or random() < acceptance_prob(new_error, current_error, temp):
            current_cgp = new_cgp
            current_error = new_error
            if new_error < best_error:
                print("best yet:", new_error)
                write_2_file(new_pars, itr, current_error, 'sa', current_cgp.convert2str(parameters=new_pars))
                new_cgp.print_function(parameters=new_pars)
                best_cgp = deepcopy(new_cgp)
                best_error = new_error
                best_pars = list(new_pars)
        else:
            iterations_since_update += 1
            # If no change has been made in a while, set the temperature to max again!
            if iterations_since_update == reheat_iter:
                temperature_itr = 0
                iterations_since_update = 0
                print("Reheating.")
        if remaining_time is not None and time() - start_time >= remaining_time:
            break
    return (best_cgp, best_error, best_pars)
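# `acceptance_prob` is used above but not shown. A standard Metropolis-style
# choice is sketched below; the exact scaling of the error gap against the
# temperature is an assumption.
def acceptance_prob(new_error, current_error, temp):
    if new_error <= current_error:
        return 1.0  # improvements are always accepted
    if temp <= 0.0:
        return 0.0  # frozen: never accept a worse solution
    return exp(-(new_error - current_error) / temp)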
Operation("sqr"), Operation("-"), Operation("log"), Operation("/") ] h = 1.0e-9 err = 0.0 counter = 0.0 for _ in range(10): nr_of_nodes = randint(1, 15) dims = randint(1, 4) gene = create_random_gene(dims, len(op_table), nr_of_nodes) cgp = CGP(dims, op_table, gene, nr_of_parameters=0) for _ in range(10): pnt = [gauss(0, 10) for _ in range(dims)] for d in range(dims): pnt_shift = list(pnt) pnt_shift[d] += h numerical_der = (cgp.eval(pnt_shift) - cgp.eval(pnt)) / h analytical_der = cgp.eval(pnt, derivative=True, der_dir=d) diff = analytical_der[1] - numerical_der err += diff * diff counter += 1.0 err = sqrt(err / counter) print("Test 2 error:", err)
def starting_point_approximation(func, nr_of_parameters, parameter_ranges, optimizer, max_iter=1000, multi_starts=2, nr_of_samples_per_parameter=25, nr_of_parameters_in_cgp=3, max_time=None, symbolic_der=None):
    assert nr_of_samples_per_parameter > 1
    assert nr_of_parameters >= 0
    if nr_of_parameters == 0:
        # No approximation function is needed for the starting point in this
        # case; there would only be one value for the root.
        print("There are no parameters in the input function! Then there is no need for this program.")
        assert False
    else:
        # Make sure that the input data even makes sense.
        assert len(parameter_ranges) == nr_of_parameters
        for tmp in parameter_ranges:
            assert len(tmp) == 2
            assert tmp[0] < tmp[1]

        # Calculate the total number of samples.
        nr_of_samples = 1
        for _ in range(nr_of_parameters):
            nr_of_samples *= nr_of_samples_per_parameter

        # Generate random parameter points in the given range.
        # TODO: this is a stopgap; we really should sample on a nice Cartesian grid.
        parameter_samples = [[0.0 for _ in range(nr_of_parameters)] for _ in range(nr_of_samples)]
        for i in range(nr_of_samples):
            for d in range(nr_of_parameters):
                parameter_samples[i][d] = random() * (parameter_ranges[d][1] - parameter_ranges[d][0]) + parameter_ranges[d][0]

        # Let's get the derivative as well.
        if symbolic_der is None:
            func_der = lambda x, a: (func([x[0] + 1.0e-11], a) - func([x[0]], a)) / 1.0e-11
        else:
            func_der = symbolic_der

        # Step 1
        # For each point, find the x val that is the (or a) root.
        # Do this using Newton-Raphson.
        root_samples_and_errors = [root_finders(func, func_der, parameter_samples[i]) for i in range(nr_of_samples)]

        # Remove all points that didn't converge.
        converge_thresh = 1.0e-8
        counter = 0
        for i in range(nr_of_samples):
            err = root_samples_and_errors[counter][1]
            if err > converge_thresh:
                tmp = root_samples_and_errors.pop(counter)
                parameter_samples.pop(counter)
                assert tmp[1] > converge_thresh
                counter -= 1
            counter += 1
        root_samples = [tmp[0] for tmp in root_samples_and_errors]
        errors_samples = [tmp[1] for tmp in root_samples_and_errors]
        assert max(errors_samples) <= converge_thresh
        filtered_quota = 1.0 - len(root_samples) / float(nr_of_samples)
        print("How many were filtered:", 100 * filtered_quota, "%")
        if filtered_quota > 0.5:  # more than half of the samples failed to converge
            # TODO: Do something in this case.
            assert False

        # Step 2
        # Run a symbolic regression to find a good approximation for the root.
        # This is used as a starting point.
        (cgp, best_err, parameters) = starting_point_approximation_symbolic_regression(root_samples, parameter_samples, optimizer, max_iter=max_iter, nr_of_parameters=nr_of_parameters_in_cgp, max_time=max_time)

        # Step 2 and a half
        # The symbolic regression (tries to) ignore all constant solutions,
        # so those should be checked as well.
        mean = sum(root_samples) / float(len(root_samples))
        error_from_mean = sqrt(sum((r - mean) * (r - mean) for r in root_samples) / float(len(root_samples)))
        if error_from_mean < best_err:
            print("DOING THE CONST THING:", error_from_mean)
            # Create a new gene that represents a constant function: the
            # output points at index cgp.dims, i.e. the first parameter.
            new_gene = [0] * len(cgp.gene)
            new_gene[-1] = cgp.dims
            cgp = CGP(cgp.dims, cgp.op_table, new_gene, nr_of_parameters=1)
            parameters = [mean]
            for _ in range(10):
                assert cgp.eval([random() for _ in range(cgp.dims)], parameters=parameters) == mean
            best_err = error_from_mean
        return (cgp, best_err, parameters)
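# `root_finders` is not shown in this snippet. Below is a minimal sketch that
# matches how it is used above: Newton-Raphson (as step 1 says), returning a
# (root, residual) pair whose residual is compared against converge_thresh.
# The starting point x0 and the iteration cap are assumptions.
def root_finders(func, func_der, parameters, x0=0.0, max_iter=100):
    x = x0
    for _ in range(max_iter):
        dfx = func_der([x], parameters)
        if dfx == 0.0:
            break  # stationary point: the Newton step is undefined
        x -= func([x], parameters) / dfx
    return (x, abs(func([x], parameters)))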
if __name__ == '__main__':
    from cgp import CGP, Operation
    op_table = [
        Operation("+"), Operation("*"), Operation("sin"), Operation("cos"),
        Operation("sqr"), Operation("-"), Operation("log"), Operation("/")
    ]
    gene = [1, 2, 0, 3]
    first_approx_cgp = CGP(2, op_table, gene, nr_of_parameters=1)
    nr_of_der = 1
    gene2 = [1, 1, 0, 0, 3, 2, 4]
    improvement_cgp = CGP(1, op_table, gene2, nr_of_parameters=nr_of_der + 1)
    func_str = 'a1*sin(x)+a2*x+a3'
    print(convert(op_table, improvement_cgp.gene, ['x'], improvement_cgp.nr_of_nodes,
                  improvement_cgp.dims, parameters=[0.1, 0.4]))
    print(generate_code(op_table, first_approx_cgp, [0.4], improvement_cgp,
                        # (the remaining arguments of this call are truncated in the source)
# (This fragment is from a CNN-training script; the call that these keyword
# arguments belong to is truncated in the source. Its result is the
# `network_info` that is pickled below.)
                               rows=args.num_breadth, cols=depth, level_back=2,
                               min_active_num=args.num_min_depth, max_active_num=args.num_max_depth)
with open(args.net_info_file, mode='wb') as f:
    pickle.dump(network_info, f)
with open(f"{config['save_dir']}accuracy{args.gpuID}.txt", 'at') as f:
    with open(f"{config['save_dir']}config.json", 'w+') as cfg_f:
        json.dump(config, cfg_f, indent=2)
    cgp = CGP(network_info, None, arch_type=config['arch_type'], lam=1,
              img_size=img_size, init=args.init)
    print(cgp.pop[0].active_net_list())
    full = CNN_train('cifar10', validation=True, verbose=True, batchsize=batchsize,
                     data_num=args.num_train, mode="full", config=config)
    acc_full, acc_curr = full(cgp.pop[0].active_net_list(), args.gpuID,
                              num_epoch=num_epoch, out_model='retrained_net.model')