def _write_pair_distances(label, make_pairs, path, n):
    """Write the distances of each non-neutral pair to path; print a summary.

    label is printed before the run and prefixes the summary line.
    make_pairs is called as make_pairs(n) and must yield 2-tuples of
    individuals. Each pair is passed to structure.distances; when the
    second component of the result is > 0 the whole result is written as
    one "%d %d %f %f" row, otherwise the pair is counted as neutral.
    """
    print(label)
    total_count = 0
    neutral_count = 0
    with open(path, "w") as outfile:
        # The generator is only invoked once the output file is open,
        # matching the order of the original inline code.
        for g, h in make_pairs(n):
            total_count += 1
            ds = structure.distances(g, h)
            if ds[1] > 0:
                outfile.write("%d %d %f %f\n" % (ds))
            else:
                neutral_count += 1
    print("%s attempted trials %d, valid %d, neutral %d" % (
        label, n, total_count, neutral_count))


def study_structure(basename, rep="bubble_down", n=10000):
    """Use structure.py to investigate the structure of the tree-based
    GP space: can do bubble-down or grow algorithm.

    Configures the structure module, then samples random, mutation and
    crossover pairs and writes their distances to
    <basename>/<rep>/{random,mutation,crossover}_distances.dat. The
    files are opened with mode "w", which does not create directories,
    so <basename>/<rep> must already exist.

    basename -- root directory for the output files.
    rep      -- "bubble_down" or "grow"; selects structure.GENERATE.
    n        -- number of trials requested from each pair generator
                (default 10000, the previously hard-coded value).

    Raises ValueError if rep is not one of the two known values.
    """
    structure.MINLEN = 100
    structure.MAXLEN = 100
    structure.SEMANTIC_DISTANCE = semantic_distance
    structure.PHENOTYPE_DISTANCE = tree_distance
    structure.FITNESS = fitness.benchmarks("pagie-2d")  # hardcoded
    structure.CROSSOVER_PROB = 1.0
    structure.MAXV = sys.maxint
    structure.WRAPS = 0
    if rep == "bubble_down":
        structure.GENERATE = generate_bubble_down_tree_and_fn
    elif rep == "grow":
        structure.GENERATE = generate_grow_tree_and_fn
    else:
        raise ValueError("Unknown representation %r" % (rep,))
    structure.MAXIMISE = False

    print(rep)

    # Each generator attribute is looked up immediately before its own
    # phase, as in the original inline code.
    _write_pair_distances(
        "random", structure.generate_random_pairs,
        os.path.join(basename, rep, "random_distances.dat"), n)
    _write_pair_distances(
        "mutation", structure.generate_mutation_pairs,
        os.path.join(basename, rep, "mutation_distances.dat"), n)
    _write_pair_distances(
        "crossover", structure.generate_crossover_pairs,
        os.path.join(basename, rep, "crossover_distances.dat"), n)
def run(fitness_fn_key, rep="bubble_down"):
    """Configure variga for a tree-GP run on one benchmark and start it.

    fitness_fn_key -- key passed to fitness.benchmarks to obtain the
                      fitness function; its arity is handed to
                      gp.set_fns_leaves.
    rep            -- "bubble_down" or "grow"; selects variga.GENERATE.

    Raises ValueError for any other rep. The error is raised after the
    length, distance and fitness settings have already been assigned.
    """
    benchmark = fitness.benchmarks(fitness_fn_key)
    gp.set_fns_leaves(benchmark.arity)

    variga.MINLEN = variga.MAXLEN = 100
    variga.PHENOTYPE_DISTANCE = gp.tree_distance
    variga.FITNESS = benchmark

    # Pick the individual generator for the requested representation.
    if rep == "grow":
        variga.GENERATE = generate_grow_tree_and_fn
    elif rep == "bubble_down":
        variga.GENERATE = generate_bubble_down_tree_and_fn
    else:
        raise ValueError

    variga.MAXIMISE = False
    variga.SUCCESS = success

    # Remaining run parameters, assigned in the original order.
    for setting, value in (
            ("POPSIZE", 1000),
            ("GENERATIONS", 100),
            ("PMUT", 0.01),
            ("CROSSOVER_PROB", 0.7),
            ("ELITE", 1),
            ("TOURNAMENT_SIZE", 3),
            ("WRAPS", 1),
    ):
        setattr(variga, setting, value)

    variga.main()
def traverse(t, path=None):
    """Depth-first traversal of the tree t, yielding (node, subtree, path).

    t is indexed as a sequence: t[0] is yielded as the node together
    with t itself, and every later element is a child. A child that is a
    str is yielded as a leaf (node and subtree are the same string); any
    other child is traversed recursively. path is the "path so far", a
    tuple of child indices; the root entry is reported at path + (0,).
    """
    here = tuple() if path is None else path
    yield t[0], t, here + (0,)
    for offset, child in enumerate(t[1:]):
        child_path = here + (offset + 1,)
        if not isinstance(child, str):
            # Non-string child: re-yield everything from its own traversal.
            for step in traverse(child, child_path):
                yield step
        else:
            yield child, child, child_path


# Alternative grammar, currently disabled:
# grammar = Grammar("grammars/symbolic_regression_2d.bnf")
grammar = Grammar("grammars/sr_2d_ne_test.bnf")
srff = fitness.benchmarks()["pagie_2d"]
MAX_CODON = 127


def generate(random):
    """Return random_str_mod(random, grammar) for the module-level grammar."""
    return random_str_mod(random, grammar)


def success(err):
    """Never report success, whatever err is."""
    # let's just keep running so all runs are same length
    return False


# Module-level variga configuration.
variga.GENERATE = generate
variga.FITNESS = srff
variga.SUCCESS = success
variga.POPSIZE = 1000
variga.GENERATIONS = 40
variga.PMUT = 0.01
variga.CROSSOVER_PROB = 0.7
# ponyge uses 100 for all initialisation, no min/maxlen
variga.MINLEN = 100
variga.MAXLEN = 100
variga.MAXIMISE = False
# NOTE(review): the statements up to variga.main() look like the tail of a
# variga setup whose beginning lies outside this view -- confirm their
# enclosing scope and indentation against the full file.
variga.GENERATIONS = 50
variga.PMUT = 0.01
variga.CROSSOVER_PROB = 0.7
variga.ELITE = 1
variga.TOURNAMENT_SIZE = 3
variga.WRAPS = 1
variga.main()


def semantics(fn):
    """Return srff.get_semantics(fn) for the module-level benchmark srff."""
    return srff.get_semantics(fn)


def fitness_fn(fn):
    """Return srff(fn) for the module-level benchmark srff."""
    return srff(fn)


# Benchmark selection; the pagie_2d alternative is disabled.
# srff = fitness.benchmarks()["pagie_2d"]
srff = fitness.benchmarks()["vanneschi_bioavailability"]

# Terminal set: one variable name per benchmark input, then the constants.
# vars = ["x", "y"]
vars = ["x" + str(i) for i in range(srff.arity)]
consts = ["0.1", "0.2", "0.3", "0.4", "0.5"]
vars = vars + consts

# Function set: maps each function name to its number of arguments.
fns = {"+": 2, "-": 2, "*": 2, "/": 2, "sin": 1, "cos": 1, "square": 1}
# fns = {"+": 2, "-": 2, "*": 2, "/": 2}

pTerminal = 0.2  # used in grow algorithm

if __name__ == "__main__":
    # Dispatch on the first command-line argument.
    if sys.argv[1] == "test":
        test()
    elif sys.argv[1] == "test_grow":
        test_grow()
# NOTE(review): the statements up to variga.main() read `rep` and
# `fitness_fn`, so they look like the tail of a function body whose `def`
# line lies outside this view -- confirm their enclosing scope and
# indentation against the full file.
variga.MAXLEN = 100
variga.PHENOTYPE_DISTANCE = gp.tree_distance
# run the fitness function as normal to get individuals' semantics
variga.FITNESS = fitness_fn
# but overwrite the individuals' fitness values
variga.COEVOLUTIONARY_FITNESS = lambda pop: LCCB_coevo(fitness_fn, pop)

# Pick the individual generator for the requested representation.
if rep == "grow":
    variga.GENERATE = generate_grow_tree_and_fn_maxd
elif rep == "bubble_down":
    variga.GENERATE = lambda rng: generate_bubble_down_tree_and_fn_minn_maxn(10, 20, rng)
else:
    raise ValueError

variga.MAXIMISE = False
variga.SUCCESS = lambda x: False  # FIXME
variga.POPSIZE = 50
variga.GENERATIONS = 20
variga.PMUT = 0.01
variga.CROSSOVER_PROB = 0.7
variga.ELITE = 1
variga.TOURNAMENT_SIZE = 3
variga.WRAPS = 1
variga.main()

if __name__ == "__main__":
    # Script entry point: set up the pagie-2d benchmark and call LCEB.
    srff = fitness.benchmarks("pagie-2d")
    gp.set_fns_leaves(srff.arity)
    # run(srff)
    LCEB(srff, 10, 5, 2)
def read_trees_write_fitness_EuroGP2014(infile, outfile):
    """Evaluate each tree in infile on pagie-2d and write one value per line.

    Every line of infile is evaluated with eval() to obtain a tree, which
    is compiled with make_fn and passed to the benchmark's get_semantics.
    The reciprocal of the first element of that result is written to
    outfile, so output line i corresponds to input line i.

    infile  -- path of a text file holding one tree expression per line.
    outfile -- path of the file to (over)write with the computed values.

    Both files are handled by a with-statement so they are closed, and
    the output flushed, even if a line fails to evaluate.
    """
    srff = fitness.benchmarks("pagie-2d")
    with open(infile) as trees, open(outfile, "w") as out:
        for gp_tree in trees:
            # SECURITY NOTE: eval() executes arbitrary code from infile;
            # only use this on tree files you generated yourself.
            fitness_val = 1.0 / srff.get_semantics(make_fn(eval(gp_tree)))[0]
            out.write(str(fitness_val) + "\n")
def hillclimb(fitness_fn_key, mutation_type="optimal_ms",
              rt_method="grow", rt_size=3,
              ngens=200, popsize=1, init_popsize=1, print_every=10):
    """Hill-climbing optimisation.

    Builds an initial solution, then for ngens generations creates
    popsize mutants of the current best and keeps a mutant only if its
    fitness is strictly lower. Every print_every generations one
    space-separated row is printed (columns are named in the header
    line), and the accumulated returns from the last printed row are
    dumped at the end.

    fitness_fn_key -- key for fitness.benchmarks; key + "_class" selects
                      the extra fitness function reported in the
                      class_acc columns.
    mutation_type  -- "GSGP-optimal-ms", "GSGP", "GSGP-one-tree" or "GP".
                      The signature default "optimal_ms" is accepted as
                      an alias of "GSGP-optimal-ms".
    rt_method      -- "grow" or "bubble_down"; how random trees are made.
    rt_size        -- size argument passed to the random-tree generator.
    ngens          -- number of generations.
    popsize        -- mutants generated per generation.
    init_popsize   -- candidates generated for the initial solution
                      (must be at least 1).
    print_every    -- print a row when gen % print_every == 0.

    Raises ValueError for an unknown rt_method or mutation_type, or for
    init_popsize < 1.
    """
    if init_popsize < 1:
        # With no candidates the initialisation loop below never ends.
        raise ValueError("init_popsize must be at least 1")

    fitness_fn = fitness.benchmarks(fitness_fn_key)
    extra_fitness_fn = fitness.benchmarks(fitness_fn_key + "_class")
    set_fns_leaves(fitness_fn.arity)

    evals = 0

    # NOTE(review): absolute, machine-specific data path. Column 0 of the
    # file is read and its last 418 entries kept.
    raw_returns = np.genfromtxt(
        "/Users/jmmcd/Dropbox/GSGP-ideas-papers/finance/"
        + fitness_fn_key + ".txt").T[0][-418:]

    print("#generation evaluations fitness_rmse fitness_rmse_test class_acc class_acc_test returns_50 sig_50 returns_100 sig_100 returns_end sig_end best_phenotype_length best_phenotype")

    # Generate an initial solution and make sure it doesn't return an
    # error because if it does, in GSGP that error will always be present.
    si_out = None
    ft = float(sys.maxint)
    while si_out is None:
        if rt_method == "grow":
            s = [grow(rt_size, random) for i in range(init_popsize)]
        elif rt_method == "bubble_down":
            s = [bd.bubble_down(rt_size, random)[0]
                 for i in range(init_popsize)]
        else:
            raise ValueError("Unknown random-tree method %r" % (rt_method,))
        for si in s:
            # Evaluate child
            fnsi = make_fn(si)
            fsi, si_out = fitness_fn.get_semantics(fnsi)

            # Keep the child only if better
            if fsi < ft:
                t, ft, fnt = si, fsi, fnsi
        # Count every evaluation actually performed, retries included.
        evals += init_popsize

    # Stays empty if no row is ever printed (ngens == 0), so the final
    # dump below cannot hit an unbound name.
    returns = ()

    for gen in xrange(ngens):

        # make a lot of new individuals by mutation
        if mutation_type in ("GSGP-optimal-ms", "optimal_ms"):
            # "optimal_ms" is the signature default and previously
            # matched no branch, so the default call always raised.
            # NOTE(review): confirm it is meant as this strategy.
            # Mutate and differentiate to get the best possibility
            s = [semantic_geometric_mutate_differentiate(
                     t, fitness_fn, rt_size=rt_size, rt_method=rt_method)
                 for i in range(popsize)]
        elif mutation_type == "GSGP":
            # ms=0.001 as in Moraglio
            s = [semantic_geometric_mutate(
                     t, 0.001, rt_size=rt_size, one_tree=False,
                     rt_method=rt_method)
                 for i in range(popsize)]
        elif mutation_type == "GSGP-one-tree":
            # mutation step size randomly chosen
            s = [semantic_geometric_mutate(
                     t, np.random.normal(), rt_size=rt_size, one_tree=True,
                     rt_method=rt_method)
                 for i in range(popsize)]
        elif mutation_type == "GP":
            # don't use rt_size since it's = 2. use 12, the default
            s = [subtree_mutate(t) for i in range(popsize)]
        else:
            raise ValueError("Unknown mutation type " + mutation_type)

        # test the new individuals and keep only the single best
        for si in s:
            # Evaluate child
            fnsi = make_fn(si)
            fsi, si_out = fitness_fn.get_semantics(fnsi)

            # Keep the child only if better
            if fsi < ft:
                t, ft, fnt = si, fsi, fnsi

        evals += popsize
        if gen % print_every == 0:
            # Evaluate the current best on the test set here, so the
            # values exist even when no mutant has improved on the
            # initial solution yet. This assumes get_semantics gives the
            # same result for the same function on every call.
            test_rmse, yhat_test = fitness_fn.get_semantics(fnt, test=True)

            length = iter_len(traverse(t))
            # This is horrible: if t is just a single variable eg x0,
            # then str(t) -> x0, instead of 'x0'. Hack around it.
            if isatom(t):
                str_t = "'" + t + "'"
            else:
                str_t = str(t)

            returns, sig_50, sig_100, sig_end = accum_returns(
                raw_returns, yhat_test)

            print("%d %d %f %f %f %f %f %d %f %d %f %d %d : %s" % (
                gen, evals, ft, test_rmse,
                extra_fitness_fn(fnt), extra_fitness_fn.test(fnt),
                returns[50], sig_50, returns[100], sig_100,
                returns[417], sig_end,
                length, str_t))

    # Single-argument print(...) calls behave the same under the Python 2
    # print statement and the print function.
    print("ACCUMULATE RETURNS")
    for val in returns:
        print(val)