def read_family_db(toolbox):
    # Read the families database file, re-evaluate each stored code snippet so the
    # in-memory family bookkeeping on `toolbox` is rebuilt, then reset the timer.
    # Progress is only printed in the db-maintenance / analysis modes.
    # toolbox.f.write("reading families db, please have some patience\n")
    if toolbox.update_fam_db or toolbox.analyse_best or toolbox.compute_p_cx_c0:
        print(f"reading families db in {toolbox.fam_db_file} ...")
    t0 = time.time()
    families = interpret.compile(interpret.load(toolbox.fam_db_file))
    if toolbox.update_fam_db or toolbox.analyse_best or toolbox.compute_p_cx_c0:
        print(
            f" {round(time.time() - t0)} seconds for reading {len(families)} families"
        )
    t0 = time.time()
    for code in families:
        # Convert the stored code into a DEAP tree and evaluate it; evaluation is
        # what registers the individual's family on the toolbox.
        deap_str = interpret.convert_code_to_deap_str(code, toolbox)
        ind = gp.PrimitiveTree.from_string(deap_str, toolbox.pset)
        ind.age = 0
        ind.id = toolbox.get_unique_id()
        pp_str = make_pp_str(ind)
        evaluate_individual(toolbox, ind, pp_str, 0)
        if toolbox.clear_representatives_after_reading_family_db:
            # Prevent that the extra short DB snippets will influence the search : remove the code
            ind.fam.representative = None  # only the family NUMBER may be used
    toolbox.new_families_list = []
    if toolbox.update_fam_db or toolbox.analyse_best or toolbox.compute_p_cx_c0:
        elapsed = round(time.time() - t0)
        if elapsed > 0:  # avoid division by zero for sub-second runs
            print(
                f" {elapsed} seconds for processing, {round(len(toolbox.families_list)/elapsed)} families/sec"
            )
    toolbox.t0 = time.time()  # discard time lost by reading in the family db
def read_old_populations(toolbox, old_populations_folder, prefix):
    """Read previously written population files from `old_populations_folder`.

    Only files named `{prefix}_NNNN...` whose embedded seed falls in the same
    sample bucket as `toolbox.id_seed` are considered.  Returns a list of
    non-empty compiled populations; may be subsampled down to
    `toolbox.old_populations_samplesize` entries.

    Exits the process when samplesize == 1 and the single matching file is empty.
    """
    if toolbox.update_fam_db or toolbox.analyse_best:
        print(f"reading files in {old_populations_folder} ...")
    old_pops = []
    filenames = []
    for filename in os.listdir(old_populations_folder):
        # Fix: use startswith instead of a manual slice comparison, and do not
        # shadow the builtin `id` with the extracted seed number.
        if filename.startswith(prefix):
            # assumes names look like "{prefix}_NNNN..." with a 4-digit seed — TODO confirm
            file_seed = int(filename[len(prefix) + 1:len(prefix) + 1 + 4])
            if file_seed // toolbox.old_populations_samplesize == toolbox.id_seed // toolbox.old_populations_samplesize:
                filenames.append(filename)
    filenames.sort()
    for filename in filenames:
        if toolbox.old_populations_samplesize != 1 or filename == f"{prefix}_{toolbox.id_seed}.txt":
            old_pop = interpret.compile(
                interpret.load(old_populations_folder + "/" + filename))
            if len(old_pop) > 0:
                old_pops.append(old_pop)
            elif toolbox.old_populations_samplesize == 1:
                # The one requested file is empty: nothing to do, stop the run.
                toolbox.f.write(
                    "RuntimeWarning: stopped because no set covering needed, 0 evals\n"
                )
                exit()
    if toolbox.old_populations_samplesize != 1:
        if toolbox.old_populations_samplesize < len(old_pops):
            old_pops = random.sample(old_pops, k=toolbox.old_populations_samplesize)
    if toolbox.update_fam_db or toolbox.analyse_best:
        print(f" {len(old_pops)} files with content")
    return old_pops
def main(folder_with_logfiles):
    """Entry point for the 'mainline' analysis run.

    Loads experimenten/params_mainline.txt, points the run at
    `folder_with_logfiles` as the old-populations source, builds a toolbox
    and calls extract_main_line on it.
    """
    run_id = "mainline"  # renamed from `id`: avoid shadowing the builtin
    seed = 1000
    param_file = f"experimenten/params_{run_id}.txt"
    print("using param file", param_file)
    if not os.path.exists(param_file):
        exit(f"param file {param_file} does not exist")
    output_folder = f"tmp/{run_id}"
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    with open(param_file, "r") as f:
        params = json.load(f)
    seed += params["seed_prefix"]
    # Record run context in the params dict before building the toolbox.
    params["param_file"] = param_file
    params["id"] = run_id
    params["output_folder"] = output_folder
    params["seed"] = seed
    params["old_populations_folder"] = folder_with_logfiles
    functions_file_name = params["functions_file"]
    problems_file_name = params["problems_file"]
    functions = interpret.get_functions(functions_file_name)
    problems = interpret.compile(interpret.load(problems_file_name))
    toolbox = find_new_function.initialise_toolbox(problems[0], functions, sys.stdout, params)
    extract_main_line(toolbox)
def main(seed, id):
    """Run one seeded solve: load params_{id}.txt, solve all problems, log to tmp/{id}.

    Returns 0 when all problems were solved, 1 otherwise.  May exit early when
    the seed is in `skip_seeds` or when the log file already exists and
    `do_not_overwrite_logfile` is set.
    """
    param_file = f"experimenten/params_{id}.txt"
    if not os.path.exists(param_file):
        exit(f"param file {param_file} does not exist")
    output_folder = f"tmp/{id}"
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    with open(param_file, "r") as f:
        params = json.load(f)
    seed += params["seed_prefix"]
    skip_seeds = params["skip_seeds"]
    if seed in skip_seeds:
        exit()
    # Remove a stale end-marker from a previous run of this seed.
    os.system(f"rm -f {output_folder}/end_{seed}.txt")
    # Fix: keep the path in its own variable instead of reusing `log_file`
    # first as a string and later as the open file object.
    log_file_path = f"{output_folder}/log_{seed}.txt"
    if params.get("do_not_overwrite_logfile", False):
        if os.path.exists(log_file_path):
            exit(0)
    params["param_file"] = param_file
    params["id"] = id
    params["output_folder"] = output_folder
    params["seed"] = seed
    if params["use_one_random_seed"]:
        random.seed(seed)
    else:
        # Split the roles: `seed2`/`id_seed` identify the run, while the RNG
        # is seeded with the shared `seed_prefix`.
        del params["seed"]
        params["seed2"] = seed
        params["random_seed"] = params["seed_prefix"]
        params["id_seed"] = seed
        random.seed(params["seed_prefix"])
    with open(log_file_path, "w") as log_file:
        if hasattr(log_file, "reconfigure"):
            log_file.reconfigure(line_buffering=True)  # flush per line for live tailing
        functions_file_name = params["functions_file"]
        problems_file_name = params["problems_file"]
        functions = interpret.get_functions(functions_file_name)
        problems = interpret.compile(interpret.load(problems_file_name))
        solved_all = solve_problems(problems, functions, log_file, params,
                                    append_functions_to_file=None)
        log_file.write("done\n")
    if params["touch_at_end"]:
        os.system(f"touch {output_folder}/end_{seed}.txt")
    return 0 if solved_all else 1
def main(param_file):
    """Build layers.

    `param_file` must be named 'experimenten/params_<id>.txt'; results and the
    log are written under tmp/<id>.  Returns 1 when a layer yields no new
    functions, otherwise 0 if all inputs were solved and 1 if not.
    """
    prefix = "experimenten/params_"
    suffix = ".txt"
    if not (param_file.startswith(prefix) and param_file.endswith(suffix)):
        exit("param file must have format 'experimenten/params_id.txt'")
    run_id = param_file[len(prefix):-len(suffix)]  # renamed from `id` (builtin)
    output_folder = f"tmp/{run_id}"
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    with open(param_file, "r") as f:
        params = json.load(f)
    functions_file_name = params["functions_file"]
    inputs_file_name = params["inputs_file"]
    verbose = params["verbose"]
    language_subset = params["language_subset"]
    start_level = params["start_level"]
    max_depth = params["max_depth"]
    with open(f"{output_folder}/params.txt", "w") as f:
        # write a copy to the output folder
        json.dump(params, f, sort_keys=True, indent=4)
    log_file_path = f"{output_folder}/log.txt"
    with open(log_file_path, "w") as log_file:
        if hasattr(log_file, "reconfigure"):
            log_file.reconfigure(line_buffering=True)
        all_functions = interpret.get_functions(functions_file_name)
        input_chunks = interpret.compile(interpret.load(inputs_file_name))
        example_inputs = []
        for inputs_chunk, evaluations_chunk in input_chunks:
            example_inputs += inputs_chunk
        layer_builder = LayerBuilder(example_inputs, log_file, verbose, language_subset)
        for layer_level in range(start_level, start_level + 1):
            if verbose >= 0:
                print(f"L{layer_level}")
            new_functions, usage = layer_builder.build_layer(
                max_depth=max_depth, old_functions=all_functions, layer_level=layer_level)
            if len(new_functions) == 0:
                if verbose >= 0:
                    print(f"L{layer_level} no new functions found")
                return 1
            write_layer(f"{output_folder}/L{layer_level}D3.txt", new_functions)
            write_family_size(f"{output_folder}/L{layer_level}D3_family_size.txt", new_functions)
            write_usage(f"{output_folder}/L{layer_level}D3_usage.txt", usage)
            # Fix: the loop variable was called `params`, silently clobbering
            # the params dict loaded above; renamed to `fn_params`.
            for fname, fn_params, code, _ in new_functions:
                interpret.add_function(["function", fname, fn_params, code], all_functions)
        solved_all = compute_solved_all(input_chunks, all_functions, log_file, verbose)
        # Fix: the return statement was split across two lines in the source,
        # which would have returned None; rejoined into one expression.
        return 0 if solved_all else 1
def main(functions_file_name, inputs_file_name):
    """Load the functions and input chunks from file, then check which inputs are solved."""
    loaded_functions = interpret.get_functions(functions_file_name)
    loaded_chunks = interpret.compile(interpret.load(inputs_file_name))
    compute_solved_all(loaded_chunks, loaded_functions)
def analyse_vastlopers_via_best_files_no_family_db(toolbox):
    """Group all best_* files in the old-populations folder by raw error.

    Each best file must contain exactly one code snippet; it is re-evaluated and
    its model outputs computed.  Writes two reports to the output folder:
    analysis.txt (one row per error family with an ASCII error-matrix sketch)
    and sorted_seeds.txt (the seeds belonging to each family).
    """
    print(f"analysis starts, reading files ...")
    families = dict()  # raw_error -> (seeds, raw_error, raw_error_matrix, model_outputs)
    file_count = 0
    for filename in os.listdir(toolbox.old_populations_folder):
        if filename.startswith("best"):
            best_list = interpret.compile(
                interpret.load(toolbox.old_populations_folder + "/" + filename))
            if len(best_list) > 0:
                file_count += 1
                assert len(best_list) == 1
                code = best_list[0]
                deap_str = interpret.convert_code_to_deap_str(code, toolbox)
                ind = gp.PrimitiveTree.from_string(deap_str, toolbox.pset)
                ind.age = 0
                ind.id = toolbox.get_unique_id()
                pp_str = make_pp_str(ind)
                evaluate_individual(toolbox, ind, pp_str, 0)
                ind.model_outputs = cpp_coupling.run_on_all_inputs(
                    toolbox.cpp_handle, ind)
                key = ind.fam.raw_error
                if key not in families:
                    families[key] = ([], ind.fam.raw_error,
                                     ind.fam.raw_error_matrix, ind.model_outputs)
                # assumes names look like "best_NNNN..." with a 4-digit seed — TODO confirm
                id_seed = int(filename[5:9])
                families[key][0].append(id_seed)
    families = [(raw_error, raw_error_matrix, seeds, outputs)
                for key, (seeds, raw_error, raw_error_matrix, outputs) in families.items()]
    families.sort(key=lambda item: -item[0])  # worst (highest) error first
    filename = f"{toolbox.output_folder}/analysis.txt"
    # Fix: the messages printed the placeholder "(unknown)" instead of the
    # target filename, and misspelled "analysis".
    print(f"writing analysis result of {file_count} files in {filename} ...")
    with open(filename, "w") as f:
        elem = toolbox.example_inputs[-1][0]
        data = toolbox.example_inputs[-1][1]
        f.write(
            f"{'error':7} count {'error_matrix':127} example:merge({data},{elem})\n"
        )
        sum_count = 0
        for raw_error, raw_error_matrix, seeds, model_outputs in families:
            # Render the error matrix as one character per cell:
            # '.' for 0, digits 1-9, letters A-Z for 10-35, '*' above that.
            msg = ""
            for i in range(raw_error_matrix.shape[0]):
                msg += "|"
                for j in range(raw_error_matrix.shape[1]):
                    x = round(raw_error_matrix[i, j])
                    if x == 0:
                        msg += "."
                    elif x - 10 > 25:
                        msg += "*"
                    elif x >= 10:
                        msg += "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[x - 10]
                    else:
                        msg += str(x)
            f.write(
                f"{raw_error:7.3f} {len(seeds):5d} {msg}| {str(model_outputs[-1])}\n"
            )
            sum_count += len(seeds)
        f.write(f"{' ':7} {sum_count:5}\n")
    filename = f"{toolbox.output_folder}/sorted_seeds.txt"
    print(f"writing sorted seeds in {filename} ...")
    with open(filename, "w") as f:
        for raw_error, raw_error_matrix, seeds, _outputs in families:
            f.write(f"{raw_error:7.3f} {len(seeds):5d}")
            seeds.sort()
            for id_seed in seeds:
                f.write(f" {id_seed}")
            f.write(f"\n")
    print(f"analysis done")