示例#1
0
def read_family_db(toolbox):
    """Load the families database, re-evaluate every stored snippet, and
    reset ``toolbox.t0`` so db-reading time is not counted as search time."""
    # toolbox.f.write("reading families db, please have some patience\n")
    show_progress = (toolbox.update_fam_db or toolbox.analyse_best
                     or toolbox.compute_p_cx_c0)
    if show_progress:
        print(f"reading families db in {toolbox.fam_db_file} ...")
        t0 = time.time()
    families = interpret.compile(interpret.load(toolbox.fam_db_file))
    if show_progress:
        print(
            f"    {round(time.time() - t0)} seconds for reading {len(families)} families"
        )
        t0 = time.time()
    for code in families:
        deap_str = interpret.convert_code_to_deap_str(code, toolbox)
        individual = gp.PrimitiveTree.from_string(deap_str, toolbox.pset)
        individual.age = 0
        individual.id = toolbox.get_unique_id()
        pp_str = make_pp_str(individual)
        evaluate_individual(toolbox, individual, pp_str, 0)
        if toolbox.clear_representatives_after_reading_family_db:
            # Prevent that the extra short DB snippets will influence the search : remove the code
            individual.fam.representative = None  # only the family NUMBER may be used
    toolbox.new_families_list = []
    if show_progress:
        elapsed = round(time.time() - t0)
        if elapsed > 0:
            print(
                f"    {elapsed} seconds for processing, {round(len(toolbox.families_list)/elapsed)} families/sec"
            )
    toolbox.t0 = time.time()  # discard time lost by reading in the family db
示例#2
0
def read_old_populations(toolbox, old_populations_folder, prefix):
    """Read old-population files named ``<prefix>_<NNNN>...`` from a folder.

    Only files whose 4-digit id falls in the same sample bucket as
    ``toolbox.id_seed`` (integer division by ``old_populations_samplesize``)
    are considered.  Returns a list of non-empty compiled populations;
    exits the process when samplesize is 1 and the single file is empty.
    """
    if toolbox.update_fam_db or toolbox.analyse_best:
        print(f"reading files in {old_populations_folder} ...")
    old_pops = []
    filenames = []
    for filename in os.listdir(old_populations_folder):
        # was: filename[:len(prefix)] == prefix — use the idiomatic test
        if filename.startswith(prefix):
            # 4-digit seed id follows "<prefix>_"; renamed from 'id' to
            # avoid shadowing the builtin
            file_id = int(filename[len(prefix) + 1:len(prefix) + 1 + 4])
            # keep only files from the same sample bucket as this run's seed
            if file_id // toolbox.old_populations_samplesize == toolbox.id_seed // toolbox.old_populations_samplesize:
                filenames.append(filename)
    filenames.sort()
    for filename in filenames:
        if toolbox.old_populations_samplesize != 1 or filename == f"{prefix}_{toolbox.id_seed}.txt":
            old_pop = interpret.compile(
                interpret.load(old_populations_folder + "/" + filename))
            if len(old_pop) > 0:
                old_pops.append(old_pop)
            elif toolbox.old_populations_samplesize == 1:
                # a deliberately fatal condition: the one requested file is empty
                toolbox.f.write(
                    "RuntimeWarning: stopped because no set covering needed, 0 evals\n"
                )
                exit()
    if toolbox.old_populations_samplesize != 1:
        if toolbox.old_populations_samplesize < len(old_pops):
            old_pops = random.sample(old_pops,
                                     k=toolbox.old_populations_samplesize)
    if toolbox.update_fam_db or toolbox.analyse_best:
        print(f"    {len(old_pops)} files with content")
    return old_pops
def main(folder_with_logfiles):
    """Set up a toolbox for the fixed 'mainline' experiment and extract its
    main line from the log files in *folder_with_logfiles*."""
    run_id = "mainline"  # renamed local; avoids shadowing builtin 'id'
    seed = 1000
    param_file = f"experimenten/params_{run_id}.txt"
    print("using param file", param_file)
    if not os.path.exists(param_file):
        exit(f"param file {param_file} does not exist")
    output_folder = f"tmp/{run_id}"
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    with open(param_file, "r") as f:
        params = json.load(f)
    seed += params["seed_prefix"]

    # record run metadata in the params dict for downstream consumers
    params["param_file"] = param_file
    params["id"] = run_id
    params["output_folder"] = output_folder
    params["seed"] = seed
    params["old_populations_folder"] = folder_with_logfiles

    functions = interpret.get_functions(params["functions_file"])
    problems = interpret.compile(interpret.load(params["problems_file"]))
    toolbox = find_new_function.initialise_toolbox(problems[0], functions,
                                                   sys.stdout, params)

    extract_main_line(toolbox)
def main(seed, id):
    """Run one search for the given seed with params from
    ``experimenten/params_<id>.txt``.

    Returns 0 when all problems were solved, 1 otherwise.  Exits early when
    the param file is missing, the seed is in ``skip_seeds``, or the log
    file exists and must not be overwritten.
    """
    param_file = f"experimenten/params_{id}.txt"
    if not os.path.exists(param_file):
        exit(f"param file {param_file} does not exist")
    output_folder = f"tmp/{id}"
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    with open(param_file, "r") as f:
        params = json.load(f)
    seed += params["seed_prefix"]
    if seed in params["skip_seeds"]:
        exit()
    # remove a stale end-marker from a previous run
    os.system(f"rm -f {output_folder}/end_{seed}.txt")
    # build the log path once; the original built the same f-string twice
    log_file_name = f"{output_folder}/log_{seed}.txt"
    if params.get("do_not_overwrite_logfile", False):
        if os.path.exists(log_file_name):
            exit(0)
    params["param_file"] = param_file
    params["id"] = id
    params["output_folder"] = output_folder
    params["seed"] = seed

    # (removed an 'if False:' dead-code block that dumped params.txt)

    if params["use_one_random_seed"]:
        random.seed(seed)
    else:
        # decouple the run id seed from the RNG seed
        del params["seed"]
        params["seed2"] = seed
        params["random_seed"] = params["seed_prefix"]
        params["id_seed"] = seed
        random.seed(params["seed_prefix"])
    with open(log_file_name, "w") as log_file:
        if hasattr(log_file, "reconfigure"):
            # line-buffer so progress is visible while the run is ongoing
            log_file.reconfigure(line_buffering=True)
        functions = interpret.get_functions(params["functions_file"])
        problems = interpret.compile(interpret.load(params["problems_file"]))
        solved_all = solve_problems(problems,
                                    functions,
                                    log_file,
                                    params,
                                    append_functions_to_file=None)
        log_file.write("done\n")
        if params["touch_at_end"]:
            os.system(f"touch {output_folder}/end_{seed}.txt")
        return 0 if solved_all else 1
def main(param_file):
    '''Build layers.

    *param_file* must match ``experimenten/params_<id>.txt``; the id is
    taken from the file name.  Returns 0 when all inputs were solved,
    1 when a layer produced no new functions or not all inputs were solved.
    '''
    prefix = "experimenten/params_"
    suffix = ".txt"
    if not (param_file.startswith(prefix) and param_file.endswith(suffix)):
        exit("param file must have format 'experimenten/params_id.txt'")
    id = param_file[len(prefix):-len(suffix)]
    output_folder = f"tmp/{id}"
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    with open(param_file, "r") as f:
        params = json.load(f)
    functions_file_name = params["functions_file"]
    inputs_file_name = params["inputs_file"]
    verbose = params["verbose"]
    language_subset = params["language_subset"]
    start_level = params["start_level"]
    max_depth = params["max_depth"]
    with open(f"{output_folder}/params.txt", "w") as f:
        # write a copy to the output folder
        json.dump(params, f, sort_keys=True, indent=4)

    log_file_name = f"{output_folder}/log.txt"
    with open(log_file_name, "w") as log_file:
        if hasattr(log_file, "reconfigure"):
            # line-buffer so progress is visible while the run is ongoing
            log_file.reconfigure(line_buffering=True)
        all_functions = interpret.get_functions(functions_file_name)
        input_chunks = interpret.compile(interpret.load(inputs_file_name))
        example_inputs = []
        for inputs_chunk, evaluations_chunk in input_chunks:
            example_inputs += inputs_chunk
        layer_builder = LayerBuilder(example_inputs, log_file, verbose, language_subset)
        for layer_level in range(start_level, start_level + 1):
            if verbose >= 0:
                print(f"L{layer_level}")
            new_functions, usage = layer_builder.build_layer(max_depth=max_depth, old_functions=all_functions, layer_level=layer_level)
            if len(new_functions) == 0:
                if verbose >= 0:
                    print(f"L{layer_level} no new functions found")
                return 1
            write_layer(f"{output_folder}/L{layer_level}D3.txt", new_functions)
            write_family_size(f"{output_folder}/L{layer_level}D3_family_size.txt", new_functions)
            write_usage(f"{output_folder}/L{layer_level}D3_usage.txt", usage)
            # BUG FIX: the loop variable used to be named 'params', which
            # clobbered the config dict loaded above
            for fname, fn_params, code, _ in new_functions:
                interpret.add_function(["function", fname, fn_params, code], all_functions)
            solved_all = compute_solved_all(input_chunks, all_functions, log_file, verbose)
        return 0 if solved_all else 1
示例#6
0
def main(functions_file_name, inputs_file_name):
    """Load the function library and the input chunks, then report which
    inputs are solved."""
    loaded_functions = interpret.get_functions(functions_file_name)
    chunks = interpret.compile(interpret.load(inputs_file_name))
    compute_solved_all(chunks, loaded_functions)
示例#7
0
def analyse_vastlopers_via_best_files_no_family_db(toolbox):
    """Analyse all 'best*' files in the old-populations folder.

    Groups the best individuals by raw error, then writes two reports in
    ``toolbox.output_folder``: ``analysis.txt`` (error, count, error-matrix
    rendering, example output) and ``sorted_seeds.txt`` (seeds per error).
    """
    print("analysis starts, reading files ...")
    families = dict()
    file_count = 0
    for filename in os.listdir(toolbox.old_populations_folder):
        if filename.startswith("best"):
            best_list = interpret.compile(
                interpret.load(toolbox.old_populations_folder + "/" +
                               filename))
            if len(best_list) > 0:
                file_count += 1
                assert len(best_list) == 1
                code = best_list[0]
                deap_str = interpret.convert_code_to_deap_str(code, toolbox)
                ind = gp.PrimitiveTree.from_string(deap_str, toolbox.pset)
                ind.age = 0
                ind.id = toolbox.get_unique_id()
                pp_str = make_pp_str(ind)
                evaluate_individual(toolbox, ind, pp_str, 0)
                ind.model_outputs = cpp_coupling.run_on_all_inputs(
                    toolbox.cpp_handle, ind)
                key = ind.fam.raw_error
                if key not in families:
                    families[key] = ([], ind.fam.raw_error,
                                     ind.fam.raw_error_matrix,
                                     ind.model_outputs)
                # seed id is encoded in the filename, e.g. "best_1234.txt"
                id_seed = int(filename[5:9])
                families[key][0].append(id_seed)
    families = [(raw_error, raw_error_matrix, seeds, outputs)
                for key, (seeds, raw_error, raw_error_matrix,
                          outputs) in families.items()]
    families.sort(key=lambda item: -item[0])  # highest error first
    filename = f"{toolbox.output_folder}/analysis.txt"
    # BUG FIX: message said "(unknown)" and misspelled "analysis"
    print(f"writing analysis result of {file_count} files in {filename} ...")
    with open(filename, "w") as f:
        elem = toolbox.example_inputs[-1][0]
        data = toolbox.example_inputs[-1][1]
        f.write(
            f"{'error':7} count  {'error_matrix':127} example:merge({data},{elem})\n"
        )
        sum_count = 0
        for raw_error, raw_error_matrix, seeds, model_outputs in families:
            # render the matrix with one character per cell:
            # '.' == 0, digits 1-9, letters A-Z for 10-35, '*' beyond that
            msg = ""
            for i in range(raw_error_matrix.shape[0]):
                msg += "|"
                for j in range(raw_error_matrix.shape[1]):
                    x = round(raw_error_matrix[i, j])
                    if x == 0:
                        msg += "."
                    elif x > 35:  # was 'x - 10 > 25': same bound, clearer
                        msg += "*"
                    elif x >= 10:
                        msg += "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[x - 10]
                    else:
                        msg += str(x)
            f.write(
                f"{raw_error:7.3f} {len(seeds):5d}  {msg}| {str(model_outputs[-1])}\n"
            )
            sum_count += len(seeds)
        f.write(f"{' ':7} {sum_count:5}\n")
    filename = f"{toolbox.output_folder}/sorted_seeds.txt"
    # BUG FIX: message said "(unknown)" instead of the target file
    print(f"writing sorted seeds in {filename} ...")
    with open(filename, "w") as f:
        for raw_error, raw_error_matrix, seeds, _outputs in families:
            f.write(f"{raw_error:7.3f} {len(seeds):5d}")
            seeds.sort()
            for id_seed in seeds:
                f.write(f" {id_seed}")
            f.write("\n")
    print("analysis done")  # BUG FIX: was "anaysis done"