示例#1
0
def learn_synthetic(input_directory, output_directory, runs, sample_size, processes, time_out, learn_options: LearnOptions):
    """Build and run one ``incal-track`` command per selected synthetic sample.

    For every name returned by the synthetic database of ``input_directory``,
    the first ``runs`` samples whose ``"sample_size"`` equals ``sample_size``
    are selected.  Each selected sample yields one command whose ``.result``
    and ``.log`` files are placed under ``output_directory``.  All commands
    are passed to ``run_commands`` in a single call at the end.

    Args:
        input_directory: Directory the database is loaded from; the
            ``<name>.density`` file and the sample / label files are
            resolved relative to it.
        output_directory: Directory prefix for the result and log files.
        runs: Exact number of matching samples required per database entry.
        sample_size: Value a sample's ``"sample_size"`` must equal.
        processes: Forwarded unchanged to ``run_commands``.
        time_out: Forwarded unchanged to ``run_commands``.
        learn_options: Template options; a copy is made per sample before the
            domain, data and labels paths are set on it.

    Raises:
        RuntimeError: If an entry has fewer than ``runs`` matching samples.
    """
    commands = []

    db = get_synthetic_db(input_directory)
    for name in db.getall():
        entry = db.get(name)

        # Collect the first `runs` samples of the requested size.
        matching_samples = []
        for sample in entry["samples"]:
            if len(matching_samples) >= runs:
                break  # enough samples collected, no need to scan further
            if sample["sample_size"] == sample_size:
                matching_samples.append(sample)
        if len(matching_samples) != runs:
            raise RuntimeError("Insufficient samples available, prepare more samples first")

        for sample in matching_samples:
            detail_learn_options = learn_options.copy()
            detail_learn_options.domain = os.path.join(input_directory, "{}.density".format(name))
            detail_learn_options.data = os.path.join(input_directory, sample["samples_file"])
            detail_learn_options.labels = os.path.join(input_directory, sample["labels_file"])

            # Result and log file share everything but the extension.
            output_prefix = "{}{sep}{}.{}.{}".format(
                output_directory, name, sample_size, sample["seed"], sep=os.path.sep)
            export_file = output_prefix + ".result"
            log_file = output_prefix + ".log"

            # exist_ok avoids the check-then-create race when several
            # invocations prepare the same output directory concurrently.
            os.makedirs(os.path.dirname(export_file), exist_ok=True)

            commands.append("incal-track {} --export {} --log {}"
                            .format(detail_learn_options.print_arguments(), export_file, log_file))

    run_commands(commands, processes, time_out)
示例#2
0
 def learn_inc(_data, _labels, _i, _k, _h):
     """Run a ``KCnfSmtLearner`` on the given data and return its result.

     The learner is built from ``_k`` and ``_h`` with a
     ``RandomViolationsStrategy(10)`` and the ``"mvn"`` argument, and gets a
     ``PlottingObserver`` named ``run_<_i>_<_k>_<_h>``.  ``domain`` and
     ``directory`` are taken from the enclosing scope.
     """
     # Alternative kept for reference:
     # OneClassStrategy(RandomViolationsStrategy(10), thresholds)
     violations_strategy = RandomViolationsStrategy(10)
     smt_learner = KCnfSmtLearner(_k, _h, violations_strategy, "mvn")

     # Choose the starting indices among all row indices of the data.
     pick_initial = LearnOptions.initial_random(20)
     all_indices = list(range(len(_data)))
     start_indices = pick_initial(all_indices)

     plot_name = "run_{}_{}_{}".format(_i, _k, _h)
     first_var = domain.real_vars[0]
     second_var = domain.real_vars[1]
     plotter = PlottingObserver(domain, directory, plot_name, first_var, second_var, None, False)
     smt_learner.add_observer(plotter)

     return smt_learner.learn(domain, _data, _labels, start_indices)
示例#3
0
def get_experiment(res_path=None):
    """Create an ``Experiment`` with an ``export`` string option.

    Args:
        res_path: Optional replacement root for resource paths.  When truthy,
            an import handler is attached that rewrites every string
            parameter containing ``"res/"`` so that the part after the first
            ``"res/"`` is re-rooted under ``res_path``.  When falsy, no
            handler is attached.

    Returns:
        The ``Experiment`` built from fresh ``LearnOptions``, ``LearnResults``
        and the configuration options.
    """
    def import_handler(parameters_dict, results_dict, config_dict):
        # Only existing keys are reassigned, so the dictionary may be
        # updated while it is being iterated.
        for key, entry in parameters_dict.items():
            if not isinstance(entry, str):
                continue
            _, marker, remainder = entry.partition("res/")
            if marker:
                parameters_dict[key] = res_path + os.path.sep + remainder

    config = Options()
    config.add_option("export", str)

    handler = import_handler if res_path else None
    return Experiment(LearnOptions(), LearnResults(), config, handler)
示例#4
0
    def learn_inc(_data, _labels, _i, _k, _h):
        """Run a ``KCnfSmtLearner`` with a one-class strategy and return its result.

        ``thresholds``, ``bg_knowledge``, ``negative_bootstrap``, ``domain``,
        ``symmetry_breaking`` and ``seed`` are taken from the enclosing scope.
        ``_i`` is accepted but not used.
        """
        one_class = OneClassStrategy(
            RandomViolationsStrategy(10),
            thresholds,
            background_knowledge=bg_knowledge,
        )

        # Optionally extend the data set through OneClassStrategy.add_negatives.
        if negative_bootstrap > 0:
            extended = OneClassStrategy.add_negatives(
                domain, _data, _labels, thresholds, negative_bootstrap)
            _data, _labels = extended

        smt_learner = KCnfSmtLearner(_k, _h, one_class, symmetry_breaking)

        # Seed right before the initial indices are chosen.
        random.seed(seed)
        pick_initial = LearnOptions.initial_random(20)
        all_indices = list(range(len(_data)))
        start_indices = pick_initial(all_indices)

        return smt_learner.learn(domain, _data, _labels, start_indices)
示例#5
0
 def learn_inc(_data, _labels, _i, _k, _h):
     """Run a ``KCnfSmtLearner`` with a one-class strategy and return its result.

     The learner is built from ``_k`` and ``_h`` with the ``"mvn"`` argument
     and gets a ``PlottingObserver`` named ``run_<_i>_<_k>_<_h>`` that uses
     the ``"test_output/bg"`` directory.  ``thresholds`` and ``domain`` are
     taken from the enclosing scope.
     """
     # Variant kept for reference: additionally pass
     # background_knowledge=(a | b) & (~a | ~b) to OneClassStrategy.
     violations_strategy = RandomViolationsStrategy(10)
     one_class = OneClassStrategy(violations_strategy, thresholds)
     smt_learner = KCnfSmtLearner(_k, _h, one_class, "mvn")

     pick_initial = LearnOptions.initial_random(20)
     all_indices = list(range(len(_data)))
     start_indices = pick_initial(all_indices)

     # A LoggingObserver(None, _k, _h, None, True) can be added here as well.
     plot_name = "run_{}_{}_{}".format(_i, _k, _h)
     first_var = domain.real_vars[0]
     second_var = domain.real_vars[1]
     plotter = PlottingObserver(domain, "test_output/bg", plot_name, first_var, second_var, None, False)
     smt_learner.add_observer(plotter)

     return smt_learner.learn(domain, _data, _labels, start_indices)
示例#6
0
文件: cli.py 项目: mboehme/learn2fix
def _str_to_bool(text):
    """Convert a command-line token to ``bool`` for use as an argparse ``type``.

    ``type=bool`` is not suitable for this: ``bool("False")`` is ``True``
    because every non-empty string is truthy.

    Args:
        text: The raw argument string.

    Returns:
        ``True`` for ``true/t/yes/y/1/on`` and ``False`` for
        ``false/f/no/n/0/off`` or the empty string (case-insensitive).

    Raises:
        argparse.ArgumentTypeError: If ``text`` is none of the accepted tokens.
    """
    normalized = text.strip().lower()
    if normalized in ("true", "t", "yes", "y", "1", "on"):
        return True
    if normalized in ("false", "f", "no", "n", "0", "off", ""):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got {!r}".format(text))


def main():
    """Command-line entry point for preparing, learning and analyzing experiments.

    The positional ``source`` selects the data source: the SMT-LIB benchmark,
    the synthetic data, or a named example given as ``<prefix>:<name>`` where
    the prefix starts with ``ex``.  The sub-command (``prepare``, ``learn`` or
    ``analyze``) selects the task.  Combinations that match none of the
    branches below do nothing.
    """
    smt_lib_name = "smt-lib-benchmark"
    synthetic_name = "synthetic"
    parser = argparse.ArgumentParser(
        description="Interface with benchmark or synthetic data for experiments"
    )

    parser.add_argument("source")
    parser.add_argument("--sample_size", type=int, default=None)
    parser.add_argument("--runs", type=int, default=None)
    parser.add_argument("--input_dir", type=str, default=None)
    parser.add_argument("--output_dir", type=str, default=None)
    parser.add_argument("--processes", type=int, default=None)
    parser.add_argument("--time_out", type=int, default=None)

    task_parsers = parser.add_subparsers(dest="task")
    prepare_parser = task_parsers.add_parser("prepare")
    # An explicit converter is required so "--reset_samples False" is False.
    prepare_parser.add_argument("--reset_samples", type=_str_to_bool, default=False)
    learn_parser = task_parsers.add_parser("learn")
    analyze_parser = task_parsers.add_parser("analyze")
    analyze_parser.add_argument("--dirs", nargs="+", type=str)
    analyze_parser.add_argument("--res_path", type=str, default=None)

    show_parsers = analyze_parser.add_subparsers()
    show_parser = show_parsers.add_parser("show")
    show.add_arguments(show_parser)

    learn_options = LearnOptions()
    learn_options.add_arguments(learn_parser)

    args = parser.parse_args()
    if args.task == "prepare":
        if args.source == smt_lib_name:
            prepare_smt_lib_benchmark()
            prepare_ratios()
            prepare_samples(args.runs, args.sample_size, args.reset_samples)
        elif args.source == synthetic_name:
            prepare_synthetic(args.input_dir, args.output_dir, args.runs,
                              args.sample_size)
    elif args.task == "learn":
        learn_options.parse_arguments(args)
        if args.source == smt_lib_name:
            learn_benchmark(args.runs, args.sample_size, args.processes,
                            args.time_out, learn_options)
        elif args.source == synthetic_name:
            learn_synthetic(args.input_dir, args.output_dir, args.runs,
                            args.sample_size, args.processes, args.time_out,
                            learn_options)
        elif args.source.startswith("ex"):
            # The example name follows the first colon of the source.
            _, separator, example_name = args.source.partition(":")
            if not separator:
                # Report a usage error instead of failing with an IndexError.
                parser.error("example sources must have the form '<prefix>:<name>', got {!r}"
                             .format(args.source))
            domain, formula = examples.get_by_name(example_name)
            # Fixed seed before the samples are drawn.
            np.random.seed(1)
            from pywmi.sample import uniform
            samples = uniform(domain, args.sample_size)
            from pywmi import evaluate
            labels = evaluate(domain, formula, samples)
            learn_options.set_value("domain", domain, False)
            learn_options.set_value("data", samples, False)
            learn_options.set_value("labels", labels, False)
            (formula, k, h), duration = learn_options.call(True)
            print("[{:.2f}s] Learned formula (k={}, h={}): {}".format(
                duration, k, h, pretty_print(formula)))
    elif args.task == "analyze":
        analyze(args.dirs, args.res_path, show.parse_args(args))
示例#7
0
def learn_benchmark(runs, sample_size, processes, time_out, learn_options: LearnOptions):
    """Build and run one ``incal-track`` command per selected benchmark sample.

    Benchmark files are selected with ``benchmark_filter`` and must carry a
    ``"samples"`` key.  For each file with a non-empty ``"bounds"`` list,
    every element ``t`` is scored with ``rel_ratio(t[1])``.  If the lowest
    score is at most 0.3, the elements scoring at most 0.3 and within
    ``best / 5`` of the lowest score qualify; among them, the one with the
    smallest ``get_bound_volume(t[0])`` is selected.  The first ``runs``
    samples that use the selected bounds and have a ``"sample_size"`` of at
    least ``sample_size`` are then scheduled.

    Args:
        runs: Number of samples to schedule per selected benchmark file.
        sample_size: Minimum ``"sample_size"`` a sample must have.
        processes: Forwarded unchanged to ``run_commands``.
        time_out: Forwarded unchanged to ``run_commands``.
        learn_options: Template options; a copy is made per sample before the
            domain, data and labels paths are set on it.

    Raises:
        RuntimeError: If a selected file has fewer than ``runs`` usable samples.
    """
    def learn_filter(_e):
        return benchmark_filter(_e) and "samples" in _e

    problems_to_learn = []
    for name, entry, density_filename in select_benchmark_files(learn_filter):
        bounds = entry["bounds"]
        if len(bounds) == 0:
            continue

        best_ratio = min(rel_ratio(t[1]) for t in bounds)
        # Written as a negation so a NaN ratio is skipped as well.
        if not best_ratio <= 0.3:
            continue

        qualifying = [t for t in bounds
                      if rel_ratio(t[1]) <= 0.3 and abs(rel_ratio(t[1]) - best_ratio) <= best_ratio / 5]
        selected = sorted(qualifying, key=lambda x: get_bound_volume(x[0]))[0]
        print(name, "\n", rel_ratio(selected[1]), best_ratio, selected[0], entry["bool_variables_count"])

        selected_samples = [s for s in entry["samples"]
                            if s["bounds"] == selected[0] and s["sample_size"] >= sample_size]
        if len(selected_samples) < runs:
            raise RuntimeError("Insufficient number of data set available ({} of {})"
                               .format(len(selected_samples), runs))
        # Surplus samples are dropped; only the first `runs` are scheduled.
        problems_to_learn.extend(
            (name, density_filename, selected_sample)
            for selected_sample in selected_samples[:runs])

    commands = []
    for name, density_filename, selected_sample in problems_to_learn:
        detail_learn_options = learn_options.copy()
        detail_learn_options.domain = density_filename
        detail_learn_options.data = selected_sample["samples_filename"]
        detail_learn_options.labels = selected_sample["labels_filename"]

        # Result and log file share everything but the extension.
        output_prefix = "{}{sep}{}.{}.{}".format(
            get_benchmark_results_dir(), name, selected_sample["sample_size"],
            selected_sample["seed"], sep=os.path.sep)
        export_file = output_prefix + ".result"
        log_file = output_prefix + ".log"

        # exist_ok avoids the check-then-create race when several
        # invocations prepare the same results directory concurrently.
        os.makedirs(os.path.dirname(export_file), exist_ok=True)

        commands.append("incal-track {} --export {} --log {}"
                        .format(detail_learn_options.print_arguments(), export_file, log_file))

    run_commands(commands, processes, time_out)