def main(filename, sample_count):
    """Learn a k-CNF SMT approximation of the formula stored in an SMT-LIB file.

    :param filename: path to the SMT-LIB benchmark file
    :param sample_count: number of samples to draw from the target problem

    Side effects: seeds the global RNG with the current time, writes
    incremental-learning events to "log.txt" and prints the learned theory
    in nested SMT format.
    """
    seed = time.time()
    random.seed(seed)
    target_formula = smt.read_smtlib(filename)
    variables = target_formula.get_free_variables()
    var_names = [str(v) for v in variables]
    var_types = {str(v): v.symbol_type() for v in variables}
    # TODO This is a hack: every variable gets the same fixed (0, 200) domain
    var_domains = {str(v): (0, 200) for v in variables}
    domain = problem.Domain(var_names, var_types, var_domains)
    name = basename(filename).split(".")[0]
    target_problem = problem.Problem(domain, target_formula, name)
    samples = generator.get_problem_samples(target_problem, sample_count, 1)
    # Fix: never request more initial indices than there are samples —
    # random.sample raises ValueError when the sample exceeds the population.
    initial_indices = random.sample(list(range(sample_count)), min(20, sample_count))
    learner = KCnfSmtLearner(3, 3, RandomViolationsStrategy(5))
    # Keep the log file open for the whole learning run: the observer holds
    # the file handle, so learn() must execute inside the with-block.
    with open("log.txt", "w") as f:
        learner.add_observer(inc_logging.LoggingObserver(f))
        print(parse.smt_to_nested(learner.learn(domain, samples, initial_indices)))
def learn_inc(_data, _k, _h):
    """Incrementally learn a k-CNF SMT theory for the given (k, h) parameters.

    Relies on enclosing-scope state: data, domain, seed, violations_size,
    initial_size, log_dir and problem_name. The _data argument is not read
    (the closure uses data directly). Returns the learned SMT formula.
    """
    strategy = RandomViolationsStrategy(violations_size)
    log_path = os.path.join(log_dir, "{}_{}_{}.txt".format(problem_name, _k, _h))
    cnf_learner = KCnfSmtLearner(_k, _h, strategy)
    cnf_learner.add_observer(inc_logging.LoggingObserver(log_path, seed, True, strategy))
    starting_points = random.sample(list(range(len(data))), initial_size)
    theory = cnf_learner.learn(domain, data, starting_points)
    print("Learned theory:\n{}".format(parse.smt_to_nested(theory)))
    return theory
def learn_f(_data, _k, _h):
    """Build a (k, h)-parameterized learner for the configured bias and run it.

    Reads enclosing-scope state: bias, incremental_config, name, sample_count,
    seed, results_dir, domain and data. The _data argument is unused.

    :raises RuntimeError: if bias is neither "cnf" nor "dnf" (previously this
        fell through and crashed later with an unbound-name NameError).
    """
    selection_strategy = incremental_config.get_selection_strategy()
    if bias == "cnf":
        learner = KCnfSmtLearner(_k, _h, selection_strategy)
    elif bias == "dnf":
        learner = KDnfSmtLearner(_k, _h, selection_strategy)
    else:
        # Fix: fail fast with a clear message, consistent with learn_synthetic
        raise RuntimeError("Unknown bias {}".format(bias))
    initial_indices = incremental_config.get_initial_indices()
    log_file = "{}_{}_{}_{}_{}.learning_log.txt".format(name, sample_count, seed, _k, _h)
    learner.add_observer(LoggingObserver(os.path.join(results_dir, log_file), seed, True, selection_strategy))
    return learner.learn(domain, data, initial_indices)
def learn_inc(_data, i, _k, _h):
    """Learn a k-CNF theory on the shared data set, plotting every iteration.

    Uses enclosing-scope state: problem, data, seed, feat_x and feat_y; the
    _data argument is not read. Returns the learned SMT formula.
    """
    strategy = RandomViolationsStrategy(10)
    cnf_learner = KCnfSmtLearner(_k, _h, strategy)
    out_dir = "../output/{}".format(problem.name)
    run_id = "{}_{}_{}_{}_{}_{}".format(cnf_learner.name, i, _k, _h, len(data), seed)
    observer = plotting.PlottingObserver(problem.domain, data, out_dir, run_id, feat_x, feat_y)
    cnf_learner.add_observer(observer)
    starting_points = random.sample(list(range(len(data))), 20)
    theory = cnf_learner.learn(problem.domain, data, starting_points)
    print("Learned theory:\n{}".format(pretty_print(theory)))
    return theory
def learn_inc(_data, i, _k, _h):
    """One-class variant: learn a k-CNF theory from positively labeled data.

    Wraps a RandomViolationsStrategy inside a OneClassStrategy built from the
    enclosing-scope thresholds, and plots progress per iteration. Uses
    enclosing-scope state: problem, data, seed, thresholds, feat_x, feat_y;
    the _data argument is not read. Returns the learned SMT formula.
    """
    strategy = OneClassStrategy(RandomViolationsStrategy(1), thresholds)
    cnf_learner = KCnfSmtLearner(_k, _h, strategy)
    out_dir = "../output/{}_one_class".format(problem.name)
    run_id = "{}_{}_{}_{}_{}_{}".format(cnf_learner.name, i, _k, _h, len(data), seed)
    cnf_learner.add_observer(
        plotting.PlottingObserver(problem.domain, data, out_dir, run_id, feat_x, feat_y))
    # (Disabled) additional observers existed here that split the plots by the
    # boolean "weekend" feature into week/ and weekend/ subdirectories.
    starting_points = random.sample(list(range(len(data))), 20)
    theory = cnf_learner.learn(problem.domain, data, starting_points)
    print("Learned theory:\n{}".format(pretty_print(theory)))
    return theory
def learn_synthetic(input_dir, prefix, results_dir, bias, incremental_config, plot=None, sample_count=None, time_out=None, parameter_free=False):
    """Run the incremental SMT learner on every synthetic data set in input_dir.

    For each imported data set the function seeds the RNG, optionally
    subsamples the data, learns a theory with either fixed (k, h) taken from
    the generating problem or — when parameter_free is set — via bottom-up
    parameter search, and records the outcome per problem/sample-count in a
    JSON overview file ("problems.txt") under results_dir.

    :param input_dir: directory containing synthetic data files
    :param prefix: filename prefix used to select data files
    :param results_dir: output directory for logs, plots and the overview
    :param bias: "cnf" or "dnf" — which learner class to instantiate
    :param incremental_config: provides data/domain plus selection strategy
        and initial indices for the incremental learner
    :param plot: if truthy, plot progress for purely-real 2D problems
    :param sample_count: optional cap on the number of samples used
    :param time_out: optional per-problem time limit in seconds
    :param parameter_free: if True, search (k, h) bottom-up instead of using
        the generating problem's known counts
    """
    input_dir = os.path.abspath(input_dir)
    data_sets = list(import_synthetic_data_files(input_dir, prefix))
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    # Load (or initialize) the JSON overview that accumulates results
    # across runs: flat[problem_name][sample_count] -> result record.
    overview = os.path.join(results_dir, "problems.txt")
    if not os.path.isfile(overview):
        flat = {}
    else:
        with open(overview, "r") as f:
            flat = json.load(f)
    for data_set in data_sets:
        synthetic_problem = data_set.synthetic_problem
        data = data_set.samples
        name = synthetic_problem.theory_problem.name
        domain = synthetic_problem.theory_problem.domain
        if name not in flat:
            flat[name] = {}
        print(name)
        # Fresh time-derived seed per data set; it is stored in the result
        # record below so runs remain identifiable.
        seed = hash(time.time())
        random.seed(seed)
        if sample_count is not None and sample_count < len(data):
            data = random.sample(data, sample_count)
        else:
            # NOTE(review): this rebinds the sample_count parameter, so after
            # the first data set that is smaller than the cap, later
            # iterations see the updated value — confirm this is intended.
            sample_count = len(data)
        incremental_config.set_data(data)
        incremental_config.domain = domain
        if not parameter_free:
            initial_indices = incremental_config.get_initial_indices()
            # Use the known ground-truth complexity of the generating problem.
            h = synthetic_problem.half_space_count
            k = synthetic_problem.formula_count
            if bias == "cnf" or bias == "dnf":
                selection_strategy = incremental_config.get_selection_strategy()
                if bias == "cnf":
                    learner = KCnfSmtLearner(k, h, selection_strategy)
                elif bias == "dnf":
                    learner = KDnfSmtLearner(k, h, selection_strategy)
                # Plotting only makes sense for purely-real 2D problems.
                if plot is not None and plot and synthetic_problem.bool_count == 0 and synthetic_problem.real_count == 2:
                    import plotting
                    feats = domain.real_vars
                    plots_dir = os.path.join(results_dir, name)
                    exp_id = "{}_{}_{}".format(learner.name, sample_count, seed)
                    learner.add_observer(plotting.PlottingObserver(data, plots_dir, exp_id, *feats))
                log_file = "{}_{}_{}_{}_{}.learning_log.txt".format(name, sample_count, seed, k, h)
                learner.add_observer(LoggingObserver(os.path.join(results_dir, log_file), seed, True, selection_strategy))
            else:
                raise RuntimeError("Unknown bias {}".format(bias))
            # result is None when the time limit was hit.
            result = timeout(learner.learn, [domain, data, initial_indices], duration=time_out)
        else:
            def learn_f(_data, _k, _h):
                # Closure used by the bottom-up (k, h) search; _data is unused
                # (reads data from the enclosing scope).
                selection_strategy = incremental_config.get_selection_strategy()
                if bias == "cnf":
                    learner = KCnfSmtLearner(_k, _h, selection_strategy)
                elif bias == "dnf":
                    learner = KDnfSmtLearner(_k, _h, selection_strategy)
                initial_indices = incremental_config.get_initial_indices()
                log_file = "{}_{}_{}_{}_{}.learning_log.txt".format(name, sample_count, seed, _k, _h)
                learner.add_observer(LoggingObserver(os.path.join(results_dir, log_file), seed, True, selection_strategy))
                return learner.learn(domain, data, initial_indices)
            result, k, h = learn_bottom_up(data, learn_f, 3, 1)
        if result is None:
            flat[name][sample_count] = {"k": k, "h": h, "seed": seed, "bias": bias, "time_out": True}
        else:
            flat[name][sample_count] = {"k": k, "h": h, "seed": seed, "bias": bias, "time_out": False}
        if time_out is not None:
            flat[name][sample_count]["time_limit"] = time_out
        # Persist the overview after each data set (assumed in-loop
        # checkpointing — confirm against the original indentation).
        with open(overview, "w") as f:
            json.dump(flat, f)
def main():
    """Sample data from a benchmark problem and run one-class learning on it.

    Side effects: seeds the global RNG, prints timing and the learned theory.

    NOTE(review): this function calls exit() right after learn_one_class(),
    so everything below that call is unreachable legacy experiment code,
    kept for reference.
    """
    n = 1000
    seed = 65
    # Fix: seed from the named constant instead of repeating the literal 65,
    # so changing `seed` cannot silently desynchronize the RNG.
    random.seed(seed)
    # Alternative benchmark problems tried previously: simple_univariate_problem,
    # simple_checker_problem, simple_checker_problem_cnf,
    # shared_hyperplane_problem, cross_problem, bool_xor_problem,
    # ice_cream_problem, or a small hand-written 4-point data set.
    problem = checker_problem()
    sample_time_start = time.time()
    data = sample(problem, n, seed=seed)
    sample_time_elapsed = time.time() - sample_time_start
    print("Computing samples took {:.2f}s".format(sample_time_elapsed))
    # learn_parameter_free(problem, data, seed)
    learn_one_class(problem, data, seed)
    exit()
    # --- unreachable below: legacy incremental-learning experiment ---
    border_indices = random.sample(list(range(len(data))), 20)
    border_points_name = "../output/{}_{}_{}".format(problem.name, len(data), seed)
    # Other learners experimented with here: OCTLearner, KDNFLearner,
    # GreedyLogicDNFLearner, KDnfSmtLearner, GreedyMaxRuleLearner,
    # GreedyMilpRuleLearner.
    learner = KCnfSmtLearner(5, 5, RandomViolationsStrategy(20))
    dir_name = "../output/{}".format(problem.name)
    img_name = "{}_{}_{}".format(learner.name, len(data), seed)
    learner.add_observer(
        plotting.PlottingObserver(data, dir_name, img_name, "x", "y"))
    domain = problem.domain
    learned_theory = learner.learn(domain, data, border_indices)
    print("Learned theory:\n{}".format(parse.smt_to_nested(learned_theory)))