示例#1
0
def main():
    """Learn the checker problem from positive examples only and print the result.

    Draws 1000 uniform samples, keeps the ones the target formula labels 1,
    and runs the bottom-up search with a one-class violation strategy.
    """
    domain, target, _name = checker_problem()
    thresholds = dict.fromkeys(domain.real_vars, 0.1)

    samples = uniform(domain, 1000)
    sample_labels = evaluate(domain, target, samples)
    keep = sample_labels == 1
    data = samples[keep]
    labels = sample_labels[keep]

    def learn_inc(_data, _labels, _i, _k, _h):
        """Run a single learning attempt for the given (k, h) setting."""
        one_class = OneClassStrategy(RandomViolationsStrategy(10), thresholds)
        learner = KCnfSmtLearner(_k, _h, one_class, "mvn")
        pick_initial = LearnOptions.initial_random(20)
        initial_indices = pick_initial(list(range(len(_data))))
        # learner.add_observer(LoggingObserver(None, _k, _h, None, True))
        run_name = "run_{}_{}_{}".format(_i, _k, _h)
        plotter = PlottingObserver(domain, "test_output/checker", run_name,
                                   domain.real_vars[0], domain.real_vars[1],
                                   None, False)
        learner.add_observer(plotter)
        return learner.learn(domain, _data, _labels, initial_indices)

    outcome, k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 1, 1, None,
                                    None)
    _new_data, new_labels, learned = outcome
    print("Learned CNF(k={}, h={}) formula {}".format(k, h,
                                                      pretty_print(learned)))
    print("Data-set grew from {} to {} entries".format(len(labels),
                                                       len(new_labels)))
示例#2
0
def background_knowledge_example():
    """Learn a mixed Boolean/real formula from its positive examples only.

    The target requires exactly one of ``a`` and ``b`` to hold and
    ``0 <= x <= y <= 1``. Passing the Boolean part as background knowledge
    is left disabled (see the note inside ``learn_inc``).
    """
    domain = Domain.make(["a", "b"], ["x", "y"], [(0, 1), (0, 1)])
    a, b, x, y = domain.get_symbols(domain.variables)
    target = (a | b) & (~a | ~b) & (x >= 0) & (x <= y) & (y <= 1)
    thresholds = dict.fromkeys(domain.real_vars, 0.1)

    samples = uniform(domain, 10000)
    sample_labels = evaluate(domain, target, samples)
    keep = sample_labels == 1
    data = samples[keep]
    labels = sample_labels[keep]

    def learn_inc(_data, _labels, _i, _k, _h):
        """Run a single learning attempt for the given (k, h) setting."""
        # Disabled option: background_knowledge=(a | b) & (~a | ~b)
        one_class = OneClassStrategy(RandomViolationsStrategy(10), thresholds)
        learner = KCnfSmtLearner(_k, _h, one_class, "mvn")
        pick_initial = LearnOptions.initial_random(20)
        initial_indices = pick_initial(list(range(len(_data))))
        # learner.add_observer(LoggingObserver(None, _k, _h, None, True))
        run_name = "run_{}_{}_{}".format(_i, _k, _h)
        plotter = PlottingObserver(domain, "test_output/bg", run_name,
                                   domain.real_vars[0], domain.real_vars[1],
                                   None, False)
        learner.add_observer(plotter)
        return learner.learn(domain, _data, _labels, initial_indices)

    outcome, k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 1, 1, None,
                                    None)
    _new_data, new_labels, learned = outcome
    print("Learned CNF(k={}, h={}) formula {}".format(k, h,
                                                      pretty_print(learned)))
    print("Data-set grew from {} to {} entries".format(len(labels),
                                                       len(new_labels)))
    def compute_probabilities(self,
                              queries,
                              sample_count=None,
                              add_bounds=False):
        """Estimate the probability of each query from uniform samples.

        Samples are drawn over ``self.domain`` and filtered by ``self.support``.
        With a weight function the estimate is the weight mass of the samples
        satisfying the query divided by the total weight mass; without one it
        is the fraction of supported samples satisfying the query.

        :param queries: Iterable of query formulas.
        :param sample_count: Number of samples; ``self.sample_count`` if None.
        :param add_bounds: Not used by this implementation.
        :return: One entry per query; ``None`` where the normalising total is
            not positive.
        """
        if sample_count is None:
            sample_count = self.sample_count
        samples = uniform(self.domain, sample_count, rand_gen=self.rand_gen)
        labels = evaluate(self.domain, self.support, samples)
        positive_samples = samples[labels]

        # Pick the normaliser once; the per-query loop is shared by both modes.
        weighted = self.weight is not None
        if weighted:
            sample_weights = evaluate(self.domain, self.weight,
                                      positive_samples)
            total = sum(sample_weights)
        else:
            sample_weights = None
            total = positive_samples.shape[0]

        results = []
        for query in queries:
            if total > 0:
                query_labels = numpy.logical_and(
                    evaluate(self.domain, query, positive_samples),
                    labels[labels])
                hits = sample_weights[query_labels] if weighted else query_labels
                results.append(sum(hits) / total)
            else:
                results.append(None)

        return results
示例#4
0
    def get_volume(self, desired_samples=None, total_raw=None, query=None):
        """Return this node's estimated share of the builder's volume.

        Empty nodes contribute 0 and inner nodes sum their children. A leaf
        first tops up its sample set to ``desired_samples`` (when given and
        ``total_raw`` is not 0), then returns its accepted ratio scaled by its
        volume relative to the builder's volume.

        :param desired_samples: Target number of samples per leaf, or None to
            use the samples already stored.
        :param total_raw: A value of 0 disables drawing additional samples.
        :param query: Not used by this implementation.
        """
        if self.empty:
            return 0

        if not self.is_leaf:
            return sum(
                child.get_volume(desired_samples=desired_samples,
                                 total_raw=total_raw)
                for child in self.children)

        if desired_samples is not None:
            missing = 0 if total_raw == 0 else int(
                math.ceil(desired_samples - len(self.samples)))
            if missing > 0:
                extra_samples = uniform(self.domain,
                                        missing,
                                        rand_gen=self.rand_gen)
                extra_labels = self.builder.oracle.check(extra_samples)
                self.samples = np.concatenate([self.samples, extra_samples])
                self.labels = np.concatenate([self.labels, extra_labels])

        accepted_ratio = self.accepted_count() / len(self.labels)
        return accepted_ratio * (self.volume / self.builder.volume)
def test_boolean():
    """Uniform samples over a purely Boolean domain hold only 0/1 entries."""
    domain = Domain.make(["a", "b", "c"])
    sample_count = 10
    data = sample.uniform(domain, sample_count)
    assert len(data) == sample_count
    for row in range(sample_count):
        for col in range(3):
            value = data[row, col]
            assert value == 0 or value == 1
def test_real():
    """Uniform samples over a real domain stay inside each variable's bounds."""
    bounds = [(-1, 1), (2, 10)]
    domain = Domain.make([], ["x", "y"], bounds)
    sample_count = 10
    data = sample.uniform(domain, sample_count)
    assert len(data) == sample_count
    for row in range(sample_count):
        for col, (low, high) in enumerate(bounds):
            assert low <= data[row, col] <= high
def _test_plot_data():
    """Smoke-check that plotting labelled samples runs with the Agg backend."""
    domain = Domain.make(["a"], ["x", "y"], [(0, 1), (0, 1)])
    a, x, y = domain.get_symbols(["a", "x", "y"])
    support = a | (~a & (x <= y))
    points = uniform(domain, 100)
    point_labels = evaluate(domain, support, points)
    mpl.use('Agg')
    labelled_points = (points, point_labels)
    plot_data(None, domain, labelled_points)
    assert True
    def integrate(self, domain, convex_bounds: List[LinearInequality],
                  polynomial: Polynomial):
        """Integrate ``polynomial`` over the region given by ``convex_bounds``.

        The bounds are conjoined into one formula and handed to a
        ``RejectionEngine``. When ``self.bounding_box`` is positive the domain
        is first shrunk to a bounding box of the region:

        * ``1`` -- per-variable bounds from two linear programs per real
          variable;
        * ``2`` -- per-column minimum/maximum of the accepted uniform samples,
          widened by the standard deviation of consecutive sorted differences.

        :param domain: Domain the bounds and polynomial are defined over.
        :param convex_bounds: Linear inequalities describing the region.
        :param polynomial: Integrand.
        :return: The engine's ``compute_volume()`` result, or 0 when mode 2
            finds no sample inside the region.
        :raises ValueError: If ``self.bounding_box`` is positive but not 1 or 2.
        """
        formula = smt.And(*[i.to_smt() for i in convex_bounds])

        if self.bounding_box > 0:
            if self.bounding_box == 1:
                a_matrix = numpy.zeros(
                    (len(convex_bounds), len(domain.real_vars)))
                b_matrix = numpy.zeros((len(convex_bounds), ))
                for i, bound in enumerate(convex_bounds):
                    for j in range(len(domain.real_vars)):
                        a_matrix[i, j] = bound.a(domain.real_vars[j])
                    b_matrix[i] = bound.b()

                lb_ub_bounds = {}
                c = numpy.zeros((len(domain.real_vars), ))
                # NOTE(review): linprog defaults to bounds=(0, None), i.e.
                # non-negative variables -- confirm this is intended for
                # regions with negative coordinates.
                for j in range(len(domain.real_vars)):
                    # Objective selects variable j only; reset afterwards.
                    c[j] = 1
                    # noinspection PyTypeChecker
                    lb = scipy.optimize.linprog(c, a_matrix, b_matrix).x[j]
                    # noinspection PyTypeChecker
                    ub = scipy.optimize.linprog(-c, a_matrix, b_matrix).x[j]
                    c[j] = 0
                    lb_ub_bounds[domain.real_vars[j]] = (lb, ub)
            elif self.bounding_box == 2:
                samples = uniform(domain,
                                  self.sample_count,
                                  rand_gen=self.rand_gen)
                labels = evaluate(domain, formula, samples)
                samples = samples[labels == 1]

                # No accepted sample: indexing row 0 below would raise
                # IndexError, which the ValueError handler does not catch.
                if len(samples) == 0:
                    return 0

                try:
                    # Column-wise sort puts each column's min/max first/last.
                    samples.sort(axis=0)
                    std = abs(samples[0:-1, :] - samples[1:, :]).std(axis=0)
                    lbs = samples[0, :] - std
                    ubs = samples[-1, :] + std
                except ValueError:
                    return 0

                lb_ub_bounds = {
                    domain.variables[j]: (lbs[j], ubs[j])
                    for j in range(len(domain.variables))
                }
            else:
                raise ValueError("Illegal bounding box value {}".format(
                    self.bounding_box))
            domain = Domain(domain.variables, domain.var_types, lb_ub_bounds)

        engine = RejectionEngine(domain,
                                 formula,
                                 polynomial.to_smt(),
                                 self.sample_count,
                                 seed=self.seed)
        return engine.compute_volume()
示例#9
0
def negative_samples_example(background_knowledge):
    """Learn a formula from positives plus synthesised negative examples.

    Positive samples of the target are extended with negatives produced by
    ``OneClassStrategy.add_negatives``; the learned formula is then checked
    against a second, larger batch built the same way.

    :param background_knowledge: Truthy to use the "exactly one of a and b"
        constraint as background knowledge, falsy to learn without it.
    """
    domain = Domain.make(["a", "b"], ["x", "y"], [(0, 1), (0, 1)])
    a, b, x, y = domain.get_symbols(domain.variables)
    exactly_one = (a | b) & (~a | ~b)
    formula = exactly_one & (x <= y) & domain.get_bounds()
    background_knowledge = exactly_one if background_knowledge else None
    thresholds = {"x": 0.1, "y": 0.2}

    samples = uniform(domain, 10000)
    sample_labels = evaluate(domain, formula, samples)
    keep = sample_labels == 1
    data = samples[keep]
    labels = sample_labels[keep]
    original_sample_count = len(labels)

    start_time = time.time()

    data, labels = OneClassStrategy.add_negatives(domain, data, labels,
                                                  thresholds, 100,
                                                  background_knowledge)
    created = len(labels) - original_sample_count
    print("Created {} negative examples".format(created))

    sep = os.path.sep
    stamp = time.strftime("%Y-%m-%d %Hh%Mm%Ss")
    directory = "test_output{}bg_sampled{}{}".format(sep, sep, stamp)

    def learn_inc(_data, _labels, _i, _k, _h):
        """Run a single learning attempt for the given (k, h) setting."""
        one_class = OneClassStrategy(RandomViolationsStrategy(10),
                                     thresholds,
                                     background_knowledge=background_knowledge)
        learner = KCnfSmtLearner(_k, _h, one_class, "mvn")
        pick_initial = LearnOptions.initial_random(20)
        initial_indices = pick_initial(list(range(len(_data))))
        run_name = "run_{}_{}_{}".format(_i, _k, _h)
        plotter = PlottingObserver(domain, directory, run_name,
                                   domain.real_vars[0], domain.real_vars[1],
                                   None, False)
        learner.add_observer(plotter)
        return learner.learn(domain, _data, _labels, initial_indices)

    outcome, k, h = learn_bottom_up(data, labels, learn_inc, 1, 1, 1, 1, None,
                                    None)
    _new_data, new_labels, learned_formula = outcome
    if background_knowledge:
        learned_formula = learned_formula & background_knowledge

    duration = time.time() - start_time

    print("{}".format(smt_to_nested(learned_formula)))
    print("Learned CNF(k={}, h={}) formula {}".format(
        k, h, pretty_print(learned_formula)))
    print("Data-set grew from {} to {} entries".format(len(labels),
                                                       len(new_labels)))
    print("Learning took {:.2f}s".format(duration))

    # Validate on a fresh, larger batch of synthesised negatives.
    test_data, labels = OneClassStrategy.add_negatives(domain, data, labels,
                                                       thresholds, 1000,
                                                       background_knowledge)
    predictions = evaluate(domain, learned_formula, test_data)
    assert all(predictions == labels)
示例#10
0
def get_problem_samples(domain, support, sample_count, max_ratio):
    """Draw labelled uniform samples and reject overly unbalanced sets.

    :param domain: Domain to sample from.
    :param support: Formula used to label the samples.
    :param sample_count: Number of samples to draw.
    :param max_ratio: Balance limit; each class must hold at least
        ``sample_count * min(max_ratio, 1 - max_ratio)`` samples.
    :return: Tuple ``(samples, labels)``.
    :raises InsufficientBalanceError: If either class is too small.
    """
    minimal_count = sample_count * min(max_ratio, 1 - max_ratio)
    samples = uniform(domain, sample_count)
    labels = evaluate(domain, support, samples)
    positive_count = sum(labels)
    negative_count = sample_count - positive_count
    smallest_class = min(positive_count, negative_count)
    if smallest_class < minimal_count:
        raise InsufficientBalanceError()

    return samples, labels
示例#11
0
def generate_half_space_sample(domain, real_count):
    """Build a random half-space over the first ``real_count`` real variables.

    A hyperplane is fitted through ``real_count`` uniform samples; with
    probability 0.5 the ``<=`` side is returned, otherwise the ``>=`` side.

    :param domain: Domain providing the real variables and their symbols.
    :param real_count: Number of samples drawn and of real variables used.
    :return: An inequality between the linear sum and the fitted offset.
    """
    points = uniform(domain, real_count)
    weights, offset = Learner.fit_hyperplane(domain, points)

    terms = []
    for index in range(real_count):
        factor = smt.Real(float(weights[index][0]))
        terms.append(factor * domain.get_symbol(domain.real_vars[index]))

    if random.random() < 0.5:
        return smt.Plus(*terms) <= offset
    return smt.Plus(*terms) >= offset
示例#12
0
def prepare_ratios():
    """Record, per benchmark, the bounds that give a mixed set of labels.

    For every selected benchmark without a "bounds" entry, each combination of
    candidate bounds over the real variables is sampled 1000 times. A
    combination is kept with its positive ratio when the samples contain both
    positive and negative labels. The results are stored under the "bounds"
    key of the pickled summary file.
    """
    sample_count = 1000
    bounds_pool = [(-1, 1), (-10, 10), (-100, 100), (-1000, 1000)]
    ratios = dict()
    for name, _entry, density_filename in select_benchmark_files(
            lambda e: "bounds" not in e and benchmark_filter(e)):
        print("Finding ratios for {}".format(name))
        pysmt.environment.push_env()
        # Pop in a finally block so a failing benchmark cannot leave its
        # environment on the stack.
        try:
            pysmt.environment.get_env().enable_infix_notation = True

            density = Density.import_from(density_filename)
            domain = density.domain

            result_bounds = []
            result_ratios = []
            for bounds in itertools.product(bounds_pool,
                                            repeat=len(domain.real_vars)):
                var_bounds = dict(zip(domain.real_vars, bounds))
                restricted_domain = Domain(domain.variables, domain.var_types,
                                           var_bounds)
                samples = uniform(restricted_domain, sample_count)
                labels = evaluate(restricted_domain, density.support, samples)
                positive_count = sum(labels)
                # Keep only bounds under which both labels occur.
                if 0 < positive_count < sample_count:
                    result_bounds.append(var_bounds)
                    result_ratios.append(positive_count / sample_count)

            ratios[name] = list(zip(result_bounds, result_ratios))
            print(name, result_ratios)
        finally:
            pysmt.environment.pop_env()

    with open(get_summary_file(), "rb") as summary_file_reference:
        summary = pickle.load(summary_file_reference)

    for name, bounds in ratios.items():
        summary[name]["bounds"] = bounds

    with open(get_summary_file(), "wb") as summary_file_reference:
        pickle.dump(summary, summary_file_reference)
def test_mixed():
    """Uniform samples keep variable order in a mixed Boolean/real domain."""
    var_types = {
        "a": smt.BOOL,
        "x": smt.REAL,
        "b": smt.BOOL,
        "y": smt.REAL,
        "c": smt.BOOL
    }
    var_bounds = {"x": (-1, 1), "y": (2, 10)}
    domain = Domain(["a", "x", "b", "y", "c"], var_types, var_bounds)
    sample_count = 10
    data = sample.uniform(domain, sample_count)
    assert len(data) == sample_count

    # Column index -> expected (low, high); all other columns are Boolean.
    real_ranges = {1: (-1, 1), 3: (2, 10)}
    for row in range(sample_count):
        assert len(data[row, :]) == len(domain.variables)
        for col in range(5):
            if col in real_ranges:
                low, high = real_ranges[col]
                assert low <= data[row, col] <= high
            else:
                assert data[row, col] == 0 or data[row, col] == 1
    def compute_volume(self,
                       sample_count=None,
                       add_bounds=False,
                       ohe_variables=None):
        """Estimate the (weighted) volume of the support by uniform sampling.

        The accepted fraction of the samples is scaled by the volume of the
        sampled space. With a weight function the result is further multiplied
        by the mean weight of the accepted samples.

        :param sample_count: Number of samples; ``self.sample_count`` if None.
        :param add_bounds: Not used by this implementation.
        :param ohe_variables: Optional groups of one-hot encoded Boolean
            variables; each group contributes ``len(group)`` to the volume
            of the sampled space.
        :return: The estimate, or 0.0 when a weight function is set but the
            mean weight cannot be computed (``ZeroDivisionError``).
        """
        if sample_count is None:
            sample_count = self.sample_count
        samples = uniform(
            self.domain,
            sample_count,
            rand_gen=self.rand_gen,
            ohe_variables=ohe_variables,
        )
        labels = evaluate(self.domain, self.support, samples)

        if ohe_variables is not None:
            encoded = {x for ohe in ohe_variables for x in ohe}
            free_bools = [v for v in self.domain.bool_vars if v not in encoded]
            bound_volume = 2**len(free_bools)
            for group in ohe_variables:
                bound_volume *= len(group)

            real_volume = self.domain.get_bounding_box_volume()
            if real_volume != 0:
                bound_volume *= real_volume
        elif len(self.domain.real_vars) > 0:
            bound_volume = self.domain.get_volume()
        else:
            bound_volume = 2**len(self.domain.bool_vars)

        approx_volume = bound_volume * sum(labels) / len(labels)

        if self.weight is None:
            return approx_volume

        pos_samples = samples[labels]
        sample_weights = evaluate(self.domain, self.weight, pos_samples)
        try:
            mean_weight = sum(sample_weights) / pos_samples.shape[0]
            return mean_weight * approx_volume
        except ZeroDivisionError:
            return 0.0
示例#15
0
    def add_negatives(domain,
                      data,
                      labels,
                      thresholds,
                      sample_count,
                      background_knowledge=None,
                      distance_measure=None):
        # type: (Domain, np.ndarray, np.ndarray, Dict, int, FNode, Any) -> Tuple[np.ndarray, np.ndarray]
        """Append uniformly sampled negative examples to a data set.

        A candidate is kept when it satisfies the background knowledge and is
        not "close" to any positive example. Close means identical Boolean
        values and every real value within that variable's threshold.

        :param thresholds: Either a dict keyed by real variable, or an
            indexable with one threshold per (example row, column).
        :param sample_count: Number of candidates to draw.
        :param background_knowledge: Formula candidates must satisfy;
            ``TRUE()`` when falsy.
        :param distance_measure: Not used by this implementation.
        :return: ``(data, labels)`` extended with the kept candidates, which
            are labelled 0.
        """
        candidates = uniform(domain, sample_count)
        background_knowledge = background_knowledge or TRUE()
        supported_indices = evaluate(domain, background_knowledge, candidates)

        boolean_indices = []
        real_indices = []
        for position, variable in enumerate(domain.variables):
            if domain.is_bool(variable):
                boolean_indices.append(position)
        for position, variable in enumerate(domain.variables):
            if domain.is_real(variable):
                real_indices.append(position)

        per_variable = isinstance(thresholds, dict)
        for j in range(candidates.shape[0]):
            valid_negative = True
            for i in range(data.shape[0]):
                if not labels[i]:
                    continue
                # noinspection PyTypeChecker
                if not all(data[i, boolean_indices] == candidates[
                        j, boolean_indices]):
                    continue
                # Same Boolean assignment as a positive: compare real values.
                in_range = True
                for ri, v in zip(real_indices, domain.real_vars):
                    t = thresholds[v] if per_variable else thresholds[i, ri]
                    if abs(data[i, ri] - candidates[j, ri]) > t:
                        in_range = False
                        break
                if in_range:
                    valid_negative = False
                    break
            supported_indices[j] = supported_indices[j] and valid_negative

        negatives = candidates[supported_indices == 1, :]
        extended_data = np.concatenate([data, negatives], axis=0)
        extended_labels = np.concatenate(
            [labels, np.zeros(negatives.shape[0])])
        return extended_data, extended_labels
示例#16
0
 def get_samples(self):
     """Draw ``self.sample_count`` uniform samples from ``self.domain``."""
     samples = uniform(self.domain, self.sample_count)
     return samples
示例#17
0
文件: cli.py 项目: mboehme/learn2fix
def main():
    """Command-line entry point for preparing, learning and analysing data.

    The positional ``source`` selects the data ("smt-lib-benchmark",
    "synthetic", or "ex:<name>" for a built-in example); the sub-command
    (``prepare``, ``learn`` or ``analyze``) selects the task.
    """
    smt_lib_name = "smt-lib-benchmark"
    synthetic_name = "synthetic"

    def parse_bool(text):
        """Convert a command-line string to a bool.

        ``type=bool`` would treat every non-empty string, including
        "False", as True.
        """
        normalized = text.strip().lower()
        if normalized in ("1", "true", "t", "yes", "y", "on"):
            return True
        if normalized in ("", "0", "false", "f", "no", "n", "off"):
            return False
        raise argparse.ArgumentTypeError(
            "expected a boolean value, got {!r}".format(text))

    parser = argparse.ArgumentParser(
        description="Interface with benchmark or synthetic data for experiments"
    )

    parser.add_argument("source")
    parser.add_argument("--sample_size", type=int, default=None)
    parser.add_argument("--runs", type=int, default=None)
    parser.add_argument("--input_dir", type=str, default=None)
    parser.add_argument("--output_dir", type=str, default=None)
    parser.add_argument("--processes", type=int, default=None)
    parser.add_argument("--time_out", type=int, default=None)

    task_parsers = parser.add_subparsers(dest="task")
    prepare_parser = task_parsers.add_parser("prepare")
    prepare_parser.add_argument("--reset_samples",
                                type=parse_bool,
                                default=False)
    learn_parser = task_parsers.add_parser("learn")
    analyze_parser = task_parsers.add_parser("analyze")
    analyze_parser.add_argument("--dirs", nargs="+", type=str)
    analyze_parser.add_argument("--res_path", type=str, default=None)

    show_parsers = analyze_parser.add_subparsers()
    show_parser = show_parsers.add_parser("show")
    show.add_arguments(show_parser)

    learn_options = LearnOptions()
    learn_options.add_arguments(learn_parser)

    args = parser.parse_args()
    if args.task == "prepare":
        if args.source == smt_lib_name:
            prepare_smt_lib_benchmark()
            prepare_ratios()
            prepare_samples(args.runs, args.sample_size, args.reset_samples)
        elif args.source == synthetic_name:
            prepare_synthetic(args.input_dir, args.output_dir, args.runs,
                              args.sample_size)
    elif args.task == "learn":
        learn_options.parse_arguments(args)
        if args.source == smt_lib_name:
            learn_benchmark(args.runs, args.sample_size, args.processes,
                            args.time_out, learn_options)
        elif args.source == synthetic_name:
            learn_synthetic(args.input_dir, args.output_dir, args.runs,
                            args.sample_size, args.processes, args.time_out,
                            learn_options)
        elif args.source.startswith("ex"):
            # Report a usage error instead of an IndexError when the
            # ":<name>" part is missing.
            _, separator, example_name = args.source.partition(":")
            if not separator:
                parser.error(
                    "example sources must have the form 'ex:<name>'")
            domain, formula = examples.get_by_name(example_name)
            np.random.seed(1)
            from pywmi.sample import uniform
            samples = uniform(domain, args.sample_size)
            from pywmi import evaluate
            labels = evaluate(domain, formula, samples)
            learn_options.set_value("domain", domain, False)
            learn_options.set_value("data", samples, False)
            learn_options.set_value("labels", labels, False)
            (formula, k, h), duration = learn_options.call(True)
            print("[{:.2f}s] Learned formula (k={}, h={}): {}".format(
                duration, k, h, pretty_print(formula)))
    elif args.task == "analyze":
        analyze(args.dirs, args.res_path, show.parse_args(args))
示例#18
0
def prepare_samples(n, sample_size, reset):
    """Create labelled sample files for the selected benchmarks.

    For every benchmark bound whose recorded ratio lies in [0.2, 0.8], sample
    and label files are written until ``n`` sets of size ``sample_size``
    exist. The summary's "samples" entries are updated afterwards.

    :param n: Number of sample sets wanted per (benchmark, bounds) pair.
    :param sample_size: Number of samples per set.
    :param reset: If truthy, ignore previously recorded samples and create
        ``n`` new sets.
    """
    samples_dir = get_benchmark_samples_dir()

    seeds = [random.randint(0, 2**32 - 1) for _ in range(n)]
    samples_dict = dict()

    def sample_filter(_entry):
        """Select entries with bounds that still lack sample sets."""
        if "bounds" in _entry and benchmark_filter(_entry):
            # Test the entry itself: indexing _entry["samples"] here raised
            # KeyError for new entries and made the check below unreachable.
            if "samples" not in _entry:
                return True
            return reset or any(
                len([
                    s for s in _entry["samples"] if s["sample_size"] ==
                    sample_size and s["bounds"] == _bounds[0]
                ]) < n for _bounds in _entry["bounds"]
                if 0.2 <= _bounds[1] <= 0.8)
        return False

    for name, entry, filename in select_benchmark_files(sample_filter):
        print("Creating samples for {}".format(name))
        pysmt.environment.push_env()
        # Pop in a finally block so a failing benchmark cannot leave its
        # environment on the stack.
        try:
            pysmt.environment.get_env().enable_infix_notation = True

            density = Density.import_from(filename)
            samples_dict[name] = [] if reset else entry.get("samples", [])

            for i, (bounds, ratio) in enumerate(entry["bounds"]):
                if not (0.2 <= ratio <= 0.8):
                    continue

                print(i, bounds, ratio)
                previous_samples = [] if reset else ([
                    s for s in entry.get("samples", [])
                    if s["sample_size"] == sample_size
                    and s["bounds"] == bounds
                ])
                bounded_domain = Domain(density.domain.variables,
                                        density.domain.var_types, bounds)

                for j in range(n - len(previous_samples)):
                    seed = seeds[j]
                    samples_filename = "{}{}{}.{}.{}.{}.sample.npy".format(
                        samples_dir, os.path.sep, name, sample_size, seed, i)
                    labels_filename = "{}{}{}.{}.{}.{}.labels.npy".format(
                        samples_dir, os.path.sep, name, sample_size, seed, i)

                    os.makedirs(os.path.dirname(samples_filename),
                                exist_ok=True)

                    # Seed both generators before sampling this set.
                    random.seed(seed)
                    np.random.seed(seed)
                    samples = uniform(bounded_domain, sample_size)
                    labels = evaluate(bounded_domain, density.support,
                                      samples)
                    np.save(samples_filename, samples)
                    np.save(labels_filename, labels)

                    samples_dict[name].append({
                        "bounds": bounds,
                        "seed": seed,
                        "samples_filename": samples_filename,
                        "labels_filename": labels_filename,
                        "sample_size": sample_size
                    })
        finally:
            pysmt.environment.pop_env()

    def edit(summary):
        """Store the collected sample descriptions in the summary."""
        for _n, _s in samples_dict.items():
            summary[_n]["samples"] = _s

    edit_summary(edit)
示例#19
0
from inspect import signature

import numpy as np

from smtlearn.examples import ice_cream_problem
from pywmi.plot import plot_data, plot_formula
from pywmi.sample import uniform
from pywmi.smt_check import evaluate
import random
from smtlearn.violations.core import RandomViolationsStrategy
from smtlearn.k_cnf_smt_learner import KCnfSmtLearner
from pywmi.smt_print import pretty_print

# Seed both the standard-library and the NumPy generators before any sampling.
random.seed(666)
np.random.seed(666)

# Load the example problem: its domain, target formula and name.
domain, formula, name = ice_cream_problem()
# plot_formula(None, domain, formula)

# Draw 100 uniform samples and label them with the target formula.
data = uniform(domain, 100)
labels = evaluate(domain, formula, data)

# Learner constructed with (3, 3) and a RandomViolationsStrategy(10); learning
# starts from 20 row indices drawn without replacement.
learner = KCnfSmtLearner(3, 3, RandomViolationsStrategy(10))
initial_indices = random.sample(range(data.shape[0]), 20)

# Learn from the labelled data and print the result in readable form.
learned_theory = learner.learn(domain, data, labels, initial_indices)
print(pretty_print(learned_theory))
示例#20
0
    def build_tree(self, bounds=None, volume=None, depth=0):
        """
        Recursively builds a sampling tree for the given region.

        The region is sampled uniformly and labelled by the oracle. It then
        becomes a leaf (stopping function satisfied), is halved along the
        best-scoring dimension with both halves built recursively, or is
        returned as an empty region.

        :param Tuple bounds: The list of bounds (bound = ((lb, closed?), (ub, closed?)));
            ``self.bounds`` is used when omitted
        :param float volume: The bounds volume; ``self.get_volume(bounds)`` is used
            when omitted
        :param int depth: The depth of the current tree
        :return Node: The tree
        """

        if bounds is None:
            bounds = self.bounds
            domain = self.domain
        else:
            # Restrict the sampling domain to the numeric (lb, ub) of each bound.
            domain = self.domain.change_bounds({
                v: (t[0][0], t[1][0])
                for v, t in zip(self.domain.real_vars, bounds)
            })

        if volume is None:
            volume = self.get_volume(bounds)

        samples = uniform(domain, self.sample_count, rand_gen=self.rand_gen)
        labels = self.oracle.check(samples)

        accepted_count = sum(labels)
        # print("Ratio is: {} (bounds={})".format(accepted_count / self.sample_count, bounds))
        # The stopping function sees the accepted ratio, the relative volume
        # and the depth.
        if self.stopping_f(accepted_count / self.sample_count,
                           volume / self.volume, depth):
            if accepted_count / self.sample_count >= 0.5:
                pass  # print("Stopping because sufficient samples ({} / {}) with volume={}".format(accepted_count, self.sample_count, volume))
            else:
                pass  # print("Stopping because insufficient volume ({})".format(volume))
            return Node(samples, labels, volume, self, bounds, False,
                        self.rand_gen)  # Sufficiently full region

        # Split when a sample was accepted here, or the oracle can still
        # produce an accepted sample.
        if accepted_count > 0 or self.oracle.get_accepted_sample() is not None:
            split = None
            score = None
            # Candidate split per dimension: the midpoint of its bounds.
            for i in range(len(bounds)):
                lb, ub = bounds[i][0][0], bounds[i][1][0]
                split_value = lb + (ub - lb) / 2
                if accepted_count < self.sample_count:
                    split_score = self.scoring_f(samples, labels, i,
                                                 split_value)
                else:
                    # All samples accepted: score by width instead.
                    split_score = ub - lb
                if score is None or split_score > score:
                    split = (i, split_value)
                    score = split_score

            # print("Splitting on {} <= {} (volume={})".format(split[0], split[1], volume))
            # First half: upper bound replaced by the split value (closed).
            bounds_1 = tuple(b if i != split[0] else (b[0], (split[1], True))
                             for i, b in enumerate(bounds))
            self.oracle.add_split(split, True)
            child_1 = self.build_tree(bounds_1, volume / 2, depth + 1)

            self.oracle.remove_last_split()
            # Second half: lower bound replaced by the split value (open).
            bounds_2 = tuple(b if i != split[0] else ((split[1], False), b[1])
                             for i, b in enumerate(bounds))
            self.oracle.add_split(split, False)
            child_2 = self.build_tree(bounds_2, volume / 2, depth + 1)
            # NOTE(review): no remove_last_split() follows this second
            # add_split -- confirm the oracle's split bookkeeping does not
            # require one before returning to the caller.

            # print("Done splitting on {} <= {} (volume={})".format(split[0], split[1], volume))
            return Node(samples, labels, volume, self, bounds, False,
                        self.rand_gen, split,
                        (child_1, child_2))  # Splitting region

        # print("Stopping because no samples, volume={}".format(volume))
        return Node(samples, labels, volume, self, bounds, True,
                    self.rand_gen)  # Empty region
示例#21
0
def prepare_synthetic(input_directory, output_directory, runs, sample_size):
    """Import synthetic problems and create labelled sample files for them.

    Every ``synthetics*.txt`` file below ``input_directory`` is read as JSON.
    New problems are exported as density files and registered in the
    synthetic database; known problems are restored from their database
    entry. Sample and label files are then written until ``runs`` sets of
    size ``sample_size`` exist per problem.

    :param input_directory: Directory searched recursively for problem files.
    :param output_directory: Directory for the database, density and sample
        files.
    :param runs: Number of sample sets wanted per problem.
    :param sample_size: Number of samples per set.
    """
    seeds = [random.randint(0, 2**32 - 1) for _ in range(runs)]

    db = get_synthetic_db(output_directory, True)
    # Re-running against an existing output directory must not fail.
    os.makedirs(output_directory, exist_ok=True)
    for filename in glob.glob("{}/**/synthetics*.txt".format(input_directory),
                              recursive=True):
        pysmt.environment.push_env()
        # Pop in a finally block so a failing problem cannot leave its
        # environment on the stack.
        try:
            pysmt.environment.get_env().enable_infix_notation = True
            with open(filename) as file_reference:
                flat = json.load(file_reference)

            synthetic = flat["synthetic_problem"]
            name = synthetic["problem"]["name"]
            print(name)

            if not db.exists(name):
                domain = import_domain(synthetic["problem"]["domain"])
                formula = nested_to_smt(synthetic["problem"]["theory"])
                Density(domain, formula, smt.Real(1.0)).export_to(
                    os.path.join(output_directory, "{}.density".format(name)))
                entry = {
                    "domain": export_domain(domain),
                    "generation": {
                        "h": synthetic["half_space_count"],
                        "k": synthetic["formula_count"],
                        "l": synthetic["terms_per_formula"],
                        "structure": synthetic["cnf_or_dnf"],
                    },
                    "formula": smt_to_nested(formula),
                    "samples": []
                }
            else:
                entry = dict(db.get(name))
                domain = import_domain(entry["domain"])
                # The entry stores the formula in nested form; rebuild it
                # from there, not from the domain description.
                formula = nested_to_smt(entry["formula"])

            samples = entry.get("samples", [])
            matching_samples = [
                existing for existing in samples
                if existing["sample_size"] == sample_size
            ]

            for i in range(runs - len(matching_samples)):
                seed = seeds[len(matching_samples) + i]
                samples_file = "{}.{}.{}.samples.npy".format(
                    name, sample_size, seed)
                labels_file = "{}.{}.{}.labels.npy".format(
                    name, sample_size, seed)
                np.random.seed(seed)
                data = uniform(domain, sample_size)
                np.save(os.path.join(output_directory, samples_file), data)
                labels = evaluate(domain, formula, data)
                np.save(os.path.join(output_directory, labels_file), labels)
                samples.append({
                    "sample_size": sample_size,
                    "seed": seed,
                    "samples_file": samples_file,
                    "labels_file": labels_file
                })

            entry["samples"] = samples
            db.set(name, entry)
        finally:
            pysmt.environment.pop_env()