示例#1
0
def ocamlInduce(g,
                frontiers,
                _=None,
                topK=1,
                pseudoCounts=1.0,
                aic=1.0,
                structurePenalty=0.001,
                a=0,
                CPUs=1,
                bs=1000000,
                topI=300):
    # This is a dirty hack!
    # Memory consumption increases with the number of CPUs
    # And early on we have a lot of stuff to compress
    # If this is the first iteration, only use a fraction of the available CPUs
    topK = 5
    topI = 600
    if all(not p.isInvented for p in g.primitives):
        if a > 3:
            CPUs = max(1, int(CPUs / 6))
        else:
            CPUs = max(1, int(CPUs / 3))
    else:
        CPUs = max(1, int(CPUs / 2))
    CPUs = 2

    # X X X FIXME X X X
    # for unknown reasons doing compression all in one go works correctly and doing it with Python and the outer loop causes problems
    iterations = 99  # maximum number of components to add at once

    while True:
        g0 = g

        originalFrontiers = frontiers
        t2f = {f.task: f for f in frontiers}
        frontiers = [f for f in frontiers if not f.empty]
        message = {
            "arity": a,
            "topK": topK,
            "pseudoCounts": float(pseudoCounts),
            "aic": aic,
            "bs": bs,
            "topI": topI,
            "structurePenalty": float(structurePenalty),
            "CPUs": CPUs,
            "DSL": g.json(),
            "iterations": iterations,
            "frontiers": [f.json() for f in frontiers]
        }

        message = json.dumps(message)
        if True:
            timestamp = datetime.datetime.now().isoformat()
            os.system("mkdir  -p compressionMessages")
            fn = "compressionMessages/%s" % timestamp
            with open(fn, "w") as f:
                f.write(message)
            eprint("Compression message saved to:", fn)

        try:
            # Get relative path
            compressor_file = os.path.join(get_root_dir(), 'compression')
            process = subprocess.Popen(compressor_file,
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE)
            response, error = process.communicate(
                bytes(message, encoding="utf-8"))
            response = json.loads(response.decode("utf-8"))
        except OSError as exc:
            raise exc

        g = response["DSL"]
        g = Grammar(g["logVariable"],
                    [(l, p.infer(), p) for production in g["productions"]
                     for l in [production["logProbability"]]
                     for p in [Program.parse(production["expression"])]],
                    continuationType=g0.continuationType)

        frontiers = {
            original.task: Frontier([
                FrontierEntry(p,
                              logLikelihood=e["logLikelihood"],
                              logPrior=g.logLikelihood(original.task.request,
                                                       p))
                for e in new["programs"]
                for p in [Program.parse(e["program"])]
            ],
                                    task=original.task)
            for original, new in zip(frontiers, response["frontiers"])
        }
        frontiers = [
            frontiers.get(f.task, t2f[f.task]) for f in originalFrontiers
        ]
        if iterations == 1 and len(g) > len(g0):
            eprint("Grammar changed - running another round of consolidation.")
            continue
        else:
            eprint("Finished consolidation.")
            return g, frontiers
示例#2
0
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.
    """
    random.seed(args.pop("random_seed"))

    tasks = make_list_bootstrap_tasks()
    print(tasks)
    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:

        eprint("Unwilling to handle {} tasks, truncating..".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]

    primitives = McCarthyPrimitives()
    from dreamcoder.program import Program, Invented
    # plus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0))))))))))")
    # plus = Invented(plus)
    # primitives.append(plus)
    # minus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 ($2 (decr0 $1) (decr0 $0)))))))))")
    # minus = Invented(minus)
    # primitives.append(minus)
    # times = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 0 (#(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0)))))))))) $1 ($2 (decr0 $0) $1)))))))))")
    # times = Invented(times)
    # primitives.append(times)
    baseGrammar = Grammar.uniform(primitives)
    baseGrammar = Grammar(
        0.0, [(5.0 if p.name.startswith('fix') else 0.0, p.infer(), p)
              for p in primitives])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        train_some = defaultdict(list)
        for t in tasks:
            # necessary = train_necessary(t)
            # if not necessary:
            #     continue
            # if necessary == "some":
            # train_some[t.name.split()[0]].append(t)
            # else:
            t.mustTrain = True
        # for k in sorted(train_some):
        #     ts = train_some[k]
        #     random.shuffle(ts)
        #     ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)

        eprint("Alotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    result = explorationCompression(baseGrammar,
                                    train,
                                    testingTasks=test,
                                    **args)
    print([x.bestPosterior for x in result.taskSolutions.values()])