import datetime
import json
import os
import subprocess
import random
from collections import defaultdict

# DreamCoder-internal imports; module paths assumed from the standard
# DreamCoder package layout. LearnedFeatureExtractor is assumed to be
# defined alongside this module in the list domain.
from dreamcoder.dreamcoder import explorationCompression
from dreamcoder.domains.list.listPrimitives import McCarthyPrimitives
from dreamcoder.domains.list.makeListTasks import make_list_bootstrap_tasks
from dreamcoder.frontier import Frontier, FrontierEntry
from dreamcoder.grammar import Grammar
from dreamcoder.program import Program
from dreamcoder.utilities import eprint, get_root_dir, testTrainSplit


def ocamlInduce(g, frontiers, _=None,
                topK=1, pseudoCounts=1.0, aic=1.0,
                structurePenalty=0.001, a=0, CPUs=1,
                bs=1000000, topI=300):
    """Grammar induction ("compression") via the OCaml backend."""
    # This is a dirty hack!
    # Memory consumption increases with the number of CPUs,
    # and early on we have a lot of stuff to compress.
    # If this is the first iteration, only use a fraction of the available CPUs.
    topK = 5
    topI = 600
    if all(not p.isInvented for p in g.primitives):
        if a > 3:
            CPUs = max(1, int(CPUs / 6))
        else:
            CPUs = max(1, int(CPUs / 3))
    else:
        CPUs = max(1, int(CPUs / 2))
    CPUs = 2

    # XXX FIXME XXX
    # For unknown reasons, doing compression all in one go works correctly,
    # while driving it from Python with the outer loop causes problems.
    iterations = 99  # maximum number of components to add at once

    while True:
        g0 = g

        originalFrontiers = frontiers
        t2f = {f.task: f for f in frontiers}
        frontiers = [f for f in frontiers if not f.empty]
        message = {"arity": a,
                   "topK": topK,
                   "pseudoCounts": float(pseudoCounts),
                   "aic": aic,
                   "bs": bs,
                   "topI": topI,
                   "structurePenalty": float(structurePenalty),
                   "CPUs": CPUs,
                   "DSL": g.json(),
                   "iterations": iterations,
                   "frontiers": [f.json() for f in frontiers]}
        message = json.dumps(message)

        # Save the compression message for debugging / reproducibility.
        timestamp = datetime.datetime.now().isoformat()
        os.system("mkdir -p compressionMessages")
        fn = "compressionMessages/%s" % timestamp
        with open(fn, "w") as f:
            f.write(message)
        eprint("Compression message saved to:", fn)

        try:
            # Invoke the OCaml `compression` binary at the repository root.
            compressor_file = os.path.join(get_root_dir(), 'compression')
            process = subprocess.Popen(compressor_file,
                                       stdin=subprocess.PIPE,
                                       stdout=subprocess.PIPE)
            response, error = process.communicate(bytes(message, encoding="utf-8"))
            response = json.loads(response.decode("utf-8"))
        except OSError as exc:
            raise exc

        # Rebuild the grammar from the backend's JSON response.
        g = response["DSL"]
        g = Grammar(g["logVariable"],
                    [(l, p.infer(), p)
                     for production in g["productions"]
                     for l in [production["logProbability"]]
                     for p in [Program.parse(production["expression"])]],
                    continuationType=g0.continuationType)

        # Rewrite each frontier's programs in terms of the new grammar.
        frontiers = {original.task:
                     Frontier([FrontierEntry(p,
                                             logLikelihood=e["logLikelihood"],
                                             logPrior=g.logLikelihood(original.task.request, p))
                               for e in new["programs"]
                               for p in [Program.parse(e["program"])]],
                              task=original.task)
                     for original, new in zip(frontiers, response["frontiers"])}
        frontiers = [frontiers.get(f.task, t2f[f.task])
                     for f in originalFrontiers]

        if iterations == 1 and len(g) > len(g0):
            eprint("Grammar changed - running another round of consolidation.")
            continue
        else:
            eprint("Finished consolidation.")
            return g, frontiers
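# Usage sketch for `ocamlInduce` (illustrative only: the grammar `g`, the list
# of solved `frontiers`, and the keyword values below are assumptions, not the
# project's actual call site):
#
#     g, frontiers = ocamlInduce(g, frontiers,
#                                topK=2, pseudoCounts=30.0,
#                                aic=1.0, structurePenalty=1.5,
#                                a=3, CPUs=2)
#
# The call returns the induced grammar together with the frontiers rewritten
# in terms of any newly invented primitives.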
def main(args):
    """
    Takes the return value of the `commandlineArguments()` function as input and
    trains/tests the model on manipulating sequences of numbers.
    """
    random.seed(args.pop("random_seed"))

    tasks = make_list_bootstrap_tasks()
    print(tasks)

    maxTasks = args.pop("maxTasks")
    if maxTasks and len(tasks) > maxTasks:
        eprint("Unwilling to handle {} tasks, truncating...".format(len(tasks)))
        random.shuffle(tasks)
        del tasks[maxTasks:]

    primitives = McCarthyPrimitives()

    from dreamcoder.program import Program, Invented
    # plus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0))))))))))")
    # plus = Invented(plus)
    # primitives.append(plus)

    # minus = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 ($2 (decr0 $1) (decr0 $0)))))))))")
    # minus = Invented(minus)
    # primitives.append(minus)

    # times = Program.parse("(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 0 (#(lambda (lambda (fix2 $1 $0 (lambda (lambda (lambda (if0 $0 $1 (incr ($2 $1 (decr0 $0)))))))))) $1 ($2 (decr0 $0) $1)))))))))")
    # times = Invented(times)
    # primitives.append(times)

    baseGrammar = Grammar.uniform(primitives)
    # Override the uniform grammar: give the 'fix*' (fixpoint) primitives a
    # different log weight from the rest of the primitives.
    baseGrammar = Grammar(0.0,
                          [(5.0 if p.name.startswith('fix') else 0.0, p.infer(), p)
                           for p in primitives])

    extractor = {
        "learned": LearnedFeatureExtractor,
    }[args.pop("extractor")]
    extractor.H = args.pop("hidden")

    timestamp = datetime.datetime.now().isoformat()
    outputDirectory = "experimentOutputs/list/%s" % timestamp
    os.system("mkdir -p %s" % outputDirectory)

    args.update({
        "featureExtractor": extractor,
        "outputPrefix": "%s/list" % outputDirectory,
        "evaluationTimeout": 0.0005,
    })

    eprint("Got {} list tasks".format(len(tasks)))
    split = args.pop("split")
    if split:
        train_some = defaultdict(list)
        for t in tasks:
            # necessary = train_necessary(t)
            # if not necessary:
            #     continue
            # if necessary == "some":
            #     train_some[t.name.split()[0]].append(t)
            # else:
            t.mustTrain = True
        # for k in sorted(train_some):
        #     ts = train_some[k]
        #     random.shuffle(ts)
        #     ts.pop().mustTrain = True

        test, train = testTrainSplit(tasks, split)
        eprint("Allotted {} tasks for training and {} for testing".format(
            len(train), len(test)))
    else:
        train = tasks
        test = []

    result = explorationCompression(baseGrammar, train, testingTasks=test, **args)
    print([x.bestPosterior for x in result.taskSolutions.values()])
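# Example entry point (a minimal sketch; `commandlineArguments` comes from
# dreamcoder.dreamcoder, the flag values shown are illustrative assumptions,
# and domain-specific options such as --random_seed, --maxTasks, --split,
# --extractor, and --hidden are assumed to be registered via the parser's
# extras):
#
# if __name__ == "__main__":
#     main(commandlineArguments(iterations=10,
#                               enumerationTimeout=10,
#                               topK=2,
#                               pseudoCounts=30.0,
#                               a=3,
#                               CPUs=2))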