def iterative_pcfg(*args, **kwargs):
    """Launch the iterative-PCFG baseline.

    Forces the baseline-specific settings into ``kwargs`` and then hands
    everything to ``ec.explorationCompression``.
    """
    kwargs["message"] = "iterative_pcfg"
    kwargs["onlyBaselines"] = False
    kwargs["outputPrefix"] = None
    kwargs["useRecognitionModel"] = False
    # Self-assignment keeps the caller's value but insists the key is
    # present (KeyError otherwise).  XXX: should we change this?
    kwargs["iterations"] = kwargs["iterations"]
    kwargs["aic"] = float("inf")
    kwargs["pseudoCounts"] = 0
    return ec.explorationCompression(*args, **kwargs)
def enumeration(*args, **kwargs):
    """Launch the pure-enumeration baseline.

    A single iteration with no recognition model and no compression
    (``aic`` is infinite), delegating to ``ec.explorationCompression``.
    """
    kwargs["message"] = "enumeration"
    kwargs["onlyBaselines"] = False
    kwargs["outputPrefix"] = None
    kwargs["useRecognitionModel"] = False
    kwargs["iterations"] = 1
    kwargs["aic"] = float("inf")
    # We will be evaluating the baselines using benchmarking on the testing
    # set, so we should just use whatever frontier size will be used for
    # benchmarking:
    # "frontierSize": 200000,
    return ec.explorationCompression(*args, **kwargs)
def robustfill(*args, **kwargs):
    """Launch the RobustFill-style baseline.

    One iteration in which a recognition model is trained for many steps on
    only samples from an unlearned generative model, then delegates to
    ``ec.explorationCompression``.
    """
    kwargs["message"] = "robustfill"
    kwargs["onlyBaselines"] = False
    kwargs["outputPrefix"] = None
    # Recognition model trained on a ton of iterations on only samples from
    # an unlearned generative model (helmholtzRatio of 1.0 below).
    kwargs["useRecognitionModel"] = True
    kwargs["iterations"] = 1
    kwargs["steps"] = 50000
    kwargs["helmholtzRatio"] = 1.0
    kwargs["pseudoCounts"] = 1.
    kwargs["aic"] = float("inf")
    return ec.explorationCompression(*args, **kwargs)
# NOTE(review): the fragment below closes a call whose opening is outside
# this view.
tasks=tasks, bidirectional=True)

if __name__ == "__main__":
    # Build the text-editing task suite and report how many tasks we got.
    tasks = makeTasks()
    eprint("Generated", len(tasks), "tasks")
    # Hold out 20% of the tasks for testing.
    test, train = testTrainSplit(tasks, 0.2)
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))
    # Start from a uniform prior over the DSL primitives.
    baseGrammar = Grammar.uniform(primitives)
    # Launch the wake/sleep exploration-compression loop for the text domain.
    explorationCompression(baseGrammar, train,
                           testingTasks=test,
                           outputPrefix="experimentOutputs/text",
                           evaluationTimeout=0.0005,
                           **commandlineArguments(
                               steps=500,
                               iterations=10,
                               helmholtzRatio=0.5,
                               topK=2,
                               maximumFrontier=2,
                               structurePenalty=10.,
                               a=3,
                               activation="relu",
                               CPUs=numberOfCPUs(),
                               featureExtractor=LearnedFeatureExtractor,
                               pseudoCounts=10.0))
# NOTE(review): this chunk starts inside a method (note `self` below); the
# enclosing `def` is outside this view.
        f.close()
        self.mean = []  # presumably clears cached per-feature means — TODO confirm

if __name__ == "__main__":
    # Build the geometric-drawing task suite.
    tasks = makeTasks()
    eprint("Generated", len(tasks), "tasks")
    # 50/50 test/train split.
    test, train = testTrainSplit(tasks, 0.5)
    eprint("Split tasks into %d/%d test/train" % (len(test), len(train)))
    # Uniform prior over the drawing primitives.
    baseGrammar = Grammar.uniform(primitives)
    # Wake/sleep exploration-compression for the geometry domain, using the
    # Rust compressor backend and the GeomFeatureCNN feature extractor.
    explorationCompression(baseGrammar, train,
                           testingTasks=test,
                           outputPrefix="experimentOutputs/geom",
                           compressor="rust",
                           evaluationTimeout=0.01,
                           **commandlineArguments(
                               steps=200,
                               a=1,
                               iterations=100,
                               useRecognitionModel=True,
                               helmholtzRatio=0.5,
                               helmholtzBatch=200,
                               featureExtractor=GeomFeatureCNN,
                               topK=2,
                               maximumFrontier=1000,
                               CPUs=numberOfCPUs(),
                               pseudoCounts=10.0))
# NOTE(review): `hardTasks`, `clamp`, `lr`, `steps`, `attempts`, `baseGrammar`,
# `train`, `test`, and `arguments` are defined earlier in the original file;
# this loop is likely nested inside sweeps over those parameters.
for s in [0.1, 0.5, 1, 3]:  # sweep over the proposal standard deviation
    start = time.time()
    # Fit the hard tasks in the compiled worker and collect one loss per task.
    losses = callCompiled(debugMany, hardTasks, clamp, lr, steps, attempts, s)
    losses = dict(zip(hardTasks, losses))
    failures = 0
    for t, l in sorted(losses.items(), key=lambda t_l: t_l[1]):
        # print t,l
        # A task counts as a failure when its loss exceeds the (negated)
        # likelihood threshold.
        if l > -t.likelihoodThreshold:
            failures += 1
    eprint("clamp,lr,steps, attempts,std", clamp, lr, steps, attempts, s)
    eprint("%d/%d failures" % (failures, len(hardTasks)))
    eprint("dt=", time.time() - start)
    eprint()
    eprint()

assert False  # debugging sweep only: never fall through to the real run below

# Timestamped output directory for this rational-functions experiment.
timestamp = datetime.datetime.now().isoformat()
outputDirectory = "experimentOutputs/rational/%s" % timestamp
os.system("mkdir -p %s" % outputDirectory)
explorationCompression(baseGrammar, train,
                       outputPrefix="%s/rational" % outputDirectory,
                       evaluationTimeout=0.1,
                       testingTasks=test,
                       **arguments)
"compressor": "rust" }) baseGrammar = Grammar.uniform(prims()) from makeListTasks import make_list_bootstrap_tasks, bonusListProblems if not args.pop("Lucas"): train = make_list_bootstrap_tasks() #Max commented this line if args.pop("filter_task_args"): train = [t for t in train if len(t.request.functionArguments()) == 1] eprint("Total number of training tasks:",len(train)) for t in train: eprint(t.describe()) eprint() #eprint("train:",train) #eprint("train[0].features:",train[0].features) #eprint("train[0]:",train[0]) #testing stuff #eprint([t.request for t in train]) explorationCompression(baseGrammar, train, testingTasks=test, **args)
# NOTE(review): `circuits`, `NUMBEROFTASKS`, `inputDistribution`, and
# `gateDistribution` are defined earlier in the original file.
# Sample random circuits until we have 2*NUMBEROFTASKS distinct ones
# (first half trains, second half tests).
while len(circuits) < NUMBEROFTASKS * 2:
    inputs = sampleDistribution(inputDistribution)
    gates = sampleDistribution(gateDistribution)
    newTask = Circuit(numberOfInputs=inputs, numberOfGates=gates)
    if newTask not in circuits:  # deduplicate on circuit equality
        circuits.append(newTask)
eprint("Sampled %d circuits with %d unique functions" %
       (len(circuits), len({t.signature for t in circuits})))
tasks = [t.task() for t in circuits[:NUMBEROFTASKS]]
testing = [t.task() for t in circuits[NUMBEROFTASKS:]]
# Uniform prior over the circuit primitives, then run the wake/sleep loop.
baseGrammar = Grammar.uniform(primitives)
explorationCompression(baseGrammar, tasks,
                       testingTasks=testing,
                       outputPrefix="experimentOutputs/circuit",
                       evaluationTimeout=None,
                       **commandlineArguments(
                           iterations=10,
                           aic=1.,
                           structurePenalty=1,
                           CPUs=numberOfCPUs(),
                           featureExtractor=DeepFeatureExtractor,
                           topK=2,
                           maximumFrontier=100,
                           helmholtzRatio=0.5,
                           a=2,
                           activation="relu",
                           pseudoCounts=5.))
# NOTE(review): `args`, `initial_puddleworld_lex`, `baseGrammar`, the task
# lists, the enum flags, and `checkpoint_analysis` used below are defined
# earlier in the original file.
if args.pop('use_initial_lexicon'):
    print("Using initial lexicon for Puddleworld PyCCG learner.")
    pyccg_learner = WordLearner(initial_puddleworld_lex)
else:
    pyccg_learner = WordLearner(None)  # start with an empty lexicon

# Couple the PyCCG word learner to the Dreamcoder loop via the EC translation.
learner = ECLanguageLearner(pyccg_learner,
                            ec_ontology_translation_fn=puddleworld_ec_translation_fn,
                            use_pyccg_enum=use_pyccg_enum,
                            use_blind_enum=use_blind_enum)

# Run Dreamcoder exploration/compression.
explorationCompression(baseGrammar, allTrain,
                       testingTasks=allTest,
                       outputPrefix=outputDirectory,
                       custom_wake_generative=learner.wake_generative_with_pyccg,
                       **args)

###################################################################################################
### Checkpoint analyses. Can be safely ignored to run the PyCCG+Dreamcoder learner itself.

# These are in this file because Dill is silly and requires loading from the original calling file.
if checkpoint_analysis is not None:
    # Load the checkpoint.
    print("Loading checkpoint ", checkpoint_analysis)
    with open(checkpoint_analysis, 'rb') as handle:
        result = dill.load(handle)
    recognitionModel = result.recognitionModel
S=5), makeTask("series capacitors", arrow(tlist(tpositive), tpositive), lambda cs: sum(c**(-1) for c in cs)**(-1), N=20, S=5), ] if __name__ == "__main__": baseGrammar = Grammar.uniform([ real, f0, f1, fpi, real_power, real_subtraction, real_addition, real_multiplication ]) eprint("Got %d equation discovery tasks..." % len(tasks)) explorationCompression(baseGrammar, tasks, outputPrefix="experimentOutputs/scientificLaws", evaluationTimeout=0.1, testingTasks=[], **commandlineArguments(iterations=10, CPUs=numberOfCPUs(), structurePenalty=1., helmholtzRatio=0.5, a=3, maximumFrontier=10000, topK=2, featureExtractor=None, pseudoCounts=10.0))
# NOTE(review): this chunk starts inside a visitor class; the first statement
# below is the tail of a method whose `def` is above this view.
        return e

    def invented(self, e):
        # Visit the body of an invented primitive.
        return e.body.visit(self)

    def abstraction(self, e):
        # Rebuild the abstraction around the visited body.
        return Abstraction(e.body.visit(self))

    def application(self, e):
        # Visit both the function and the argument positions.
        return Application(e.f.visit(self), e.x.visit(self))

    def index(self, e):
        # de Bruijn indices are returned unchanged.
        return e

# Shared singleton instance of the visitor.
RandomParameterization.single = RandomParameterization()

if __name__ == "__main__":
    baseGrammar = Grammar.uniform(primitives)
    # Task statistics feed the feature extractor construction.
    statistics = Task.standardizeTasks(tasks)
    featureExtractor = makeFeatureExtractor(statistics)
    explorationCompression(
        baseGrammar, tasks,
        outputPrefix="experimentOutputs/continuousPolynomial",
        **commandlineArguments(frontierSize=10**2,
                               iterations=5,
                               featureExtractor=featureExtractor,
                               pseudoCounts=10.0))
# NOTE(review): `biggest`, `train`, `test`, `e`, and `baseGrammar` are defined
# earlier in the original file, outside this chunk.
# Debug pass: print each training task's log-likelihood under expression `e`
# and track the worst (smallest) one.
for t in train:
    l = t.logLikelihood(e)
    eprint(t, l)
    biggest = min(biggest, l)
eprint(biggest)
assert False  # debug path: stop before launching a real run

if False:
    # Dead benchmarking code comparing the two enumeration strategies.
    with timing("best first enumeration"):
        baseGrammar.bestFirstEnumeration(arrow(tint, tint))
    with timing("depth first search"):
        # Fixed: this was a Python 2 `print` *statement* (`print len(...)`),
        # which is a SyntaxError under Python 3 and would break the whole
        # module at import time; now a print() call, valid on both 2 and 3.
        print(len(list(enumeration(baseGrammar, Context.EMPTY, [],
                                   arrow(tint, tint),
                                   maximumDepth=99,
                                   upperBound=13,
                                   lowerBound=0))))
    assert False

# Wake/sleep exploration-compression for the symbolic-regression domain.
explorationCompression(baseGrammar, train,
                       outputPrefix="experimentOutputs/regression",
                       evaluationTimeout=None,
                       testingTasks=test,
                       **commandlineArguments(frontierSize=10**2,
                                              iterations=10,
                                              CPUs=numberOfCPUs(),
                                              structurePenalty=1.,
                                              helmholtzRatio=0.5,
                                              a=1,  # arity
                                              maximumFrontier=1000,
                                              topK=2,
                                              featureExtractor=DeepFeatureExtractor,
                                              pseudoCounts=10.0))