def induceGrammar(*args, **kwargs):
    """Run grammar induction with the compressor selected by ``backend``.

    Positional arguments are forwarded unchanged to the chosen compressor.
    ``args[0]`` is treated as the current grammar and ``args[1]`` as an
    iterable of frontiers, each exposing an ``empty`` attribute.

    Keyword arguments:
        backend: "pypy" (default), "rust", "vs", "pypy_vs", "ocaml" or
            "memorize". Removed from ``kwargs`` before forwarding.
        iteration, topk_use_only_likelihood: must be present for the
            "pypy_vs" and "ocaml" backends, where they are discarded
            before forwarding (a missing key raises ``KeyError``).
        Any other keyword is passed through to the compressor.

    Returns:
        The ``(grammar, frontiers)`` pair produced by the compressor, or
        ``(args[0], args[1])`` unchanged when every frontier is empty.

    Raises:
        AssertionError: if ``backend`` does not name a known compressor.
    """
    # Nothing to compress: hand the inputs back untouched.
    if all(f.empty for f in args[1]):
        eprint("No nonempty frontiers, exiting grammar induction early.")
        return args[0], args[1]

    with timing("Induced a grammar"):
        backend = kwargs.pop("backend", "pypy")
        if backend == "pypy":
            g, newFrontiers = callCompiled(pypyInduce, *args, **kwargs)
        elif backend == "rust":
            g, newFrontiers = rustInduce(*args, **kwargs)
        elif backend == "vs":
            g, newFrontiers = rustInduce(*args, vs=True, **kwargs)
        elif backend == "pypy_vs":
            # These two options are dropped rather than forwarded.
            kwargs.pop('iteration')
            kwargs.pop('topk_use_only_likelihood')
            # Fixed path: the dump is overwritten on every invocation.
            fn = '/tmp/vs.pickle'
            with open(fn, 'wb') as handle:
                pickle.dump((args, kwargs), handle)
            eprint(
                "For debugging purposes, the version space compression invocation has been saved to",
                fn)
            g, newFrontiers = callCompiled(induceGrammar_Beta, *args, **kwargs)
        elif backend == "ocaml":
            kwargs.pop('iteration')
            kwargs.pop('topk_use_only_likelihood')
            # Hard-coded settings forwarded to ocamlInduce.
            kwargs['topI'] = 300
            kwargs['bs'] = 1000000
            g, newFrontiers = ocamlInduce(*args, **kwargs)
        elif backend == "memorize":
            g, newFrontiers = memorizeInduce(*args, **kwargs)
        else:
            # Raised explicitly instead of `assert False`: asserts are
            # removed under `python -O`, which would let control fall
            # through to the return below with `g` unbound.
            raise AssertionError("unknown compressor")
    return g, newFrontiers
def induceGrammar(*args, **kwargs):
    """Run grammar induction with the compressor selected by ``backend``.

    ``args[0]`` is treated as the current grammar and ``args[1]`` as an
    iterable of frontiers, each exposing an ``empty`` attribute.

    For any backend whose name contains "pypy", the grammar and frontiers
    are stripped of primitive values and every task is replaced by a
    data-free ``Task`` before the compressor is called; the results are
    unstripped and re-attached to the saved tasks (matched by task name)
    afterwards. In that case only the grammar and the frontiers are
    forwarded positionally.

    Keyword arguments:
        backend: "pypy" (default), "rust", "vs", "pypy_vs", "ocaml" or
            "memorize". Removed from ``kwargs`` before forwarding.
        iteration, topk_use_only_likelihood: must be present for the
            "pypy_vs" and "ocaml" backends, where they are discarded
            before forwarding (a missing key raises ``KeyError``).
        Any other keyword is passed through to the compressor.

    Returns:
        The ``(grammar, frontiers)`` pair produced by the compressor, or
        ``(args[0], args[1])`` unchanged when every frontier is empty.

    Raises:
        AssertionError: if ``backend`` does not name a known compressor.
    """
    # Nothing to compress: hand the inputs back untouched.
    if all(f.empty for f in args[1]):
        eprint("No nonempty frontiers, exiting grammar induction early.")
        return args[0], args[1]

    backend = kwargs.pop("backend", "pypy")
    uses_pypy = 'pypy' in backend

    if uses_pypy:
        # pypy might not like some of the imports needed for the primitives
        # but the primitive values are irrelevant for compression
        # therefore strip them out and then replace them once we are done
        # ditto for task data
        stripped_grammar = args[0].strip_primitive_values()
        stripped_frontiers = [front.strip_primitive_values() for front in args[1]]
        # Keyed by task name so tasks can be re-attached after compression.
        original_tasks = {f.task.name: f.task for f in stripped_frontiers}
        stripped_frontiers = [
            Frontier(f.entries, Task(f.task.name, f.task.request, []))
            for f in stripped_frontiers
        ]
        args = [stripped_grammar, stripped_frontiers]

    with timing("Induced a grammar"):
        if backend == "pypy":
            g, newFrontiers = callCompiled(pypyInduce, *args, **kwargs)
        elif backend == "rust":
            g, newFrontiers = rustInduce(*args, **kwargs)
        elif backend == "vs":
            g, newFrontiers = rustInduce(*args, vs=True, **kwargs)
        elif backend == "pypy_vs":
            # These two options are dropped rather than forwarded.
            kwargs.pop('iteration')
            kwargs.pop('topk_use_only_likelihood')
            # Fixed path: the dump is overwritten on every invocation.
            fn = '/tmp/vs.pickle'
            with open(fn, 'wb') as handle:
                pickle.dump((args, kwargs), handle)
            eprint("For debugging purposes, the version space compression invocation has been saved to", fn)
            g, newFrontiers = callCompiled(induceGrammar_Beta, *args, **kwargs)
        elif backend == "ocaml":
            kwargs.pop('iteration')
            kwargs.pop('topk_use_only_likelihood')
            # Hard-coded settings forwarded to ocamlInduce.
            kwargs['topI'] = 300
            kwargs['bs'] = 1000000
            g, newFrontiers = ocamlInduce(*args, **kwargs)
        elif backend == "memorize":
            g, newFrontiers = memorizeInduce(*args, **kwargs)
        else:
            # Raised explicitly instead of `assert False`: asserts are
            # removed under `python -O`, which would let control fall
            # through with `g` and `newFrontiers` unbound.
            raise AssertionError("unknown compressor")

    if uses_pypy:
        # Undo the stripping and put the original tasks back.
        g = g.unstrip_primitive_values()
        newFrontiers = [front.unstrip_primitive_values() for front in newFrontiers]
        newFrontiers = [
            Frontier(f.entries, original_tasks[f.task.name])
            for f in newFrontiers
        ]

    return g, newFrontiers
def get():
    """Collect every pending result and decode it into Helmholtz frontiers.

    Reads ``promises`` and ``requests`` from the enclosing scope. Each
    promise's ``get()`` result is decoded as UTF-8 JSON and paired, in
    order, with the corresponding request. Every entry of a decoded
    response becomes one ``Frontier`` whose programs are parsed from
    ``entry["programs"]`` with ``entry["ll"]`` as the log prior and a log
    likelihood of zero.

    The task attached to each frontier is named after the entry's index
    within its own response, so names repeat across requests.

    Returns:
        A flat list of all frontiers, in request order then entry order.
    """
    payloads = [promise.get() for promise in promises]
    frontiers = []
    with timing("(Helmholtz enumeration) Decoded json into frontiers"):
        for request, payload in zip(requests, payloads):
            decoded = json.loads(payload.decode("utf-8"))
            frontiers.extend(
                Frontier(
                    [
                        FrontierEntry(program=Program.parse(source),
                                      logPrior=item["ll"],
                                      logLikelihood=0.)
                        for source in item["programs"]
                    ],
                    task=Task(str(index), request, []),
                )
                for index, item in enumerate(decoded)
            )
    eprint("Total number of Helmholtz frontiers:", len(frontiers))
    return frontiers