def sygusCompetition(checkpoints, tasks):
    """Run an ensemble of checkpoints against a set of SyGuS tasks in parallel.

    Each (checkpoint, task) pair is solved independently; a task's final
    search time is the minimum over all checkpoints that solved it (None if
    no checkpoint solved it). Results are pickled to experimentOutputs/ and
    a hit-rate summary is printed.

    :param checkpoints: list of checkpoint paths/objects to ensemble
    :param tasks: list of tasks to compete on
    """
    from pathos.multiprocessing import Pool
    import datetime

    # Map from task to list of search times, one for each checkpoint.
    # Search time will be None if it is not solved.
    searchTimes = {t: [] for t in tasks}

    # Split a budget of 8 CPUs across the checkpoints.
    # BUG FIX: with more than 8 checkpoints the integer share was 0, which
    # made the maxWorkers division raise ZeroDivisionError; clamp to >= 1.
    CPUs = max(1, int(8 / len(checkpoints)))
    maxWorkers = max(1, int(numberOfCPUs() / CPUs))
    workers = Pool(maxWorkers)
    eprint(
        f"You gave me {len(checkpoints)} checkpoints to ensemble. Each checkpoint will get {CPUs} CPUs. Creating a pool of {maxWorkers} worker processes."
    )

    timeout = 3600  # per-job solver timeout, in seconds

    # Queue every (checkpoint, task) pairing as an asynchronous job.
    promises = []
    for t in tasks:
        for checkpoint in checkpoints:
            promise = workers.apply_async(
                competeOnOneTask, (checkpoint, t),
                {"CPUs": CPUs, "timeout": timeout})
            promises.append(promise)
    eprint(f"Queued {len(promises)} jobs.")

    try:
        # Collect results; a None search time means that run did not solve it.
        for promise in promises:
            dt, task = promise.get()
            if dt is not None:
                searchTimes[task].append(dt)
    finally:
        # BUG FIX: the pool was never shut down, leaking worker processes.
        workers.close()
        workers.join()

    # Best (minimum) time over the ensemble, or None if nobody solved it.
    searchTimes = {t: min(ts) if ts else None
                   for t, ts in searchTimes.items()}

    fn = "experimentOutputs/text_competition_%s.p" % (
        datetime.datetime.now().isoformat())
    with open(fn, "wb") as handle:
        pickle.dump(searchTimes, handle)

    eprint()
    hits = sum(t is not None for t in searchTimes.values())
    total = len(searchTimes)
    percentage = 100 * hits / total
    eprint("Hits %d/%d = %f\n" % (hits, total, percentage))
    eprint()
    eprint("Exported competition results to", fn)
tasks=tasks, bidirectional=True) if __name__ == "__main__": tasks = makeTasks() eprint("Generated", len(tasks), "tasks") test, train = testTrainSplit(tasks, 0.2) eprint("Split tasks into %d/%d test/train" % (len(test), len(train))) baseGrammar = Grammar.uniform(primitives) explorationCompression(baseGrammar, train, testingTasks=test, outputPrefix="experimentOutputs/text", evaluationTimeout=0.0005, **commandlineArguments( steps=500, iterations=10, helmholtzRatio=0.5, topK=2, maximumFrontier=2, structurePenalty=10., a=3, activation="relu", CPUs=numberOfCPUs(), featureExtractor=LearnedFeatureExtractor, pseudoCounts=10.0))
def __init__(self, data, window, bank=None, UG=None,
             numberOfCPUs=None, maximumNumberOfRules=7,
             fixedMorphology=None, maximumRadius=3,
             problemName=None, globalTimeout=None):
    """Set up an incremental phonology-solving problem over `data`.

    :param data: matrix of surface forms; data[i][x] is a word or None
    :param window: incremental window size; None means guess it from the data
    :param bank: feature bank forwarded to UnderlyingProblem
    :param UG: universal-grammar prior forwarded to UnderlyingProblem
    :param numberOfCPUs: worker count; defaults to 75% of available CPUs
    :param maximumNumberOfRules: cap on the number of rules considered
    :param fixedMorphology: pre-committed morphology forwarded to the parent
    :param maximumRadius: maximum rule context radius
    :param problemName: key into Problem.named for per-problem parameters
    :param globalTimeout: overall wall-clock budget (seconds), or None
    """
    UnderlyingProblem.__init__(self, data, problemName=problemName,
                               bank=bank, UG=UG,
                               fixedMorphology=fixedMorphology)

    self.numberOfCPUs = numberOfCPUs if numberOfCPUs is not None else \
        int(math.ceil(utilities.numberOfCPUs() * 0.75))
    self.maximumNumberOfRules = maximumNumberOfRules
    self.maximumRadius = maximumRadius

    # Average number of non-None surface forms per data point; dense
    # paradigms (many inflections observed per point) let us freeze
    # morphology sooner.
    totalNumberOfWords = sum(x is not None for i in self.data for x in i)
    wordsPerDataPoint = float(totalNumberOfWords) / len(self.data)

    if window is None:
        window = self.guessWindow()
    self.windowSize = window

    # Per-problem override of the morphology-freezing threshold.
    # NOTE(review): mixes self.problemName and the problemName argument —
    # presumably the parent sets self.problemName = problemName; confirm.
    if self.problemName in Problem.named and \
       Problem.named[problemName].parameters is not None and \
       "fixedMorphologyThreshold" in Problem.named[problemName].parameters:
        self.fixedMorphologyThreshold = Problem.named[
            problemName].parameters["fixedMorphologyThreshold"]
        # Converted from the Python-2 print statement; the rest of the file
        # already requires Python 3 (f-strings).
        print("Using custom fixed morphology threshold of",
              self.fixedMorphologyThreshold)
    elif wordsPerDataPoint >= 12:
        self.fixedMorphologyThreshold = 1
    else:
        self.fixedMorphologyThreshold = 3

    # Map from inflection_index to Maybe (prefix, suffix, count)
    self.morphologyHistory = [None for _ in range(self.numberOfInflections)]

    # Freeze an underlying form after it has been stable this many times.
    self.fixedUnderlyingFormThreshold = 3
    # Map from (surface1, ..., surface_I) to ur
    self.fixedUnderlyingForms = {}
    # Map from (surface1, ..., surface_I) to (ur, count)
    self.underlyingFormHistory = {}

    # After we have seen a rule be around for at least this many
    # times in a row we keep it forever.
    self.ruleFreezingThreshold = 10
    self.frozenRules = set([])
    # Map from rule to how many times in a row we have seen it lately.
    self.ruleHistory = {}

    self.pervasiveTimeout = 2 * 60 * 60  # let's not try and run the solver more than 2h

    self.globalTimeout = globalTimeout