def computeOneNode(self, node):
    """Push 'node' one step further in its computation requirements.

    self.intermediates_[node] holds the node's in-flight state and is one of:
      * None                          -- computation has not started yet
      * a FORANative.ExecutionContext -- a pending cache request whose
                                         dependency values have been filled in
      * anything else (a (splitResult, computationLog) pair) -- a split
                                         request whose submitted child
                                         computations have all finished

    'node' itself is either a tuple (an apply to evaluate) or a
    FORANative.PausedComputation to resume.
    """
    if self.intermediates_[node] is None:
        context = self.grabContext()
        #the intermediates can either be None or
        #an execution context. in this case, since its a list
        #we have not even started computation yet, so we need to create
        #an ExecutionContext and begin computing
        with self.contextEnterer(context):
            context.resetInterruptState()
            if isinstance(node, tuple):
                with freestoreLock:
                    #this operation may be copying values in the freestore as we're
                    #updating them, so we need to do it under a lock
                    context.placeInEvaluationStateWithoutRenamingMutableVectors(*node)
                context.resume()
            elif isinstance(node, FORANative.PausedComputation):
                context.resumePausedComputation(node)
                context.resetInterruptState()
                context.resume()
            else:
                assert False, "don't know what to do with node of type %s" % node
        #remember the running context so subsequent calls take the
        #ExecutionContext branch below
        self.intermediates_[node] = context
    elif isinstance(self.intermediates_[node], FORANative.ExecutionContext):
        #this was a cacherequest node, and if we're here, we filled them
        #all out
        context = self.intermediates_[node]
        req = context.getCacheRequest()
        if CacheSemantics.isVectorCacheLoadRequest(req):
            #vector loads carry no payload: resume with an empty result pair
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.resume(
                    ComputationResult_.Result(
                        ImplValContainer_(),
                        ImplValContainer_()
                        )
                    )
        elif CacheSemantics.isCacheRequestWithResult(req):
            #the request's result is directly computable from the request itself
            result = CacheSemantics.getCacheRequestComputationResult(req)
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.resume(result)
        else:
            #a general cache call: gather the finished values of every
            #sub-computation; the first exception (if any) wins
            cacheCalls = [x.extractApplyTuple() for x in CacheSemantics.processCacheCall(req)]
            res = []
            exception = None
            for t in cacheCalls:
                assert t in self.finishedValues_, (
                    "Couldn't find result for: %s in %s" %
                        (t, "\n".join([str(x) for x in self.finishedValues_.keys()]))
                    )
                if self.finishedValues_[t].isException():
                    if exception is None:
                        exception = self.finishedValues_[t]
                else:
                    res.append(self.finishedValues_[t].asResult.result)
            with self.contextEnterer(context):
                if exception:
                    context.resetInterruptState()
                    context.resume(exception)
                else:
                    context.resetInterruptState()
                    context.resume(
                        ComputationResult_.Result(
                            ImplValContainer_(tuple(res)),
                            ImplValContainer_()
                            )
                        )
    else:
        #this was a split request
        splitResult, splitComputationLog = self.intermediates_[node]
        #feed each finished child computation back into its future slot;
        #a failure in any child fails the whole node immediately
        for slotComputation in splitResult.submittedComputations():
            assert slotComputation in self.finishedValues_
            value = self.finishedValues_[slotComputation]
            if value.isFailure():
                self.finishNode_(node, value)
                return
            else:
                splitResult.futuresSplitResult.slotCompleted(
                    splitResult.computationsToSlotIndices[slotComputation],
                    value)
                del splitResult.computationsToSlotIndices[slotComputation]
        submittableFutures = splitResult.futuresSplitResult.indicesOfSubmittableFutures()
        if len(submittableFutures) == 0:
            #nothing left to submit: resume the parent computation with the
            #final assembled result
            context = self.grabContext()
            toResumeWith = splitResult.futuresSplitResult.getFinalResult()
            context.resumePausedComputation(toResumeWith)
            context.resetInterruptState()
            self.intermediates_[node] = context
            with self.contextEnterer(context):
                context.resume()
        else:
            with self.lock_:
                futuresSplitResult = splitResult.futuresSplitResult
                isFinished, result = self.findMeatyPausedComputations(futuresSplitResult)
                if not isFinished:
                    #more child computations to submit; park the node again
                    #as a split request and wait for them
                    splitResult = self.computeIntermediatesForSplitResult(
                        node, futuresSplitResult, result)
                    self.intermediates_[node] = (splitResult, [])
                    return
                else:
                    toResume = result
            context = self.grabContext()
            context.resumePausedComputation(toResume)
            context.resetInterruptState()
            self.intermediates_[node] = context
            with self.contextEnterer(context):
                context.resume()
    #drive the context until it either finishes, blocks on a cache request,
    #or gets split; vector loads and plain interrupts loop back around
    while True:
        if context.isFinished():
            result = context.getFinishedResult()
            self.checkContextBackIn(context)
            #now, wake up any dependencies
            self.finishNode_(node, result)
            return
        elif context.isVectorLoad():
            for vectorToLoad in context.getVectorLoadAsVDIDs():
                toLoad = None
                loaded = False
                #first try the local offline cache
                if self.offlineCache_ is not None:
                    toLoad = self.offlineCache_.loadIfExists(vectorToLoad.page)
                    if toLoad is not None:
                        self.vdm_.loadSerializedVectorPage(vectorToLoad.page, toLoad)
                        loaded = True
                if not loaded and vectorToLoad.isExternal():
                    #is this an external dataset, attempt to load it from there
                    PythonIoTasks.loadExternalDataset(
                        getCurrentS3Interface(),
                        vectorToLoad,
                        self.vdm_,
                        self.inProcessDownloader
                        )
                    loaded = True
                assert loaded, "lost the definition for VDID: %s" % vectorToLoad
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.resume()
            #go back around and try again
        elif context.isInterrupted():
            toResume = None
            if self.checkShouldSplit(context):
                futuresSplitResult = context.splitWithFutures()
                if futuresSplitResult is not None:
                    with self.lock_:
                        futuresSplitResult.disallowRepeatNodes()
                        isFinished, result = self.findMeatyPausedComputations(futuresSplitResult)
                        if not isFinished:
                            #hand the node off as a split request and release
                            #the execution context
                            splitResult = self.computeIntermediatesForSplitResult(
                                node, futuresSplitResult, result)
                            self.intermediates_[node] = (splitResult, context.getComputationLog())
                            self.checkContextBackIn(context)
                            return
                        else:
                            toResume = result
            #if we're here, then we didn't split
            #go back around and try again
            with self.contextEnterer(context):
                if toResume is not None:
                    context.resumePausedComputation(toResume)
                context.resetInterruptState()
                context.resume()
        elif context.isCacheRequest():
            #these are thew new dependencies
            req = context.getCacheRequest()
            deps = set()
            if CacheSemantics.isVectorCacheLoadRequest(req):
                pass
            elif CacheSemantics.isCacheRequestWithResult(req):
                pass
            else:
                cacheCalls = [x.extractApplyTuple() for x in CacheSemantics.processCacheCall(req)]
                with self.lock_:
                    #register any dependencies
                    for t in cacheCalls:
                        if t not in self.finishedValues_ and t not in self.intermediates_:
                            #its a new request
                            self.intermediates_[t] = None
                            self.completable_.put(t)
                            self.watchers_[t] = threading.Event()
                        if t not in self.finishedValues_:
                            deps.add(t)
                    self.dependencies_[node] = deps
            if not deps:
                #we could go again
                with self.lock_:
                    self.completable_.put(node)
            return
def computeOneNode(self, node):
    """Push 'node' one step further in its computation requirements.

    self.intermediates_[node] holds the node's in-flight state and is one of:
      * None                          -- computation has not started yet
      * a FORANative.ExecutionContext -- a pending cache request whose
                                         dependency values have been filled in
      * anything else (a (splitResult, computationLog) pair) -- a split
                                         request whose submitted child
                                         computations have all finished

    'node' itself is either a tuple (an apply to evaluate) or a
    FORANative.PausedComputation to resume.
    """
    if self.intermediates_[node] is None:
        context = self.grabContext()
        #the intermediates can either be None or
        #an execution context. in this case, since its a list
        #we have not even started computation yet, so we need to create
        #an ExecutionContext and begin computing
        with self.contextEnterer(context):
            context.resetInterruptState()
            if isinstance(node, tuple):
                with freestoreLock:
                    #this operation may be copying values in the freestore as we're
                    #updating them, so we need to do it under a lock
                    context.placeInEvaluationStateWithoutRenamingMutableVectors(
                        *node)
                context.resume()
            elif isinstance(node, FORANative.PausedComputation):
                context.resumePausedComputation(node)
                context.resetInterruptState()
                context.resume()
            else:
                assert False, "don't know what to do with node of type %s" % node
        #remember the running context so subsequent calls take the
        #ExecutionContext branch below
        self.intermediates_[node] = context
    elif isinstance(self.intermediates_[node], FORANative.ExecutionContext):
        #this was a cacherequest node, and if we're here, we filled them
        #all out
        context = self.intermediates_[node]
        req = context.getCacheRequest()
        if CacheSemantics.isVectorCacheLoadRequest(req):
            #vector loads carry no payload: resume with an empty result pair
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.resume(
                    ComputationResult_.Result(ImplValContainer_(),
                                              ImplValContainer_()))
        elif CacheSemantics.isCacheRequestWithResult(req):
            #the request's result is directly computable from the request itself
            result = CacheSemantics.getCacheRequestComputationResult(req)
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.resume(result)
        else:
            #a general cache call: gather the finished values of every
            #sub-computation; the first exception (if any) wins
            cacheCalls = [
                x.extractApplyTuple()
                for x in CacheSemantics.processCacheCall(req)
            ]
            res = []
            exception = None
            for t in cacheCalls:
                assert t in self.finishedValues_, (
                    "Couldn't find result for: %s in %s" % (t, "\n".join(
                        [str(x) for x in self.finishedValues_.keys()])))
                if self.finishedValues_[t].isException():
                    if exception is None:
                        exception = self.finishedValues_[t]
                else:
                    res.append(self.finishedValues_[t].asResult.result)
            with self.contextEnterer(context):
                if exception:
                    context.resetInterruptState()
                    context.resume(exception)
                else:
                    context.resetInterruptState()
                    context.resume(
                        ComputationResult_.Result(
                            ImplValContainer_(tuple(res)),
                            ImplValContainer_()))
    else:
        #this was a split request
        splitResult, splitComputationLog = self.intermediates_[node]
        #feed each finished child computation back into its future slot;
        #a failure in any child fails the whole node immediately
        for slotComputation in splitResult.submittedComputations():
            assert slotComputation in self.finishedValues_
            value = self.finishedValues_[slotComputation]
            if value.isFailure():
                self.finishNode_(node, value)
                return
            else:
                splitResult.futuresSplitResult.slotCompleted(
                    splitResult.computationsToSlotIndices[slotComputation],
                    value)
                del splitResult.computationsToSlotIndices[slotComputation]
        submittableFutures = splitResult.futuresSplitResult.indicesOfSubmittableFutures(
        )
        if len(submittableFutures) == 0:
            #nothing left to submit: resume the parent computation with the
            #final assembled result
            context = self.grabContext()
            toResumeWith = splitResult.futuresSplitResult.getFinalResult()
            context.resumePausedComputation(toResumeWith)
            context.resetInterruptState()
            self.intermediates_[node] = context
            with self.contextEnterer(context):
                context.resume()
        else:
            with self.lock_:
                futuresSplitResult = splitResult.futuresSplitResult
                isFinished, result = self.findMeatyPausedComputations(
                    futuresSplitResult)
                if not isFinished:
                    #more child computations to submit; park the node again
                    #as a split request and wait for them
                    splitResult = self.computeIntermediatesForSplitResult(
                        node, futuresSplitResult, result)
                    self.intermediates_[node] = (splitResult, [])
                    return
                else:
                    toResume = result
            context = self.grabContext()
            context.resumePausedComputation(toResume)
            context.resetInterruptState()
            self.intermediates_[node] = context
            with self.contextEnterer(context):
                context.resume()
    #drive the context until it either finishes, blocks on a cache request,
    #or gets split; vector loads and plain interrupts loop back around
    while True:
        if context.isFinished():
            result = context.getFinishedResult()
            self.checkContextBackIn(context)
            #now, wake up any dependencies
            self.finishNode_(node, result)
            return
        elif context.isVectorLoad():
            for vectorToLoad in context.getVectorLoadAsVDIDs():
                toLoad = None
                loaded = False
                #first try the local offline cache
                if self.offlineCache_ is not None:
                    toLoad = self.offlineCache_.loadIfExists(
                        vectorToLoad.page)
                    if toLoad is not None:
                        self.vdm_.loadSerializedVectorPage(
                            vectorToLoad.page, toLoad)
                        loaded = True
                if not loaded and vectorToLoad.isExternal():
                    #is this an external dataset, attempt to load it from there
                    PythonIoTasks.loadExternalDataset(
                        getCurrentS3Interface(), vectorToLoad, self.vdm_,
                        self.inProcessDownloader)
                    loaded = True
                assert loaded, "lost the definition for VDID: %s" % vectorToLoad
            with self.contextEnterer(context):
                context.resetInterruptState()
                context.resume()
            #go back around and try again
        elif context.isInterrupted():
            toResume = None
            if self.checkShouldSplit(context):
                futuresSplitResult = context.splitWithFutures()
                if futuresSplitResult is not None:
                    with self.lock_:
                        futuresSplitResult.disallowRepeatNodes()
                        isFinished, result = self.findMeatyPausedComputations(
                            futuresSplitResult)
                        if not isFinished:
                            #hand the node off as a split request and release
                            #the execution context
                            splitResult = self.computeIntermediatesForSplitResult(
                                node, futuresSplitResult, result)
                            self.intermediates_[node] = (
                                splitResult, context.getComputationLog())
                            self.checkContextBackIn(context)
                            return
                        else:
                            toResume = result
            #if we're here, then we didn't split
            #go back around and try again
            with self.contextEnterer(context):
                if toResume is not None:
                    context.resumePausedComputation(toResume)
                context.resetInterruptState()
                context.resume()
        elif context.isCacheRequest():
            #these are thew new dependencies
            req = context.getCacheRequest()
            deps = set()
            if CacheSemantics.isVectorCacheLoadRequest(req):
                pass
            elif CacheSemantics.isCacheRequestWithResult(req):
                pass
            else:
                cacheCalls = [
                    x.extractApplyTuple()
                    for x in CacheSemantics.processCacheCall(req)
                ]
                with self.lock_:
                    #register any dependencies
                    for t in cacheCalls:
                        if t not in self.finishedValues_ and t not in self.intermediates_:
                            #its a new request
                            self.intermediates_[t] = None
                            self.completable_.put(t)
                            self.watchers_[t] = threading.Event()
                        if t not in self.finishedValues_:
                            deps.add(t)
                    self.dependencies_[node] = deps
            if not deps:
                #we could go again
                with self.lock_:
                    self.completable_.put(node)
            return