def extractVectorDataAsNumpyArrayInChunks(self, stepSize = 100000):
    """Return the data as a sequence of numpy arrays each of which is no larger than 'stepSize'.

    This is used to prevent us from creating memory fragmentation when we are
    loading lots of arrays of different sizes.

    Args:
        stepSize: how far the extraction index advances per chunk. Must be
            positive.

    Returns:
        A list holding one gateway result per chunk, in index order from
        self.lowIndex up to self.highIndex, or None when there is no
        vectorImplVal, the slice isn't loaded, or the gateway returned None
        for one of the chunks.

    Raises:
        ValueError: if 'stepSize' is not positive.
    """
    if stepSize <= 0:
        # a non-positive step never advances 'index', so the loop below
        # would never terminate on its own
        raise ValueError("stepSize must be positive, got %r" % (stepSize,))

    if self.computedValueVector.vectorImplVal is None:
        return None

    if len(self.vectorDataIds) > 0 and not self.isLoaded:
        return None

    if not self.vdmThinksIsLoaded():
        return None

    result = []
    index = self.lowIndex

    while index < self.highIndex:
        tailResult = ComputedValueGateway.getGateway().extractVectorDataAsNumpyArray(
            self.computedValueVector,
            index,
            min(self.highIndex, index + stepSize)
            )

        if tailResult is None:
            # one missing chunk invalidates the whole extraction
            result = None
            break

        result.append(tailResult)
        index += stepSize

    if result is None and not self.vdmThinksIsLoaded():
        logging.info("CumulusClient: %s was marked loaded but returned None", self)
        self.isLoaded = False
        ComputedValueGateway.getGateway().reloadVector(self)

    return result
def trigger(self):
    """Start writing the computed string to S3 and record the outcome in self.successOrError.

    Does nothing when self.successOrError is already set. When the computed
    value isn't available yet, is an exception, or isn't a vector of char,
    self.successOrError is set to a failure dict right away. Otherwise an
    external IO task is created through the gateway, and its callback sets
    self.successOrError from the task result.
    """
    if self.successOrError is not None:
        return

    if self.computedValue.valueIVC is None:
        self.successOrError = {
            'success': False,
            'message': "Tried to trigger write before calculation was finished."
            }
        return

    # Check for an exception before checking the value's type: otherwise an
    # exception whose value isn't a vector of char is reported with the
    # generic "should have been a string" message and this branch is skipped.
    if self.computedValue.isException:
        self.successOrError = {
            'success': False,
            'message': "Result should have been a string. Got an exception instead."
            }
        return

    if not self.computedValue.valueIVC.isVectorOfChar():
        self.successOrError = {
            'success': False,
            'message': "Result should have been a string."
            }
        return

    def callback(result):
        # invoked by the gateway with the IO task's result
        if result.isSuccess():
            self.successOrError = {'success': True}
        else:
            self.successOrError = {'success': False, 'message': str(result)}

    # NOTE(review): the three empty strings passed to S3KeyAndCredentials are
    # presumably credential fields left blank - confirm against CumulusNative.
    ComputedValueGateway.getGateway().createExternalIoTask(
        CumulusNative.ExternalIoTask.WriteCharBigvecToS3(
            self.computedValue.valueIVC.getVectorBigvecGuid(),
            CumulusNative.S3KeyAndCredentials(
                self.bucketname,
                self.keyname,
                "",
                "",
                ""
                )
            ),
        callback
        )
def extractVectorDataAsNumpyArrayInChunks(self, stepSize=100000):
    """Return the data as a sequence of numpy arrays each of which is no larger than 'stepSize'.

    This is used to prevent us from creating memory fragmentation when we are
    loading lots of arrays of different sizes.

    Args:
        stepSize: how far the extraction index advances per chunk. Must be
            positive.

    Returns:
        A list holding one gateway result per chunk, in index order from
        self.lowIndex up to self.highIndex, or None when there is no
        vectorImplVal, the slice isn't loaded, or the gateway returned None
        for one of the chunks.

    Raises:
        ValueError: if 'stepSize' is not positive.
    """
    if stepSize <= 0:
        # a non-positive step never advances 'index', so the loop below
        # would never terminate on its own
        raise ValueError("stepSize must be positive, got %r" % (stepSize,))

    if self.computedValueVector.vectorImplVal is None:
        return None

    if len(self.vectorDataIds) > 0 and not self.isLoaded:
        return None

    if not self.vdmThinksIsLoaded():
        return None

    result = []
    index = self.lowIndex

    while index < self.highIndex:
        tailResult = ComputedValueGateway.getGateway().extractVectorDataAsNumpyArray(
            self.computedValueVector, index,
            min(self.highIndex, index + stepSize))

        if tailResult is None:
            # one missing chunk invalidates the whole extraction
            result = None
            break

        result.append(tailResult)
        index += stepSize

    if result is None and not self.vdmThinksIsLoaded():
        logging.info(
            "CumulusClient: %s was marked loaded but returned None", self)
        self.isLoaded = False
        ComputedValueGateway.getGateway().reloadVector(self)

    return result
def update(self):
    """Refresh the persistent-cache statistics stored on this object.

    Copies the byte, object, computation and reachable-computation totals
    from the gateway's persistent cache index onto self. Leaves every
    attribute untouched when the gateway reports no persistent cache index.
    """
    # Look the index up once so the None check and the reads below are made
    # against the same object (and the gateway isn't queried five times).
    cacheIndex = ComputedValueGateway.getGateway().getPersistentCacheIndex()
    if cacheIndex is None:
        return

    self.totalBytesInCache = cacheIndex.totalBytesInCache()
    self.totalObjectsInCache = cacheIndex.totalObjectsInCache()
    self.totalComputationsInCache = cacheIndex.totalComputationsInCache()
    self.totalReachableComputationsInCache = cacheIndex.totalReachableComputationsInCache()
def extractVectorItemAsIVC(self, ct):
    """Fetch the item at position 'ct' of the underlying vector through the gateway.

    Returns None without querying the gateway when there is no vectorImplVal,
    or when the slice has vector data ids but isn't marked loaded. When the
    gateway itself returns None, the slice is marked as not loaded and a
    reload is requested before None is returned.
    """
    if self.computedValueVector.vectorImplVal is None:
        return None

    if len(self.vectorDataIds) > 0 and not self.isLoaded:
        return None

    item = ComputedValueGateway.getGateway().extractVectorItem(
        self.computedValueVector,
        ct
        )
    if item is not None:
        return item

    # we believed the data was available, but the gateway disagreed
    logging.info("CumulusClient: %s was marked loaded but returned None", self)
    self.isLoaded = False
    ComputedValueGateway.getGateway().reloadVector(self)
    return None
def vectorDataIds(self):
    """Return the vector data ids for the slice between self.lowIndex and self.highIndex.

    Returns an empty list when the underlying vector has no vectorImplVal.
    """
    implVal = self.computedValueVector.vectorImplVal
    if implVal is None:
        return []

    return implVal.getVectorDataIdsForSlice(
        self.lowIndex,
        self.highIndex,
        ComputedValueGateway.getGateway().vdm
        )
def submittedComputationId(self):
    """Return 'toSimple()' of the id the gateway reports for this computation definition.

    Returns None when the gateway reports no id.
    """
    gateway = ComputedValueGateway.getGateway()
    submittedId = gateway.submittedComputationId(self.cumulusComputationDefinition)

    return None if submittedId is None else submittedId.toSimple()
def slicesByPage(self):
    """Return one mappable slice per page slice range of the vector.

    Returns an empty list when there is no vectorImplVal.
    """
    if self.vectorImplVal is None:
        return []

    pageRanges = self.vectorImplVal.getVectorPageSliceRanges(
        ComputedValueGateway.getGateway().vdm
        )

    slices = []
    for low, high in pageRanges:
        slices.append(self.getMappableSlice(low, high))
    return slices
def totalVectorBytesReferenced(self):
    """Return the gateway's byte count for the bigvecs referenced by the checkpoint status.

    Returns 0 when there is no checkpoint status.
    """
    if self.checkpointStatus is None:
        return 0

    # the previously assigned-but-unused 'stats' local has been dropped
    return ComputedValueGateway.getGateway().bytecountForBigvecs(
        self.checkpointStatus.bigvecsReferenced
        )
def vectorDataIds(self):
    """Return the vector data ids for the slice between self.lowIndex and self.highIndex.

    Returns an empty list when the underlying vector has no vectorImplVal.
    """
    implVal = self.computedValueVector.vectorImplVal
    if implVal is None:
        return []

    return implVal.getVectorDataIdsForSlice(
        self.lowIndex,
        self.highIndex,
        ComputedValueGateway.getGateway().vdm
        )
def extractVectorDataAsNumpyArray(self):
    """Extract the slice between self.lowIndex and self.highIndex through the gateway.

    Returns None without querying the gateway when there is no vectorImplVal
    or the slice isn't marked loaded. When the gateway itself returns None,
    a warning is logged, the slice is marked as not loaded and a reload is
    requested before None is returned.
    """
    if self.computedValueVector.vectorImplVal is None:
        return None

    if not self.isLoaded:
        return None

    result = ComputedValueGateway.getGateway().extractVectorDataAsNumpyArray(
        self.computedValueVector,
        self.lowIndex,
        self.highIndex
        )

    if result is None:
        # logging.warn is a deprecated alias; logging.warning is the supported spelling
        logging.warning(
            "CumulusClient: %s was marked loaded but returned None", self)
        self.isLoaded = False
        ComputedValueGateway.getGateway().reloadVector(self)

    return result
def totalVectorBytesReferenced(self):
    """Return the gateway's byte count for the bigvecs referenced by the checkpoint status.

    Returns 0 when there is no checkpoint status.
    """
    if self.checkpointStatus is None:
        return 0

    # the previously assigned-but-unused 'stats' local has been dropped
    return ComputedValueGateway.getGateway().bytecountForBigvecs(
        self.checkpointStatus.bigvecsReferenced
        )
def trigger(self):
    """Start writing the computed string to S3 and record the outcome in self.successOrError.

    Does nothing when self.successOrError is already set. When the computed
    value isn't available yet, is an exception, or isn't a vector of char,
    self.successOrError is set to a failure dict right away. Otherwise an
    external IO task is created through the gateway, and its callback sets
    self.successOrError from the task result.
    """
    if self.successOrError is not None:
        return

    if self.computedValue.valueIVC is None:
        self.successOrError = {
            'success': False,
            'message': "Tried to trigger write before calculation was finished."
        }
        return

    # Check for an exception before checking the value's type: otherwise an
    # exception whose value isn't a vector of char is reported with the
    # generic "should have been a string" message and this branch is skipped.
    if self.computedValue.isException:
        self.successOrError = {
            'success': False,
            'message': "Result should have been a string. Got an exception instead."
        }
        return

    if not self.computedValue.valueIVC.isVectorOfChar():
        self.successOrError = {
            'success': False,
            'message': "Result should have been a string."
        }
        return

    def callback(result):
        # invoked by the gateway with the IO task's result
        if result.isSuccess():
            self.successOrError = {'success': True}
        else:
            self.successOrError = {
                'success': False,
                'message': str(result)
            }

    # NOTE(review): the three empty strings passed to S3KeyAndCredentials are
    # presumably credential fields left blank - confirm against CumulusNative.
    ComputedValueGateway.getGateway().createExternalIoTask(
        CumulusNative.ExternalIoTask.WriteCharBigvecToS3(
            self.computedValue.valueIVC.getVectorBigvecGuid(),
            CumulusNative.S3KeyAndCredentials(self.bucketname, self.keyname,
                                              "", "", "")),
        callback)
def extractVectorDataAsPythonArray(self):
    """Extract the slice between self.lowIndex and self.highIndex as a python array.

    Returns None without querying the gateway when there is no vectorImplVal,
    or when the slice has vector data ids but isn't marked loaded. When the
    gateway returns None and the vdm doesn't consider the slice loaded, the
    slice is marked as not loaded and a reload is requested.
    """
    if self.computedValueVector.vectorImplVal is None:
        return None

    if len(self.vectorDataIds) > 0 and not self.isLoaded:
        return None

    values = ComputedValueGateway.getGateway().extractVectorDataAsPythonArray(
        self.computedValueVector,
        self.lowIndex,
        self.highIndex
        )
    if values is not None:
        return values

    if not self.vdmThinksIsLoaded():
        logging.info("CumulusClient: %s was marked loaded but returned None. reloading", self)
        self.isLoaded = False
        ComputedValueGateway.getGateway().reloadVector(self)

    return None
def initialize(self, purePythonMDSAsJson):
    """Initialize the converter assuming a set of pyfora builtins

    Installs a fresh ObjectRegistry in objectRegistry_[0] and a converter
    built from the canonical pure-python module in converter_[0]. The
    'purePythonMDSAsJson' argument is not read by this implementation.
    Any failure is logged at critical level and re-raised.
    """
    import pyfora.ObjectRegistry as ObjectRegistry
    import ufora.FORA.python.PurePython.Converter as Converter

    try:
        logging.info("Initializing the PyforaObjectConverter")

        registry = ObjectRegistry.ObjectRegistry()
        objectRegistry_[0] = registry

        purePythonModule = Converter.canonicalPurePythonModule()
        vdm = ComputedValueGateway.getGateway().vdm
        converter_[0] = Converter.constructConverter(purePythonModule, vdm)
    except:
        # deliberately broad: log whatever went wrong, then let it propagate
        logging.critical("Failed to initialize the PyforaObjectConverter: %s", traceback.format_exc())
        raise
def getClusterStatus(self, args):
    """Return the cluster status reported by the gateway's cumulusGateway.

    'args' is accepted but not used.
    """
    return ComputedValueGateway.getGateway().cumulusGateway.getClusterStatus()
def decreaseRequestCount(self, *args):
    """Ask the gateway to decrease the request count for this computation.

    Any positional arguments are accepted and ignored.
    """
    gateway = ComputedValueGateway.getGateway()
    gateway.decreaseRequestCount(self, self.cumulusComputationDefinition)
def requestComputationCheckpoint(self, *args):
    """Ask the gateway to checkpoint this computation.

    Any positional arguments are accepted and ignored.
    """
    gateway = ComputedValueGateway.getGateway()
    gateway.requestComputationCheckpoint(self, self.cumulusComputationDefinition)
def cancel(self, *args):
    """Ask the gateway to cancel this computation.

    Any positional arguments are accepted and ignored.
    """
    gateway = ComputedValueGateway.getGateway()
    gateway.cancelComputation(self, self.cumulusComputationDefinition)
def extractVectorContents(vectorIVC):
    """Extract the contents of 'vectorIVC' in the most direct form available.

    Returns one of:
        {'listContents': [...]}        - contents as a python list (also
                                         used for an empty vector)
        {'string': ...}                - contents of a vector of char
        {'contentsAsNumpyArray': ...}  - numpy contents, used when
                                         getVectorElementsJOR() has a single
                                         entry, the vector has more than one
                                         element and the first element
                                         satisfies isOfSimpleType
        None                           - nothing could be extracted; the
                                         request count of the vector's
                                         entire slice is increased first

    Raises:
        pyfora.Exceptions.ForaToPythonConversionError: if numpy data was
            extracted for the slice but its first element could not be.
    """
    if len(vectorIVC) == 0:
        return {'listContents': []}

    #if this is an unpaged vector we can handle it without callback
    vdm = ComputedValueGateway.getGateway().vdm
    if vdm.vectorDataIsLoaded(vectorIVC, 0, len(vectorIVC)) and vectorIVC.isVectorEntirelyUnpaged():
        #see if it's a string. This is the only way to be holding a Vector of char
        if vectorIVC.isVectorOfChar():
            res = vdm.extractVectorContentsAsNumpyArray(vectorIVC, 0, len(vectorIVC))
            assert res is not None
            # NOTE(review): if 'res' is a numpy array, tostring() is deprecated in
            # newer numpy releases in favor of tobytes() - confirm before upgrading
            return {'string': res.tostring()}

        #see if it's simple enough to transmit as numpy data
        if len(vectorIVC.getVectorElementsJOR()) == 1 and len(vectorIVC) > 1:
            firstElement = vdm.extractVectorItem(vectorIVC, 0)
            if isOfSimpleType(firstElement):
                res = vdm.extractVectorContentsAsNumpyArray(vectorIVC, 0, len(vectorIVC))
                if res is not None:
                    assert len(res) == len(vectorIVC)
                    return {'contentsAsNumpyArray': res}

        #see if we can extract the data as a regular pythonlist
        res = vdm.extractVectorContentsAsPythonArray(vectorIVC, 0, len(vectorIVC))
        assert res is not None
        return {'listContents': res}

    # otherwise go through a ComputedValueVector slice, whose extract methods
    # return None when they can't produce the data
    vec = ComputedValue.ComputedValueVector(vectorImplVal=vectorIVC)
    vecSlice = vec.entireSlice

    res = None
    preventPythonArrayExtraction = False

    #see if it's a string. This is the only way to be holding a Vector of char
    if vectorIVC.isVectorOfChar():
        res = vecSlice.extractVectorDataAsNumpyArray()
        if res is not None:
            res = {'string': res.tostring()}

    #see if it's simple enough to transmit as numpy data
    if res is None and len(vectorIVC.getVectorElementsJOR()) == 1 and len(vectorIVC) > 1:
        res = vecSlice.extractVectorDataAsNumpyArray()

        if res is not None:
            firstElement = vecSlice.extractVectorItemAsIVC(0)
            if firstElement is None:
                #note we can't import this at the top of the file because this file gets imported
                #during the build process, which doesn't have pyfora installed.
                import pyfora.Exceptions as Exceptions
                raise Exceptions.ForaToPythonConversionError(
                    "Shouldn't be possible to download data as numpy, and then not get the first value"
                    )

            if isOfSimpleType(firstElement):
                res = {'contentsAsNumpyArray': res}
            else:
                # not a simple type: fall through to the python-list extraction below
                res = None
        else:
            if not vecSlice.vdmThinksIsLoaded():
                #there's a race condition where the data could be loaded between now and
                #the call to 'extractVectorDataAsPythonArray'. This prevents it.
                preventPythonArrayExtraction = True

    #see if we can extract the data as a regular pythonlist
    if not preventPythonArrayExtraction and res is None:
        res = vecSlice.extractVectorDataAsPythonArray()
        if res is not None:
            res = {'listContents': res}

    if res is None:
        # nothing was extractable: bump the slice's request count and report
        # "no data" to the caller
        vecSlice.increaseRequestCount()
        return None

    return res
def decreaseRequestCount(self, *args):
    """Ask the gateway to decrease the vector request count for this object.

    Any positional arguments are accepted and ignored.
    """
    gateway = ComputedValueGateway.getGateway()
    gateway.decreaseVectorRequestCount(self)
def increaseRequestCount(self, *args):
    """Request the data in the leaf of this vector.

    Delegates to the gateway's increaseVectorRequestCount; any positional
    arguments are accepted and ignored.
    """
    gateway = ComputedValueGateway.getGateway()
    gateway.increaseVectorRequestCount(self)
def initialize(self, purePythonMDSAsJson):
    """Initialize the converter assuming a set of pyfora builtins

    Installs a fresh ObjectRegistry in objectRegistry_[0]. When
    'purePythonMDSAsJson' is None a default Converter is stored in
    converter_[0]; otherwise the pure-python module is imported from the
    given module directory structure and a fully configured Converter is
    stored instead. Any failure is logged at critical level and re-raised.
    """
    try:
        import pyfora.ObjectRegistry as ObjectRegistry
        import ufora.FORA.python.PurePython.Converter as Converter
        import ufora.FORA.python.PurePython.PyforaSingletonAndExceptionConverter as PyforaSingletonAndExceptionConverter
        import ufora.native.FORA as ForaNative
        import ufora.FORA.python.ModuleImporter as ModuleImporter

        logging.info("Initializing the PyforaObjectConverter")

        objectRegistry_[0] = ObjectRegistry.ObjectRegistry()

        if purePythonMDSAsJson is None:
            converter_[0] = Converter.Converter()
            return

        purePythonModuleImplval = ModuleImporter.importModuleFromMDS(
            ModuleDirectoryStructure.ModuleDirectoryStructure.fromJson(purePythonMDSAsJson),
            "fora",
            "purePython",
            searchForFreeVariables=True
            )

        # shorthand for looking up members of the imported pure-python module
        member = purePythonModuleImplval.getObjectMember

        singletonAndExceptionConverter = \
            PyforaSingletonAndExceptionConverter.PyforaSingletonAndExceptionConverter(
                purePythonModuleImplval
                )

        primitiveTypeMapping = {
            bool: member("PyBool"),
            str: member("PyString"),
            int: member("PyInt"),
            float: member("PyFloat"),
            type(None): member("PyNone"),
            }

        nativeConstantConverter = ForaNative.PythonConstantConverter(primitiveTypeMapping)
        nativeListConverter = ForaNative.makePythonListConverter(member("PyList"))
        nativeTupleConverter = ForaNative.makePythonTupleConverter(member("PyTuple"))
        nativeDictConverter = ForaNative.makePythonDictConverter(member("PyDict"))

        foraBuiltinsImplVal = ModuleImporter.builtinModuleImplVal()

        converter_[0] = Converter.Converter(
            nativeListConverter=nativeListConverter,
            nativeTupleConverter=nativeTupleConverter,
            nativeDictConverter=nativeDictConverter,
            nativeConstantConverter=nativeConstantConverter,
            singletonAndExceptionConverter=singletonAndExceptionConverter,
            vdmOverride=ComputedValueGateway.getGateway().vdm,
            purePythonModuleImplVal=purePythonModuleImplval,
            foraBuiltinsImplVal=foraBuiltinsImplVal
            )
    except:
        # deliberately broad: log whatever went wrong, then let it propagate
        logging.critical("Failed to initialize the PyforaObjectConverter: %s", traceback.format_exc())
        raise
def vdmThinksIsLoaded(self):
    """Return the gateway's vectorDataIsLoaded answer for this slice.

    The query covers self.computedValueVector between self.lowIndex and
    self.highIndex.
    """
    gateway = ComputedValueGateway.getGateway()
    return gateway.vectorDataIsLoaded(
        self.computedValueVector,
        self.lowIndex,
        self.highIndex
        )
def triggerGarbageCollectionImmediately(self, completePurge):
    """Ask the gateway to garbage collect the persistent cache.

    'completePurge' is coerced to a strict bool before being handed over.
    """
    # NOTE(review): 'Perstistent' is spelled exactly as the original call
    # site spells it; confirm against the gateway before "fixing" it.
    purge = bool(completePurge)
    ComputedValueGateway.getGateway().triggerPerstistentCacheGarbageCollection(purge)
def setMaxBytesInCache(self, *args):
    """Forward the first positional argument to the persistent cache index as its max byte count.

    Further positional arguments are ignored.
    """
    cacheIndex = ComputedValueGateway.getGateway().getPersistentCacheIndex()
    setter = cacheIndex.setMaxBytesInCache
    setter(args[0])
def extractVectorContents(vectorIVC):
    """Extract the contents of 'vectorIVC' in the most direct form available.

    Returns one of:
        {'listContents': [...]}   - contents as a python list (also used
                                    for an empty vector)
        {'string': ...}           - contents of a vector of char
        {'firstElement': ..., 'contentsAsNumpyArrays': [...]}
                                  - the first element plus the contents as
                                    a list of numpy arrays, used when
                                    getVectorElementsJOR() has a single
                                    entry and the vector has more than one
                                    element
        None                      - nothing could be extracted; the request
                                    count of the vector's entire slice is
                                    increased first

    Raises:
        pyfora.Exceptions.ForaToPythonConversionError: if numpy data was
            extracted for the slice but its first element could not be.
    """
    if len(vectorIVC) == 0:
        return {'listContents': []}

    #if this is an unpaged vector we can handle it without callback
    vdm = ComputedValueGateway.getGateway().vdm
    if vdm.vectorDataIsLoaded(vectorIVC, 0, len(vectorIVC)) and vectorIVC.isVectorEntirelyUnpaged():
        #see if it's a string. This is the only way to be holding a Vector of char
        if vectorIVC.isVectorOfChar():
            res = vdm.extractVectorContentsAsNumpyArray(vectorIVC, 0, len(vectorIVC))
            assert res is not None
            # NOTE(review): if 'res' is a numpy array, tostring() is deprecated in
            # newer numpy releases in favor of tobytes() - confirm before upgrading
            return {'string': res.tostring()}

        #see if it's simple enough to transmit as numpy data
        if len(vectorIVC.getVectorElementsJOR()) == 1 and len(vectorIVC) > 1:
            res = vdm.extractVectorContentsAsNumpyArray(vectorIVC, 0, len(vectorIVC))
            if res is not None:
                assert len(res) == len(vectorIVC)
                firstElement = vdm.extractVectorItem(vectorIVC, 0)
                # a single array is wrapped in a list so both code paths
                # produce the same 'contentsAsNumpyArrays' shape
                return {'firstElement': firstElement, 'contentsAsNumpyArrays': [res]}

        #see if we can extract the data as a regular pythonlist
        res = vdm.extractVectorContentsAsPythonArray(vectorIVC, 0, len(vectorIVC))
        assert res is not None
        return {'listContents': res}

    # otherwise go through a ComputedValueVector slice, whose extract methods
    # return None when they can't produce the data
    vec = ComputedValue.ComputedValueVector(vectorImplVal=vectorIVC)
    vecSlice = vec.entireSlice

    res = None
    preventPythonArrayExtraction = False

    #see if it's a string. This is the only way to be holding a Vector of char
    if vectorIVC.isVectorOfChar():
        res = vecSlice.extractVectorDataAsNumpyArray()
        if res is not None:
            res = {'string': res.tostring()}

    #see if it's simple enough to transmit as numpy data
    if res is None and len(vectorIVC.getVectorElementsJOR()) == 1 and len(vectorIVC) > 1:
        res = vecSlice.extractVectorDataAsNumpyArrayInChunks()

        if res is not None:
            firstElement = vecSlice.extractVectorItemAsIVC(0)
            if firstElement is None:
                #note we can't import this at the top of the file because this file gets imported
                #during the build process, which doesn't have pyfora installed.
                import pyfora.Exceptions as Exceptions
                raise Exceptions.ForaToPythonConversionError(
                    "Shouldn't be possible to download data as numpy, and then not get the first value"
                    )

            res = {'firstElement': firstElement, 'contentsAsNumpyArrays': res}
        else:
            if not vecSlice.vdmThinksIsLoaded():
                #there's a race condition where the data could be loaded between now and
                #the call to 'extractVectorDataAsPythonArray'. This prevents it.
                preventPythonArrayExtraction = True

    #see if we can extract the data as a regular pythonlist
    if not preventPythonArrayExtraction and res is None:
        res = vecSlice.extractVectorDataAsPythonArray()
        if res is not None:
            res = {'listContents': res}

    if res is None:
        # nothing was extractable: bump the slice's request count and report
        # "no data" to the caller
        vecSlice.increaseRequestCount()
        return None

    return res
def maxBytesInCache(self):
    """Return the persistent cache index's max byte count.

    Returns 0 when the gateway reports no persistent cache index.
    """
    # Look the index up once so the None check and the read below are made
    # against the same object.
    cacheIndex = ComputedValueGateway.getGateway().getPersistentCacheIndex()
    if cacheIndex is None:
        return 0

    return cacheIndex.getMaxBytesInCache()