def loadJob(command, jobStore):
    """
    Unpickles a job.Job instance by decoding the command.

    See job.Job._serialiseFirstJob and job.Job._makeJobWrappers to see how
    the Job is encoded in the command. Essentially the command is a reference
    to a jobStoreFileID containing the pickle file for the job and a list of
    modules which must be imported so that the Job can be successfully
    unpickled.

    :param str command: space-separated command string starting with the
        literal token "scriptTree".
    :param jobStore: job store from which the pickled job is read.
    :return: the unpickled job.Job instance.
    """
    commandTokens = command.split()
    assert "scriptTree" == commandTokens[0]
    pickleFile = commandTokens[1]
    jobClassName = commandTokens[2]
    # must import lazily because toil might not be on sys.path when the top-level of this module is run
    from toil.resource import ModuleDescriptor
    # Remaining tokens describe the user module that defines the job's class.
    userModule = ModuleDescriptor(*commandTokens[3:])
    if not userModule.belongsToToil:
        # Fetch/unpack the user script on this worker if it isn't part of Toil itself.
        userModule = userModule.localize()
    if userModule.dirPath not in sys.path:
        sys.path.append(userModule.dirPath)
    userModule = importlib.import_module(userModule.name)
    thisModule = sys.modules[__name__]
    # Expose the job class in this module's namespace so unpickling can resolve it.
    thisModule.__dict__[jobClassName] = userModule.__dict__[jobClassName]
    # "firstJob" is stored as a shared file; every other job uses a regular file ID.
    if pickleFile == "firstJob":
        openFileStream = jobStore.readSharedFileStream(pickleFile)
    else:
        openFileStream = jobStore.readFileStream(pickleFile)
    with openFileStream as fileHandle:
        # NOTE(review): unpickling executes arbitrary code — the command must
        # originate from a trusted job store.
        return cPickle.load(fileHandle)
def loadJob(command, jobStore):
    """
    Reconstruct a job.Job instance from its serialised command string.

    The command is a reference to a jobStoreFileID containing the pickle file
    for the job, plus the description of the user module that must be
    importable for unpickling to succeed (see job.Job._serialiseFirstJob).

    :param str command: space-separated command, first token "scriptTree".
    :param jobStore: job store from which the pickled job is read.
    :return: the unpickled job.Job instance.
    """
    tokens = command.split()
    assert tokens[0] == "scriptTree"
    pickleFileID = tokens[1]
    className = tokens[2]
    # Imported lazily: toil may not be on sys.path when this module's top level runs.
    from toil.resource import ModuleDescriptor
    moduleDesc = ModuleDescriptor(*tokens[3:])
    if not moduleDesc.belongsToToil:
        moduleDesc = moduleDesc.localize()
    if moduleDesc.dirPath not in sys.path:
        sys.path.append(moduleDesc.dirPath)
    loadedModule = importlib.import_module(moduleDesc.name)
    # Make the job class resolvable from this module so unpickling can find it.
    sys.modules[__name__].__dict__[className] = loadedModule.__dict__[className]
    # "firstJob" lives in a shared file; all other jobs use regular file IDs.
    reader = (jobStore.readSharedFileStream if pickleFileID == "firstJob"
              else jobStore.readFileStream)
    with reader(pickleFileID) as fileHandle:
        return cPickle.load(fileHandle)
def __init__(self, memory=None, cores=None, disk=None):
    """
    This method must be called by any overriding constructor.

    :param memory: the maximum amount of memory the job will require to run,
        in bytes or as a human-readable string (passed through human2bytes
        when not None).
    :param cores: the number of CPU cores required.
    :param disk: the amount of disk required, same format as memory.
    """
    self.cores = cores
    self.memory = human2bytes(str(memory)) if memory is not None else memory
    self.disk = human2bytes(str(disk)) if disk is not None else disk
    # Private instance variables
    # Direct child jobs; see Job.addChild
    self._children = []
    # Follow-on jobs; see Job.addFollowOn
    self._followOns = []
    # Services; see Job.addService
    self._services = []
    # A follow-on, service or child of a job A is a "direct successor" of A;
    # if B is a direct successor of A, then A is a "direct predecessor" of B.
    self._directPredecessors = set()
    # Note that self.__module__ is not necessarily this module, i.e. job.py. It is the module
    # defining the class self is an instance of, which may be a subclass of Job that may be
    # defined in a different module.
    self.userModule = ModuleDescriptor.forModule(self.__module__)
    # Promised return values; see Job.rv()
    self._rvs = {}
    # Job store used to fulfil promises; None until set.
    self._promiseJobStore = None
def __init__(self, memory=None, cores=None, disk=None):
    """
    Must be called by any overriding constructor.

    :param memory: maximum amount of memory the job requires, in bytes or as
        a human-readable string; converted via human2bytes unless None.
    :param cores: number of CPU cores required.
    :param disk: amount of disk required, same format as memory.
    """
    self.cores = cores
    self.memory = memory if memory is None else human2bytes(str(memory))
    self.disk = disk if disk is None else human2bytes(str(disk))
    # Private state below.
    # Direct children of this job (Job.addChild).
    self._children = []
    # Follow-on jobs (Job.addFollowOn).
    self._followOns = []
    # Services attached to this job (Job.addService).
    self._services = []
    # Any follow-on, service or child of a job is one of its "successors";
    # conversely the job is a "predecessor" of each of those.
    self._predecessors = set()
    # self.__module__ names the module defining this instance's class, which
    # may be a Job subclass living in a different module than job.py.
    self.userModule = ModuleDescriptor.forModule(self.__module__)
    # Promised return values (Job.rv()).
    self._rvs = {}
def __init__(self, memory=sys.maxint, cpu=sys.maxint, disk=sys.maxint):
    """
    Must be called by any overriding constructor.

    :param memory: maximum number of bytes of memory the job requires.
    :param cpu: number of cores required.
    :param disk: amount of disk required.
    """
    self.memory, self.cpu, self.disk = memory, cpu, disk
    # Private state below.
    # Children (Job.addChild), follow-ons (Job.addFollowOn) and services
    # (Job.addService) attached to this job.
    self._children, self._followOns, self._services = [], [], []
    # A follow-on, service or child of this job is a "successor"; conversely
    # this job is a "predecessor" of each of those.
    self._predecessors = set()
    # Serialisation support: the module defining this instance's class.
    self.userModule = ModuleDescriptor.forModule(self.__module__)
    # Promised return values (Job.rv()).
    self._rvs = {}
def __init__(self, userFunction, *args, **kwargs):
    """
    Wrap a user function as a Job.

    :param userFunction: the function to run when this job executes.

    Remaining positional and keyword arguments are stored and later passed to
    the function; the special keys "cpu", "disk" and "memory" are removed
    from kwargs and used as the job's resource requirements instead.
    """
    # FIXME: I'd rather not duplicate the defaults here, unless absolutely necessary
    # dict.pop with a default replaces the original "in kwargs" test followed
    # by pop — one lookup instead of two, identical behavior.
    cpu = kwargs.pop("cpu", sys.maxint)
    disk = kwargs.pop("disk", sys.maxint)
    memory = kwargs.pop("memory", sys.maxint)
    Job.__init__(self, memory=memory, cpu=cpu, disk=disk)
    # Record the defining module so the function can be re-imported on a worker.
    self.userFunctionModule = ModuleDescriptor.forModule(userFunction.__module__)
    self.userFunctionName = str(userFunction.__name__)
    self._args = args
    self._kwargs = kwargs
def __init__(self, userFunction, *args, **kwargs):
    """
    Wrap a user function as a Job.

    :param userFunction: the function to run when this job executes.

    Remaining positional and keyword arguments are stored for the function;
    the special keys "cpu", "disk" and "memory" are harvested from kwargs as
    resource requirements.
    """
    # FIXME: I'd rather not duplicate the defaults here, unless absolutely necessary
    # Harvest resource requirements from kwargs, defaulting to "unlimited".
    requirements = {key: (kwargs.pop(key) if key in kwargs else sys.maxint)
                    for key in ("memory", "cpu", "disk")}
    Job.__init__(self, memory=requirements["memory"],
                 cpu=requirements["cpu"], disk=requirements["disk"])
    # The module defining the user function, needed to re-import it on a worker.
    self.userFunctionModule = ModuleDescriptor.forModule(userFunction.__module__)
    self.userFunctionName = str(userFunction.__name__)
    self._args = args
    self._kwargs = kwargs
def __init__(self, userFunction, *args, **kwargs):
    """
    Wrap a user function as a Job.

    :param userFunction: the function to run when this job executes.

    Remaining positional and keyword arguments are stored and later passed to
    the function; the special keys "cores", "disk" and "memory" are removed
    from kwargs and used as the job's resource requirements instead.
    """
    # FIXME: I'd rather not duplicate the defaults here, unless absolutely necessary
    # dict.pop with a default replaces the original "in kwargs" test followed
    # by pop — one lookup instead of two, identical behavior.
    cores = kwargs.pop("cores", None)
    disk = kwargs.pop("disk", None)
    memory = kwargs.pop("memory", None)
    Job.__init__(self, memory=memory, cores=cores, disk=disk)
    # If dill is installed pickle the user function directly,
    # else use the indirect (module + name) method.
    self.userFunctionModule = ModuleDescriptor.forModule(userFunction.__module__).globalize()
    self.userFunctionName = str(userFunction.__name__)
    self._args = args
    self._kwargs = kwargs
def __init__(self, service):
    """
    Job that manages the lifetime of a service.

    :type service: Job.Service
    """
    Job.__init__(self, memory=service.memory, cores=service.cores)
    # service.__module__ is the module defining the class service is an instance of.
    self.serviceModule = ModuleDescriptor.forModule(service.__module__).globalize()
    # The service to run, pickled
    self.pickledService = cPickle.dumps(service)
    # An empty file in the jobStore which when deleted is used to signal
    # that the service should cease; initialised in
    # Job._modifyJobGraphForServices
    self.stopFileStoreID = None
    # Similarly an empty file which when deleted is used to signal that the
    # service is established
    self.startFileStoreID = None
def create(cls, function, *args, **kwargs):
    """
    Capture the given callable and arguments as an instance of this class.

    :param callable function: The deferred action to take in the form of a function
    :param tuple args: Non-keyword arguments to the function
    :param dict kwargs: Keyword arguments to the function
    """
    # Serialize as early as possible (and deserialize as late as possible,
    # i.e. at invocation time) so that concurrently running jobs loading the
    # cache state from disk don't redundantly deserialize deferred functions.
    # The function is serialized along with both argument collections.
    pickled = [dill.dumps(part) for part in (function, args, kwargs)]
    return cls(*pickled,
               name=function.__name__,
               module=ModuleDescriptor.forModule(function.__module__).globalize())
def __init__(self, service):
    """
    Job that manages the lifetime of a service.

    :type service: Job.Service
    """
    Job.__init__(self, memory=service.memory, cores=service.cores)
    # The module defining the service's class, made resolvable on workers.
    self.serviceModule = ModuleDescriptor.forModule(service.__module__).globalize()
    self.serviceClassName = service.__class__.__name__
    # The pickled service instance itself.
    self.pickledService = cPickle.dumps(service)
    # Two empty jobStore files used as signalling flags, both initialised in
    # Job._modifyJobGraphForServices: deleting the stop file tells the
    # service to cease; deleting the start file signals that the service is
    # established.
    self.stopFileStoreID = None
    self.startFileStoreID = None
def _loadJob(command, jobStore):
    """
    Unpickles a job.Job instance by decoding the command.

    See job.Job._serialiseFirstJob and job.Job._makeJobWrappers to see how
    the Job is encoded in the command: a reference to a jobStoreFileID
    containing the pickle file for the job, plus the modules which must be
    imported so that the Job can be successfully unpickled.

    :param str command: space-separated command, first token "scriptTree".
    :param jobStore: job store from which the pickled job is read.
    :return: the unpickled job.Job instance.
    """
    tokens = command.split()
    assert tokens[0] == "scriptTree"
    # Token 2 is the job's class name; tokens 3+ describe the user module
    # that must be importable before the class can be loaded.
    Job._loadClass(tokens[2], ModuleDescriptor(*tokens[3:]))
    pickleFileID = tokens[1]
    # "firstJob" is kept as a shared file; other jobs use regular file IDs.
    if pickleFileID == "firstJob":
        stream = jobStore.readSharedFileStream(pickleFileID)
    else:
        stream = jobStore.readFileStream(pickleFileID)
    with stream as fileHandle:
        return cPickle.load(fileHandle)
def _test(self, module_name, shouldBelongToToil=False, expectedContents=None):
    """
    Round-trip a module through the resource machinery: save it as a resource
    via a mocked job store, then localize it again as a worker would,
    asserting integrity at each step.

    :param str module_name: name of the module to wrap in a ModuleDescriptor.
    :param bool shouldBelongToToil: expected value of module.belongsToToil.
    :param expectedContents: if not None, the exact set of names expected
        inside the zipped resource.
    """
    module = ModuleDescriptor.forModule(module_name)
    # Assert basic attributes and properties
    self.assertEqual(module.belongsToToil, shouldBelongToToil)
    self.assertEquals(module.name, module_name)
    if shouldBelongToToil:
        self.assertTrue(module.dirPath.endswith('/src'))
    # Before the module is saved as a resource, localize() and globalize() are identity
    # methods. This should log warnings.
    self.assertIs(module.localize(), module)
    self.assertIs(module.globalize(), module)
    # Create a mock job store ...
    jobStore = MagicMock()
    # ... to generate a fake URL for the resource ...
    url = 'file://foo.zip'
    jobStore.getSharedPublicUrl.return_value = url
    # ... and save the resource to it.
    resource = module.saveAsResourceTo(jobStore)
    # Ensure that the URL generation method is actually called, ...
    jobStore.getSharedPublicUrl.assert_called_once_with(resource.pathHash)
    # ... and ensure that writeSharedFileStream is called.
    jobStore.writeSharedFileStream.assert_called_once_with(resource.pathHash,
                                                           isProtected=False)
    # Now it gets a bit complicated: Ensure that the context manager returned by the
    # jobStore's writeSharedFileStream() method is entered and that the file handle yielded
    # by the context manager is written to once with the zipped source tree from which
    # 'toil.resource' was originally imported. Keep the zipped tree around such that we can
    # mock the download later.
    file_handle = jobStore.writeSharedFileStream.return_value.__enter__.return_value
    # The first 0 index selects the first call of write(), the second 0 selects positional
    # instead of keyword arguments, and the third 0 selects the first positional, i.e. the
    # contents. This is a bit brittle since it assumes that all the data is written in a
    # single call to write(). If more calls are made we can easily concatenate them.
    zipFile = file_handle.write.call_args_list[0][0][0]
    self.assertTrue(zipFile.startswith('PK'))  # the magic header for ZIP files
    # Check contents if requested
    if expectedContents is not None:
        with ZipFile(BytesIO(zipFile)) as _zipFile:
            self.assertEqual(set(_zipFile.namelist()), expectedContents)
    self.assertEquals(resource.url, url)
    # Now we're on the worker. Prepare the storage for localized resources
    Resource.prepareSystem()
    try:
        # Register the resource for subsequent lookup.
        resource.register()
        # Lookup the resource and ensure that the result is equal to but not the same as the
        # original resource. Lookup will also be used when we localize the module that was
        # originally used to create the resource.
        localResource = Resource.lookup(module._resourcePath)
        self.assertEquals(resource, localResource)
        self.assertIsNot(resource, localResource)
        # Now show that we can localize the module using the registered resource. Set up a
        # mock urlopen() that yields the zipped tree ...
        mock_urlopen = MagicMock()
        mock_urlopen.return_value.read.return_value = zipFile
        with patch('toil.resource.urlopen', mock_urlopen):
            # ... and use it to download and unpack the resource
            localModule = module.localize()
        # The name should be equal between original and localized resource ...
        self.assertEquals(module.name, localModule.name)
        # ... but the directory should be different.
        self.assertNotEquals(module.dirPath, localModule.dirPath)
        # Show that we can 'undo' localization. This is necessary when the user script's
        # jobs are invoked on the worker where they generate more child jobs.
        self.assertEquals(localModule.globalize(), module)
    finally:
        # Undo prepareSystem() even when an assertion fails, so the temporary
        # resource storage does not leak into subsequent tests.
        Resource.cleanSystem()
def _test(self, module_name, shouldBelongToToil=False, expectedContents=None, allowExtraContents=True):
    """
    Round-trip a module through the resource machinery: save it as a resource
    via a mocked job store, then localize it again as a worker would,
    asserting integrity at each step.

    :param str module_name: name of the module to wrap in a ModuleDescriptor.
    :param bool shouldBelongToToil: expected value of module.belongsToToil.
    :param expectedContents: if not None, names expected inside the zipped
        resource.
    :param bool allowExtraContents: when True the zip may contain additional
        entries beyond expectedContents; otherwise the sets must match exactly.
    """
    module = ModuleDescriptor.forModule(module_name)
    # Assert basic attributes and properties
    self.assertEqual(module.belongsToToil, shouldBelongToToil)
    self.assertEquals(module.name, module_name)
    if shouldBelongToToil:
        self.assertTrue(module.dirPath.endswith('/src'))
    # Before the module is saved as a resource, localize() and globalize() are identity
    # methods. This should log warnings.
    self.assertIs(module.localize(), module)
    self.assertIs(module.globalize(), module)
    # Create a mock job store ...
    jobStore = MagicMock()
    # ... to generate a fake URL for the resource ...
    url = 'file://foo.zip'
    jobStore.getSharedPublicUrl.return_value = url
    # ... and save the resource to it.
    resource = module.saveAsResourceTo(jobStore)
    # Ensure that the URL generation method is actually called, ...
    jobStore.getSharedPublicUrl.assert_called_once_with(resource.pathHash)
    # ... and ensure that writeSharedFileStream is called.
    jobStore.writeSharedFileStream.assert_called_once_with(resource.pathHash, isProtected=False)
    # Now it gets a bit complicated: Ensure that the context manager returned by the
    # jobStore's writeSharedFileStream() method is entered and that the file handle yielded
    # by the context manager is written to once with the zipped source tree from which
    # 'toil.resource' was originally imported. Keep the zipped tree around such that we can
    # mock the download later.
    file_handle = jobStore.writeSharedFileStream.return_value.__enter__.return_value
    # The first 0 index selects the first call of write(), the second 0 selects positional
    # instead of keyword arguments, and the third 0 selects the first positional, i.e. the
    # contents. This is a bit brittle since it assumes that all the data is written in a
    # single call to write(). If more calls are made we can easily concatenate them.
    zipFile = file_handle.write.call_args_list[0][0][0]
    self.assertTrue(zipFile.startswith('PK'))  # the magic header for ZIP files
    # Check contents if requested
    if expectedContents is not None:
        with ZipFile(BytesIO(zipFile)) as _zipFile:
            actualContents = set(_zipFile.namelist())
            if allowExtraContents:
                self.assertTrue(actualContents.issuperset(expectedContents))
            else:
                self.assertEqual(actualContents, expectedContents)
    self.assertEquals(resource.url, url)
    # Now we're on the worker. Prepare the storage for localized resources
    Resource.prepareSystem()
    try:
        # Register the resource for subsequent lookup.
        resource.register()
        # Lookup the resource and ensure that the result is equal to but not the same as the
        # original resource. Lookup will also be used when we localize the module that was
        # originally used to create the resource.
        localResource = Resource.lookup(module._resourcePath)
        self.assertEquals(resource, localResource)
        self.assertIsNot(resource, localResource)
        # Now show that we can localize the module using the registered resource. Set up a mock
        # urlopen() that yields the zipped tree ...
        mock_urlopen = MagicMock()
        mock_urlopen.return_value.read.return_value = zipFile
        with patch('toil.resource.urlopen', mock_urlopen):
            # ... and use it to download and unpack the resource
            localModule = module.localize()
        # The name should be equal between original and localized resource ...
        self.assertEquals(module.name, localModule.name)
        # ... but the directory should be different.
        self.assertNotEquals(module.dirPath, localModule.dirPath)
        # Show that we can 'undo' localization. This is necessary when the user script's jobs
        # are invoked on the worker where they generate more child jobs.
        self.assertEquals(localModule.globalize(), module)
    finally:
        Resource.cleanSystem()