示例#1
0
def loadJob(command, jobStore):
    """
    Unpickle a job.Job instance encoded in the given command string.

    See job.Job._serialiseFirstJob and job.Job._makeJobWrappers for how the
    Job is encoded into the command. The command is essentially a reference
    to a jobStoreFileID holding the job's pickle file, plus a module
    descriptor that must be imported so the Job class can be resolved
    during unpickling.
    """
    tokens = command.split()
    assert "scriptTree" == tokens[0]
    fileStoreID = tokens[1]
    className = tokens[2]
    # Import lazily: toil may not be on sys.path when this module's top level runs.
    from toil.resource import ModuleDescriptor
    descriptor = ModuleDescriptor(*tokens[3:])
    if not descriptor.belongsToToil:
        descriptor = descriptor.localize()
    if descriptor.dirPath not in sys.path:
        sys.path.append(descriptor.dirPath)
    userModule = importlib.import_module(descriptor.name)
    # Re-export the job class from this module so unpickling can resolve it here.
    sys.modules[__name__].__dict__[className] = userModule.__dict__[className]
    if fileStoreID == "firstJob":
        stream = jobStore.readSharedFileStream(fileStoreID)
    else:
        stream = jobStore.readFileStream(fileStoreID)
    with stream as fileHandle:
        return cPickle.load(fileHandle)
示例#2
0
def loadJob(command, jobStore):
    """
    Unpickles a job.Job instance by decoding the command.

    See job.Job._serialiseFirstJob and job.Job._makeJobWrappers to see how
    the Job is encoded in the command. Essentially the command is a
    reference to a jobStoreFileID containing the pickle file for the job
    and a list of modules which must be imported so that the Job can be
    successfully unpickled.

    :param str command: whitespace-separated tokens of the form
        "scriptTree <fileStoreID> <jobClassName> <module descriptor args...>"
    :param jobStore: job store from which the pickled job is read back
    :return: the unpickled job.Job instance
    """
    commandTokens = command.split()
    assert "scriptTree" == commandTokens[0]
    pickleFile = commandTokens[1]
    jobClassName = commandTokens[2]
    # must import lazily because toil might not be on sys.path when the top-level of this module is run
    from toil.resource import ModuleDescriptor
    userModule = ModuleDescriptor(*commandTokens[3:])
    if not userModule.belongsToToil:
        # User-supplied modules may need to be fetched/unpacked locally first.
        userModule = userModule.localize()
    if userModule.dirPath not in sys.path:
        sys.path.append(userModule.dirPath)
    userModule = importlib.import_module(userModule.name)
    # Bind the job class into this module's namespace so that unpickling
    # (which looks classes up by module and name) can resolve it here.
    thisModule = sys.modules[__name__]
    thisModule.__dict__[jobClassName] = userModule.__dict__[jobClassName]
    if pickleFile == "firstJob":
        # The root job is stored as a shared file under this well-known name.
        openFileStream = jobStore.readSharedFileStream( pickleFile )
    else:
        openFileStream = jobStore.readFileStream( pickleFile )
    with openFileStream as fileHandle:
        return cPickle.load( fileHandle )
示例#3
0
文件: job.py 项目: adamnovak/toil
    def __init__(self, memory=None, cores=None, disk=None):
        """
        This method must be called by any overriding constructor.

        Memory is the maximum number of bytes of memory the job will require
        to run. Cores is the number of CPU cores required.
        """
        self.cores = cores
        # Normalise human-readable sizes (e.g. "2G") into byte counts; None passes through.
        self.memory = None if memory is None else human2bytes(str(memory))
        self.disk = None if disk is None else human2bytes(str(disk))

        # --- private state ---
        self._children = []        # see Job.addChild
        self._followOns = []       # see Job.addFollowOn
        self._services = []        # see Job.addService
        # A follow-on, service or child of a job A is a "direct successor" of A;
        # if B is a direct successor of A, then A is a "direct predecessor" of B.
        self._directPredecessors = set()
        # self.__module__ is not necessarily job.py: it is the module defining
        # the concrete class of self, which may be a Job subclass declared elsewhere.
        self.userModule = ModuleDescriptor.forModule(self.__module__)
        self._rvs = {}             # see Job.rv()
        self._promiseJobStore = None
示例#4
0
文件: job.py 项目: benedictpaten/toil
 def __init__(self, memory=None, cores=None, disk=None):
     """
     This method must be called by any overriding constructor.

     Memory is the maximum number of bytes of memory the job will
     require to run. Cores is the number of CPU cores required.
     Disk is the local disk space required, also in bytes.
     """
     self.cores = cores
     # human2bytes turns strings like "2G" into byte counts; None means "unspecified".
     self.memory = human2bytes(str(memory)) if memory is not None else memory
     self.disk = human2bytes(str(disk)) if disk is not None else disk
     #Private class variables

     #See Job.addChild
     self._children = []
     #See Job.addFollowOn
     self._followOns = []
     #See Job.addService
     self._services = []
     #A follow-on, service or child of a job A is a "successor" of A; if B
     #is a successor of A, then A is a predecessor of B.
     self._predecessors = set()
     # Note that self.__module__ is not necessarily this module, i.e. job.py. It is the module
     # defining the class self is an instance of, which may be a subclass of Job that may be
     # defined in a different module.
     self.userModule = ModuleDescriptor.forModule(self.__module__)
     #See Job.rv()
     self._rvs = {}
示例#5
0
    def __init__(self, memory=sys.maxint, cpu=sys.maxint, disk=sys.maxint):
        """
        This method must be called by any overriding constructor.

        Memory is the maximum number of bytes of memory the job will require
        to run. Cpu is the number of cores required.
        """
        self.memory = memory
        self.cpu = cpu
        self.disk = disk

        # --- private state ---
        self._children = []     # see Job.addChild
        self._followOns = []    # see Job.addFollowOn
        self._services = []     # see Job.addService
        # A follow-on, service or child of a job A is a "successor" of A;
        # if B is a successor of A, then A is a predecessor of B.
        self._predecessors = set()
        # Records the module defining the concrete class of self; used for serialisation.
        self.userModule = ModuleDescriptor.forModule(self.__module__)
        self._rvs = {}          # see Job.rv()
示例#6
0
 def __init__(self, memory=sys.maxint, cpu=sys.maxint, disk=sys.maxint):
     """
     This method must be called by any overriding constructor.

     Memory is the maximum number of bytes of memory the job will
     require to run. Cpu is the number of cores required. Disk is the
     local disk space required; each defaults to sys.maxint.
     """
     self.memory = memory
     self.cpu = cpu
     self.disk = disk
     #Private class variables

     #See Job.addChild
     self._children = []
     #See Job.addFollowOn
     self._followOns = []
     #See Job.addService
     self._services = []
     #A follow-on, service or child of a job A is a "successor" of A; if B
     #is a successor of A, then A is a predecessor of B.
     self._predecessors = set()
     #Variables used for serialisation: userModule records which module
     #defines the concrete class of self (possibly a Job subclass).
     self.userModule = ModuleDescriptor.forModule(self.__module__)
     #See Job.rv()
     self._rvs = {}
示例#7
0
 def __init__(self, userFunction, *args, **kwargs):
     """
     Store userFunction (by module and name) together with the positional
     and remaining keyword arguments it should later be invoked with.

     The keyword arguments 'cpu', 'disk' and 'memory' are treated as
     resource requirements: they are removed from kwargs and forwarded to
     Job.__init__ instead of to the user function.
     """
     # FIXME: I'd rather not duplicate the defaults here, unless absolutely necessary
     # kwargs.pop(key, default) is the idiomatic single-lookup form of
     # "pop if present, else default" — behavior is identical.
     cpu = kwargs.pop("cpu", sys.maxint)
     disk = kwargs.pop("disk", sys.maxint)
     memory = kwargs.pop("memory", sys.maxint)
     Job.__init__(self, memory=memory, cpu=cpu, disk=disk)
     # The function itself is not stored; only its module and name are kept
     # so it can be looked up again after deserialisation.
     self.userFunctionModule = ModuleDescriptor.forModule(userFunction.__module__)
     self.userFunctionName = str(userFunction.__name__)
     self._args = args
     self._kwargs = kwargs
示例#8
0
 def __init__(self, userFunction, *args, **kwargs):
     """
     Record userFunction (by module and name) plus the arguments it should
     eventually be called with.

     The keyword arguments 'cpu', 'disk' and 'memory' are resource
     requirements: they are removed from kwargs and passed on to
     Job.__init__ rather than to the user function.
     """
     # FIXME: I'd rather not duplicate the defaults here, unless absolutely necessary
     # kwargs.pop(key, default) replaces the conditional-pop idiom: one
     # lookup instead of two, identical behavior.
     cpu = kwargs.pop("cpu", sys.maxint)
     disk = kwargs.pop("disk", sys.maxint)
     memory = kwargs.pop("memory", sys.maxint)
     Job.__init__(self, memory=memory, cpu=cpu, disk=disk)
     # Only the function's module and name are stored, so it can be looked
     # up again after deserialisation.
     self.userFunctionModule = ModuleDescriptor.forModule(
         userFunction.__module__)
     self.userFunctionName = str(userFunction.__name__)
     self._args = args
     self._kwargs = kwargs
示例#9
0
文件: job.py 项目: adamnovak/toil
    def __init__(self, userFunction, *args, **kwargs):
        """
        Store userFunction (by module and name) along with the arguments to
        later invoke it with.

        The keyword arguments 'cores', 'disk' and 'memory' are resource
        requirements: they are removed from kwargs and forwarded to
        Job.__init__ rather than to the user function. Each defaults to None.
        """
        # FIXME: I'd rather not duplicate the defaults here, unless absolutely necessary
        # kwargs.pop(key, default) is the idiomatic, single-lookup form of
        # "pop if present, else default".
        cores = kwargs.pop("cores", None)
        disk = kwargs.pop("disk", None)
        memory = kwargs.pop("memory", None)
        Job.__init__(self, memory=memory, cores=cores, disk=disk)
        #If dill is installed pickle the user function directly

        #else use indirect method
        self.userFunctionModule = ModuleDescriptor.forModule(userFunction.__module__).globalize()
        self.userFunctionName = str(userFunction.__name__)
        self._args = args
        self._kwargs = kwargs
示例#10
0
文件: job.py 项目: benedictpaten/toil
 def __init__(self, userFunction, *args, **kwargs):
     """
     Store userFunction (by module and name) along with the arguments to
     later invoke it with.

     The keyword arguments 'cores', 'disk' and 'memory' are resource
     requirements: they are removed from kwargs and forwarded to
     Job.__init__ rather than to the user function. Each defaults to None.
     """
     # FIXME: I'd rather not duplicate the defaults here, unless absolutely necessary
     # kwargs.pop(key, default) is the idiomatic, single-lookup form of
     # "pop if present, else default".
     cores = kwargs.pop("cores", None)
     disk = kwargs.pop("disk", None)
     memory = kwargs.pop("memory", None)
     Job.__init__(self, memory=memory, cores=cores, disk=disk)
     #If dill is installed pickle the user function directly

     #else use indirect method
     self.userFunctionModule = ModuleDescriptor.forModule(userFunction.__module__).globalize()
     self.userFunctionName = str(userFunction.__name__)
     self._args = args
     self._kwargs = kwargs
示例#11
0
文件: job.py 项目: adamnovak/toil
 def __init__(self, service):
     """
     Wrap the given service as a job: inherit the service's memory and
     cores requirements and keep the service itself in pickled form.

     :type service: Job.Service
     """
     Job.__init__(self, memory=service.memory, cores=service.cores)
     # service.__module__ is the module defining the class service is an instance of.
     self.serviceModule = ModuleDescriptor.forModule(service.__module__).globalize()
     #The service to run, pickled
     self.pickledService = cPickle.dumps(service)
     #An empty file in the jobStore which when deleted is used to signal
     #that the service should cease; initialised in
     #Job._modifyJobGraphForServices
     self.stopFileStoreID = None
     #Similarly an empty file which when deleted is used to signal that the
     #service is established
     self.startFileStoreID = None
示例#12
0
    def create(cls, function, *args, **kwargs):
        """
        Capture the given callable and arguments as an instance of this class.

        :param callable function: The deferred action to take in the form of a function
        :param tuple args: Non-keyword arguments to the function
        :param dict kwargs: Keyword arguments to the function
        """
        # Serialize as early as possible and deserialize as late as possible
        # (i.e. at invocation time): this avoids redundantly deserializing
        # deferred functions for concurrently running jobs whenever the cache
        # state is loaded from disk. The function and both argument
        # collections are each pickled with dill.
        pickled = [dill.dumps(item) for item in (function, args, kwargs)]
        return cls(*pickled,
                   name=function.__name__,
                   module=ModuleDescriptor.forModule(function.__module__).globalize())
示例#13
0
文件: job.py 项目: benedictpaten/toil
 def __init__(self, service):
     """
     Wrap the given service as a job: inherit the service's memory and
     cores requirements and keep the service itself in pickled form.

     :type service: Job.Service
     """
     Job.__init__(self, memory=service.memory, cores=service.cores)
     # service.__module__ is the module defining the class service is an instance of.
     self.serviceModule = ModuleDescriptor.forModule(service.__module__).globalize()
     # Name of the concrete service class.
     self.serviceClassName = service.__class__.__name__
     #The service to run, pickled
     self.pickledService = cPickle.dumps(service)
     #An empty file in the jobStore which when deleted is used to signal
     #that the service should cease; initialised in
     #Job._modifyJobGraphForServices
     self.stopFileStoreID = None
     #Similarly an empty file which when deleted is used to signal that the
     #service is established
     self.startFileStoreID = None
示例#14
0
文件: job.py 项目: benedictpaten/toil
 def _loadJob(command, jobStore):
     """
     Unpickles a job.Job instance by decoding the command. See job.Job._serialiseFirstJob and
     job.Job._makeJobWrappers to see how the Job is encoded in the command. Essentially the
     command is a reference to a jobStoreFileID containing the pickle file for the job and a
     list of modules which must be imported so that the Job can be successfully unpickled.

     :param str command: tokens of the form
         "scriptTree <fileStoreID> <jobClassName> <module descriptor args...>"
     :param jobStore: job store from which the pickled job is read
     :return: the unpickled job.Job instance
     """
     commandTokens = command.split()
     assert "scriptTree" == commandTokens[0]
     # Tokens after the class name describe the user module that must be
     # importable before the job class can be resolved.
     userModule = ModuleDescriptor(*(commandTokens[3:]))
     Job._loadClass(commandTokens[2], userModule)
     pickleFile = commandTokens[1]
     if pickleFile == "firstJob":
         # The root job is stored as a shared file under this well-known name.
         openFileStream = jobStore.readSharedFileStream( pickleFile )
     else:
         openFileStream = jobStore.readFileStream( pickleFile )
     with openFileStream as fileHandle:
         return cPickle.load( fileHandle )
示例#15
0
    def _test(self,
              module_name,
              shouldBelongToToil=False,
              expectedContents=None):
        """
        Create a ModuleDescriptor for the given module, save it as a resource
        through a mocked job store, then verify that the resource can be
        registered, looked up, localized and globalized on a simulated worker.
        """
        module = ModuleDescriptor.forModule(module_name)
        # Assert basic attributes and properties
        self.assertEqual(module.belongsToToil, shouldBelongToToil)
        self.assertEqual(module.name, module_name)
        if shouldBelongToToil:
            self.assertTrue(module.dirPath.endswith('/src'))

        # Before the module is saved as a resource, localize() and globalize() are identity
        # methods. This should log warnings.
        self.assertIs(module.localize(), module)
        self.assertIs(module.globalize(), module)
        # Create a mock job store ...
        jobStore = MagicMock()
        # ... to generate a fake URL for the resource ...
        url = 'file://foo.zip'
        jobStore.getSharedPublicUrl.return_value = url
        # ... and save the resource to it.
        resource = module.saveAsResourceTo(jobStore)
        # Ensure that the URL generation method is actually called, ...
        jobStore.getSharedPublicUrl.assert_called_once_with(resource.pathHash)
        # ... and that ensure that writeSharedFileStream is called.
        jobStore.writeSharedFileStream.assert_called_once_with(
            resource.pathHash, isProtected=False)
        # Now it gets a bit complicated: Ensure that the context manager returned by the
        # jobStore's writeSharedFileStream() method is entered and that the file handle yielded
        # by the context manager is written to once with the zipped source tree from which
        # 'toil.resource' was originally imported. Keep the zipped tree around such that we can
        # mock the download later.
        file_handle = jobStore.writeSharedFileStream.return_value.__enter__.return_value
        # The first 0 index selects the first call of write(), the second 0 selects positional
        # instead of keyword arguments, and the third 0 selects the first positional, i.e. the
        # contents. This is a bit brittle since it assumes that all the data is written in a
        # single call to write(). If more calls are made we can easily concatenate them.
        zipFile = file_handle.write.call_args_list[0][0][0]
        self.assertTrue(
            zipFile.startswith('PK'))  # the magic header for ZIP files

        # Check contents if requested
        if expectedContents is not None:
            with ZipFile(BytesIO(zipFile)) as _zipFile:
                self.assertEqual(set(_zipFile.namelist()), expectedContents)

        self.assertEqual(resource.url, url)
        # Now we're on the worker. Prepare the storage for localized resources
        Resource.prepareSystem()
        try:
            # Register the resource for subsequent lookup.
            resource.register()
            # Lookup the resource and ensure that the result is equal to but not the same as the
            # original resource. Lookup will also be used when we localize the module that was
            # originally used to create the resource.
            localResource = Resource.lookup(module._resourcePath)
            self.assertEqual(resource, localResource)
            self.assertIsNot(resource, localResource)
            # Now show that we can localize the module using the registered resource. Set up a mock
            # urlopen() that yields the zipped tree ...
            mock_urlopen = MagicMock()
            mock_urlopen.return_value.read.return_value = zipFile
            with patch('toil.resource.urlopen', mock_urlopen):
                # ... and use it to download and unpack the resource
                localModule = module.localize()
            # The name should be equal between original and localized resource ...
            self.assertEqual(module.name, localModule.name)
            # ... but the directory should be different.
            self.assertNotEqual(module.dirPath, localModule.dirPath)
            # Show that we can 'undo' localization. This is necessary when the user script's jobs are
            # invoked on the worker where they generate more child jobs.
            self.assertEqual(localModule.globalize(), module)
        finally:
            # Clean up the per-worker resource storage even on failure so that
            # subsequent tests start from a clean slate (prepareSystem() would
            # otherwise leak its state). NOTE(review): assumes Resource.cleanSystem()
            # exists in this version, as it does in the newer variant of this test.
            Resource.cleanSystem()
示例#16
0
    def _test(self, module_name,
              shouldBelongToToil=False, expectedContents=None, allowExtraContents=True):
        """
        Create a ModuleDescriptor for the given module, save it as a resource
        via a mocked job store, then verify that the resource can be
        registered, looked up, localized and globalized on a simulated worker.
        """
        module = ModuleDescriptor.forModule(module_name)
        # Assert basic attributes and properties
        self.assertEqual(module.belongsToToil, shouldBelongToToil)
        self.assertEqual(module.name, module_name)
        if shouldBelongToToil:
            self.assertTrue(module.dirPath.endswith('/src'))

        # Before the module is saved as a resource, localize() and globalize() are identity
        # methods. This should log warnings.
        self.assertIs(module.localize(), module)
        self.assertIs(module.globalize(), module)
        # Create a mock job store ...
        jobStore = MagicMock()
        # ... to generate a fake URL for the resource ...
        url = 'file://foo.zip'
        jobStore.getSharedPublicUrl.return_value = url
        # ... and save the resource to it.
        resource = module.saveAsResourceTo(jobStore)
        # Ensure that the URL generation method is actually called, ...
        jobStore.getSharedPublicUrl.assert_called_once_with(resource.pathHash)
        # ... and that ensure that writeSharedFileStream is called.
        jobStore.writeSharedFileStream.assert_called_once_with(resource.pathHash,
                                                               isProtected=False)
        # Now it gets a bit complicated: Ensure that the context manager returned by the
        # jobStore's writeSharedFileStream() method is entered and that the file handle yielded
        # by the context manager is written to once with the zipped source tree from which
        # 'toil.resource' was originally imported. Keep the zipped tree around such that we can
        # mock the download later.
        file_handle = jobStore.writeSharedFileStream.return_value.__enter__.return_value
        # The first 0 index selects the first call of write(), the second 0 selects positional
        # instead of keyword arguments, and the third 0 selects the first positional, i.e. the
        # contents. This is a bit brittle since it assumes that all the data is written in a
        # single call to write(). If more calls are made we can easily concatenate them.
        zipFile = file_handle.write.call_args_list[0][0][0]
        self.assertTrue(zipFile.startswith('PK'))  # the magic header for ZIP files

        # Check contents if requested
        if expectedContents is not None:
            with ZipFile(BytesIO(zipFile)) as _zipFile:
                actualContents = set(_zipFile.namelist())
                if allowExtraContents:
                    self.assertTrue(actualContents.issuperset(expectedContents))
                else:
                    self.assertEqual(actualContents, expectedContents)

        self.assertEqual(resource.url, url)
        # Now we're on the worker. Prepare the storage for localized resources
        Resource.prepareSystem()
        try:
            # Register the resource for subsequent lookup.
            resource.register()
            # Lookup the resource and ensure that the result is equal to but not the same as the
            # original resource. Lookup will also be used when we localize the module that was
            # originally used to create the resource.
            localResource = Resource.lookup(module._resourcePath)
            self.assertEqual(resource, localResource)
            self.assertIsNot(resource, localResource)
            # Now show that we can localize the module using the registered resource. Set up a mock
            # urlopen() that yields the zipped tree ...
            mock_urlopen = MagicMock()
            mock_urlopen.return_value.read.return_value = zipFile
            with patch('toil.resource.urlopen', mock_urlopen):
                # ... and use it to download and unpack the resource
                localModule = module.localize()
            # The name should be equal between original and localized resource ...
            self.assertEqual(module.name, localModule.name)
            # ... but the directory should be different.
            self.assertNotEqual(module.dirPath, localModule.dirPath)
            # Show that we can 'undo' localization. This is necessary when the user script's jobs
            #  are invoked on the worker where they generate more child jobs.
            self.assertEqual(localModule.globalize(), module)
        finally:
            Resource.cleanSystem()