def _mockJobDescription(self, jobStoreID=None, command=None, **kwargs):
    """
    Build a stand-in JobDescription for tests.

    A genuine JobDescription is constructed from ``kwargs`` and then has its
    command and/or ID overwritten after the fact.

    :param jobStoreID: If not None, assigned to the description's
        ``jobStoreID`` attribute.
    :param command: If not None, assigned to the description's ``command``
        attribute.
    :param kwargs: Passed straight through to the JobDescription constructor.
    :return: The patched JobDescription instance.
    """
    # TODO: Use a real unittest.Mock? For now we make a real instance and
    # just hack it up.
    mocked = JobDescription(**kwargs)

    # The constructor call above does not receive a command or an ID
    # (normally the job serialization logic fills those in), so any that
    # were requested are patched onto the instance here: command first,
    # then the ID.
    requested = (("command", command), ("jobStoreID", jobStoreID))
    for attribute, value in requested:
        if value is not None:
            setattr(mocked, attribute, value)

    return mocked
def testJobDescriptionSequencing(self):
    """
    Check the order in which a JobDescription reports successors as ready.

    Expected progression: nothing while the description still has a
    command, then the child, then the follow-on, and finally None once
    every successor has been filtered out.
    """

    def everythingExcept(excludedID):
        # Predicate for filterSuccessors() that keeps every ID but one.
        return lambda jID: jID != excludedID

    description = JobDescription(command='command', requirements={}, jobName='unimportant')
    description.addChild('child')
    description.addFollowOn('followOn')

    # While the description still has its own command, no successor is ready.
    self.assertEqual(list(description.nextSuccessors()), [])

    # Once the command is cleared, the child becomes ready.
    description.command = None
    self.assertEqual(list(description.nextSuccessors()), ['child'])

    # With the child filtered away, the follow-on becomes ready.
    description.filterSuccessors(everythingExcept('child'))
    self.assertEqual(list(description.nextSuccessors()), ['followOn'])

    # With the follow-on gone too there is nothing left to do, which is
    # reported as None so that it is distinct from an empty list.
    description.filterSuccessors(everythingExcept('followOn'))
    self.assertEqual(description.nextSuccessors(), None)
def _buildToilState(self, jobDesc: JobDescription) -> None:
    """
    Traverse the tree of jobs down from the subtree root JobDescription
    (jobDesc), building the ToilState class.

    A job that has a command, is a checkpoint with a saved checkpoint
    command, has services, or has no successors left is announced on
    ``self.bus`` with a JobUpdatedMessage and the traversal stops there.
    Otherwise the job's successor count is recorded in
    ``self.successorCounts``, each ready successor gets this job added to
    ``self.successor_to_predecessors``, and the traversal recurses into
    successors whose predecessors are all accounted for. Successors still
    waiting on other predecessors are parked in
    ``self.jobsToBeScheduledWithMultiplePredecessors``.

    :param jobDesc: The description for the root job of the workflow being run.
    """
    # All of the bookkeeping maps below are keyed by the string form of the ID.
    jobID = str(jobDesc.jobStoreID)

    # Evaluate each readiness condition exactly once so that the branch
    # decision, the debug log and the successor loop all agree.
    hasCommand = jobDesc.command is not None
    hasCheckpoint = (isinstance(jobDesc, CheckpointJobDescription)
                     and jobDesc.checkpoint is not None)
    hasServices = len(jobDesc.services) > 0
    readySuccessors = jobDesc.nextSuccessors()
    nothingLeft = readySuccessors is None

    # If the job description has a command, is a checkpoint, has services
    # or is ready to be deleted it is ready to be processed (i.e. it is updated)
    if hasCommand or hasCheckpoint or hasServices or nothingLeft:
        logger.debug(
            "Found job to run: %s, with command: %s, with checkpoint: %s, with "
            "services: %s, with no next successors: %s",
            jobDesc.jobStoreID,
            hasCommand,
            hasCheckpoint,
            hasServices,
            nothingLeft,
        )

        # Set the job updated because we should be able to make progress on it.
        self.bus.put(JobUpdatedMessage(jobID, 0))

        if hasCheckpoint:
            # Restore the saved checkpoint command as the command to run.
            jobDesc.command = jobDesc.checkpoint
        return

    # There exist successors
    logger.debug(
        "Adding job: %s to the state with %s successors",
        jobDesc.jobStoreID,
        len(readySuccessors),
    )

    # Record the number of successors
    self.successorCounts[jobID] = len(readySuccessors)

    def processSuccessorWithMultiplePredecessors(
            successor: JobDescription, successorID: str) -> None:
        # The successor's ID is passed in explicitly rather than read from
        # the enclosing loop variable, so this helper does not depend on
        # being called from inside the loop body.

        # If jobDesc is not reported as complete by the successor
        # NOTE(review): predecessorsFinished is keyed by the raw
        # jobStoreID rather than its str() form -- confirm this matches
        # what other writers of that set store.
        if jobDesc.jobStoreID not in successor.predecessorsFinished:
            # Update the successor's status to mark the predecessor complete
            successor.predecessorsFinished.add(jobDesc.jobStoreID)

        # If the successor has no predecessors to finish
        assert len(successor.predecessorsFinished) <= successor.predecessorNumber
        if len(successor.predecessorsFinished) == successor.predecessorNumber:
            # It is ready to be run, so remove it from the set of waiting jobs
            self.jobsToBeScheduledWithMultiplePredecessors.remove(successorID)

            # Recursively consider the successor
            self._buildToilState(successor)

    # For each successor
    for successorJobStoreID in readySuccessors:
        if successorJobStoreID not in self.successor_to_predecessors:
            # The successor does not yet point back at a predecessor, so
            # we have not yet considered it.

            # Add the job as a predecessor
            self.successor_to_predecessors[successorJobStoreID] = {jobID}

            # We load the successor job
            successor = self.get_job(successorJobStoreID)

            if successor.predecessorNumber > 1:
                # The successor has multiple predecessors: put it in the
                # set of waiting successor jobs, then check whether it
                # can already be released.
                assert successorJobStoreID not in self.jobsToBeScheduledWithMultiplePredecessors
                self.jobsToBeScheduledWithMultiplePredecessors.add(
                    successorJobStoreID)
                processSuccessorWithMultiplePredecessors(
                    successor, successorJobStoreID)
            else:
                # The successor has only this job as a predecessor so
                # recursively consider the successor
                self._buildToilState(successor)
        else:
            # We've already seen the successor

            # Add the job as a predecessor. The set holds string IDs, so
            # the duplicate check has to compare the string form as well.
            predecessors = self.successor_to_predecessors[successorJobStoreID]
            assert jobID not in predecessors
            predecessors.add(jobID)

            # If the successor has multiple predecessors
            if successorJobStoreID in self.jobsToBeScheduledWithMultiplePredecessors:
                # Get the successor from cache
                successor = self.get_job(successorJobStoreID)
                processSuccessorWithMultiplePredecessors(
                    successor, successorJobStoreID)