def testJobDescription(self):
    """
    Tests the public interface of a JobDescription.

    Builds a description from a command and a requirements dict, then checks
    that the requirement attributes are exposed (with ``cores`` and
    ``preemptable`` converted to ``int`` and ``bool``), that a fresh
    description reports no successors or services, and that two descriptions
    built from identical arguments do not compare equal.
    """
    command = "by your command"
    # 4 GiB each. Note that ``2^32`` would be a bitwise XOR (== 34) in
    # Python, not a power, so the exponent operator is used here.
    memory = 2 ** 32
    disk = 2 ** 32
    # Deliberately a string and an int: the assertions below check that they
    # come back as int(cores) and bool(preemptable).
    cores = "1"
    preemptable = 1

    def makeDescription():
        # Build every description from a fresh requirements dict so that the
        # two instances compared below share no mutable state.
        return JobDescription(command=command,
                              requirements={"memory": memory,
                                            "cores": cores,
                                            "disk": disk,
                                            "preemptable": preemptable},
                              jobName='testJobGraph',
                              unitName='noName')

    j = makeDescription()

    # Check attributes
    self.assertEqual(j.command, command)
    self.assertEqual(j.memory, memory)
    self.assertEqual(j.disk, disk)
    self.assertEqual(j.cores, int(cores))
    self.assertEqual(j.preemptable, bool(preemptable))
    self.assertIsInstance(j.jobStoreID, TemporaryID)

    # A freshly built description has no successors or services of any kind
    self.assertEqual(list(j.successorsAndServiceHosts()), [])
    self.assertEqual(list(j.allSuccessors()), [])
    self.assertEqual(list(j.serviceHostIDsInBatches()), [])
    self.assertEqual(list(j.services), [])
    self.assertEqual(list(j.nextSuccessors()), [])
    self.assertEqual(sum(len(level) for level in j.stack), 0)
    self.assertEqual(j.predecessorsFinished, set())
    self.assertIsNone(j.logJobStoreFileID)

    # Check equals function (should be based on object identity and not contents)
    j2 = makeDescription()
    self.assertNotEqual(j, j2)
def testJobDescriptionSequencing(self):
    """
    Checks the order in which a JobDescription reports its ready successors:
    nothing while it still has a command, then its child, then its follow-on,
    and finally None once every successor has been filtered out.
    """

    def allExcept(excludedID):
        # Predicate for filterSuccessors() that keeps every ID but one.
        return lambda jID: jID != excludedID

    desc = JobDescription(command='command', requirements={}, jobName='unimportant')
    desc.addChild('child')
    desc.addFollowOn('followOn')

    # While the job still has its own command, no successor is ready.
    ready = list(desc.nextSuccessors())
    self.assertEqual(ready, [])

    # Clearing the command makes the child ready.
    desc.command = None
    ready = list(desc.nextSuccessors())
    self.assertEqual(ready, ['child'])

    # With the child removed, the follow-on becomes ready.
    desc.filterSuccessors(allExcept('child'))
    ready = list(desc.nextSuccessors())
    self.assertEqual(ready, ['followOn'])

    # With the follow-on removed too, there is nothing left to do. That is
    # reported as None, which is distinct from an empty list.
    desc.filterSuccessors(allExcept('followOn'))
    remaining = desc.nextSuccessors()
    self.assertEqual(remaining, None)
def nextChainable(predecessor: JobDescription, jobStore: AbstractJobStore, config: Config) -> Optional[JobDescription]:
    """
    Returns the next chainable job's JobDescription after the given predecessor
    JobDescription, if one exists, or None if the chain must terminate.

    The chain terminates unless the predecessor has exactly one ready
    successor, and that successor needs no more memory, cores or disk than
    the predecessor, has the same preemptability, has a single predecessor,
    has no services, and is not a checkpoint job.

    :param predecessor: The job to chain from
    :param jobStore: The JobStore to fetch JobDescriptions from.
    :param config: The configuration for the current run. Not consulted by
           the checks in this function.
    :returns: The loaded JobDescription of the successor to chain to, or None.
    """
    # A checkpoint job that still holds its checkpoint must not be chained from.
    hasCheckpoint = (isinstance(predecessor, CheckpointJobDescription)
                     and predecessor.checkpoint is not None)

    # If no more jobs to run or services not finished, quit
    if len(predecessor.stack) == 0 or len(predecessor.services) > 0 or hasCheckpoint:
        logger.debug(
            "Stopping running chain of jobs: length of stack: %s, services: %s, checkpoint: %s",
            len(predecessor.stack), len(predecessor.services), hasCheckpoint)
        return None

    if (len(predecessor.stack) > 1
            and len(predecessor.stack[-1]) > 0
            and len(predecessor.stack[-2]) > 0):
        # TODO: Without a real stack list we can freely mutate, we can't chain
        # to a child, which may branch, and then go back and do the follow-ons
        # of the original job.
        # TODO: Go back to a free-form stack list and require some kind of
        # stack build phase?
        logger.debug(
            "Stopping running chain of jobs because job has both children and follow-ons"
        )
        return None

    # Get the next set of jobs to run
    jobs = predecessor.nextSuccessors()
    # nextSuccessors() may return None rather than an empty collection when
    # nothing is left to do; treat that like "no ready successors" instead of
    # failing on len(None).
    if jobs is None or len(jobs) == 0:
        # If there are no jobs, we might just not have any children.
        logger.debug(
            "Stopping running chain of jobs because job has no ready children or follow-ons"
        )
        return None

    # If there are 2 or more jobs to run in parallel we quit
    if len(jobs) >= 2:
        logger.debug(
            "No more jobs can run in series by this worker,"
            " it's got %i children", len(jobs) - 1)
        return None

    # Grab the only job that should be there.
    successorID = next(iter(jobs))

    # Load the successor JobDescription
    successor = jobStore.load(successorID)

    # We check the requirements of the successor to see if we can run it
    # within the current worker
    if successor.memory > predecessor.memory:
        logger.debug("We need more memory for the next job, so finishing")
        return None
    if successor.cores > predecessor.cores:
        logger.debug("We need more cores for the next job, so finishing")
        return None
    if successor.disk > predecessor.disk:
        logger.debug("We need more disk for the next job, so finishing")
        return None
    if successor.preemptable != predecessor.preemptable:
        logger.debug(
            "Preemptability is different for the next job, returning to the leader"
        )
        return None
    if successor.predecessorNumber > 1:
        logger.debug(
            "The next job has multiple predecessors; we must return to the leader."
        )
        return None

    if len(successor.services) > 0:
        logger.debug(
            "The next job requires services that will not yet be started; we must return to the leader."
        )
        return None

    if isinstance(successor, CheckpointJobDescription):
        # Check if job is a checkpoint job and quit if so
        logger.debug("Next job is checkpoint, so finishing")
        return None

    # Made it through! This job is chainable.
    return successor
def _buildToilState(self, jobDesc: JobDescription) -> None:
    """
    Recursively walks the jobs below the given JobDescription and records
    what it finds in this ToilState.

    A job that has a command, has a checkpoint to restart from, has services,
    or reports no next successors is announced on the bus as updated. Any
    other job has its ready successors counted and each one visited in turn;
    a successor with more than one predecessor is only descended into once
    all of its predecessors are recorded as finished.

    :param jobDesc: The description for the root job of the subtree to
           traverse.
    """
    readySuccessors = jobDesc.nextSuccessors()

    hasCommand = jobDesc.command is not None
    hasCheckpoint = (isinstance(jobDesc, CheckpointJobDescription)
                     and jobDesc.checkpoint is not None)
    hasServices = len(jobDesc.services) > 0
    nothingLeft = readySuccessors is None

    # Any one of these means the job itself can be processed right away
    # (i.e. it is updated), so there is no need to descend further.
    if hasCommand or hasCheckpoint or hasServices or nothingLeft:
        logger.debug(
            "Found job to run: %s, with command: %s, with checkpoint: %s, with "
            "services: %s, with no next successors: %s",
            jobDesc.jobStoreID,
            hasCommand,
            hasCheckpoint,
            hasServices,
            nothingLeft,
        )

        # Set the job updated because we should be able to make progress on it.
        self.bus.put(JobUpdatedMessage(str(jobDesc.jobStoreID), 0))

        # A checkpoint job is restarted from its saved checkpoint command.
        if hasCheckpoint:
            jobDesc.command = jobDesc.checkpoint
        return

    # Otherwise there exist successors that have to be visited.
    ownID = str(jobDesc.jobStoreID)
    successorTotal = len(readySuccessors)
    logger.debug(
        "Adding job: %s to the state with %s successors",
        jobDesc.jobStoreID,
        successorTotal,
    )

    # Record the number of successors
    self.successorCounts[ownID] = successorTotal

    def visitSharedSuccessor(sharedID, shared: JobDescription) -> None:
        # Handle a successor that has several predecessors: mark jobDesc as
        # one of its finished predecessors and descend only when all are in.
        if jobDesc.jobStoreID not in shared.predecessorsFinished:
            shared.predecessorsFinished.add(jobDesc.jobStoreID)

        finishedCount = len(shared.predecessorsFinished)
        assert finishedCount <= shared.predecessorNumber
        if finishedCount == shared.predecessorNumber:
            # Every predecessor is accounted for, so the successor is no
            # longer waiting; take it out of the waiting set and recurse.
            self.jobsToBeScheduledWithMultiplePredecessors.remove(sharedID)
            self._buildToilState(shared)

    for succID in readySuccessors:
        if succID in self.successor_to_predecessors:
            # We've already seen this successor: just add this job to its
            # set of predecessors.
            knownPredecessors = self.successor_to_predecessors[succID]
            assert jobDesc.jobStoreID not in knownPredecessors
            knownPredecessors.add(ownID)

            # If it is still waiting on multiple predecessors, account for
            # this one too.
            if succID in self.jobsToBeScheduledWithMultiplePredecessors:
                visitSharedSuccessor(succID, self.get_job(succID))
            continue

        # First time this successor is encountered: start its predecessor
        # set with this job and load its description.
        self.successor_to_predecessors[succID] = {ownID}
        successor = self.get_job(succID)

        if successor.predecessorNumber > 1:
            # Several predecessors: park it in the waiting set, then record
            # this job as one of the finished predecessors.
            assert succID not in self.jobsToBeScheduledWithMultiplePredecessors
            self.jobsToBeScheduledWithMultiplePredecessors.add(succID)
            visitSharedSuccessor(succID, successor)
        else:
            # This job is the only predecessor, so descend straight away.
            self._buildToilState(successor)