def testToilStats_SortSimple(self):
    """
    Tests the toilStats utility using the scriptTree_sort example.
    """
    for test in xrange(self.testNo):
        tempDir = getTempDirectory(os.getcwd())
        tempFile = getTempFile(rootDir=tempDir)
        outputFile = getTempFile(rootDir=tempDir)
        toilDir = os.path.join(tempDir, "testToil")
        lines = 10000
        maxLineLength = 10
        N = 1000
        makeFileToSort(tempFile, lines, maxLineLength)
        # Sort the file
        rootPath = os.path.join(toilPackageDirPath(), "test", "sort")
        system("{rootPath}/sort.py "
               "--toil {toilDir} "
               "--logLevel=DEBUG "
               "--fileToSort={tempFile} "
               "--N {N} --stats "
               "--jobTime 0.5 "
               "--retryCount 99".format(**locals()))
        # Now get the stats
        toilStats = self.getScriptPath('toilStats')
        system("{toilStats} "
               "--toil {toilDir} "
               "--outputFile {outputFile}".format(**locals()))
        # Cleanup
        system("rm -rf %s" % tempDir)
def setUp(self):
    super(UtilsTest, self).setUp()
    self.tempDir = self._createTempDir()
    self.tempFile = getTempFile(rootDir=self.tempDir)
    self.outputFile = getTempFile(rootDir=self.tempDir)
    self.toilDir = os.path.join(self.tempDir, "jobstore")
    self.assertFalse(os.path.exists(self.toilDir))
    self.lines = 1000
    self.lineLen = 10
    self.N = 1000
    makeFileToSort(self.tempFile, self.lines, self.lineLen)
    # First make our own sorted version
    with open(self.tempFile, "r") as fileHandle:
        self.correctSort = fileHandle.readlines()
        self.correctSort.sort()
def testEncapsulation(self):
    """
    Tests the Job.encapsulation method, which uses the EncapsulationJob class.
    """
    # Temporary file
    outFile = getTempFile(rootDir=self._createTempDir())
    try:
        # Encapsulate a job graph
        a = T.wrapJobFn(encapsulatedJobFn, "A", outFile)
        a = a.encapsulate()
        # Now add children/follow-ons to the encapsulated graph
        d = T.wrapFn(f, a.rv(), outFile)
        e = T.wrapFn(f, d.rv(), outFile)
        a.addChild(d)
        a.addFollowOn(e)
        # Create the runner for the workflow.
        options = T.Runner.getDefaultOptions(self._getTestJobStorePath())
        options.logLevel = "INFO"
        # Run the workflow, the return value being the number of failed jobs
        T.Runner.startToil(a, options)
        # Check output
        self.assertEquals(open(outFile, 'r').readline(), "ABCDE")
    finally:
        os.remove(outFile)
def testServiceDeadlock(self):
    """
    Creates a job with more services than maxServices, checks that deadlock is detected.
    """
    outFile = getTempFile(rootDir=self._createTempDir())
    try:
        def makeWorkflow():
            job = Job()
            r1 = job.addService(TestServiceSerialization("woot1"))
            r2 = job.addService(TestServiceSerialization("woot2"))
            r3 = job.addService(TestServiceSerialization("woot3"))
            job.addChildFn(fnTest, [r1, r2, r3], outFile)
            return job

        # This should fail as too few services are available
        try:
            self.runToil(makeWorkflow(), badWorker=0.0, maxServiceJobs=2, deadlockWait=5)
        except DeadlockException:
            print "Got expected deadlock exception"
        else:
            assert 0
        # This should pass, as adequate services are available
        self.runToil(makeWorkflow(), maxServiceJobs=3)
        # Check we get the expected output
        assert open(outFile, 'r').read() == "woot1 woot2 woot3"
    finally:
        os.remove(outFile)
def testEncapsulation(self):
    """
    Tests the Job.encapsulation method, which uses the EncapsulationJob class.
    """
    #Temporary file
    outFile = getTempFile(rootDir=os.getcwd())
    #Make a job graph
    a = T.wrapFn(f, "A", outFile)
    b = a.addChildFn(f, a.rv(), outFile)
    c = a.addFollowOnFn(f, b.rv(), outFile)
    #Encapsulate it
    a = a.encapsulate()
    #Now add children/follow-ons to the encapsulated graph
    d = T.wrapFn(f, c.rv(), outFile)
    e = T.wrapFn(f, d.rv(), outFile)
    a.addChild(d)
    a.addFollowOn(e)
    #Create the runner for the workflow.
    options = T.Runner.getDefaultOptions()
    options.logLevel = "INFO"
    #Run the workflow, the return value being the number of failed jobs
    self.assertEquals(T.Runner.startToil(a, options), 0)
    T.Runner.cleanup(options)  #This removes the jobStore
    #Check output
    self.assertEquals(open(outFile, 'r').readline(), "ABCDE")
    #Cleanup
    os.remove(outFile)
def issueBatchJob(self, command, memory, cores, disk, preemptable):
    """
    Issues parasol with job commands.
    """
    self.checkResourceRequest(memory, cores, disk)
    MiB = 1 << 20
    truncatedMemory = (memory / MiB) * MiB
    # Look for a batch for jobs with these resource requirements, with
    # the memory rounded down to the nearest megabyte. Rounding down
    # means the new job can't ever decrease the memory requirements
    # of jobs already in the batch.
    if len(self.resultsFiles) >= self.maxBatches:
        raise RuntimeError('Number of batches reached limit of %i' % self.maxBatches)
    try:
        results = self.resultsFiles[(truncatedMemory, cores)]
    except KeyError:
        results = getTempFile(rootDir=self.parasolResultsDir)
        self.resultsFiles[(truncatedMemory, cores)] = results
    # Prefix the command with environment overrides, optionally looking them up from the
    # current environment if the value is None
    command = ' '.join(concat('env', self.__environment(), command))
    parasolCommand = ['-verbose',
                      '-ram=%i' % memory,
                      '-cpu=%i' % cores,
                      '-results=' + results,
                      'add', 'job', command]
    # Deal with the cpus
    self.usedCpus += cores
    while True:  # Process finished results with no wait
        try:
            jobID = self.cpuUsageQueue.get_nowait()
        except Empty:
            break
        if jobID in self.jobIDsToCpu.keys():
            self.usedCpus -= self.jobIDsToCpu.pop(jobID)
        assert self.usedCpus >= 0
    while self.usedCpus > self.maxCores:  # If we are still waiting
        jobID = self.cpuUsageQueue.get()
        if jobID in self.jobIDsToCpu.keys():
            self.usedCpus -= self.jobIDsToCpu.pop(jobID)
        assert self.usedCpus >= 0
    # Now keep going
    while True:
        line = self._runParasol(parasolCommand)[1][0]
        match = self.parasolOutputPattern.match(line)
        if match is None:
            # This is because parasol add job will return success, even if the job was not
            # properly issued!
            logger.info('We failed to properly add the job, we will try again after 5s.')
            time.sleep(5)
        else:
            jobID = int(match.group(1))
            self.jobIDsToCpu[jobID] = cores
            self.runningJobs.add(jobID)
            logger.debug("Got the parasol job id: %s from line: %s" % (jobID, line))
            return jobID
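# Hedged illustration (not part of the batch system above) of the batching key used by
# issueBatchJob: the requested memory is rounded down to the nearest MiB, so a newly
# issued job can never lower the memory already reserved for jobs in an existing batch.
# The request values below are made up purely to show the arithmetic.
MiB = 1 << 20
for requested in (MiB + 512 * 1024, 2 * MiB, 3 * MiB - 1):
    truncated = (requested / MiB) * MiB  # Python 2 integer division rounds down
    print "requested=%i bytes -> batch memory=%i bytes" % (requested, truncated)
# 1.5 MiB and just-under-3 MiB both round down; exact multiples are unchanged.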
def testService(self, checkpoint=False):
    """
    Tests the creation of a Job.Service with random failures of the worker.
    """
    for test in xrange(2):
        # Temporary file
        outFile = getTempFile(rootDir=self._createTempDir())
        messageInt = random.randint(1, sys.maxint)
        try:
            # Wire up the services/jobs
            t = Job.wrapJobFn(serviceTest, outFile, messageInt, checkpoint=checkpoint)
            # Run the workflow repeatedly until success
            self.runToil(t)
            # Check output
            self.assertEquals(int(open(outFile, 'r').readline()), messageInt)
        finally:
            os.remove(outFile)
def testService(self):
    """
    Tests the creation of a Job.Service.
    """
    # Temporary file
    outFile = getTempFile(rootDir=self._createTempDir())
    try:
        # Wire up the services/jobs
        t = Job.wrapFn(f, "1", outFile)
        t.addChildFn(f, t.addService(TestService("2", "3", outFile)), outFile)
        # Create the runner for the workflow.
        options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
        options.logLevel = "INFO"
        # Run the workflow, the return value being the number of failed jobs
        Job.Runner.startToil(t, options)
        # Check output
        self.assertEquals(open(outFile, 'r').readline(), "123")
    finally:
        os.remove(outFile)
def testStatic(self):
    """
    Create a DAG of jobs non-dynamically and run it. DAG is:

        A -> F
        \-------
        B -> D
         \ \
          \ ------- C -> E

    Follow on is marked by ->
    """
    outFile = getTempFile(rootDir=self._createTempDir())
    try:
        # Create the jobs
        A = Job.wrapFn(fn1Test, "A", outFile)
        B = Job.wrapFn(fn1Test, A.rv(), outFile)
        C = Job.wrapFn(fn1Test, B.rv(), outFile)
        D = Job.wrapFn(fn1Test, C.rv(), outFile)
        E = Job.wrapFn(fn1Test, D.rv(), outFile)
        F = Job.wrapFn(fn1Test, E.rv(), outFile)
        # Connect them into a workflow
        A.addChild(B)
        A.addChild(C)
        B.addChild(C)
        B.addFollowOn(E)
        C.addFollowOn(D)
        A.addFollowOn(F)
        # Create the runner for the workflow.
        options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
        options.logLevel = "INFO"
        options.retryCount = 100
        options.badWorker = 0.5
        options.badWorkerFailInterval = 0.01
        # Run the workflow, the return value being the number of failed jobs
        Job.Runner.startToil(A, options)
        # Check output
        self.assertEquals(open(outFile, 'r').readline(), "ABCDEFG")
    finally:
        os.remove(outFile)
def testStatic(self):
    """
    Create a DAG of jobs non-dynamically and run it. DAG is:

        A -> F
        \-------
        B -> D
         \ \
          \ ------- C -> E

    Follow on is marked by ->
    """
    #Temporary file
    outFile = getTempFile(rootDir=os.getcwd())
    #Create the jobs
    A = Job.wrapFn(f, "A", outFile)
    B = Job.wrapFn(f, A.rv(0), outFile)
    C = Job.wrapFn(f, B.rv(0), outFile)
    D = Job.wrapFn(f, C.rv(0), outFile)
    E = Job.wrapFn(f, D.rv(0), outFile)
    F = Job.wrapFn(f, E.rv(0), outFile)
    #Connect them into a workflow
    A.addChild(B)
    A.addChild(C)
    B.addChild(C)
    B.addFollowOn(E)
    C.addFollowOn(D)
    A.addFollowOn(F)
    #Create the runner for the workflow.
    options = Job.Runner.getDefaultOptions()
    options.logLevel = "INFO"
    #Run the workflow, the return value being the number of failed jobs
    self.assertEquals(Job.Runner.startToil(A, options), 0)
    Job.Runner.cleanup(options)  #This removes the jobStore
    #Check output
    self.assertEquals(open(outFile, 'r').readline(), "ABCDEF")
    #Cleanup
    os.remove(outFile)
def testEvaluatingRandomDAG(self):
    """
    Randomly generate test input, then check that the ordering in which the
    jobs ran respected the constraints.
    """
    jobStore = self._getTestJobStorePath()
    for test in xrange(30):
        # Temporary file
        outFile = getTempFile(rootDir=os.getcwd())
        # Make a random DAG for the set of child edges
        nodeNumber = random.choice(xrange(2, 20))
        childEdges = self.makeRandomDAG(nodeNumber)
        # Get an adjacency list representation and check it is acyclic
        adjacencyList = self.getAdjacencyList(nodeNumber, childEdges)
        self.assertTrue(self.isAcyclic(adjacencyList))
        # Add in follow-on edges - these are returned as a list, and as a set
        # of augmented edges in the adjacency list
        followOnEdges = self.addRandomFollowOnEdges(adjacencyList)
        self.assertTrue(self.isAcyclic(adjacencyList))
        # Make the job graph
        rootJob = self.makeJobGraph(nodeNumber, childEdges, followOnEdges, outFile)
        # Run the job graph
        options = Job.Runner.getDefaultOptions()
        options.jobStore = "%s.%i" % (jobStore, test)
        Job.Runner.startToil(rootJob, options)
        # Get the ordering, then add the implied ordering to the graph
        with open(outFile, 'r') as fH:
            ordering = map(int, fH.readline().split())
        # Check all the jobs were run
        self.assertEquals(set(ordering), set(xrange(nodeNumber)))
        # Add the ordering to the graph
        for i in xrange(nodeNumber - 1):
            adjacencyList[ordering[i]].add(ordering[i + 1])
        # Check the ordering retains an acyclic graph
        if not self.isAcyclic(adjacencyList):
            print "ORDERING", ordering
            print "CHILD EDGES", childEdges
            print "FOLLOW ON EDGES", followOnEdges
            print "ADJACENCY LIST", adjacencyList
        self.assertTrue(self.isAcyclic(adjacencyList))
        # Cleanup
        os.remove(outFile)
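# A possible depth-first-search acyclicity check of the kind the test above relies on.
# This is only a sketch under the assumption that adjacencyList[i] is the set of
# successors of node i; the real self.isAcyclic helper used by the test class may be
# implemented differently.
def isAcyclicSketch(adjacencyList):
    WHITE, GREY, BLACK = 0, 1, 2
    colour = [WHITE] * len(adjacencyList)

    def visit(node):
        if colour[node] == GREY:
            return False  # Back edge: we re-entered a node still on the DFS stack
        if colour[node] == BLACK:
            return True   # Already fully explored
        colour[node] = GREY
        for successor in adjacencyList[node]:
            if not visit(successor):
                return False
        colour[node] = BLACK
        return True

    return all(visit(node) for node in xrange(len(adjacencyList)))

# Example: 0 -> 1 -> 2 is acyclic; adding the edge 2 -> 0 creates a cycle.
assert isAcyclicSketch([{1}, {2}, set()])
assert not isAcyclicSketch([{1}, {2}, {0}])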
def testServiceRecursive(self, checkpoint=True):
    """
    Tests the creation of a Job.Service, creating a chain of services and accessing jobs.
    Randomly fails the worker.
    """
    for test in xrange(1):
        # Temporary file
        outFile = getTempFile(rootDir=self._createTempDir())
        messages = [random.randint(1, sys.maxint) for i in xrange(3)]
        try:
            # Wire up the services/jobs
            t = Job.wrapJobFn(serviceTestRecursive, outFile, messages, checkpoint=checkpoint)
            # Run the workflow repeatedly until success
            self.runToil(t)
            # Check output
            self.assertEquals(map(int, open(outFile, 'r').readlines()), messages)
        finally:
            os.remove(outFile)
def testServiceParallelRecursive(self, checkpoint=True):
    """
    Tests the creation of a Job.Service, creating parallel chains of services and accessing jobs.
    Randomly fails the worker.
    """
    for test in xrange(1):
        # Temporary files
        outFiles = [getTempFile(rootDir=self._createTempDir()) for j in xrange(2)]
        messageBundles = [[random.randint(1, sys.maxint) for i in xrange(3)] for j in xrange(2)]
        try:
            # Wire up the services/jobs
            t = Job.wrapJobFn(serviceTestParallelRecursive, outFiles, messageBundles,
                              checkpoint=True)
            # Run the workflow repeatedly until success
            self.runToil(t, retryCount=2)
            # Check output
            for (messages, outFile) in zip(messageBundles, outFiles):
                self.assertEquals(map(int, open(outFile, 'r').readlines()), messages)
        finally:
            map(os.remove, outFiles)
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help="Seq file")
    parser.add_argument("outputHal", type=str, help="Output HAL file")
    #Progressive Cactus Options
    parser.add_argument("--database", dest="database",
                        help="Database type: tokyo_cabinet or kyoto_tycoon"
                        " [default: %(default)s]",
                        default="kyoto_tycoon")
    parser.add_argument("--configFile", dest="configFile",
                        help="Specify cactus configuration file",
                        default=None)
    parser.add_argument("--root", dest="root",
                        help="Name of ancestral node (which must appear in NEWICK tree "
                        "in <seqfile>) to use as a root for the alignment. Any genomes "
                        "not below this node in the tree may be used as outgroups but "
                        "will never appear in the output. If no root is specified then "
                        "the root of the tree is used.",
                        default=None)
    parser.add_argument("--latest", dest="latest", action="store_true",
                        help="Use the latest version of the docker container "
                        "rather than pulling one matching this version of cactus")
    parser.add_argument("--containerImage", dest="containerImage", default=None,
                        help="Use the specified pre-built container image "
                        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries", default=None)

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)

    # Mess with some toil options to create useful defaults.

    # Caching generally slows down the cactus workflow, plus some
    # methods like readGlobalFileStream don't support forced
    # reads directly from the job store rather than from cache.
    options.disableCaching = True
    # Job chaining breaks service termination timing, causing unused
    # databases to accumulate and waste memory for no reason.
    options.disableChaining = True
    # The default deadlockWait is currently 60 seconds. This can cause
    # issues if the database processes take a while to actually begin
    # after they're issued. Change it to at least an hour so that we
    # don't preemptively declare a deadlock.
    if options.deadlockWait is None or options.deadlockWait < 3600:
        options.deadlockWait = 3600
    if options.retryCount is None:
        # If the user didn't specify a retryCount value, make it 5
        # instead of Toil's default (1).
        options.retryCount = 5

    with Toil(options) as toil:
        importSingularityImage()
        #Run the workflow
        if options.restart:
            halID = toil.restart()
        else:
            options.cactusDir = getTempDirectory()
            #Create the progressive cactus project
            projWrapper = ProjectWrapper(options)
            projWrapper.writeXml()

            pjPath = os.path.join(options.cactusDir, ProjectWrapper.alignmentDirName,
                                  '%s_project.xml' % ProjectWrapper.alignmentDirName)
            assert os.path.exists(pjPath)

            project = MultiCactusProject()

            if not os.path.isdir(options.cactusDir):
                os.makedirs(options.cactusDir)

            project.readXML(pjPath)
            #import the sequences
            seqIDs = []
            print "Importing %s sequences" % (len(project.getInputSequencePaths()))
            for seq in project.getInputSequencePaths():
                if os.path.isdir(seq):
                    tmpSeq = getTempFile()
                    catFiles([os.path.join(seq, subSeq) for subSeq in os.listdir(seq)], tmpSeq)
                    seq = tmpSeq
                seq = makeURL(seq)
                seqIDs.append(toil.importFile(seq))
            project.setInputSequenceIDs(seqIDs)

            #import cactus config
            if options.configFile:
                cactusConfigID = toil.importFile(makeURL(options.configFile))
            else:
                cactusConfigID = toil.importFile(makeURL(project.getConfigPath()))
            project.setConfigID(cactusConfigID)

            project.syncToFileStore(toil)
            configNode = ET.parse(project.getConfigPath()).getroot()
            configWrapper = ConfigWrapper(configNode)
            configWrapper.substituteAllPredefinedConstantsWithLiterals()

            project.writeXML(pjPath)
            halID = toil.start(RunCactusPreprocessorThenProgressiveDown(
                options, project, memory=configWrapper.getDefaultMemory()))

        toil.exportFile(halID, makeURL(options.outputHal))
def issueBatchJob(self, jobNode):
    """
    Issues parasol with job commands.
    """
    self.checkResourceRequest(jobNode.memory, jobNode.cores, jobNode.disk)
    MiB = 1 << 20
    truncatedMemory = (jobNode.memory / MiB) * MiB
    # Look for a batch for jobs with these resource requirements, with
    # the memory rounded down to the nearest megabyte. Rounding down
    # means the new job can't ever decrease the memory requirements
    # of jobs already in the batch.
    if len(self.resultsFiles) >= self.maxBatches:
        raise RuntimeError('Number of batches reached limit of %i' % self.maxBatches)
    try:
        results = self.resultsFiles[(truncatedMemory, jobNode.cores)]
    except KeyError:
        results = getTempFile(rootDir=self.parasolResultsDir)
        self.resultsFiles[(truncatedMemory, jobNode.cores)] = results
    # Prefix the command with environment overrides, optionally looking them up from the
    # current environment if the value is None
    command = ' '.join(concat('env', self.__environment(), jobNode.command))
    parasolCommand = ['-verbose',
                      '-ram=%i' % jobNode.memory,
                      '-cpu=%i' % jobNode.cores,
                      '-results=' + results,
                      'add', 'job', command]
    # Deal with the cpus
    self.usedCpus += jobNode.cores
    while True:  # Process finished results with no wait
        try:
            jobID = self.cpuUsageQueue.get_nowait()
        except Empty:
            break
        if jobID in self.jobIDsToCpu.keys():
            self.usedCpus -= self.jobIDsToCpu.pop(jobID)
        assert self.usedCpus >= 0
    while self.usedCpus > self.maxCores:  # If we are still waiting
        jobID = self.cpuUsageQueue.get()
        if jobID in self.jobIDsToCpu.keys():
            self.usedCpus -= self.jobIDsToCpu.pop(jobID)
        assert self.usedCpus >= 0
    # Now keep going
    while True:
        line = self._runParasol(parasolCommand)[1][0]
        match = self.parasolOutputPattern.match(line)
        if match is None:
            # This is because parasol add job will return success, even if the job was not
            # properly issued!
            logger.debug('We failed to properly add the job, we will try again after 5s.')
            time.sleep(5)
        else:
            jobID = int(match.group(1))
            self.jobIDsToCpu[jobID] = jobNode.cores
            self.runningJobs.add(jobID)
            logger.debug("Got the parasol job id: %s from line: %s" % (jobID, line))
            return jobID