Example #1
 def testToilStats_SortSimple(self):
     """
     Tests the toilStats utility using the scriptTree_sort example.
     """
     for test in xrange(self.testNo):
         tempDir = getTempDirectory(os.getcwd())
         tempFile = getTempFile(rootDir=tempDir)
         outputFile = getTempFile(rootDir=tempDir)
         toilDir = os.path.join(tempDir, "testToil")
         lines = 10000
         maxLineLength = 10
         N = 1000
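         # N feeds the sort example's --N option below; assumed to be the size
         # threshold under which a chunk of the file is sorted in memory.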
         makeFileToSort(tempFile, lines, maxLineLength)
         # Sort the file
         rootPath = os.path.join(toilPackageDirPath(), "test", "sort")
         system("{rootPath}/sort.py "
                "--toil {toilDir} "
                "--logLevel=DEBUG "
                "--fileToSort={tempFile} "
                "--N {N} --stats "
                "--jobTime 0.5 "
                "--retryCount 99".format(**locals()))
         # Now get the stats
         toilStats = self.getScriptPath('toilStats')
         system("{toilStats} "
                "--toil {toilDir} "
                "--outputFile {outputFile}".format(**locals()))
         # Cleanup
         system("rm -rf %s" % tempDir)
Example #2
 def setUp(self):
     super(UtilsTest, self).setUp()
     self.tempDir = self._createTempDir()
     self.tempFile = getTempFile(rootDir=self.tempDir)
     self.outputFile = getTempFile(rootDir=self.tempDir)
     self.toilDir = os.path.join(self.tempDir, "jobstore")
     self.assertFalse(os.path.exists(self.toilDir))
     self.lines = 1000
     self.lineLen = 10
     self.N = 1000
     makeFileToSort(self.tempFile, self.lines, self.lineLen)
     # First make our own sorted version
     with open(self.tempFile, "r") as fileHandle:
         self.correctSort = fileHandle.readlines()
         self.correctSort.sort()
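
A pattern shared by all of these examples: temporary files come from getTempFile(rootDir=...) and are removed explicitly once the test finishes. A minimal sketch of that pattern, assuming getTempFile and getTempDirectory come from toil.lib.bioio, as in the Toil version these tests target:

    import os
    from toil.lib.bioio import getTempDirectory, getTempFile

    tempDir = getTempDirectory(os.getcwd())  # scratch directory under the cwd
    tempFile = getTempFile(rootDir=tempDir)  # empty temp file created inside tempDir
    try:
        with open(tempFile, 'w') as fH:
            fH.write("scratch data")
    finally:
        os.remove(tempFile)                  # clean up explicitly, as the tests do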
Example #3
 def testEncapsulation(self):
     """
     Tests the Job.encapsulation method, which uses the EncapsulationJob
     class.
     """
     # Temporary file
     outFile = getTempFile(rootDir=self._createTempDir())
     try:
         # Encapsulate a job graph
         a = T.wrapJobFn(encapsulatedJobFn, "A", outFile)
         a = a.encapsulate()
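         # After encapsulate(), 'a' stands in for the whole wrapped graph:
         # children and follow-ons added to it only run once everything
         # inside the encapsulated graph has finished.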
         # Now add children/follow to the encapsulated graph
         d = T.wrapFn(f, a.rv(), outFile)
         e = T.wrapFn(f, d.rv(), outFile)
         a.addChild(d)
         a.addFollowOn(e)
         # Create the runner for the workflow.
         options = T.Runner.getDefaultOptions(self._getTestJobStorePath())
         options.logLevel = "INFO"
         # Run the workflow, the return value being the number of failed jobs
         T.Runner.startToil(a, options)
         # Check output
         self.assertEquals(open(outFile, 'r').readline(), "ABCDE")
     finally:
         os.remove(outFile)
Example #4
 def testServiceDeadlock(self):
     """
     Creates a job with more services than maxServiceJobs allows, and checks that the deadlock is detected.
     """
     outFile = getTempFile(rootDir=self._createTempDir())
     try:
         def makeWorkflow():
             job = Job()
             r1 = job.addService(TestServiceSerialization("woot1"))
             r2 = job.addService(TestServiceSerialization("woot2"))
             r3 = job.addService(TestServiceSerialization("woot3"))
             job.addChildFn(fnTest, [ r1, r2, r3 ], outFile)
             return job
         
         # This should fail as too few services available
         try:
             self.runToil(makeWorkflow(), badWorker=0.0, maxServiceJobs=2, deadlockWait=5)
         except DeadlockException:
             print "Got expected deadlock exception"
         else:
             assert 0
             
         # This should pass, as adequate services available
         self.runToil(makeWorkflow(), maxServiceJobs=3)
         # Check we get expected output 
         assert open(outFile, 'r').read() == "woot1 woot2 woot3"
     finally:
         os.remove(outFile)
Example #5
 def testEncapsulation(self):
     """
     Tests the Job.encapsulation method, which uses the EncapsulationJob
     class.
     """
     #Temporary file
     outFile = getTempFile(rootDir=os.getcwd())
     #Make a job graph
     a = T.wrapFn(f, "A", outFile)
     b = a.addChildFn(f, a.rv(), outFile)
     c = a.addFollowOnFn(f, b.rv(), outFile)
     #Encapsulate it
     a = a.encapsulate()
     #Now add children/follow to the encapsulated graph
     d = T.wrapFn(f, c.rv(), outFile)
     e = T.wrapFn(f, d.rv(), outFile)
     a.addChild(d)
     a.addFollowOn(e)
     #Create the runner for the workflow.
     options = T.Runner.getDefaultOptions()
     options.logLevel = "INFO"
     #Run the workflow, the return value being the number of failed jobs
     self.assertEquals(T.Runner.startToil(a, options), 0)
     T.Runner.cleanup(options) #This removes the jobStore
     #Check output
     self.assertEquals(open(outFile, 'r').readline(), "ABCDE")
     #Cleanup
     os.remove(outFile)
Example #6
    def issueBatchJob(self, command, memory, cores, disk, preemptable):
        """
        Issues a job command to Parasol.
        """
        self.checkResourceRequest(memory, cores, disk)

        MiB = 1 << 20
        truncatedMemory = (memory / MiB) * MiB
        # Look for a batch for jobs with these resource requirements, with
        # the memory rounded down to the nearest megabyte. Rounding down
        # means the new job can't ever decrease the memory requirements
        # of jobs already in the batch.
        if len(self.resultsFiles) >= self.maxBatches:
            raise RuntimeError('Number of batches reached limit of %i' % self.maxBatches)
        try:
            results = self.resultsFiles[(truncatedMemory, cores)]
        except KeyError:
            results = getTempFile(rootDir=self.parasolResultsDir)
            self.resultsFiles[(truncatedMemory, cores)] = results

        # Prefix the command with environment overrides, optionally looking them up from the
        # current environment if the value is None
        command = ' '.join(concat('env', self.__environment(), command))
        parasolCommand = ['-verbose',
                          '-ram=%i' % memory,
                          '-cpu=%i' % cores,
                          '-results=' + results,
                          'add', 'job', command]
        # Deal with the cpus
        self.usedCpus += cores
        while True:  # Process finished results with no wait
            try:
                jobID = self.cpuUsageQueue.get_nowait()
            except Empty:
                break
            if jobID in self.jobIDsToCpu:
                self.usedCpus -= self.jobIDsToCpu.pop(jobID)
            assert self.usedCpus >= 0
        while self.usedCpus > self.maxCores:  # If we are still waiting
            jobID = self.cpuUsageQueue.get()
            if jobID in self.jobIDsToCpu:
                self.usedCpus -= self.jobIDsToCpu.pop(jobID)
            assert self.usedCpus >= 0
        # Now keep going
        while True:
            line = self._runParasol(parasolCommand)[1][0]
            match = self.parasolOutputPattern.match(line)
            if match is None:
                # This is because parasol add job will return success, even if the job was not
                # properly issued!
                logger.info('We failed to properly add the job; we will try again after 5s.')
                time.sleep(5)
            else:
                jobID = int(match.group(1))
                self.jobIDsToCpu[jobID] = cores
                self.runningJobs.add(jobID)
                logger.debug("Got the parasol job id: %s from line: %s" % (jobID, line))
                return jobID
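
The batching key above floors each memory request to a whole number of mebibytes, so a job can only join a batch whose rounded-down memory does not exceed its own request. A quick illustration of the arithmetic, with hypothetical values:

    MiB = 1 << 20                            # 1,048,576 bytes
    memory = 100 * MiB + 12345               # a request just over 100 MiB
    truncatedMemory = (memory // MiB) * MiB  # integer division floors the value
    assert truncatedMemory == 100 * MiB      # rounded down, never up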
Example #7
    def testService(self, checkpoint=False):
        """
        Tests the creation of a Job.Service with random failures of the worker.
        """
        for test in xrange(2):
            outFile = getTempFile(rootDir=self._createTempDir()) # Temporary file
            messageInt = random.randint(1, sys.maxint)
            try:
                # Wire up the services/jobs
                t = Job.wrapJobFn(serviceTest, outFile, messageInt, checkpoint=checkpoint)

                # Run the workflow repeatedly until success
                self.runToil(t)

                # Check output
                self.assertEquals(int(open(outFile, 'r').readline()), messageInt)
            finally:
                os.remove(outFile)
Example #8
 def testService(self):
     """
     Tests the creation of a Job.Service.
     """
     # Temporary file
     outFile = getTempFile(rootDir=self._createTempDir())
     try:
         # Wire up the services/jobs
         t = Job.wrapFn(f, "1", outFile)
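         # addService returns a promise for the service's start() value ("2"
         # here), which is passed on to the child function; together with "1"
         # written by t and "3" presumably written when the service shuts
         # down, this yields the expected "123".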
         t.addChildFn(f, t.addService(TestService("2", "3", outFile)), outFile)
         # Create the runner for the workflow.
         options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
         options.logLevel = "INFO"
         # Run the workflow, the return value being the number of failed jobs
         Job.Runner.startToil(t, options)
         # Check output
         self.assertEquals(open(outFile, 'r').readline(), "123")
     finally:
         os.remove(outFile)
Example #9
    def testStatic(self):
        """
        Create a DAG of jobs non-dynamically and run it. DAG is:
        
        A -> F
        \-------
        B -> D  \ 
         \       \
          ------- C -> E
          
        Follow on is marked by ->
        """
        outFile = getTempFile(rootDir=self._createTempDir())
        try:

            # Create the jobs
            A = Job.wrapFn(fn1Test, "A", outFile)
            B = Job.wrapFn(fn1Test, A.rv(), outFile)
            C = Job.wrapFn(fn1Test, B.rv(), outFile)
            D = Job.wrapFn(fn1Test, C.rv(), outFile)
            E = Job.wrapFn(fn1Test, D.rv(), outFile)
            F = Job.wrapFn(fn1Test, E.rv(), outFile)
            # Connect them into a workflow
            A.addChild(B)
            A.addChild(C)
            B.addChild(C)
            B.addFollowOn(E)
            C.addFollowOn(D)
            A.addFollowOn(F)

            # Create the runner for the workflow.
            options = Job.Runner.getDefaultOptions(self._getTestJobStorePath())
            options.logLevel = "INFO"
            options.retryCount = 100
            options.badWorker = 0.5
            options.badWorkerFailInterval = 0.01
            # Run the workflow, the return value being the number of failed jobs
            Job.Runner.startToil(A, options)

            # Check output
            self.assertEquals(open(outFile, 'r').readline(), "ABCDEFG")
        finally:
            os.remove(outFile)
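
For readers new to Toil's graph API: addChild schedules a job to run after its parent, while addFollowOn schedules a job to run only after the parent and all of the parent's transitive children have finished. A tiny standalone sketch of the same idea (the report helper is hypothetical):

    from toil.job import Job

    def report(msg):
        # trivial job function, used only to make the ordering visible
        print msg

    P = Job.wrapFn(report, "parent")
    C = P.addChildFn(report, "child")          # runs after P
    F = P.addFollowOnFn(report, "follow-on")   # runs after P and all of P's children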
Example #10
 def testStatic(self):
     """
     Create a DAG of jobs non-dynamically and run it. DAG is:
     
     A -> F
     \-------
     B -> D  \ 
      \       \
       ------- C -> E
       
     Follow on is marked by ->
     """
     #Temporary file
     outFile = getTempFile(rootDir=os.getcwd())
     
     #Create the jobs
     A = Job.wrapFn(f, "A", outFile)
     B = Job.wrapFn(f, A.rv(0), outFile)
     C = Job.wrapFn(f, B.rv(0), outFile)
     D = Job.wrapFn(f, C.rv(0), outFile)
     E = Job.wrapFn(f, D.rv(0), outFile)
     F = Job.wrapFn(f, E.rv(0), outFile)
     
     #Connect them into a workflow
     A.addChild(B)
     A.addChild(C)
     B.addChild(C)
     B.addFollowOn(E)
     C.addFollowOn(D)
     A.addFollowOn(F)
     
     #Create the runner for the workflow.
     options = Job.Runner.getDefaultOptions()
     options.logLevel = "INFO"
     #Run the workflow, the return value being the number of failed jobs
     self.assertEquals(Job.Runner.startToil(A, options), 0)
     Job.Runner.cleanup(options) #This removes the jobStore
     
     #Check output
     self.assertEquals(open(outFile, 'r').readline(), "ABCDEF")
     
     #Cleanup
     os.remove(outFile)
Example #11
 def testEvaluatingRandomDAG(self):
     """
     Randomly generate test input then check that the ordering of the running
     respected the constraints.
     """
     jobStore = self._getTestJobStorePath()
     for test in xrange(30):
         # Temporary file
         outFile = getTempFile(rootDir=os.getcwd())
         # Make a random DAG for the set of child edges
         nodeNumber = random.choice(xrange(2, 20))
         childEdges = self.makeRandomDAG(nodeNumber)
         # Get an adjacency list representation and check is acyclic
         adjacencyList = self.getAdjacencyList(nodeNumber, childEdges)
         self.assertTrue(self.isAcyclic(adjacencyList))
         # Add in follow on edges - these are returned as a list, and as a set
         # of augmented edges in the adjacency list
         followOnEdges = self.addRandomFollowOnEdges(adjacencyList)
         self.assertTrue(self.isAcyclic(adjacencyList))
         # Make the job graph
         rootJob = self.makeJobGraph(nodeNumber, childEdges, followOnEdges, outFile)
         # Run the job  graph
         options = Job.Runner.getDefaultOptions()
         options.jobStore = "%s.%i" % (jobStore, test)
         Job.Runner.startToil(rootJob, options)
         # Get the ordering add the implied ordering to the graph
         with open(outFile, 'r') as fH:
             ordering = map(int, fH.readline().split())
         # Check all the jobs were run
         self.assertEquals(set(ordering), set(xrange(nodeNumber)))
         # Add the ordering to the graph
         for i in xrange(nodeNumber - 1):
             adjacencyList[ordering[i]].add(ordering[i + 1])
         # Check the ordering retains an acyclic graph
         if not self.isAcyclic(adjacencyList):
             print "ORDERING", ordering
             print "CHILD EDGES", childEdges
             print "FOLLOW ON EDGES", followOnEdges
             print "ADJACENCY LIST", adjacencyList
         self.assertTrue(self.isAcyclic(adjacencyList))
         # Cleanup
         os.remove(outFile)
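
The helpers makeRandomDAG, getAdjacencyList and isAcyclic are not shown. A minimal sketch of what an acyclicity check over this adjacency-list representation (a list of child-index sets) might look like; the test's actual helper may differ:

    def isAcyclic(adjacencyList):
        # Three-colour DFS: GREY marks nodes on the current path, so
        # reaching a GREY node means we found a back edge, i.e. a cycle.
        WHITE, GREY, BLACK = 0, 1, 2
        colours = [WHITE] * len(adjacencyList)
        def visit(node):
            if colours[node] == GREY:
                return False                   # back edge: cycle
            if colours[node] == BLACK:
                return True                    # already fully explored
            colours[node] = GREY
            acyclic = all(visit(child) for child in adjacencyList[node])
            colours[node] = BLACK
            return acyclic
        return all(visit(node) for node in xrange(len(adjacencyList)))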
Example #12
    def testServiceRecursive(self, checkpoint=True):
        """
        Tests the creation of a Job.Service, creating a chain of services and accessing jobs.
        Randomly fails the worker.
        """
        for test in xrange(1):
            # Temporary file
            outFile = getTempFile(rootDir=self._createTempDir())
            messages = [ random.randint(1, sys.maxint) for i in xrange(3) ]
            try:
                # Wire up the services/jobs
                t = Job.wrapJobFn(serviceTestRecursive, outFile, messages, checkpoint=checkpoint)

                # Run the workflow repeatedly until success
                self.runToil(t)

                # Check output
                self.assertEquals(map(int, open(outFile, 'r').readlines()), messages)
            finally:
                os.remove(outFile)
Example #13
    def testServiceParallelRecursive(self, checkpoint=True):
        """
        Tests the creation of a Job.Service, creating parallel chains of services and accessing jobs.
        Randomly fails the worker.
        """
        for test in xrange(1):
            # Temporary file
            outFiles = [ getTempFile(rootDir=self._createTempDir()) for j in xrange(2) ]
            messageBundles = [ [ random.randint(1, sys.maxint) for i in xrange(3) ] for j in xrange(2) ]
            try:
                # Wire up the services/jobs
                t = Job.wrapJobFn(serviceTestParallelRecursive, outFiles, messageBundles, checkpoint=checkpoint)

                # Run the workflow repeatedly until success
                self.runToil(t, retryCount=2)

                # Check output
                for (messages, outFile) in zip(messageBundles, outFiles):
                    self.assertEquals(map(int, open(outFile, 'r').readlines()), messages)
            finally:
                map(os.remove, outFiles)
Example #14
def main():
    parser = ArgumentParser()
    Job.Runner.addToilOptions(parser)
    addCactusWorkflowOptions(parser)

    parser.add_argument("seqFile", help = "Seq file")
    parser.add_argument("outputHal", type=str, help = "Output HAL file")

    #Progressive Cactus Options
    parser.add_argument("--database", dest="database",
                      help="Database type: tokyo_cabinet or kyoto_tycoon"
                      " [default: %(default)s]",
                      default="kyoto_tycoon")
    parser.add_argument("--configFile", dest="configFile",
                      help="Specify cactus configuration file",
                      default=None)
    parser.add_argument("--root", dest="root", help="Name of ancestral node (which"
                      " must appear in NEWICK tree in <seqfile>) to use as a "
                      "root for the alignment.  Any genomes not below this node "
                      "in the tree may be used as outgroups but will never appear"
                      " in the output.  If no root is specifed then the root"
                      " of the tree is used. ", default=None)   
    parser.add_argument("--latest", dest="latest", action="store_true",
                        help="Use the latest version of the docker container "
                        "rather than pulling one matching this version of cactus")
    parser.add_argument("--containerImage", dest="containerImage", default=None,
                        help="Use the the specified pre-built containter image "
                        "rather than pulling one from quay.io")
    parser.add_argument("--binariesMode", choices=["docker", "local", "singularity"],
                        help="The way to run the Cactus binaries", default=None)

    options = parser.parse_args()

    setupBinaries(options)
    setLoggingFromOptions(options)

    # Mess with some toil options to create useful defaults.

    # Caching generally slows down the cactus workflow, plus some
    # methods like readGlobalFileStream don't support forced
    # reads directly from the job store rather than from cache.
    options.disableCaching = True
    # Job chaining breaks service termination timing, causing unused
    # databases to accumulate and waste memory for no reason.
    options.disableChaining = True
    # The default deadlockWait is currently 60 seconds. This can cause
    # issues if the database processes take a while to actually begin
    # after they're issued. Change it to at least an hour so that we
    # don't preemptively declare a deadlock.
    if options.deadlockWait is None or options.deadlockWait < 3600:
        options.deadlockWait = 3600
    if options.retryCount is None:
        # If the user didn't specify a retryCount value, make it 5
        # instead of Toil's default (1).
        options.retryCount = 5

    with Toil(options) as toil:
        importSingularityImage()
        #Run the workflow
        if options.restart:
            halID = toil.restart()
        else:
            options.cactusDir = getTempDirectory()
            #Create the progressive cactus project 
            projWrapper = ProjectWrapper(options)
            projWrapper.writeXml()

            pjPath = os.path.join(options.cactusDir, ProjectWrapper.alignmentDirName,
                                  '%s_project.xml' % ProjectWrapper.alignmentDirName)
            assert os.path.exists(pjPath)

            project = MultiCactusProject()

            if not os.path.isdir(options.cactusDir):
                os.makedirs(options.cactusDir)

            project.readXML(pjPath)
            #import the sequences
            seqIDs = []
            print "Importing %s sequences" % (len(project.getInputSequencePaths()))
            for seq in project.getInputSequencePaths():
                if os.path.isdir(seq):
                    tmpSeq = getTempFile()
                    catFiles([os.path.join(seq, subSeq) for subSeq in os.listdir(seq)], tmpSeq)
                    seq = tmpSeq
                seq = makeURL(seq)
                seqIDs.append(toil.importFile(seq))
            project.setInputSequenceIDs(seqIDs)

            #import cactus config
            if options.configFile:
                cactusConfigID = toil.importFile(makeURL(options.configFile))
            else:
                cactusConfigID = toil.importFile(makeURL(project.getConfigPath()))
            project.setConfigID(cactusConfigID)

            project.syncToFileStore(toil)
            configNode = ET.parse(project.getConfigPath()).getroot()
            configWrapper = ConfigWrapper(configNode)
            configWrapper.substituteAllPredefinedConstantsWithLiterals()


            project.writeXML(pjPath)
            halID = toil.start(RunCactusPreprocessorThenProgressiveDown(options, project, memory=configWrapper.getDefaultMemory()))

        toil.exportFile(halID, makeURL(options.outputHal))
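
The makeURL helper is not shown; since Toil's importFile and exportFile expect URLs, it presumably turns bare filesystem paths into file:// URLs. A hedged reconstruction:

    import os

    def makeURL(path):
        # Hypothetical: leave real URLs alone, prefix bare paths with file://.
        return path if "://" in path else "file://" + os.path.abspath(path)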
Example #15
    def issueBatchJob(self, jobNode):
        """
        Issues a job command to Parasol.
        """
        self.checkResourceRequest(jobNode.memory, jobNode.cores, jobNode.disk)

        MiB = 1 << 20
        truncatedMemory = (jobNode.memory / MiB) * MiB
        # Look for a batch for jobs with these resource requirements, with
        # the memory rounded down to the nearest megabyte. Rounding down
        # means the new job can't ever decrease the memory requirements
        # of jobs already in the batch.
        if len(self.resultsFiles) >= self.maxBatches:
            raise RuntimeError('Number of batches reached limit of %i' %
                               self.maxBatches)
        try:
            results = self.resultsFiles[(truncatedMemory, jobNode.cores)]
        except KeyError:
            results = getTempFile(rootDir=self.parasolResultsDir)
            self.resultsFiles[(truncatedMemory, jobNode.cores)] = results

        # Prefix the command with environment overrides, optionally looking them up from the
        # current environment if the value is None
        command = ' '.join(concat('env', self.__environment(),
                                  jobNode.command))
        parasolCommand = [
            '-verbose',
            '-ram=%i' % jobNode.memory,
            '-cpu=%i' % jobNode.cores, '-results=' + results, 'add', 'job',
            command
        ]
        # Deal with the cpus
        self.usedCpus += jobNode.cores
        while True:  # Process finished results with no wait
            try:
                jobID = self.cpuUsageQueue.get_nowait()
            except Empty:
                break
            if jobID in self.jobIDsToCpu:
                self.usedCpus -= self.jobIDsToCpu.pop(jobID)
            assert self.usedCpus >= 0
        while self.usedCpus > self.maxCores:  # If we are still waiting
            jobID = self.cpuUsageQueue.get()
            if jobID in self.jobIDsToCpu:
                self.usedCpus -= self.jobIDsToCpu.pop(jobID)
            assert self.usedCpus >= 0
        # Now keep going
        while True:
            line = self._runParasol(parasolCommand)[1][0]
            match = self.parasolOutputPattern.match(line)
            if match is None:
                # This is because parasol add job will return success, even if the job was not
                # properly issued!
                logger.debug('We failed to properly add the job; we will try again after 5s.')
                time.sleep(5)
            else:
                jobID = int(match.group(1))
                self.jobIDsToCpu[jobID] = jobNode.cores
                self.runningJobs.add(jobID)
                logger.debug("Got the parasol job id: %s from line: %s" %
                             (jobID, line))
                return jobID