Пример #1
0
 def progressiveFunction(self,
                         experimentFile,
                         toilDir,
                         batchSystem,
                         buildAvgs,
                         buildHal,
                         buildFasta,
                         toilStats,
                         subtreeRoot=None,
                         logLevel=None):
     eW = ExperimentWrapper(ET.parse(experimentFile).getroot())
     seqFile = getTempFile()
     with open(seqFile, 'w') as f:
         tree = eW.getTree()
         newick = NXNewick().writeString(tree)
         f.write('%s\n' % newick)
         for genome in eW.getGenomesWithSequence():
             f.write('%s %s\n' % (genome, eW.getSequenceID(genome)))
     config = eW.getConfigPath()
     runCactusProgressive(seqFile,
                          config,
                          toilDir,
                          batchSystem=batchSystem,
                          buildAvgs=buildAvgs,
                          toilStats=toilStats,
                          logLevel=logLevel)
    def progressiveFunction(self, experimentFile, toilDir,
                            batchSystem, buildAvgs,
                            buildReference,
                            buildHal,
                            buildFasta,
                            toilStats,
                            subtreeRoot=None):
        tempDir = getTempDirectory(os.getcwd())
        tempExperimentDir = os.path.join(tempDir, "exp")
        runCreateMultiCactusProject(experimentFile,
                                    tempExperimentDir,
                                    fixNames=False,
                                    root=subtreeRoot)
        logger.info("Put the temporary files in %s" % tempExperimentDir)

        runCactusProgressive(os.path.join(tempExperimentDir, "exp_project.xml"),
                             toilDir,
                             batchSystem=batchSystem,
                             buildAvgs=buildAvgs,
                             toilStats=toilStats)

        # Check that the headers and sequences in the output are the
        # same as the sequences in the input (minus differences in
        # repeat-masking)
        exp = ExperimentWrapper(ET.parse(experimentFile).getroot())
        seqMap = exp.buildSequenceMap()
        # Maps genome name -> headers in fasta
        headers = {}
        for genomeName, inputSequencePath in seqMap.items():
            if os.path.isdir(inputSequencePath):
                # Some "input sequence paths" are actually provided as
                # directories containing multiple FASTAs
                concatenatedPath = getTempFile()
                system("cat %s/* > %s" % (inputSequencePath, concatenatedPath))
                inputSequencePath = concatenatedPath
            headers[genomeName] = list(map(itemgetter(0), fastaRead(inputSequencePath)))

        # check headers inside .c2h output
        for expPath in glob.glob('%s/*/*_experiment.xml' % (tempExperimentDir)):
            subExp = ExperimentWrapper(ET.parse(expPath).getroot())
            outgroups = subExp.getOutgroupEvents()
            c2hPath = subExp.getHALPath()
            with open(c2hPath) as f:
                for line in f:
                    fields = line.split('\t')
                    if fields[0] == 's':
                        # Sequence line
                        genome = fields[1][1:-1]
                        header = fields[2][1:-1]
                        if genome in headers and genome not in outgroups:
                            # This genome is an input genome
                            self.assertTrue(header in headers[genome],
                                            'Header %s from output c2h %s not found in input fa %s'
                                            ' for genome %s' % (header, c2hPath, seqMap[genome], genome))


        runToilStatusAndFailIfNotComplete(toilDir)
        system("rm -rf %s" % tempDir)
Пример #3
0
 def progressiveFunction(self, experimentFile, jobTreeDir,
                         batchSystem, buildAvgs,
                         buildReference,
                         buildHal,
                         buildFasta,
                         jobTreeStats,
                         subtreeRoot=None):
     tempDir = getTempDirectory(os.getcwd())
     tempExperimentDir = os.path.join(tempDir, "exp")
     runCactusCreateMultiCactusProject(experimentFile,
                                       tempExperimentDir,
                                       fixNames=False,
                                       root=subtreeRoot)
     logger.info("Put the temporary files in %s" % tempExperimentDir)
     runCactusProgressive(os.path.join(tempExperimentDir, "exp_project.xml"),
                          jobTreeDir,
                          batchSystem=batchSystem,
                          buildAvgs=buildAvgs,
                          jobTreeStats=jobTreeStats)
     runJobTreeStatusAndFailIfNotComplete(jobTreeDir)
     system("rm -rf %s" % tempDir)
    def runProgressive(self):
        logger.debug("Going to put the alignment in %s" % self.outputDir)
        if not os.path.isdir(self.outputDir):
            os.mkdir(self.outputDir)

        if not os.path.exists(os.path.join(self.outputDir, "progressiveCactusAlignment")):
            xmlTree = ET.parse(os.path.join(getRootPathString(), "lib", "cactus_workflow_config.xml"))
            
            #Set the parameters
            tempLocalDir = os.path.join(self.outputDir, "tempProgressiveCactusAlignment")
            system("rm -rf %s" % tempLocalDir)
            os.mkdir(tempLocalDir)
            
            #Set the config parameters
            self.params.applyToXml(xmlTree)
            config = xmlTree.getroot()
            assert config is not None
            
            #Write the config file
            tempConfigFile = os.path.join(tempLocalDir, "config.xml")
            fileHandle = open(tempConfigFile, 'w')
            assert fileHandle is not None
            tree = ET.ElementTree(config)
            tree.write(fileHandle)
            fileHandle.close()
         
            #Make the experiment file
            tempExperimentFile = os.path.join(tempLocalDir, "experiment.xml")
            
            if self.params.kyotoTycoon == True:
                dbConfElem = ET.Element("st_kv_database_conf", type="kyoto_tycoon")
                ktElem = ET.SubElement(dbConfElem, "kyoto_tycoon", host="localhost", port="1978", database_dir="dummy")
            else:
                dbConfElem = None
            
            cactusWorkflowExperiment = CactusWorkflowExperiment(
                                                 sequences=self.sequences, 
                                                 newickTreeString=self.newickTree, 
                                                 #requiredSpecies=self.requiredSpecies,
                                                 #singleCopySpecies=self.singleCopySpecies,
                                                 databaseName="cactusAlignment",
                                                 outputDir=tempLocalDir,
                                                 configFile=tempConfigFile,
                                                 databaseConf = dbConfElem)
            cactusWorkflowExperiment.writeExperimentFile(tempExperimentFile)
            
            #The jobtree
            tempJobTreeDir = os.path.join(tempLocalDir, "jobTree")
            
            #The place to put the temporary experiment dir
            tempExperimentDir = os.path.join(tempLocalDir, "progressiveCactusAlignment")
            
      
            #The temporary experiment 
            runCactusCreateMultiCactusProject(tempExperimentFile, 
                                              tempExperimentDir)
            logger.info("Setup the cactus progressive experiment")
            
            runCactusProgressive(os.path.join(tempExperimentDir, "progressiveCactusAlignment_project.xml"), 
                                 tempJobTreeDir, 
                                 #batchSystem=batchSystem, 
                                 buildMaf=True,
                                 joinMaf=True,
                                 #buildTrees=buildTrees, buildFaces=buildFaces, buildReference=buildReference,
                                 jobTreeStats=True,
                                 maxThreads=4,
                                 logLevel="DEBUG")
            logger.info("Ran the progressive workflow")
            
            #Check if the jobtree completed sucessively.
            runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
            logger.info("Checked the job tree dir for the progressive run")
            
            #Run the cactus tree stats
            expPath = os.path.join(tempExperimentDir, "Anc0", "Anc0_experiment.xml")
            exp = ExperimentWrapper(ET.parse(expPath).getroot())
            if exp.getDbType() == "kyoto_tycoon":
                ktserver = KtserverLauncher()
                ktserver.spawnServer(exp) 
            treeStatsFile = os.path.join(self.outputDir, "treeStats.xml")
            system("cactus_treeStats --cactusDisk \'%s\' --flowerName 0 --outputFile %s" %(exp.getDiskDatabaseString(),
                                                                                        treeStatsFile))
            if exp.getDbType() == "kyoto_tycoon":
                ktserver.killServer(exp)
                
            #Now copy the true assembly back to the output
            system("mv %s %s/experiment.xml" % (tempExperimentFile, self.outputDir))
            system("mv %s %s" % (tempExperimentDir, self.outputDir))
            system("jobTreeStats --jobTree %s --outputFile %s/jobTreeStats.xml" % (tempJobTreeDir, self.outputDir))
            system("mv %s %s/config.xml" % (tempConfigFile, self.outputDir))
            
            #But keep a link to the multicactus project in its original path so we can navigate
            # the paths in the xml...
            actualResultsDir = os.path.join(os.path.abspath(self.outputDir), "progressiveCactusAlignment")
            tempResultsDir = os.path.join(self.outputDir, "tempProgressiveCactusAlignment")
            system("ln -s %s %s" % (actualResultsDir, tempResultsDir))
Пример #5
0
    def progressiveFunction(self,
                            experimentFile,
                            toilDir,
                            batchSystem,
                            buildAvgs,
                            buildReference,
                            buildHal,
                            buildFasta,
                            toilStats,
                            subtreeRoot=None):
        tempDir = getTempDirectory(os.getcwd())
        tempExperimentDir = os.path.join(tempDir, "exp")
        runCreateMultiCactusProject(experimentFile,
                                    tempExperimentDir,
                                    fixNames=False,
                                    root=subtreeRoot)
        logger.info("Put the temporary files in %s" % tempExperimentDir)

        runCactusProgressive(os.path.join(tempExperimentDir,
                                          "exp_project.xml"),
                             toilDir,
                             batchSystem=batchSystem,
                             buildAvgs=buildAvgs,
                             toilStats=toilStats)

        # Check that the headers and sequences in the output are the
        # same as the sequences in the input (minus differences in
        # repeat-masking)
        exp = ExperimentWrapper(ET.parse(experimentFile).getroot())
        seqMap = exp.buildSequenceMap()
        # Maps genome name -> headers in fasta
        headers = {}
        for genomeName, inputSequencePath in seqMap.items():
            if os.path.isdir(inputSequencePath):
                # Some "input sequence paths" are actually provided as
                # directories containing multiple FASTAs
                concatenatedPath = getTempFile()
                system("cat %s/* > %s" % (inputSequencePath, concatenatedPath))
                inputSequencePath = concatenatedPath
            headers[genomeName] = list(
                map(itemgetter(0), fastaRead(inputSequencePath)))

        # check headers inside .c2h output
        for expPath in glob.glob('%s/*/*_experiment.xml' %
                                 (tempExperimentDir)):
            subExp = ExperimentWrapper(ET.parse(expPath).getroot())
            outgroups = subExp.getOutgroupEvents()
            c2hPath = subExp.getHALPath()
            with open(c2hPath) as f:
                for line in f:
                    fields = line.split('\t')
                    if fields[0] == 's':
                        # Sequence line
                        genome = fields[1][1:-1]
                        header = fields[2][1:-1]
                        if genome in headers and genome not in outgroups:
                            # This genome is an input genome
                            self.assertTrue(
                                header in headers[genome],
                                'Header %s from output c2h %s not found in input fa %s'
                                ' for genome %s' %
                                (header, c2hPath, seqMap[genome], genome))

        runToilStatusAndFailIfNotComplete(toilDir)
        system("rm -rf %s" % tempDir)
Пример #6
0
        self.progressiveFunction(experimentFile, toilDir,
                                 batchSystem, buildAvgs,
                                 buildHal,
                                 buildFasta,
                                 toilStats, subtreeRoot)

    def progressiveFunction(self, experimentFile, toilDir,
                            batchSystem, buildAvgs,
                            buildHal,
                            buildFasta,
                            toilStats,
                            subtreeRoot=None):
        eW = ExperimentWrapper(ET.parse(experimentFile).getroot())
        seqFile = getTempFile()
        with open(seqFile, 'w') as f:
            tree = eW.getTree()
o            newick = NXNewick().writeString(tree)
            f.write('%s\n' % newick)
            for genome in eW.getGenomesWithSequence():
                f.write('%s %s\n' % (genome, eW.getSequenceID(genome)))
        config = eW.getConfigPath()
        runCactusProgressive(seqFile,
                             config,
                             toilDir,
                             batchSystem=batchSystem,
                             buildAvgs=buildAvgs,
                             toilStats=toilStats)

if __name__ == '__main__':
    unittest.main()