def runProgressive(self):
        """Run the progressive cactus workflow and collect its results in self.outputDir.

        Nothing is done when self.outputDir already contains a
        "progressiveCactusAlignment" entry. Otherwise a scratch directory
        "tempProgressiveCactusAlignment" is recreated inside self.outputDir, the
        workflow config and experiment files are written there, the multi-cactus
        project is created and run, tree statistics are computed, and
        experiment.xml, config.xml, treeStats.xml, jobTreeStats.xml and the project
        directory are placed in self.outputDir.

        Reads self.outputDir, self.params, self.sequences and self.newickTree.
        """
        logger.debug("Going to put the alignment in %s" % self.outputDir)
        if not os.path.isdir(self.outputDir):
            os.mkdir(self.outputDir)

        #An existing result directory means a previous run already finished
        if os.path.exists(os.path.join(self.outputDir, "progressiveCactusAlignment")):
            return

        xmlTree = ET.parse(os.path.join(getRootPathString(), "lib", "cactus_workflow_config.xml"))

        #Start from an empty scratch directory
        #NOTE(review): paths are interpolated unquoted into the command strings
        # handed to system(); paths containing spaces would presumably break.
        tempLocalDir = os.path.join(self.outputDir, "tempProgressiveCactusAlignment")
        system("rm -rf %s" % tempLocalDir)
        os.mkdir(tempLocalDir)

        #Set the config parameters
        self.params.applyToXml(xmlTree)
        config = xmlTree.getroot()
        assert config is not None

        #Write the config file. ElementTree is given the path rather than an open
        # handle, so it opens and closes the file itself.
        tempConfigFile = os.path.join(tempLocalDir, "config.xml")
        ET.ElementTree(config).write(tempConfigFile)

        #Make the experiment file
        tempExperimentFile = os.path.join(tempLocalDir, "experiment.xml")

        #The comparison with True is kept as-is so that truthy non-boolean
        # settings keep behaving exactly as before.
        if self.params.kyotoTycoon == True:
            dbConfElem = ET.Element("st_kv_database_conf", type="kyoto_tycoon")
            ET.SubElement(dbConfElem, "kyoto_tycoon", host="localhost", port="1978", database_dir="dummy")
        else:
            dbConfElem = None

        cactusWorkflowExperiment = CactusWorkflowExperiment(
                                             sequences=self.sequences,
                                             newickTreeString=self.newickTree,
                                             #requiredSpecies=self.requiredSpecies,
                                             #singleCopySpecies=self.singleCopySpecies,
                                             databaseName="cactusAlignment",
                                             outputDir=tempLocalDir,
                                             configFile=tempConfigFile,
                                             databaseConf=dbConfElem)
        cactusWorkflowExperiment.writeExperimentFile(tempExperimentFile)

        #The jobtree
        tempJobTreeDir = os.path.join(tempLocalDir, "jobTree")

        #The place to put the temporary experiment dir
        tempExperimentDir = os.path.join(tempLocalDir, "progressiveCactusAlignment")

        #The temporary experiment
        runCactusCreateMultiCactusProject(tempExperimentFile,
                                          tempExperimentDir)
        logger.info("Setup the cactus progressive experiment")

        runCactusProgressive(os.path.join(tempExperimentDir, "progressiveCactusAlignment_project.xml"),
                             tempJobTreeDir,
                             #batchSystem=batchSystem,
                             buildMaf=True,
                             joinMaf=True,
                             #buildTrees=buildTrees, buildFaces=buildFaces, buildReference=buildReference,
                             jobTreeStats=True,
                             maxThreads=4,
                             logLevel="DEBUG")
        logger.info("Ran the progressive workflow")

        #Check if the jobtree completed successfully.
        runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
        logger.info("Checked the job tree dir for the progressive run")

        #Run the cactus tree stats on the root (Anc0) experiment
        expPath = os.path.join(tempExperimentDir, "Anc0", "Anc0_experiment.xml")
        exp = ExperimentWrapper(ET.parse(expPath).getroot())
        usesKtserver = exp.getDbType() == "kyoto_tycoon"
        if usesKtserver:
            ktserver = KtserverLauncher()
            ktserver.spawnServer(exp)
        treeStatsFile = os.path.join(self.outputDir, "treeStats.xml")
        try:
            system("cactus_treeStats --cactusDisk \'%s\' --flowerName 0 --outputFile %s" % (exp.getDiskDatabaseString(),
                                                                                          treeStatsFile))
        finally:
            #Always shut the server down, even when the stats command fails,
            # so no ktserver process is left behind.
            if usesKtserver:
                ktserver.killServer(exp)

        #Now copy the true assembly back to the output
        system("mv %s %s/experiment.xml" % (tempExperimentFile, self.outputDir))
        system("mv %s %s" % (tempExperimentDir, self.outputDir))
        system("jobTreeStats --jobTree %s --outputFile %s/jobTreeStats.xml" % (tempJobTreeDir, self.outputDir))
        system("mv %s %s/config.xml" % (tempConfigFile, self.outputDir))

        #But keep a link to the multicactus project in its original path so we can navigate
        # the paths in the xml. The scratch directory still exists at this point, so
        # ln is expected to create the link inside it under the name
        # "progressiveCactusAlignment", i.e. where the project used to live.
        actualResultsDir = os.path.join(os.path.abspath(self.outputDir), "progressiveCactusAlignment")
        system("ln -s %s %s" % (actualResultsDir, tempLocalDir))
Example #2
0
    def run(self):
        """Build the cactus alignment if it is missing, then schedule MakeStats.

        When self.outputDir does not yet contain "cactusAlignment", the stock
        workflow config is loaded, adjusted from this object's attributes
        (reference algorithm, block degrees, blast strings, chain limits, ...),
        written to the local temp dir and used to run the cactus workflow.
        experiment.xml, config.xml, the alignment database and jobTreeStats.xml
        are then placed in self.outputDir.

        A MakeStats child target is added in every case.
        """
        if not os.path.isdir(self.outputDir):
            os.mkdir(self.outputDir)
        cactusAlignmentName = "cactusAlignment"
        outputFile = os.path.join(self.outputDir, cactusAlignmentName)
        if not os.path.exists(outputFile):
            config = ET.parse(
                os.path.join(getRootPathString(), "lib",
                             "cactus_workflow_config.xml")).getroot()

            #Set the reference algorithm
            referenceElem = config.find("reference")
            referenceElem.attrib["matching_algorithm"] = self.referenceAlgorithm

            #Do the minimum block degree configuration; the first two
            # <iteration> children are used as the blast and base iterations
            iterations = config.find("alignment").find("iterations")
            iterationElems = iterations.findall("iteration")
            blastIteration = iterationElems[0]
            baseIteration = iterationElems[1]

            #A block degree of 1 or less is raised to 2 for the blast iteration
            minimumBlastBlockDegree = self.minimumBlockDegree
            if minimumBlastBlockDegree <= 1:
                minimumBlastBlockDegree = 2
            blastIteration.find("core").attrib["minimumBlockDegree"] = str(
                minimumBlastBlockDegree)
            baseIteration.attrib["minimumBlockDegree"] = str(
                self.minimumBlockDegree)
            baseIteration.attrib["prune_out_stub_alignments"] = str(
                int(self.pruneOutStubAlignments))
            baseIteration.attrib["gap_gamma"] = str(float(self.gapGamma))

            #Set the blast strings by filling in the PARAMETERS placeholder
            blastElem = blastIteration.find("blast")
            for attribName in ("blastString", "selfBlastString"):
                blastElem.attrib[attribName] = blastElem.attrib[
                    attribName].replace("PARAMETERS",
                                        self.blastAlignmentString)

            #Get rid of the base level, if needed
            if not self.baseLevel:
                iterations.remove(baseIteration)

            #Set the number of chains to allow in a level, during promotion
            config.find("normal").attrib["max_number_of_chains"] = str(
                self.maxNumberOfChains)

            #Set the number of chains to order per round of the matching algorithm
            referenceElem.attrib["permutations"] = str(self.permutations)

            #Set the chain weight function
            if self.useSimulatedAnnealing:
                referenceElem.attrib["useSimulatedAnnealing"] = "1"

            referenceElem.attrib["theta"] = str(self.theta)

            #All scratch files live in the local temp dir; the code below relies
            # on getLocalTempDir() naming the same directory throughout.
            localTempDir = self.getLocalTempDir()

            #Write the config file. ElementTree is given the path rather than
            # an open handle, so it opens and closes the file itself.
            tempConfigFile = os.path.join(localTempDir, "config.xml")
            ET.ElementTree(config).write(tempConfigFile)

            #Make the supporting temporary files
            tempExperimentFile = os.path.join(localTempDir, "experiment.xml")
            tempJobTreeDir = os.path.join(localTempDir, "jobTree")
            #Make the experiment file
            cactusWorkflowExperiment = CactusWorkflowExperiment(
                sequences=self.sequences.split(),
                newickTreeString=self.options.newickTree,
                requiredSpecies=[(1, self.requiredSpecies.split())],
                singleCopySpecies=self.singleCopySpecies,
                outgroupEvent=self.options.outgroupEvent,
                databaseName=cactusAlignmentName,
                outputDir=localTempDir,
                configFile=tempConfigFile)
            cactusWorkflowExperiment.writeExperimentFile(tempExperimentFile)
            #Now run cactus workflow
            runCactusWorkflow(experimentFile=tempExperimentFile,
                              jobTreeDir=tempJobTreeDir,
                              setupAndBuildAlignments=True,
                              buildTrees=False,
                              buildFaces=False,
                              buildReference=True,
                              batchSystem="single_machine",
                              maxThreads=1,
                              jobTreeStats=True)
            logger.info("Ran the workflow")
            #Check if the jobtree completed successfully.
            runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
            logger.info("Checked the job tree dir")
            #Now copy the true assembly back to the output
            system("mv %s %s/experiment.xml" %
                   (tempExperimentFile, self.outputDir))
            system("mv %s %s/config.xml" % (tempConfigFile, self.outputDir))
            #Move the final db out of the local temp dir
            localCactusDisk = os.path.join(localTempDir, cactusAlignmentName)
            system("mv %s %s" % (localCactusDisk, outputFile))
            #Compute the stats
            system(
                "jobTreeStats --jobTree %s --outputFile %s/jobTreeStats.xml" %
                (tempJobTreeDir, self.outputDir))
            #We're done!
        self.addChildTarget(MakeStats(outputFile, self.outputDir,
                                      self.options))
    def runVanilla(self):
        """Run the standard (non-progressive) cactus workflow for comparison.

        Nothing is done when self.outputDir already contains a
        "cactusAlignmentVanilla" entry. Otherwise a scratch directory
        "tempVanillaCactusAlignment" is recreated inside self.outputDir, the
        config and experiment files are written there, the workflow is run, and
        cactusVanilla.maf, treeStats.xml, jobTreeStats.xml, the alignment
        database and experiment.xml are placed in self.outputDir.

        Reads self.outputDir, self.params, self.sequences and self.newickTree.
        """
        logger.debug("Going to put the alignment in %s" % self.outputDir)
        if not os.path.isdir(self.outputDir):
            os.mkdir(self.outputDir)

        #An existing database means a previous run already finished
        if os.path.exists(os.path.join(self.outputDir, "cactusAlignmentVanilla")):
            return

        xmlTree = ET.parse(os.path.join(getRootPathString(), "lib", "cactus_workflow_config.xml"))

        #Start from an empty scratch directory
        tempLocalDir = os.path.join(self.outputDir, "tempVanillaCactusAlignment")
        system("rm -rf %s" % tempLocalDir)
        os.mkdir(tempLocalDir)

        #Set the config parameters
        self.params.applyToXml(xmlTree)
        config = xmlTree.getroot()
        assert config is not None

        #Write the config file. ElementTree is given the path rather than an open
        # handle, so it opens and closes the file itself.
        tempConfigFile = os.path.join(tempLocalDir, "config.xml")
        ET.ElementTree(config).write(tempConfigFile)

        #Make the experiment file
        tempExperimentFile = os.path.join(tempLocalDir, "experiment.xml")
        cactusWorkflowExperiment = CactusWorkflowExperiment(
                                             sequences=self.sequences,
                                             newickTreeString=self.newickTree,
                                             #requiredSpecies=self.requiredSpecies,
                                             #singleCopySpecies=self.singleCopySpecies,
                                             databaseName="cactusAlignmentVanilla",
                                             outputDir=tempLocalDir,
                                             configFile=tempConfigFile)
        tempExperimentDir = os.path.join(tempLocalDir, "cactusAlignmentVanilla")
        cactusWorkflowExperiment.writeExperimentFile(tempExperimentFile)

        # apply naming to the event tree to be consistent with progressive
        exp = ExperimentWrapper(ET.parse(tempExperimentFile).getroot())
        cleanEventTree(exp)
        exp.writeXML(tempExperimentFile)

        #Run the vanilla cactus workflow
        tempJobTreeDir = os.path.join(tempLocalDir, "jobTreeVanilla")
        runCactusWorkflow(tempExperimentFile, tempJobTreeDir,
                          jobTreeStats=True,
                          setupAndBuildAlignments=True,
                          buildReference=True,
                          maxThreads=4)

        runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
        logger.info("Checked the job tree dir for the vanilla run")

        runCactusMAFGenerator(os.path.join(self.outputDir, "cactusVanilla.maf"), getCactusDiskString(tempExperimentDir))

        #Run the cactus tree stats
        treeStatsFile = os.path.join(self.outputDir, "treeStats.xml")
        system("cactus_treeStats --cactusDisk \'%s\' --flowerName 0 --outputFile %s" % (exp.getDiskDatabaseString(),
                                                                                      treeStatsFile))

        #Collect the job tree stats, then move the database and experiment file to the output
        system("jobTreeStats --jobTree %s --outputFile %s/jobTreeStats.xml" % (tempJobTreeDir, self.outputDir))
        system("mv %s %s" % (tempExperimentDir, self.outputDir))
        system("mv %s %s/experiment.xml" % (tempExperimentFile, self.outputDir))
 def run(self):
     """Build the cactus alignment if it is missing, then schedule MakeStats.

     When self.outputDir does not yet contain "cactusAlignment", the stock
     workflow config is loaded, adjusted from this object's attributes
     (reference algorithm, block degrees, blast strings, chain limits, ...),
     written to the local temp dir and used to run the cactus workflow.
     experiment.xml, config.xml, the alignment database and jobTreeStats.xml
     are then placed in self.outputDir.

     A MakeStats child target is added in every case.
     """
     if not os.path.isdir(self.outputDir):
         os.mkdir(self.outputDir)
     cactusAlignmentName = "cactusAlignment"
     outputFile = os.path.join(self.outputDir, cactusAlignmentName)
     if not os.path.exists(outputFile):
         config = ET.parse(os.path.join(getRootPathString(), "lib", "cactus_workflow_config.xml")).getroot()

         #Set the reference algorithm
         referenceElem = config.find("reference")
         referenceElem.attrib["matching_algorithm"] = self.referenceAlgorithm

         #Do the minimum block degree configuration; the first two
         # <iteration> children are used as the blast and base iterations
         iterations = config.find("alignment").find("iterations")
         iterationElems = iterations.findall("iteration")
         blastIteration = iterationElems[0]
         baseIteration = iterationElems[1]

         #A block degree of 1 or less is raised to 2 for the blast iteration
         minimumBlastBlockDegree = self.minimumBlockDegree
         if minimumBlastBlockDegree <= 1:
             minimumBlastBlockDegree = 2
         blastIteration.find("core").attrib["minimumBlockDegree"] = str(minimumBlastBlockDegree)
         baseIteration.attrib["minimumBlockDegree"] = str(self.minimumBlockDegree)
         baseIteration.attrib["prune_out_stub_alignments"] = str(int(self.pruneOutStubAlignments))
         baseIteration.attrib["gap_gamma"] = str(float(self.gapGamma))

         #Set the blast strings by filling in the PARAMETERS placeholder
         blastElem = blastIteration.find("blast")
         for attribName in ("blastString", "selfBlastString"):
             blastElem.attrib[attribName] = blastElem.attrib[attribName].replace("PARAMETERS", self.blastAlignmentString)

         #Get rid of the base level, if needed
         if not self.baseLevel:
             iterations.remove(baseIteration)

         #Set the number of chains to allow in a level, during promotion
         config.find("normal").attrib["max_number_of_chains"] = str(self.maxNumberOfChains)

         #Set the number of chains to order per round of the matching algorithm
         referenceElem.attrib["permutations"] = str(self.permutations)

         #Set the chain weight function
         if self.useSimulatedAnnealing:
             referenceElem.attrib["useSimulatedAnnealing"] = "1"

         referenceElem.attrib["theta"] = str(self.theta)

         #All scratch files live in the local temp dir; the code below relies
         # on getLocalTempDir() naming the same directory throughout.
         localTempDir = self.getLocalTempDir()

         #Write the config file. ElementTree is given the path rather than
         # an open handle, so it opens and closes the file itself.
         tempConfigFile = os.path.join(localTempDir, "config.xml")
         ET.ElementTree(config).write(tempConfigFile)

         #Make the supporting temporary files
         tempExperimentFile = os.path.join(localTempDir, "experiment.xml")
         tempJobTreeDir = os.path.join(localTempDir, "jobTree")
         #Make the experiment file
         cactusWorkflowExperiment = CactusWorkflowExperiment(
                                              sequences=self.sequences.split(),
                                              newickTreeString=self.options.newickTree,
                                              requiredSpecies=[(1, self.requiredSpecies.split())],
                                              singleCopySpecies=self.singleCopySpecies,
                                              outgroupEvent=self.options.outgroupEvent,
                                              databaseName=cactusAlignmentName,
                                              outputDir=localTempDir,
                                              configFile=tempConfigFile)
         cactusWorkflowExperiment.writeExperimentFile(tempExperimentFile)
         #Now run cactus workflow
         runCactusWorkflow(experimentFile=tempExperimentFile, jobTreeDir=tempJobTreeDir,
                           setupAndBuildAlignments=True,
                           buildTrees=False, buildFaces=False, buildReference=True,
                           batchSystem="single_machine", maxThreads=1, jobTreeStats=True)
         logger.info("Ran the workflow")
         #Check if the jobtree completed successfully.
         runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
         logger.info("Checked the job tree dir")
         #Now copy the true assembly back to the output
         system("mv %s %s/experiment.xml" % (tempExperimentFile, self.outputDir))
         system("mv %s %s/config.xml" % (tempConfigFile, self.outputDir))
         #Move the final db out of the local temp dir
         localCactusDisk = os.path.join(localTempDir, cactusAlignmentName)
         system("mv %s %s" % (localCactusDisk, outputFile))
         #Compute the stats
         system("jobTreeStats --jobTree %s --outputFile %s/jobTreeStats.xml" % (tempJobTreeDir, self.outputDir))
         #We're done!
     self.addChildTarget(MakeStats(outputFile, self.outputDir, self.options))