def runProgressive(self):
    """Run the progressive cactus workflow and gather its results in self.outputDir.

    self.outputDir is created if it is missing. If
    <outputDir>/progressiveCactusAlignment already exists the method
    returns without doing anything else. Otherwise it:

    1. recreates the scratch directory <outputDir>/tempProgressiveCactusAlignment,
    2. writes a config file (the stock cactus_workflow_config.xml with
       self.params applied) and an experiment file into it,
    3. creates the multi-cactus project and runs the progressive workflow,
    4. runs cactus_treeStats and jobTreeStats, and
    5. moves the experiment file, the project directory and the config file
       into self.outputDir, leaving a symlink to the project behind in the
       scratch directory.

    Reads self.outputDir, self.params, self.sequences and self.newickTree.
    Returns None.
    """
    logger.debug("Going to put the alignment in %s" % self.outputDir)
    if not os.path.isdir(self.outputDir):
        os.mkdir(self.outputDir)

    # An existing result directory means this run has already been done.
    if os.path.exists(os.path.join(self.outputDir, "progressiveCactusAlignment")):
        return

    xmlTree = ET.parse(os.path.join(getRootPathString(), "lib", "cactus_workflow_config.xml"))

    # Start from an empty scratch directory.
    tempLocalDir = os.path.join(self.outputDir, "tempProgressiveCactusAlignment")
    system("rm -rf %s" % tempLocalDir)
    os.mkdir(tempLocalDir)

    # Set the config parameters
    self.params.applyToXml(xmlTree)
    config = xmlTree.getroot()
    assert config is not None

    # Write the config file; the with-block closes the handle even if the
    # serialisation raises.
    tempConfigFile = os.path.join(tempLocalDir, "config.xml")
    with open(tempConfigFile, 'w') as fileHandle:
        ET.ElementTree(config).write(fileHandle)

    # Make the experiment file
    tempExperimentFile = os.path.join(tempLocalDir, "experiment.xml")
    dbConfElem = None
    if self.params.kyotoTycoon:
        dbConfElem = ET.Element("st_kv_database_conf", type="kyoto_tycoon")
        ET.SubElement(dbConfElem, "kyoto_tycoon", host="localhost",
                      port="1978", database_dir="dummy")

    cactusWorkflowExperiment = CactusWorkflowExperiment(
        sequences=self.sequences,
        newickTreeString=self.newickTree,
        databaseName="cactusAlignment",
        outputDir=tempLocalDir,
        configFile=tempConfigFile,
        databaseConf=dbConfElem)
    cactusWorkflowExperiment.writeExperimentFile(tempExperimentFile)

    # The jobtree
    tempJobTreeDir = os.path.join(tempLocalDir, "jobTree")
    # The place to put the temporary experiment dir
    tempExperimentDir = os.path.join(tempLocalDir, "progressiveCactusAlignment")

    # The temporary experiment
    runCactusCreateMultiCactusProject(tempExperimentFile, tempExperimentDir)
    logger.info("Setup the cactus progressive experiment")
    runCactusProgressive(
        os.path.join(tempExperimentDir, "progressiveCactusAlignment_project.xml"),
        tempJobTreeDir,
        buildMaf=True,
        joinMaf=True,
        jobTreeStats=True,
        maxThreads=4,
        logLevel="DEBUG")
    logger.info("Ran the progressive workflow")

    # Check that the jobtree completed successfully.
    runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
    logger.info("Checked the job tree dir for the progressive run")

    # Run the cactus tree stats
    expPath = os.path.join(tempExperimentDir, "Anc0", "Anc0_experiment.xml")
    exp = ExperimentWrapper(ET.parse(expPath).getroot())
    ktserver = None
    if exp.getDbType() == "kyoto_tycoon":
        ktserver = KtserverLauncher()
        ktserver.spawnServer(exp)
    try:
        treeStatsFile = os.path.join(self.outputDir, "treeStats.xml")
        system("cactus_treeStats --cactusDisk '%s' --flowerName 0 --outputFile %s"
               % (exp.getDiskDatabaseString(), treeStatsFile))
    finally:
        # Kill the server we spawned even when the stats step raises, so a
        # failure does not leave it running.
        if ktserver is not None:
            ktserver.killServer(exp)

    # Now copy the true assembly back to the output
    system("mv %s %s/experiment.xml" % (tempExperimentFile, self.outputDir))
    system("mv %s %s" % (tempExperimentDir, self.outputDir))
    system("jobTreeStats --jobTree %s --outputFile %s/jobTreeStats.xml"
           % (tempJobTreeDir, self.outputDir))
    system("mv %s %s/config.xml" % (tempConfigFile, self.outputDir))

    # But keep a link to the multicactus project in its original path so we
    # can navigate the paths in the xml. The scratch directory still exists
    # at this point, so `ln -s` places the link inside it.
    actualResultsDir = os.path.join(os.path.abspath(self.outputDir), "progressiveCactusAlignment")
    tempResultsDir = os.path.join(self.outputDir, "tempProgressiveCactusAlignment")
    system("ln -s %s %s" % (actualResultsDir, tempResultsDir))
def run(self):
    """Build the cactus alignment if it is missing, then schedule MakeStats.

    self.outputDir is created if needed. When <outputDir>/cactusAlignment
    does not exist yet, the stock cactus_workflow_config.xml is customised
    from this object's attributes (reference algorithm, minimum block
    degree, stub pruning, gap gamma, blast parameters, base level, chain
    limits, permutations, simulated annealing, theta), an experiment file
    is written, the cactus workflow is run in the local temp dir, and the
    experiment file, config file and alignment database are moved into
    self.outputDir, followed by a jobTreeStats run.

    Returns None.
    """
    if not os.path.isdir(self.outputDir):
        os.mkdir(self.outputDir)
    cactusAlignmentName = "cactusAlignment"
    outputFile = os.path.join(self.outputDir, cactusAlignmentName)
    if not os.path.exists(outputFile):
        config = ET.parse(
            os.path.join(getRootPathString(), "lib",
                         "cactus_workflow_config.xml")).getroot()
        referenceElem = config.find("reference")

        # Set the reference algorithm
        referenceElem.attrib["matching_algorithm"] = self.referenceAlgorithm

        # Do the minimum block degree configuration. The first <iteration>
        # is the blast iteration and the second is the base iteration.
        iterations = config.find("alignment").find("iterations")
        iterationElems = iterations.findall("iteration")
        blastIteration = iterationElems[0]
        baseIteration = iterationElems[1]
        # The blast iteration never gets a minimum block degree below 2.
        minimumBlastBlockDegree = self.minimumBlockDegree
        if minimumBlastBlockDegree <= 1:
            minimumBlastBlockDegree = 2
        blastIteration.find("core").attrib["minimumBlockDegree"] = str(
            minimumBlastBlockDegree)
        baseIteration.attrib["minimumBlockDegree"] = str(self.minimumBlockDegree)
        baseIteration.attrib["prune_out_stub_alignments"] = str(
            int(self.pruneOutStubAlignments))
        baseIteration.attrib["gap_gamma"] = str(float(self.gapGamma))

        # Substitute the blast parameters into both blast command templates.
        blastElem = blastIteration.find("blast")
        for key in ("blastString", "selfBlastString"):
            blastElem.attrib[key] = blastElem.attrib[key].replace(
                "PARAMETERS", self.blastAlignmentString)

        # Get rid of the base level, if needed
        if not self.baseLevel:
            iterations.remove(baseIteration)

        # Set the number of chains to allow in a level, during promotion
        config.find("normal").attrib["max_number_of_chains"] = str(
            self.maxNumberOfChains)

        # Set the number of chains to order per round of the matching algorithm
        referenceElem.attrib["permutations"] = str(self.permutations)

        # Set the chain weight function
        if self.useSimulatedAnnealing:
            referenceElem.attrib["useSimulatedAnnealing"] = "1"
        # NOTE(review): theta is written unconditionally here; confirm it
        # was not meant to sit under the simulated-annealing branch.
        referenceElem.attrib["theta"] = str(self.theta)

        # NOTE(review): assumes getLocalTempDir() returns the same path on
        # every call, as the original's repeated calls already relied on.
        localTempDir = self.getLocalTempDir()

        # Write the config file; the with-block closes the handle even if
        # the serialisation raises.
        tempConfigFile = os.path.join(localTempDir, "config.xml")
        with open(tempConfigFile, 'w') as fileHandle:
            ET.ElementTree(config).write(fileHandle)

        # Make the supporting temporary files
        tempExperimentFile = os.path.join(localTempDir, "experiment.xml")
        tempJobTreeDir = os.path.join(localTempDir, "jobTree")

        # Make the experiment file
        cactusWorkflowExperiment = CactusWorkflowExperiment(
            sequences=self.sequences.split(),
            newickTreeString=self.options.newickTree,
            requiredSpecies=[(1, self.requiredSpecies.split())],
            singleCopySpecies=self.singleCopySpecies,
            outgroupEvent=self.options.outgroupEvent,
            databaseName=cactusAlignmentName,
            outputDir=localTempDir,
            configFile=tempConfigFile)
        cactusWorkflowExperiment.writeExperimentFile(tempExperimentFile)

        # Now run cactus workflow
        runCactusWorkflow(experimentFile=tempExperimentFile,
                          jobTreeDir=tempJobTreeDir,
                          setupAndBuildAlignments=True,
                          buildTrees=False,
                          buildFaces=False,
                          buildReference=True,
                          batchSystem="single_machine",
                          maxThreads=1,
                          jobTreeStats=True)
        logger.info("Ran the workflow")

        # Check that the jobtree completed successfully.
        runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
        logger.info("Checked the job tree dir")

        # Now copy the true assembly back to the output
        system("mv %s %s/experiment.xml" % (tempExperimentFile, self.outputDir))
        system("mv %s %s/config.xml" % (tempConfigFile, self.outputDir))

        # Move the final db out of the local temp dir
        localCactusDisk = os.path.join(localTempDir, cactusAlignmentName)
        system("mv %s %s" % (localCactusDisk, outputFile))

        # Compute the stats
        system("jobTreeStats --jobTree %s --outputFile %s/jobTreeStats.xml"
               % (tempJobTreeDir, self.outputDir))

    # We're done!
    # NOTE(review): the follow-on target is scheduled whether or not the
    # alignment was built in this call (it only uses names defined before
    # the check); confirm against the intended control flow.
    self.addChildTarget(MakeStats(outputFile, self.outputDir, self.options))
def runVanilla(self):
    """Run the standard (non-progressive) cactus workflow into self.outputDir.

    self.outputDir is created if it is missing. If
    <outputDir>/cactusAlignmentVanilla already exists the method returns
    without doing anything else. Otherwise it:

    1. recreates the scratch directory <outputDir>/tempVanillaCactusAlignment,
    2. writes a config file (the stock cactus_workflow_config.xml with
       self.params applied) and an experiment file into it,
    3. passes the experiment through cleanEventTree and rewrites it,
    4. runs the cactus workflow and generates <outputDir>/cactusVanilla.maf,
    5. runs cactus_treeStats and jobTreeStats, and
    6. moves the alignment directory and the experiment file into
       self.outputDir.

    Reads self.outputDir, self.params, self.sequences and self.newickTree.
    Returns None.
    """
    logger.debug("Going to put the alignment in %s" % self.outputDir)
    if not os.path.isdir(self.outputDir):
        os.mkdir(self.outputDir)

    # An existing result directory means this run has already been done.
    if os.path.exists(os.path.join(self.outputDir, "cactusAlignmentVanilla")):
        return

    xmlTree = ET.parse(os.path.join(getRootPathString(), "lib", "cactus_workflow_config.xml"))

    # Start from an empty scratch directory.
    tempLocalDir = os.path.join(self.outputDir, "tempVanillaCactusAlignment")
    system("rm -rf %s" % tempLocalDir)
    os.mkdir(tempLocalDir)

    # Set the config parameters
    self.params.applyToXml(xmlTree)
    config = xmlTree.getroot()
    assert config is not None

    # Write the config file; the with-block closes the handle even if the
    # serialisation raises.
    tempConfigFile = os.path.join(tempLocalDir, "config.xml")
    with open(tempConfigFile, 'w') as fileHandle:
        ET.ElementTree(config).write(fileHandle)

    # Make the experiment file
    experimentFile = os.path.join(tempLocalDir, "experiment.xml")
    cactusWorkflowExperiment = CactusWorkflowExperiment(
        sequences=self.sequences,
        newickTreeString=self.newickTree,
        databaseName="cactusAlignmentVanilla",
        outputDir=tempLocalDir,
        configFile=tempConfigFile)
    experimentDir = os.path.join(tempLocalDir, "cactusAlignmentVanilla")
    cactusWorkflowExperiment.writeExperimentFile(experimentFile)

    # Apply naming to the event tree to be consistent with progressive
    exp = ExperimentWrapper(ET.parse(experimentFile).getroot())
    cleanEventTree(exp)
    exp.writeXML(experimentFile)

    # Run the vanilla cactus workflow
    jobTreeDir = os.path.join(tempLocalDir, "jobTreeVanilla")
    runCactusWorkflow(experimentFile, jobTreeDir,
                      jobTreeStats=True,
                      setupAndBuildAlignments=True,
                      buildReference=True,
                      maxThreads=4)
    runJobTreeStatusAndFailIfNotComplete(jobTreeDir)
    logger.info("Checked the job tree dir for the vanilla run")

    runCactusMAFGenerator(os.path.join(self.outputDir, "cactusVanilla.maf"),
                          getCactusDiskString(experimentDir))

    # Run the cactus tree stats
    treeStatsFile = os.path.join(self.outputDir, "treeStats.xml")
    system("cactus_treeStats --cactusDisk '%s' --flowerName 0 --outputFile %s"
           % (exp.getDiskDatabaseString(), treeStatsFile))

    system("jobTreeStats --jobTree %s --outputFile %s/jobTreeStats.xml"
           % (jobTreeDir, self.outputDir))

    # Move the results out of the scratch directory
    system("mv %s %s" % (experimentDir, self.outputDir))
    system("mv %s %s/experiment.xml" % (experimentFile, self.outputDir))
def run(self):
    """Build the cactus alignment if it is missing, then schedule MakeStats.

    self.outputDir is created if needed. When <outputDir>/cactusAlignment
    does not exist yet, the stock cactus_workflow_config.xml is customised
    from this object's attributes (reference algorithm, minimum block
    degree, stub pruning, gap gamma, blast parameters, base level, chain
    limits, permutations, simulated annealing, theta), an experiment file
    is written, the cactus workflow is run in the local temp dir, and the
    experiment file, config file and alignment database are moved into
    self.outputDir, followed by a jobTreeStats run.

    Returns None.
    """
    if not os.path.isdir(self.outputDir):
        os.mkdir(self.outputDir)
    cactusAlignmentName = "cactusAlignment"
    outputFile = os.path.join(self.outputDir, cactusAlignmentName)
    if not os.path.exists(outputFile):
        configPath = os.path.join(getRootPathString(), "lib", "cactus_workflow_config.xml")
        config = ET.parse(configPath).getroot()
        referenceAttrib = config.find("reference").attrib

        # Set the reference algorithm
        referenceAttrib["matching_algorithm"] = self.referenceAlgorithm

        # Do the minimum block degree configuration. The first <iteration>
        # is the blast iteration and the second is the base iteration.
        iterations = config.find("alignment").find("iterations")
        allIterations = iterations.findall("iteration")
        blastIteration = allIterations[0]
        baseIteration = allIterations[1]
        # The blast iteration never gets a minimum block degree below 2.
        minimumBlastBlockDegree = self.minimumBlockDegree
        if minimumBlastBlockDegree <= 1:
            minimumBlastBlockDegree = 2
        blastIteration.find("core").attrib["minimumBlockDegree"] = str(minimumBlastBlockDegree)
        baseIteration.attrib["minimumBlockDegree"] = str(self.minimumBlockDegree)
        baseIteration.attrib["prune_out_stub_alignments"] = str(int(self.pruneOutStubAlignments))
        baseIteration.attrib["gap_gamma"] = str(float(self.gapGamma))

        # Substitute the blast parameters into both blast command templates.
        blastAttrib = blastIteration.find("blast").attrib
        blastAttrib["blastString"] = blastAttrib["blastString"].replace(
            "PARAMETERS", self.blastAlignmentString)
        blastAttrib["selfBlastString"] = blastAttrib["selfBlastString"].replace(
            "PARAMETERS", self.blastAlignmentString)

        # Get rid of the base level, if needed
        if not self.baseLevel:
            iterations.remove(baseIteration)

        # Set the number of chains to allow in a level, during promotion
        config.find("normal").attrib["max_number_of_chains"] = str(self.maxNumberOfChains)

        # Set the number of chains to order per round of the matching algorithm
        referenceAttrib["permutations"] = str(self.permutations)

        # Set the chain weight function
        if self.useSimulatedAnnealing:
            referenceAttrib["useSimulatedAnnealing"] = "1"
        # NOTE(review): theta is written unconditionally here; confirm it
        # was not meant to sit under the simulated-annealing branch.
        referenceAttrib["theta"] = str(self.theta)

        # NOTE(review): assumes getLocalTempDir() returns the same path on
        # every call, as the original's repeated calls already relied on.
        localTempDir = self.getLocalTempDir()

        # Write the config file; the with-block closes the handle even if
        # the serialisation raises.
        tempConfigFile = os.path.join(localTempDir, "config.xml")
        with open(tempConfigFile, 'w') as fileHandle:
            ET.ElementTree(config).write(fileHandle)

        # Make the supporting temporary files
        tempExperimentFile = os.path.join(localTempDir, "experiment.xml")
        tempJobTreeDir = os.path.join(localTempDir, "jobTree")

        # Make the experiment file
        cactusWorkflowExperiment = CactusWorkflowExperiment(
            sequences=self.sequences.split(),
            newickTreeString=self.options.newickTree,
            requiredSpecies=[(1, self.requiredSpecies.split())],
            singleCopySpecies=self.singleCopySpecies,
            outgroupEvent=self.options.outgroupEvent,
            databaseName=cactusAlignmentName,
            outputDir=localTempDir,
            configFile=tempConfigFile)
        cactusWorkflowExperiment.writeExperimentFile(tempExperimentFile)

        # Now run cactus workflow
        runCactusWorkflow(experimentFile=tempExperimentFile,
                          jobTreeDir=tempJobTreeDir,
                          setupAndBuildAlignments=True,
                          buildTrees=False,
                          buildFaces=False,
                          buildReference=True,
                          batchSystem="single_machine",
                          maxThreads=1,
                          jobTreeStats=True)
        logger.info("Ran the workflow")

        # Check that the jobtree completed successfully.
        runJobTreeStatusAndFailIfNotComplete(tempJobTreeDir)
        logger.info("Checked the job tree dir")

        # Now copy the true assembly back to the output
        system("mv %s %s/experiment.xml" % (tempExperimentFile, self.outputDir))
        system("mv %s %s/config.xml" % (tempConfigFile, self.outputDir))

        # Move the final db out of the local temp dir
        localCactusDisk = os.path.join(localTempDir, cactusAlignmentName)
        system("mv %s %s" % (localCactusDisk, outputFile))

        # Compute the stats
        system("jobTreeStats --jobTree %s --outputFile %s/jobTreeStats.xml" % (tempJobTreeDir, self.outputDir))

    # We're done!
    # NOTE(review): the follow-on target is scheduled whether or not the
    # alignment was built in this call (it only uses names defined before
    # the check); confirm against the intended control flow.
    self.addChildTarget(MakeStats(outputFile, self.outputDir, self.options))