class ProjectWrapper: alignmentDirName = 'progressiveAlignment' def __init__(self, options): self.options = options self.seqFile = SeqFile(options.seqFile) self.workingDir = options.cactusDir self.configWrapper = None self.expWrapper = None self.processConfig() self.processExperiment() def processConfig(self): # read in the default right out of cactus if self.options.configFile is not None: configPath = self.options.configFile else: dir = cactusRootPath() configPath = os.path.join(dir, "cactus_progressive_config.xml") log.info("Using config from path %s." % configPath) configXml = ET.parse(configPath).getroot() self.configWrapper = ConfigWrapper(configXml) # here we can go through the options and apply some to the config self.configWrapper.setBuildHal(True) self.configWrapper.setBuildFasta(True) def processExperiment(self): expXml = self.seqFile.toXMLElement() #create the cactus disk cdElem = ET.SubElement(expXml, "cactus_disk") database = self.options.database assert database == "kyoto_tycoon" or database == "tokyo_cabinet" confElem = ET.SubElement(cdElem, "st_kv_database_conf") confElem.attrib["type"] = database ET.SubElement(confElem, database) self.expWrapper = ExperimentWrapper(expXml) if not os.path.exists(self.workingDir): os.makedirs(self.workingDir) def writeXml(self): assert os.path.isdir(self.workingDir) configPath = absSymPath(os.path.join(self.workingDir, "config.xml")) expPath = absSymPath(os.path.join(self.workingDir, "expTemplate.xml")) self.expWrapper.setConfigPath(configPath) self.configWrapper.writeXML(configPath) self.expWrapper.writeXML(expPath) projPath = os.path.join(self.workingDir, ProjectWrapper.alignmentDirName) if len(self.seqFile.outgroups) == 0: # No outgroups specified, assume the default outgroup set outgroups = None else: outgroups = self.seqFile.outgroups runCreateMultiCactusProject(expPath, projPath, fixNames=0, outgroupNames=outgroups, root=self.options.root)
class ProjectWrapper: alignmentDirName = 'progressiveAlignment' def __init__(self, options): self.options = options self.seqFile = SeqFile(options.seqFile) self.workingDir = options.cactusDir self.configWrapper = None self.expWrapper = None self.processConfig() self.processExperiment() def processConfig(self): # read in the default right out of cactus if self.options.configFile is not None: configPath = self.options.configFile else: dir = cactusRootPath() configPath = os.path.join(dir, "cactus_progressive_config.xml") log.info("Using config from path %s." % configPath) configXml = ET.parse(configPath).getroot() self.configWrapper = ConfigWrapper(configXml) # here we can go through the options and apply some to the config self.configWrapper.setBuildHal(True) self.configWrapper.setBuildFasta(True) def processExperiment(self): expXml = self.seqFile.toXMLElement() #create the cactus disk cdElem = ET.SubElement(expXml, "cactus_disk") database = self.options.database assert database == "kyoto_tycoon" or database == "tokyo_cabinet" confElem = ET.SubElement(cdElem, "st_kv_database_conf") confElem.attrib["type"] = database ET.SubElement(confElem, database) self.expWrapper = ExperimentWrapper(expXml) if not os.path.exists(self.workingDir): os.makedirs(self.workingDir) def writeXml(self): assert os.path.isdir(self.workingDir) configPath = absSymPath( os.path.join(self.workingDir, "config.xml")) expPath = absSymPath( os.path.join(self.workingDir, "expTemplate.xml")) self.expWrapper.setConfigPath(configPath) self.configWrapper.writeXML(configPath) self.expWrapper.writeXML(expPath) projPath = os.path.join(self.workingDir, ProjectWrapper.alignmentDirName) if len(self.seqFile.outgroups) == 0: # No outgroups specified, assume the default outgroup set outgroups = None else: outgroups = self.seqFile.outgroups runCreateMultiCactusProject(expPath, projPath, fixNames=0, outgroupNames=outgroups, root=self.options.root)
class ProjectWrapper: alignmentDirName = 'progressiveAlignment' def __init__(self, options, seqFile, workingDir): self.options = options self.seqFile = seqFile self.workingDir = workingDir self.configWrapper = None self.expWrapper = None self.processConfig() self.processExperiment() def processConfig(self): # read in the default right out of cactus if self.options.configFile is not None: configPath = self.options.configFile else: dir = cactusRootPath() configPath = os.path.join(dir, "cactus_progressive_config.xml") configXml = ET.parse(configPath).getroot() self.configWrapper = ConfigWrapper(configXml) # here we can go through the options and apply some to the config self.configWrapper.setBuildHal(True) self.configWrapper.setBuildFasta(True) if self.options.outputMaf is not None: self.configWrapper.setBuildMaf(True) self.configWrapper.setJoinMaf(True) # pre-emptively turn down maxParallelSubtree for singleMachine # mode if not enough threads are provided to support it. Probably # need to do something for other ?combined? batch systems? if self.options.batchSystem == 'singleMachine' and \ self.options.database == 'kyoto_tycoon': if int(self.options.maxThreads) < \ self.configWrapper.getMaxParallelSubtrees() * 3: self.configWrapper.setMaxParallelSubtrees( max(1, int(self.options.maxThreads) / 3)) # this is a little hack to effectively toggle back to the # non-progressive version of cactus (as published in Gen. Res. 2011) # from the high-level interface. if self.options.legacy is True: self.configWrapper.setSubtreeSize(sys.maxint) def processExperiment(self): expXml = self.seqFile.toXMLElement() #create the cactus disk cdElem = ET.SubElement(expXml, "cactus_disk") database = self.options.database assert database == "kyoto_tycoon" or database == "tokyo_cabinet" confElem = ET.SubElement(cdElem, "st_kv_database_conf") confElem.attrib["type"] = database dbElem = ET.SubElement(confElem, database) self.expWrapper = ExperimentWrapper(expXml) if self.options.database == "kyoto_tycoon": self.expWrapper.setDbPort(str(self.options.ktPort)) if self.options.ktHost is not None: self.expWrapper.setDbHost(self.options.ktHost) if self.options.ktType == 'memory': self.expWrapper.setDbInMemory(True) self.expWrapper.setDbSnapshot(False) elif self.options.ktType == 'snapshot': self.expWrapper.setDbInMemory(True) self.expWrapper.setDbSnapshot(True) else: assert self.options.ktType == 'disk' self.expWrapper.setDbInMemory(False) self.expWrapper.setDbSnapshot(False) # sonlib doesn't allow for spaces in attributes in the db conf # which renders this options useless # if self.options.ktOpts is not None: # self.expWrapper.setDbServerOptions(self.options.ktOpts) if self.options.ktCreateTuning is not None: self.expWrapper.setDbCreateTuningOptions( self.options.ktCreateTuning) if self.options.ktOpenTuning is not None: self.expWrapper.setDbReadTuningOptions( self.options.ktOpenTuning) #set the sequence output directory outSeqDir = os.path.join(self.workingDir, "sequenceData") if os.path.exists(outSeqDir) and self.options.overwrite: system("rm -rf %s" % outSeqDir) if not os.path.exists(outSeqDir): system("mkdir %s" % outSeqDir) self.expWrapper.setOutputSequenceDir(os.path.join(self.workingDir, "sequenceData")) def writeXml(self): assert os.path.isdir(self.workingDir) configPath = os.path.abspath( os.path.join(self.workingDir, "config.xml")) expPath = os.path.abspath( os.path.join(self.workingDir, "expTemplate.xml")) self.expWrapper.setConfigPath(configPath) self.configWrapper.writeXML(configPath) self.expWrapper.writeXML(expPath) projPath = os.path.join(self.workingDir, ProjectWrapper.alignmentDirName) if os.path.exists(projPath) and self.options.overwrite: system("rm -rf %s" % projPath) if self.options.outputMaf is True: fixNames=1 else: fixNames=0 if os.path.exists(projPath): if not self.isSameAsExisting(expPath, projPath, fixNames): raise RuntimeError("Existing project %s not " % projPath+ "compatible with current input. Please " "erase the working directory or rerun " "with the --overwrite option to start " "from scratch.") else: logPath = os.path.join(self.workingDir, 'cactus.log') logFile = open(logPath, "a") logFile.write("\nContinuing existing alignment. Use " "--overwrite or erase the working directory to " "force restart from scratch.\n") logFile.close() else: cmd = "cactus_createMultiCactusProject.py %s %s --fixNames=%d" % ( expPath, projPath, fixNames) if len(self.seqFile.outgroups) > 0: cmd += " --outgroupNames " + ",".join(self.seqFile.outgroups) if self.options.rootOutgroupDist: cmd += " --rootOutgroupDist %f" % self.options.rootOutgroupDist cmd += " --rootOutgroupPath %s" % self.options.rootOutgroupPath system(cmd) # create a project in a dummy directory. check if the # project xml is the same as the current project. # we do this to see if we should start fresh or try to # work with the existing project when the overwrite flag is off def isSameAsExisting(self, expPath, projPath, fixNames): if not os.path.exists(projPath): return False oldPath = os.path.dirname(projPath + "/") tempPath = "%s_temp" % oldPath if os.path.exists(tempPath): system("rm -rf %s" % tempPath) cmd = "cactus_createMultiCactusProject.py %s %s --fixNames=%d" % ( expPath, tempPath, fixNames) if len(self.seqFile.outgroups) > 0: cmd += " --outgroupNames " + ",".join(self.seqFile.outgroups) if self.options.rootOutgroupDist: cmd += " --rootOutgroupDist %f" % self.options.rootOutgroupDist cmd += " --rootOutgroupPath %s" % self.options.rootOutgroupPath system(cmd) projFilePathNew = os.path.join(tempPath,'%s_temp_project.xml' % self.alignmentDirName) projFilePathOld = os.path.join(oldPath, '%s_project.xml' % self.alignmentDirName) newFile = [line for line in open(projFilePathNew, "r")] oldFile = [line for line in open(projFilePathOld, "r")] areSame = True if len(newFile) != len(oldFile): areSame = False for newLine, oldLine in zip(newFile, oldFile): if newLine.replace(tempPath, oldPath) != oldLine: areSame = False system("rm -rf %s" % tempPath) return areSame
class ProjectWrapper: alignmentDirName = 'progressiveAlignment' def __init__(self, options, seqFile, workingDir): self.options = options self.seqFile = seqFile self.workingDir = workingDir self.configWrapper = None self.expWrapper = None self.processConfig() self.processExperiment() def processConfig(self): # read in the default right out of cactus if self.options.configFile is not None: configPath = self.options.configFile else: dir = cactusRootPath() configPath = os.path.join(dir, "cactus_progressive_config.xml") configXml = ET.parse(configPath).getroot() self.configWrapper = ConfigWrapper(configXml) # here we can go through the options and apply some to the config self.configWrapper.setBuildHal(True) self.configWrapper.setBuildFasta(True) if self.options.outputMaf is not None: self.configWrapper.setBuildMaf(True) self.configWrapper.setJoinMaf(True) # pre-emptively turn down maxParallelSubtree for singleMachine # mode if not enough threads are provided to support it. Probably # need to do something for other ?combined? batch systems? if self.options.batchSystem == 'singleMachine' and \ self.options.database == 'kyoto_tycoon': if int(self.options.maxThreads) < \ self.configWrapper.getMaxParallelSubtrees() * 3: self.configWrapper.setMaxParallelSubtrees( max(1, int(self.options.maxThreads) / 3)) # this is a little hack to effectively toggle back to the # non-progressive version of cactus (as published in Gen. Res. 2011) # from the high-level interface. if self.options.legacy is True: self.configWrapper.setSubtreeSize(sys.maxint) def processExperiment(self): expXml = self.seqFile.toXMLElement() #create the cactus disk cdElem = ET.SubElement(expXml, "cactus_disk") database = self.options.database assert database == "kyoto_tycoon" or database == "tokyo_cabinet" confElem = ET.SubElement(cdElem, "st_kv_database_conf") confElem.attrib["type"] = database dbElem = ET.SubElement(confElem, database) self.expWrapper = ExperimentWrapper(expXml) if self.options.database == "kyoto_tycoon": self.expWrapper.setDbPort(str(self.options.ktPort)) if self.options.ktHost is not None: self.expWrapper.setDbHost(self.options.ktHost) if self.options.ktType == 'memory': self.expWrapper.setDbInMemory(True) self.expWrapper.setDbSnapshot(False) elif self.options.ktType == 'snapshot': self.expWrapper.setDbInMemory(True) self.expWrapper.setDbSnapshot(True) else: assert self.options.ktType == 'disk' self.expWrapper.setDbInMemory(False) self.expWrapper.setDbSnapshot(False) # sonlib doesn't allow for spaces in attributes in the db conf # which renders this options useless # if self.options.ktOpts is not None: # self.expWrapper.setDbServerOptions(self.options.ktOpts) if self.options.ktCreateTuning is not None: self.expWrapper.setDbCreateTuningOptions( self.options.ktCreateTuning) if self.options.ktOpenTuning is not None: self.expWrapper.setDbReadTuningOptions( self.options.ktOpenTuning) #set the sequence output directory outSeqDir = os.path.join(self.workingDir, "sequenceData") if os.path.exists(outSeqDir) and self.options.overwrite: system("rm -rf %s" % outSeqDir) if not os.path.exists(outSeqDir): system("mkdir %s" % outSeqDir) self.expWrapper.setOutputSequenceDir( os.path.join(self.workingDir, "sequenceData")) def writeXml(self): assert os.path.isdir(self.workingDir) configPath = absSymPath(os.path.join(self.workingDir, "config.xml")) expPath = absSymPath(os.path.join(self.workingDir, "expTemplate.xml")) self.expWrapper.setConfigPath(configPath) self.configWrapper.writeXML(configPath) self.expWrapper.writeXML(expPath) projPath = os.path.join(self.workingDir, ProjectWrapper.alignmentDirName) if os.path.exists(projPath) and self.options.overwrite: system("rm -rf %s" % projPath) if self.options.outputMaf is True: fixNames = 1 else: fixNames = 0 if os.path.exists(projPath): if not self.isSameAsExisting(expPath, projPath, fixNames): raise RuntimeError("Existing project %s not " % projPath + "compatible with current input. Please " "erase the working directory or rerun " "with the --overwrite option to start " "from scratch.") else: logPath = os.path.join(self.workingDir, 'cactus.log') logFile = open(logPath, "a") logFile.write("\nContinuing existing alignment. Use " "--overwrite or erase the working directory to " "force restart from scratch.\n") logFile.close() else: cmd = "cactus_createMultiCactusProject.py \"%s\" \"%s\" --fixNames=%d" % ( expPath, projPath, fixNames) if len(self.seqFile.outgroups) > 0: cmd += " --outgroupNames " + ",".join(self.seqFile.outgroups) if self.options.rootOutgroupDists: cmd += " --rootOutgroupDists %s" % self.options.rootOutgroupDists cmd += " --rootOutgroupPaths %s" % self.options.rootOutgroupPaths if self.options.root is not None: cmd += " --root %s" % self.options.root system(cmd) # create a project in a dummy directory. check if the # project xml is the same as the current project. # we do this to see if we should start fresh or try to # work with the existing project when the overwrite flag is off def isSameAsExisting(self, expPath, projPath, fixNames): if not os.path.exists(projPath): return False oldPath = os.path.dirname(projPath + "/") tempPath = "%s_temp" % oldPath # Fix for relative directories if oldPath[0:2] == './': oldPath = oldPath[2:] if tempPath[0:2] == './': tempPath = tempPath[2:] if os.path.exists(tempPath): system("rm -rf %s" % tempPath) cmd = "cactus_createMultiCactusProject.py %s %s --fixNames=%d" % ( expPath, tempPath, fixNames) if len(self.seqFile.outgroups) > 0: cmd += " --outgroupNames " + ",".join(self.seqFile.outgroups) if self.options.rootOutgroupDists: cmd += " --rootOutgroupDists %s" % self.options.rootOutgroupDists cmd += " --rootOutgroupPaths %s" % self.options.rootOutgroupPaths if self.options.root is not None: cmd += " --root %s" % self.options.root system(cmd) projFilePathNew = os.path.join( tempPath, '%s_temp_project.xml' % self.alignmentDirName) projFilePathOld = os.path.join( oldPath, '%s_project.xml' % self.alignmentDirName) newFile = [line for line in open(projFilePathNew, "r")] oldFile = [line for line in open(projFilePathOld, "r")] areSame = True if len(newFile) != len(oldFile): areSame = False for newLine, oldLine in zip(newFile, oldFile): if newLine.replace(tempPath, oldPath) != oldLine: areSame = False system("rm -rf %s" % tempPath) return areSame