def testFastaReadWriteC(self):
    """Tests consistency with C version of this function.

    For each of self.testNo rounds: writes between 0 and 9 random sequences
    to a temporary fasta file with fastaWrite, passes that file and a second
    temporary file to the sonLib_fastaCTest command via system(), then reads
    the second file with fastaRead and asserts that it yields exactly the
    records that were written, in the order they were written. Each record
    read back is also echoed to stdout.
    """
    tempFile = getTempFile()
    self.tempFiles.append(tempFile)
    tempFile2 = getTempFile()
    self.tempFiles.append(tempFile2)
    for test in xrange(0, self.testNo):
        fastaNumber = random.choice(xrange(10))
        l = [ getRandomSequence() for i in xrange(fastaNumber) ]
        # 'with' guarantees the handle is closed even if a write fails.
        with open(tempFile, 'w') as fileHandle:
            for name, seq in l:
                fastaWrite(fileHandle, name, seq)
        command = "sonLib_fastaCTest %s %s" % (tempFile, tempFile2)
        # Parenthesised single argument prints the same text as the
        # Python 2 print statement.
        print(command)
        system(command)
        # Reverse so that pop() hands the expected records back in the
        # order they were written.
        l.reverse()
        with open(tempFile2, 'r') as fileHandle:
            for i in fastaRead(fileHandle):
                name, seq = i
                assert i == l.pop()
                fastaWrite(sys.stdout, name, seq)
        # Every written record must have been read back; without this a
        # truncated output file would pass unnoticed.
        assert len(l) == 0
def makeHub(self):
    """Build an assembly hub for the test set in outputDir/hub.

    Runs hal2assemblyHub.py on self.hal, using self.label as the hub name
    and as both labels. When the "misalignmentWigTrack" evaluation option is
    set, every entry found directly under self.wigDir is passed through
    --wigDirs together with --nowigLiftover.
    """
    hubArgs = (self.label, self.label, self.label, self.hal,
               getTempDirectory(), os.path.join(self.outputDir, "hub"))
    pieces = ["hal2assemblyHub.py --hub %s --longLabel %s --shortLabel %s %s --jobTree %s/jobTree %s" % hubArgs]
    if self.getOption("Evaluation", "misalignmentWigTrack") is not None:
        wigEntries = glob(os.path.join(self.wigDir, '*'))
        pieces.append(" --wigDirs %s --nowigLiftover" % ",".join(wigEntries))
    system("".join(pieces))
def getCoalescences(self):
    """Run the "correct-coalescences" evaluation on the test set.

    Invokes scoreHalPhylogenies.py on self.hal with the reference genome
    named by the coalescenceRefGenome option of the config's Evaluation
    section, writing the result to outputDir/coalescences.xml. The jobTree
    is placed inside a directory obtained from getTempDirectory().
    """
    refGenome = self.getOption("Evaluation", "coalescenceRefGenome")
    jobTreeParent = getTempDirectory()
    resultPath = os.path.join(self.outputDir, "coalescences.xml")
    scoreArgs = (jobTreeParent, self.hal, refGenome, resultPath)
    system("scoreHalPhylogenies.py --jobTree %s/jobTree %s %s %s" % scoreArgs)
def align(self, progressiveCactusDir, configFile):
    """Run the actual alignment.

    Changes the working directory to self.path, runs
    bin/runProgressiveCactus.sh from progressiveCactusDir with --stats, and
    then copies cactus.log from the work directory into the output
    directory.

    progressiveCactusDir -- directory containing bin/runProgressiveCactus.sh
    configFile -- value handed to nameValue("config", ...) to form the
                  config argument of the command
    """
    os.chdir(self.path)
    configArg = nameValue("config", configFile)
    rootArg = nameValue("root", self.getOption("Alignment", "root"))
    cactusArgs = (progressiveCactusDir, configArg, rootArg,
                  self.seqFile, self.workDir, self.hal)
    system("%s/bin/runProgressiveCactus.sh --stats %s %s %s %s %s" % cactusArgs)
    # Keep the alignment log alongside the other outputs.
    logPath = os.path.join(self.workDir, "cactus.log")
    system("cp %s %s" % (logPath, self.outputDir))
def makeDotplot(self):
    """Puts a dotplot in dotplot.pdf, given the dotplot option.

    The dotplot option has the format:
    "genomeX.seqX,genomeY.seqY"

    Coordinate ranges ("seq:start-end") are not parsed by the pattern in
    use: anything from the first ':' after seqY onwards is ignored, and a
    range on the X side is not understood.

    The raw dotplot data is written to tmp.dotplot in the work directory by
    runDotplot.py and then rendered to outputDir/dotplot.pdf by
    plotDotplot.R.

    Raises ValueError if the option is unset or does not match the format.
    """
    dotplotString = self.getOption("Evaluation", "dotplot")
    if dotplotString is None:
        raise ValueError("The 'dotplot' option of the Evaluation section "
                         "is not set")
    match = re.match(
        r'(.*?)\.([^:,]*),(.*?)\.([^:]*)',
        # Range-aware pattern, currently unused:
        # r'(.*?)\.([^:,]*):?([0-9]*)?-?([0-9]*)?,(.*?)\.([^:]*):?([0-9]*)?-?([0-9]*)?',
        dotplotString)
    if match is None:
        # Fail with a message naming the bad value instead of an
        # AttributeError on None.
        raise ValueError("Cannot parse dotplot option %r: expected "
                         "'genomeX.seqX,genomeY.seqY'" % (dotplotString,))
    genomeX, seqX, genomeY, seqY = match.groups()
    tempFile = os.path.join(self.workDir, "tmp.dotplot")
    system("runDotplot.py %s %s %s %s %s > %s" % \
           (self.hal, genomeX, seqX, genomeY, seqY, tempFile))
    system("plotDotplot.R %s %s" % (tempFile, os.path.join(self.outputDir, "dotplot.pdf")))
def getMafComparatorXML(self):
    """Compare the alignment against the true alignment with mafComparator.

    The test set's alignment is first exported with hal2maf (sequence names
    only, no ancestors) and then compared against the MAF named by the
    "truth" option; the comparison is written to
    outputDir/mafComparator.xml. The true MAF is therefore expected to use
    plain sequence names rather than UCSC-styled "genome.chr" names.

    If the misalignmentWigTrack option is set in the test set's config, the
    wiggle options are added to the mafComparator call and the "wiggle"
    parts of the XML are afterwards extracted into .wig files under
    wigDir/underalignment/<genome>/ and wigDir/overalignment/<genome>/.
    """
    truth = self.getOption('Evaluation', 'truth')
    # Export our alignment as a MAF so it can be compared.
    test = os.path.join(getTempDirectory(), 'test.maf')
    system("hal2maf --onlySequenceNames --global --noAncestors %s %s" % (self.hal, test))

    xmlPath = os.path.join(self.outputDir, "mafComparator.xml")
    cmdParts = ["mafComparator --samples 20000000 --maf1 %s --maf2 %s --out %s" % (truth, test, xmlPath)]
    wigTrack = self.getOption("Evaluation", "misalignmentWigTrack")
    wantWigs = wigTrack is not None
    if wantWigs:
        # Ask mafComparator to emit the requested wiggle track as well.
        cmdParts.append(" " + nameValue("wigglePairs", wigTrack))
        cmdParts.append(" --wiggleBinLength 1")
    system("".join(cmdParts))
    if not wantWigs:
        return

    # Pull the wiggle data out of the XML, one directory per track kind.
    genome = getGenomeForSequence(self.hal, wigTrack.split(":")[0])
    underDir = os.path.join(self.wigDir, "underalignment", genome)
    overDir = os.path.join(self.wigDir, "overalignment", genome)
    system("mkdir -p %s %s" % (underDir, overDir))
    wigName = genome + ".wig"
    getWigsFromXML(xmlPath,
                   os.path.join(underDir, wigName),
                   os.path.join(overDir, wigName))
def testCigarReadWrite(self):
    """Tests the C code for reading and writing cigars against the python
    parser for cigars.

    For each of self.testNo rounds: writes between 0 and 9 random pairwise
    alignments to a temporary file with cigarWrite, passes that file and
    the keepProbs flag to the sonLib_cigarTest command via system(), then
    reads the same file back with cigarRead and asserts that it yields
    exactly the alignments that were written, in order. Both versions of
    each alignment are also echoed to stdout.
    """
    tempFile = getTempFile()
    self.tempFiles.append(tempFile)
    for test in xrange(0, self.testNo):
        pairwiseAlignmentNumber = random.choice(xrange(10))
        l = [ getRandomPairwiseAlignment() for i in xrange(pairwiseAlignmentNumber) ]

        keepProbs = random.random() > 0.5
        if not keepProbs:
            # Scores are not written in this mode, so zero them on the
            # expected alignments before comparing.
            for pA in l:
                for op in pA.operationList:
                    op.score = 0.0

        # 'with' guarantees the handle is closed even if a write fails.
        with open(tempFile, 'w') as fileHandle:
            for pairwiseAlignment in l:
                cigarWrite(fileHandle, pairwiseAlignment, keepProbs)

        #Now call sonLib_cigarsTest and read and write chains
        command = "sonLib_cigarTest %s %s" % (tempFile, keepProbs)
        system(command)

        #Now check the chain is okay
        # Reverse so that pop() returns alignments in written order.
        l.reverse()
        with open(tempFile, 'r') as fileHandle:
            for pairwiseAlignment in cigarRead(fileHandle):
                pairwiseAlignment2 = l.pop()
                cigarWrite(sys.stdout, pairwiseAlignment, keepProbs)
                cigarWrite(sys.stdout, pairwiseAlignment2, keepProbs)
                assert pairwiseAlignment == pairwiseAlignment2
            assert len(l) == 0
def pipeline(target, opts):
    """Prepare progressiveCactus, schedule the test sets and record versions.

    Sets up a progressiveCactus checkout under the target's global temp
    directory, adds every test set returned by setupTestSets as a child
    target, redirects sys.stderr to outputDir/log, and stores the git
    commits of progressiveCactus and cactus plus the cactus config used in
    opts.outputDir.

    target -- object providing getGlobalTempDir() and addChildTarget()
    opts -- options object; its progressiveCactusDir attribute is
            overwritten here
    """
    globalTempDir = target.getGlobalTempDir()
    opts.progressiveCactusDir = globalTempDir
    # setup progressiveCactus to point to the right commit, and run make
    initializeProgressiveCactus(opts)
    # FIXME this is terrible
    opts.progressiveCactusDir = os.path.join(globalTempDir, "progressiveCactus")

    testSets = setupTestSets(opts)

    # Make sure the output dir exists, then send stderr to a log inside it.
    if not os.path.isdir(opts.outputDir):
        os.mkdir(opts.outputDir)
    sys.stderr = open(os.path.join(opts.outputDir, "log"), 'w')

    for testSet in testSets:
        target.addChildTarget(testSet)

    # Record the git commit of each checkout in the output dir.
    versionSteps = (
        (opts.progressiveCactusDir,
         "git rev-parse HEAD > %s/progressiveCactus_version"),
        (os.path.join(opts.progressiveCactusDir, "submodules/cactus"),
         "git rev-parse HEAD > %s/cactus_version"),
    )
    for repoDir, template in versionSteps:
        os.chdir(repoDir)
        system(template % opts.outputDir)

    # Record the config in the output dir; fall back to the default config
    # shipped with cactus when none was given.
    configSource = opts.cactusConfigFile
    if configSource is None:
        configSource = os.path.join(opts.progressiveCactusDir,
                                    "submodules/cactus/cactus_progressive_config.xml")
    system("cp %s %s/config.xml" % (configSource, opts.outputDir))
def initializeProgressiveCactus(opts):
    """Points progressiveCactus to the correct commit and compiles.

    Clones progressiveCactus into opts.progressiveCactusDir, checks out
    opts.progressiveCactusBranch, updates the submodules, optionally checks
    out opts.cactusBranch inside submodules/cactus, and finally runs make
    from the top of the progressiveCactus checkout.

    On return the working directory is the top of the checkout
    (<opts.progressiveCactusDir>/progressiveCactus).
    """
    os.chdir(opts.progressiveCactusDir)
    system("git clone https://github.com/glennhickey/progressiveCactus.git")
    os.chdir("progressiveCactus")
    # Remember the absolute checkout root so we can come back to it after
    # descending into a submodule.
    checkoutDir = os.getcwd()
    system("git fetch")
    system("git checkout %s" % (opts.progressiveCactusBranch))
    system("git pull")
    system("git submodule update --init --recursive")
    if opts.cactusBranch is not None:
        os.chdir("submodules/cactus")
        system("git fetch")
        system("git checkout %s" % (opts.cactusBranch))
    # Build from the checkout root. opts.progressiveCactusDir is only the
    # directory the clone was created in, not the checkout itself, so
    # running make there would not build progressiveCactus.
    os.chdir(checkoutDir)
    system("make")
def getCoverage(self):
    """Write the output of halStats --allCoverage for self.hal to outputDir/coverage."""
    coveragePath = os.path.join(self.outputDir, "coverage")
    coverageArgs = (self.hal, coveragePath)
    system("halStats --allCoverage %s > %s" % coverageArgs)
def tearDown(self):
    """Run the base-class teardown, then delete self.tempDir and every file in self.tempFiles."""
    unittest.TestCase.tearDown(self)
    removeDirCommand = "rm -rf %s" % self.tempDir
    system(removeDirCommand)
    for leftover in self.tempFiles:
        os.remove(leftover)