def testCactusRealignDummy(self):
    """Check that "--rescoreOriginalAlignment" reproduces the lastz cigars.

    For every sequence pair yielded by seqFilePairGenerator(), runs lastz,
    passes its alignments to cactus realign with
    " --rescoreOriginalAlignment" appended to the default realign arguments,
    and asserts that each parsed realign cigar is not None and compares
    equal to the lastz cigar found on the corresponding line.
    """
    for seqFile1, seqFile2 in seqFilePairGenerator():
        lastzOutput = getTempFile(rootDir=self.tempDir)
        runLastz(seqFile1, seqFile2, alignmentsFile=lastzOutput,
                 lastzArguments=self.defaultLastzArguments)

        realignOutput = getTempFile(rootDir=self.tempDir)
        runCactusRealign(seqFile1, seqFile2,
                         inputAlignmentsFile=lastzOutput,
                         outputAlignmentsFile=realignOutput,
                         realignArguments=self.defaultRealignArguments +
                         " --rescoreOriginalAlignment")

        # Read both files under a context manager so the handles are closed.
        # Lines produced by iterating a file always carry their newline, so
        # the old comparison against '' never filtered anything; strip()
        # actually drops blank lines.
        with open(lastzOutput, 'r') as lastzHandle, \
                open(realignOutput, 'r') as realignHandle:
            lastzLines = [line for line in lastzHandle if line.strip()]
            realignLines = [line for line in realignHandle if line.strip()]

        # The loop variables now match the order of the zipped lists, so the
        # not-None check really inspects the realign output.
        # NOTE(review): zip stops at the shorter list, so a truncated realign
        # output would go unnoticed - consider asserting equal line counts.
        for lastzLine, realignLine in zip(lastzLines, realignLines):
            lastzCigar = cigarReadFromString(lastzLine)
            realignCigar = cigarReadFromString(realignLine)
            self.assertIsNotNone(realignCigar)
            self.assertEqual(realignCigar, lastzCigar)
def run(self, fileStore):
    """Align the two input sequences and store coordinate-converted results.

    Fetches both sequence files from the file store (decompressing them when
    blastOptions.compressFiles is set), runs lastz on them, optionally runs
    cactus realign on the lastz output when blastOptions.realign is set, and
    finally passes the alignments through cactus_blast_convertCoordinates.

    Returns the value of fileStore.writeGlobalFile() for the converted file.
    """
    options = self.blastOptions

    fastaPaths = [fileStore.readGlobalFile(fileID)
                  for fileID in (self.seqFileID1, self.seqFileID2)]
    if options.compressFiles:
        fastaPaths = [decompressFastaFile(path, fileStore.getLocalTempFile())
                      for path in fastaPaths]
    firstFasta, secondFasta = fastaPaths

    alignmentsPath = fileStore.getLocalTempFile()
    runLastz(firstFasta, secondFasta, alignmentsPath,
             lastzArguments=options.lastzArguments,
             gpuLastz=options.gpuLastz)

    if options.realign:
        # Realignment output replaces the raw lastz alignments downstream.
        realignedPath = fileStore.getLocalTempFile()
        runCactusRealign(firstFasta, secondFasta,
                         inputAlignmentsFile=alignmentsPath,
                         outputAlignmentsFile=realignedPath,
                         realignArguments=options.realignArguments)
        alignmentsPath = realignedPath

    convertedPath = fileStore.getLocalTempFile()
    conversionCommand = [
        "cactus_blast_convertCoordinates",
        alignmentsPath,
        convertedPath,
        str(options.roundsOfCoordinateConversion),
    ]
    cactus_call(parameters=conversionCommand)
    logger.info("Ran the blast okay")
    return fileStore.writeGlobalFile(convertedPath)
def testCactusRealign(self):
    """Check that default realignment keeps the lastz cigar coordinates.

    For every sequence pair yielded by seqFilePairGenerator(), runs lastz and
    then cactus realign with the default realign arguments (both using
    self.tempDir as work_dir), and asserts via sameCoordinates() that each
    realigned cigar matches the lastz cigar on the corresponding line.
    """
    for seqFile1, seqFile2 in seqFilePairGenerator():
        lastzOutput = getTempFile(rootDir=self.tempDir)
        runLastz(seqFile1, seqFile2, alignmentsFile=lastzOutput,
                 lastzArguments=self.defaultLastzArguments,
                 work_dir=self.tempDir)

        realignOutput = getTempFile(rootDir=self.tempDir)
        runCactusRealign(seqFile1, seqFile2,
                         inputAlignmentsFile=lastzOutput,
                         outputAlignmentsFile=realignOutput,
                         realignArguments=self.defaultRealignArguments,
                         work_dir=self.tempDir)

        # Read both files under a context manager so the handles are closed.
        # Lines produced by iterating a file always carry their newline, so
        # the old comparison against '' never filtered anything; strip()
        # actually drops blank lines.
        with open(lastzOutput, 'r') as lastzHandle, \
                open(realignOutput, 'r') as realignHandle:
            lastzLines = [line for line in lastzHandle if line.strip()]
            realignLines = [line for line in realignHandle if line.strip()]

        # The loop variables now match the order of the zipped lists
        # (previously the lastz lines were bound to the "realign" name).
        # NOTE(review): zip stops at the shorter list, so a truncated realign
        # output would go unnoticed - consider asserting equal line counts.
        for lastzLine, realignLine in zip(lastzLines, realignLines):
            lastzCigar = cigarReadFromString(lastzLine)
            realignCigar = cigarReadFromString(realignLine)
            self.assertTrue(realignCigar.sameCoordinates(lastzCigar))
def testCactusRealignSplitSequences(self):
    """Compare coverage of split-indel realignment against plain realignment.

    For every sequence pair, realigns the lastz output twice - once with the
    default realign arguments and once with " --splitIndelsLongerThanThis 100"
    appended - and asserts that runCactusCoverage() gives equal results for
    both outputs on each of the two sequence files. The two realign outputs
    are removed afterwards.
    """
    for seqFile1, seqFile2 in seqFilePairGenerator():
        lastzOutput = getTempFile(rootDir=self.tempDir)
        runLastz(seqFile1, seqFile2, alignmentsFile=lastzOutput,
                 lastzArguments=self.defaultLastzArguments)

        plainOutput = getTempFile(rootDir=self.tempDir)
        runCactusRealign(seqFile1, seqFile2,
                         inputAlignmentsFile=lastzOutput,
                         outputAlignmentsFile=plainOutput,
                         realignArguments=self.defaultRealignArguments)

        splitOutput = getTempFile(rootDir=self.tempDir)
        runCactusRealign(seqFile1, seqFile2,
                         inputAlignmentsFile=lastzOutput,
                         outputAlignmentsFile=splitOutput,
                         realignArguments=self.defaultRealignArguments +
                         " --splitIndelsLongerThanThis 100")

        # Coverage must agree on seqFile1 first, then on seqFile2.
        for seqFile in (seqFile1, seqFile2):
            splitCoverage = runCactusCoverage(seqFile, splitOutput)
            plainCoverage = runCactusCoverage(seqFile, plainOutput)
            self.assertTrue(splitCoverage == plainCoverage)

        os.remove(plainOutput)
        os.remove(splitOutput)
def testCactusRealign(self):
    """Check that default realignment keeps the lastz cigar coordinates.

    For every sequence pair yielded by seqFilePairGenerator(), runs lastz and
    then cactus realign with the default realign arguments, and asserts via
    sameCoordinates() that each realigned cigar matches the lastz cigar on
    the corresponding line.
    """
    for seqFile1, seqFile2 in seqFilePairGenerator():
        lastzOutput = getTempFile(rootDir=self.tempDir)
        runLastz(seqFile1, seqFile2, alignmentsFile=lastzOutput,
                 lastzArguments=self.defaultLastzArguments)

        realignOutput = getTempFile(rootDir=self.tempDir)
        runCactusRealign(seqFile1, seqFile2,
                         inputAlignmentsFile=lastzOutput,
                         outputAlignmentsFile=realignOutput,
                         realignArguments=self.defaultRealignArguments)

        # Read both files under a context manager so the handles are closed.
        # Lines produced by iterating a file always carry their newline, so
        # the old comparison against '' never filtered anything; strip()
        # actually drops blank lines.
        with open(lastzOutput, 'r') as lastzHandle, \
                open(realignOutput, 'r') as realignHandle:
            lastzLines = [line for line in lastzHandle if line.strip()]
            realignLines = [line for line in realignHandle if line.strip()]

        # The loop variables now match the order of the zipped lists
        # (previously the lastz lines were bound to the "realign" name).
        # NOTE(review): zip stops at the shorter list, so a truncated realign
        # output would go unnoticed - consider asserting equal line counts.
        for lastzLine, realignLine in zip(lastzLines, realignLines):
            lastzCigar = cigarReadFromString(lastzLine)
            realignCigar = cigarReadFromString(realignLine)
            self.assertTrue(realignCigar.sameCoordinates(lastzCigar))
def testCactusRealignDummy(self):
    """Check that "--rescoreOriginalAlignment" reproduces the lastz cigars.

    For every sequence pair yielded by seqFilePairGenerator(), runs lastz,
    passes its alignments to cactus realign with
    " --rescoreOriginalAlignment" appended to the default realign arguments,
    and asserts that each parsed realign cigar is not None and compares
    equal to the lastz cigar found on the corresponding line.
    """
    for seqFile1, seqFile2 in seqFilePairGenerator():
        lastzOutput = getTempFile(rootDir=self.tempDir)
        runLastz(seqFile1, seqFile2, alignmentsFile=lastzOutput,
                 lastzArguments=self.defaultLastzArguments)

        realignOutput = getTempFile(rootDir=self.tempDir)
        runCactusRealign(seqFile1, seqFile2,
                         inputAlignmentsFile=lastzOutput,
                         outputAlignmentsFile=realignOutput,
                         realignArguments=self.defaultRealignArguments +
                         " --rescoreOriginalAlignment")

        # Read both files under a context manager so the handles are closed.
        # Lines produced by iterating a file always carry their newline, so
        # the old comparison against '' never filtered anything; strip()
        # actually drops blank lines.
        with open(lastzOutput, 'r') as lastzHandle, \
                open(realignOutput, 'r') as realignHandle:
            lastzLines = [line for line in lastzHandle if line.strip()]
            realignLines = [line for line in realignHandle if line.strip()]

        # The loop variables now match the order of the zipped lists, so the
        # not-None check really inspects the realign output.
        # NOTE(review): zip stops at the shorter list, so a truncated realign
        # output would go unnoticed - consider asserting equal line counts.
        for lastzLine, realignLine in zip(lastzLines, realignLines):
            lastzCigar = cigarReadFromString(lastzLine)
            realignCigar = cigarReadFromString(realignLine)
            self.assertIsNotNone(realignCigar)
            self.assertEqual(realignCigar, lastzCigar)
def testCactusRealignRescoreByIdentityAndProb(self):
    """Check that every rescoring mode yields scores between 0 and 100.

    For every sequence pair yielded by seqFilePairGenerator(), runs lastz and
    then cactus realign once per rescoring flag (" --rescoreByIdentity",
    " --rescoreByPosteriorProb", " --rescoreByIdentityIgnoringGaps"), each
    appended to the default realign arguments. The score of every cigar in
    each realign output must lie in the closed range [0, 100.0].
    """
    rescoreFlags = (" --rescoreByIdentity",
                    " --rescoreByPosteriorProb",
                    " --rescoreByIdentityIgnoringGaps")

    for seqFile1, seqFile2 in seqFilePairGenerator():
        lastzOutput = getTempFile(rootDir=self.tempDir)
        runLastz(seqFile1, seqFile2, alignmentsFile=lastzOutput,
                 lastzArguments=self.defaultLastzArguments,
                 work_dir=self.tempDir)

        for rescoreFlag in rescoreFlags:
            realignOutput = getTempFile(rootDir=self.tempDir)
            runCactusRealign(seqFile1, seqFile2,
                             inputAlignmentsFile=lastzOutput,
                             outputAlignmentsFile=realignOutput,
                             realignArguments=self.defaultRealignArguments +
                             rescoreFlag,
                             work_dir=self.tempDir)

            # Close the handle deterministically and drop blank lines (the
            # old comparison against '' never matched a line read from a
            # file, since such lines keep their trailing newline).
            with open(realignOutput, 'r') as realignHandle:
                realignLines = [line for line in realignHandle
                                if line.strip()]

            # Every output line is checked; the previous zip against the
            # lastz lines could silently skip trailing realign lines.
            failureHint = "score out of range for" + rescoreFlag
            for realignLine in realignLines:
                score = cigarReadFromString(realignLine).score
                self.assertGreaterEqual(score, 0, failureHint)
                self.assertLessEqual(score, 100.0, failureHint)
def run(self, fileStore):
    """Run lastz (and optionally realign) on two sequences and store the result.

    Reads both sequence files from the file store, decompressing them when
    blastOptions.compressFiles is set. The lastz alignments are realigned
    with cactus realign when blastOptions.realign is set, then converted by
    cactus_blast_convertCoordinates.

    Returns the value of fileStore.writeGlobalFile() for the converted file.
    """
    firstFasta = fileStore.readGlobalFile(self.seqFileID1)
    secondFasta = fileStore.readGlobalFile(self.seqFileID2)

    opts = self.blastOptions
    if opts.compressFiles:
        firstFasta = decompressFastaFile(firstFasta,
                                         fileStore.getLocalTempFile())
        secondFasta = decompressFastaFile(secondFasta,
                                          fileStore.getLocalTempFile())

    lastzAlignments = fileStore.getLocalTempFile()
    runLastz(firstFasta, secondFasta, lastzAlignments,
             lastzArguments=opts.lastzArguments)

    # By default the raw lastz alignments are what gets converted; when
    # realignment is enabled its output is converted instead.
    alignmentsToConvert = lastzAlignments
    if opts.realign:
        alignmentsToConvert = fileStore.getLocalTempFile()
        runCactusRealign(firstFasta, secondFasta,
                         inputAlignmentsFile=lastzAlignments,
                         outputAlignmentsFile=alignmentsToConvert,
                         realignArguments=opts.realignArguments)

    convertedAlignments = fileStore.getLocalTempFile()
    cactus_call(parameters=["cactus_blast_convertCoordinates",
                            alignmentsToConvert,
                            convertedAlignments,
                            str(opts.roundsOfCoordinateConversion)])
    logger.info("Ran the blast okay")
    return fileStore.writeGlobalFile(convertedAlignments)
def testCactusRealignRescoreByIdentityAndProb(self):
    """Check that every rescoring mode yields scores between 0 and 100.

    For every sequence pair yielded by seqFilePairGenerator(), runs lastz and
    then cactus realign once per rescoring flag (" --rescoreByIdentity",
    " --rescoreByPosteriorProb", " --rescoreByIdentityIgnoringGaps"), each
    appended to the default realign arguments. The score of every cigar in
    each realign output must lie in the closed range [0, 100.0].
    """
    rescoreFlags = (" --rescoreByIdentity",
                    " --rescoreByPosteriorProb",
                    " --rescoreByIdentityIgnoringGaps")

    for seqFile1, seqFile2 in seqFilePairGenerator():
        lastzOutput = getTempFile(rootDir=self.tempDir)
        runLastz(seqFile1, seqFile2, alignmentsFile=lastzOutput,
                 lastzArguments=self.defaultLastzArguments)

        for rescoreFlag in rescoreFlags:
            realignOutput = getTempFile(rootDir=self.tempDir)
            runCactusRealign(seqFile1, seqFile2,
                             inputAlignmentsFile=lastzOutput,
                             outputAlignmentsFile=realignOutput,
                             realignArguments=self.defaultRealignArguments +
                             rescoreFlag)

            # Close the handle deterministically and drop blank lines (the
            # old comparison against '' never matched a line read from a
            # file, since such lines keep their trailing newline).
            with open(realignOutput, 'r') as realignHandle:
                realignLines = [line for line in realignHandle
                                if line.strip()]

            # Every output line is checked; the previous zip against the
            # lastz lines could silently skip trailing realign lines.
            failureHint = "score out of range for" + rescoreFlag
            for realignLine in realignLines:
                score = cigarReadFromString(realignLine).score
                self.assertGreaterEqual(score, 0, failureHint)
                self.assertLessEqual(score, 100.0, failureHint)