def testHaplotyperPath(self):
    """Run the haplotyper with the grid disabled and check the phased output.

    A fresh ``testLib`` sample backed by ``TestHaplotyper.inputBam`` is
    registered on the test pool and ``callHaplotypes`` is invoked. The
    phased file of the first entry in the pool's ``beagleFiles`` must equal
    the expected path, after which the expected file is passed to
    ``checkNoOfSnps``.
    """
    Grid.useGrid = False
    expOutFile = "../testFiles/output/testPool/out.testPoolSL2.40ch11_22900-24100.BEAGLE.PL.phased.gz"

    # Start from an empty vcf mapping before attaching the sample.
    TestHaplotyper.testPool.vcf = {}
    TestHaplotyper.sample = Sample.Sample(TestHaplotyper.testPool, "testLib")
    TestHaplotyper.testPool.addSample(TestHaplotyper.sample)
    TestHaplotyper.sample.bam = BamFile.BamFile(
        TestHaplotyper.testPool,
        TestHaplotyper.sample,
        TestHaplotyper.inputBam,
        sortedBam=True,
        headerLine=True,
        duplicates=False,
        mdTag=True,
        index=True)

    TestHaplotyper.haplotyper.callHaplotypes()

    # list(...) keeps the indexing valid when values() returns a view
    # (Python 3) rather than a list (Python 2).
    beagleFiles = list(TestHaplotyper.testPool.beagleFiles.values())
    createdOutFile = beagleFiles[0].phasedFile

    self.assertEqual(
        os.path.abspath(createdOutFile),
        os.path.abspath(expOutFile),
        os.path.abspath(createdOutFile) + " not is " + os.path.abspath(expOutFile))
    self.checkNoOfSnps(expOutFile)
def _executeBwa(self, forwardFq, reversedFq=None):
    """Map the reads of one sample against the reference genome with BWA.

    When ``<refGenome>.pac`` does not exist, ``bwa index`` is run on the
    reference genome first. After that a ``samse`` (forward reads only) or
    ``sampe`` (forward and reversed reads) command is assembled from the
    alignment files returned by ``self._bwaAlign``, executed through
    ``self.execute`` with its output redirected to the sample's new sam
    file, and the intermediate alignment files are removed again.

    :param forwardFq: The forward fastq file to map against the reference genome
    :type forwardFq: an instance of :py:class:`FastqFile.FastqFile`
    :param reversedFq: The reversed fastq file to map against the reference genome
    :type reversedFq: an instance of :py:class:`FastqFile.FastqFile`, None if the data has no paired end reads.
    """
    refGenome = Program.config.getPath("refGenome")
    paired = reversedFq is not None

    if not os.path.exists(refGenome + ".pac"):
        # An argument list is required here: a single command string without
        # shell=True is treated as the executable name on POSIX.
        exitStatus = subprocess.call(["bwa", "index", refGenome])
        # Any non-zero status means indexing failed, not only status 1.
        if exitStatus != 0:
            print(
                "ERROR: Failed to create a bwa index file for the reference genome, do I have permissions to write in the directory of the reference genome?"
            )
            exit(1)

    ##Build the command
    cmd = Program.config.getPath("bwa")  # @UndefinedVariable
    if paired:
        cmd = cmd + " sampe " + self.getProgramArguments("BWA")
    else:
        cmd = cmd + " samse "
    # Make sure the program arguments and the reference path never run
    # together, whatever getProgramArguments returned.
    if not cmd.endswith(" "):
        cmd = cmd + " "
    cmd = cmd + refGenome

    #add the alignment files produced by _bwaAlign to the command
    forwardMapped = self._bwaAlign(forwardFq)
    cmd = cmd + " " + forwardMapped
    if paired:
        reversedMapped = self._bwaAlign(reversedFq)
        cmd = cmd + " " + reversedMapped

    #add the fastq files themselves to the command
    cmd = cmd + " " + forwardFq.fileName
    if paired:
        cmd = cmd + " " + reversedFq.fileName

    #add the output file to the command
    forwardFq.sample.bam = BamFile.BamFile(forwardFq.pool, forwardFq.sample, sam=True)
    cmd = cmd + " > " + forwardFq.sample.bam.getFile()

    ##Execute the command
    self.execute(cmd, "BWA", forwardFq.sample.bam)

    ##Remove the intermediate alignment files
    self.execute("rm " + forwardMapped, "rm", forwardFq)
    if paired:
        self.execute("rm " + reversedMapped, "rm", reversedFq)
def testAddMdTag(self):
    """Check that ``addMdTag`` creates the expected bam file with 1283 lines.

    The line count is taken from ``samtools view -h`` on the expected
    output file piped through ``wc -l``.
    """
    TestConversionTools.sample.bam = BamFile.BamFile(
        TestConversionTools.testPool,
        TestConversionTools.sample,
        TestConversionTools.bamFile)
    self.convTools.addMdTag(TestConversionTools.sample)
    self.assertTrue(
        os.path.exists(TestConversionTools.expBamOutFile),
        "output file not created...")

    countCmd = (Program.config.getPath("samtools") + " view -h " +
                TestConversionTools.expBamOutFile + " | wc -l")
    # universal_newlines=True makes communicate() return text instead of
    # bytes on Python 3, so the comparison and the message concatenation
    # below work on both major versions.
    output, _ = Popen(countCmd,
                      shell=True,
                      stdout=PIPE,
                      stderr=PIPE,
                      universal_newlines=True).communicate()
    # strip() rather than rstrip(): some wc implementations left-pad the count.
    lineCount = output.strip()
    self.assertEqual(
        lineCount, "1283",
        "number of lines: " + lineCount + " is not " + str(1283))
def testHaplotyperPathGrid(self):
    """Run ``Haplotyper.executeBeagleCluster`` on the test pool.

    A fresh ``testLib`` sample backed by ``TestHaplotyper.inputBam`` is
    attached to the pool first. The path of the created output is not
    asserted here; only ``checkNoOfSnps`` is run on the expected VCF.
    """
    pool = TestHaplotyper.testPool
    pool.vcf = {}

    lib = Sample.Sample(pool, "testLib")
    TestHaplotyper.sample = lib
    pool.addSample(lib)
    lib.bam = BamFile.BamFile(
        pool,
        lib,
        TestHaplotyper.inputBam,
        sortedBam=True,
        headerLine=True,
        duplicates=False,
        mdTag=True,
        index=True)

    Haplotyper.executeBeagleCluster(pool)

    expectedVcf = "../testFiles/output/testPool/SL2.40ch11_22900-24100_testPool_SL2.40ch11_22900-24100.vcf"
    self.checkNoOfSnps(expectedVcf)
def testSamtoolsMultiple(self):
    """Call SNVs with samtools mpileup on a pool holding an extra sample.

    A second sample ``testLib2`` using the same input bam is added to the
    pool, ``callSnvs`` is run, and the file name stored under
    ``pool.vcf[None]`` must match ``TestSnvCaller.expVcfFile``. The expected
    file is then passed to ``checkNoOfSnps``.
    """
    pool = TestSnvCaller.testPool

    # Register one more sample next to whatever the pool already holds.
    extraSample = Sample.Sample(pool, "testLib2")
    TestSnvCaller.sample2 = extraSample
    extraSample.bam = BamFile.BamFile(pool, extraSample, TestSnvCaller.inputBam)
    pool.addSample(extraSample)

    # Run the caller and compare the produced path with the expected one.
    caller = SamtoolsMpileup.SamtoolsMpileup(pool)
    caller.callSnvs()
    createdPath = os.path.abspath(pool.vcf[None].fileName)
    expectedPath = os.path.abspath(TestSnvCaller.expVcfFile)
    self.assertEqual(createdPath, expectedPath,
                     createdPath + " not is " + expectedPath)

    # Let the shared helper verify the SNP count of the expected file.
    self.checkNoOfSnps(TestSnvCaller.expVcfFile)
def setUp(self):
    """Reset the output directory and build a one-sample test pool.

    All handlers of the root logger are closed, every entry below
    ``../testFiles/output/`` is deleted, and a pool ``testPool`` with a
    single ``testLib`` sample backed by ``TestSnvCaller.inputBam`` is
    created. The reference genome path is pointed at the small test genome.
    """
    rootLogger = logging.getLogger()
    for logHandler in rootLogger.handlers:
        logHandler.close()

    # Wipe files and directories left behind in the output directory.
    outputDir = "../testFiles/output/"
    for entry in os.listdir(outputDir):
        entryPath = os.path.join(outputDir, entry)
        if not os.path.isdir(entryPath):
            os.unlink(entryPath)
        else:
            shutil.rmtree(entryPath)

    pool = Pool.Pool("testPool", outputDir)
    TestSnvCaller.testPool = pool
    Program.config.setPath("refGenome", "../testFiles/input/smallRefGenome.fa")

    lib = Sample.Sample(pool, "testLib")
    TestSnvCaller.sample = lib
    pool.addSample(lib)
    lib.bam = BamFile.BamFile(
        pool,
        lib,
        TestSnvCaller.inputBam,
        sortedBam=True,
        headerLine=True,
        duplicates=False,
        mdTag=True,
        index=True)
def testCreateBamIndex(self):
    """Sort the test bam, index it, and check the resulting ``.bai`` file.

    The sample's bam file name must equal the expected output path, the
    ``.bai`` file next to it must exist, and its size must be 96 bytes.
    """
    expectedIndexSize = 96
    sample = TestConversionTools.sample
    expectedBam = TestConversionTools.expBamOutFile
    indexPath = expectedBam + ".bai"

    sample.bam = BamFile.BamFile(TestConversionTools.testPool, sample,
                                 TestConversionTools.bamFile)
    self.convTools.sortBam(sample)
    self.convTools.createBamIndex(sample)

    createdPath = os.path.abspath(sample.bam.fileName)
    expectedPath = os.path.abspath(expectedBam)
    self.assertEqual(createdPath, expectedPath,
                     createdPath + " not is " + expectedPath)

    self.assertTrue(os.path.isfile(indexPath),
                    os.path.abspath(indexPath) + " is not created")

    # Read the size only after the existence check so a missing index is
    # reported by the assertion above.
    actualIndexSize = os.path.getsize(indexPath)
    self.assertEqual(
        expectedIndexSize, actualIndexSize,
        "filesize: " + str(actualIndexSize) + " is not " +
        str(expectedIndexSize))
def setbam(self, fileName):
    """Set the bam file of this object from a file name.

    The file name is wrapped in a :py:class:`BamFile.BamFile` bound to
    ``self.pool`` and to this object, and stored as ``self.bam``.

    :param fileName: the file name passed on to :py:class:`BamFile.BamFile`
    """
    bamFile = BamFile.BamFile(self.pool, self, fileName)
    self.bam = bamFile
def findFastqFiles(self, directory, inFormat):
    """Find the input files below a directory and register them on the pool.

    The directory is searched recursively; what is collected depends on
    ``inFormat``:

    * ``"bam"`` -- every ``.bam`` / ``.bam.gz`` file becomes its own sample,
      which is appended to ``self.samples`` and added to the pool.
    * ``"vcf"`` -- every ``.vcf`` / ``.vcf.gz`` / ``.bcf`` / ``.bcf.gz`` file
      is stored in ``self.pool.vcf`` keyed by chromosome. A vcf file that is
      the only entry of its directory is stored under ``None``.
    * ``"fq"`` -- all ``.fq`` / ``.fq.gz`` files located directly in one
      directory are combined into a single sample.

    Directory entries are processed in sorted order so that the choice of
    library name and of forward/reversed file does not depend on the order
    in which the file system lists them.

    :param directory: the directory where the user hid his fastq files
    :type directory: str -- path to the directory
    :param inFormat: the kind of input to look for: ``"bam"``, ``"fq"`` or ``"vcf"``
    :type inFormat: str
    """
    entries = sorted(os.listdir(directory))
    fastqFiles = []
    for entry in entries:
        fileName = directory + "/" + entry
        if os.path.isdir(fileName):
            self.findFastqFiles(fileName, inFormat)
        elif inFormat == "bam":
            if fileName.endswith((".bam", ".bam.gz")):
                newSamp = Sample.Sample(
                    self.pool,
                    os.path.basename(os.path.splitext(fileName)[0]))
                self.samples.append(newSamp)
                newSamp.bam = BamFile.BamFile(self.pool,
                                              newSamp,
                                              fileName,
                                              sortedBam=True,
                                              headerLine=True,
                                              duplicates=False,
                                              mdTag=True,
                                              index=True)
                self.pool.addSample(newSamp)
        elif inFormat == "fq":
            if fileName.endswith((".fq", ".fq.gz")):
                fastqFiles.append(fileName)
        elif inFormat == "vcf":
            if fileName.endswith((".vcf", ".vcf.gz")):
                # A single file in the directory is stored without a
                # chromosome key.
                if len(entries) == 1:
                    chrom = None
                else:
                    chrom = self.getChromosomeFromVcf(fileName)
                self.pool.vcf[chrom] = VcfFile.VcfFile(self.pool,
                                                       fileName,
                                                       bcf=False,
                                                       filtered=True,
                                                       phased=True,
                                                       chrom=chrom)
            elif fileName.endswith((".bcf", ".bcf.gz")):
                chrom = Tools.getChromosomeOfFile(
                    Program.config.getPath("refGenome"), fileName)
                self.pool.vcf[chrom] = VcfFile.VcfFile(self.pool,
                                                       fileName,
                                                       bcf=True,
                                                       filtered=True,
                                                       phased=True,
                                                       chrom=chrom)

    if inFormat == "bam" or inFormat == "vcf":
        return
    if not fastqFiles:
        return

    #create a library name from the file name by removing exactly one
    #suffix; longer suffixes are tried first so "_1.fq.gz" is not mistaken
    #for ".fq.gz" and nothing is stripped twice
    libName = os.path.basename(fastqFiles[0])
    for nameSuffix in ("_1.fq.gz", "_2.fq.gz", "_1.fq", "_2.fq", ".fq.gz",
                       ".fq"):
        if libName.endswith(nameSuffix):
            libName = libName[:-len(nameSuffix)]
            break

    #create the sample
    sample = Sample.Sample(self.pool, libName)
    self.pool.addSample(sample)

    #add the fastq files to the sample
    if len(fastqFiles) == 1:
        sample.setForwardFq(fastqFiles[0])
    elif len(fastqFiles) == 2:
        sample.setForwardFq(fastqFiles[0])
        sample.setReversedFq(fastqFiles[1])
    else:
        if fastqFiles[0].endswith("_1.fq"):
            forwardSuffix = "_1.fq"
        elif fastqFiles[0].endswith("_1.fq.gz"):
            forwardSuffix = "_1.fq.gz"
        else:
            print(
                "WARNING: files do not end with _1.fq or _1.fq.gz or _2.fq or _2.fq.gz, using all files in one directory as 1 sample with only forward reads"
            )
            forwardSuffix = fastqFiles[0][-3:]

        #split the paths into forward files and all remaining files
        forwardPaths = [
            path for path in fastqFiles if path.endswith(forwardSuffix)
        ]
        reversedPaths = [
            path for path in fastqFiles if not path.endswith(forwardSuffix)
        ]

        #convert the paths to fastqFile objects and add them to the sample
        sample.forwardFq = [
            FastqFile.FastqFile(self.pool, sample, path)
            for path in forwardPaths
        ]
        sample.reversedFq = [
            FastqFile.FastqFile(self.pool, sample, path, forward=False)
            for path in reversedPaths
        ]