def buildPicardStats(infile, outfile):
    '''Build alignment statistics for a BAM file using Picard.

    Note that Picard reports its counts as reads, but they are in fact
    alignments.

    The reference FASTA handed to Picard depends on PARAMS["pool_reads"]:

    * set: "agg-agg-agg.filtered.contigs.fa" in the directory of
      ``infile``;
    * not set: the result of ``P.snip(infile, ".bam")`` with ".fa"
      appended.

    infile : path of the BAM file to analyse.
    outfile : output path passed on to
        PipelineMappingQC.buildPicardAlignmentStats.
    '''
    if not PARAMS["pool_reads"]:
        # One reference per input file, named after the BAM file.
        reference = P.snip(infile, ".bam") + ".fa"
    else:
        # Pooled run: a single aggregated contig file next to the input.
        reference = os.path.join(
            os.path.dirname(infile),
            "agg-agg-agg.filtered.contigs.fa")

    PipelineMappingQC.buildPicardAlignmentStats(infile, outfile, reference)
def runPicardOnRealigned(infile, outfile):
    '''Gather Picard alignment statistics for a control/tumour BAM pair.

    PipelineMappingQC.buildPicardAlignmentStats is run twice: once for
    the control files passed in and once for the matching tumour files.

    The tumour paths are derived by replacing "Control" with
    PARAMS["mutect_tumour"] in ``infile`` and ``outfile``.  The
    replacement acts on the whole path string, so every occurrence is
    replaced, including any in directory names.

    infile : path of the control BAM file.
    outfile : path of the statistics output for the control sample.
    '''
    # NOTE(review): neither of these is read inside this function.  They
    # look like job settings meant for the pipeline runner; confirm
    # whether they still have any effect here before removing them.
    to_cluster = USECLUSTER
    job_options = getGATKOptions()

    tumour_label = PARAMS["mutect_tumour"]
    outfile_tumor = outfile.replace("Control", tumour_label)
    infile_tumor = infile.replace("Control", tumour_label)

    # Reference genome FASTA used for both samples.
    genome = "%s/%s.fa" % (PARAMS["bwa_index_dir"], PARAMS["genome"])

    PipelineMappingQC.buildPicardAlignmentStats(infile, outfile, genome)
    PipelineMappingQC.buildPicardAlignmentStats(
        infile_tumor, outfile_tumor, genome)
def runPicardOnRealigned(infile, outfile):
    '''Gather Picard alignment statistics for a control/tumour BAM pair.

    PipelineMappingQC.buildPicardAlignmentStats is run twice: once for
    the control files passed in and once for the matching tumour files.

    The tumour paths are derived by replacing PARAMS["sample_control"]
    with PARAMS["sample_tumour"] in ``infile`` and ``outfile``.  The
    replacement acts on the whole path string, so every occurrence is
    replaced, including any in directory names.

    infile : path of the control BAM file.
    outfile : path of the statistics output for the control sample.
    '''
    # NOTE(review): neither of these is read inside this function.  They
    # look like job settings meant for the pipeline runner; confirm
    # whether they still have any effect here before removing them.
    to_cluster = USECLUSTER
    job_memory = PARAMS["gatk_memory"]

    control_label = PARAMS["sample_control"]
    tumour_label = PARAMS["sample_tumour"]
    outfile_tumor = outfile.replace(control_label, tumour_label)
    infile_tumor = infile.replace(control_label, tumour_label)

    # Reference genome FASTA used for both samples.
    genome = "%s/%s.fa" % (PARAMS["bwa_index_dir"], PARAMS["genome"])

    PipelineMappingQC.buildPicardAlignmentStats(infile, outfile, genome)
    PipelineMappingQC.buildPicardAlignmentStats(
        infile_tumor, outfile_tumor, genome)
def runPicardOnRealigned(infile, outfile):
    '''Build Picard alignment statistics for a control sample and its tumour.

    The control statistics are built from ``infile`` into ``outfile``.
    The tumour input and output paths are obtained from the control
    paths by substituting PARAMS["sample_tumour"] for every occurrence
    of PARAMS["sample_control"] (the substitution is applied to the full
    path string, directory names included), and the same statistics are
    then built for the tumour files.

    infile : path of the control BAM file.
    outfile : path of the statistics output for the control sample.
    '''
    # NOTE(review): these two locals are not read in this function.
    # Presumably they are job settings for the pipeline runner; verify
    # that they are still needed here.
    to_cluster = USECLUSTER
    job_memory = PARAMS["gatk_memory"]

    control_name = PARAMS["sample_control"]
    tumour_name = PARAMS["sample_tumour"]

    # Both samples are analysed against the same reference genome.
    genome = "%s/%s.fa" % (PARAMS["bwa_index_dir"], PARAMS["genome"])

    sample_files = (
        (infile, outfile),
        (infile.replace(control_name, tumour_name),
         outfile.replace(control_name, tumour_name)),
    )
    # Control first, then tumour.
    for bam_path, stats_path in sample_files:
        PipelineMappingQC.buildPicardAlignmentStats(
            bam_path, stats_path, genome)
def buildPicardAlignmentStats(infile, outfile):
    '''Gather BAM file alignment statistics using Picard.

    The work is delegated to PipelineMappingQC.buildPicardAlignmentStats
    with the reference FASTA "<genome>.fa" located in
    PARAMS["bwa_index_dir"], where <genome> is PARAMS["genome"].

    infile : path of the BAM file to analyse.
    outfile : output path passed on to the delegate.
    '''
    index_dir = PARAMS["bwa_index_dir"]
    fasta_name = PARAMS["genome"] + ".fa"
    reference = os.path.join(index_dir, fasta_name)

    PipelineMappingQC.buildPicardAlignmentStats(infile, outfile, reference)
def buildPicardAlignmentStats(infile, outfile):
    '''Collect Picard alignment statistics for a BAM file.

    Thin wrapper around PipelineMappingQC.buildPicardAlignmentStats that
    supplies the reference genome: the file PARAMS["genome"] + ".fa"
    inside the directory PARAMS["bwa_index_dir"].

    infile : path of the BAM file to analyse.
    outfile : output path passed on to the wrapped function.
    '''
    # Reference FASTA is looked up from the pipeline configuration.
    genome_fasta = os.path.join(
        PARAMS["bwa_index_dir"],
        PARAMS["genome"] + ".fa",
    )
    PipelineMappingQC.buildPicardAlignmentStats(
        infile,
        outfile,
        genome_fasta,
    )