def fastq_convertion_and_pairs_capture(self):
    """
    Build, for every readset, one job that runs the BWA fusion reads capture
    on the readset's paired FASTQ files.

    When the readset already provides FASTQ files they are used directly.
    Otherwise the readset's CRAM file is first converted to BAM with
    samtools_1_1.view and then to FASTQ with picard.sam_to_fastq; the
    intermediate BAM and FASTQ files are written under "$TMPDIR".

    The captured BAM of each readset is written to
    fusion_pairs_capture/captured_bam/<sample name>/captured.bam.

    Returns the list of jobs, one per readset.
    Raises Exception when a readset has neither FASTQ nor CRAM input.
    """
    jobs = []

    # The capture reference is named after the cff file given on the command
    # line; only the suffix differs between the two input kinds below.
    ref_prefix = os.path.join("fusion_pairs_capture", "fusion_refs", os.path.basename(self.args.cff.name))

    for readset in self.readsets:
        out_dir = os.path.join("fusion_pairs_capture", "captured_bam", readset.sample.name)
        captured_bam = os.path.join(out_dir, "captured.bam")

        if readset.fastq1:
            fastq1 = readset.fastq1
            fastq2 = readset.fastq2
            conversion_jobs = []
            ref = ref_prefix + ".genes.fa"
            job_name = "bwa_fusion_reads_capture"
        elif readset.cram:
            # The literal "$TMPDIR" is kept in the paths; presumably the shell
            # expands it when the job runs, keeping intermediates on local disk.
            tmp_bam = os.path.join("$TMPDIR", os.path.basename(readset.cram) + ".bam")
            fastq1 = tmp_bam + ".1.fastq"
            fastq2 = tmp_bam + ".2.fastq"
            conversion_jobs = [
                samtools_1_1.view(readset.cram, tmp_bam, "-b"),
                picard.sam_to_fastq(tmp_bam, fastq1, fastq2)
            ]
            # NOTE(review): this branch uses "<cff>.fa" while the FASTQ branch
            # uses "<cff>.genes.fa" -- confirm the difference is intentional.
            ref = ref_prefix + ".fa"
            job_name = "convert_cram_to_fastq"
        else:
            raise Exception("Error: CRAM file not available for readset \"" + readset.name + "\"!")

        # Align the FASTQ pair against the capture reference.
        capture_job = bwa_fusion_reads_capture.bwa_fusion_reads_capture(
            fastq1,
            fastq2,
            ref,
            captured_bam,
            read_group=None,
            ini_section='bwa_fusion_reads_capture'
        )

        # Create the output directory first, then run any conversion, then the capture.
        job = concat_jobs(
            [Job(command="mkdir -p " + out_dir)] + conversion_jobs + [capture_job],
            name=job_name
        )
        jobs.append(job)

    return jobs
def picard_sam_to_fastq(self):
    """
    Convert the CRAM or BAM file of each readset into paired FASTQ files when
    FASTQ files are not already specified in the readset file. Do nothing for
    readsets that already have FASTQ files.

    Rewritten from common.Illumina.picard_sam_to_fastq: the FASTQ files are
    written to fusions/picard_sam_to_fastq/<sample name> under the result
    folder, in case the original BAM file directory is not writable.

    Only PAIRED_END readsets are accepted by this step.

    Returns the list of jobs, one per converted readset.
    Raises Exception when a readset without FASTQ files has neither a CRAM
    nor a BAM file, or when its run type is not PAIRED_END.
    """
    jobs = []

    for readset in self.readsets:
        # If readset FASTQ files are available, skip this step
        if readset.fastq1:
            continue

        if readset.cram:
            # Convert CRAM to BAM first. The literal "$TMPDIR" is kept in the
            # path; presumably the shell expands it when the job runs.
            # "-b" matches the CRAM conversion done in
            # fastq_convertion_and_pairs_capture, so the ".bam" file is
            # requested as BAM rather than relying on the default of view.
            source_bam = os.path.join("$TMPDIR", os.path.basename(readset.cram) + ".bam")
            prerequisite_jobs = [samtools_1_1.view(readset.cram, source_bam, "-b")]
        elif readset.bam:
            source_bam = readset.bam
            prerequisite_jobs = []
        else:
            raise Exception("Error: BAM file not available for readset \"" + readset.name + "\"!")

        if readset.run_type != "PAIRED_END":
            raise Exception("Error: run type \"" + readset.run_type +
                "\" is invalid for readset \"" + readset.name + "\" (should be PAIRED_END)!")

        # FASTQ files are named after the BAM file and stored per sample.
        out_dir = os.path.join("fusions", "picard_sam_to_fastq", readset.sample.name)
        fastq1 = os.path.join(out_dir, os.path.basename(re.sub(r"\.bam$", ".pair1.fastq.gz", source_bam)))
        fastq2 = os.path.join(out_dir, os.path.basename(re.sub(r"\.bam$", ".pair2.fastq.gz", source_bam)))

        picard_job = picard.sam_to_fastq(source_bam, fastq1, fastq2)

        # Create the output directory first, then run any conversion, then picard.
        job = concat_jobs(
            [Job(command="mkdir -p " + out_dir)] + prerequisite_jobs + [picard_job],
            name="picard_sam_to_fastq." + readset.name
        )
        jobs.append(job)

    return jobs