Exemplo n.º 1
0
	def fastq_convertion_and_pairs_capture(self):
		"""
		Build the jobs that align each readset against the fusion capture reference.

		For every readset in self.readsets:
		- if FASTQ files are specified, they are passed directly to
		  bwa_fusion_reads_capture;
		- otherwise the readset CRAM file is converted to BAM (samtools_1_1.view
		  with "-b") and then to a pair of FASTQ files (picard.sam_to_fastq),
		  all written under $TMPDIR, before the capture alignment is run.

		The captured BAM is written to
		fusion_pairs_capture/captured_bam/<sample name>/captured.bam.

		Returns the list of jobs (one concatenated job per readset).
		Raises Exception if a readset has neither FASTQ nor CRAM input.
		"""
		jobs = []
		cff_file = self.args.cff.name
		# Capture references are named after the CFF file; only the extension differs per branch
		ref_prefix = os.path.join("fusion_pairs_capture", "fusion_refs", os.path.basename(cff_file))

		for readset in self.readsets:
			out_dir = os.path.join("fusion_pairs_capture", "captured_bam", readset.sample.name)
			captured_bam = os.path.join(out_dir, "captured.bam")

			if readset.fastq1:
				# FASTQ files are available: bwa aln them to the capture reference directly
				# NOTE(review): this branch uses the ".genes.fa" reference while the CRAM
				# branch below uses ".fa" -- confirm that the difference is intended
				capture_job = bwa_fusion_reads_capture.bwa_fusion_reads_capture(readset.fastq1, readset.fastq2, ref_prefix + ".genes.fa", captured_bam, read_group=None, ini_section='bwa_fusion_reads_capture')
				jobs.append(concat_jobs([
					Job(command="mkdir -p " + out_dir),
					capture_job
				], name="bwa_fusion_reads_capture"))
				continue

			if not readset.cram:
				raise Exception("Error: CRAM file not available for readset \"" + readset.name + "\"!")

			# Convert cram to bam then to fastq; the intermediate bam and fastqs are kept on the local disk
			tmp_bam = os.path.join("$TMPDIR", os.path.basename(readset.cram) + ".bam")
			fastq1 = tmp_bam + ".1.fastq"
			fastq2 = tmp_bam + ".2.fastq"
			cram2bam_job = samtools_1_1.view(readset.cram, tmp_bam, "-b")
			bam2fastq_job = picard.sam_to_fastq(tmp_bam, fastq1, fastq2)

			# bwa aln the converted fastqs to the capture reference
			capture_job = bwa_fusion_reads_capture.bwa_fusion_reads_capture(fastq1, fastq2, ref_prefix + ".fa", captured_bam, read_group=None, ini_section='bwa_fusion_reads_capture')

			jobs.append(concat_jobs([
				Job(command="mkdir -p " + out_dir),
				cram2bam_job,
				bam2fastq_job,
				capture_job
			], name="convert_cram_to_fastq"))

		return jobs
Exemplo n.º 2
0
    def picard_sam_to_fastq(self):
        """
        Convert the CRAM or BAM file of each readset into paired FASTQ files
        if FASTQ files are not already specified in the readset file. Do nothing otherwise.

        Rewritten from common.Illumina.picard_sam_to_fastq: the FASTQ files are written to a
        dedicated directory for this step (fusions/picard_sam_to_fastq/<sample name>) in case
        the original BAM file directory is not writable.

        A CRAM input is first converted to a BAM under $TMPDIR with samtools_1_1.view;
        a BAM input is passed to picard.sam_to_fastq as is.

        Returns the list of jobs (one concatenated job per converted readset).
        Raises Exception if a readset without FASTQ files has neither a CRAM nor a BAM file,
        or if its run type is not PAIRED_END.
        """
        jobs = []
        for readset in self.readsets:
            # If readset FASTQ files are available, skip this step
            if readset.fastq1:
                continue

            if not (readset.cram or readset.bam):
                raise Exception("Error: BAM file not available for readset \"" + readset.name + "\"!")

            # Validate before building any job; only paired-end readsets are handled here
            if readset.run_type != "PAIRED_END":
                raise Exception("Error: run type \"" + readset.run_type +
                "\" is invalid for readset \"" + readset.name + "\" (should be PAIRED_END)!")

            out_dir = os.path.join("fusions", "picard_sam_to_fastq", readset.sample.name)
            conversion_jobs = []
            if readset.cram:
                # Convert cram to bam first; the intermediate bam is kept on the local disk
                input_bam = os.path.join("$TMPDIR", os.path.basename(readset.cram) + ".bam")
                conversion_jobs.append(samtools_1_1.view(readset.cram, input_bam))
            else:
                input_bam = readset.bam

            # FASTQ names reuse the BAM file name with its ".bam" extension swapped out
            fastq1 = os.path.join(out_dir, os.path.basename(re.sub(r"\.bam$", ".pair1.fastq.gz", input_bam)))
            fastq2 = os.path.join(out_dir, os.path.basename(re.sub(r"\.bam$", ".pair2.fastq.gz", input_bam)))
            conversion_jobs.append(picard.sam_to_fastq(input_bam, fastq1, fastq2))

            jobs.append(concat_jobs(
                [Job(command="mkdir -p " + out_dir)] + conversion_jobs,
                name="picard_sam_to_fastq." + readset.name
            ))
        return jobs