def extract_bam_unmap(self):
    """
    Build the jobs that pull unmapped-read subsets out of each sample's
    soft-clip BAM files and sort every subset.

    For each sample four jobs are created, in this order:

    * ORPHAN    - from `scOthers.bam`, reads flagged unmapped with an unmapped mate
    * OEAUNMAP1 - from `sc.bam`, unmapped first-in-pair reads whose mate is mapped
    * OEAUNMAP2 - from `sc.bam`, unmapped second-in-pair reads whose mate is mapped
    * OEAMAP    - from `sc.bam`, mapped reads whose mate is unmapped

    Inputs are read from `sclip/<sample>/<sample>.*` and outputs are written to
    `extract/<sample>/<sample>.*`.

    Returns:
        The list of jobs, four per sample (empty when there are no samples).
    """
    # (job name tag, input BAM suffix, output stem, samtools view options).
    # In the view options, -f keeps reads having ALL the given SAM flag bits and
    # -F drops reads having ANY of them (4 = read unmapped, 8 = mate unmapped,
    # 64 = first in pair, 128 = second in pair, 256 = secondary alignment,
    # 1024 = duplicate).
    extractions = (
        ("ORPHAN", "scOthers.bam", "ORPHAN", "-b -h -f 12 -F 256"),
        ("OEAUNMAP1", "sc.bam", "OEAUNMAP.1", "-b -h -f 68 -F 264"),
        ("OEAUNMAP2", "sc.bam", "OEAUNMAP.2", "-b -h -f 132 -F 264"),
        ("OEAMAP", "sc.bam", "OEAMAP", "-b -h -f 8 -F 1284"),
    )

    jobs = []
    for sample in self.samples:
        sclip_file_prefix = os.path.join("sclip", sample.name, sample.name + ".")
        extract_directory = os.path.join("extract", sample.name)
        extract_file_prefix = os.path.join("extract", sample.name, sample.name + ".")

        # Every job of a sample starts by making sure the output directory exists;
        # the same Job object is reused in all four jobs, as before.
        jobMkdir = Job(
            command="if [ ! -d " + extract_directory + " ]; then mkdir -p " + extract_directory + "; fi")

        for tag, input_suffix, output_stem, view_options in extractions:
            extracted_bam = extract_file_prefix + output_stem + ".bam"
            job = concat_jobs([
                jobMkdir,
                concat_jobs([
                    samtools.view(sclip_file_prefix + input_suffix, extracted_bam, view_options),
                    # NOTE(review): the third positional argument is presumably
                    # sort_by_name (outputs are suffixed ".sName") - confirm
                    # against samtools.sort.
                    samtools.sort(extracted_bam, extract_file_prefix + output_stem + ".sName", True)
                ])
            ], name="extract_bam_" + tag + "_" + sample.name)
            jobs.append(job)

    return jobs
def samtools_bam_sort(self):
    """
    Sorts bam by readname prior to picard_sam_to_fastq step in order to minimize
    memory consumption. If bam file is small and the memory requirements are reasonable,
    this step can be skipped.

    One job is created per readset that has no FASTQ (`fastq1`) but has a BAM;
    the sorted output prefix is the BAM path with its trailing `.bam` replaced
    by `.sorted`. Readsets that already have FASTQ files produce no job.

    Returns:
        The list of sort jobs (possibly empty).

    Raises:
        Exception: if a readset has neither `fastq1` nor `bam`.
    """
    jobs = []
    for readset in self.readsets:
        # If readset FASTQ files are available, skip this step
        if readset.fastq1:
            continue

        if not readset.bam:
            raise Exception(
                "Error: BAM file not available for readset \"" + readset.name + "\"!")

        # Raw string: "\." in a plain string literal is an invalid escape sequence.
        sorted_bam_prefix = re.sub(r"\.bam$", ".sorted", readset.bam.strip())

        job = samtools.sort(readset.bam, sorted_bam_prefix, sort_by_name=True)
        job.name = "samtools_bam_sort." + readset.name
        job.removable_files = [sorted_bam_prefix + ".bam"]
        job.samples = [readset.sample]
        jobs.append(job)

    return jobs
def create_hic_file(self):
    """
    A .hic file is created per sample in order to visualize in JuiceBox, WashU epigenome browser or as input for other tools.
    For more detailed information about the JuiceBox visit: [JuiceBox] (http://www.aidenlab.org/software.html)

    For each sample, one concatenated job creates the .hic output directory,
    sorts `<bams dir>/<sample>/<sample>.merged.bam` with `sort_by_name=True`
    into `<sample>.merged.sorted.bam`, then passes the sorted BAM to
    `hic.create_input` and `<sample>.juicebox.input.sorted` to `hic.create_hic`,
    which targets `<hicfiles dir>/<sample>.hic`.

    Returns:
        The list of jobs, one per sample (empty when there are no samples).
    """
    jobs = []

    for sample in self.samples:
        hic_directory = self.output_dirs['hicfiles_output_directory']
        sample_input = os.path.join(
            self.output_dirs['bams_output_directory'], sample.name, sample.name + ".merged.bam")

        # Escape both dots and anchor at the end so that only the file
        # extension is rewritten, never a look-alike earlier in the path.
        sorted_bam_prefix = re.sub(r"\.merged\.bam$", ".merged.sorted", sample_input.strip())
        sorted_bam = sorted_bam_prefix + ".bam"
        hic_output = os.path.join(hic_directory, sample.name + ".hic")

        job = concat_jobs([
            Job(command="mkdir -p " + hic_directory),
            samtools.sort(sample_input, sorted_bam_prefix, sort_by_name=True),
            hic.create_input(sorted_bam, sample.name),
            hic.create_hic(sample.name + ".juicebox.input.sorted", hic_output, self.genome)
        ])
        job.name = "create_hic_file." + sample.name
        job.samples = [sample]

        jobs.append(job)

    return jobs
def create_hic_file(self):
    """
    A .hic file is created per sample in order to visualize in JuiceBox, WashU epigenome browser or as input for other tools.
    For more detailed information about the JuiceBox visit: [JuiceBox] (http://www.aidenlab.org/software.html)

    For each sample, one concatenated job runs three steps:

    1. sort `<bams dir>/<sample>/<sample>.merged.bam` with `sort_by_name=True`
       into `<sample>.merged.sorted.bam`;
    2. run `CreateHicFileInput.sh` on the sorted BAM, declared to produce
       `<sample>.juicebox.input` and `<sample>.juicebox.input.sorted`;
    3. create the .hic output directory and run the Juicer jar (`pre`) on the
       sorted input to write `<hicfiles dir>/<sample>.hic`.

    The Juicer jar path and the `-q` value are read from the `create_hic_file`
    configuration section (`JuicerPath`, `q`).

    Returns:
        The list of jobs, one per sample (empty when there are no samples).
    """
    jobs = []

    for sample in self.samples:
        hic_directory = self.output_dirs['hicfiles_output_directory']
        sample_input = os.path.join(
            self.output_dirs['bams_output_directory'], sample.name, sample.name + ".merged.bam")

        # Escape both dots and anchor at the end so that only the file
        # extension is rewritten, never a look-alike earlier in the path.
        sorted_bam_prefix = re.sub(r"\.merged\.bam$", ".merged.sorted", sample_input.strip())
        sorted_bam = sorted_bam_prefix + ".bam"
        hic_output = os.path.join(hic_directory, sample.name + ".hic")

        juicebox_input = sample.name + ".juicebox.input"
        juicebox_input_sorted = sample.name + ".juicebox.input.sorted"
        job_name = "create_hic_file." + sample.name

        command_sort = samtools.sort(sample_input, sorted_bam_prefix, sort_by_name=True)

        command_input = Job(
            input_files=[sorted_bam],
            output_files=[juicebox_input, juicebox_input_sorted],
            module_entries=[["create_hic_file", "module_mugqic_tools"]],
            name=job_name,
            # "$(pwd)" is emitted literally and substituted by the shell when the
            # job runs; os.path.expandvars() only expands $NAME / ${NAME}, so
            # wrapping it in that call had no effect.
            command="bash {script} {sorted_bam} {name} {tmp_dir}".format(
                script='CreateHicFileInput.sh',
                sorted_bam=sorted_bam,
                name=sample.name,
                tmp_dir="$(pwd)"),
            removable_files=[juicebox_input, juicebox_input_sorted, sorted_bam])

        command_juicebox = Job(
            input_files=[juicebox_input_sorted],
            output_files=[hic_output],
            module_entries=[["create_hic_file", "module_java"]],
            name=job_name,
            command="mkdir -p {hic_directory} && java -jar {juicer} pre -q {q} {name} {output} {assembly}".format(
                hic_directory=hic_directory,
                juicer=os.path.expandvars(config.param('create_hic_file', 'JuicerPath')),
                q=config.param('create_hic_file', 'q'),
                name=juicebox_input_sorted,
                output=hic_output,
                assembly=self.genome))

        job = concat_jobs([command_sort, command_input, command_juicebox])
        job.name = job_name

        jobs.append(job)

    return jobs