def coverage(data): bed_file = dd.get_coverage_experimental(data) if not bed_file: return data work_dir = os.path.join(dd.get_work_dir(data), "report", "coverage") with chdir(work_dir): in_bam = data['work_bam'] sample = os.path.splitext(os.path.basename(in_bam))[0] logger.debug("doing coverage for %s" % sample) region_bed = pybedtools.BedTool(bed_file) parse_file = os.path.join(sample + "_coverage.bed") parse_total_file = os.path.join(sample + "_cov_total.tsv") if not file_exists(parse_file): total_cov = cov_class(0, None, sample) bam_api = pysam.AlignmentFile(in_bam) with file_transaction(parse_file) as out_tx: with open(out_tx, 'w') as out_handle: print >>out_handle, "#chrom\tstart\tend\tregion\treads\tstrand\tsize\tsample\tmean\tsdt\tq10\tq20\tq4\tq50" with tmpfile() as tx_tmp_file: # tx_tmp_file = "tmpintersect" for line in region_bed: chrom = line.chrom start = max(line.start, 0) end = line.end region_file = pybedtools.BedTool(str(line), from_string=True).saveas().fn coords = "%s:%s-%s" % (chrom, start, end) cmd = ("samtools view -b {in_bam} {coords} | " "bedtools coverage -a {region_file} -b - -hist > {tx_tmp_file}") _silence_run(cmd.format(**locals())) total_cov = _get_exome_coverage_stats(os.path.abspath(tx_tmp_file), sample, out_tx, total_cov) total_cov.write_coverage(parse_total_file) data['coverage'] = os.path.abspath(parse_file) return data
def coverage(data): AVERAGE_REGION_STRING_LENGTH = 100 bed_file = dd.get_coverage_experimental(data) if not bed_file: return data work_dir = os.path.join(dd.get_work_dir(data), "report", "coverage") batch_size = max_command_length() / AVERAGE_REGION_STRING_LENGTH with chdir(work_dir): in_bam = data['work_bam'] sample = dd.get_sample_name(data) logger.debug("doing coverage for %s" % sample) region_bed = pybedtools.BedTool(bed_file) parse_file = os.path.join(sample + "_coverage.bed") parse_total_file = os.path.join(sample + "_cov_total.tsv") if not file_exists(parse_file): total_cov = cov_class(0, None, sample) with file_transaction(parse_file) as out_tx: with open(out_tx, 'w') as out_handle: HEADER = ["#chrom", "start", "end", "region", "reads", "strand", "size", "sample", "mean", "sd", "cutoff10", "cutoff20", "cutoff4", "cutoff50"] out_handle.write("\t".join(HEADER) + "\n") with tmpfile() as tx_tmp_file: lcount = 0 for chunk in robust_partition_all(batch_size, region_bed): coord_batch = [] line_batch = "" for line in chunk: lcount += 1 chrom = line.chrom start = max(line.start, 0) end = line.end coords = "%s:%s-%s" % (chrom, start, end) coord_batch.append(coords) line_batch += str(line) if not coord_batch: continue region_file = pybedtools.BedTool(line_batch, from_string=True).saveas().fn coord_string = " ".join(coord_batch) cmd = ("samtools view -b {in_bam} {coord_string} | " "bedtools coverage -a {region_file} -b - " "-hist > {tx_tmp_file}") _silence_run(cmd.format(**locals())) total_cov = _get_exome_coverage_stats(os.path.abspath(tx_tmp_file), sample, out_tx, total_cov) logger.debug("Processed %d regions." % lcount) total_cov.write_coverage(parse_total_file) data['coverage'] = os.path.abspath(parse_file) return data
def coverage(data): bed_file = dd.get_coverage_experimental(data) if not bed_file: return data work_dir = os.path.join(dd.get_work_dir(data), "report", "coverage") with chdir(work_dir): in_bam = data['work_bam'] sample = os.path.splitext(os.path.basename(in_bam))[0] logger.debug("doing coverage for %s" % sample) region_bed = pybedtools.BedTool(bed_file) parse_file = os.path.join(sample + "_coverage.bed") parse_total_file = os.path.join(sample + "_cov_total.tsv") if not file_exists(parse_file): total_cov = cov_class(0, None, sample) bam_api = pysam.AlignmentFile(in_bam) with file_transaction(parse_file) as out_tx: with open(out_tx, 'w') as out_handle: print >> out_handle, "#chrom\tstart\tend\tregion\treads\tstrand\tsize\tsample\tmean\tsdt\tq10\tq20\tq4\tq50" with tmpfile() as tx_tmp_file: # tx_tmp_file = "tmpintersect" for line in region_bed: chrom = line.chrom start = max(line.start, 0) end = line.end region_file = pybedtools.BedTool( str(line), from_string=True).saveas().fn coords = "%s:%s-%s" % (chrom, start, end) cmd = ( "samtools view -b {in_bam} {coords} | " "bedtools coverage -a {region_file} -b - -hist > {tx_tmp_file}" ) _silence_run(cmd.format(**locals())) total_cov = _get_exome_coverage_stats( os.path.abspath(tx_tmp_file), sample, out_tx, total_cov) total_cov.write_coverage(parse_total_file) data['coverage'] = os.path.abspath(parse_file) return data