def combine_bed_by_size(input_beds, sample, work_dir, data, delim=","): """Combine a set of BED files, breaking into individual size chunks. """ out_file = os.path.join(work_dir, "%s-ensemble.bed" % sample) if len(input_beds) > 0: size_beds = [] for e_start, e_end in validate.EVENT_SIZES: base, ext = os.path.splitext(out_file) size_out_file = "%s-%s_%s%s" % (base, e_start, e_end, ext) if not utils.file_exists(size_out_file): with file_transaction(data, size_out_file) as tx_out_file: with shared.bedtools_tmpdir(data): all_file = "%s-all.bed" % utils.splitext_plus(tx_out_file)[0] has_regions = False with open(all_file, "w") as out_handle: for line in fileinput.input(input_beds): chrom, start, end, event_str = line.split()[:4] event = event_str.split("_", 1)[0] size = int(end) - int(start) if size >= e_start and size < e_end or event == "BND": out_handle.write(line) has_regions = True if has_regions: pybedtools.BedTool(all_file).sort(stream=True)\ .merge(c=4, o="distinct", delim=delim).saveas(tx_out_file) if utils.file_exists(size_out_file): ann_size_out_file = annotate.add_genes(size_out_file, data) size_beds.append(ann_size_out_file) if len(size_beds) > 0: out_file = bedutils.combine(size_beds, out_file, data) return out_file
def summarize(calls, data): """Summarize results from multiple callers into a single flattened BED file. """ import pybedtools sample = tz.get_in(["rgnames", "sample"], data) work_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "structural", sample, "ensemble")) out_file = os.path.join(work_dir, "%s-ensemble.bed" % sample) with shared.bedtools_tmpdir(data): input_beds = filter(lambda x: x is not None, [_create_bed(c, out_file, data) for c in calls]) if len(input_beds) > 0: size_beds = [] for e_start, e_end in validate.EVENT_SIZES: base, ext = os.path.splitext(out_file) size_out_file = "%s-%s_%s%s" % (base, e_start, e_end, ext) if not utils.file_exists(size_out_file): with file_transaction(data, size_out_file) as tx_out_file: with shared.bedtools_tmpdir(data): all_file = "%s-all.bed" % utils.splitext_plus(tx_out_file)[0] with open(all_file, "w") as out_handle: for line in fileinput.input(input_beds): chrom, start, end = line.split()[:3] size = int(end) - int(start) if size >= e_start and size < e_end: out_handle.write(line) pybedtools.BedTool(all_file).sort(stream=True)\ .merge(c=4, o="distinct", delim=",").saveas(tx_out_file) size_beds.append(size_out_file) out_file = bedutils.combine(size_beds, out_file, data["config"]) if utils.file_exists(out_file): bedprep_dir = utils.safe_makedir(os.path.join(os.path.dirname(out_file), "bedprep")) calls.append({"variantcaller": "ensemble", "vrn_file": bedutils.clean_file(out_file, data, bedprep_dir=bedprep_dir)}) return calls
def combine_bed(*args): return bedutils.combine(*args)