示例#1
0
def combine_bed_by_size(input_beds, sample, work_dir, data, delim=","):
    """Build the ensemble BED for a sample, merging input BEDs one size bin at a time.

    For every (start, end) size range in ``validate.EVENT_SIZES``, the lines of
    ``input_beds`` whose interval length (end - start) lies in [start, end) are
    staged into a temporary file, together with every line whose fourth column
    has "BND" as the prefix before its first underscore. The staged file is
    sorted and merged with bedtools, collapsing column 4 to its distinct values
    joined by ``delim``. Bin files that already exist are reused as-is. Every
    existing bin file is passed through ``annotate.add_genes`` and the results
    are handed to ``bedutils.combine``.

    Returns the value of ``bedutils.combine`` when at least one bin file exists;
    otherwise the expected ``<work_dir>/<sample>-ensemble.bed`` path, without
    calling ``bedutils.combine``.
    """
    out_file = os.path.join(work_dir, "%s-ensemble.bed" % sample)
    # Nothing to merge: hand back the expected path untouched.
    if len(input_beds) == 0:
        return out_file
    prefix, suffix = os.path.splitext(out_file)
    annotated_beds = []
    for min_size, max_size in validate.EVENT_SIZES:
        bin_file = "%s-%s_%s%s" % (prefix, min_size, max_size, suffix)
        if not utils.file_exists(bin_file):
            with file_transaction(data, bin_file) as tx_bin_file:
                with shared.bedtools_tmpdir(data):
                    staged_file = "%s-all.bed" % utils.splitext_plus(tx_bin_file)[0]
                    wrote_any = False
                    with open(staged_file, "w") as staged_handle:
                        for record in fileinput.input(input_beds):
                            _, start, end, event_str = record.split()[:4]
                            event = event_str.split("_", 1)[0]
                            span = int(end) - int(start)
                            in_bin = span >= min_size and span < max_size
                            # BND lines are kept in every bin regardless of span.
                            if in_bin or event == "BND":
                                staged_handle.write(record)
                                wrote_any = True
                    # Only run bedtools when this bin received at least one line.
                    if wrote_any:
                        sorted_bed = pybedtools.BedTool(staged_file).sort(stream=True)
                        merged_bed = sorted_bed.merge(c=4, o="distinct", delim=delim)
                        merged_bed.saveas(tx_bin_file)
        if utils.file_exists(bin_file):
            annotated_beds.append(annotate.add_genes(bin_file, data))
    if annotated_beds:
        out_file = bedutils.combine(annotated_beds, out_file, data)
    return out_file
示例#2
0
def combine_bed_by_size(input_beds, sample, work_dir, data, delim=","):
    """Build the ensemble BED for a sample, merging input BEDs one size bin at a time.

    For every (start, end) size range in ``validate.EVENT_SIZES``, the lines of
    ``input_beds`` whose interval length (end - start) lies in [start, end) are
    staged into a temporary file, together with every line whose fourth column
    has "BND" as the prefix before its first underscore. The staged file is
    sorted and merged with bedtools, collapsing column 4 to its distinct values
    joined by ``delim``. Bin files that already exist are reused as-is. Every
    existing bin file is passed through ``annotate.add_genes`` and the results
    are handed to ``bedutils.combine``.

    Returns the value of ``bedutils.combine`` when at least one bin file exists;
    otherwise the expected ``<work_dir>/<sample>-ensemble.bed`` path, without
    calling ``bedutils.combine``.
    """
    out_file = os.path.join(work_dir, "%s-ensemble.bed" % sample)
    # Nothing to merge: hand back the expected path untouched.
    if len(input_beds) == 0:
        return out_file
    prefix, suffix = os.path.splitext(out_file)
    annotated_beds = []
    for min_size, max_size in validate.EVENT_SIZES:
        bin_file = "%s-%s_%s%s" % (prefix, min_size, max_size, suffix)
        if not utils.file_exists(bin_file):
            with file_transaction(data, bin_file) as tx_bin_file:
                with shared.bedtools_tmpdir(data):
                    staged_file = "%s-all.bed" % utils.splitext_plus(tx_bin_file)[0]
                    wrote_any = False
                    with open(staged_file, "w") as staged_handle:
                        for record in fileinput.input(input_beds):
                            _, start, end, event_str = record.split()[:4]
                            event = event_str.split("_", 1)[0]
                            span = int(end) - int(start)
                            in_bin = span >= min_size and span < max_size
                            # BND lines are kept in every bin regardless of span.
                            if in_bin or event == "BND":
                                staged_handle.write(record)
                                wrote_any = True
                    # Only run bedtools when this bin received at least one line.
                    if wrote_any:
                        sorted_bed = pybedtools.BedTool(staged_file).sort(stream=True)
                        merged_bed = sorted_bed.merge(c=4, o="distinct", delim=delim)
                        merged_bed.saveas(tx_bin_file)
        if utils.file_exists(bin_file):
            annotated_beds.append(annotate.add_genes(bin_file, data))
    if annotated_beds:
        out_file = bedutils.combine(annotated_beds, out_file, data)
    return out_file
示例#3
0
def summarize(calls, data):
    """Summarize results from multiple callers into a single flattened BED file.

    Each entry of ``calls`` is converted to a BED file with ``_create_bed``;
    entries for which it returns None are skipped. The remaining BED lines are
    binned by interval length (end - start) using the (start, end) size ranges in
    ``validate.EVENT_SIZES``. Each non-empty bin is sorted and merged with
    bedtools (column 4 collapsed to its distinct values, comma separated), and
    the bin files are combined into ``<sample>-ensemble.bed`` under
    ``<work>/structural/<sample>/ensemble``.

    Args:
        calls: list of per-caller call dictionaries; extended in place.
        data: sample dictionary; ``rgnames.sample``, ``dirs.work`` and ``config``
            are read here, and the whole dictionary is passed to the helpers.

    Returns:
        ``calls``, with an extra ``{"variantcaller": "ensemble", "vrn_file": ...}``
        entry appended when the ensemble BED file exists.
    """
    import pybedtools
    sample = tz.get_in(["rgnames", "sample"], data)
    work_dir = utils.safe_makedir(os.path.join(data["dirs"]["work"], "structural",
                                               sample, "ensemble"))
    out_file = os.path.join(work_dir, "%s-ensemble.bed" % sample)
    with shared.bedtools_tmpdir(data):
        created_beds = [_create_bed(c, out_file, data) for c in calls]
    # Build a real list: on Python 3 filter() returns a lazy iterator, so the
    # len() check that used to follow raised TypeError.
    input_beds = [bed for bed in created_beds if bed is not None]
    if input_beds:
        base, ext = os.path.splitext(out_file)
        size_beds = []
        for e_start, e_end in validate.EVENT_SIZES:
            size_out_file = "%s-%s_%s%s" % (base, e_start, e_end, ext)
            if not utils.file_exists(size_out_file):
                with file_transaction(data, size_out_file) as tx_out_file:
                    with shared.bedtools_tmpdir(data):
                        all_file = "%s-all.bed" % utils.splitext_plus(tx_out_file)[0]
                        has_regions = False
                        with open(all_file, "w") as out_handle:
                            for line in fileinput.input(input_beds):
                                chrom, start, end = line.split()[:3]
                                size = int(end) - int(start)
                                if size >= e_start and size < e_end:
                                    out_handle.write(line)
                                    has_regions = True
                        # Skip sort/merge for a bin with no lines; merging on
                        # column 4 of an empty file presumably fails in bedtools
                        # -- TODO confirm against the installed version.
                        if has_regions:
                            pybedtools.BedTool(all_file).sort(stream=True)\
                              .merge(c=4, o="distinct", delim=",").saveas(tx_out_file)
            # Only bins that actually produced a file take part in the combine.
            if utils.file_exists(size_out_file):
                size_beds.append(size_out_file)
        if size_beds:
            out_file = bedutils.combine(size_beds, out_file, data["config"])
    if utils.file_exists(out_file):
        bedprep_dir = utils.safe_makedir(os.path.join(os.path.dirname(out_file), "bedprep"))
        calls.append({"variantcaller": "ensemble",
                      "vrn_file": bedutils.clean_file(out_file, data, bedprep_dir=bedprep_dir)})
    return calls
示例#4
0
def combine_bed(*args):
    """Forward all positional arguments to ``bedutils.combine`` and return its result."""
    combined = bedutils.combine(*args)
    return combined
示例#5
0
def combine_bed(*args):
    """Forward all positional arguments to ``bedutils.combine`` and return its result."""
    combined = bedutils.combine(*args)
    return combined