示例#1
0
def run(items):
    """Run MetaSV if we have enough supported callers, adding output to the set of calls.
    """
    assert len(items) == 1, "Expect one input to MetaSV ensemble calling"
    data = items[0]
    work_dir = _sv_workdir(data)
    out_file = os.path.join(work_dir, "variants.vcf.gz")
    cmd = _get_cmd() + ["--sample", dd.get_sample_name(data), "--reference", dd.get_ref_file(data),
                        "--bam", dd.get_align_bam(data), "--outdir", work_dir]
    methods = []
    for call in data.get("sv", []):
        if call["variantcaller"] in SUPPORTED and call["variantcaller"] not in methods:
            methods.append(call["variantcaller"])
            cmd += ["--%s_vcf" % call["variantcaller"], call.get("vcf_file", call["vrn_file"])]
    if len(methods) >= MIN_CALLERS:
        if not utils.file_exists(out_file):
            tx_work_dir = utils.safe_makedir(os.path.join(work_dir, "raw"))
            ins_stats = shared.calc_paired_insert_stats_save(dd.get_align_bam(data),
                                                             os.path.join(tx_work_dir, "insert-stats.yaml"))
            cmd += ["--workdir", tx_work_dir, "--num_threads", str(dd.get_num_cores(data))]
            cmd += ["--spades", utils.which("spades.py"), "--age", utils.which("age_align")]
            cmd += ["--assembly_max_tools=1", "--assembly_pad=500"]
            cmd += ["--boost_sc", "--isize_mean", ins_stats["mean"], "--isize_sd", ins_stats["std"]]
            do.run(cmd, "Combine variant calls with MetaSV")
        filters = ("(NUM_SVTOOLS = 1 && ABS(SVLEN)>50000) || "
                   "(NUM_SVTOOLS = 1 && ABS(SVLEN)<4000 && BA_FLANK_PERCENT>80) || "
                   "(NUM_SVTOOLS = 1 && ABS(SVLEN)<4000 && BA_NUM_GOOD_REC=0) || "
                   "(ABS(SVLEN)<4000 && BA_NUM_GOOD_REC>2)")
        filter_file = vfilter.cutoff_w_expression(out_file, filters,
                                                  data, name="ReassemblyStats", limit_regions=None)
        effects_vcf, _ = effects.add_to_vcf(filter_file, data, "snpeff")
        data["sv"].append({"variantcaller": "metasv",
                           "vrn_file": effects_vcf or filter_file})
    return [data]
示例#2
0
def _filter_by_support(in_file, data):
    """Filter call file based on supporting evidence, adding FILTER annotations to VCF.

    Filters based on the following criteria:
      - Minimum read support for the call (SU = total support)
      - Large calls need split read evidence.
    """
    rc_filter = ("FORMAT/SU < 4 || "
                 "(FORMAT/SR == 0 && FORMAT/SU < 15 && ABS(SVLEN)>50000) || "
                 "(FORMAT/SR == 0 && FORMAT/SU < 5 && ABS(SVLEN)<2000) || "
                 "(FORMAT/SR == 0 && FORMAT/SU < 15 && ABS(SVLEN)<300)")
    return vfilter.cutoff_w_expression(in_file, rc_filter, data, name="ReadCountSupport",
                                       limit_regions=None)
示例#3
0
def _filter_by_support(in_file, data):
    """Filter call file based on supporting evidence, adding FILTER annotations to VCF.

    Filters based on the following criteria:
      - Minimum read support for the call (SU = total support)
      - Large calls need split read evidence.
    """
    rc_filter = ("FORMAT/SU < 4 || "
                 "(FORMAT/SR == 0 && FORMAT/SU < 15 && ABS(SVLEN)>50000) || "
                 "(FORMAT/SR == 0 && FORMAT/SU < 5 && ABS(SVLEN)<2000) || "
                 "(FORMAT/SR == 0 && FORMAT/SU < 15 && ABS(SVLEN)<300)")
    return vfilter.cutoff_w_expression(in_file, rc_filter, data, name="ReadCountSupport",
                                       limit_regions=None)