Пример #1
0
def run(items):
    """Perform detection of structural variations with delly.

    Performs post-call filtering with a custom filter tuned based
    on NA12878 Moleculo and PacBio data, using calls prepared by
    @ryanlayer and @cc2qe

    Filters using the high quality variant pairs (DV) compared with
    high quality reference pairs (DR).
    """
    work_dir = utils.safe_makedir(
        os.path.join(items[0]["dirs"]["work"], "structural",
                     items[0]["name"][-1], "delly"))
    # Add core request for delly
    config = copy.deepcopy(items[0]["config"])
    delly_config = utils.get_in(config, ("resources", "delly"), {})
    delly_config["cores"] = 1
    config["resources"]["delly"] = delly_config
    parallel = {
        "type": "local",
        "cores": config["algorithm"].get("num_cores", 1),
        "progs": ["delly"]
    }
    work_bams = run_multicore(_prep_subsampled_bams,
                              [(data, work_dir) for data in items], config,
                              parallel)
    ref_file = utils.get_in(items[0], ("reference", "fasta", "base"))
    sv_types = [
        "DEL", "DUP"
    ]  # "TRA" has invalid VCF END specifications that GATK doesn't like, "INV" very slow
    exclude_file = _get_full_exclude_file(items, work_dir)
    bytype_vcfs = run_multicore(
        _run_delly,
        [(work_bams, chrom, sv_type, ref_file, work_dir, items)
         for (chrom, sv_type) in itertools.product(
             sshared.get_sv_chroms(items, exclude_file), sv_types)], config,
        parallel)
    out_file = "%s.vcf.gz" % sshared.outname_from_inputs(bytype_vcfs)
    combo_vcf = vcfutils.combine_variant_files(bytype_vcfs, out_file, ref_file,
                                               config)
    out = []
    for data in items:
        if "sv" not in data:
            data["sv"] = []
        base, ext = utils.splitext_plus(combo_vcf)
        sample = tz.get_in(["rgnames", "sample"], data)
        delly_sample_vcf = vcfutils.select_sample(
            combo_vcf, sample, "%s-%s%s" % (base, sample, ext), data["config"])
        delly_vcf = _delly_count_evidence_filter(delly_sample_vcf, data)
        data["sv"].append({
            "variantcaller": "delly",
            "vrn_file": delly_vcf,
            "exclude": exclude_file
        })
        out.append(data)
    return out
Пример #2
0
def run(items):
    """Perform detection of structural variations with delly.

    Performs post-call filtering with a custom filter tuned based
    on NA12878 Moleculo and PacBio data, using calls prepared by
    @ryanlayer and @cc2qe

    Filters using the high quality variant pairs (DV) compared with
    high quality reference pairs (DR).
    """
    work_dir = utils.safe_makedir(
        os.path.join(items[0]["dirs"]["work"], "structural",
                     dd.get_sample_name(items[0]), "delly"))
    # Add core request for delly
    config = copy.deepcopy(items[0]["config"])
    delly_config = utils.get_in(config, ("resources", "delly"), {})
    delly_config["cores"] = 1
    config["resources"]["delly"] = delly_config
    parallel = {
        "type": "local",
        "cores": config["algorithm"].get("num_cores", 1),
        "progs": ["delly"]
    }
    work_bams = [dd.get_align_bam(d) for d in items]
    ref_file = dd.get_ref_file(items[0])
    exclude_file = _get_full_exclude_file(items, work_bams, work_dir)
    bytype_vcfs = run_multicore(
        _run_delly, [(work_bams, chrom, ref_file, work_dir, items)
                     for chrom in sshared.get_sv_chroms(items, exclude_file)],
        config, parallel)
    out_file = "%s.vcf.gz" % sshared.outname_from_inputs(bytype_vcfs)
    combo_vcf = vcfutils.combine_variant_files(bytype_vcfs, out_file, ref_file,
                                               config)
    out = []
    upload_counts = collections.defaultdict(int)
    for data in items:
        if "sv" not in data:
            data["sv"] = []
        base, ext = utils.splitext_plus(combo_vcf)
        final_vcf = sshared.finalize_sv(combo_vcf, data, items)
        if final_vcf:
            delly_vcf = _delly_count_evidence_filter(final_vcf, data)
            data["sv"].append({
                "variantcaller": "delly",
                "vrn_file": delly_vcf,
                "do_upload": upload_counts[final_vcf] ==
                0,  # only upload a single file per batch
                "exclude": exclude_file
            })
            upload_counts[final_vcf] += 1
        out.append(data)
    return out
Пример #3
0
def _combine_out_files(chr_files, work_dir, data):
    """Concatenate all CNV calls into a single file.
    """
    out_file = "%s.bed" % sshared.outname_from_inputs(chr_files)
    if not utils.file_exists(out_file):
        with file_transaction(data, out_file) as tx_out_file:
            with open(tx_out_file, "w") as out_handle:
                for chr_file in chr_files:
                    with open(chr_file) as in_handle:
                        is_empty = in_handle.readline().startswith("track name=empty")
                    if not is_empty:
                        with open(chr_file) as in_handle:
                            shutil.copyfileobj(in_handle, out_handle)
    return out_file
Пример #4
0
def _combine_out_files(chr_files, work_dir, data):
    """Concatenate all CNV calls into a single file.
    """
    out_file = "%s.bed" % sshared.outname_from_inputs(chr_files)
    if not utils.file_exists(out_file):
        with file_transaction(data, out_file) as tx_out_file:
            with open(tx_out_file, "w") as out_handle:
                for chr_file in chr_files:
                    with open(chr_file) as in_handle:
                        is_empty = in_handle.readline().startswith("track name=empty")
                    if not is_empty:
                        with open(chr_file) as in_handle:
                            shutil.copyfileobj(in_handle, out_handle)
    return out_file
Пример #5
0
def run(items):
    """Perform detection of structural variations with delly.

    Performs post-call filtering with a custom filter tuned based
    on NA12878 Moleculo and PacBio data, using calls prepared by
    @ryanlayer and @cc2qe

    Filters using the high quality variant pairs (DV) compared with
    high quality reference pairs (DR).
    """
    work_dir = utils.safe_makedir(os.path.join(items[0]["dirs"]["work"], "structural",
                                               items[0]["name"][-1], "delly"))
    # Add core request for delly
    config = copy.deepcopy(items[0]["config"])
    delly_config = utils.get_in(config, ("resources", "delly"), {})
    delly_config["cores"] = 1
    config["resources"]["delly"] = delly_config
    parallel = {"type": "local", "cores": config["algorithm"].get("num_cores", 1),
                "progs": ["delly"]}
    work_bams = run_multicore(_prep_subsampled_bams,
                              [(data, work_dir) for data in items],
                              config, parallel)
    ref_file = utils.get_in(items[0], ("reference", "fasta", "base"))
    sv_types = ["DEL", "DUP"]  # "TRA" has invalid VCF END specifications that GATK doesn't like, "INV" very slow
    exclude_file = _get_full_exclude_file(items, work_dir)
    bytype_vcfs = run_multicore(_run_delly,
                                [(work_bams, chrom, sv_type, ref_file, work_dir, items)
                                 for (chrom, sv_type)
                                 in itertools.product(sshared.get_sv_chroms(items, exclude_file), sv_types)],
                                config, parallel)
    out_file = "%s.vcf.gz" % sshared.outname_from_inputs(bytype_vcfs)
    combo_vcf = vcfutils.combine_variant_files(bytype_vcfs, out_file, ref_file, config)
    out = []
    for data in items:
        if "sv" not in data:
            data["sv"] = []
        base, ext = utils.splitext_plus(combo_vcf)
        sample = tz.get_in(["rgnames", "sample"], data)
        delly_sample_vcf = vcfutils.select_sample(combo_vcf, sample,
                                                  "%s-%s%s" % (base, sample, ext), data["config"])
        delly_vcf = _delly_count_evidence_filter(delly_sample_vcf, data)
        data["sv"].append({"variantcaller": "delly", "vrn_file": delly_vcf,
                           "exclude": exclude_file})
        out.append(data)
    return out
Пример #6
0
def run(items):
    """Perform detection of structural variations with delly.

    Performs post-call filtering with a custom filter tuned based
    on NA12878 Moleculo and PacBio data, using calls prepared by
    @ryanlayer and @cc2qe

    Filters using the high quality variant pairs (DV) compared with
    high quality reference pairs (DR).
    """
    work_dir = utils.safe_makedir(os.path.join(items[0]["dirs"]["work"], "structural",
                                               dd.get_sample_name(items[0]), "delly"))
    # Add core request for delly
    config = copy.deepcopy(items[0]["config"])
    delly_config = utils.get_in(config, ("resources", "delly"), {})
    delly_config["cores"] = 1
    config["resources"]["delly"] = delly_config
    parallel = {"type": "local", "cores": config["algorithm"].get("num_cores", 1),
                "progs": ["delly"]}
    work_bams = [dd.get_align_bam(d) for d in items]
    ref_file = dd.get_ref_file(items[0])
    exclude_file = _get_full_exclude_file(items, work_bams, work_dir)
    bytype_vcfs = run_multicore(_run_delly,
                                [(work_bams, chrom, ref_file, work_dir, items)
                                 for chrom in sshared.get_sv_chroms(items, exclude_file)],
                                config, parallel)
    out_file = "%s.vcf.gz" % sshared.outname_from_inputs(bytype_vcfs)
    combo_vcf = vcfutils.combine_variant_files(bytype_vcfs, out_file, ref_file, config)
    out = []
    upload_counts = collections.defaultdict(int)
    for data in items:
        if "sv" not in data:
            data["sv"] = []
        base, ext = utils.splitext_plus(combo_vcf)
        final_vcf = sshared.finalize_sv(combo_vcf, data, items)
        if final_vcf:
            delly_vcf = _delly_count_evidence_filter(final_vcf, data)
            data["sv"].append({"variantcaller": "delly", "vrn_file": delly_vcf,
                               "do_upload": upload_counts[final_vcf] == 0,  # only upload a single file per batch
                               "exclude": exclude_file})
            upload_counts[final_vcf] += 1
        out.append(data)
    return out