示例#1
0
文件: genotype.py 项目: zeneofa/bcbio
def variantcall_sample(data, region=None, align_bams=None, out_file=None):
    """Run variant calling for one region of a sample unless output exists.

    When ``out_file`` is not present on disk, the caller named by
    ``config["algorithm"]["variantcaller"]`` (default ``"gatk"``) writes to a
    ``-raw`` sibling of ``out_file``. Those calls go through
    ``phasing.read_backed_phasing`` when the ``phasing`` option is ``"gatk"``,
    and the resulting file is linked to ``out_file``.

    Returns a one-item list holding ``data`` with ``vrn_file`` set to
    ``out_file`` and ``region`` set when a truthy region was supplied.
    """
    have_output = (out_file is not None and os.path.exists(out_file)
                   and os.path.lexists(out_file))
    if not have_output:
        utils.safe_makedir(os.path.dirname(out_file))
        reference = data["sam_ref"]
        config = data["config"]
        callers = get_variantcallers()
        run_caller = callers[config["algorithm"].get("variantcaller", "gatk")]
        # One BAM: this sample is the only item. Several BAMs: ask
        # multi.get_orig_items for the items behind this data, one per BAM.
        if len(align_bams) == 1:
            samples = [data]
        else:
            samples = multi.get_orig_items(data)
        assert len(samples) == len(align_bams)
        base, ext = utils.splitext_plus(out_file)
        raw_calls = "%s-raw%s" % (base, ext)
        variation = data["genome_resources"]["variation"]
        called = run_caller(align_bams, samples, reference, variation, region,
                            raw_calls)
        phasing_mode = data["config"]["algorithm"].get("phasing", False)
        if phasing_mode == "gatk":
            called = phasing.read_backed_phasing(called, align_bams, reference,
                                                 region, config)
        utils.symlink_plus(called, out_file)
    if region:
        data["region"] = region
    data["vrn_file"] = out_file
    return [data]
示例#2
0
def variantcall_sample(data, region=None, align_bams=None, out_file=None):
    """Run variant calling for one region of a sample unless output exists.

    When ``out_file`` is not present on disk, every input BAM is indexed and
    the caller named by ``config["algorithm"]["variantcaller"]`` is invoked;
    whatever path the caller returns becomes the new ``out_file``.

    Returns a one-item list holding ``data`` with ``vrn_file`` set to
    ``out_file`` and ``region`` set when a truthy region was supplied.
    """
    have_output = (out_file is not None and os.path.exists(out_file)
                   and os.path.lexists(out_file))
    if not have_output:
        utils.safe_makedir(os.path.dirname(out_file))
        reference = dd.get_ref_file(data)
        config = data["config"]
        callers = get_variantcallers()
        run_caller = callers[config["algorithm"].get("variantcaller")]
        # One BAM: this sample is the only item. Several BAMs: ask
        # multi.get_orig_items for the items behind this data, one per BAM.
        samples = [data] if len(align_bams) == 1 else multi.get_orig_items(data)
        assert len(samples) == len(align_bams)
        # Fall back to an empty mapping when the lookup yields nothing usable.
        resources = tz.get_in(("genome_resources", "variation"), data, {}) or {}
        for aligned in align_bams:
            bam.index(aligned, data["config"], check_timestamp=False)
        out_file = run_caller(align_bams, samples, reference, resources,
                              region, out_file)
    if region:
        data["region"] = region
    data["vrn_file"] = out_file
    return [data]
示例#3
0
def variantcall_sample(data, region=None, align_bams=None, out_file=None):
    """Parallel entry point for doing genotyping of a region of a sample.

    When ``out_file`` is not present on disk, every input BAM is indexed and
    the caller named by ``config["algorithm"]["variantcaller"]`` (default
    ``"gatk"``) is run. With ``phasing`` set to ``"gatk"`` the caller writes
    to a ``-raw`` sibling of ``out_file``, the calls are passed through
    ``phasing.read_backed_phasing`` and the phased file is linked to
    ``out_file``. With any other ``phasing`` value the caller writes straight
    to ``out_file``.

    Returns a one-item list holding ``data`` with ``vrn_file`` set to
    ``out_file`` and ``region`` set when a truthy region was supplied.
    """
    if out_file is None or not os.path.exists(out_file) or not os.path.lexists(out_file):
        utils.safe_makedir(os.path.dirname(out_file))
        sam_ref = data["sam_ref"]
        config = data["config"]
        caller_fns = get_variantcallers()
        caller_fn = caller_fns[config["algorithm"].get("variantcaller", "gatk")]
        if len(align_bams) == 1:
            items = [data]
        else:
            items = multi.get_orig_items(data)
            assert len(items) == len(align_bams)
        # Fall back to an empty mapping when the lookup yields nothing usable.
        assoc_files = tz.get_in(("genome_resources", "variation"), data, {}) or {}
        for bam_file in align_bams:
            bam.index(bam_file, config, check_timestamp=False)
        # Decide once whether phasing will actually run. Previously any truthy
        # "phasing" value redirected the caller to the "-raw" file, while only
        # "gatk" triggered the phasing step that links the result to out_file,
        # so other truthy values left out_file uncreated.
        do_phasing = config["algorithm"].get("phasing", False) == "gatk"
        call_file = "%s-raw%s" % utils.splitext_plus(out_file) if do_phasing else out_file
        call_file = caller_fn(align_bams, items, sam_ref, assoc_files, region, call_file)
        if do_phasing:
            call_file = phasing.read_backed_phasing(call_file, align_bams, sam_ref, region, config)
            utils.symlink_plus(call_file, out_file)
    if region:
        data["region"] = region
    data["vrn_file"] = out_file
    return [data]
示例#4
0
def _get_orig_items(data):
    """Retrieve original items in a batch, handling CWL and standard cases.
    """
    if isinstance(data, dict):
        if dd.get_align_bam(data) and tz.get_in(["metadata", "batch"], data):
            return vmulti.get_orig_items(data)
        else:
            return [data]
    else:
        return data
示例#5
0
def _get_orig_items(data):
    """Retrieve original items in a batch, handling CWL and standard cases.
    """
    if isinstance(data, dict):
        if tz.get_in(["metadata", "batch"], data):
            return vmulti.get_orig_items(data)
        else:
            return [data]
    else:
        return data
示例#6
0
def _get_validate(data):
    """Retrieve items to validate, from single samples or from combined joint calls.
    """
    if data.get("vrn_file") and "validate" in data["config"]["algorithm"]:
        return data
    elif "group_orig" in data:
        for sub in multi.get_orig_items(data):
            if "validate" in sub["config"]["algorithm"]:
                return sub
    return None
示例#7
0
def _get_validate(data):
    """Retrieve items to validate, from single samples or from combined joint calls.
    """
    if data.get("vrn_file") and "validate" in data["config"]["algorithm"]:
        return data
    elif "group_orig" in data:
        for sub in multi.get_orig_items(data):
            if "validate" in sub["config"]["algorithm"]:
                sub_val = utils.deepish_copy(sub)
                sub_val["vrn_file"] = data["vrn_file"]
                return sub_val
    return None
示例#8
0
def _get_validate(data):
    """Retrieve items to validate, from single samples or from combined joint calls.
    """
    if data.get("vrn_file") and tz.get_in(["config", "algorithm", "validate"], data):
        return data
    elif "group_orig" in data:
        for sub in multi.get_orig_items(data):
            if "validate" in sub["config"]["algorithm"]:
                sub_val = utils.deepish_copy(sub)
                sub_val["vrn_file"] = data["vrn_file"]
                return sub_val
    return None
示例#9
0
def extract(data):
    """Attach a germline VCF to ``data`` when it has a paired phenotype.

    If ``vcfutils.get_paired_phenotype(data)`` is falsy, ``data`` is returned
    untouched. Otherwise a germline file is derived from ``data["vrn_file"]``:
    via ``_extract_germline`` when the sample has batches and more than one
    original item, via ``_remove_prioritization`` otherwise. The result is
    passed through ``vcfutils.bgzip_and_index`` and stored as
    ``data["vrn_file_plus"] = {"germline": ...}``.
    """
    if not vcfutils.get_paired_phenotype(data):
        return data
    multi_sample = dd.get_batches(data) and len(vmulti.get_orig_items(data)) > 1
    to_germline = _extract_germline if multi_sample else _remove_prioritization
    raw_germline = to_germline(data["vrn_file"], data)
    prepared = vcfutils.bgzip_and_index(raw_germline, data["config"])
    data["vrn_file_plus"] = {"germline": prepared}
    return data
示例#10
0
def extract(data):
    """Add germline calls to ``data`` for samples with a paired phenotype.

    Samples for which ``vcfutils.get_paired_phenotype`` returns a falsy value
    are returned as-is. For the rest, ``data["vrn_file"]`` is processed by
    ``_extract_germline`` (batched sample with more than one original item) or
    ``_remove_prioritization`` (everything else), then passed through
    ``vcfutils.bgzip_and_index``; the final path is stored under
    ``data["vrn_file_plus"]["germline"]``, replacing any previous
    ``vrn_file_plus`` value.
    """
    if vcfutils.get_paired_phenotype(data):
        batches = dd.get_batches(data)
        if batches and len(vmulti.get_orig_items(data)) > 1:
            germline = _extract_germline(data["vrn_file"], data)
        else:
            germline = _remove_prioritization(data["vrn_file"], data)
        data["vrn_file_plus"] = {
            "germline": vcfutils.bgzip_and_index(germline, data["config"]),
        }
    return data
示例#11
0
def _do_prioritize(data):
    """Decide whether prioritization should run for ``data``.

    Returns None when ``vcfutils.get_paired_phenotype(data)`` is falsy.
    Otherwise the phenotypes of the items under consideration are inspected
    (the original batch items when ``metadata -> batch`` is set, else just
    ``data``) and the result is True only if a ``"tumor"`` phenotype is
    present and no ``"normal"`` phenotype is.
    """
    if not vcfutils.get_paired_phenotype(data):
        return None
    if tz.get_in(["metadata", "batch"], data):
        samples = vmulti.get_orig_items(data)
    else:
        samples = [data]
    phenotypes = [vcfutils.get_paired_phenotype(sample) for sample in samples]
    return "tumor" in phenotypes and "normal" not in phenotypes
示例#12
0
def _do_prioritize(data):
    """Report whether ``data`` represents tumor-only input.

    Nothing (None) is returned when ``vcfutils.get_paired_phenotype(data)`` is
    falsy. Otherwise the items to inspect are the original batch items when
    ``metadata -> batch`` is set, or ``data`` alone; the answer is True when
    at least one item has phenotype ``"tumor"`` and none has ``"normal"``.
    """
    if not vcfutils.get_paired_phenotype(data):
        return None
    in_batch = tz.get_in(["metadata", "batch"], data)
    samples = vmulti.get_orig_items(data) if in_batch else [data]
    seen = set()
    for sample in samples:
        seen.add(vcfutils.get_paired_phenotype(sample))
    tumor_present = "tumor" in seen
    normal_present = "normal" in seen
    return tumor_present and not normal_present