Пример #1
0
def _calculate_sv_coverage_gatk(data, work_dir):
    """Build the coverage inputs for GATK-based structural variant calling.

    Two outputs are produced and returned as a tuple:

    1. the result of ``gatkcnv.collect_read_counts`` (GATK compatible counts)
    2. the mosdepth ``regions`` output for the cleaned target bins, annotated
       with genes via ``annotate.add_genes`` (heterogeneity compatible)

    TODO: Coverage is effectively computed twice: once for the GATK4
    compatible HDF read counts and once for depth plus gene annotations.
    Both are needed for creating heterogeneity inputs. Ideally this becomes a
    single mosdepth coverage calculation that also creates the GATK4 TSV
    format:

    CONTIG  START   END     COUNT
    chrM    1       1000    13268
    """
    # Imported at call time, as in the rest of this function's history,
    # rather than at module level.
    from bcbio.variation import coverage
    from bcbio.structural import annotate

    # --- GATK compatible read counts
    gatk_counts = gatkcnv.collect_read_counts(data, work_dir)

    # --- heterogeneity compatible depth + gene annotation
    raw_target_bed = tz.get_in(["regions", "bins", "target"], data)
    cleaned_target_bed = bedutils.clean_file(raw_target_bed, data, bedprep_dir=work_dir)
    mosdepth_out = coverage.run_mosdepth(data, "target-gatk", cleaned_target_bed)
    annotated_regions = annotate.add_genes(mosdepth_out.regions, data, max_distance=0)

    return gatk_counts, annotated_regions
Пример #2
0
def _calculate_sv_coverage_gatk(data, work_dir):
    """Build the coverage inputs for GATK-based structural variant calling.

    Two outputs are produced and returned as a tuple:

    1. the result of ``gatkcnv.collect_read_counts`` (GATK compatible counts)
    2. the mosdepth ``regions`` output for the cleaned target bins, annotated
       with genes via ``annotate.add_genes`` (heterogeneity compatible)

    TODO: Coverage is effectively computed twice: once for the GATK4
    compatible HDF read counts and once for depth plus gene annotations.
    Both are needed for creating heterogeneity inputs. Ideally this becomes a
    single mosdepth coverage calculation that also creates the GATK4 TSV
    format:

    CONTIG  START   END     COUNT
    chrM    1       1000    13268
    """
    # Imported at call time, as in the rest of this function's history,
    # rather than at module level.
    from bcbio.variation import coverage
    from bcbio.structural import annotate

    # --- GATK compatible read counts
    gatk_counts = gatkcnv.collect_read_counts(data, work_dir)

    # --- heterogeneity compatible depth + gene annotation
    raw_target_bed = tz.get_in(["regions", "bins", "target"], data)
    cleaned_target_bed = bedutils.clean_file(raw_target_bed, data, bedprep_dir=work_dir)
    mosdepth_out = coverage.run_mosdepth(data, "target-gatk", cleaned_target_bed)
    annotated_regions = annotate.add_genes(mosdepth_out.regions, data, max_distance=0)

    return gatk_counts, annotated_regions