Exemplo n.º 1
0
def _get_maxcov_downsample(data):
    """Calculate maximum coverage downsampling for whole genome samples.

    Average coverage is estimated as
    ``num_reads * mean_read_size / callable_size`` and then scaled by the
    multiplier read from ``dd.get_maxcov_downsample(data)``.

    Returns the integer coverage cap, or None if we're not doing
    downsampling. None is returned when any of the following holds:

    - the multiplier is unset or not positive, or no reads were counted;
    - the callable fraction of the reference does not exceed
      ``coverage.GENOME_COV_THRESH``;
    - the coverage interval is not one of ``"genome"``, ``None``, ``False``;
    - the estimated coverage is below the minimum of 10;
    - the estimate cannot be computed because the reference size, the
      callable size or the number of sampled reads is zero.
    """
    from bcbio.bam import ref
    from bcbio.ngsalign import alignprep, bwa
    from bcbio.variation import coverage
    min_coverage_for_downsampling = 10
    multiplier = dd.get_maxcov_downsample(data)
    # Only the first input file is used for the read count and size sampling.
    fastq_file = data["files"][0]
    num_reads = alignprep.total_reads_from_grabix(fastq_file)
    if not (num_reads and multiplier and multiplier > 0):
        return None

    vrs = dd.get_variant_regions_merged(data)
    total_size = sum(c.size for c in ref.file_contigs(dd.get_ref_file(data), data["config"]))
    if not total_size:
        # No reference bases to normalize against; avoid dividing by zero.
        return None
    if vrs:
        callable_size = pybedtools.BedTool(vrs).total_coverage()
        genome_cov_pct = callable_size / float(total_size)
    else:
        # Without variant regions the whole reference counts as callable.
        callable_size = total_size
        genome_cov_pct = 1.0
    if not (genome_cov_pct > coverage.GENOME_COV_THRESH
            and dd.get_coverage_interval(data) in ["genome", None, False]):
        return None

    # Weighted mean read size from the (count, size) pairs reported for the
    # fastq file. NOTE(review): 5000 is presumably the number of reads
    # sampled -- confirm against bwa.fastq_size_output.
    total_counts, total_sizes = 0, 0
    for count, size in bwa.fastq_size_output(fastq_file, 5000):
        total_counts += int(count)
        total_sizes += (int(size) * int(count))
    if not total_counts or not callable_size:
        # Nothing sampled or nothing callable: coverage cannot be estimated.
        return None
    read_size = float(total_sizes) / float(total_counts)
    avg_cov = float(num_reads * read_size) / callable_size
    if avg_cov >= min_coverage_for_downsampling:
        return int(avg_cov * multiplier)
    return None
Exemplo n.º 2
0
def get_downsample_params(data):
    """Build the coverage downsampling parameters for a sample.

    Reads the multiplier from ``dd.get_maxcov_downsample(data)``. When it is
    set and positive, returns a dictionary holding a minimum coverage of 10
    and that multiplier; otherwise returns None.
    """
    multiplier = dd.get_maxcov_downsample(data)
    if not (multiplier and multiplier > 0):
        return None
    return {"min_coverage_for_downsampling": 10,
            "maxcov_downsample_multiplier": multiplier}
Exemplo n.º 3
0
def get_downsample_params(data):
    """Return downsampling settings, or None when downsampling is off.

    The multiplier comes from ``dd.get_maxcov_downsample(data)``; settings are
    only produced when that value is set and greater than zero. The minimum
    coverage for downsampling is fixed at 10.
    """
    settings = None
    multiplier = dd.get_maxcov_downsample(data)
    if multiplier and multiplier > 0:
        settings = {
            "min_coverage_for_downsampling": 10,
            "maxcov_downsample_multiplier": multiplier,
        }
    return settings