Пример #1
0
def get_distributions_by_type(records, variant_types, field, bins):
    """Build a per-type histogram of one record field.

    Args:
        records: Iterable of records understood by ``vu.get_sv_type`` and
            ``vu.get_info_field``.
        variant_types: Iterable of type labels; each one becomes a key of
            the result.
        field: Field name forwarded to ``vu.get_info_field``.
        bins: Sized collection of bin boundaries forwarded to
            ``get_distribution_index``.

    Returns:
        dict mapping every entry of ``variant_types`` to a list of
        ``len(bins) + 1`` integer counts.

    Raises:
        KeyError: if ``vu.get_sv_type`` yields a type that is not one of
            ``variant_types``.
    """
    bin_total = len(bins)
    allowed_types = set(variant_types)
    # One independent counter list per type, with one more slot than bins.
    histograms = {sv_type: [0] * (bin_total + 1) for sv_type in variant_types}
    for rec in records:
        sv_type = vu.get_sv_type(rec, allowed_types)
        value = vu.get_info_field(rec, field)
        slot = get_distribution_index(value, bins, bin_total)
        histograms[sv_type][slot] += 1
    return histograms
Пример #2
0
def create_trees_from_records(records, variant_types, contigs, padding=0):
    """Create an ``IntervalTree`` per (type, contig) pair and fill it from records.

    Args:
        records: Iterable of records exposing ``chrom`` and ``start`` and
            understood by ``vu.get_sv_type`` / ``vu.get_record_length``.
        variant_types: Iterable of type labels; each becomes an outer key.
        contigs: Iterable of contig names; each becomes an inner key.
        padding: Amount subtracted from the interval start and added to the
            interval end. Defaults to 0.

    Returns:
        Nested dict ``{type: {contig: IntervalTree}}``. Each record adds the
        interval ``(start - padding, start + length + padding)`` to the tree
        for its type and contig.

    Raises:
        KeyError: if a record's type or contig was not listed in
            ``variant_types`` / ``contigs``.
    """
    allowed_types = set(variant_types)
    forest = {
        sv_type: {contig: IntervalTree() for contig in contigs}
        for sv_type in variant_types
    }
    for rec in records:
        sv_type = vu.get_sv_type(rec, allowed_types)
        chrom = rec.chrom
        span = vu.get_record_length(rec)
        tree = forest[sv_type][chrom]
        tree.addi(rec.start - padding, rec.start + span + padding)
    return forest
Пример #3
0
def collect_evidence_fields(records, variant_types):
    """Count, per variant type, how many records carry each evidence type.

    Args:
        records: Iterable of records understood by ``vu.get_sv_type`` and
            ``vu.get_evidence_types``.
        variant_types: Iterable of type labels; each becomes an outer key.

    Returns:
        Nested dict ``{variant_type: {evidence_type: count}}`` holding a
        zero-initialised entry for every combination of ``variant_types``
        and the module-level ``EVIDENCE_TYPES``.

    Raises:
        KeyError: if a record's variant type or one of its evidence types
            is missing from the pre-built table.
    """
    tallies = {
        sv_type: {evidence: 0 for evidence in EVIDENCE_TYPES}
        for sv_type in variant_types
    }
    allowed_variant_types = set(variant_types)
    allowed_evidence_types = set(EVIDENCE_TYPES)
    for rec in records:
        sv_type = vu.get_sv_type(rec, allowed_variant_types)
        found = vu.get_evidence_types(rec, allowed_evidence_types)
        for evidence in found:
            tallies[sv_type][evidence] += 1
    return tallies
Пример #4
0
def get_allele_frequency_counts(records, header, variant_types):
    """Bin per-record allele frequencies by type and count singletons.

    For each record the number of genotype alleles that are not ``None`` and
    greater than 0 is summed over all samples. That count divided by the
    number of samples in ``header`` is the value that gets binned against
    the module-level ``AF_BINS``.

    Args:
        records: Iterable of records exposing ``samples`` (a mapping whose
            values can be indexed with ``"GT"``) and understood by
            ``vu.get_sv_type``.
        header: Object whose ``samples`` attribute is sized.
        variant_types: Iterable of type labels. ``"CNV"`` is excluded from
            both returned dicts.

    Returns:
        Tuple ``(allele_freq_counts, num_singletons)``. The first maps each
        non-CNV type to a list of ``len(AF_BINS) + 1`` counts. The second
        maps each non-CNV type to the number of records whose allele count
        is exactly 1.

    Raises:
        ZeroDivisionError: if ``header.samples`` is empty and a record of a
            non-CNV type is encountered.
    """
    sample_total = float(len(header.samples))
    all_types = set(variant_types)
    # Don't calculate MCNV AF since non-ref alleles cannot be determined
    # without chromosome ploidy.
    af_types = all_types - {"CNV"}
    frequencies = {sv_type: [] for sv_type in af_types}
    singleton_counts = {sv_type: 0 for sv_type in af_types}

    for rec in records:
        sv_type = vu.get_sv_type(rec, all_types)
        if sv_type in af_types:
            allele_count = sum(
                1
                for call in rec.samples.values()
                for allele in call["GT"]
                if allele is not None and allele > 0
            )
            if allele_count == 1:
                singleton_counts[sv_type] += 1
            frequencies[sv_type].append(allele_count / sample_total)

    bin_total = len(AF_BINS)
    binned = {}
    for sv_type in af_types:
        histogram = [0] * (bin_total + 1)
        binned[sv_type] = histogram
        for freq in frequencies[sv_type]:
            slot = get_distribution_index(freq, AF_BINS, bin_total)
            histogram[slot] += 1
    return binned, singleton_counts
Пример #5
0
def get_count_by_type(records, variant_types):
    """Count records per type.

    Args:
        records: Iterable of records understood by ``vu.get_sv_type``.
        variant_types: Iterable of type labels; each becomes a key of the
            result, starting at zero.

    Returns:
        dict mapping every entry of ``variant_types`` to its record count.

    Raises:
        KeyError: if ``vu.get_sv_type`` yields a type that is not one of
            ``variant_types``.
    """
    allowed_types = set(variant_types)
    tally = dict.fromkeys(variant_types, 0)
    for rec in records:
        sv_type = vu.get_sv_type(rec, allowed_types)
        tally[sv_type] += 1
    return tally