예제 #1
0
def keep_variable_variations(variations,
                             max_alleles,
                             filter_id='variable_variations'):
    '''Keep only the variations that have data and more than one allele.

    A variation is kept when both conditions hold:

    * at least one entry of its genotype array differs from MISSING_INT;
    * count_alleles (called with count_missing=False) reports a count
      above zero for more than one allele.

    variations -- container indexable by GT_FIELD that offers get_vars(mask).
    max_alleles -- passed through unchanged to count_alleles.
    filter_id -- label stored under FLT_ID in the returned dict.

    Returns a dict with the selected variations (FLT_VARS), the filter label
    (FLT_ID) and a stats dict (FLT_STATS) holding the number of kept
    (N_KEPT) and discarded (N_FILTERED_OUT) variations.
    '''
    gts = variations[GT_FIELD]

    # NOTE(review): gts is presumably (variation, sample, allele) -- confirm.
    # Collapse axis 2 first and axis 1 afterwards, so a variation "has data"
    # as soon as a single entry is not missing.
    call_has_allele = va.any(gts != MISSING_INT, axis=2)
    has_data = va.any(call_has_allele, axis=1)

    counts = count_alleles(gts,
                           max_alleles=max_alleles,
                           count_missing=False)
    # More than one allele with a non-zero count.
    is_variable = va.sum(counts > 0, axis=1) > 1

    keep_mask = va.logical_and(has_data, is_variable)

    kept_variations = variations.get_vars(keep_mask)

    stats = {
        N_KEPT: va.count_nonzero(keep_mask),
        N_FILTERED_OUT: va.count_nonzero(va.logical_not(keep_mask)),
    }

    return {
        FLT_VARS: kept_variations,
        FLT_ID: filter_id,
        FLT_STATS: stats,
    }
예제 #2
0
def _call_is_hom_in_memory(gts):
    '''Return a boolean array telling which calls have all alleles equal.

    gts -- 3-D genotype array; the last axis is walked allele by allele.
           NOTE(review): presumably (variation, sample, allele) -- confirm.

    The result has the shape of gts without its last axis. An entry stays
    True only if every allele along the last axis equals the previous one.
    Missing values get no special treatment here: they are compared like any
    other allele code.
    '''
    # The builtin bool is used instead of numpy.bool: that alias was
    # deprecated in NumPy 1.20 and removed in 1.24 (AttributeError), while
    # the builtin is accepted as a dtype by every NumPy release.
    is_hom = va.create_full_array_in_memory(gts.shape[:-1],
                                            True,
                                            dtype=bool)
    # Comparing each allele with its predecessor is enough: equality of all
    # adjacent pairs implies equality of the whole set.
    for idx in range(1, gts.shape[2]):
        is_hom = va.logical_and(gts[:, :, idx] == gts[:, :, idx - 1], is_hom)
    return is_hom
예제 #3
0
def gts_as_mat012(gts):
    '''Recode a genotype array into one 0 / 1 / 2 value per call.

    Intended coding: 0 (homozygous for the major allele), 1 (heterozygous),
    2 (homozygous for another allele).
    NOTE(review): this relies on allele code 0 being the major allele and on
    MISSING_INT being smaller than 1 -- confirm with the callers.

    The steps, applied in this order along axis 2:

    * sum the allele codes of every call;
    * calls with any MISSING_INT allele are set to MISSING_INT;
    * every value >= 1 becomes 2;
    * calls recoded as 2 that contain a 0 allele become 1.

    Returns the recoded array (gts without axis 2).
    '''
    coded = va.sum(gts, axis=2)

    # These masks depend only on gts, not on the partially recoded result.
    has_missing = va.any(gts == MISSING_INT, axis=2)
    has_zero_allele = va.any(gts == 0, axis=2)

    coded[has_missing] = MISSING_INT
    coded[coded >= 1] = 2

    # Among the non-zero sums, the ones that still carry a 0 allele are het.
    is_het = va.logical_and(coded == 2, has_zero_allele)
    coded[is_het] = 1

    return coded
예제 #4
0
def _select_variations_in_region(variations, regions):
    '''Build a boolean mask of the variations located in any given region.

    variations -- container indexable by CHROM_FIELD and POS_FIELD.
    regions -- iterable of sequences: (chrom,) selects a whole chromosome,
               (chrom, start, end) selects positions with
               start <= pos < end on that chromosome.

    Returns the element-wise OR of the per-region masks, or None when
    regions is empty.

    Raises ValueError when the first item of a region is a tuple or a list.
    A region with exactly two items is not validated and fails with
    IndexError when its end is read.
    '''
    chroms = variations[CHROM_FIELD]
    poss = variations[POS_FIELD]

    mask = None
    for region in regions:
        chrom = region[0]
        if isinstance(chrom, (tuple, list)):
            raise ValueError('Malformed region: ' + str(region))

        region_mask = chroms[:] == chrom

        if len(region) > 1:
            start = region[1]
            end = region[2]
            # Half-open interval: start included, end excluded.
            in_window = va.logical_and(start <= poss, poss < end)
            region_mask = va.logical_and(region_mask, in_window)

        # The first region seeds the mask, later ones are OR-ed into it.
        mask = region_mask if mask is None else va.logical_or(mask,
                                                              region_mask)

    return mask
예제 #5
0
def _kosman(vars1, vars2):
    '''Compute a per-position 0 / 0.5 / 1 distance between two individuals.

    The genotypes come from _get_gts_non_missing_in_both(vars1, vars2); both
    results are expected to have two columns (diploid calls).

    For each position the number of alleles of one individual found in the
    other is counted in both directions and the two counts are added:

    * total 0 -> 1
    * total 4 -> 0
    * anything else -> 0.5

    Returns a float64 array with one value per position.

    Raises ValueError if the first individual does not have two alleles per
    call.
    '''
    gts1, gts2 = _get_gts_non_missing_in_both(vars1, vars2)

    if gts1.shape[1] != 2:
        raise ValueError('Only diploid are allowed')

    # Broadcasting compares every allele of one individual against both
    # alleles of the other at the same position.
    first_vs_second = gts1 == gts2.transpose()[:, :, None]
    second_vs_first = gts2 == gts1.transpose()[:, :, None]

    shared_from_first = va.any(second_vs_first, axis=2).sum(axis=0)
    shared_from_second = va.any(first_vs_second, axis=2).sum(axis=0)
    distances = va.add(shared_from_first,
                       shared_from_second,
                       dtype=np.float64)

    # The order matters: the recoding happens in place, so later tests see
    # the values written by earlier steps.
    distances[distances == 0] = 1
    distances[distances == 4] = 0

    # Whatever is neither 0 nor 1 at this point is a partial match.
    is_partial = va.logical_and(distances != 1, distances != 0)
    distances[is_partial] = 0.5

    return distances