def keep_variable_variations(variations, max_alleles,
                             filter_id='variable_variations'):
    '''Keep only the variations in which more than one allele is observed.

    A variation is selected when at least one of its genotype calls holds
    a value different from MISSING_INT and more than one allele has a
    count above zero (alleles are counted with count_missing=False).

    Args:
        variations: object indexable by field name that also offers
            ``get_vars(mask)``. The GT_FIELD array is reduced along
            axis 2 and then axis 1, so it is expected to be 3-D
            (presumably variations x samples x ploidy -- TODO confirm).
        max_alleles: forwarded unchanged to ``count_alleles``.
        filter_id: identifier stored under FLT_ID in the result.

    Returns:
        dict with the selected variations (FLT_VARS), the filter id
        (FLT_ID) and a stats dict (FLT_STATS) holding N_KEPT and
        N_FILTERED_OUT.
    '''
    genotypes = variations[GT_FIELD]

    # Variations with at least one call that is not entirely missing.
    call_has_data = va.any(genotypes != MISSING_INT, axis=2)
    has_some_call = va.any(call_has_data, axis=1)

    # Variations in which more than one allele has a non-zero count.
    counts = count_alleles(genotypes, max_alleles=max_alleles,
                           count_missing=False)
    n_observed_alleles = va.sum(counts > 0, axis=1)
    is_polymorphic = n_observed_alleles > 1

    keep_mask = va.logical_and(has_some_call, is_polymorphic)
    kept_variations = variations.get_vars(keep_mask)

    stats = {
        N_KEPT: va.count_nonzero(keep_mask),
        N_FILTERED_OUT: va.count_nonzero(va.logical_not(keep_mask)),
    }
    return {FLT_VARS: kept_variations, FLT_ID: filter_id, FLT_STATS: stats}
def _call_is_hom_in_memory(gts):
    '''Return a boolean mask marking the calls whose alleles are all equal.

    Args:
        gts: genotype array indexed as ``gts[:, :, allele_idx]``, i.e. 3-D
            with the alleles of each call along the last axis.

    Returns:
        Boolean array of shape ``gts.shape[:-1]``; an item is True when
        every allele of that call equals the previous one. Missing values
        get no special treatment: they are compared like any other value.
    '''
    # The builtin ``bool`` is used as dtype: ``numpy.bool`` was only an
    # alias of it, deprecated in NumPy 1.20 and removed in 1.24, where
    # accessing it raises AttributeError.
    is_hom = va.create_full_array_in_memory(gts.shape[:-1], True, dtype=bool)

    # Compare every allele with the one before it; a single mismatch
    # switches the call to False for good.
    for idx in range(1, gts.shape[2]):
        same_as_previous = gts[:, :, idx] == gts[:, :, idx - 1]
        is_hom = va.logical_and(same_as_previous, is_hom)
    return is_hom
def gts_as_mat012(gts):
    '''Recode a genotype array as 0 / 1 / 2 values, one per call.

    The alleles of every call (axis 2) are collapsed into one number:

    * MISSING_INT when any allele of the call equals MISSING_INT,
    * 0 when the allele sum is below 1 (all alleles are 0, assuming the
      non-missing alleles are non-negative),
    * 1 when the allele sum is at least 1 and some allele is 0
      (heterozygote that carries the 0 allele),
    * 2 for the remaining calls.

    NOTE(review): the missing marker is written before the ``>= 1``
    recoding, so this presumes MISSING_INT is lower than 1 -- confirm.
    '''
    has_missing = va.any(gts == MISSING_INT, axis=2)
    has_zero_allele = va.any(gts == 0, axis=2)

    coded = va.sum(gts, axis=2)
    coded[has_missing] = MISSING_INT
    # Any call with a non-zero allele starts as 2 ...
    coded[coded >= 1] = 2
    # ... and is lowered to 1 when it also carries a 0 allele.
    is_het_with_zero = va.logical_and(coded == 2, has_zero_allele)
    coded[is_het_with_zero] = 1
    return coded
def _select_variations_in_region(variations, regions):
    '''Build a mask of the variations located in any of the regions.

    Args:
        variations: object indexable with CHROM_FIELD and POS_FIELD.
        regions: iterable of regions. A region is either ``(chrom,)``,
            which matches the whole chromosome, or ``(chrom, start, end)``,
            which matches positions with ``start <= pos < end``.

    Returns:
        The element-wise OR of the masks of all the regions, or None when
        ``regions`` yields nothing.

    Raises:
        ValueError: if the first item of a region is a tuple or a list.
        IndexError: if a region has exactly two items, because the end
            position ``region[2]`` is read whenever ``len(region) > 1``.
    '''
    chroms = variations[CHROM_FIELD]
    positions = variations[POS_FIELD]

    combined_mask = None
    for region in regions:
        chrom = region[0]
        if isinstance(chrom, (tuple, list)):
            raise ValueError('Malformed region: ' + str(region))

        region_mask = chroms[:] == chrom
        if len(region) > 1:
            # Half-open interval: start is included, end is not.
            after_start = region[1] <= positions
            before_end = positions < region[2]
            within_range = va.logical_and(after_start, before_end)
            region_mask = va.logical_and(region_mask, within_range)

        combined_mask = (region_mask if combined_mask is None
                         else va.logical_or(combined_mask, region_mask))
    return combined_mask
def _kosman(vars1, vars2):
    '''Compute a per-position distance (0, 0.5 or 1) between two diploids.

    The genotypes returned by ``_get_gts_non_missing_in_both`` are
    compared allele against allele in both directions and the number of
    alleles that find a match in the other individual is added up per
    position. That count is then remapped in place:

    * 0 matches -> 1.0
    * 4 matches -> 0.0
    * any other value that is not already 0 or 1 -> 0.5

    Args:
        vars1, vars2: passed straight to ``_get_gts_non_missing_in_both``
            (presumably one individual each -- TODO confirm).

    Returns:
        float64 array with the remapped values.

    Raises:
        ValueError: if the second dimension of the first genotype array
            is not 2.
    '''
    gts_a, gts_b = _get_gts_non_missing_in_both(vars1, vars2)
    if gts_a.shape[1] != 2:
        raise ValueError('Only diploid are allowed')

    # Broadcast so that every allele of one individual is compared with
    # every allele of the other at the same position.
    a_vs_b = gts_a == gts_b.transpose()[:, :, None]
    b_vs_a = gts_b == gts_a.transpose()[:, :, None]

    matches_from_b = va.any(b_vs_a, axis=2).sum(axis=0)
    matches_from_a = va.any(a_vs_b, axis=2).sum(axis=0)
    distance = va.add(matches_from_b, matches_from_a, dtype=np.float64)

    # The order of these assignments matters: the values written by the
    # first two steps must not be touched by the last one.
    distance[distance == 0] = 1
    distance[distance == 4] = 0
    is_partial = va.logical_and(distance != 1, distance != 0)
    distance[is_partial] = 0.5
    return distance