def filter_by_maf_by_allele_count(
        variations, max_allowable_maf=None, min_allowable_maf=None,
        filter_id='filter_by_maf_by_allele_count',
        min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT,
        calc_histogram=False, n_bins=DEF_NUM_BINS, limits=None):
    """Select variations using the MAF computed from allele counts.

    The per-variation MAF values come from ``calc_maf_by_allele_count`` and
    are handed, together with both thresholds, to ``_select_vars``
    (presumably keeping the variations whose MAF lies within the allowable
    range -- confirm against ``_select_vars``).

    Args:
        variations: variations object to filter.
        max_allowable_maf: upper MAF threshold, or None for no upper bound.
        min_allowable_maf: lower MAF threshold, or None for no lower bound.
        filter_id: identifier stored under ``FLT_ID`` in the returned dict.
        min_num_genotypes: forwarded to ``calc_maf_by_allele_count``.
        calc_histogram: when true, a histogram of the MAF values and the
            thresholds that were set are added to the stats.
        n_bins: number of histogram bins.
        limits: range handed to ``va.histogram``; ``(0, 1)`` when None.

    Returns:
        A dict with the keys ``FLT_VARS``, ``FLT_ID`` and ``FLT_STATS``.
    """
    mafs = calc_maf_by_allele_count(variations,
                                    min_num_genotypes=min_num_genotypes)
    selection = _select_vars(variations, mafs, min_allowable_maf,
                             max_allowable_maf)
    stats = selection[FLT_STATS]

    if calc_histogram:
        hist_range = (0, 1) if limits is None else limits
        counts, bin_edges = va.histogram(mafs, n_bins=n_bins,
                                         limits=hist_range)
        stats[COUNT] = counts
        stats[BIN_EDGES] = bin_edges
        # The 'limits' stat holds the filter thresholds that were actually
        # set (minimum first), not the histogram range used above.
        stats['limits'] = [
            threshold
            for threshold in (min_allowable_maf, max_allowable_maf)
            if threshold is not None
        ]

    return {FLT_VARS: selection[FLT_VARS],
            FLT_ID: filter_id,
            FLT_STATS: stats}
def test_calc_maf_by_allele_count_in_memory(self):
    # MAF by allele count on NumPy-backed variations: two samples and
    # three rows in each of the genotype / allele-count arrays.
    variations = Variations(samples=np.array(['aa', 'bb']))
    variations[GT_FIELD] = np.array([[[-1, 1], [2, 1]],
                                     [[-1, -1], [-1, 2]],
                                     [[1, -1], [1, 1]]])
    variations[RO_FIELD] = np.array([[-1, 8], [-1, -1], [6, 4]])
    variations[AO_FIELD] = np.array([[[1, 4], [2, 1]],
                                     [[-1, -1], [3, 3]],
                                     [[1, 4], [5, 1]]])

    result = calc_maf_by_allele_count(variations, min_num_genotypes=0)

    expected = [0.5, 0.5, 0.47619048]
    # zip() stops at the shorter input, so without this check a result
    # with missing (or no) entries would pass the loop below.
    self.assertEqual(len(result), len(expected))
    for a, b in zip(result, expected):
        self.assertAlmostEqual(a, b, places=2)
def test_calc_maf_by_allele_count(self):
    # MAF by allele count on dask-backed variations; the result is lazy
    # and must be materialized with compute() before it is checked.
    variations = Variations(samples=da.array(['aa', 'bb']))
    variations[GT_FIELD] = da.from_array([[[-1, 1], [2, 1]],
                                          [[-1, -1], [-1, 2]],
                                          [[1, -1], [1, 1]]])
    variations[RO_FIELD] = da.from_array(
        np.array([[-1, 8], [-1, -1], [6, 4]]))
    variations[AO_FIELD] = da.from_array(
        np.array([[[1, 4], [2, 1]],
                  [[-1, -1], [3, 3]],
                  [[1, 4], [5, 1]]]))

    # with this step we create a variation with dask arrays of unknown
    # shapes
    variations = remove_low_call_rate_vars(variations, 0)[FLT_VARS]

    future_result = calc_maf_by_allele_count(variations,
                                             min_num_genotypes=0)
    result = compute(future_result)

    expected = [0.5, 0.5, 0.47619048]
    # zip() stops at the shorter input, so without this check a result
    # with missing (or no) entries would pass the loop below.
    self.assertEqual(len(result), len(expected))
    for a, b in zip(result, expected):
        self.assertAlmostEqual(a, b, places=2)
def test_calc_maf_by_allele_count_empty_vars(self):
    # With the variations built by _create_empty_dask_variations() the
    # computed MAF result must be an empty one-dimensional array.
    empty_variations = _create_empty_dask_variations()
    pending = calc_maf_by_allele_count(empty_variations)
    computed = compute(pending)
    self.assertEqual(computed.shape, (0, ))