Exemplo n.º 1
0
    def test_empty_gt_allele_count(self):
        """Check how count_alleles behaves when there are no genotypes.

        Two inputs are exercised:
        * an empty numpy array, for which EmptyVariationsError is expected;
        * the GT_FIELD of the object built by _create_empty_dask_variations,
          for which the call returns an object whose ``compute()`` result
          has shape (0, 4) when max_alleles is 3.
        """
        # Empty in-memory array: the call itself is expected to raise.
        empty_gts = np.array([])
        self.assertRaises(EmptyVariationsError,
                          count_alleles,
                          empty_gts,
                          max_alleles=3)

        # Empty variations from the helper: nothing is raised up front, the
        # result only has to be computed and checked for its shape.
        lazy_gts = _create_empty_dask_variations()[GT_FIELD]
        computed_counts = count_alleles(lazy_gts, max_alleles=3).compute()
        self.assertEqual(computed_counts.shape, (0, 4))
Exemplo n.º 2
0
def keep_variable_variations(variations,
                             max_alleles,
                             filter_id='variable_variations'):
    """Select the variations in which more than one allele is observed.

    A variation is kept when both conditions hold:
    * at least one of its genotype values differs from MISSING_INT;
    * more than one allele has a non-zero count, missing alleles not
      being counted (``count_missing=False``).

    Args:
        variations: object indexable by GT_FIELD and offering ``get_vars``.
            NOTE(review): the genotype array is reduced over axis 2 and
            then axis 1, so it is presumably laid out as
            (variation, sample, allele) -- confirm against the caller.
        max_alleles: forwarded unchanged to ``count_alleles``.
        filter_id: identifier stored under FLT_ID in the result.

    Returns:
        dict with three keys: FLT_VARS (the result of
        ``variations.get_vars`` for the selection mask), FLT_ID (the given
        ``filter_id``) and FLT_STATS (a dict holding the N_KEPT and
        N_FILTERED_OUT counts derived from the mask).
    """
    genotypes = variations[GT_FIELD]

    # Mask 1: the variation has at least one non-missing genotype value.
    has_called_gt = va.any(va.any(genotypes != MISSING_INT, axis=2), axis=1)

    # Mask 2: more than one allele shows a positive count.
    counts_per_allele = count_alleles(genotypes,
                                      max_alleles=max_alleles,
                                      count_missing=False)
    is_variable = va.sum(counts_per_allele > 0, axis=1) > 1

    keep_mask = va.logical_and(has_called_gt, is_variable)
    kept_variations = variations.get_vars(keep_mask)

    stats = {
        N_KEPT: va.count_nonzero(keep_mask),
        N_FILTERED_OUT: va.count_nonzero(va.logical_not(keep_mask)),
    }

    result = {FLT_VARS: kept_variations}
    result[FLT_ID] = filter_id
    result[FLT_STATS] = stats
    return result
Exemplo n.º 3
0
 def test_allele_count(self):
     """Check count_alleles on a small in-memory genotype array.

     In the expected matrix each row corresponds to one row of ``gts``;
     columns 0-2 hold the number of times alleles 0, 1 and 2 appear in
     that row and the last column matches the number of -1 entries.
     """
     gts = np.array([
         [[0, 2], [-1, -1]],
         [[0, 2], [1, -1]],
         [[0, 0], [1, 1]],
         [[-1, -1], [-1, -1]],
     ])
     counts = count_alleles(gts, max_alleles=3)
     expected = np.array([
         [1, 0, 1, 2],
         [1, 1, 1, 1],
         [2, 2, 0, 0],
         [0, 0, 0, 4],
     ])
     # np.array_equal also requires identical shapes, whereas
     # np.all(counts == expected) would let a broadcastable result of the
     # wrong shape slip through.
     self.assertTrue(np.array_equal(counts, expected))
Exemplo n.º 4
0
 def test_allele_count_dask(self):
     """Check count_alleles on the GT_FIELD of _create_dask_variations().

     The value returned by count_alleles is materialised with
     ``compute()`` and compared against one expected row of four counts
     per variation.
     """
     variations = _create_dask_variations()
     gts = variations[GT_FIELD]
     counts = count_alleles(gts, max_alleles=3)
     expected = [
         [2, 2, 0, 2],
         [2, 2, 0, 2],
         [2, 2, 0, 2],
         [3, 1, 0, 2],
         [0, 0, 0, 6],
         [1, 1, 0, 4],
         [1, 3, 0, 2],
     ]
     # np.array_equal also requires identical shapes, whereas
     # np.all(expected == computed) would let a broadcastable result of
     # the wrong shape slip through.
     self.assertTrue(np.array_equal(counts.compute(), expected))