def test_heterozygosity_observed(self):
    """Check ``allel.heterozygosity_observed`` on diploid and triploid calls.

    Both genotype sets are expected to give the same per-variant values;
    the final, entirely missing variant is expected to come back as the
    ``fill`` value (-1).
    """
    diploid_calls = [
        [[0, 0], [0, 0]],
        [[1, 1], [1, 1]],
        [[1, 1], [2, 2]],
        [[0, 0], [0, 1]],
        [[0, 0], [0, 2]],
        [[1, 1], [1, 2]],
        [[0, 1], [0, 1]],
        [[0, 1], [1, 2]],
        [[0, 0], [-1, -1]],
        [[0, 1], [-1, -1]],
        [[-1, -1], [-1, -1]],
    ]
    polyploid_calls = [
        [[0, 0, 0], [0, 0, 0]],
        [[1, 1, 1], [1, 1, 1]],
        [[1, 1, 1], [2, 2, 2]],
        [[0, 0, 0], [0, 0, 1]],
        [[0, 0, 0], [0, 0, 2]],
        [[1, 1, 1], [0, 1, 2]],
        [[0, 0, 1], [0, 1, 1]],
        [[0, 1, 1], [0, 1, 2]],
        [[0, 0, 0], [-1, -1, -1]],
        [[0, 0, 1], [-1, -1, -1]],
        [[-1, -1, -1], [-1, -1, -1]],
    ]

    # diploid first, then polyploid; the expectation is the same for both
    for calls in (diploid_calls, polyploid_calls):
        g = GenotypeArray(calls, dtype='i1')
        expect = [0, 0, 0, .5, .5, .5, 1, 1, 0, 1, -1]
        actual = allel.heterozygosity_observed(g, fill=-1)
        aeq(expect, actual)
def test_slice_types(self):
    """Check the type produced by each kind of indexing on a GenotypeArray."""
    g = GenotypeArray(diploid_genotype_data, dtype='i1')

    # (index expression, type the result must be an instance of)
    cases = [
        (np.s_[1:], GenotypeArray),       # row slice
        (np.s_[:, 1:], GenotypeArray),    # col slice
        (np.s_[0], GenotypeVector),       # row index
        (np.s_[:, 0], GenotypeVector),    # col index
        (np.s_[:, :, 0], np.ndarray),     # ploidy index
        (np.s_[0, 0, 0], np.int8),        # item
    ]

    for key, expected_type in cases:
        s = g[key]
        assert isinstance(s, expected_type)
        # anything other than a row/col slice must not be a GenotypeArray
        if expected_type is not GenotypeArray:
            assert not isinstance(s, GenotypeArray)
def test_pairwise_distance_multidim(self):
    """Check ``allel.pairwise_distance`` with a custom metric on allele counts.

    The metric sums the mean pairwise difference between two columns of
    the allele-counts array; the expected result is that metric applied
    to columns 0 and 1.
    """
    calls = [
        [[0, 0], [0, 0]],
        [[1, 1], [1, 1]],
        [[1, 1], [2, 2]],
        [[0, 0], [0, 1]],
        [[0, 0], [0, 2]],
        [[1, 1], [1, 2]],
        [[0, 1], [0, 1]],
        [[0, 1], [1, 2]],
        [[0, 0], [-1, -1]],
        [[0, 1], [-1, -1]],
        [[-1, -1], [-1, -1]],
    ]
    gac = GenotypeArray(calls, dtype='i1').to_allele_counts()

    def metric(ac1, ac2):
        return allel.mean_pairwise_difference_between(ac1, ac2, fill=0).sum()

    # the same computation the metric performs, on columns 0 and 1
    expect = [metric(gac[:, 0], gac[:, 1])]
    actual = allel.pairwise_distance(gac, metric)
    aeq(expect, actual)
def test_haploidify_samples(self):
    """Check ``haploidify_samples`` on diploid and triploid genotype arrays.

    The result must be 2-dimensional with 3 variants and 2 haplotypes,
    keep the int8 dtype, and each entry must be one of the alleles in
    the corresponding genotype call.
    """
    diploid = [
        [[0, 1], [2, 3]],
        [[4, 5], [6, 7]],
        [[8, 9], [10, 11]],
    ]
    triploid = [
        [[0, 1, 2], [3, 4, 5]],
        [[6, 7, 8], [9, 10, 11]],
        [[12, 13, 14], [15, 16, 17]],
    ]

    for calls in (diploid, triploid):
        g = GenotypeArray(calls, dtype='i1')
        h = g.haploidify_samples()

        assert 2 == h.ndim
        assert 3 == h.n_variants
        assert 2 == h.n_haplotypes
        assert np.int8 == h.dtype

        for variant in range(g.n_variants):
            for sample in range(g.n_samples):
                alleles = set(g[variant, sample])
                self.assertIn(h[variant, sample], alleles)
def test_heterozygosity_expected(self):
    """Check ``allel.heterozygosity_expected`` against fixed values and a
    small reference implementation, for diploid and triploid genotypes.
    """

    def refimpl(f, ploidy, fill=0):
        """Limited reference implementation for testing purposes."""
        # assume three alleles
        out = 1 - f[:, 0]**ploidy - f[:, 1]**ploidy - f[:, 2]**ploidy
        # rows whose allele frequencies do not sum to 1 (or are NaN)
        # get the fill value
        af_sum = np.sum(f, axis=1)
        with ignore_invalid():
            out[(af_sum < 1) | np.isnan(af_sum)] = fill
        return out

    # diploid
    diploid_calls = [
        [[0, 0], [0, 0]],
        [[1, 1], [1, 1]],
        [[1, 1], [2, 2]],
        [[0, 0], [0, 1]],
        [[0, 0], [0, 2]],
        [[1, 1], [1, 2]],
        [[0, 1], [0, 1]],
        [[0, 1], [1, 2]],
        [[0, 0], [-1, -1]],
        [[0, 1], [-1, -1]],
        [[-1, -1], [-1, -1]],
    ]
    g = GenotypeArray(diploid_calls, dtype='i1')
    af = g.count_alleles().to_frequencies()

    expect_fixed = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, -1]
    expect_ref = refimpl(af, ploidy=g.ploidy, fill=-1)
    actual = allel.heterozygosity_expected(af, ploidy=g.ploidy, fill=-1)
    assert_array_almost_equal(expect_fixed, actual)
    assert_array_almost_equal(expect_ref, actual)

    # same frequencies, but with 0 as the fill value
    expect_zero_fill = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, 0]
    actual = allel.heterozygosity_expected(af, ploidy=g.ploidy, fill=0)
    assert_array_almost_equal(expect_zero_fill, actual)

    # polyploid
    polyploid_calls = [
        [[0, 0, 0], [0, 0, 0]],
        [[1, 1, 1], [1, 1, 1]],
        [[1, 1, 1], [2, 2, 2]],
        [[0, 0, 0], [0, 0, 1]],
        [[0, 0, 0], [0, 0, 2]],
        [[1, 1, 1], [0, 1, 2]],
        [[0, 0, 1], [0, 1, 1]],
        [[0, 1, 1], [0, 1, 2]],
        [[0, 0, 0], [-1, -1, -1]],
        [[0, 0, 1], [-1, -1, -1]],
        [[-1, -1, -1], [-1, -1, -1]],
    ]
    g = GenotypeArray(polyploid_calls, dtype='i1')
    af = g.count_alleles().to_frequencies()

    expect = refimpl(af, ploidy=g.ploidy, fill=-1)
    actual = allel.heterozygosity_expected(af, ploidy=g.ploidy, fill=-1)
    assert_array_almost_equal(expect, actual)
def build_genotype_array(genotypes, pop_samples, markers):
    """Assemble a GenotypeArray with one row per marker.

    Parameters
    ----------
    genotypes
        Nested lookup indexed as ``genotypes[sample][marker]``; each
        value is the genotype call placed in the array.
    pop_samples
        Mapping whose values are iterables of sample identifiers. The
        keys are not used. Samples are sorted within each value, and
        values are visited in the mapping's iteration order.
    markers
        Iterable of marker identifiers; defines the row order.

    Returns
    -------
    The ``GenotypeArray`` built from the collected calls. A one-line
    summary of its dimensions is printed to stderr.
    """
    # The sample order does not depend on the marker, so work it out once
    # instead of re-sorting every population for every marker.
    ordered_samples = [
        sample
        for sample_list in pop_samples.values()
        for sample in sorted(sample_list)
    ]
    rows = [
        [genotypes[sample][marker] for sample in ordered_samples]
        for marker in markers
    ]
    gt = GenotypeArray(rows)
    print('GenotypeArray construction complete:', gt.n_variants, 'markers,',
          gt.n_samples, 'samples, and a ploidy of', gt.ploidy,
          file=sys.stderr)
    return gt
def test_inbreeding_coefficient(self):
    """Check ``allel.inbreeding_coefficient`` on a diploid genotype array."""
    # diploid
    calls = [
        [[0, 0], [0, 0]],
        [[1, 1], [1, 1]],
        [[1, 1], [2, 2]],
        [[0, 0], [0, 1]],
        [[0, 0], [0, 2]],
        [[1, 1], [1, 2]],
        [[0, 1], [0, 1]],
        [[0, 1], [1, 2]],
        [[0, 0], [-1, -1]],
        [[0, 1], [-1, -1]],
        [[-1, -1], [-1, -1]],
    ]
    g = GenotypeArray(calls, dtype='i1')

    # The expectation is 1 - (ho / he), derived from:
    #   ho = [0, 0, 0, .5, .5, .5, 1, 1, 0, 1, -1]
    #   he = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, -1]
    fill = -1
    one_het_in_four = 1 - (.5 / .375)
    all_het_two_alleles = 1 - (1 / .5)
    all_het_three_alleles = 1 - (1 / .625)
    expect = [
        fill,
        fill,
        1 - 0,
        one_het_in_four,
        one_het_in_four,
        one_het_in_four,
        all_het_two_alleles,
        all_het_three_alleles,
        fill,
        all_het_two_alleles,
        fill,
    ]

    actual = allel.inbreeding_coefficient(g, fill=-1)
    assert_array_almost_equal(expect, actual)
def test_constructor(self):
    """Check GenotypeArray construction: rejected inputs and typed data."""
    # missing data arg
    with pytest.raises(TypeError):
        # noinspection PyArgumentList
        GenotypeArray()

    rejected_inputs = [
        'foo bar',           # data has wrong dtype
        [4., 5., 3.7],       # data has wrong dtype
        [1, 2, 3],           # data has wrong dimensions
        [[1, 2], [3, 4]],    # wrong dimensions; use HaplotypeArray instead
    ]
    for data in rejected_inputs:
        with pytest.raises(TypeError):
            GenotypeArray(data)

    # diploid data (typed), then polyploid data (typed)
    for data in (diploid_genotype_data, triploid_genotype_data):
        g = GenotypeArray(data, dtype='i1')
        aeq(data, g)
        assert np.int8 == g.dtype
def setup_instance(self, data, dtype=None):
    """Build a GenotypeArray from ``data``, passing ``dtype`` through."""
    instance = GenotypeArray(data, dtype=dtype)
    return instance