def test_mean_pairwise_diversity(self):
    """Compare ``allel.mean_pairwise_difference`` with hand-written values.

    Two fixtures are used: a two-haplotype array (one pairwise comparison
    per variant) and a four-haplotype array (six pairwise comparisons per
    variant). ``fill=-1`` is passed in both calls, and -1 appears in the
    expected output for some rows containing -1 calls.
    """
    # Two haplotypes: a single pairwise comparison per variant.
    pair = HaplotypeArray([
        [0, 0],
        [1, 1],
        [0, 1],
        [1, 2],
        [0, -1],
        [-1, -1],
    ])
    pair_counts = pair.count_alleles()
    observed = allel.mean_pairwise_difference(pair_counts, fill=-1)
    aeq([0, 0, 1, 1, -1, -1], observed)

    # Four haplotypes: six pairwise comparisons per variant.
    quad = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    quad_counts = quad.count_alleles()
    wanted = [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1]
    observed = allel.mean_pairwise_difference(quad_counts, fill=-1)
    assert_array_close(wanted, observed)
def test_mean_pairwise_diversity(self):
    """Compare ``allel.stats.mean_pairwise_difference`` with fixed values.

    Two fixtures are used: a two-haplotype array (one pairwise comparison
    per variant) and a four-haplotype array (six pairwise comparisons per
    variant). ``fill=-1`` is passed in both calls, and -1 appears in the
    expected output for some rows containing -1 calls.
    """
    # Two haplotypes: a single pairwise comparison per variant.
    pair = HaplotypeArray([
        [0, 0],
        [1, 1],
        [0, 1],
        [1, 2],
        [0, -1],
        [-1, -1],
    ])
    pair_counts = pair.count_alleles()
    observed = allel.stats.mean_pairwise_difference(pair_counts, fill=-1)
    aeq([0, 0, 1, 1, -1, -1], observed)

    # Four haplotypes: six pairwise comparisons per variant.
    quad = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    quad_counts = quad.count_alleles()
    wanted = [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1]
    observed = allel.stats.mean_pairwise_difference(quad_counts, fill=-1)
    assert_array_close(wanted, observed)
def test_rogers_huff_r(self):
    """Check ``allel.stats.rogers_huff_r`` on small hand-built inputs.

    The cases run in a fixed order: identical and reversed rows, an
    invariant pair that must yield NaN, an uncorrelated pair, pairs that
    contain -1 entries (presumably missing calls), and finally inputs
    with more than two rows, whose expected vectors have one entry per
    pair of rows (3 and 6 entries respectively).
    """
    r = allel.stats.rogers_huff_r

    # Identical rows, then exactly reversed rows.
    for gn, wanted in (
        ([[0, 1, 2], [0, 1, 2]], 1.),
        ([[0, 1, 2], [2, 1, 0]], -1.),
    ):
        eq(wanted, r(gn))

    # Neither row varies, so the result is expected to be NaN.
    assert np.isnan(r([[0, 0, 0], [1, 1, 1]]))

    # Uncorrelated rows, followed by inputs that contain -1 entries.
    for gn, wanted in (
        ([[0, 1, 0, 1], [0, 1, 1, 0]], 0),
        ([[0, 1, 2, -1], [0, 1, 2, 2]], 1.),
        ([[0, 1, 2, 2], [0, 1, 2, -1]], 1.),
        ([[0, 1, 2], [0, 1, -1]], 1.),
    ):
        eq(wanted, r(gn))

    # More than two rows: the result is compared as an array.
    for gn, wanted in (
        ([[0, 2], [2, 0], [0, 1]], [-1, 1, -1]),
        ([[0, 2, 0], [0, 2, 0], [2, 0, 2], [0, 2, -1]],
         [1, -1, 1, -1, 1, -1]),
    ):
        assert_array_close(wanted, r(gn))
def test_rogers_huff_r(self):
    """Check ``allel.rogers_huff_r`` on small hand-built inputs.

    The cases run in a fixed order: identical and reversed rows, an
    invariant pair that must yield NaN, an uncorrelated pair, pairs that
    contain -1 entries (presumably missing calls), and finally inputs
    with more than two rows, whose expected vectors have one entry per
    pair of rows (3 and 6 entries respectively).
    """
    r = allel.rogers_huff_r

    # Identical rows, then exactly reversed rows.
    for gn, wanted in (
        ([[0, 1, 2], [0, 1, 2]], 1.),
        ([[0, 1, 2], [2, 1, 0]], -1.),
    ):
        eq(wanted, r(gn))

    # Neither row varies, so the result is expected to be NaN.
    assert np.isnan(r([[0, 0, 0], [1, 1, 1]]))

    # Uncorrelated rows, followed by inputs that contain -1 entries.
    for gn, wanted in (
        ([[0, 1, 0, 1], [0, 1, 1, 0]], 0),
        ([[0, 1, 2, -1], [0, 1, 2, 2]], 1.),
        ([[0, 1, 2, 2], [0, 1, 2, -1]], 1.),
        ([[0, 1, 2], [0, 1, -1]], 1.),
    ):
        eq(wanted, r(gn))

    # More than two rows: the result is compared as an array.
    for gn, wanted in (
        ([[0, 2], [2, 0], [0, 1]], [-1, 1, -1]),
        ([[0, 2, 0], [0, 2, 0], [2, 0, 2], [0, 2, -1]],
         [1, -1, 1, -1, 1, -1]),
    ):
        assert_array_close(wanted, r(gn))
def test_heterozygosity_expected(self):
    """Check ``allel.stats.heterozygosity_expected`` for ploidy 2 and 3.

    The diploid fixture is compared with hand-written values (for both
    ``fill=-1`` and ``fill=0``) and with a small reference implementation;
    the triploid fixture is compared with the reference implementation only.
    """

    def refimpl(freqs, ploidy, fill=0):
        """Limited reference implementation; handles three alleles only."""
        # Row sums decide which rows receive the fill value below.
        row_total = np.sum(freqs, axis=1)
        # One minus each of the three allele frequencies raised to ploidy.
        het = (1
               - freqs[:, 0]**ploidy
               - freqs[:, 1]**ploidy
               - freqs[:, 2]**ploidy)
        with ignore_invalid():
            # Rows that sum to less than one, or to NaN, are filled.
            het[(row_total < 1) | np.isnan(row_total)] = fill
        return het

    # Diploid fixture.
    g = GenotypeArray([
        [[0, 0], [0, 0]],
        [[1, 1], [1, 1]],
        [[1, 1], [2, 2]],
        [[0, 0], [0, 1]],
        [[0, 0], [0, 2]],
        [[1, 1], [1, 2]],
        [[0, 1], [0, 1]],
        [[0, 1], [1, 2]],
        [[0, 0], [-1, -1]],
        [[0, 1], [-1, -1]],
        [[-1, -1], [-1, -1]],
    ], dtype='i1')
    # Expected values for every row except the last; the last row is
    # expected to equal whatever fill value is requested.
    leading = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5]
    by_hand = leading + [-1]
    af = g.count_alleles().to_frequencies()
    by_ref = refimpl(af, ploidy=g.ploidy, fill=-1)
    observed = allel.stats.heterozygosity_expected(
        af, ploidy=g.ploidy, fill=-1
    )
    assert_array_close(by_hand, observed)
    assert_array_close(by_ref, observed)

    # Same frequencies with a different fill value.
    by_hand_zero_fill = leading + [0]
    observed = allel.stats.heterozygosity_expected(
        af, ploidy=g.ploidy, fill=0
    )
    assert_array_close(by_hand_zero_fill, observed)

    # Polyploid (three calls per sample) fixture.
    g = GenotypeArray([
        [[0, 0, 0], [0, 0, 0]],
        [[1, 1, 1], [1, 1, 1]],
        [[1, 1, 1], [2, 2, 2]],
        [[0, 0, 0], [0, 0, 1]],
        [[0, 0, 0], [0, 0, 2]],
        [[1, 1, 1], [0, 1, 2]],
        [[0, 0, 1], [0, 1, 1]],
        [[0, 1, 1], [0, 1, 2]],
        [[0, 0, 0], [-1, -1, -1]],
        [[0, 0, 1], [-1, -1, -1]],
        [[-1, -1, -1], [-1, -1, -1]],
    ], dtype='i1')
    af = g.count_alleles().to_frequencies()
    by_ref = refimpl(af, ploidy=g.ploidy, fill=-1)
    observed = allel.stats.heterozygosity_expected(
        af, ploidy=g.ploidy, fill=-1
    )
    assert_array_close(by_ref, observed)
def test_heterozygosity_expected(self):
    """Check ``allel.heterozygosity_expected`` for ploidy 2 and 3.

    The diploid fixture is compared with hand-written values (for both
    ``fill=-1`` and ``fill=0``) and with a small reference implementation;
    the triploid fixture is compared with the reference implementation only.
    """

    def refimpl(freqs, ploidy, fill=0):
        """Limited reference implementation; handles three alleles only."""
        # Row sums decide which rows receive the fill value below.
        row_total = np.sum(freqs, axis=1)
        # One minus each of the three allele frequencies raised to ploidy.
        het = (1
               - freqs[:, 0]**ploidy
               - freqs[:, 1]**ploidy
               - freqs[:, 2]**ploidy)
        with ignore_invalid():
            # Rows that sum to less than one, or to NaN, are filled.
            het[(row_total < 1) | np.isnan(row_total)] = fill
        return het

    # Diploid fixture.
    g = GenotypeArray([
        [[0, 0], [0, 0]],
        [[1, 1], [1, 1]],
        [[1, 1], [2, 2]],
        [[0, 0], [0, 1]],
        [[0, 0], [0, 2]],
        [[1, 1], [1, 2]],
        [[0, 1], [0, 1]],
        [[0, 1], [1, 2]],
        [[0, 0], [-1, -1]],
        [[0, 1], [-1, -1]],
        [[-1, -1], [-1, -1]],
    ], dtype='i1')
    # Expected values for every row except the last; the last row is
    # expected to equal whatever fill value is requested.
    leading = [0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5]
    by_hand = leading + [-1]
    af = g.count_alleles().to_frequencies()
    by_ref = refimpl(af, ploidy=g.ploidy, fill=-1)
    observed = allel.heterozygosity_expected(af, ploidy=g.ploidy, fill=-1)
    assert_array_close(by_hand, observed)
    assert_array_close(by_ref, observed)

    # Same frequencies with a different fill value.
    by_hand_zero_fill = leading + [0]
    observed = allel.heterozygosity_expected(af, ploidy=g.ploidy, fill=0)
    assert_array_close(by_hand_zero_fill, observed)

    # Polyploid (three calls per sample) fixture.
    g = GenotypeArray([
        [[0, 0, 0], [0, 0, 0]],
        [[1, 1, 1], [1, 1, 1]],
        [[1, 1, 1], [2, 2, 2]],
        [[0, 0, 0], [0, 0, 1]],
        [[0, 0, 0], [0, 0, 2]],
        [[1, 1, 1], [0, 1, 2]],
        [[0, 0, 1], [0, 1, 1]],
        [[0, 1, 1], [0, 1, 2]],
        [[0, 0, 0], [-1, -1, -1]],
        [[0, 0, 1], [-1, -1, -1]],
        [[-1, -1, -1], [-1, -1, -1]],
    ], dtype='i1')
    af = g.count_alleles().to_frequencies()
    by_ref = refimpl(af, ploidy=g.ploidy, fill=-1)
    observed = allel.heterozygosity_expected(af, ploidy=g.ploidy, fill=-1)
    assert_array_close(by_ref, observed)
def test_windowed_diversity(self):
    """Check the first output of ``allel.windowed_diversity``.

    Uses the four-haplotype fixture (six pairwise comparisons per variant)
    with ``size=10, start=1, stop=31``; only the first of the four returned
    values is compared.
    """
    haplotypes = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    counts = haplotypes.count_alleles()

    # Per-variant mean pairwise diversity for the rows above:
    #   [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1]
    positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])

    # One expected value per window; the denominators 10, 10 and 11 are
    # presumably the window widths in bases.
    wanted = [(7/6)/10, (13/6)/10, 1/11]
    observed, _, _, _ = allel.windowed_diversity(
        positions, counts, size=10, start=1, stop=31
    )
    assert_array_close(wanted, observed)
def test_inbreeding_coefficient(self):
    """Check ``allel.stats.inbreeding_coefficient`` on a diploid fixture.

    The expected values are written as ``1 - (ho / he)`` using the
    per-variant figures listed in the comments below; -1 is the fill value
    passed to the call.
    """
    # Diploid fixture.
    genotypes = GenotypeArray([
        [[0, 0], [0, 0]],
        [[1, 1], [1, 1]],
        [[1, 1], [2, 2]],
        [[0, 0], [0, 1]],
        [[0, 0], [0, 2]],
        [[1, 1], [1, 2]],
        [[0, 1], [0, 1]],
        [[0, 1], [1, 2]],
        [[0, 0], [-1, -1]],
        [[0, 1], [-1, -1]],
        [[-1, -1], [-1, -1]],
    ], dtype='i1')

    # ho = np.array([0, 0, 0, .5, .5, .5, 1, 1, 0, 1, -1])
    # he = np.array([0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, -1])
    # expect = 1 - (ho/he)
    half_over_375 = 1-(.5/.375)
    one_over_half = 1-(1/.5)
    wanted = [
        -1,
        -1,
        1-0,
        half_over_375,
        half_over_375,
        half_over_375,
        one_over_half,
        1-(1/.625),
        -1,
        one_over_half,
        -1,
    ]
    observed = allel.stats.inbreeding_coefficient(genotypes, fill=-1)
    assert_array_close(wanted, observed)
def test_windowed_diversity(self):
    """Check the first output of ``allel.stats.windowed_diversity``.

    Uses the four-haplotype fixture (six pairwise comparisons per variant)
    with ``size=10, start=1, stop=31``; only the first of the four returned
    values is compared.
    """
    haplotypes = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    counts = haplotypes.count_alleles()

    # Per-variant mean pairwise diversity for the rows above:
    #   [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1]
    positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])

    # One expected value per window; the denominators 10, 10 and 11 are
    # presumably the window widths in bases.
    wanted = [(7/6)/10, (13/6)/10, 1/11]
    observed, _, _, _ = allel.stats.windowed_diversity(
        positions, counts, size=10, start=1, stop=31
    )
    assert_array_close(wanted, observed)
def test_inbreeding_coefficient(self):
    """Check ``allel.inbreeding_coefficient`` on a diploid fixture.

    The expected values are written as ``1 - (ho / he)`` using the
    per-variant figures listed in the comments below; -1 is the fill value
    passed to the call.
    """
    # Diploid fixture.
    genotypes = GenotypeArray([
        [[0, 0], [0, 0]],
        [[1, 1], [1, 1]],
        [[1, 1], [2, 2]],
        [[0, 0], [0, 1]],
        [[0, 0], [0, 2]],
        [[1, 1], [1, 2]],
        [[0, 1], [0, 1]],
        [[0, 1], [1, 2]],
        [[0, 0], [-1, -1]],
        [[0, 1], [-1, -1]],
        [[-1, -1], [-1, -1]],
    ], dtype='i1')

    # ho = np.array([0, 0, 0, .5, .5, .5, 1, 1, 0, 1, -1])
    # he = np.array([0, 0, 0.5, .375, .375, .375, .5, .625, 0, .5, -1])
    # expect = 1 - (ho/he)
    half_over_375 = 1-(.5/.375)
    one_over_half = 1-(1/.5)
    wanted = [
        -1,
        -1,
        1-0,
        half_over_375,
        half_over_375,
        half_over_375,
        one_over_half,
        1-(1/.625),
        -1,
        one_over_half,
        -1,
    ]
    observed = allel.inbreeding_coefficient(genotypes, fill=-1)
    assert_array_close(wanted, observed)
def test_windowed_divergence(self):
    """Check the first output of ``allel.stats.windowed_divergence``.

    The four-haplotype fixture is split column-wise into two populations
    of two haplotypes each (columns 0-1 and columns 2-3). The call uses
    ``size=10, start=1, stop=31``; only the first of the four returned
    values is compared.
    """
    # Simplest case: two haplotypes in each population.
    haplotypes = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    pop1 = haplotypes.take([0, 1], axis=1)
    pop2 = haplotypes.take([2, 3], axis=1)
    counts1 = pop1.count_alleles()
    counts2 = pop2.count_alleles()

    # Per-variant mean pairwise divergence for the rows above:
    #   [0/4, 2/4, 4/4, 2/4, 0/4, 4/4, 3/4, -1, -1]
    positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])

    # One expected value per window; the denominators 10, 10 and 11 are
    # presumably the window widths in bases.
    wanted = [(6/4)/10, (9/4)/10, 0/11]
    observed, _, _, _ = allel.stats.windowed_divergence(
        positions, counts1, counts2, size=10, start=1, stop=31
    )
    assert_array_close(wanted, observed)
def test_windowed_divergence(self):
    """Check the first output of ``allel.windowed_divergence``.

    The four-haplotype fixture is split column-wise into two populations
    of two haplotypes each (columns 0-1 and columns 2-3). The call uses
    ``size=10, start=1, stop=31``; only the first of the four returned
    values is compared.
    """
    # Simplest case: two haplotypes in each population.
    haplotypes = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    pop1 = haplotypes.take([0, 1], axis=1)
    pop2 = haplotypes.take([2, 3], axis=1)
    counts1 = pop1.count_alleles()
    counts2 = pop2.count_alleles()

    # Per-variant mean pairwise divergence for the rows above:
    #   [0/4, 2/4, 4/4, 2/4, 0/4, 4/4, 3/4, -1, -1]
    positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])

    # One expected value per window; the denominators 10, 10 and 11 are
    # presumably the window widths in bases.
    wanted = [(6/4)/10, (9/4)/10, 0/11]
    observed, _, _, _ = allel.windowed_divergence(
        positions, counts1, counts2, size=10, start=1, stop=31
    )
    assert_array_close(wanted, observed)