def test_slice_types(self):
    # Slicing along either axis should keep the HaplotypeArray wrapper,
    # whereas integer indexing should hand back plain numpy objects.
    hap = HaplotypeArray(haplotype_data, dtype='i1')

    # row slice, then column slice
    for key in (np.s_[1:], np.s_[:, 1:]):
        sliced = hap[key]
        assert isinstance(sliced, HaplotypeArray)

    # row index, then column index
    for key in (0, np.s_[:, 0]):
        picked = hap[key]
        assert isinstance(picked, np.ndarray)
        assert not isinstance(picked, HaplotypeArray)

    # single item
    scalar = hap[0, 0]
    assert isinstance(scalar, np.int8)
    assert not isinstance(scalar, HaplotypeArray)
def test_windowed_diversity(self):
    # Windowed diversity over positions 1-31 using windows of size 10.
    positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])

    # four haplotypes per variant, i.e. 6 pairwise comparisons
    haplotypes = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    allele_counts = haplotypes.count_alleles()

    # Per-variant mean pairwise diversity would be
    #   [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1]
    # and the expected window values below are built from those.
    expected = [(7 / 6) / 10, (13 / 6) / 10, 1 / 11]

    observed, _, _, _ = allel.windowed_diversity(
        positions, allele_counts, size=10, start=1, stop=31
    )
    assert_array_almost_equal(expected, observed)
def test_pdist(self):
    # allel's pdist on the haplotype array should agree with scipy's
    # pdist applied to the transposed array, both using 'hamming'.
    from allel.stats.distance import pdist
    import scipy.spatial

    haplotypes = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])

    reference = scipy.spatial.distance.pdist(haplotypes.T, 'hamming')
    candidate = pdist(haplotypes, 'hamming')
    aeq(reference, candidate)
def test_mean_pairwise_divergence(self):
    # Per-variant mean pairwise difference between two populations,
    # with -1 as the fill value.
    haplotypes = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])

    # simplest case, two haplotypes in each population:
    # columns 0-1 form the first population, columns 2-3 the second
    counts_pop1 = haplotypes.take([0, 1], axis=1).count_alleles()
    counts_pop2 = haplotypes.take([2, 3], axis=1).count_alleles()

    expected = [0/4, 2/4, 4/4, 2/4, 0/4, 4/4, 3/4, -1, -1]
    observed = allel.mean_pairwise_difference_between(
        counts_pop1, counts_pop2, fill=-1
    )
    aeq(expected, observed)
def test_windowed_divergence(self):
    # Windowed divergence between two populations over positions 1-31
    # using windows of size 10.
    positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])
    haplotypes = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])

    # simplest case, two haplotypes in each population:
    # columns 0-1 form the first population, columns 2-3 the second
    counts_pop1 = haplotypes.take([0, 1], axis=1).count_alleles()
    counts_pop2 = haplotypes.take([2, 3], axis=1).count_alleles()

    # Per-variant mean pairwise divergence would be
    #   [0/4, 2/4, 4/4, 2/4, 0/4, 4/4, 3/4, -1, -1]
    # and the expected window values below are built from those.
    expected = [(6 / 4) / 10, (9 / 4) / 10, 0 / 11]

    observed, _, _, _ = allel.windowed_divergence(
        positions, counts_pop1, counts_pop2, size=10, start=1, stop=31
    )
    assert_array_almost_equal(expected, observed)
def test_mean_pairwise_diversity(self):
    # Per-variant mean pairwise difference within a single population,
    # with -1 as the fill value.

    # Case 1: simplest case, two haplotypes, one pairwise comparison.
    pair = HaplotypeArray([
        [0, 0],
        [1, 1],
        [0, 1],
        [1, 2],
        [0, -1],
        [-1, -1],
    ])
    pair_expected = [0, 0, 1, 1, -1, -1]
    pair_observed = allel.mean_pairwise_difference(
        pair.count_alleles(), fill=-1
    )
    aeq(pair_expected, pair_observed)

    # Case 2: four haplotypes, 6 pairwise comparisons.
    quad = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    quad_expected = [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1]
    quad_observed = allel.mean_pairwise_difference(
        quad.count_alleles(), fill=-1
    )
    assert_array_almost_equal(quad_expected, quad_observed)
def test_constructor(self):
    # Constructor must reject missing or unsuitable data with TypeError
    # and accept properly shaped haplotype data with an explicit dtype.

    # missing data arg
    with self.assertRaises(TypeError):
        # noinspection PyArgumentList
        HaplotypeArray()

    rejected_inputs = (
        'foo bar',              # data has wrong dtype
        [4., 5., 3.7],          # data has wrong dtype
        [1, 2, 3],              # data has wrong dimensions
        diploid_genotype_data,  # wrong dimensions, use GenotypeArray instead
    )
    for bad_data in rejected_inputs:
        with self.assertRaises(TypeError):
            HaplotypeArray(bad_data)

    # haploid data (typed)
    typed = HaplotypeArray(haplotype_data, dtype='i1')
    aeq(haplotype_data, typed)
    assert np.int8 == typed.dtype
def test_masked_windowed_divergence(self):
    # Divergence in size-10 windows with an accessibility mask is compared
    # against every second unmasked size-5 window.
    haplotypes = HaplotypeArray([
        [0, 0, 0, 0],
        [0, 0, 0, 1],
        [0, 0, 1, 1],
        [0, 1, 1, 1],
        [1, 1, 1, 1],
        [0, 0, 1, 2],
        [0, 1, 1, 2],
        [0, 1, -1, -1],
        [-1, -1, -1, -1],
    ])
    counts_pop1 = haplotypes.take([0, 1], axis=1).count_alleles()
    counts_pop2 = haplotypes.take([2, 3], axis=1).count_alleles()
    positions = SortedIndex([2, 4, 7, 14, 15, 18, 19, 25, 27])

    # Mask: five True followed by five False, repeated three times.
    one_cycle = np.repeat(np.array([True, False]), 5)
    accessible = np.tile(one_cycle, 3)

    unmasked, _, _, _ = allel.windowed_divergence(
        positions, counts_pop1, counts_pop2, size=5, start=1, stop=31
    )
    expected = unmasked[::2]

    observed, _, _, _ = allel.windowed_divergence(
        positions, counts_pop1, counts_pop2, size=10, start=1, stop=31,
        is_accessible=accessible
    )
    assert_array_almost_equal(expected, observed)
def setup_instance(self, data, dtype=None):
    """Wrap ``data`` in a HaplotypeArray, forwarding the optional ``dtype``."""
    instance = HaplotypeArray(data, dtype=dtype)
    return instance