def test_view(self):
    """Check that viewing an ndarray as HaplotypeArray validates its input."""
    # Inputs that must be refused with TypeError, in order: wrong dtype
    # (string, floats), then wrong dimensions (flat list, and genotype data
    # for which GenotypeArray should be used instead).
    invalid_inputs = (
        'foo bar',
        [4., 5., 3.7],
        [1, 2, 3],
        diploid_genotype_data,
    )
    for bad in invalid_inputs:
        with self.assertRaises(TypeError):
            np.array(bad).view(HaplotypeArray)

    # haploid data
    h = np.array(haplotype_data).view(HaplotypeArray)
    aeq(haplotype_data, h)
    # np.int was only an alias of the builtin int and no longer exists in
    # NumPy >= 1.24; compare against the default integer dtype instead.
    eq(np.dtype(int), h.dtype)
    eq(2, h.ndim)
    eq(4, h.n_variants)
    eq(3, h.n_haplotypes)
def test_constructor(self):
    """Check AlleleCountsDaskArray.from_array input validation and a typed build."""
    # missing data arg
    with assert_raises(TypeError):
        # noinspection PyArgumentList
        AlleleCountsDaskArray.from_array()

    rejected = (
        'foo bar',                     # data has wrong dtype
        np.array([4., 5., 3.7]),       # data has wrong dtype
        np.array([1, 2, 3]),           # data has wrong dimensions
        np.array([[[1, 2], [3, 4]]]),  # data has wrong dimensions
    )
    for bad in rejected:
        with assert_raises(ValueError):
            AlleleCountsDaskArray.from_array(bad)

    # valid data (typed)
    typed = np.array(allele_counts_data, dtype='u2')
    hd = self.setup_instance(typed)
    aeq(allele_counts_data, hd)
    eq(np.uint16, hd.dtype)
def test_to_hdf5_group(self):
    """Write a table with to_hdf5_group (by path, then by open file) and verify it."""
    import os  # local import: only needed for temp-file cleanup

    # setup HDF5 file
    node_path = 'test'
    tf = tempfile.NamedTemporaryFile(delete=False)
    file_path = tf.name
    tf.close()

    try:
        a = np.rec.array(variant_table_data, dtype=variant_table_dtype)
        # reorder columns because will come back out in sorted order
        a = a[sorted(a.dtype.names)]
        vt = self.setup_instance(a)

        def check_stored():
            # every column must exist under node_path and equal the input
            with h5py.File(file_path, mode='r') as h5f:
                h5g = h5f[node_path]
                eq(sorted(a.dtype.names), sorted(h5g.keys()))
                for n in a.dtype.names:
                    aeq(a[n], h5g[n][:])

        # write using file path and node path
        vt.to_hdf5_group(file_path, node_path)
        check_stored()

        # write using group and node path
        with h5py.File(file_path, mode='w') as h5f:
            vt.to_hdf5_group(h5f, node_path)
        check_stored()
    finally:
        # delete=False leaves the file on disk, so remove it here; cleanup is
        # best-effort so that it can never mask a genuine test failure
        try:
            os.remove(file_path)
        except OSError:
            pass
def test_from_hdf5_group(self):
    """Load a table with from_hdf5_group, by file path and by open group."""
    import os  # local import: only needed for temp-file cleanup

    # setup HDF5 file
    node_path = 'test'
    tf = tempfile.NamedTemporaryFile(delete=False)
    file_path = tf.name
    tf.close()

    try:
        a = np.rec.array(variant_table_data, dtype=variant_table_dtype)
        # reorder columns because will come back out in sorted order
        a = a[sorted(a.dtype.names)]
        with h5py.File(file_path, mode='w') as h5f:
            h5g = h5f.create_group(node_path)
            for n in a.dtype.names:
                h5g.create_dataset(n, data=a[n], chunks=True,
                                   compression='gzip')

        # file and node path
        vt = self._class.from_hdf5_group(file_path, node_path)
        self.assertIsInstance(vt, self._class)
        aeq(a, vt[:])

        # dataset
        with h5py.File(file_path, mode='r') as h5f:
            h5g = h5f[node_path]
            vt = self._class.from_hdf5_group(h5g)
            self.assertIsInstance(vt, self._class)
            aeq(a, vt[:])
    finally:
        # delete=False leaves the file on disk, so remove it here; cleanup is
        # best-effort so that it can never mask a genuine test failure
        try:
            os.remove(file_path)
        except OSError:
            pass
def test_mean_pairwise_diversity(self):
    """Check allel.mean_pairwise_difference on two- and four-haplotype inputs."""
    # start with simplest case, two haplotypes, one pairwise comparison
    two_haplotypes = [[0, 0],
                      [1, 1],
                      [0, 1],
                      [1, 2],
                      [0, -1],
                      [-1, -1]]
    # four haplotypes, 6 pairwise comparison
    four_haplotypes = [[0, 0, 0, 0],
                       [0, 0, 0, 1],
                       [0, 0, 1, 1],
                       [0, 1, 1, 1],
                       [1, 1, 1, 1],
                       [0, 0, 1, 2],
                       [0, 1, 1, 2],
                       [0, 1, -1, -1],
                       [-1, -1, -1, -1]]
    # each case: haplotype calls, expected result, comparison function
    cases = (
        (two_haplotypes, [0, 0, 1, 1, -1, -1], aeq),
        (four_haplotypes, [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1],
         assert_array_almost_equal),
    )
    for calls, expect, check in cases:
        ac = HaplotypeArray(calls).count_alleles()
        actual = allel.mean_pairwise_difference(ac, fill=-1)
        check(expect, actual)
def test_to_hdf5(self):
    """Write a GenotypeCArray with to_hdf5 (by path, then by open file) and verify it."""
    import os  # local import: only needed for temp-file cleanup

    # setup HDF5 file
    tf = tempfile.NamedTemporaryFile(delete=False)
    file_path = tf.name
    tf.close()

    try:
        # setup genotype array
        node_path = 'test'
        g = GenotypeCArray(diploid_genotype_data, dtype='i1')

        def check_stored():
            # test outcome: the stored dataset must equal the array contents
            with h5py.File(file_path, mode='r') as h5f:
                h5d = h5f[node_path]
                aeq(g[:], h5d[:])

        # write using file path and node path
        g.to_hdf5(file_path, node_path)
        check_stored()

        # write using group
        with h5py.File(file_path, mode='w') as h5f:
            g.to_hdf5(h5f, node_path)
        check_stored()
    finally:
        # delete=False leaves the file on disk, so remove it here; cleanup is
        # best-effort so that it can never mask a genuine test failure
        try:
            os.remove(file_path)
        except OSError:
            pass
def test_constructor(self):
    """Check HaplotypeArray construction: bad input raises, typed data works."""
    # missing data arg
    with self.assertRaises(TypeError):
        # noinspection PyArgumentList
        HaplotypeArray()

    # wrong dtype (string, floats), then wrong dimensions (flat list, and
    # genotype data for which GenotypeArray should be used instead)
    for bad in ('foo bar', [4., 5., 3.7], [1, 2, 3], diploid_genotype_data):
        with self.assertRaises(TypeError):
            HaplotypeArray(bad)

    # haploid data (typed)
    h = HaplotypeArray(haplotype_data, dtype='i1')
    aeq(haplotype_data, h)
    eq(np.int8, h.dtype)
def test_constructor(self):
    """Check AlleleCountsChunkedArray rejects bad input with TypeError."""
    # missing data arg
    with assert_raises(TypeError):
        # noinspection PyArgumentList
        AlleleCountsChunkedArray()

    rejected = (
        'foo bar',                     # data has wrong dtype
        np.array([4., 5., 3.7]),       # data has wrong dtype
        np.array([1, 2, 3]),           # data has wrong dimensions
        np.array([[[1, 2], [3, 4]]]),  # data has wrong dimensions
    )
    for bad in rejected:
        with assert_raises(TypeError):
            AlleleCountsChunkedArray(bad)

    # typed data (typed)
    typed = np.array(allele_counts_data, dtype='u1')
    ac = AlleleCountsChunkedArray(typed)
    aeq(allele_counts_data, ac)
    eq(np.uint8, ac.dtype)
def test_from_hdf5_condition(self):
    """Load a GenotypeCArray from HDF5 while selecting rows with a condition."""
    import os  # local import: only needed for temp-file cleanup

    # setup HDF5 file
    node_path = 'test'
    tf = tempfile.NamedTemporaryFile(delete=False)
    file_path = tf.name
    tf.close()

    try:
        with h5py.File(file_path, mode='w') as h5f:
            h5f.create_dataset(node_path, data=diploid_genotype_data,
                               chunks=(2, 3, 2))

        # selection
        condition = [False, True, False, True, False]
        expect = GenotypeArray(diploid_genotype_data) \
            .compress(condition, axis=0)

        # file and node path
        g = GenotypeCArray.from_hdf5(file_path, node_path,
                                     condition=condition)
        aeq(expect, g)

        # dataset
        with h5py.File(file_path, mode='r') as h5f:
            dataset = h5f[node_path]
            g = GenotypeCArray.from_hdf5(dataset, condition=condition)
            aeq(expect, g)
    finally:
        # delete=False leaves the file on disk, so remove it here; cleanup is
        # best-effort so that it can never mask a genuine test failure
        try:
            os.remove(file_path)
        except OSError:
            pass
def test_constructor(self):
    """Check AlleleCountsArray construction: bad input raises, typed data works."""
    # missing data arg
    with self.assertRaises(TypeError):
        # noinspection PyArgumentList
        AlleleCountsArray()

    # wrong dtype (string, floats), then wrong dimensions (flat list,
    # genotype data)
    for bad in ('foo bar', [4., 5., 3.7], [1, 2, 3], diploid_genotype_data):
        with self.assertRaises(TypeError):
            AlleleCountsArray(bad)

    # valid data (typed)
    ac = AlleleCountsArray(allele_counts_data, dtype='u1')
    aeq(allele_counts_data, ac)
    eq(np.uint8, ac.dtype)
def test_constructor(self):
    """Check HaplotypeChunkedArray rejects bad input with TypeError."""
    # missing data arg
    with assert_raises(TypeError):
        # noinspection PyArgumentList
        HaplotypeChunkedArray()

    rejected = (
        'foo bar',                     # data has wrong dtype
        np.array([4., 5., 3.7]),       # data has wrong dtype
        np.array([1, 2, 3]),           # data has wrong dimensions
        # data has wrong dimensions; use GenotypeCArray instead
        np.array([[[1, 2], [3, 4]]]),
    )
    for bad in rejected:
        with assert_raises(TypeError):
            HaplotypeChunkedArray(bad)

    # typed data (typed)
    typed = np.array(haplotype_data, dtype='i1')
    h = HaplotypeChunkedArray(typed)
    aeq(haplotype_data, h)
    eq(np.int8, h.dtype)
def test_constructor(self):
    """Check HaplotypeDaskArray.from_array input validation and a typed build."""
    # missing data arg
    with assert_raises(TypeError):
        # noinspection PyArgumentList
        HaplotypeDaskArray.from_array()

    rejected = (
        'foo bar',                     # data has wrong dtype
        np.array([4., 5., 3.7]),       # data has wrong dtype
        np.array([1, 2, 3]),           # data has wrong dimensions
        # data has wrong dimensions; use GenotypeDaskArray instead
        np.array([[[1, 2], [3, 4]]]),
    )
    for bad in rejected:
        with assert_raises(ValueError):
            HaplotypeDaskArray.from_array(bad)

    # valid data (typed)
    typed = np.array(haplotype_data, dtype='i1')
    hd = self.setup_instance(typed)
    aeq(haplotype_data, hd)
    eq(np.int8, hd.dtype)
def test_heterozygosity_observed(self):
    """Check allel.stats.heterozygosity_observed on diploid and polyploid calls."""
    diploid = [[[0, 0], [0, 0]],
               [[1, 1], [1, 1]],
               [[1, 1], [2, 2]],
               [[0, 0], [0, 1]],
               [[0, 0], [0, 2]],
               [[1, 1], [1, 2]],
               [[0, 1], [0, 1]],
               [[0, 1], [1, 2]],
               [[0, 0], [-1, -1]],
               [[0, 1], [-1, -1]],
               [[-1, -1], [-1, -1]]]
    polyploid = [[[0, 0, 0], [0, 0, 0]],
                 [[1, 1, 1], [1, 1, 1]],
                 [[1, 1, 1], [2, 2, 2]],
                 [[0, 0, 0], [0, 0, 1]],
                 [[0, 0, 0], [0, 0, 2]],
                 [[1, 1, 1], [0, 1, 2]],
                 [[0, 0, 1], [0, 1, 1]],
                 [[0, 1, 1], [0, 1, 2]],
                 [[0, 0, 0], [-1, -1, -1]],
                 [[0, 0, 1], [-1, -1, -1]],
                 [[-1, -1, -1], [-1, -1, -1]]]
    # both inputs are expected to give the same per-variant values
    expect = [0, 0, 0, .5, .5, .5, 1, 1, 0, 1, -1]
    for calls in (diploid, polyploid):
        g = GenotypeArray(calls, dtype='i1')
        actual = allel.stats.heterozygosity_observed(g, fill=-1)
        aeq(expect, actual)
def test_view(self):
    """Check that viewing an ndarray as AlleleCountsArray validates its input."""
    # Inputs that must be refused with TypeError, in order: wrong dtype
    # (string, floats), then wrong dimensions (flat list, genotype data).
    invalid_inputs = (
        'foo bar',
        [4., 5., 3.7],
        [1, 2, 3],
        diploid_genotype_data,
    )
    for bad in invalid_inputs:
        with self.assertRaises(TypeError):
            np.array(bad).view(AlleleCountsArray)

    # valid data
    ac = np.array(allele_counts_data).view(AlleleCountsArray)
    aeq(allele_counts_data, ac)
    # np.int was only an alias of the builtin int and no longer exists in
    # NumPy >= 1.24; compare against the default integer dtype instead.
    eq(np.dtype(int), ac.dtype)
    eq(2, ac.ndim)
    eq(6, ac.n_variants)
    eq(3, ac.n_alleles)
def test_mean_pairwise_diversity(self):
    """Check allel.stats.mean_pairwise_difference on two- and four-haplotype inputs."""
    # start with simplest case, two haplotypes, one pairwise comparison
    two_haplotypes = [[0, 0],
                      [1, 1],
                      [0, 1],
                      [1, 2],
                      [0, -1],
                      [-1, -1]]
    # four haplotypes, 6 pairwise comparison
    four_haplotypes = [[0, 0, 0, 0],
                       [0, 0, 0, 1],
                       [0, 0, 1, 1],
                       [0, 1, 1, 1],
                       [1, 1, 1, 1],
                       [0, 0, 1, 2],
                       [0, 1, 1, 2],
                       [0, 1, -1, -1],
                       [-1, -1, -1, -1]]
    # each case: haplotype calls, expected result, comparison function
    cases = (
        (two_haplotypes, [0, 0, 1, 1, -1, -1], aeq),
        (four_haplotypes, [0, 3/6, 4/6, 3/6, 0, 5/6, 5/6, 1, -1],
         assert_array_close),
    )
    for calls, expect, check in cases:
        ac = HaplotypeArray(calls).count_alleles()
        actual = allel.stats.mean_pairwise_difference(ac, fill=-1)
        check(expect, actual)
def test_constructor(self):
    """Check UniqueIndex construction: validation errors and valid label sets."""
    # missing data arg
    with self.assertRaises(TypeError):
        # noinspection PyArgumentList
        UniqueIndex()

    # data has wrong dimensions
    nested = [['A', 'C'], ['B', 'F']]
    with self.assertRaises(TypeError):
        UniqueIndex(nested)

    # labels are not unique
    repeated = ['A', 'B', 'D', 'B']
    with self.assertRaises(ValueError):
        UniqueIndex(repeated)

    # valid data
    labels = ['A', 'C', 'B', 'F']
    lbl = UniqueIndex(labels)
    aeq(labels, lbl)
    eq(1, lbl.ndim)
    eq(4, len(lbl))

    # valid data (typed)
    typed_labels = np.array(['A', 'C', 'B', 'F'], dtype='S1')
    lbl = UniqueIndex(typed_labels, dtype='S1')
    aeq(typed_labels, lbl)
def test_view(self):
    """Check that viewing an ndarray as AlleleCountsArray validates its input."""
    # Inputs that must be refused with TypeError, in order: wrong dtype
    # (string, floats), then wrong dimensions (flat list, genotype data).
    invalid_inputs = (
        'foo bar',
        [4., 5., 3.7],
        [1, 2, 3],
        diploid_genotype_data,
    )
    for bad in invalid_inputs:
        with self.assertRaises(TypeError):
            np.array(bad).view(AlleleCountsArray)

    # valid data
    ac = np.array(allele_counts_data).view(AlleleCountsArray)
    aeq(allele_counts_data, ac)
    # np.int was only an alias of the builtin int and no longer exists in
    # NumPy >= 1.24; compare against the default integer dtype instead.
    eq(np.dtype(int), ac.dtype)
    eq(2, ac.ndim)
    eq(5, ac.n_variants)
    eq(3, ac.n_alleles)
def test_constructor(self):
    """Check UniqueIndex construction: validation errors and valid label sets."""
    # missing data arg
    with pytest.raises(TypeError):
        # noinspection PyArgumentList
        UniqueIndex()

    # data has wrong dimensions
    nested = [['A', 'C'], ['B', 'F']]
    with pytest.raises(TypeError):
        UniqueIndex(nested)

    # labels are not unique
    repeated = ['A', 'B', 'D', 'B']
    with pytest.raises(ValueError):
        UniqueIndex(repeated)

    # valid data
    labels = ['A', 'C', 'B', 'F']
    lbl = UniqueIndex(labels)
    aeq(labels, lbl)
    assert 1 == lbl.ndim
    assert 4 == len(lbl)

    # valid data (typed)
    typed_labels = np.array(['A', 'C', 'B', 'F'], dtype='S1')
    lbl = UniqueIndex(typed_labels, dtype='S1')
    aeq(typed_labels, lbl)
def test_heterozygosity_observed(self):
    """Check allel.heterozygosity_observed on diploid and polyploid calls."""
    diploid = [[[0, 0], [0, 0]],
               [[1, 1], [1, 1]],
               [[1, 1], [2, 2]],
               [[0, 0], [0, 1]],
               [[0, 0], [0, 2]],
               [[1, 1], [1, 2]],
               [[0, 1], [0, 1]],
               [[0, 1], [1, 2]],
               [[0, 0], [-1, -1]],
               [[0, 1], [-1, -1]],
               [[-1, -1], [-1, -1]]]
    polyploid = [[[0, 0, 0], [0, 0, 0]],
                 [[1, 1, 1], [1, 1, 1]],
                 [[1, 1, 1], [2, 2, 2]],
                 [[0, 0, 0], [0, 0, 1]],
                 [[0, 0, 0], [0, 0, 2]],
                 [[1, 1, 1], [0, 1, 2]],
                 [[0, 0, 1], [0, 1, 1]],
                 [[0, 1, 1], [0, 1, 2]],
                 [[0, 0, 0], [-1, -1, -1]],
                 [[0, 0, 1], [-1, -1, -1]],
                 [[-1, -1, -1], [-1, -1, -1]]]
    # both inputs are expected to give the same per-variant values
    expect = [0, 0, 0, .5, .5, .5, 1, 1, 0, 1, -1]
    for calls in (diploid, polyploid):
        g = GenotypeArray(calls, dtype='i1')
        actual = allel.heterozygosity_observed(g, fill=-1)
        aeq(expect, actual)
def test_locate_unlinked(self):
    """Check allel.locate_unlinked on small inputs, including a bcolz carray."""
    # two identical rows
    pair = [[0, 1, 2],
            [0, 1, 2]]
    result = allel.locate_unlinked(pair, size=2, step=2, threshold=.5)
    aeq([True, False], result)

    # four rows
    quad = [[0, 1, 1, 2],
            [0, 1, 1, 2],
            [1, 1, 0, 2],
            [1, 1, 0, 2]]
    result = allel.locate_unlinked(quad, size=2, step=1, threshold=.5)
    aeq([True, False, True, False], result)

    # five rows, checked with two window sizes
    gn = [[0, 1, 1, 2],
          [0, 1, 1, 2],
          [0, 1, 1, 2],
          [1, 1, 0, 2],
          [1, 1, 0, 2]]
    result = allel.locate_unlinked(gn, size=2, step=1, threshold=.5)
    aeq([True, False, True, True, False], result)
    result = allel.locate_unlinked(gn, size=3, step=1, threshold=.5)
    aeq([True, False, False, True, False], result)

    # test with bcolz carray
    import bcolz
    gnz = bcolz.carray(gn, chunklen=2)
    result = allel.locate_unlinked(gnz, size=2, step=1, threshold=.5,
                                   blen=2)
    aeq([True, False, True, True, False], result)
def test_constructor(self):
    """Check AlleleCountsChunkedArray rejects bad input with ValueError."""
    # missing data arg
    with assert_raises(TypeError):
        # noinspection PyArgumentList
        AlleleCountsChunkedArray()

    rejected = (
        'foo bar',                     # data has wrong dtype
        np.array([4., 5., 3.7]),       # data has wrong dtype
        np.array([1, 2, 3]),           # data has wrong dimensions
        np.array([[[1, 2], [3, 4]]]),  # data has wrong dimensions
    )
    for bad in rejected:
        with assert_raises(ValueError):
            AlleleCountsChunkedArray(bad)

    # typed data (typed)
    typed = np.array(allele_counts_data, dtype='u1')
    ac = AlleleCountsChunkedArray(typed)
    aeq(allele_counts_data, ac)
    eq(np.uint8, ac.dtype)
def test_constructor(self):
    """Check HaplotypeChunkedArray rejects bad input with ValueError."""
    # missing data arg
    with assert_raises(TypeError):
        # noinspection PyArgumentList
        HaplotypeChunkedArray()

    rejected = (
        'foo bar',                     # data has wrong dtype
        np.array([4., 5., 3.7]),       # data has wrong dtype
        np.array([1, 2, 3]),           # data has wrong dimensions
        # data has wrong dimensions; use GenotypeCArray instead
        np.array([[[1, 2], [3, 4]]]),
    )
    for bad in rejected:
        with assert_raises(ValueError):
            HaplotypeChunkedArray(bad)

    # typed data (typed)
    typed = np.array(haplotype_data, dtype='i1')
    h = HaplotypeChunkedArray(typed)
    aeq(haplotype_data, h)
    eq(np.int8, h.dtype)
def test_view(self):
    """Check that a record array viewed as VariantTable keeps data and metadata."""
    records = np.rec.array(variant_table_data, dtype=variant_table_dtype)
    vt = records.view(VariantTable)

    # contents first, then shape and column metadata
    aeq(records, vt)
    eq(1, vt.ndim)
    eq(5, vt.n_variants)
    eq(variant_table_names, vt.names)
def test_view(self):
    """Check that a record array viewed as FeatureTable keeps data and metadata."""
    records = np.rec.array(feature_table_data, dtype=feature_table_dtype)
    ft = records.view(FeatureTable)

    # contents first, then shape and column metadata
    aeq(records, ft)
    eq(1, ft.ndim)
    eq(6, ft.n_features)
    eq(feature_table_names, ft.names)
def test_take(self):
    """Check take() along axis 0 matches numpy for out-of-order indices."""
    reference = np.array(diploid_genotype_data)
    gd = self.setup_instance(reference)

    # take variants not in original order
    picks = [2, 0]
    aeq(reference.take(picks, axis=0), gd.take(picks, axis=0))
def test_sfs_scaled(self):
    """Check allel.sfs_scaled on a plain list and on several integer dtypes."""
    dac = [0, 1, 2, 1]
    expect = [0, 2, 2]

    # plain Python list
    aeq(expect, allel.sfs_scaled(dac))

    # same counts as typed arrays
    for code in ('u2', 'i2', 'u8', 'i8'):
        typed = np.asarray(dac, dtype=code)
        aeq(expect, allel.sfs_scaled(typed))
def test_eval_vm(self):
    """Check eval() gives the same mask with the numexpr and python VMs."""
    records = np.rec.array(variant_table_data, dtype=variant_table_dtype)
    vt = self.setup_instance(records)

    expr = '(DP > 30) & (QD < 4)'
    for vm in ('numexpr', 'python'):
        r = vt.eval(expr, vm=vm)
        aeq([False, False, True, False, True], r)
def test_sfs_folded(self):
    """Check allel.sfs_folded on a plain list and on several integer dtypes."""
    ac = [[0, 3], [1, 2], [2, 1]]
    expect = [1, 2]

    # plain Python list
    aeq(expect, allel.sfs_folded(ac))

    # same counts as typed arrays
    for code in ('u2', 'i2', 'u8', 'i8'):
        typed = np.asarray(ac, dtype=code)
        aeq(expect, allel.sfs_folded(typed))
def test_take(self):
    """Check VariantTable.take() with out-of-order indices."""
    records = np.rec.array(variant_table_data, dtype=variant_table_dtype)
    vt = VariantTable(records)

    # take variants not in original order
    picks = [2, 0]
    t = vt.take(picks)
    eq(2, t.n_variants)
    aeq(records.take(picks), t)
def test_pdist(self):
    """Check allel's pdist agrees with scipy's hamming pdist on the transpose."""
    from allel.stats.distance import pdist
    h = HaplotypeArray([[0, 0, 0, 0],
                        [0, 0, 0, 1],
                        [0, 0, 1, 1],
                        [0, 1, 1, 1],
                        [1, 1, 1, 1],
                        [0, 0, 1, 2],
                        [0, 1, 1, 2],
                        [0, 1, -1, -1],
                        [-1, -1, -1, -1]])
    import scipy.spatial
    # scipy is given h.T while allel's pdist is given h itself
    from_scipy = scipy.spatial.distance.pdist(h.T, 'hamming')
    from_allel = pdist(h, 'hamming')
    aeq(from_scipy, from_allel)
def test_roh_mhmm_100pct(self):
    """Check allel.roh_mhmm on all-zero genotypes over a contig of size 100."""
    gv = np.zeros((4, 2), dtype=np.int16)
    pos = [1, 10, 50, 100]
    roh, fraction = allel.roh_mhmm(gv, pos, contig_size=100)

    # values correspond to start/stop/length/is_marginal
    roh_expected = np.array([[1, 100, 100, True]], dtype=object)
    aeq(roh.values, roh_expected)

    fraction_expected = 1.0
    assert fraction == fraction_expected
def test_take(self):
    """Check take() on axis 0 keeps sample count and ploidy and matches numpy."""
    g = self.setup_instance(diploid_genotype_data)

    # take variants not in original order
    picks = [2, 0]
    t = g.take(picks, axis=0)

    # only the variant dimension changes
    eq(2, t.n_variants)
    eq(g.n_samples, t.n_samples)
    eq(g.ploidy, t.ploidy)

    reference = np.array(diploid_genotype_data)
    aeq(reference.take(picks, axis=0), t)
def test_mask_inaccessible(self):
    """Check mask_inaccessible returns the masked positions and counts."""
    np.random.seed(2837)
    for n_vars in (5, 50, 500):
        positions = np.arange(1, n_vars + 1)
        # draw the counts before the mask so the random stream is consumed
        # in a fixed order
        counts = np.random.randint(1, 40, n_vars * 2).reshape((n_vars, 2))
        is_kept = np.random.randint(2, size=n_vars).astype(bool)

        mpos, mac = mask_inaccessible(is_kept, positions, counts)
        aeq(mac, counts[is_kept])
        aeq(mpos, positions[is_kept])
def test_moving_statistic(self):
    """Check allel.moving_statistic with np.sum, with and without an explicit step."""
    values = [2, 5, 8, 16]

    # no step given
    result = allel.moving_statistic(values, statistic=np.sum, size=2)
    aeq([7, 24], result)

    # explicit step of 1
    result = allel.moving_statistic(values, statistic=np.sum, size=2,
                                    step=1)
    aeq([7, 13, 24], result)
def test_moving_statistic(self):
    """Check allel.stats.moving_statistic with np.sum, with and without a step."""
    values = [2, 5, 8, 16]

    # no step given
    result = allel.stats.moving_statistic(values, statistic=np.sum, size=2)
    aeq([7, 24], result)

    # explicit step of 1
    result = allel.stats.moving_statistic(values, statistic=np.sum, size=2,
                                          step=1)
    aeq([7, 13, 24], result)
def test_count_alleles_subpops(self):
    """Check count_alleles_subpops against per-subpopulation count_alleles."""
    storage = chunked.storage_registry['default']
    data = storage.array(diploid_genotype_data, chunklen=2)
    g = GenotypeChunkedArray(data)

    subpops = {'foo': [0, 2], 'bar': [1]}
    ac_subpops = g.count_alleles_subpops(subpops)
    # each subpopulation must equal counting alleles on just its samples
    for pop, sample_indices in subpops.items():
        ac = g.take(sample_indices, axis=1).count_alleles()
        aeq(ac, ac_subpops[pop])

    # the result can be compressed with a boolean selection
    loc = np.array([True, False, True, False, True])
    t = ac_subpops.compress(loc)
    eq(3, len(t))
def test_mean_pairwise_divergence(self):
    """Check allel.mean_pairwise_difference_between on two 2-haplotype groups."""
    # simplest case, two haplotypes in each population
    h = HaplotypeArray([[0, 0, 0, 0],
                        [0, 0, 0, 1],
                        [0, 0, 1, 1],
                        [0, 1, 1, 1],
                        [1, 1, 1, 1],
                        [0, 0, 1, 2],
                        [0, 1, 1, 2],
                        [0, 1, -1, -1],
                        [-1, -1, -1, -1]])
    # columns 0-1 form the first population, columns 2-3 the second
    first = h.take([0, 1], axis=1)
    second = h.take([2, 3], axis=1)
    ac1 = first.count_alleles()
    ac2 = second.count_alleles()

    expect = [0 / 4, 2 / 4, 4 / 4, 2 / 4, 0 / 4, 4 / 4, 3 / 4, -1, -1]
    actual = allel.mean_pairwise_difference_between(ac1, ac2, fill=-1)
    aeq(expect, actual)
def test_to_n_ref_array_like(self):
    """Check the result of to_n_ref() works with comparisons and numpy reductions."""
    # see also https://github.com/cggh/scikit-allel/issues/66

    gn = self.setup_instance(diploid_genotype_data).to_n_ref(fill=-1)
    t = gn > 0
    eq(4, np.count_nonzero(t))
    expect = np.array([[1, 1, 0],
                       [1, 0, 0],
                       [1, 0, 0],
                       [0, 0, 0],
                       [0, 0, 0]], dtype='b1')
    aeq(expect, t)

    # numpy reductions trigger the issue
    reductions = (
        (np.sum, [2, 1, 1, 0, 0]),
        (np.min, [0, 0, 0, 0, 0]),
        (np.max, [1, 1, 1, 0, 0]),
    )
    for reducer, values in reductions:
        expect = np.array(values)
        actual = reducer(t, axis=1)
        aeq(expect, actual)
def test_pdist(self):
    """Check allel.stats.distance.pdist agrees with scipy's hamming pdist."""
    h = HaplotypeArray([[0, 0, 0, 0],
                        [0, 0, 0, 1],
                        [0, 0, 1, 1],
                        [0, 1, 1, 1],
                        [1, 1, 1, 1],
                        [0, 0, 1, 2],
                        [0, 1, 1, 2],
                        [0, 1, -1, -1],
                        [-1, -1, -1, -1]])
    import scipy.spatial
    # scipy is given h.T while allel's pdist is given h itself
    from_scipy = scipy.spatial.distance.pdist(h.T, 'hamming')
    import allel.stats.distance
    from_allel = allel.stats.distance.pdist(h, 'hamming')
    aeq(from_scipy, from_allel)
def test_constructor(self):
    """Check SortedIndex construction: validation errors and valid inputs."""
    # missing data arg
    with self.assertRaises(TypeError):
        # noinspection PyArgumentList
        SortedIndex()

    # data has wrong dtype, then data has wrong dimensions
    for bad in ('foo bar', [[1, 2], [3, 4]]):
        with self.assertRaises(TypeError):
            SortedIndex(bad)

    # values are not sorted
    for unsorted in ([2, 1, 3, 5], [4., 5., 3.7]):
        with self.assertRaises(ValueError):
            SortedIndex(unsorted)

    # valid data (unique)
    data = [1, 4, 5, 7, 12]
    idx = SortedIndex(data)
    aeq(data, idx)
    # np.int was only an alias of the builtin int and no longer exists in
    # NumPy >= 1.24; compare against the default integer dtype instead.
    eq(np.dtype(int), idx.dtype)
    eq(1, idx.ndim)
    eq(5, len(idx))
    assert idx.is_unique

    # valid data (non-unique)
    data = [1, 4, 5, 5, 7, 12]
    idx = SortedIndex(data)
    aeq(data, idx)
    eq(np.dtype(int), idx.dtype)
    eq(1, idx.ndim)
    eq(6, len(idx))
    assert not idx.is_unique

    # valid data (typed)
    data = [1, 4, 5, 5, 7, 12]
    idx = SortedIndex(data, dtype='u4')
    aeq(data, idx)
    eq(np.uint32, idx.dtype)

    # valid data (non-numeric)
    data = ['1', '12', '4', '5', '5', '7']
    idx = SortedIndex(data)
    aeq(data, idx)
def test_constructor(self):
    """Check FeatureChunkedTable builds from a record array and from a dict."""
    # missing data arg
    with self.assertRaises(TypeError):
        # noinspection PyArgumentList
        FeatureChunkedTable()

    # recarray
    ra = np.rec.array(feature_table_data, dtype=feature_table_dtype)
    from_records = FeatureChunkedTable(ra)
    eq(6, len(from_records))
    aeq(ra, from_records)

    # dict
    columns = {name: ra[name] for name in feature_table_names}
    from_dict = FeatureChunkedTable(columns, names=feature_table_names)
    eq(6, len(from_dict))
    aeq(ra, from_dict)
def test_constructor(self):
    """Check VariantChunkedTable builds from a record array and from a dict."""
    # missing data arg
    with self.assertRaises(TypeError):
        # noinspection PyArgumentList
        VariantChunkedTable()

    # recarray
    ra = np.rec.array(variant_table_data, dtype=variant_table_dtype)
    from_records = VariantChunkedTable(ra)
    eq(5, len(from_records))
    aeq(ra, from_records)

    # dict
    columns = {name: ra[name] for name in variant_table_names}
    from_dict = VariantChunkedTable(columns, names=variant_table_names)
    eq(5, len(from_dict))
    aeq(ra, from_dict)
def test_from_hdf5(self):
    """Load a HaplotypeCArray from HDF5, by file path and by open dataset."""
    import os  # local import: only needed for temp-file cleanup

    # setup HDF5 file
    node_path = 'test'
    tf = tempfile.NamedTemporaryFile(delete=False)
    file_path = tf.name
    tf.close()

    try:
        with h5py.File(file_path, mode='w') as h5f:
            h5f.create_dataset(node_path, data=haplotype_data,
                               chunks=(2, 3))

        # file and node path
        h = HaplotypeCArray.from_hdf5(file_path, node_path)
        aeq(haplotype_data, h)

        # dataset
        with h5py.File(file_path, mode='r') as h5f:
            dataset = h5f[node_path]
            h = HaplotypeCArray.from_hdf5(dataset)
            aeq(haplotype_data, h)
    finally:
        # delete=False leaves the file on disk, so remove it here; cleanup is
        # best-effort so that it can never mask a genuine test failure
        try:
            os.remove(file_path)
        except OSError:
            pass
def test_mean_pairwise_divergence(self):
    """Check allel.stats.mean_pairwise_difference_between on two 2-haplotype groups."""
    # simplest case, two haplotypes in each population
    h = HaplotypeArray([[0, 0, 0, 0],
                        [0, 0, 0, 1],
                        [0, 0, 1, 1],
                        [0, 1, 1, 1],
                        [1, 1, 1, 1],
                        [0, 0, 1, 2],
                        [0, 1, 1, 2],
                        [0, 1, -1, -1],
                        [-1, -1, -1, -1]])
    # columns 0-1 form the first population, columns 2-3 the second
    first = h.take([0, 1], axis=1)
    second = h.take([2, 3], axis=1)
    ac1 = first.count_alleles()
    ac2 = second.count_alleles()

    expect = [0/4, 2/4, 4/4, 2/4, 0/4, 4/4, 3/4, -1, -1]
    actual = allel.stats.mean_pairwise_difference_between(ac1, ac2,
                                                          fill=-1)
    aeq(expect, actual)
def test_view(self):
    """Check that viewing an ndarray as SortedIndex validates its input."""
    # data has wrong dtype, then data has wrong dimensions
    for bad in ('foo bar', [[1, 2], [3, 4]]):
        with self.assertRaises(TypeError):
            np.asarray(bad).view(SortedIndex)

    # values are not sorted
    for unsorted in ([2, 1, 3, 5], [4., 5., 3.7]):
        with self.assertRaises(ValueError):
            np.asarray(unsorted).view(SortedIndex)

    # valid data (unique)
    data = [1, 4, 5, 7, 12]
    idx = np.asarray(data).view(SortedIndex)
    aeq(data, idx)
    # np.int was only an alias of the builtin int and no longer exists in
    # NumPy >= 1.24; compare against the default integer dtype instead.
    eq(np.dtype(int), idx.dtype)
    eq(1, idx.ndim)
    eq(5, len(idx))
    assert idx.is_unique

    # valid data (non-unique)
    data = [1, 4, 5, 5, 7, 12]
    idx = np.asarray(data).view(SortedIndex)
    aeq(data, idx)
    eq(np.dtype(int), idx.dtype)
    eq(1, idx.ndim)
    eq(6, len(idx))
    assert not idx.is_unique

    # valid data (typed)
    data = np.array([1, 4, 5, 5, 7, 12], dtype='u4')
    idx = np.asarray(data).view(SortedIndex)
    aeq(data, idx)
    eq(np.uint32, idx.dtype)

    # valid data (non-numeric)
    data = ['1', '12', '4', '5', '5', '7']
    idx = np.asarray(data).view(SortedIndex)
    aeq(data, idx)