def test_export_block_matrices(self):
    """Export block matrices as text files and compare each file with its source array.

    Two exports are checked: one using the default ``{index}.tsv`` file
    names, and one using caller-supplied ``custom_filenames`` (one of which
    contains a nested directory component).
    """
    data = [np.random.rand(11 * 12), np.random.rand(5 * 17)]
    arrs = [data[0].reshape((11, 12)), data[1].reshape((5, 17))]
    bms = [
        hl.linalg.BlockMatrix._create(11, 12, data[0].tolist(), block_size=4),
        hl.linalg.BlockMatrix._create(5, 17, data[1].tolist(), block_size=8),
    ]

    with hl.TemporaryDirectory() as prefix:
        hl.experimental.export_block_matrices(bms, f'{prefix}/files')
        for i, expected in enumerate(arrs):
            # Open under ``with`` so the handle is closed as soon as the
            # file has been parsed, instead of being left to the GC.
            with hl.current_backend().fs.open(f'{prefix}/files/{i}.tsv') as f:
                actual = np.loadtxt(f)
            self.assertTrue(np.array_equal(expected, actual))

    with hl.TemporaryDirectory() as prefix2:
        custom_names = ["nameA", "inner/nameB.tsv"]
        hl.experimental.export_block_matrices(
            bms, f'{prefix2}/files', custom_filenames=custom_names)
        for name, expected in zip(custom_names, arrs):
            with hl.current_backend().fs.open(f'{prefix2}/files/{name}') as f:
                actual = np.loadtxt(f)
            self.assertTrue(np.array_equal(expected, actual))
def test_stage_locally(self):
    """Write a block matrix with ``stage_locally=True`` and check it reads back equal."""
    expected = np.arange(0, 80, dtype=float).reshape(8, 10)

    with hl.TemporaryDirectory(ensure_exists=False) as bm_uri:
        source = BlockMatrix.from_numpy(expected, block_size=3)
        source.write(bm_uri, stage_locally=True)

        round_tripped = BlockMatrix.read(bm_uri)
        self._assert_eq(expected, round_tripped)
def test_rectangles_to_numpy(self):
    """Export rectangles as text and as binary, then rebuild both with ``rectangles_to_numpy``."""
    nd = np.array([
        [1.0, 2.0, 3.0],
        [4.0, 5.0, 6.0],
        [7.0, 8.0, 9.0],
    ])
    rects = [[0, 3, 0, 1], [1, 2, 0, 2]]

    with hl.TemporaryDirectory() as rect_uri, hl.TemporaryDirectory() as rect_bytes_uri:
        text_source = BlockMatrix.from_numpy(nd)
        text_source.export_rectangles(rect_uri, rects)

        binary_source = BlockMatrix.from_numpy(nd)
        binary_source.export_rectangles(rect_bytes_uri, rects, binary=True)

        expected = np.array([
            [1.0, 0.0],
            [4.0, 5.0],
            [7.0, 0.0],
        ])

        from_text = BlockMatrix.rectangles_to_numpy(rect_uri)
        self._assert_eq(expected, from_text)

        from_binary = BlockMatrix.rectangles_to_numpy(rect_bytes_uri, binary=True)
        self._assert_eq(expected, from_binary)
def test_export_blocks(self):
    """Export blocks in binary form and read them back with ``rectangles_to_numpy``."""
    expected = np.ones(shape=(8, 10))
    source = BlockMatrix.from_numpy(expected, block_size=20)

    with hl.TemporaryDirectory() as bm_uri:
        source.export_blocks(bm_uri, binary=True)
        reloaded = BlockMatrix.rectangles_to_numpy(bm_uri, binary=True)

    self._assert_eq(expected, reloaded)
def setupAnnotationDBTests(cls):
    """Start the Hail test context and attach annotation-DB fixtures to ``cls``.

    A ten-row table keyed by locus is written into a temporary directory.
    Two attributes are then set on ``cls``:

    * ``tempdir_manager`` -- the ``hl.TemporaryDirectory`` context manager,
      already entered.
    * ``db_json`` -- a config dict describing two datasets whose URLs all
      point at the written table.
    """
    startTestHailContext()

    table = hl.utils.range_table(10)
    table = table.key_by(locus=hl.locus('1', table.idx + 1))
    table = table.annotate(annotation=hl.str(table.idx))

    # Entered by hand rather than via ``with``: the manager is stored on
    # ``cls`` (presumably so a teardown elsewhere can exit it -- not visible here).
    cls.tempdir_manager = hl.TemporaryDirectory()
    tmp_dir = cls.tempdir_manager.__enter__()
    fname = tmp_dir + '/f.mt'
    table.write(fname)

    def region_urls():
        # Fresh dict per call so no two config entries share an object.
        return {"eu": fname, "us": fname}

    def version_list():
        return [{
            'url': {"aws": region_urls(), "gcp": region_urls()},
            'version': 'v1',
            'reference_genome': 'GRCh37',
        }]

    unique_dataset = {
        'description': 'now with unique rows!',
        'url': 'https://example.com',
        'annotation_db': {'key_properties': ['unique']},
        'versions': version_list(),
    }
    nonunique_dataset = {
        'description': 'non-unique rows :(',
        'url': 'https://example.net',
        'annotation_db': {'key_properties': []},
        'versions': version_list(),
    }

    cls.db_json = {
        'unique_dataset': unique_dataset,
        'nonunique_dataset': nonunique_dataset,
    }
def test_write_overwrite(self):
    """A second ``write`` to the same path raises ``FatalError`` unless ``overwrite=True``."""
    with hl.TemporaryDirectory(ensure_exists=False) as path:
        first = BlockMatrix.from_numpy(np.array([[0]]))
        first.write(path)

        with self.assertRaises(FatalError):
            first.write(path)

        replacement = BlockMatrix.from_numpy(np.array([[1]]))
        replacement.write(path, overwrite=True)

        self._assert_eq(BlockMatrix.read(path), replacement)
def test_write_from_entry_expr_overwrite(self):
    """``write_from_entry_expr`` raises ``FatalError`` on an existing path unless ``overwrite=True``.

    Checked once with a plain entry field and once with arithmetic
    expressions built on that field.
    """
    mt = hl.balding_nichols_model(1, 1, 1)
    mt = mt.select_entries(x=mt.GT.n_alt_alleles())
    bm = BlockMatrix.from_entry_expr(mt.x)

    with hl.TemporaryDirectory(ensure_exists=False) as field_path:
        BlockMatrix.write_from_entry_expr(mt.x, field_path)

        with self.assertRaises(FatalError):
            BlockMatrix.write_from_entry_expr(mt.x, field_path)

        BlockMatrix.write_from_entry_expr(mt.x, field_path, overwrite=True)
        self._assert_eq(BlockMatrix.read(field_path), bm)

    with hl.TemporaryDirectory(ensure_exists=False) as expr_path:
        # non-field expressions currently take a separate code path
        BlockMatrix.write_from_entry_expr(mt.x + 1, expr_path)

        with self.assertRaises(FatalError):
            BlockMatrix.write_from_entry_expr(mt.x + 1, expr_path)

        BlockMatrix.write_from_entry_expr(mt.x + 2, expr_path, overwrite=True)
        self._assert_eq(BlockMatrix.read(expr_path), bm + 2)
def test_export_rectangles(self):
    """Export several rectangle lists at several block sizes, as text and as binary."""
    nd = np.arange(0, 80, dtype=float).reshape(8, 10)

    rect_sets = (
        [[0, 1, 0, 1], [4, 5, 7, 8]],
        [[4, 5, 0, 10], [0, 8, 4, 5]],
        [
            [0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3],
            [3, 5, 3, 6], [3, 6, 3, 7], [3, 7, 3, 8],
            [4, 5, 0, 10], [0, 8, 4, 5], [0, 8, 0, 10],
        ],
    )

    # Same iteration order as nesting block sizes inside rectangle sets.
    cases = [(rects, size) for rects in rect_sets for size in (3, 4, 10)]

    for rects, size in cases:
        with hl.TemporaryDirectory() as rect_uri, hl.TemporaryDirectory() as rect_uri_bytes:
            bm = BlockMatrix.from_numpy(nd, block_size=size)

            bm.export_rectangles(rect_uri, rects)
            self._assert_rectangles_eq(nd, rect_uri, rects)

            bm.export_rectangles(rect_uri_bytes, rects, binary=True)
            self._assert_rectangles_eq(nd, rect_uri_bytes, rects, binary=True)
def test_export_rectangles_filtered(self):
    """Export rectangles from a block matrix that has been sliced to ``[1:3, 1:3]``."""
    with hl.TemporaryDirectory() as rect_uri:
        full = np.array([
            [1.0, 2.0, 3.0, 4.0],
            [5.0, 6.0, 7.0, 8.0],
            [9.0, 10.0, 11.0, 12.0],
            [13.0, 14.0, 15.0, 16.0],
        ])
        sliced = BlockMatrix.from_numpy(full)[1:3, 1:3]

        export_rects = [[0, 1, 0, 2], [1, 2, 0, 2]]
        sliced.export_rectangles(rect_uri, export_rects)

        expected = np.array([
            [6.0, 7.0],
            [10.0, 11.0],
        ])
        self._assert_rectangles_eq(expected, rect_uri, export_rects)
def test_from_entry_expr(self):
    """Build block matrices from equivalent entry expressions and check they all agree.

    The inline expression, the annotated field, the field cast to float64,
    and a ``write_from_entry_expr`` / ``read`` round trip are all compared
    against the inline-expression result.
    """
    mt = get_dataset()
    mt = mt.annotate_entries(x=hl.or_else(mt.GT.n_alt_alleles(), 0)).cache()

    def as_numpy(entry_expr):
        return BlockMatrix.from_entry_expr(entry_expr, block_size=32).to_numpy()

    from_inline = as_numpy(hl.or_else(mt.GT.n_alt_alleles(), 0))
    from_field = as_numpy(mt.x)
    from_float_field = as_numpy(hl.float64(mt.x))

    for other in (from_field, from_float_field):
        self._assert_eq(from_inline, other)

    with hl.TemporaryDirectory(ensure_exists=False) as path:
        BlockMatrix.write_from_entry_expr(mt.x, path, block_size=32)
        from_disk = BlockMatrix.read(path).to_numpy()
        self._assert_eq(from_inline, from_disk)
def test_export_rectangles_sparse(self):
    """Export rectangles from a block matrix after ``sparsify_rectangles``.

    The expected array has zeros in rows 2-3, columns 2-3; the remaining
    entries equal the dense input.
    """
    with hl.TemporaryDirectory() as rect_uri:
        dense = np.array([
            [1.0, 2.0, 3.0, 4.0],
            [5.0, 6.0, 7.0, 8.0],
            [9.0, 10.0, 11.0, 12.0],
            [13.0, 14.0, 15.0, 16.0],
        ])
        bm = BlockMatrix.from_numpy(dense, block_size=2)

        sparsify_rects = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4]]
        export_rects = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4], [2, 4, 2, 4]]

        sparse = bm.sparsify_rectangles(sparsify_rects)
        sparse.export_rectangles(rect_uri, export_rects)

        expected = np.array([
            [1.0, 2.0, 3.0, 4.0],
            [5.0, 6.0, 7.0, 8.0],
            [9.0, 10.0, 0.0, 0.0],
            [13.0, 14.0, 0.0, 0.0],
        ])
        self._assert_rectangles_eq(expected, rect_uri, export_rects)
def test_block_matrices_tofiles(self):
    """Write block matrices with ``block_matrices_tofiles`` and compare the raw bytes.

    Each output file ``{index}`` is read in binary mode, decoded with
    ``np.frombuffer`` (default dtype) and compared against the flat source
    data the corresponding block matrix was created from.
    """
    data = [np.random.rand(11 * 12), np.random.rand(5 * 17)]
    bms = [
        hl.linalg.BlockMatrix._create(11, 12, data[0].tolist(), block_size=4),
        hl.linalg.BlockMatrix._create(5, 17, data[1].tolist(), block_size=8),
    ]

    with hl.TemporaryDirectory() as prefix:
        hl.experimental.block_matrices_tofiles(bms, f'{prefix}/files')
        for i, expected in enumerate(data):
            # Read under ``with`` so the handle is closed right away rather
            # than leaked until garbage collection.
            with hl.current_backend().fs.open(f'{prefix}/files/{i}', mode='rb') as f:
                raw = f.read()
            actual = np.frombuffer(raw)
            self.assertTrue(np.array_equal(expected, actual))