def test_export_rectangles(self):
    # Exports several rectangle sets from an 8x10 matrix at different block
    # sizes, once with the default format and once with binary=True, and
    # checks each output directory through self._assert_rectangles_eq.
    nd = np.arange(0, 80, dtype=float).reshape(8, 10)

    rect_sets = (
        [[0, 1, 0, 1], [4, 5, 7, 8]],
        [[4, 5, 0, 10], [0, 8, 4, 5]],
        [[0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3],
         [3, 5, 3, 6], [3, 6, 3, 7], [3, 7, 3, 8],
         [4, 5, 0, 10], [0, 8, 4, 5], [0, 8, 0, 10]],
    )

    for rects in rect_sets:
        for block_size in (3, 4, 10):
            default_dir = new_local_temp_dir()
            default_uri = local_path_uri(default_dir)
            matrix = BlockMatrix.from_numpy(nd, block_size=block_size)

            # Export with the default output format.
            matrix.export_rectangles(default_uri, rects)
            self._assert_rectangles_eq(nd, default_dir, rects)

            # Export the same matrix again, as binary, into a fresh directory.
            binary_dir = new_local_temp_dir()
            binary_uri = local_path_uri(binary_dir)
            matrix.export_rectangles(binary_uri, rects, binary=True)
            self._assert_rectangles_eq(nd, binary_dir, rects, binary=True)
def test_export_rectangles(self):
    # Exports several rectangle sets from an 8x10 matrix at different block
    # sizes, once with the default format and once with binary=True, and
    # checks each output directory through self._assert_rectangles_eq.
    nd = np.arange(0, 80, dtype=float).reshape(8, 10)

    rect_sets = (
        [[0, 1, 0, 1], [4, 5, 7, 8]],
        [[4, 5, 0, 10], [0, 8, 4, 5]],
        [[0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3],
         [3, 5, 3, 6], [3, 6, 3, 7], [3, 7, 3, 8],
         [4, 5, 0, 10], [0, 8, 4, 5], [0, 8, 0, 10]],
    )

    for rects in rect_sets:
        for block_size in (3, 4, 10):
            default_dir = new_local_temp_dir()
            default_uri = local_path_uri(default_dir)
            matrix = BlockMatrix.from_numpy(nd, block_size=block_size)

            # Export with the default output format.
            matrix.export_rectangles(default_uri, rects)
            self._assert_rectangles_eq(nd, default_dir, rects)

            # Export the same matrix again, as binary, into a fresh directory.
            binary_dir = new_local_temp_dir()
            binary_uri = local_path_uri(binary_dir)
            matrix.export_rectangles(binary_uri, rects, binary=True)
            self._assert_rectangles_eq(nd, binary_dir, rects, binary=True)
def test_export_block_matrices(self):
    # Exports two random block matrices, first under the default
    # "<index>.tsv" names and then under caller-supplied file names, and
    # checks that each file loads back equal to its source array.
    shapes = [(11, 12), (5, 17)]
    data = [np.random.rand(n_rows * n_cols) for n_rows, n_cols in shapes]
    arrs = [flat.reshape(shape) for flat, shape in zip(data, shapes)]
    bms = [
        hl.linalg.BlockMatrix._create(n_rows, n_cols, flat.tolist(), block_size=block_size)
        for (n_rows, n_cols), flat, block_size in zip(shapes, data, (4, 8))
    ]

    # Default file names.
    prefix = new_local_temp_dir()
    hl.experimental.export_block_matrices(bms, f'{prefix}/files')
    for i, expected in enumerate(arrs):
        loaded = np.loadtxt(f'{prefix}/files/{i}.tsv')
        self.assertTrue(np.array_equal(expected, loaded))

    # Custom file names; the second one sits in a sub-directory.
    prefix2 = new_local_temp_dir()
    custom_names = ["nameA", "inner/nameB.tsv"]
    hl.experimental.export_block_matrices(bms, f'{prefix2}/files', custom_filenames=custom_names)
    for i, expected in enumerate(arrs):
        loaded = np.loadtxt(f'{prefix2}/files/{custom_names[i]}')
        self.assertTrue(np.array_equal(expected, loaded))
def test_export_rectangles(self):
    # Writes sparsified block matrices to disk, exports rectangles from the
    # written matrix as text and as binary, and compares every exported file
    # with the matching NumPy slice. Finally checks that exporting a
    # rectangle whose block was not kept raises FatalError with the expected
    # message.
    nd = np.arange(0, 80, dtype=float).reshape(8, 10)
    rects1 = [[0, 1, 0, 1], [4, 5, 7, 8]]
    rects2 = [[4, 5, 0, 10], [0, 8, 4, 5]]
    rects3 = [[0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3],
              [3, 5, 3, 6], [3, 6, 3, 7], [3, 7, 3, 8],
              [4, 5, 0, 10], [0, 8, 4, 5], [0, 8, 0, 10]]

    for rects in [rects1, rects2, rects3]:
        for block_size in [3, 4, 10]:
            bm_uri = new_temp_file()

            rect_path = new_local_temp_dir()
            rect_uri = local_path_uri(rect_path)

            (BlockMatrix.from_numpy(nd, block_size=block_size)
                .sparsify_rectangles(rects)
                .write(bm_uri, force_row_major=True))

            # Text export: one file per rectangle, readable with np.loadtxt.
            BlockMatrix.export_rectangles(bm_uri, rect_uri, rects)
            for i, r in enumerate(rects):
                rect_file = rect_path + '/rect-' + str(i) + '_' + '-'.join(map(str, r))
                expected = nd[r[0]:r[1], r[2]:r[3]]
                # ndmin=2 keeps single-row / single-column rectangles 2-D.
                actual = np.loadtxt(rect_file, ndmin=2)
                self._assert_eq(expected, actual)

            rect_path_bytes = new_local_temp_dir()
            rect_uri_bytes = local_path_uri(rect_path_bytes)

            # Binary export: the file is read back flat with np.fromfile and
            # reshaped to the rectangle's dimensions.
            BlockMatrix.export_rectangles(bm_uri, rect_uri_bytes, rects, binary=True)
            for i, r in enumerate(rects):
                rect_file = rect_path_bytes + '/rect-' + str(i) + '_' + '-'.join(map(str, r))
                expected = nd[r[0]:r[1], r[2]:r[3]]
                actual = np.reshape(np.fromfile(rect_file), (r[1] - r[0], r[3] - r[2]))
                self._assert_eq(expected, actual)

    bm_uri = new_temp_file()
    rect_uri = new_temp_file()

    (BlockMatrix.from_numpy(nd, block_size=5)
        .sparsify_rectangles([[0, 1, 0, 1]])
        .write(bm_uri, force_row_major=True))

    with self.assertRaises(FatalError) as e:
        BlockMatrix.export_rectangles(bm_uri, rect_uri, [[5, 6, 5, 6]])

    # The message check must run after the ``with`` block: statements placed
    # after the raising call inside the block are never executed. The raised
    # exception is available as ``e.exception``. A substring match is used
    # because the full FatalError text may carry extra context around the
    # core message (NOTE(review): confirm against the FatalError format).
    self.assertIn(
        'block (1, 1) missing for rectangle 0 with bounds [5, 6, 5, 6]',
        str(e.exception))
def test_export_rectangles(self):
    # Writes sparsified block matrices to disk, exports rectangles from the
    # written matrix as text and as binary, and compares every exported file
    # with the matching NumPy slice. Finally checks that exporting a
    # rectangle whose block was not kept raises FatalError with the expected
    # message.
    nd = np.arange(0, 80, dtype=float).reshape(8, 10)
    rects1 = [[0, 1, 0, 1], [4, 5, 7, 8]]
    rects2 = [[4, 5, 0, 10], [0, 8, 4, 5]]
    rects3 = [[0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3],
              [3, 5, 3, 6], [3, 6, 3, 7], [3, 7, 3, 8],
              [4, 5, 0, 10], [0, 8, 4, 5], [0, 8, 0, 10]]

    for rects in [rects1, rects2, rects3]:
        for block_size in [3, 4, 10]:
            bm_uri = new_temp_file()

            rect_path = new_local_temp_dir()
            rect_uri = local_path_uri(rect_path)

            (BlockMatrix.from_numpy(nd, block_size=block_size)
                .sparsify_rectangles(rects)
                .write(bm_uri, force_row_major=True))

            # Text export: one file per rectangle, readable with np.loadtxt.
            BlockMatrix.export_rectangles(bm_uri, rect_uri, rects)
            for i, r in enumerate(rects):
                rect_file = rect_path + '/rect-' + str(i) + '_' + '-'.join(map(str, r))
                expected = nd[r[0]:r[1], r[2]:r[3]]
                # ndmin=2 keeps single-row / single-column rectangles 2-D.
                actual = np.loadtxt(rect_file, ndmin=2)
                self._assert_eq(expected, actual)

            rect_path_bytes = new_local_temp_dir()
            rect_uri_bytes = local_path_uri(rect_path_bytes)

            # Binary export: the file is read back flat with np.fromfile and
            # reshaped to the rectangle's dimensions.
            BlockMatrix.export_rectangles(bm_uri, rect_uri_bytes, rects, binary=True)
            for i, r in enumerate(rects):
                rect_file = rect_path_bytes + '/rect-' + str(i) + '_' + '-'.join(map(str, r))
                expected = nd[r[0]:r[1], r[2]:r[3]]
                actual = np.reshape(np.fromfile(rect_file), (r[1] - r[0], r[3] - r[2]))
                self._assert_eq(expected, actual)

    bm_uri = new_temp_file()
    rect_uri = new_temp_file()

    (BlockMatrix.from_numpy(nd, block_size=5)
        .sparsify_rectangles([[0, 1, 0, 1]])
        .write(bm_uri, force_row_major=True))

    with self.assertRaises(FatalError) as e:
        BlockMatrix.export_rectangles(bm_uri, rect_uri, [[5, 6, 5, 6]])

    # The message check must run after the ``with`` block: statements placed
    # after the raising call inside the block are never executed. The raised
    # exception is available as ``e.exception``. A substring match is used
    # because the full FatalError text may carry extra context around the
    # core message (NOTE(review): confirm against the FatalError format).
    self.assertIn(
        'block (1, 1) missing for rectangle 0 with bounds [5, 6, 5, 6]',
        str(e.exception))
def to_numpy(self):
    """Collects the block matrix into a `NumPy ndarray
    <https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html>`__.

    Examples
    --------

    >>> from hail.linalg import BlockMatrix
    >>> bm = BlockMatrix.random(10, 20)
    >>> a = bm.to_numpy()

    Notes
    -----
    The number of entries must be less than :math:`2^{31}`.

    The resulting ndarray will have the same shape as the block matrix.

    Returns
    -------
    :class:`numpy.ndarray`
    """
    # Round-trip through a file in a fresh local temp directory: the matrix
    # is written with tofile and read back with NumPy.
    binary_path = new_local_temp_dir() + '/binary'
    self.tofile(local_path_uri(binary_path))

    # np.fromfile yields a flat array; restore the matrix dimensions.
    flat = np.fromfile(binary_path)
    return flat.reshape((self.n_rows, self.n_cols))
def test_export_blocks(self):
    # Exports the blocks of an all-ones 8x10 matrix as binary and checks
    # that rectangles_to_numpy reassembles the original array. The block
    # size (20) exceeds both matrix dimensions.
    ones = np.ones(shape=(8, 10))
    matrix = BlockMatrix.from_numpy(ones, block_size=20)

    export_dir = new_local_temp_dir()
    matrix.export_blocks(local_path_uri(export_dir), binary=True)

    reassembled = BlockMatrix.rectangles_to_numpy(export_dir, binary=True)
    self._assert_eq(ones, reassembled)
def test_export_blocks(self):
    # Exports the blocks of an all-ones 8x10 matrix as binary and checks
    # that rectangles_to_numpy reassembles the original array. The block
    # size (20) exceeds both matrix dimensions.
    ones = np.ones(shape=(8, 10))
    matrix = BlockMatrix.from_numpy(ones, block_size=20)

    export_dir = new_local_temp_dir()
    matrix.export_blocks(local_path_uri(export_dir), binary=True)

    reassembled = BlockMatrix.rectangles_to_numpy(export_dir, binary=True)
    self._assert_eq(ones, reassembled)
def test_rectangles_to_numpy(self):
    # Exports two overlapping rectangles of a 3x3 matrix, with the default
    # format and with binary=True, and checks that rectangles_to_numpy
    # rebuilds the same 3x2 array from either directory.
    nd = np.array([[1.0, 2.0, 3.0],
                   [4.0, 5.0, 6.0],
                   [7.0, 8.0, 9.0]])
    rects = [[0, 3, 0, 1], [1, 2, 0, 2]]

    default_dir = new_local_temp_dir()
    default_uri = local_path_uri(default_dir)
    BlockMatrix.from_numpy(nd).export_rectangles(default_uri, rects)

    binary_dir = new_local_temp_dir()
    binary_uri = local_path_uri(binary_dir)
    BlockMatrix.from_numpy(nd).export_rectangles(binary_uri, rects, binary=True)

    # The 0.0 entries are the positions the test expects to stay unfilled
    # (presumably those covered by neither rectangle -- confirm against
    # rectangles_to_numpy).
    expected = np.array([[1.0, 0.0],
                         [4.0, 5.0],
                         [7.0, 0.0]])

    from_default = BlockMatrix.rectangles_to_numpy(default_dir)
    self._assert_eq(expected, from_default)

    from_binary = BlockMatrix.rectangles_to_numpy(binary_dir, binary=True)
    self._assert_eq(expected, from_binary)
def test_rectangles_to_numpy(self):
    # Exports two overlapping rectangles of a 3x3 matrix, with the default
    # format and with binary=True, and checks that rectangles_to_numpy
    # rebuilds the same 3x2 array from either directory.
    nd = np.array([[1.0, 2.0, 3.0],
                   [4.0, 5.0, 6.0],
                   [7.0, 8.0, 9.0]])
    rects = [[0, 3, 0, 1], [1, 2, 0, 2]]

    default_dir = new_local_temp_dir()
    default_uri = local_path_uri(default_dir)
    BlockMatrix.from_numpy(nd).export_rectangles(default_uri, rects)

    binary_dir = new_local_temp_dir()
    binary_uri = local_path_uri(binary_dir)
    BlockMatrix.from_numpy(nd).export_rectangles(binary_uri, rects, binary=True)

    # The 0.0 entries are the positions the test expects to stay unfilled
    # (presumably those covered by neither rectangle -- confirm against
    # rectangles_to_numpy).
    expected = np.array([[1.0, 0.0],
                         [4.0, 5.0],
                         [7.0, 0.0]])

    from_default = BlockMatrix.rectangles_to_numpy(default_dir)
    self._assert_eq(expected, from_default)

    from_binary = BlockMatrix.rectangles_to_numpy(binary_dir, binary=True)
    self._assert_eq(expected, from_binary)
def test_export_rectangles_filtered(self):
    # Slices the central 2x2 window out of a 4x4 matrix and exports it as
    # two one-row rectangles; coordinates are relative to the sliced matrix.
    out_dir = new_local_temp_dir()
    out_uri = local_path_uri(out_dir)

    full = np.array([[1.0, 2.0, 3.0, 4.0],
                     [5.0, 6.0, 7.0, 8.0],
                     [9.0, 10.0, 11.0, 12.0],
                     [13.0, 14.0, 15.0, 16.0]])

    window = BlockMatrix.from_numpy(full)
    window = window[1:3, 1:3]

    rects = [[0, 1, 0, 2], [1, 2, 0, 2]]
    window.export_rectangles(out_uri, rects)

    expected = np.array([[6.0, 7.0],
                         [10.0, 11.0]])
    self._assert_rectangles_eq(expected, out_dir, rects)
def from_numpy(cls, ndarray, block_size=None):
    """Distributes a `NumPy ndarray
    <https://docs.scipy.org/doc/numpy/reference/generated/numpy.ndarray.html>`__
    as a block matrix.

    Examples
    --------

    >>> import numpy as np
    >>> a = np.random.rand(10, 20)
    >>> bm = BlockMatrix.from_numpy(a)

    Notes
    -----
    The ndarray must have two dimensions, each of non-zero size.

    The number of entries must be less than :math:`2^{31}`.

    Parameters
    ----------
    ndarray: :class:`numpy.ndarray`
        ndarray with two dimensions, each of non-zero size.
    block_size: :obj:`int`, optional
        Block size. Default given by :meth:`default_block_size`.

    Returns
    -------
    :class:`.BlockMatrix`
    """
    # Any falsy block_size (None or 0) selects the default.
    block_size = block_size or BlockMatrix.default_block_size()

    if ndarray.ndim != 2:
        raise FatalError(
            "from_numpy: ndarray must have two axes, found shape {}".format(ndarray.shape))

    n_rows, n_cols = ndarray.shape
    if 0 in (n_rows, n_cols):
        raise FatalError(
            "from_numpy: ndarray dimensions must be non-zero, found shape {}".format(ndarray.shape))

    # The values are written as float64; convert only when necessary.
    float_data = ndarray if ndarray.dtype == np.float64 else ndarray.astype(np.float64)

    # Hand-off goes through a file in a fresh local temp directory: NumPy
    # writes to the plain path, fromfile reads from the matching URI.
    binary_path = new_local_temp_dir() + '/binary'
    binary_uri = local_path_uri(binary_path)
    float_data.tofile(binary_path)
    return cls.fromfile(binary_uri, n_rows, n_cols, block_size)
def test_export_rectangles_sparse(self):
    # Sparsifies a 4x4 matrix (block size 2) to a set of rectangles, then
    # exports a larger set that also includes [2, 4, 2, 4], which is not in
    # the sparsify list; the expected array has zeros in that corner.
    out_dir = new_local_temp_dir()
    out_uri = local_path_uri(out_dir)

    nd = np.array([[1.0, 2.0, 3.0, 4.0],
                   [5.0, 6.0, 7.0, 8.0],
                   [9.0, 10.0, 11.0, 12.0],
                   [13.0, 14.0, 15.0, 16.0]])
    dense = BlockMatrix.from_numpy(nd, block_size=2)

    sparsify_rects = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4]]
    export_rects = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4], [2, 4, 2, 4]]

    sparse = dense.sparsify_rectangles(sparsify_rects)
    sparse.export_rectangles(out_uri, export_rects)

    expected = np.array([[1.0, 2.0, 3.0, 4.0],
                         [5.0, 6.0, 7.0, 8.0],
                         [9.0, 10.0, 0.0, 0.0],
                         [13.0, 14.0, 0.0, 0.0]])
    self._assert_rectangles_eq(expected, out_dir, export_rects)
def test_export_rectangles_filtered(self):
    # Slices the central 2x2 window out of a 4x4 matrix and exports it as
    # two one-row rectangles; coordinates are relative to the sliced matrix.
    out_dir = new_local_temp_dir()
    out_uri = local_path_uri(out_dir)

    full = np.array([[1.0, 2.0, 3.0, 4.0],
                     [5.0, 6.0, 7.0, 8.0],
                     [9.0, 10.0, 11.0, 12.0],
                     [13.0, 14.0, 15.0, 16.0]])

    window = BlockMatrix.from_numpy(full)
    window = window[1:3, 1:3]

    rects = [[0, 1, 0, 2], [1, 2, 0, 2]]
    window.export_rectangles(out_uri, rects)

    expected = np.array([[6.0, 7.0],
                         [10.0, 11.0]])
    self._assert_rectangles_eq(expected, out_dir, rects)
def test_export_rectangles_sparse(self):
    # Sparsifies a 4x4 matrix (block size 2) to a set of rectangles, then
    # exports a larger set that also includes [2, 4, 2, 4], which is not in
    # the sparsify list; the expected array has zeros in that corner.
    out_dir = new_local_temp_dir()
    out_uri = local_path_uri(out_dir)

    nd = np.array([[1.0, 2.0, 3.0, 4.0],
                   [5.0, 6.0, 7.0, 8.0],
                   [9.0, 10.0, 11.0, 12.0],
                   [13.0, 14.0, 15.0, 16.0]])
    dense = BlockMatrix.from_numpy(nd, block_size=2)

    sparsify_rects = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4]]
    export_rects = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4], [2, 4, 2, 4]]

    sparse = dense.sparsify_rectangles(sparsify_rects)
    sparse.export_rectangles(out_uri, export_rects)

    expected = np.array([[1.0, 2.0, 3.0, 4.0],
                         [5.0, 6.0, 7.0, 8.0],
                         [9.0, 10.0, 0.0, 0.0],
                         [13.0, 14.0, 0.0, 0.0]])
    self._assert_rectangles_eq(expected, out_dir, export_rects)
def test_export_block_matrices(self):
    # Exports two random block matrices under the default "<index>.tsv"
    # names and checks that each file loads back equal to its source array.
    shapes = [(11, 12), (5, 17)]
    data = [np.random.rand(n_rows * n_cols) for n_rows, n_cols in shapes]
    arrs = [flat.reshape(shape) for flat, shape in zip(data, shapes)]
    bms = [
        hl.linalg.BlockMatrix._create(n_rows, n_cols, flat.tolist(), block_size=block_size)
        for (n_rows, n_cols), flat, block_size in zip(shapes, data, (4, 8))
    ]

    prefix = new_local_temp_dir()
    hl.experimental.export_block_matrices(bms, f'{prefix}/files')

    for i, expected in enumerate(arrs):
        loaded = np.loadtxt(f'{prefix}/files/{i}.tsv')
        self.assertTrue(np.array_equal(expected, loaded))
def test_block_matrices_tofiles(self):
    # Writes two random block matrices with block_matrices_tofiles and
    # checks that each output file, read back flat with np.fromfile, equals
    # the flat data the matrix was created from.
    data = [np.random.rand(11 * 12), np.random.rand(5 * 17)]
    bms = [
        hl.linalg.BlockMatrix._create(11, 12, data[0].tolist(), block_size=4),
        hl.linalg.BlockMatrix._create(5, 17, data[1].tolist(), block_size=8)
    ]

    prefix = new_local_temp_dir()
    hl.experimental.block_matrices_tofiles(bms, f'{prefix}/files')

    # np.fromfile returns a 1-D array, so the comparison is against the flat
    # input rather than a reshaped copy.
    for i, expected in enumerate(data):
        actual = np.fromfile(f'{prefix}/files/{i}')
        self.assertTrue(np.array_equal(expected, actual))