def test_sparsify_band(self):
    """Check ``sparsify_band`` on a 4x4 matrix and on an 8x10 matrix.

    The 4x4 cases are compared with literal expected arrays; the 8x10
    cases are compared with the input multiplied by a band mask computed
    from the index offset ``j - i``.
    """
    dense = np.array([[1.0, 2.0, 3.0, 4.0],
                      [5.0, 6.0, 7.0, 8.0],
                      [9.0, 10.0, 11.0, 12.0],
                      [13.0, 14.0, 15.0, 16.0]])
    bm = BlockMatrix.from_numpy(dense, block_size=2)

    expected_band = np.array([[1., 2., 3., 0.],
                              [5., 6., 7., 8.],
                              [0., 10., 11., 12.],
                              [0., 0., 15., 16.]])
    self._assert_eq(bm.sparsify_band(lower=-1, upper=2), expected_band)

    # With blocks_only=True the expectation keeps both diagonal 2x2 blocks
    # whole, including their off-diagonal entries.
    expected_blocks = np.array([[1., 2., 0., 0.],
                                [5., 6., 0., 0.],
                                [0., 0., 11., 12.],
                                [0., 0., 15., 16.]])
    self._assert_eq(bm.sparsify_band(lower=0, upper=0, blocks_only=True),
                    expected_blocks)

    values = np.arange(0, 80, dtype=float).reshape(8, 10)
    bm2 = BlockMatrix.from_numpy(values, block_size=3)
    band_limits = [[0, 0], [1, 1], [2, 2], [-5, 5], [-7, 0], [0, 9], [-100, 100]]
    for lower, upper in band_limits:
        def in_band(i, j):
            # True where the column-minus-row offset lies in [lower, upper].
            return (lower <= j - i) * (j - i <= upper)

        actual = bm2.sparsify_band(lower, upper, blocks_only=False).to_numpy()
        mask = np.fromfunction(in_band, (8, 10))
        self._assert_eq(actual, values * mask)
def test_tree_matmul(self):
    """Compare ``tree_matmul`` with NumPy ``@`` for small and larger operands."""
    nm = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    nrow = np.array([[7.0, 8.0, 9.0]])
    m = BlockMatrix.from_numpy(nm, block_size=2)
    row = BlockMatrix.from_numpy(nrow, block_size=2)

    # (left operand, right operand, expected product); the right operand is
    # alternately a BlockMatrix and a plain NumPy array.
    small_cases = [
        (m, m.T, nm @ nm.T),
        (m, nm.T, nm @ nm.T),
        (row, row.T, nrow @ nrow.T),
        (row, nrow.T, nrow @ nrow.T),
        (m.T, m, nm.T @ nm),
        (m.T, nm, nm.T @ nm),
        (row.T, row, nrow.T @ nrow),
        (row.T, nrow, nrow.T @ nrow),
    ]
    for left, right, expected in small_cases:
        self._assert_eq(left.tree_matmul(right, splits=2), expected)

    # Variety of block sizes and splits
    wide = np.arange(50 * 60).reshape((50, 60))
    tall = np.arange(60 * 25).reshape((60, 25))
    for block_size in [7, 10]:
        bm_wide = BlockMatrix.from_numpy(wide, block_size)
        bm_tall = BlockMatrix.from_numpy(tall, block_size)
        for n_splits in [2, 9]:
            self._assert_eq(bm_wide.tree_matmul(bm_wide.T, splits=n_splits),
                            wide @ wide.T)
            self._assert_eq(bm_wide.tree_matmul(bm_tall, splits=n_splits),
                            wide @ tall)
def test_stage_locally(self):
    """Write with ``stage_locally=True`` and check the matrix reads back equal."""
    expected = np.arange(0, 80, dtype=float).reshape(8, 10)
    with hl.TemporaryDirectory(ensure_exists=False) as bm_uri:
        source = BlockMatrix.from_numpy(expected, block_size=3)
        source.write(bm_uri, stage_locally=True)

        # Compare while the temporary directory still exists.
        round_tripped = BlockMatrix.read(bm_uri)
        self._assert_eq(expected, round_tripped)
def test_sum(self):
    """Check total, per-column and per-row sums against NumPy.

    Covers a dense random matrix, three matrices sparsified to different
    rectangle sets, and a zero matrix sparsified to no rectangles.
    """
    def sums_agree(bm, nd):
        self.assertAlmostEqual(bm.sum(), np.sum(nd))
        for axis in (0, 1):
            self._assert_close(bm.sum(axis=axis),
                               np.sum(nd, axis=axis, keepdims=True))

    dense = np.random.normal(size=(11, 13))
    sums_agree(BlockMatrix.from_numpy(dense, block_size=3), dense)

    # All non-zero entries sit inside rows 2-3, columns 4-5.
    mostly_zero = np.zeros(shape=(5, 7))
    mostly_zero[2:4, 4:6] = [[1.0, 2.0], [3.0, 4.0]]
    rectangle_sets = [
        [[2, 4, 4, 6]],
        [[2, 4, 4, 6], [0, 5, 0, 1]],
        [[2, 4, 4, 6], [0, 1, 0, 7]],
    ]
    for rectangles in rectangle_sets:
        sparse = BlockMatrix.from_numpy(mostly_zero, block_size=2).sparsify_rectangles(rectangles)
        sums_agree(sparse, mostly_zero)

    all_zero = np.zeros(shape=(5, 7))
    empty = BlockMatrix.fill(5, 7, value=0.0, block_size=2).sparsify_rectangles([])
    sums_agree(empty, all_zero)
def test_matrix_ops(self):
    """Check transpose, ``@`` and ``diagonal`` against ``np.matrix`` results."""
    nm = np.matrix([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    nrow = np.matrix([[7.0, 8.0, 9.0]])
    m = BlockMatrix.from_numpy(nm, block_size=2)
    row = BlockMatrix.from_numpy(nrow, block_size=2)

    # Transposes (m.T is compared twice; the repeat is redundant).
    for actual, expected in [(m.T, nm.T), (m.T, nm.T), (row.T, nrow.T)]:
        self._assert_eq(actual, expected)

    # Products with a BlockMatrix or a NumPy matrix on the right-hand side.
    products = [
        (m @ m.T, nm @ nm.T),
        (m @ nm.T, nm @ nm.T),
        (row @ row.T, nrow @ nrow.T),
        (row @ nrow.T, nrow @ nrow.T),
        (m.T @ m, nm.T @ nm),
        (m.T @ nm, nm.T @ nm),
        (row.T @ row, nrow.T @ nrow),
        (row.T @ nrow, nrow.T @ nrow),
    ]
    for actual, expected in products:
        self._assert_eq(actual, expected)

    # A 2x3 matrix cannot be multiplied by another 2x3 matrix.
    with self.assertRaises(ValueError):
        _ = m @ m
    with self.assertRaises(ValueError):
        _ = m @ nm

    self._assert_eq(m.diagonal(), np.array([1.0, 5.0]))
    self._assert_eq(m.T.diagonal(), np.array([1.0, 5.0]))
    self._assert_eq((m @ m.T).diagonal(), np.array([14.0, 77.0]))
def test_stage_locally(self):
    """Write with ``stage_locally=True`` and check the matrix reads back equal."""
    expected = np.arange(0, 80, dtype=float).reshape(8, 10)
    bm_uri = new_temp_file()

    source = BlockMatrix.from_numpy(expected, block_size=3)
    source.write(bm_uri, stage_locally=True)

    round_tripped = BlockMatrix.read(bm_uri)
    self._assert_eq(expected, round_tripped)
def test_export_rectangles(self):
    """Export rectangles as text and binary and compare with NumPy slices.

    For each rectangle list and block size, the matrix is sparsified to the
    rectangles, written row-major, and exported twice (text, then binary).
    Every exported file is compared with the corresponding slice of the
    source array.  Finally, exporting a rectangle whose block was dropped
    must raise ``FatalError`` with a message naming the missing block.
    """
    nd = np.arange(0, 80, dtype=float).reshape(8, 10)

    rects1 = [[0, 1, 0, 1], [4, 5, 7, 8]]
    rects2 = [[4, 5, 0, 10], [0, 8, 4, 5]]
    rects3 = [[0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3],
              [3, 5, 3, 6], [3, 6, 3, 7], [3, 7, 3, 8],
              [4, 5, 0, 10], [0, 8, 4, 5], [0, 8, 0, 10]]

    def exported_file(directory, index, rect):
        # File name used by this test for rectangle `index`:
        # rect-<index>_<r0>-<r1>-<c0>-<c1>
        return directory + '/rect-' + str(index) + '_' + '-'.join(map(str, rect))

    for rects in [rects1, rects2, rects3]:
        for block_size in [3, 4, 10]:
            bm_uri = new_temp_file()

            rect_path = new_local_temp_dir()
            rect_uri = local_path_uri(rect_path)

            (BlockMatrix.from_numpy(nd, block_size=block_size)
             .sparsify_rectangles(rects)
             .write(bm_uri, force_row_major=True))

            # Text export.
            BlockMatrix.export_rectangles(bm_uri, rect_uri, rects)
            for i, r in enumerate(rects):
                expected = nd[r[0]:r[1], r[2]:r[3]]
                actual = np.loadtxt(exported_file(rect_path, i, r), ndmin=2)
                self._assert_eq(expected, actual)

            # Binary export: files are flat, so reshape to the rectangle's shape.
            rect_path_bytes = new_local_temp_dir()
            rect_uri_bytes = local_path_uri(rect_path_bytes)

            BlockMatrix.export_rectangles(bm_uri, rect_uri_bytes, rects, binary=True)
            for i, r in enumerate(rects):
                expected = nd[r[0]:r[1], r[2]:r[3]]
                actual = np.reshape(np.fromfile(exported_file(rect_path_bytes, i, r)),
                                    (r[1] - r[0], r[3] - r[2]))
                self._assert_eq(expected, actual)

    bm_uri = new_temp_file()
    rect_uri = new_temp_file()

    (BlockMatrix.from_numpy(nd, block_size=5)
     .sparsify_rectangles([[0, 1, 0, 1]])
     .write(bm_uri, force_row_major=True))

    with self.assertRaises(FatalError) as e:
        BlockMatrix.export_rectangles(bm_uri, rect_uri, [[5, 6, 5, 6]])
    # The message check must run after the `with` block: a statement placed
    # after the raising call inside it is never executed, and the context
    # manager's own `msg` attribute is not the exception text.
    # NOTE(review): assumes str(FatalError) contains the backend message
    # verbatim — confirm against the FatalError implementation.
    self.assertIn('block (1, 1) missing for rectangle 0 with bounds [5, 6, 5, 6]',
                  str(e.exception))
def test_sparsify_row_intervals(self):
    """Check ``sparsify_row_intervals`` against literal and computed expectations.

    The random 8x10 cases build the expectation by zeroing, in each row,
    every column before ``starts[i]`` and every column from ``stops[i]`` on.
    """
    dense = np.array([[1.0, 2.0, 3.0, 4.0],
                      [5.0, 6.0, 7.0, 8.0],
                      [9.0, 10.0, 11.0, 12.0],
                      [13.0, 14.0, 15.0, 16.0]])
    bm = BlockMatrix.from_numpy(dense, block_size=2)

    by_element = bm.sparsify_row_intervals(starts=[1, 0, 2, 2], stops=[2, 0, 3, 4])
    self._assert_eq(by_element,
                    np.array([[0., 2., 0., 0.],
                              [0., 0., 0., 0.],
                              [0., 0., 11., 0.],
                              [0., 0., 15., 16.]]))

    by_block = bm.sparsify_row_intervals(starts=[1, 0, 2, 2],
                                         stops=[2, 0, 3, 4],
                                         blocks_only=True)
    self._assert_eq(by_block,
                    np.array([[1., 2., 0., 0.],
                              [5., 6., 0., 0.],
                              [0., 0., 11., 12.],
                              [0., 0., 15., 16.]]))

    random_dense = np.random.normal(size=(8, 10))
    bm2 = BlockMatrix.from_numpy(random_dense, block_size=3)

    interval_sets = [
        [[0, 1, 2, 3, 4, 5, 6, 7], [1, 2, 3, 4, 5, 6, 7, 8]],
        [[0, 0, 5, 3, 4, 5, 8, 2], [9, 0, 5, 3, 4, 5, 9, 5]],
        [[0, 5, 10, 8, 7, 6, 5, 4], [0, 5, 10, 9, 8, 7, 6, 5]],
    ]
    for starts, stops in interval_sets:
        actual = bm2.sparsify_row_intervals(starts, stops, blocks_only=False).to_numpy()
        expected = random_dense.copy()
        for i, (start, stop) in enumerate(zip(starts, stops)):
            # Keep only columns in [start, stop) of row i.
            expected[i, :start] = 0.0
            expected[i, stop:] = 0.0
        self._assert_eq(actual, expected)
def test_sparse_transposition(self):
    """Transpose block-sparsified matrices and compare with the NumPy reference.

    Each case is (matrix shape, block size, list of block indices to keep);
    the reference comes from ``sparsify_numpy`` followed by ``.T``.
    """
    cases = [
        ((4, 4), 2, [1, 2]),
        ((15, 15), 4, [4, 8, 10, 12, 13, 14]),
        ((10, 15), 4, [2, 5, 8, 10, 11]),
        ((15, 11), 4, [2, 5, 8, 10, 11]),
    ]
    for (n_rows, n_cols), block_size, block_list in cases:
        dense = np.arange(n_rows * n_cols, dtype=np.float64).reshape((n_rows, n_cols))
        bm = BlockMatrix.from_numpy(dense, block_size=block_size)
        sparse_bm = bm._sparsify_blocks(block_list).T
        sparse_np = sparsify_numpy(dense, block_size, block_list).T
        assert np.array_equal(sparse_bm.to_numpy(), sparse_np)
def test_promote(self):
    """Check which operand pairs ``_promote`` accepts and which it rejects.

    Every pair in ``good`` must promote without raising; every pair in
    ``bad`` must raise ``ValueError``.  Operands are 1x1, 2x1, 1x3 and 2x3
    matrices (as BlockMatrix and as ``np.matrix``), their transposes, and
    the scalar ``2``.
    """
    nx = np.matrix([[2.0]])
    nc = np.matrix([[1.0], [2.0]])
    nr = np.matrix([[1.0, 2.0, 3.0]])
    nm = np.matrix([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])

    e = 2
    x, c, r, m = (BlockMatrix.from_numpy(array) for array in (nx, nc, nr, nm))

    nct, nrt, nmt = nc.T, nr.T, nm.T
    ct, rt, mt = c.T, r.T, m.T

    good = [
        (x, x), (x, c), (x, r), (x, m), (x, e),
        (c, x), (c, c), (c, m), (c, e),
        (r, x), (r, r), (r, m), (r, e),
        (m, x), (m, c), (m, r), (m, m), (m, e),
        (x, nx), (x, nc), (x, nr), (x, nm),
        (c, nx), (c, nc), (c, nm),
        (r, nx), (r, nr), (r, nm),
        (m, nx), (m, nc), (m, nr), (m, nm),
    ]
    bad = [
        (c, r), (r, c), (c, ct), (r, rt), (c, rt), (c, mt),
        (ct, r), (ct, m), (r, ct), (r, mt), (rt, c), (rt, m),
        (m, ct), (m, rt), (m, mt), (mt, c), (mt, r), (mt, m),
        (c, nr), (r, nc), (c, nct), (r, nrt), (c, nrt), (c, nmt),
        (ct, nr), (ct, nm), (r, nct), (r, nmt), (rt, nc), (rt, nm),
        (m, nct), (m, nrt), (m, nmt), (mt, nc), (mt, nr), (mt, nm),
    ]

    for lhs, rhs in good:
        lhs._promote(rhs, '')

    for lhs, rhs in bad:
        with self.assertRaises(ValueError):
            lhs._promote(rhs, '')
def test_sparsify_band(self):
    """Check ``sparsify_band`` with literal expectations and with band masks."""
    small = np.array([[1.0, 2.0, 3.0, 4.0],
                      [5.0, 6.0, 7.0, 8.0],
                      [9.0, 10.0, 11.0, 12.0],
                      [13.0, 14.0, 15.0, 16.0]])
    bm = BlockMatrix.from_numpy(small, block_size=2)

    # (keyword arguments for sparsify_band, expected dense result)
    literal_cases = [
        (dict(lower=-1, upper=2),
         np.array([[1., 2., 3., 0.],
                   [5., 6., 7., 8.],
                   [0., 10., 11., 12.],
                   [0., 0., 15., 16.]])),
        (dict(lower=0, upper=0, blocks_only=True),
         np.array([[1., 2., 0., 0.],
                   [5., 6., 0., 0.],
                   [0., 0., 11., 12.],
                   [0., 0., 15., 16.]])),
    ]
    for band_kwargs, expected in literal_cases:
        self._assert_eq(bm.sparsify_band(**band_kwargs), expected)

    large = np.arange(0, 80, dtype=float).reshape(8, 10)
    bm2 = BlockMatrix.from_numpy(large, block_size=3)

    for lower, upper in [[0, 0], [1, 1], [2, 2], [-5, 5], [-7, 0], [0, 9], [-100, 100]]:
        def band_mask(i, j):
            # Non-zero exactly where lower <= j - i <= upper.
            return (lower <= j - i) * (j - i <= upper)

        actual = bm2.sparsify_band(lower, upper, blocks_only=False).to_numpy()
        self._assert_eq(actual, large * np.fromfunction(band_mask, (8, 10)))
def test_write_overwrite(self):
    """A second write to the same path fails unless ``overwrite=True``."""
    with hl.TemporaryDirectory(ensure_exists=False) as path:
        original = BlockMatrix.from_numpy(np.array([[0]]))
        original.write(path)

        # Writing again without overwrite must be rejected.
        with self.assertRaises(FatalError):
            original.write(path)

        replacement = BlockMatrix.from_numpy(np.array([[1]]))
        replacement.write(path, overwrite=True)
        self._assert_eq(BlockMatrix.read(path), replacement)
def test_svd(self):
    """Compare ``BlockMatrix.svd`` with ``np.linalg.svd``.

    Covers the default call and ``complexity_bound=0`` on a 2x3 matrix and
    on its transpose, the return types when the matrix is square, and
    non-negativity of singular values for a rank-1 input.  Singular vectors
    are compared column by column up to sign.
    """
    def assert_same_columns_up_to_sign(a, b):
        for j in range(a.shape[1]):
            assert np.allclose(a[:, j], b[:, j]) or np.allclose(-a[:, j], b[:, j])

    x0 = np.array([[-2.0, 0.0, 3.0],
                   [-1.0, 2.0, 4.0]])
    u0, s0, vt0 = np.linalg.svd(x0, full_matrices=False)

    x = BlockMatrix.from_numpy(x0)

    # _svd
    u, s, vt = x.svd()
    assert_same_columns_up_to_sign(u, u0)
    assert np.allclose(s, s0)
    assert_same_columns_up_to_sign(vt.T, vt0.T)

    s = x.svd(compute_uv=False)
    assert np.allclose(s, s0)

    # left _svd_gramian
    u, s, vt = x.svd(complexity_bound=0)
    assert_same_columns_up_to_sign(u, u0)
    assert np.allclose(s, s0)
    assert_same_columns_up_to_sign(vt.to_numpy().T, vt0.T)

    s = x.svd(compute_uv=False, complexity_bound=0)
    assert np.allclose(s, s0)

    # right _svd_gramian
    x = BlockMatrix.from_numpy(x0.T)
    u, s, vt = x.svd(complexity_bound=0)
    assert_same_columns_up_to_sign(u.to_numpy(), vt0.T)
    assert np.allclose(s, s0)
    assert_same_columns_up_to_sign(vt.T, u0)

    s = x.svd(compute_uv=False, complexity_bound=0)
    assert np.allclose(s, s0)

    # left _svd_gramian when dimensions agree
    x = BlockMatrix.from_numpy(x0[:, :2])
    u, s, vt = x.svd(complexity_bound=0)
    assert isinstance(u, np.ndarray)
    assert isinstance(vt, BlockMatrix)

    # rank-deficient X sets negative eigenvalues to 0.0
    a = np.array([[0.0, 1.0, np.e, np.pi, 10.0, 25.0]])
    x0 = a.T @ a  # rank 1

    x = BlockMatrix.from_numpy(x0)
    _, s, _ = x.svd(complexity_bound=0)
    assert np.all(s >= 0.0)

    s = x.svd(compute_uv=False, complexity_bound=0)
    assert np.all(s >= 0)
def test_svd(self):
    """Compare ``BlockMatrix.svd`` with ``np.linalg.svd``.

    Exercises the default call and ``complexity_bound=0`` on a 2x3 matrix
    and its transpose, checks the return types for a square input, and
    checks that singular values of a rank-1 matrix are non-negative.
    """
    def assert_same_columns_up_to_sign(a, b):
        # Singular vectors are only determined up to sign, so accept either.
        for j in range(a.shape[1]):
            same = np.allclose(a[:, j], b[:, j])
            flipped = np.allclose(-a[:, j], b[:, j])
            assert same or flipped

    x0 = np.array([[-2.0, 0.0, 3.0],
                   [-1.0, 2.0, 4.0]])
    u0, s0, vt0 = np.linalg.svd(x0, full_matrices=False)

    x = BlockMatrix.from_numpy(x0)

    # _svd
    u, s, vt = x.svd()
    assert_same_columns_up_to_sign(u, u0)
    assert np.allclose(s, s0)
    assert_same_columns_up_to_sign(vt.T, vt0.T)
    assert np.allclose(x.svd(compute_uv=False), s0)

    # left _svd_gramian
    u, s, vt = x.svd(complexity_bound=0)
    assert_same_columns_up_to_sign(u, u0)
    assert np.allclose(s, s0)
    assert_same_columns_up_to_sign(vt.to_numpy().T, vt0.T)
    assert np.allclose(x.svd(compute_uv=False, complexity_bound=0), s0)

    # right _svd_gramian
    x = BlockMatrix.from_numpy(x0.T)
    u, s, vt = x.svd(complexity_bound=0)
    assert_same_columns_up_to_sign(u.to_numpy(), vt0.T)
    assert np.allclose(s, s0)
    assert_same_columns_up_to_sign(vt.T, u0)
    assert np.allclose(x.svd(compute_uv=False, complexity_bound=0), s0)

    # left _svd_gramian when dimensions agree
    x = BlockMatrix.from_numpy(x0[:, :2])
    u, s, vt = x.svd(complexity_bound=0)
    assert isinstance(u, np.ndarray)
    assert isinstance(vt, BlockMatrix)

    # rank-deficient X sets negative eigenvalues to 0.0
    a = np.array([[0.0, 1.0, np.e, np.pi, 10.0, 25.0]])
    x0 = a.T @ a  # rank 1

    x = BlockMatrix.from_numpy(x0)
    _, s, _ = x.svd(complexity_bound=0)
    assert np.all(s >= 0.0)

    s = x.svd(compute_uv=False, complexity_bound=0)
    assert np.all(s >= 0)
def test_write_overwrite(self):
    """A second write to the same path fails unless ``overwrite=True``."""
    path = new_temp_file()

    original = BlockMatrix.from_numpy(np.array([[0]]))
    original.write(path)

    # Writing again without overwrite must be rejected.
    with self.assertRaises(FatalError):
        original.write(path)

    replacement = BlockMatrix.from_numpy(np.array([[1]]))
    replacement.write(path, overwrite=True)
    self._assert_eq(BlockMatrix.read(path), replacement)
def test_to_from_numpy(self):
    """Round-trip a random matrix through NumPy and raw files, both orientations.

    Five routes are checked for the matrix and again for its transpose:
    ``to_numpy``, ``from_numpy``, reading a BlockMatrix-written file with
    NumPy, and reading NumPy- and BlockMatrix-written files with
    ``BlockMatrix.fromfile``.
    """
    n_rows = 10
    n_cols = 11
    data = np.random.rand(n_rows * n_cols)

    bm = BlockMatrix._create_block_matrix(n_rows, n_cols, data.tolist(),
                                          row_major=True, block_size=4)
    a = data.reshape((n_rows, n_cols))

    with tempfile.NamedTemporaryFile() as bm_f, tempfile.NamedTemporaryFile() as a_f:
        bm.tofile(bm_f.name)
        a.tofile(a_f.name)

        recovered = [
            bm.to_numpy(),
            BlockMatrix.from_numpy(a, block_size=5).to_numpy(),
            np.fromfile(bm_f.name).reshape((n_rows, n_cols)),
            BlockMatrix.fromfile(a_f.name, n_rows, n_cols, block_size=3).to_numpy(),
            BlockMatrix.fromfile(bm_f.name, n_rows, n_cols).to_numpy(),
        ]

    for result in recovered:
        self.assertTrue(np.array_equal(result, a))

    bm_t = bm.T
    a_t = a.T

    with tempfile.NamedTemporaryFile() as bm_t_f, tempfile.NamedTemporaryFile() as a_t_f:
        bm_t.tofile(bm_t_f.name)
        a_t.tofile(a_t_f.name)

        recovered_t = [
            bm_t.to_numpy(),
            BlockMatrix.from_numpy(a_t).to_numpy(),
            np.fromfile(bm_t_f.name).reshape((n_cols, n_rows)),
            BlockMatrix.fromfile(a_t_f.name, n_cols, n_rows).to_numpy(),
            BlockMatrix.fromfile(bm_t_f.name, n_cols, n_rows).to_numpy(),
        ]

    for result in recovered_t:
        self.assertTrue(np.array_equal(result, a_t))
def test_to_from_numpy(self):
    """Round-trip a random matrix through NumPy and raw files, both orientations."""
    n_rows = 10
    n_cols = 11
    data = np.random.rand(n_rows * n_cols)

    bm = BlockMatrix._create(n_rows, n_cols, data.tolist(), row_major=True, block_size=4)
    a = data.reshape((n_rows, n_cols))

    with tempfile.NamedTemporaryFile() as bm_f, tempfile.NamedTemporaryFile() as a_f:
        bm.tofile(bm_f.name)
        a.tofile(a_f.name)

        # Each entry is the same matrix recovered through a different route.
        recovered = [
            bm.to_numpy(),
            BlockMatrix.from_numpy(a, block_size=5).to_numpy(),
            np.fromfile(bm_f.name).reshape((n_rows, n_cols)),
            BlockMatrix.fromfile(a_f.name, n_rows, n_cols, block_size=3).to_numpy(),
            BlockMatrix.fromfile(bm_f.name, n_rows, n_cols).to_numpy(),
        ]

    for result in recovered:
        self._assert_eq(result, a)

    bmt = bm.T
    at = a.T

    with tempfile.NamedTemporaryFile() as bmt_f, tempfile.NamedTemporaryFile() as at_f:
        bmt.tofile(bmt_f.name)
        at.tofile(at_f.name)

        recovered_t = [
            bmt.to_numpy(),
            BlockMatrix.from_numpy(at).to_numpy(),
            np.fromfile(bmt_f.name).reshape((n_cols, n_rows)),
            BlockMatrix.fromfile(at_f.name, n_cols, n_rows).to_numpy(),
            BlockMatrix.fromfile(bmt_f.name, n_cols, n_rows).to_numpy(),
        ]

    for result in recovered_t:
        self._assert_eq(result, at)
def test_export_rectangles(self):
    """Export rectangles as text and binary and compare with NumPy slices.

    For each rectangle list and block size, the matrix is sparsified to the
    rectangles, written row-major, and exported in text and binary form.
    Each exported file is compared with the matching slice of the source
    array.  Exporting a rectangle whose block was dropped must raise
    ``FatalError`` with a message naming the missing block.
    """
    nd = np.arange(0, 80, dtype=float).reshape(8, 10)

    rects1 = [[0, 1, 0, 1], [4, 5, 7, 8]]
    rects2 = [[4, 5, 0, 10], [0, 8, 4, 5]]
    rects3 = [[0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3],
              [3, 5, 3, 6], [3, 6, 3, 7], [3, 7, 3, 8],
              [4, 5, 0, 10], [0, 8, 4, 5], [0, 8, 0, 10]]

    for rects in [rects1, rects2, rects3]:
        for block_size in [3, 4, 10]:
            bm_uri = new_temp_file()

            rect_path = new_local_temp_dir()
            rect_uri = local_path_uri(rect_path)

            (BlockMatrix.from_numpy(nd, block_size=block_size)
                .sparsify_rectangles(rects)
                .write(bm_uri, force_row_major=True))

            BlockMatrix.export_rectangles(bm_uri, rect_uri, rects)

            for (i, r) in enumerate(rects):
                rect_file = rect_path + '/rect-' + str(i) + '_' + '-'.join(map(str, r))
                expected = nd[r[0]:r[1], r[2]:r[3]]
                actual = np.loadtxt(rect_file, ndmin=2)
                self._assert_eq(expected, actual)

            rect_path_bytes = new_local_temp_dir()
            rect_uri_bytes = local_path_uri(rect_path_bytes)

            BlockMatrix.export_rectangles(bm_uri, rect_uri_bytes, rects, binary=True)

            for (i, r) in enumerate(rects):
                rect_file = rect_path_bytes + '/rect-' + str(i) + '_' + '-'.join(map(str, r))
                expected = nd[r[0]:r[1], r[2]:r[3]]
                # Binary files are flat; restore the rectangle's shape.
                actual = np.reshape(np.fromfile(rect_file),
                                    (r[1] - r[0], r[3] - r[2]))
                self._assert_eq(expected, actual)

    bm_uri = new_temp_file()
    rect_uri = new_temp_file()

    (BlockMatrix.from_numpy(nd, block_size=5)
        .sparsify_rectangles([[0, 1, 0, 1]])
        .write(bm_uri, force_row_major=True))

    with self.assertRaises(FatalError) as e:
        BlockMatrix.export_rectangles(bm_uri, rect_uri, [[5, 6, 5, 6]])
    # Verify the message after the `with` block: code following the raising
    # call inside it never runs, and the context manager's `msg` attribute
    # is not the exception text.
    # NOTE(review): assumes str(FatalError) contains the backend message
    # verbatim — confirm against the FatalError implementation.
    self.assertIn('block (1, 1) missing for rectangle 0 with bounds [5, 6, 5, 6]',
                  str(e.exception))
def test_to_from_numpy(self):
    """Round-trip a random matrix through NumPy and raw files, both orientations.

    Files written by ``tofile`` are read back through the backend file
    system and through ``BlockMatrix.fromfile``.  File handles opened for
    the raw reads are closed via ``with`` rather than left open.
    """
    n_rows = 10
    n_cols = 11
    data = np.random.rand(n_rows * n_cols)

    bm = BlockMatrix._create(n_rows, n_cols, data.tolist(), block_size=4)
    a = data.reshape((n_rows, n_cols))

    with hl.TemporaryFilename() as bm_f, hl.TemporaryFilename() as a_f:
        bm.tofile(bm_f)
        a.tofile(a_f)

        a1 = bm.to_numpy()
        a2 = BlockMatrix.from_numpy(a, block_size=5).to_numpy()
        # NOTE(review): assumes the object returned by fs.open supports the
        # context-manager protocol — confirm for every backend file system.
        with hl.current_backend().fs.open(bm_f, mode='rb') as raw:
            a3 = np.frombuffer(raw.read()).reshape((n_rows, n_cols))
        a4 = BlockMatrix.fromfile(a_f, n_rows, n_cols, block_size=3).to_numpy()
        a5 = BlockMatrix.fromfile(bm_f, n_rows, n_cols).to_numpy()

        for result in (a1, a2, a3, a4, a5):
            self._assert_eq(result, a)

    bmt = bm.T
    at = a.T

    with hl.TemporaryFilename() as bmt_f, hl.TemporaryFilename() as at_f:
        bmt.tofile(bmt_f)
        at.tofile(at_f)

        at1 = bmt.to_numpy()
        at2 = BlockMatrix.from_numpy(at).to_numpy()
        with hl.current_backend().fs.open(bmt_f, mode='rb') as raw:
            at3 = np.frombuffer(raw.read()).reshape((n_cols, n_rows))
        at4 = BlockMatrix.fromfile(at_f, n_cols, n_rows).to_numpy()
        at5 = BlockMatrix.fromfile(bmt_f, n_cols, n_rows).to_numpy()

        for result in (at1, at2, at3, at4, at5):
            self._assert_eq(result, at)

    self._assert_eq(bm.to_numpy(_force_blocking=True), a)
def test_sparsify_triangle(self):
    """Check ``sparsify_triangle`` for upper, lower and blocks-only modes."""
    dense = np.array([[1.0, 2.0, 3.0, 4.0],
                      [5.0, 6.0, 7.0, 8.0],
                      [9.0, 10.0, 11.0, 12.0],
                      [13.0, 14.0, 15.0, 16.0]])
    bm = BlockMatrix.from_numpy(dense, block_size=2)

    # The source matrix is dense; the sparsified one reports itself sparse.
    self.assertFalse(bm.is_sparse)
    self.assertTrue(bm.sparsify_triangle().is_sparse)

    upper = np.array([[1., 2., 3., 4.],
                      [0., 6., 7., 8.],
                      [0., 0., 11., 12.],
                      [0., 0., 0., 16.]])
    self._assert_eq(bm.sparsify_triangle(), upper)

    lower = np.array([[1., 0., 0., 0.],
                      [5., 6., 0., 0.],
                      [9., 10., 11., 0.],
                      [13., 14., 15., 16.]])
    self._assert_eq(bm.sparsify_triangle(lower=True), lower)

    # blocks_only=True: only the lower-left 2x2 block is zeroed.
    upper_blocks = np.array([[1., 2., 3., 4.],
                             [5., 6., 7., 8.],
                             [0., 0., 11., 12.],
                             [0., 0., 15., 16.]])
    self._assert_eq(bm.sparsify_triangle(blocks_only=True), upper_blocks)
def test_export_rectangles(self):
    """Export rectangles in text and binary form for several block sizes.

    Verification of the exported files is delegated to
    ``self._assert_rectangles_eq``.
    """
    nd = np.arange(0, 80, dtype=float).reshape(8, 10)

    rects1 = [[0, 1, 0, 1], [4, 5, 7, 8]]
    rects2 = [[4, 5, 0, 10], [0, 8, 4, 5]]
    rects3 = [[0, 1, 0, 1], [1, 2, 1, 2], [2, 3, 2, 3],
              [3, 5, 3, 6], [3, 6, 3, 7], [3, 7, 3, 8],
              [4, 5, 0, 10], [0, 8, 4, 5], [0, 8, 0, 10]]

    for rects in (rects1, rects2, rects3):
        for block_size in (3, 4, 10):
            # Text export into a fresh local directory.
            text_dir = new_local_temp_dir()
            text_uri = local_path_uri(text_dir)

            bm = BlockMatrix.from_numpy(nd, block_size=block_size)
            bm.export_rectangles(text_uri, rects)
            self._assert_rectangles_eq(nd, text_dir, rects)

            # Binary export into a second fresh directory.
            bytes_dir = new_local_temp_dir()
            bytes_uri = local_path_uri(bytes_dir)

            bm.export_rectangles(bytes_uri, rects, binary=True)
            self._assert_rectangles_eq(nd, bytes_dir, rects, binary=True)
def test_rectangles_to_numpy(self):
    """Export two rectangles (text and binary) and reassemble them with NumPy.

    The expected array is 3x2, with zeros where neither rectangle supplies
    a value.
    """
    nd = np.array([[1.0, 2.0, 3.0],
                   [4.0, 5.0, 6.0],
                   [7.0, 8.0, 9.0]])
    rects = [[0, 3, 0, 1], [1, 2, 0, 2]]

    with hl.TemporaryDirectory() as rect_uri:
        with hl.TemporaryDirectory() as rect_bytes_uri:
            BlockMatrix.from_numpy(nd).export_rectangles(rect_uri, rects)
            BlockMatrix.from_numpy(nd).export_rectangles(rect_bytes_uri, rects, binary=True)

            expected = np.array([[1.0, 0.0],
                                 [4.0, 5.0],
                                 [7.0, 0.0]])
            from_text = BlockMatrix.rectangles_to_numpy(rect_uri)
            self._assert_eq(expected, from_text)
            from_bytes = BlockMatrix.rectangles_to_numpy(rect_bytes_uri, binary=True)
            self._assert_eq(expected, from_bytes)
def test_slicing(self):
    """Check BlockMatrix indexing against NumPy, including a sparse matrix.

    Covers scalar access, row and column slices (which the BlockMatrix
    returns two-dimensional), general 2-D slices, a set of index
    expressions that must raise ``ValueError``, and indexing into a
    row-interval-sparsified matrix.
    """
    nd = np.array(np.arange(0, 80, dtype=float)).reshape(8, 10)
    bm = BlockMatrix.from_numpy(nd, block_size=3)

    scalar_indices = [(0, 0), (5, 7), (-3, 9), (-8, -10)]
    for idx in scalar_indices:
        self._assert_eq(bm[idx], nd[idx])

    # NumPy drops the integer-indexed axis; expand it back for comparison.
    row_indices = [(0, slice(3, 4)), (1, slice(3, 4)), (-8, slice(3, 4)), (-1, slice(3, 4))]
    for idx in row_indices:
        self._assert_eq(bm[idx], np.expand_dims(nd[idx], 0))

    col_indices = [(slice(3, 4), 0), (slice(3, 4), 1), (slice(3, 4), -8), (slice(3, 4), -1)]
    for idx in col_indices:
        self._assert_eq(bm[idx], np.expand_dims(nd[idx], 1))

    block_indices = [
        (slice(0, 8), slice(0, 10)),
        (slice(0, 8, 2), slice(0, 10, 2)),
        (slice(2, 4), slice(5, 7)),
        (slice(-8, -1), slice(-10, -1)),
        (slice(-8, -1, 2), slice(-10, -1, 2)),
        (slice(None, 4, 1), slice(None, 4, 1)),
        (slice(4, None), slice(4, None)),
        (slice(None, None), slice(None, None)),
    ]
    for idx in block_indices:
        self._assert_eq(bm[idx], nd[idx])

    # Index expressions that must be rejected.
    invalid_accesses = [
        lambda: bm[0, ],
        lambda: bm[9, 0],
        lambda: bm[-9, 0],
        lambda: bm[0, 11],
        lambda: bm[0, -11],
        lambda: bm[::-1, 0],
        lambda: bm[0, ::-1],
        lambda: bm[:0, 0],
        lambda: bm[0, :0],
        lambda: bm[0:9, 0],
        lambda: bm[-9:, 0],
        lambda: bm[:-9, 0],
        lambda: bm[0, :11],
        lambda: bm[0, -11:],
        lambda: bm[0, :-11],
    ]
    for access in invalid_accesses:
        self.assertRaises(ValueError, access)

    # Keep only columns [0, 2) of row 0; all other rows get empty intervals.
    bm2 = bm.sparsify_row_intervals([0, 0, 0, 0, 0, 0, 0, 0],
                                    [2, 0, 0, 0, 0, 0, 0, 0])
    self.assertEqual(bm2[0, 1], 1.0)
    self.assertEqual(bm2[0, 2], 0.0)
    self.assertEqual(bm2[0, 9], 0.0)

    nd2 = np.zeros(shape=(8, 10))
    nd2[0, 1] = 1.0
    sparse_cases = [
        (bm2[:, :], nd2),
        (bm2[:, 1], nd2[:, 1:2]),
        (bm2[1, :], nd2[1:2, :]),
        (bm2[0:5, 0:5], nd2[0:5, 0:5]),
    ]
    for actual, expected in sparse_cases:
        self._assert_eq(actual, expected)
def test_special_elementwise_ops(self):
    """Check power, sqrt, log and abs element-wise against NumPy."""
    nm = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    m = BlockMatrix.from_numpy(nm)

    comparisons = [
        (m**3, nm**3),
        (m.sqrt(), np.sqrt(nm)),
        (m.log(), np.log(nm)),
        ((m - 4).abs(), np.abs(nm - 4)),
    ]
    for actual, expected in comparisons:
        self._assert_close(actual, expected)
def test_export_blocks(self):
    """Export blocks in binary form and reassemble them into the original array."""
    expected = np.ones(shape=(8, 10))
    # block_size=20 exceeds both dimensions of the 8x10 matrix.
    bm = BlockMatrix.from_numpy(expected, block_size=20)

    with hl.TemporaryDirectory() as bm_uri:
        bm.export_blocks(bm_uri, binary=True)
        reassembled = BlockMatrix.rectangles_to_numpy(bm_uri, binary=True)

    self._assert_eq(expected, reassembled)
def test_block_matrix_from_numpy(self):
    """``from_numpy`` preserves shape and contents for several block sizes."""
    ndarray = np.matrix([[0, 1, 2, 3, 4],
                         [5, 6, 7, 8, 9],
                         [10, 11, 12, 13, 14]], dtype=np.float64)

    for block_size in (1, 2, 5, 1024):
        block_matrix = BlockMatrix.from_numpy(ndarray, block_size)
        assert block_matrix.n_rows == 3
        assert block_matrix.n_cols == 5
        elementwise_equal = block_matrix.to_numpy() == ndarray
        assert elementwise_equal.all()
def test_to_from_numpy(self):
    """Round-trip a random matrix through NumPy and raw files, both orientations.

    Ends with a ``to_numpy(_force_blocking=True)`` comparison on the
    original matrix.
    """
    n_rows = 10
    n_cols = 11
    data = np.random.rand(n_rows * n_cols)

    bm = BlockMatrix._create(n_rows, n_cols, data.tolist(), block_size=4)
    a = data.reshape((n_rows, n_cols))

    with tempfile.NamedTemporaryFile() as bm_f, tempfile.NamedTemporaryFile() as a_f:
        bm.tofile(bm_f.name)
        a.tofile(a_f.name)

        # Each entry is the same matrix recovered through a different route.
        recovered = [
            bm.to_numpy(),
            BlockMatrix.from_numpy(a, block_size=5).to_numpy(),
            np.fromfile(bm_f.name).reshape((n_rows, n_cols)),
            BlockMatrix.fromfile(a_f.name, n_rows, n_cols, block_size=3).to_numpy(),
            BlockMatrix.fromfile(bm_f.name, n_rows, n_cols).to_numpy(),
        ]

    for result in recovered:
        self._assert_eq(result, a)

    bmt = bm.T
    at = a.T

    with tempfile.NamedTemporaryFile() as bmt_f, tempfile.NamedTemporaryFile() as at_f:
        bmt.tofile(bmt_f.name)
        at.tofile(at_f.name)

        recovered_t = [
            bmt.to_numpy(),
            BlockMatrix.from_numpy(at).to_numpy(),
            np.fromfile(bmt_f.name).reshape((n_cols, n_rows)),
            BlockMatrix.fromfile(at_f.name, n_cols, n_rows).to_numpy(),
            BlockMatrix.fromfile(bmt_f.name, n_cols, n_rows).to_numpy(),
        ]

    for result in recovered_t:
        self._assert_eq(result, at)

    self._assert_eq(bm.to_numpy(_force_blocking=True), a)
def test_sparsify_row_intervals(self):
    """Check ``sparsify_row_intervals`` with literal and computed expectations."""
    small = np.array([[1.0, 2.0, 3.0, 4.0],
                      [5.0, 6.0, 7.0, 8.0],
                      [9.0, 10.0, 11.0, 12.0],
                      [13.0, 14.0, 15.0, 16.0]])
    bm = BlockMatrix.from_numpy(small, block_size=2)

    starts_small = [1, 0, 2, 2]
    stops_small = [2, 0, 3, 4]

    self._assert_eq(
        bm.sparsify_row_intervals(starts=starts_small, stops=stops_small),
        np.array([[0., 2., 0., 0.],
                  [0., 0., 0., 0.],
                  [0., 0., 11., 0.],
                  [0., 0., 15., 16.]]))

    self._assert_eq(
        bm.sparsify_row_intervals(starts=starts_small, stops=stops_small, blocks_only=True),
        np.array([[1., 2., 0., 0.],
                  [5., 6., 0., 0.],
                  [0., 0., 11., 12.],
                  [0., 0., 15., 16.]]))

    nd2 = np.random.normal(size=(8, 10))
    bm2 = BlockMatrix.from_numpy(nd2, block_size=3)

    all_bounds = [
        [[0, 1, 2, 3, 4, 5, 6, 7], [1, 2, 3, 4, 5, 6, 7, 8]],
        [[0, 0, 5, 3, 4, 5, 8, 2], [9, 0, 5, 3, 4, 5, 9, 5]],
        [[0, 5, 10, 8, 7, 6, 5, 4], [0, 5, 10, 9, 8, 7, 6, 5]],
    ]
    for starts, stops in all_bounds:
        actual = bm2.sparsify_row_intervals(starts, stops, blocks_only=False).to_numpy()

        # Zero everything outside [starts[row], stops[row]) in each row.
        expected = nd2.copy()
        for row in range(8):
            expected[row, 0:starts[row]] = 0.0
            expected[row, stops[row]:10] = 0.0

        self._assert_eq(actual, expected)
def test_matrix_ops(self):
    """Check transpose, ``@``, ``diagonal`` and axis sums against NumPy arrays."""
    nm = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    nsquare = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    nrow = np.array([[7.0, 8.0, 9.0]])

    m = BlockMatrix.from_numpy(nm, block_size=2)
    square = BlockMatrix.from_numpy(nsquare, block_size=2)
    row = BlockMatrix.from_numpy(nrow, block_size=2)

    # Transposes (m.T is compared twice; the repeat is redundant).
    for actual, expected in [(m.T, nm.T), (m.T, nm.T), (row.T, nrow.T)]:
        self._assert_eq(actual, expected)

    # Products with a BlockMatrix or a NumPy array on the right-hand side.
    products = [
        (m @ m.T, nm @ nm.T),
        (m @ nm.T, nm @ nm.T),
        (row @ row.T, nrow @ nrow.T),
        (row @ nrow.T, nrow @ nrow.T),
        (m.T @ m, nm.T @ nm),
        (m.T @ nm, nm.T @ nm),
        (row.T @ row, nrow.T @ nrow),
        (row.T @ nrow, nrow.T @ nrow),
    ]
    for actual, expected in products:
        self._assert_eq(actual, expected)

    # A 2x3 matrix cannot be multiplied by another 2x3 matrix.
    with self.assertRaises(ValueError):
        _ = m @ m
    with self.assertRaises(ValueError):
        _ = m @ nm

    # The expected diagonals are written as 1xN row arrays.
    self._assert_eq(m.diagonal(), np.array([[1.0, 5.0]]))
    self._assert_eq(m.T.diagonal(), np.array([[1.0, 5.0]]))
    self._assert_eq((m @ m.T).diagonal(), np.array([[14.0, 77.0]]))

    self._assert_eq(m.sum(axis=0).T, np.array([[5.0], [7.0], [9.0]]))
    self._assert_eq(m.sum(axis=1).T, np.array([[6.0, 15.0]]))

    # Column vector + row vector (and the reverse) yields a full 3x3 result.
    col_plus_row = m.sum(axis=0).T + row
    self._assert_eq(col_plus_row,
                    np.array([[12.0, 13.0, 14.0],
                              [14.0, 15.0, 16.0],
                              [16.0, 17.0, 18.0]]))
    row_plus_col = m.sum(axis=0) + row.T
    self._assert_eq(row_plus_col,
                    np.array([[12.0, 14.0, 16.0],
                              [13.0, 15.0, 17.0],
                              [14.0, 16.0, 18.0]]))

    col_plus_col = square.sum(axis=0).T + square.sum(axis=1)
    self._assert_eq(col_plus_col, np.array([[18.0], [30.0], [42.0]]))
def test_sum_with_sparsify(self):
    """Sums of rectangle-sparsified matrices must agree with the dense array.

    The comparison itself is delegated to ``self.assert_sums_agree``.
    """
    # All non-zero entries sit inside rows 2-3, columns 4-5.
    nd = np.zeros(shape=(5, 7))
    nd[2:4, 4:6] = [[1.0, 2.0], [3.0, 4.0]]

    rectangle_sets = [
        [[2, 4, 4, 6]],
        [[2, 4, 4, 6], [0, 5, 0, 1]],
        [[2, 4, 4, 6], [0, 1, 0, 7]],
    ]
    sparse_matrices = [
        BlockMatrix.from_numpy(nd, block_size=2).sparsify_rectangles(rectangles)
        for rectangles in rectangle_sets
    ]

    all_zero = np.zeros(shape=(5, 7))
    empty = BlockMatrix.fill(5, 7, value=0.0, block_size=2).sparsify_rectangles([])

    for sparse in sparse_matrices:
        self.assert_sums_agree(sparse, nd)
    self.assert_sums_agree(empty, all_zero)
def test_export_blocks(self):
    """Export blocks in binary form and reassemble them into the original array."""
    expected = np.ones(shape=(8, 10))
    # block_size=20 exceeds both dimensions of the 8x10 matrix.
    bm = BlockMatrix.from_numpy(expected, block_size=20)

    # Export through the URI, read back through the plain local path.
    export_dir = new_local_temp_dir()
    export_uri = local_path_uri(export_dir)
    bm.export_blocks(export_uri, binary=True)

    reassembled = BlockMatrix.rectangles_to_numpy(export_dir, binary=True)
    self._assert_eq(expected, reassembled)
def test_special_elementwise_ops(self):
    """Check power, sqrt, ceil, floor, log and abs element-wise against NumPy."""
    nm = np.array([[1.0, 2.0, 3.0, 3.14], [4.0, 5.0, 6.0, 12.12]])
    m = BlockMatrix.from_numpy(nm)

    comparisons = [
        (m ** 3, nm ** 3),
        (m.sqrt(), np.sqrt(nm)),
        (m.ceil(), np.ceil(nm)),
        (m.floor(), np.floor(nm)),
        (m.log(), np.log(nm)),
        ((m - 4).abs(), np.abs(nm - 4)),
    ]
    for actual, expected in comparisons:
        self._assert_close(actual, expected)
def test_slicing(self):
    """Check BlockMatrix indexing, slice arithmetic and chained slicing.

    Row and column slices are additionally subtracted from (and have
    subtracted from them) the full matrix, and general 2-D slices are
    sliced a second time; all results are compared with NumPy.
    """
    nd = np.array(np.arange(0, 80, dtype=float)).reshape(8, 10)
    bm = BlockMatrix.from_numpy(nd, block_size=3)

    for idx in [(0, 0), (5, 7), (-3, 9), (-8, -10)]:
        self._assert_eq(bm[idx], nd[idx])

    # (index tuples, axis NumPy drops and that must be re-inserted)
    vector_cases = [
        ([(0, slice(3, 4)), (1, slice(3, 4)), (-8, slice(3, 4)), (-1, slice(3, 4))], 0),
        ([(slice(3, 4), 0), (slice(3, 4), 1), (slice(3, 4), -8), (slice(3, 4), -1)], 1),
    ]
    for index_list, dropped_axis in vector_cases:
        for idx in index_list:
            self._assert_eq(bm[idx], np.expand_dims(nd[idx], dropped_axis))
            self._assert_eq(bm[idx] - bm, nd[idx] - nd)
            self._assert_eq(bm - bm[idx], nd - nd[idx])

    block_indices = [
        (slice(0, 8), slice(0, 10)),
        (slice(0, 8, 2), slice(0, 10, 2)),
        (slice(2, 4), slice(5, 7)),
        (slice(-8, -1), slice(-10, -1)),
        (slice(-8, -1, 2), slice(-10, -1, 2)),
        (slice(None, 4, 1), slice(None, 4, 1)),
        (slice(4, None), slice(4, None)),
        (slice(None, None), slice(None, None)),
    ]
    for idx in block_indices:
        self._assert_eq(bm[idx], nd[idx])
        # Slice the slice again along each axis.
        self._assert_eq(bm[idx][:, :2], nd[idx][:, :2])
        self._assert_eq(bm[idx][:2, :], nd[idx][:2, :])

    # Index expressions that must be rejected.
    invalid_accesses = [
        lambda: bm[0, ],
        lambda: bm[9, 0],
        lambda: bm[-9, 0],
        lambda: bm[0, 11],
        lambda: bm[0, -11],
        lambda: bm[::-1, 0],
        lambda: bm[0, ::-1],
        lambda: bm[:0, 0],
        lambda: bm[0, :0],
        lambda: bm[0:9, 0],
        lambda: bm[-9:, 0],
        lambda: bm[:-9, 0],
        lambda: bm[0, :11],
        lambda: bm[0, -11:],
        lambda: bm[0, :-11],
    ]
    for access in invalid_accesses:
        self.assertRaises(ValueError, access)
def test_rectangles_to_numpy(self):
    """Export two rectangles (text and binary) and reassemble them with NumPy.

    Exports go through URIs; ``rectangles_to_numpy`` reads the plain local
    paths.  The expected array is 3x2, with zeros where neither rectangle
    supplies a value.
    """
    nd = np.array([[1.0, 2.0, 3.0],
                   [4.0, 5.0, 6.0],
                   [7.0, 8.0, 9.0]])
    rects = [[0, 3, 0, 1], [1, 2, 0, 2]]

    text_dir = new_local_temp_dir()
    text_uri = local_path_uri(text_dir)
    BlockMatrix.from_numpy(nd).export_rectangles(text_uri, rects)

    bytes_dir = new_local_temp_dir()
    bytes_uri = local_path_uri(bytes_dir)
    BlockMatrix.from_numpy(nd).export_rectangles(bytes_uri, rects, binary=True)

    expected = np.array([[1.0, 0.0],
                         [4.0, 5.0],
                         [7.0, 0.0]])
    self._assert_eq(expected, BlockMatrix.rectangles_to_numpy(text_dir))
    self._assert_eq(expected, BlockMatrix.rectangles_to_numpy(bytes_dir, binary=True))
def test_rectangles_to_numpy(self):
    """Round-trip two rectangles through ``export_rectangles``.

    Each export (default options, then ``binary=True``) goes to its own
    temporary directory; afterwards every directory is read back with
    ``BlockMatrix.rectangles_to_numpy`` using the same options and compared
    with one expected array.
    """
    dense = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]])
    rectangles = [[0, 3, 0, 1], [1, 2, 0, 2]]

    # Write both exports first, remembering where each one went.
    written = []
    for options in ({}, {'binary': True}):
        out_dir = new_local_temp_dir()
        out_uri = local_path_uri(out_dir)
        BlockMatrix.from_numpy(dense).export_rectangles(out_uri, rectangles, **options)
        written.append((out_dir, options))

    want = np.array([[1.0, 0.0], [4.0, 5.0], [7.0, 0.0]])
    for out_dir, options in written:
        self._assert_eq(
            want,
            BlockMatrix.rectangles_to_numpy(out_dir, **options))
def test_sparsify_blocks(self):
    """Check ``BlockMatrix._sparsify_blocks`` against ``sparsify_numpy``.

    Two square matrices are tested (4 x 4 with block size 2, 15 x 15 with
    block size 4). For the small one the reference result is additionally
    compared with a hand-written array.
    """

    def sparsify_both_ways(side, block_size, block_list):
        # Build a side x side matrix 0..side*side-1 and sparsify it through
        # the BlockMatrix and through the NumPy reference helper.
        dense = np.arange(side * side, dtype=np.float64).reshape((side, side))
        blocked = BlockMatrix.from_numpy(dense, block_size=block_size)
        blocked = blocked._sparsify_blocks(block_list)
        reference = sparsify_numpy(dense, block_size, block_list)
        return blocked.to_numpy(), reference

    actual, reference = sparsify_both_ways(4, 2, [1, 2])
    assert np.array_equal(actual, reference)
    hand_written = np.array([[0, 0, 2, 3],
                             [0, 0, 6, 7],
                             [8, 9, 0, 0],
                             [12, 13, 0, 0]])
    assert np.array_equal(reference, hand_written)

    actual, reference = sparsify_both_ways(15, 4, [4, 8, 10, 12, 13, 14])
    assert np.array_equal(actual, reference)
def test_sparsify_rectangles(self):
    """Check ``sparsify_rectangles`` on a 4 x 4 matrix with block size 2.

    Three rectangles are requested and the result is compared with a
    hand-written array; an empty rectangle list must give an all-zero matrix.
    """
    dense = np.array([
        [1.0, 2.0, 3.0, 4.0],
        [5.0, 6.0, 7.0, 8.0],
        [9.0, 10.0, 11.0, 12.0],
        [13.0, 14.0, 15.0, 16.0],
    ])
    blocked = BlockMatrix.from_numpy(dense, block_size=2)

    rectangles = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4]]
    sparsified = blocked.sparsify_rectangles(rectangles)
    want = np.array([
        [1., 2., 3., 4.],
        [5., 6., 7., 8.],
        [9., 10., 0., 0.],
        [13., 14., 0., 0.],
    ])
    self._assert_eq(sparsified, want)

    nothing_kept = blocked.sparsify_rectangles([])
    self._assert_eq(nothing_kept, np.zeros(shape=(4, 4)))
def test_slicing(self):
    """Compare ``BlockMatrix`` indexing with NumPy indexing on an 8 x 10 matrix.

    Valid indices are grouped by how the NumPy result has to be adjusted
    before comparison; a final list holds indices that must raise
    ``ValueError``.
    """
    dense = np.array(np.arange(0, 80, dtype=float)).reshape(8, 10)
    blocked = BlockMatrix.from_numpy(dense, block_size=3)
    one = slice(3, 4)

    def unchanged(picked):
        return picked

    # (index tuples, adjustment applied to the NumPy result)
    cases = [
        ([(0, 0), (5, 7), (-3, 9), (-8, -10)], unchanged),
        ([(0, one), (1, one), (-8, one), (-1, one)],
         lambda picked: np.expand_dims(picked, 0)),
        ([(one, 0), (one, 1), (one, -8), (one, -1)],
         lambda picked: np.expand_dims(picked, 1)),
        ([(slice(0, 8), slice(0, 10)),
          (slice(0, 8, 2), slice(0, 10, 2)),
          (slice(2, 4), slice(5, 7)),
          (slice(-8, -1), slice(-10, -1)),
          (slice(-8, -1, 2), slice(-10, -1, 2)),
          (slice(None, 4, 1), slice(None, 4, 1)),
          (slice(4, None), slice(4, None)),
          (slice(None, None), slice(None, None))],
         unchanged),
    ]
    for keys, adjust in cases:
        for key in keys:
            self._assert_eq(blocked[key], adjust(dense[key]))

    # Indices that must be rejected.
    rejected = [
        (0,),
        (9, 0),
        (-9, 0),
        (0, 11),
        (0, -11),
        (slice(None, None, -1), 0),
        (0, slice(None, None, -1)),
        (slice(None, 0), 0),
        (0, slice(None, 0)),
        (slice(0, 9), 0),
        (slice(-9, None), 0),
        (slice(None, -9), 0),
        (0, slice(None, 11)),
        (0, slice(-11, None)),
        (0, slice(None, -11)),
    ]
    for key in rejected:
        with self.assertRaises(ValueError):
            blocked[key]
def test_sparsify_rectangles(self):
    """Check ``sparsify_rectangles`` on the 4 x 4 matrix holding 1.0 .. 16.0.

    With the three requested rectangles the expected result equals the input
    with its lower-right 2 x 2 corner zeroed; with no rectangles the expected
    result is all zeros.
    """
    dense = np.arange(1.0, 17.0).reshape(4, 4)
    blocked = BlockMatrix.from_numpy(dense, block_size=2)

    want = dense.copy()
    want[2:, 2:] = 0.0  # rows 2-3, columns 2-3 are expected to be dropped

    self._assert_eq(
        blocked.sparsify_rectangles([[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4]]),
        want)
    self._assert_eq(blocked.sparsify_rectangles([]), np.zeros(shape=(4, 4)))
def test_slices_with_sparsify(self):
    """Index and slice a matrix produced by ``sparsify_row_intervals``.

    The interval starts are all 0 and the stops are 2 for row 0 and 0 for
    every other row. The sparsified matrix is then compared, element-wise and
    through several slices, with a zero matrix whose only non-zero entry is
    1.0 at position (0, 1).
    """
    dense = np.array(np.arange(0, 80, dtype=float)).reshape(8, 10)
    blocked = BlockMatrix.from_numpy(dense, block_size=3)

    starts = [0] * 8
    stops = [2] + [0] * 7
    sparse = blocked.sparsify_row_intervals(starts, stops)

    # Single-element lookups.
    for col, value in ((1, 1.0), (2, 0.0), (9, 0.0)):
        self.assertEqual(sparse[0, col], value)

    reference = np.zeros(shape=(8, 10))
    reference[0, 1] = 1.0

    self._assert_eq(sparse[:, :], reference)
    self._assert_eq(sparse[:, 1], reference[:, 1:2])
    self._assert_eq(sparse[1, :], reference[1:2, :])
    self._assert_eq(sparse[0:5, 0:5], reference[0:5, 0:5])
def test_block_matrix_entries(self):
    """Check ``BlockMatrix.entries()`` against a hand-built table.

    A 5 x 3 matrix whose (i, j) entry is ``float(i + j)`` is converted to a
    block matrix with several block sizes. For each, the entries table must
    have one row per element, three row fields, and be ``_same`` as the
    expected table keyed by int64 ``i`` and ``j``.
    """
    n_rows, n_cols = 5, 3
    records = [
        {'i': i, 'j': j, 'entry': float(i + j)}
        for i in range(n_rows)
        for j in range(n_cols)
    ]

    # Expected table: parallelize with int32 indices, then widen to int64 and key.
    row_type = hl.tstruct(i=hl.tint32, j=hl.tint32, entry=hl.tfloat64)
    structs = [hl.struct(i=rec['i'], j=rec['j'], entry=rec['entry']) for rec in records]
    expected = hl.Table.parallelize(structs, row_type)
    expected = expected.annotate(i=hl.int64(expected.i), j=hl.int64(expected.j))
    expected = expected.key_by('i', 'j')

    dense = np.reshape([rec['entry'] for rec in records], (n_rows, n_cols))

    for block_size in (1, 2, 1024):
        entries = BlockMatrix.from_numpy(dense, block_size).entries()
        self.assertEqual(entries.count(), n_cols * n_rows)
        self.assertEqual(len(entries.row), 3)
        self.assertTrue(expected._same(entries))
def test_export_rectangles_filtered(self):
    """Export rectangles from a sliced block matrix.

    The 4 x 4 input is sliced to its central 2 x 2 window (``[1:3, 1:3]``)
    before two rectangles are exported; the export is checked against the
    corresponding 2 x 2 array.
    """
    out_dir = new_local_temp_dir()
    out_uri = local_path_uri(out_dir)

    dense = np.array([
        [1.0, 2.0, 3.0, 4.0],
        [5.0, 6.0, 7.0, 8.0],
        [9.0, 10.0, 11.0, 12.0],
        [13.0, 14.0, 15.0, 16.0],
    ])
    window = BlockMatrix.from_numpy(dense)[1:3, 1:3]

    rectangles = [[0, 1, 0, 2], [1, 2, 0, 2]]
    window.export_rectangles(out_uri, rectangles)

    want = np.array([[6.0, 7.0],
                     [10.0, 11.0]])
    self._assert_rectangles_eq(want, out_dir, rectangles)
def test_export_rectangles_sparse(self):
    """Export rectangles from a block matrix that was sparsified first.

    The matrix is sparsified with three rectangles and then exported with
    those three plus ``[2, 4, 2, 4]``; the expected array has zeros in rows
    2-3, columns 2-3.
    """
    out_dir = new_local_temp_dir()
    out_uri = local_path_uri(out_dir)

    dense = np.array([
        [1.0, 2.0, 3.0, 4.0],
        [5.0, 6.0, 7.0, 8.0],
        [9.0, 10.0, 11.0, 12.0],
        [13.0, 14.0, 15.0, 16.0],
    ])
    blocked = BlockMatrix.from_numpy(dense, block_size=2)

    kept = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4]]
    exported = [[0, 1, 0, 1], [0, 3, 0, 2], [1, 2, 0, 4], [2, 4, 2, 4]]

    sparse = blocked.sparsify_rectangles(kept)
    sparse.export_rectangles(out_uri, exported)

    want = np.array([
        [1.0, 2.0, 3.0, 4.0],
        [5.0, 6.0, 7.0, 8.0],
        [9.0, 10.0, 0.0, 0.0],
        [13.0, 14.0, 0.0, 0.0],
    ])
    self._assert_rectangles_eq(want, out_dir, exported)
def from_kinship(cls, y, x, k, p_path=None, overwrite=False):
    r"""Initializes a model from :math:`y`, :math:`X`, and :math:`K`.

    Examples
    --------
    >>> from hail.stats import LinearMixedModel
    >>> y = np.array([0.0, 1.0, 8.0, 9.0])
    >>> x = np.array([[1.0, 0.0],
    ...               [1.0, 2.0],
    ...               [1.0, 1.0],
    ...               [1.0, 4.0]])
    >>> k = np.array([[ 1.        , -0.8727875 ,  0.96397335,  0.94512946],
    ...               [-0.8727875 ,  1.        , -0.93036112, -0.97320323],
    ...               [ 0.96397335, -0.93036112,  1.        ,  0.98294169],
    ...               [ 0.94512946, -0.97320323,  0.98294169,  1.        ]])
    >>> model, p = LinearMixedModel.from_kinship(y, x, k)
    >>> model.fit()
    >>> model.h_sq
    0.2525148830695317

    >>> model.s
    array([3.83501295, 0.13540343, 0.02454114, 0.00504248])

    Truncate to a rank :math:`r=2` model:

    >>> r = 2
    >>> s_r = model.s[:r]
    >>> p_r = p[:r, :]
    >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x)
    >>> model_r.fit()
    >>> model_r.h_sq
    0.25193197591429695

    Notes
    -----
    This method eigendecomposes :math:`K = P^T S P` on the master and
    returns ``LinearMixedModel(p @ y, p @ x, s)`` and ``p``.

    The performance of eigendecomposition depends critically on the
    number of master cores and the NumPy / SciPy configuration, viewable
    with ``np.show_config()``. For Intel machines, we recommend installing
    the `MKL <https://anaconda.org/anaconda/mkl>`__ package for Anaconda, as
    is done by `cloudtools <https://github.com/Nealelab/cloudtools>`__.

    `k` must be positive semi-definite; symmetry is not checked as only the
    lower triangle is used.

    Parameters
    ----------
    y: :class:`ndarray`
        :math:`n` vector of observations.
    x: :class:`ndarray`
        :math:`n \times p` matrix of fixed effects.
    k: :class:`ndarray`
        :math:`n \times n` positive semi-definite kernel :math:`K`.
    p_path: :obj:`str`, optional
        Path at which to write :math:`P` as a block matrix.
    overwrite: :obj:`bool`
        If ``True``, overwrite an existing file at `p_path`.

    Returns
    -------
    model: :class:`LinearMixedModel`
        Model constructed from :math:`y`, :math:`X`, and :math:`K`.
    p: :class:`ndarray`
        Matrix :math:`P` whose rows are the eigenvectors of :math:`K`.

    Raises
    ------
    ValueError
        If `k` is not square, or if `y` or `x` does not have as many rows
        as `k`.
    Exception
        If the smallest eigenvalue of `k` is below the negative tolerance
        checked in the body.
    """
    _check_dims(y, "y", 1)
    _check_dims(x, "x", 2)
    _check_dims(k, "k", 2)

    n = k.shape[0]
    if k.shape[1] != n:
        raise ValueError("from_kinship: 'k' must be a square matrix")
    if y.shape[0] != n:
        raise ValueError("from_kinship: 'y' and 'k' must have the same "
                         "number of rows")
    if x.shape[0] != n:
        raise ValueError("from_kinship: 'x' and 'k' must have the same "
                         "number of rows")

    # Eigenvalues arrive in ascending order (they are flipped to descending
    # below), so s[0] is the smallest and s[-1] the largest.
    s, u = hl.linalg._eigh(k)
    # NOTE(review): the tolerance is -1e12 times the largest eigenvalue, so
    # this only fires for extremely negative values; -1e-12 may have been
    # intended. Left unchanged because tightening it could reject inputs
    # that are accepted today -- confirm before changing.
    if s[0] < -1e12 * s[-1]:
        raise Exception("from_kinship: smallest eigenvalue of 'k' is "
                        f"negative: {s[0]}")

    # flip eigenvalues (and matching eigenvectors) to descending order
    s = np.flip(s, axis=0)
    u = np.fliplr(u)
    p = u.T

    if p_path:
        BlockMatrix.from_numpy(p).write(p_path, overwrite=overwrite)

    model = LinearMixedModel(p @ y, p @ x, s, p_path=p_path)
    return model, p
def from_random_effects(cls, y, x, z,
                        p_path=None,
                        overwrite=False,
                        max_condition_number=1e-10,
                        complexity_bound=8192):
    r"""Initializes a model from :math:`y`, :math:`X`, and :math:`Z`.

    Examples
    --------
    >>> from hail.stats import LinearMixedModel
    >>> y = np.array([0.0, 1.0, 8.0, 9.0])
    >>> x = np.array([[1.0, 0.0],
    ...               [1.0, 2.0],
    ...               [1.0, 1.0],
    ...               [1.0, 4.0]])
    >>> z = np.array([[0.0, 0.0, 1.0],
    ...               [0.0, 1.0, 2.0],
    ...               [1.0, 2.0, 4.0],
    ...               [2.0, 4.0, 8.0]])
    >>> model, p = LinearMixedModel.from_random_effects(y, x, z)
    >>> model.fit()
    >>> model.h_sq
    0.38205307244271675

    Notes
    -----
    If :math:`n \leq m`, the returned model is full rank.

    If :math:`n > m`, the returned model is low rank. In this case only,
    eigenvalues less than or equal to `max_condition_number` times the top
    eigenvalue are dropped from :math:`S`, with the corresponding
    eigenvectors dropped from :math:`P`. This guards against precision
    loss on left eigenvectors computed via the right gramian :math:`Z^T Z`
    in :meth:`BlockMatrix.svd`.

    In either case, one can truncate to a rank :math:`r` model as follows.
    If `p` is an ndarray:

    >>> p_r = p[:r, :]     # doctest: +SKIP
    >>> s_r = model.s[:r]  # doctest: +SKIP
    >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x)  # doctest: +SKIP

    If `p` is a block matrix:

    >>> p[:r, :].write(p_r_path)          # doctest: +SKIP
    >>> p_r = BlockMatrix.read(p_r_path)  # doctest: +SKIP
    >>> s_r = model.s[:r]                 # doctest: +SKIP
    >>> model_r = LinearMixedModel(p_r @ y, p_r @ x, s_r, y, x, p_r_path)  # doctest: +SKIP

    This method applies no standardization to `z`.

    Warning
    -------
    If `z` is a block matrix, then ideally `z` should be the result of
    directly reading from disk (and possibly a transpose). This is most
    critical if :math:`n > m`, because in this case multiplication by `z`
    will result in all preceding transformations being repeated
    ``n / block_size`` times, as explained in :class:`.BlockMatrix`.

    At least one dimension must be less than or equal to 46300.
    See the warning in :meth:`.BlockMatrix.svd` for performance
    considerations.

    Parameters
    ----------
    y: :class:`ndarray`
        :math:`n` vector of observations :math:`y`.
    x: :class:`ndarray`
        :math:`n \times p` matrix of fixed effects :math:`X`.
    z: :class:`ndarray` or :class:`BlockMatrix`
        :math:`n \times m` matrix of random effects :math:`Z`.
    p_path: :obj:`str`, optional
        Path at which to write :math:`P` as a block matrix.
        Required if `z` is a block matrix.
    overwrite: :obj:`bool`
        If ``True``, overwrite an existing file at `p_path`.
    max_condition_number: :obj:`float`
        Maximum condition number. Must be at least 1e-16.
    complexity_bound: :obj:`int`
        Complexity bound for :meth:`.BlockMatrix.svd` when `z` is a block
        matrix.

    Returns
    -------
    model: :class:`LinearMixedModel`
        Model constructed from :math:`y`, :math:`X`, and :math:`Z`.
    p: :class:`ndarray` or :class:`.BlockMatrix`
        Matrix :math:`P` whose rows are the eigenvectors of :math:`K`.
        The type is block matrix if `z` is a block matrix and
        :meth:`.BlockMatrix.svd` of `z` returns :math:`U` as a block matrix.

    Raises
    ------
    ValueError
        If `z` is a block matrix and `p_path` is ``None``, if
        `max_condition_number` is below 1e-16, or if `y` or `x` does not
        have as many rows as `z`.
    """
    z_is_bm = isinstance(z, BlockMatrix)

    if z_is_bm and p_path is None:
        raise ValueError("from_random_effects: 'p_path' required when 'z' "
                         "is a block matrix.")

    if max_condition_number < 1e-16:
        raise ValueError("from_random_effects: 'max_condition_number' must "
                         f"be at least 1e-16, found {max_condition_number}")

    _check_dims(y, "y", 1)
    _check_dims(x, "x", 2)
    _check_dims(z, "z", 2)

    n, m = z.shape

    if y.shape[0] != n:
        raise ValueError("from_random_effects: 'y' and 'z' must have the "
                         "same number of rows")
    if x.shape[0] != n:
        raise ValueError("from_random_effects: 'x' and 'z' must have the "
                         "same number of rows")

    # P is the transpose of the left singular vectors of Z; whether it is a
    # block matrix depends on what BlockMatrix.svd returned for U.
    if z_is_bm:
        u, s0, _ = z.svd(complexity_bound=complexity_bound)
        p = u.T
        p_is_bm = isinstance(p, BlockMatrix)
    else:
        u, s0, _ = hl.linalg._svd(z, full_matrices=False)
        p = u.T
        p_is_bm = False

    # Squared singular values of Z are used as the model's eigenvalues S.
    s = s0 ** 2

    low_rank = n > m

    if low_rank:
        assert np.all(np.isfinite(s))
        # Searching the negated values finds the first position whose
        # eigenvalue is <= max_condition_number * s[0]; this relies on s
        # being in descending order.
        r = np.searchsorted(-s, -max_condition_number * s[0])
        if r < m:
            info(f'from_random_effects: model rank reduced from {m} to {r} '
                 f'due to ill-condition.'
                 f'\n Largest dropped eigenvalue was {s[r]}.')
        s = s[:r]
        p = p[:r, :]

    if p_path is not None:
        if p_is_bm:
            # Write and re-read so that `p` is backed by the file at p_path.
            p.write(p_path, overwrite=overwrite)
            p = BlockMatrix.read(p_path)
        else:
            BlockMatrix.from_numpy(p).write(p_path, overwrite=overwrite)

    if p_is_bm:
        py, px = (p @ y.reshape(n, 1)).to_numpy().flatten(), (p @ x).to_numpy()
    else:
        py, px = p @ y, p @ x

    # Only the low-rank model is given the untransformed y and x.
    if low_rank:
        model = LinearMixedModel(py, px, s, y, x, p_path)
    else:
        model = LinearMixedModel(py, px, s, p_path=p_path)

    return model, p
def test_elementwise_ops(self):
    """Check ``+``, ``-``, ``*`` and ``/`` on block matrices.

    Four shapes are used: 1 x 1 (``x``), 2 x 1 (``c``), 1 x 3 (``r``) and
    2 x 3 (``m``), each with its NumPy counterpart (``nx``, ``nc``, ``nr``,
    ``nm``). For every operator the test compares, in order: scalar on the
    right against NumPy, scalar on the left, a matrix with itself,
    hand-written results for mixed shapes, swapped operands, and a NumPy
    right operand against the equivalent block-matrix right operand.
    Division results go through ``_assert_close`` and the rest through
    ``_assert_eq``.
    """
    nx = np.matrix([[2.0]])
    nc = np.matrix([[1.0], [2.0]])
    nr = np.matrix([[1.0, 2.0, 3.0]])
    nm = np.matrix([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]])
    e = 2.0

    x, c, r, m = (BlockMatrix.from_numpy(nd, block_size=8) for nd in (nx, nc, nr, nm))

    self.assertRaises(TypeError, lambda: x + np.array(['one'], dtype=str))

    self._assert_eq(+m, 0 + m)
    self._assert_eq(-m, 0 - m)

    # Operand tables shared by all four operators.
    blocks = [x, c, r, m]
    with_numpy = [(x, nx), (c, nc), (r, nr), (m, nm)]
    shape_pairs = [(x, c), (x, r), (x, m), (c, m), (r, m)]
    # (left block matrix, NumPy right operand, equivalent block-matrix right operand)
    mixed = [(x, nx, x), (x, nc, c), (x, nr, r), (x, nm, m),
             (c, nx, x), (c, nc, c), (c, nm, m),
             (r, nx, x), (r, nr, r), (r, nm, m),
             (m, nx, x), (m, nc, c), (m, nr, r), (m, nm, m)]

    # addition
    for bm, nd in with_numpy:
        self._assert_eq(bm + e, nd + e)
    for bm in blocks:
        self._assert_eq(bm + e, e + bm)
    for bm in blocks:
        self._assert_eq(bm + bm, 2 * bm)
    sums = [np.matrix([[3.0], [4.0]]),
            np.matrix([[3.0, 4.0, 5.0]]),
            np.matrix([[3.0, 4.0, 5.0], [6.0, 7.0, 8.0]]),
            np.matrix([[2.0, 3.0, 4.0], [6.0, 7.0, 8.0]]),
            np.matrix([[2.0, 4.0, 6.0], [5.0, 7.0, 9.0]])]
    for (lhs, rhs), want in zip(shape_pairs, sums):
        self._assert_eq(lhs + rhs, want)
    for lhs, rhs in shape_pairs:
        self._assert_eq(lhs + rhs, rhs + lhs)
    for lhs, nd, rhs in mixed:
        self._assert_eq(lhs + nd, lhs + rhs)

    # subtraction
    for bm, nd in with_numpy:
        self._assert_eq(bm - e, nd - e)
    for bm in blocks:
        self._assert_eq(bm - e, -(e - bm))
    for bm, nd in with_numpy:
        self._assert_eq(bm - bm, np.zeros(nd.shape))
    differences = [np.matrix([[1.0], [0.0]]),
                   np.matrix([[1.0, 0.0, -1.0]]),
                   np.matrix([[1.0, 0.0, -1.0], [-2.0, -3.0, -4.0]]),
                   np.matrix([[0.0, -1.0, -2.0], [-2.0, -3.0, -4.0]]),
                   np.matrix([[0.0, 0.0, 0.0], [-3.0, -3.0, -3.0]])]
    for (lhs, rhs), want in zip(shape_pairs, differences):
        self._assert_eq(lhs - rhs, want)
    for lhs, rhs in shape_pairs:
        self._assert_eq(lhs - rhs, -(rhs - lhs))
    for lhs, nd, rhs in mixed:
        self._assert_eq(lhs - nd, lhs - rhs)

    # multiplication
    for bm, nd in with_numpy:
        self._assert_eq(bm * e, nd * e)
    for bm in blocks:
        self._assert_eq(bm * e, e * bm)
    for bm in blocks:
        self._assert_eq(bm * bm, bm ** 2)
    products = [np.matrix([[2.0], [4.0]]),
                np.matrix([[2.0, 4.0, 6.0]]),
                np.matrix([[2.0, 4.0, 6.0], [8.0, 10.0, 12.0]]),
                np.matrix([[1.0, 2.0, 3.0], [8.0, 10.0, 12.0]]),
                np.matrix([[1.0, 4.0, 9.0], [4.0, 10.0, 18.0]])]
    for (lhs, rhs), want in zip(shape_pairs, products):
        self._assert_eq(lhs * rhs, want)
    for lhs, rhs in shape_pairs:
        self._assert_eq(lhs * rhs, rhs * lhs)
    for lhs, nd, rhs in mixed:
        self._assert_eq(lhs * nd, lhs * rhs)

    # division
    for bm, nd in with_numpy:
        self._assert_close(bm / e, nd / e)
    for bm in blocks:
        self._assert_close(bm / e, 1 / (e / bm))
    for bm, nd in with_numpy:
        self._assert_close(bm / bm, np.ones(nd.shape))
    quotients = [np.matrix([[2 / 1.0], [2 / 2.0]]),
                 np.matrix([[2 / 1.0, 2 / 2.0, 2 / 3.0]]),
                 np.matrix([[2 / 1.0, 2 / 2.0, 2 / 3.0], [2 / 4.0, 2 / 5.0, 2 / 6.0]]),
                 np.matrix([[1 / 1.0, 1 / 2.0, 1 / 3.0], [2 / 4.0, 2 / 5.0, 2 / 6.0]]),
                 np.matrix([[1 / 1.0, 2 / 2.0, 3 / 3.0], [1 / 4.0, 2 / 5.0, 3 / 6.0]])]
    for (lhs, rhs), want in zip(shape_pairs, quotients):
        self._assert_close(lhs / rhs, want)
    for lhs, rhs in shape_pairs:
        self._assert_close(lhs / rhs, 1 / (rhs / lhs))
    for lhs, nd, rhs in mixed:
        self._assert_close(lhs / nd, lhs / rhs)
def test_sum(self):
    """Run ``assert_sums_agree`` on an 11 x 13 matrix of normal samples (block size 3)."""
    samples = np.random.normal(size=(11, 13))
    blocked = BlockMatrix.from_numpy(samples, block_size=3)
    self.assert_sums_agree(blocked, samples)
def test_linear_mixed_model_math(self):
    """Check ``LinearMixedModel`` against a direct dense NumPy computation.

    Reference values (``beta``, ``sigma_sq``, ``neg_log_lkhd``, ``chi_sq``)
    are computed with plain linear algebra at a fixed ``gamma`` on a 4-sample
    problem. They are then compared with models built by ``from_kinship``
    (full rank) and ``from_random_effects`` (low rank, through both the
    block-matrix and the ndarray input), including the per-alternative
    statistics from ``fit_alternatives_numpy`` and ``fit_alternatives``.
    """
    gamma = 2.0  # testing at fixed value of gamma
    # n samples, f fixed-effect columns in x, m random-effect columns in z
    n, f, m = 4, 2, 3
    y = np.array([0.0, 1.0, 8.0, 9.0])
    x = np.array([[1.0, 0.0],
                  [1.0, 2.0],
                  [1.0, 1.0],
                  [1.0, 4.0]])
    z = np.array([[0.0, 0.0, 1.0],
                  [0.0, 1.0, 2.0],
                  [1.0, 2.0, 4.0],
                  [2.0, 4.0, 8.0]])
    # Kernel and the matrix v = k + I / gamma used throughout the reference math.
    k = z @ z.T
    v = k + np.eye(4) / gamma
    v_inv = np.linalg.inv(v)
    # beta solves (x^T v^-1 x) beta = x^T v^-1 y
    beta = np.linalg.solve(x.T @ v_inv @ x, x.T @ v_inv @ y)
    residual = y - x @ beta
    sigma_sq = 1 / (n - f) * (residual @ v_inv @ residual)
    sv = sigma_sq * v
    neg_log_lkhd = 0.5 * (np.linalg.slogdet(sv)[1] + np.linalg.slogdet(x.T @ np.linalg.inv(sv) @ x)[1])  # plus C

    # Reference for one alternative: prepend the extra column `a` to x and refit.
    x_star = np.array([1.0, 0.0, 1.0, 0.0])
    a = x_star.reshape(n, 1)
    x1 = np.hstack([a, x])
    beta1 = np.linalg.solve(x1.T @ v_inv @ x1, x1.T @ v_inv @ y)
    residual1 = y - x1 @ beta1
    chi_sq = n * np.log((residual @ v_inv @ residual) / (residual1 @ v_inv @ residual1))

    # test from_kinship, full-rank fit
    model, p = LinearMixedModel.from_kinship(y, x, k)
    # Same decomposition done by hand: eigh, then flip to descending order.
    s0, u0 = np.linalg.eigh(k)
    s0 = np.flip(s0, axis=0)
    p0 = np.fliplr(u0).T
    self.assertTrue(model._same(LinearMixedModel(p0 @ y, p0 @ x, s0)))

    model.fit(np.log(gamma))
    self.assertTrue(np.allclose(model.beta, beta))
    self.assertAlmostEqual(model.sigma_sq, sigma_sq)
    self.assertAlmostEqual(model.compute_neg_log_reml(np.log(gamma)), neg_log_lkhd)

    # test full-rank alternative
    pa = p @ a
    stats = model.fit_alternatives_numpy(pa).collect()[0]
    self.assertAlmostEqual(stats.beta, beta1[0])
    self.assertAlmostEqual(stats.chi_sq, chi_sq)

    # Same alternative, read from a block matrix written to a temp file.
    pa_t_path = utils.new_temp_file()
    BlockMatrix.from_numpy(pa.T).write(pa_t_path, force_row_major=True)
    stats = model.fit_alternatives(pa_t_path).collect()[0]
    self.assertAlmostEqual(stats.beta, beta1[0])
    self.assertAlmostEqual(stats.chi_sq, chi_sq)

    # test from_random_effects, low-rank fit
    # Keep only the first m eigenvalues and rows of p0 for the low-rank reference.
    s0, p0 = s0[:m], p0[:m, :]
    # test BlockMatrix path
    temp_path = utils.new_temp_file()
    # NOTE(review): complexity_bound=0 presumably steers BlockMatrix.svd away
    # from its local code path -- confirm against BlockMatrix.svd.
    model, _ = LinearMixedModel.from_random_effects(y, x,
                                                    BlockMatrix.from_numpy(z),
                                                    p_path=temp_path,
                                                    complexity_bound=0)
    lmm = LinearMixedModel(p0 @ y, p0 @ x, s0, y, x, p_path=temp_path)
    self.assertTrue(model._same(lmm))

    # test ndarray path
    model, p = LinearMixedModel.from_random_effects(y, x, z)
    lmm = LinearMixedModel(p0 @ y, p0 @ x, s0, y, x)
    self.assertTrue(model._same(lmm))

    model.fit(np.log(gamma))
    self.assertTrue(np.allclose(model.beta, beta))
    self.assertAlmostEqual(model.sigma_sq, sigma_sq)
    self.assertAlmostEqual(model.compute_neg_log_reml(np.log(gamma)), neg_log_lkhd)

    # test low_rank alternative
    # Here the untransformed `a` is passed alongside `pa`.
    pa = p @ a
    stats = model.fit_alternatives_numpy(pa, a).collect()[0]
    self.assertAlmostEqual(stats.beta, beta1[0])
    self.assertAlmostEqual(stats.chi_sq, chi_sq)

    a_t_path = utils.new_temp_file()
    BlockMatrix.from_numpy(a.T).write(a_t_path, force_row_major=True)

    pa_t_path = utils.new_temp_file()
    BlockMatrix.from_numpy(pa.T).write(pa_t_path, force_row_major=True)

    stats = model.fit_alternatives(pa_t_path, a_t_path).collect()[0]
    self.assertAlmostEqual(stats.beta, beta1[0])
    self.assertAlmostEqual(stats.chi_sq, chi_sq)
def test_linear_mixed_model_fastlmm(self):
    """Compare ``LinearMixedModel`` results with hard-coded FastLMM reference values.

    For chromosome 1 the test checks ``h_sq``, its standard error, the
    normalized likelihood over ``h_sq``, and the betas and p-values of five
    chromosome 3 variants, for both a full-rank and a low-rank model.
    For chromosome 3 it checks ``h_sq`` and its standard error, again for
    both model forms.
    """
    # FastLMM Test data is from all.bed, all.bim, all.fam, cov.txt, pheno_10_causals.txt:
    #   https://github.com/MicrosoftGenomics/FaST-LMM/tree/master/tests/datasets/synth
    #
    # Data is filtered to chromosome 1,3 and samples 0-124,375-499 (2000 variants and 250 samples)
    #
    # Results are computed with single_snp (with LOCO) as in:
    #   https://github.com/MicrosoftGenomics/FaST-LMM/blob/master/doc/ipynb/FaST-LMM.ipynb

    n, m = 250, 1000  # per chromosome

    x_table = hl.import_table(resource('fastlmmCov.txt'), no_header=True, impute=True).key_by('f1')
    y_table = hl.import_table(resource('fastlmmPheno.txt'), no_header=True, impute=True, delimiter=' ').key_by('f1')

    mt = hl.import_plink(bed=resource('fastlmmTest.bed'),
                         bim=resource('fastlmmTest.bim'),
                         fam=resource('fastlmmTest.fam'),
                         reference_genome=None)
    mt = mt.annotate_cols(x=x_table[mt.col_key].f2)
    mt = mt.annotate_cols(y=y_table[mt.col_key].f2).cache()

    # Fixed effects: a column of ones plus the covariate; y is the phenotype.
    x = np.array([np.ones(n), mt.key_cols_by()['x'].collect()]).T
    y = np.array(mt.key_cols_by()['y'].collect())

    mt_chr1 = mt.filter_rows(mt.locus.contig == '1')
    mt_chr3 = mt.filter_rows(mt.locus.contig == '3')

    # testing chrom 1 for h2, betas, p-values
    h2_fastlmm = 0.14276125
    beta_fastlmm = [0.012202061, 0.037718282, -0.033572693, 0.29171541, -0.045644170]

    # FastLMM p-values do not agree to high precision because FastLMM regresses
    # out x from each SNP first and does an F(1, dof)-test on (beta / se)^2
    # (t-test), whereas Hail does likelihood ratio test.
    # We verify below that Hail's p-values remain fixed going forward.
    # fastlmm = [0.84650294, 0.57865098, 0.59050998, 1.6649473e-06, 0.46892059]
    pval_hail = [0.84543084, 0.57596760, 0.58788517, 1.4057279e-06, 0.46578204]

    gamma_fastlmm = h2_fastlmm / (1 - h2_fastlmm)

    g = BlockMatrix.from_entry_expr(mt_chr1.GT.n_alt_alleles()).to_numpy().T
    g_std = self._filter_and_standardize_cols(g)

    # full rank
    k = (g_std @ g_std.T) * (n / m)
    s, u = np.linalg.eigh(k)
    p = u.T
    model = LinearMixedModel(p @ y, p @ x, s)
    model.fit()

    assert np.isclose(model.h_sq, h2_fastlmm)

    h2_std_error = 0.13770773  # hard coded having checked against plot
    assert np.isclose(model.h_sq_standard_error, h2_std_error)

    # Endpoints are dropped before locating the peak and checking the sum.
    h_sq_norm_lkhd = model.h_sq_normalized_lkhd()[1:-1]
    argmax = int(100 * h2_fastlmm)
    assert argmax <= np.argmax(h_sq_norm_lkhd) + 1 <= argmax + 1
    assert np.isclose(np.sum(h_sq_norm_lkhd), 1.0)

    mt3_chr3_5var = mt_chr3.filter_rows(mt_chr3.locus.position < 2005)  # first 5
    a = BlockMatrix.from_entry_expr(mt3_chr3_5var.GT.n_alt_alleles()).to_numpy().T

    # FastLMM standardizes each variant to have mean 0 and variance 1.
    a = self._filter_and_standardize_cols(a) * np.sqrt(n)
    pa = p @ a

    model.fit(log_gamma=np.log(gamma_fastlmm))

    res = model.fit_alternatives_numpy(pa, return_pandas=True)

    assert np.allclose(res['beta'], beta_fastlmm)
    assert np.allclose(res['p_value'], pval_hail)

    pa_t_path = utils.new_temp_file(suffix='bm')
    BlockMatrix.from_numpy(pa.T).write(pa_t_path, force_row_major=True)

    res = model.fit_alternatives(pa_t_path).to_pandas()

    assert np.allclose(res['beta'], beta_fastlmm)
    assert np.allclose(res['p_value'], pval_hail)

    # low rank
    ld = g_std.T @ g_std
    sl, v = np.linalg.eigh(ld)
    # Keep only eigenpairs whose eigenvalue exceeds 1e-10 (eigh returns them
    # in ascending order, so these are the trailing ones).
    n_eigenvectors = int(np.sum(sl > 1e-10))
    assert n_eigenvectors < n
    sl = sl[-n_eigenvectors:]
    v = v[:, -n_eigenvectors:]
    s = sl * (n / m)
    p = (g_std @ (v / np.sqrt(sl))).T
    model = LinearMixedModel(p @ y, p @ x, s, y, x)
    model.fit()

    assert np.isclose(model.h_sq, h2_fastlmm)
    assert np.isclose(model.h_sq_standard_error, h2_std_error)

    model.fit(log_gamma=np.log(gamma_fastlmm))

    pa = p @ a
    res = model.fit_alternatives_numpy(pa, a, return_pandas=True)

    assert np.allclose(res['beta'], beta_fastlmm)
    assert np.allclose(res['p_value'], pval_hail)

    a_t_path = utils.new_temp_file(suffix='bm')
    BlockMatrix.from_numpy(a.T).write(a_t_path, force_row_major=True)

    pa_t_path = utils.new_temp_file(suffix='bm')
    BlockMatrix.from_numpy(pa.T).write(pa_t_path, force_row_major=True)

    res = model.fit_alternatives(pa_t_path, a_t_path).to_pandas()

    assert np.allclose(res['beta'], beta_fastlmm)
    assert np.allclose(res['p_value'], pval_hail)

    # testing chrom 3 for h2
    h2_fastlmm = 0.36733240

    g = BlockMatrix.from_entry_expr(mt_chr3.GT.n_alt_alleles()).to_numpy().T
    g_std = self._filter_and_standardize_cols(g)

    # full rank
    k = (g_std @ g_std.T) * (n / m)
    s, u = np.linalg.eigh(k)
    p = u.T
    model = LinearMixedModel(p @ y, p @ x, s)
    model.fit()

    assert np.isclose(model.h_sq, h2_fastlmm)

    h2_std_error = 0.17409641  # hard coded having checked against plot
    assert np.isclose(model.h_sq_standard_error, h2_std_error)

    h_sq_norm_lkhd = model.h_sq_normalized_lkhd()[1:-1]
    argmax = int(100 * h2_fastlmm)
    assert argmax <= np.argmax(h_sq_norm_lkhd) + 1 <= argmax + 1
    assert np.isclose(np.sum(h_sq_norm_lkhd), 1.0)

    # low rank
    # Named `ld` (not the ambiguous single letter `l`) to match the
    # chromosome 1 section above.
    ld = g_std.T @ g_std
    sl, v = np.linalg.eigh(ld)
    n_eigenvectors = int(np.sum(sl > 1e-10))
    assert n_eigenvectors < n
    sl = sl[-n_eigenvectors:]
    v = v[:, -n_eigenvectors:]
    s = sl * (n / m)
    p = (g_std @ (v / np.sqrt(sl))).T
    model = LinearMixedModel(p @ y, p @ x, s, y, x)
    model.fit()

    assert np.isclose(model.h_sq, h2_fastlmm)
    assert np.isclose(model.h_sq_standard_error, h2_std_error)