def test_singular_values(self):
    # PCA must expose correct singular values for both SVD solvers.
    state = np.random.RandomState(0)
    X = mt.tensor(state.randn(100, 80))

    # Fit order matters: both estimators draw from the same shared RNG.
    full = PCA(n_components=2, svd_solver='full', random_state=state).fit(X)
    rand = PCA(n_components=2, svd_solver='randomized', random_state=state).fit(X)
    assert_array_almost_equal(full.singular_values_.fetch(),
                              rand.singular_values_.fetch(), 1)

    # Sum of squared singular values equals the squared Frobenius norm
    # of the transformed data.
    scores_full = full.transform(X)
    scores_rand = rand.transform(X)
    assert_array_almost_equal(
        mt.sum(full.singular_values_ ** 2.0).to_numpy(),
        (mt.linalg.norm(scores_full, "fro") ** 2.0).to_numpy(), 12)
    assert_array_almost_equal(
        mt.sum(rand.singular_values_ ** 2.0).to_numpy(),
        (mt.linalg.norm(scores_rand, "fro") ** 2.0).to_numpy(), 0)

    # Each singular value equals the 2-norm of its score vector.
    assert_array_almost_equal(
        full.singular_values_.fetch(),
        mt.sqrt(mt.sum(scores_full ** 2.0, axis=0)).to_numpy(), 12)
    assert_array_almost_equal(
        rand.singular_values_.fetch(),
        mt.sqrt(mt.sum(scores_rand ** 2.0, axis=0)).to_numpy(), 2)

    # Plant known singular values in synthetic data and check that the
    # estimators recover them after a round trip.
    state = np.random.RandomState(0)
    X = mt.tensor(state.randn(100, 110))
    full = PCA(n_components=3, svd_solver='full', random_state=state)
    rand = PCA(n_components=3, svd_solver='randomized', random_state=state)
    scores = full.fit_transform(X)
    scores /= mt.sqrt(mt.sum(scores ** 2.0, axis=0))
    scores[:, 0] *= 3.142
    scores[:, 1] *= 2.718
    X_hat = mt.dot(scores, full.components_)
    full.fit(X_hat)
    rand.fit(X_hat)
    assert_array_almost_equal(full.singular_values_.fetch(),
                              [3.142, 2.718, 1.0], 14)
    assert_array_almost_equal(rand.singular_values_.fetch(),
                              [3.142, 2.718, 1.0], 14)
def testMockExecuteSize(self):
    import mars.tensor as mt
    from mars.core.graph import DAG
    from mars.tensor.fetch import TensorFetch
    from mars.tensor.arithmetic import TensorTreeAdd

    # Build a tiny graph: two 100x100 int64 fetch chunks feeding one tree-add.
    dag = DAG()
    sources = [
        TensorFetch(dtype=np.dtype('int64')).new_chunk(None, shape=(100, 100)).data
        for _ in range(2)
    ]
    tree_add = TensorTreeAdd(args=sources, dtype=np.dtype('int64'))
    head = tree_add.new_chunk(sources, shape=(100, 100),
                              dtype=np.dtype('int64')).data
    dag.add_node(head)
    for src in sources:
        dag.add_node(src)
        dag.add_edge(src, head)

    executor = Executor()
    size = executor.execute_graph(dag, [head.key], compose=False, mock=True)[0]
    self.assertEqual(size, (80000, 80000))
    self.assertEqual(executor.mock_max_memory, 80000)

    # Chain three more adds onto the head; the peak mock memory doubles
    # while the per-chunk size estimate stays the same.
    for _ in range(3):
        chained_op = TensorTreeAdd(args=[head], dtype=np.dtype('int64'))
        chained = chained_op.new_chunk([head], shape=(100, 100),
                                       dtype=np.dtype('int64')).data
        dag.add_node(chained)
        dag.add_edge(head, chained)
        head = chained

    executor = Executor()
    size = executor.execute_graph(dag, [head.key], compose=False, mock=True)[0]
    self.assertEqual(size, (80000, 80000))
    self.assertEqual(executor.mock_max_memory, 160000)

    # A compound tensor expression executed in mock mode.
    a = mt.random.rand(10, 10, chunk_size=10)
    b = a[:, mt.newaxis, :] - a
    r = mt.triu(mt.sqrt(b ** 2).sum(axis=2))
    executor = Executor()
    res = executor.execute_tensor(r, concat=False, mock=True)
    # larger than maximal memory size in calc procedure
    self.assertGreaterEqual(res[0][0], 800)
    self.assertGreaterEqual(executor.mock_max_memory, 8000)
def testMockExecuteSize(self):
    # Mock execution should report memory sizes without computing anything.
    a = mt.random.rand(10, 10, chunk_size=10)
    pairwise = a[:, mt.newaxis, :] - a
    result = mt.triu(mt.sqrt(pairwise ** 2).sum(axis=2))
    executor = Executor()
    sizes = executor.execute_tensor(result, concat=False, mock=True)
    # larger than maximal memory size in calc procedure
    self.assertGreaterEqual(sizes[0][0], 800)
    self.assertGreaterEqual(sizes[0][1], 8000)
def testElementwise(self):
    # Element-wise GPU chunks should fuse into a single cupy fuse chunk.
    lhs = ones((10000, 5000), chunk_size=500, gpu=True)
    rhs = ones(5000, chunk_size=500, gpu=True)
    expr = (lhs - rhs) / sqrt(rhs * (1 - rhs) * len(rhs))

    graph = expr.build_graph(tiled=True)
    RuntimeOptimizer(graph, self.executor._engine).optimize([], False)

    fuse_name = 'TensorCpFuseChunk'
    self.assertTrue(any(node.op.__class__.__name__ == fuse_name
                        for node in graph))
    fused = next(node for node in graph
                 if node.op.__class__.__name__ == fuse_name)
    # The fused chunk should evaluate to more than a trivial result.
    self.assertGreater(len(_evaluate(fused)), 1)
def testElementwise(self):
    """Element-wise GPU chunks should fuse into a ``TensorCpFuseChunk``.

    Builds a tiled graph for a compound element-wise expression on GPU
    tensors, runs the executor's preprocessing step (which performs
    fusion), and verifies that a cupy fuse chunk appears in the graph
    and evaluates to a non-trivial result.
    """
    t1 = ones((10000, 5000), chunk_size=500, gpu=True)
    t2 = ones(5000, chunk_size=500, gpu=True)
    t = (t1 - t2) / sqrt(t2 * (1 - t2) * len(t2))
    g = t.build_graph(tiled=True)
    graph = self.executor._preprocess(g, [])
    self.assertTrue(any(n.op.__class__.__name__ == 'TensorCpFuseChunk'
                        for n in graph))
    c = next(n for n in graph
             if n.op.__class__.__name__ == 'TensorCpFuseChunk')
    # Assert on the fused chunk's evaluation instead of printing it:
    # a bare print() verifies nothing and clutters the test output.
    self.assertGreater(len(_evaluate(c)), 1)
def test_pca_check_projection(self):
    # The fitted projection should map a shifted test point onto the
    # first principal axis (|first coordinate| ~ 1 after normalisation).
    state = np.random.RandomState(0)
    n, p = 100, 3
    shift = mt.array([3, 4, 5])
    X = mt.tensor(state.randn(n, p) * .1)
    X[:10] += shift
    Xt = 0.1 * mt.tensor(state.randn(1, p)) + shift
    for solver in self.solver_list:
        projected = PCA(n_components=2, svd_solver=solver).fit(X).transform(Xt)
        projected /= mt.sqrt((projected ** 2).sum())
        assert_almost_equal(mt.abs(projected[0][0]).to_numpy(), 1., 1)
def test_cupy():
    # GPU element-wise chunks should fuse into TensorCpFuseChunk nodes.
    x = mt.ones((100, 50), chunk_size=50, gpu=True)
    y = mt.ones(50, chunk_size=50, gpu=True)
    expr = (x - y) / mt.sqrt(y * (1 - y) * len(y))

    tileable_graph = TileableGraph([expr.data])
    next(TileableGraphBuilder(tileable_graph).build())
    builder = ChunkGraphBuilder(tileable_graph, fuse_enabled=False,
                                tile_context=dict())
    chunk_graph = next(builder.build())

    CupyRuntimeOptimizer(chunk_graph).optimize()
    assert any(node.op.__class__.__name__ == 'TensorCpFuseChunk'
               for node in chunk_graph)
def test_singular_values(setup):
    # TruncatedSVD should recover singular values planted in the data.
    state = np.random.RandomState(0)
    X = state.randn(100, 110)
    svd = TruncatedSVD(n_components=3, algorithm='randomized',
                       random_state=state)
    scores = svd.fit_transform(X)
    # Normalise each score column, then scale the first two to known values.
    scores /= mt.sqrt(mt.sum(scores ** 2.0, axis=0))
    scores[:, 0] *= 3.142
    scores[:, 1] *= 2.718
    reconstructed = mt.dot(scores, svd.components_)
    svd.fit(reconstructed)
    assert_array_almost_equal(svd.singular_values_.to_numpy(),
                              [3.142, 2.718, 1.0], 14)