def test_similarityMatrixTopK_sparseToSparse(self):
    """Check sparse-in/sparse-out top-K against the dense-in/dense-out result.

    The same random square matrix is fed to ``similarityMatrixTopK`` as a
    dense array, as a CSR matrix and as a CSC matrix; the densified sparse
    outputs must be numerically close to the dense output.
    """
    numRows = 20
    TopK = 5

    dense_input = np.random.random((numRows, numRows))
    # The CSR copy is built before the dense reference call, as in the
    # original ordering of this test.
    csr_input = sps.csr_matrix(dense_input)

    reference = similarityMatrixTopK(
        dense_input, k=TopK, forceSparseOutput=False, inplace=False
    )

    def matches_reference(sparse_input):
        # Run the sparse path and compare its densified output to the reference.
        sparse_output = similarityMatrixTopK(
            sparse_input, k=TopK, forceSparseOutput=True
        )
        return np.allclose(reference, sparse_output.todense())

    self.assertTrue(matches_reference(csr_input), "sparseToSparse CSR incorrect")

    csc_input = sps.csc_matrix(dense_input)
    self.assertTrue(matches_reference(csc_input), "sparseToSparse CSC incorrect")
def updateSimilarityMatrix(self):
    """Refresh the weight matrix from the transpose of ``self.S``.

    When ``self.topK != False`` the transpose is passed through
    ``similarityMatrixTopK`` with ``k=self.topK``; otherwise it is used as
    is (converted to CSR when sparse weights are requested).  The result is
    stored in ``self.W_sparse`` if ``self.sparse_weights`` is truthy, else
    in ``self.W``.
    """
    # NOTE(review): the comparison is intentionally `!= False` and not a
    # truthiness test, so that a value such as None still selects the
    # top-K branch exactly as before.
    apply_top_k = self.topK != False
    want_sparse = bool(self.sparse_weights)

    similarity = self.S.T
    if apply_top_k:
        similarity = similarityMatrixTopK(
            similarity, k=self.topK, forceSparseOutput=want_sparse
        )
    elif want_sparse:
        similarity = sps.csr_matrix(similarity)

    if want_sparse:
        self.W_sparse = similarity
    else:
        self.W = similarity
def test_similarityMatrixTopK_denseToSparse(self):
    """Check that dense input yields the same values for sparse and dense output.

    ``similarityMatrixTopK`` is called twice on the same random matrix, once
    with ``forceSparseOutput=True`` and once with ``False``; the two results
    must be element-wise equal.
    """
    numRows, TopK = 100, 20

    matrix = np.random.random((numRows, numRows))

    # Sparse output is requested first, then dense, matching the call order
    # the test has always used.
    as_sparse = similarityMatrixTopK(matrix, k=TopK, forceSparseOutput=True)
    as_dense = similarityMatrixTopK(matrix, k=TopK, forceSparseOutput=False)

    outputs_identical = np.equal(as_dense, as_sparse.todense()).all()
    self.assertTrue(outputs_identical, "denseToSparse incorrect")
def get_S_incremental_and_set_W(self):
    """Fetch the current matrix from ``self.cythonEpoch`` and publish it as weights.

    The value returned by ``self.cythonEpoch.get_S()`` is stored in
    ``self.S_incremental`` and then exposed as:

    * ``self.W_sparse`` unchanged, when ``self.train_with_sparse_weights``;
    * ``self.W_sparse`` after ``similarityMatrixTopK(..., k=self.topK)``,
      when ``self.sparse_weights``;
    * ``self.W`` unchanged, otherwise.
    """
    self.S_incremental = self.cythonEpoch.get_S()

    if self.train_with_sparse_weights:
        self.W_sparse = self.S_incremental
        return

    if self.sparse_weights:
        self.W_sparse = similarityMatrixTopK(self.S_incremental, k=self.topK)
        return

    self.W = self.S_incremental
def test_similarityMatrixTopK_denseToDense(self):
    """Check the non-zero count of a dense-in/dense-out top-K result.

    For a random ``numRows`` x ``numRows`` input, the output of
    ``similarityMatrixTopK`` with ``k=TopK`` is expected to contain exactly
    ``TopK * numRows`` non-zero cells.
    """
    numRows, TopK = 100, 20

    dense_output = similarityMatrixTopK(
        np.random.random((numRows, numRows)),
        k=TopK,
        forceSparseOutput=False,
    )

    non_zero_mask = dense_output != 0
    self.assertEqual(TopK * numRows, np.sum(non_zero_mask), "DenseToDense incorrect")
def test_cosine_similarity_TopK_big(self):
    """Compare three similarity implementations against a control on random data.

    A random sparse 1000 x 500 matrix is run through the Cython, Python and
    parallel implementations with ``topK`` equal to the number of columns
    and ``normalize=False``.  Each dense result must be close (``atol=1e-4``)
    to a control built from ``data_matrix.T.dot(data_matrix)`` with a zeroed
    diagonal, passed through ``similarityMatrixTopK``.
    """
    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
    from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel

    n_users, n_items = 1000, 500
    TopK = n_items

    data_matrix = sps.random(n_users, n_items, density=0.1)

    # (implementation, message used if its output differs from the control)
    implementations = (
        (Cosine_Similarity_Cython, "W_sparse_Cython not matching control"),
        (Cosine_Similarity_Python, "W_dense_Python not matching control"),
        (Cosine_Similarity_Parallel, "W_dense_Parallel not matching control"),
    )

    # All implementations run before the control is built, and only then are
    # the comparisons made, so failures surface in the same order as before.
    computed = []
    for similarity_class, mismatch_message in implementations:
        similarity = similarity_class(data_matrix, topK=TopK, normalize=False)
        computed.append((similarity.compute_similarity().toarray(), mismatch_message))

    control = data_matrix.T.dot(data_matrix)
    diagonal = np.arange(control.shape[0])
    control[diagonal, diagonal] = 0.0
    control = similarityMatrixTopK(control, k=TopK).toarray()

    for weights, mismatch_message in computed:
        assert np.allclose(weights, control, atol=1e-4), mismatch_message
def test_cosine_similarity_TopK(self):
    """Compare three similarity implementations against a control on a tiny matrix.

    A fixed 3 x 4 binary matrix (as CSR) is run through the Cython, Python
    and parallel implementations with ``topK=4`` and ``normalize=False``.
    Each dense result must be close (``atol=1e-4``) to a control built from
    ``data_matrix.T.dot(data_matrix)`` with a zeroed diagonal, passed through
    ``similarityMatrixTopK``.
    """
    from Base.Cython.cosine_similarity import Cosine_Similarity as Cosine_Similarity_Cython
    from Base.cosine_similarity import Compute_Similarity as Cosine_Similarity_Python
    from Base.cosine_similarity_parallel import Cosine_Similarity_Parallel

    TopK = 4

    raw_rows = np.array([[1, 1, 0, 1], [0, 1, 1, 1], [1, 0, 1, 0]])
    data_matrix = sps.csr_matrix(raw_rows)

    def dense_similarity(similarity_class):
        # Build the given implementation and return its similarity as ndarray.
        similarity = similarity_class(data_matrix, topK=TopK, normalize=False)
        return similarity.compute_similarity().toarray()

    # Every implementation is evaluated before the control matrix is built.
    W_dense_Cython = dense_similarity(Cosine_Similarity_Cython)
    W_dense_Python = dense_similarity(Cosine_Similarity_Python)
    W_dense_Parallel = dense_similarity(Cosine_Similarity_Parallel)

    control = data_matrix.T.dot(data_matrix)
    diagonal = np.arange(control.shape[0])
    control[diagonal, diagonal] = 0.0
    control = similarityMatrixTopK(control, k=TopK).toarray()

    checks = (
        (W_dense_Cython, "W_sparse_Cython not matching control"),
        (W_dense_Python, "W_dense_Python not matching control"),
        (W_dense_Parallel, "W_dense_Parallel not matching control"),
    )
    for weights, mismatch_message in checks:
        assert np.allclose(weights, control, atol=1e-4), mismatch_message