def todense(self, out=None, allocator=mem_alloc, stream=None):
    """Convert this CSR matrix into a dense GPU array.

    Parameters
    ----------
    out : optional
        Destination array. When ``None``, a new array of ``self.shape`` and
        ``self.dtype`` is allocated in C order through ``allocator``.
    allocator : optional
        Passed to ``gpuarray.empty`` for the output (when it has to be
        allocated) and for the temporary conversion buffer.
    stream : optional
        When given, its handle is installed on the module-level cuSPARSE and
        cuBLAS handles for the duration of the conversion and both handles
        are reset to stream 0 afterwards, even if a library call raises. It
        is also forwarded to ``out.fill`` in the empty-matrix case.

    Returns
    -------
    ``out``, holding the dense representation of the matrix.

    Notes
    -----
    Only the single-precision routines (``cusparseScsr2dense`` and
    ``cublasSgeam``) are called, independent of ``self.dtype``.
    NOTE(review): presumably ``self.dtype`` is always float32 -- confirm.
    """
    if out is None:
        out = gpuarray.empty(self.shape, allocator=allocator,
                             dtype=self.dtype, order="C")

    if self.nnz == 0:  # weird but happens
        out.fill(0.0, stream=stream)
        return out

    # we need to out-of-place transpose if we want rowmajor outputs
    # thus we need a temporary to store our results
    rowmajor_out = out.flags.c_contiguous
    if rowmajor_out:
        tmp = gpuarray.empty(self.shape, allocator=allocator,
                             dtype=self.dtype, order="C")
    else:
        tmp = out

    try:
        if stream is not None:
            cusparse.cusparseSetStream(cusparse_handle, stream.handle)
            cublas.cublasSetStream(cublas_handle, stream.handle)

        # Expand the CSR data into ``tmp`` using ``tmp.shape[0]`` as the
        # leading dimension.
        cusparse.cusparseScsr2dense(
            cusparse_handle, self.shape[0], self.shape[1], self.descr,
            self.data.gpudata, self.indptr.gpudata, self.indices.gpudata,
            tmp.gpudata, tmp.shape[0])

        if rowmajor_out:
            # out = 1.0 * tmp^T + 0.0 * B; B is passed as a null pointer (0)
            # because its coefficient is zero.
            cublas.cublasSgeam(
                cublas_handle, 1, 1, tmp.shape[1], tmp.shape[0],
                1.0, tmp.gpudata, tmp.shape[0],
                0.0, 0, tmp.shape[0],
                out.gpudata, out.shape[1])
    finally:
        # The handles are shared module-level state: always detach them from
        # the caller's stream, also when one of the calls above failed.
        if stream is not None:
            cusparse.cusparseSetStream(cusparse_handle, 0)
            cublas.cublasSetStream(cublas_handle, 0)

    return out
def todense(self, out=None, allocator=mem_alloc, stream=None):
    """Return a dense GPU array with the contents of this CSR matrix.

    Parameters
    ----------
    out : optional
        Array to write into. If ``None``, a C-ordered array of
        ``self.shape`` / ``self.dtype`` is created with ``allocator``.
    allocator : optional
        Allocator handed to ``gpuarray.empty`` for ``out`` (when created
        here) and for the scratch buffer used for C-contiguous outputs.
    stream : optional
        If not ``None``, the module-level cuSPARSE and cuBLAS handles are
        bound to ``stream.handle`` while converting and are rebound to
        stream 0 before returning or propagating an exception. In the
        ``nnz == 0`` case it is passed on to ``out.fill``.

    Returns
    -------
    The array ``out`` filled with the dense matrix.

    Notes
    -----
    The conversion always goes through the single-precision routines
    ``cusparseScsr2dense`` and ``cublasSgeam``.
    NOTE(review): this looks like it expects float32 data -- confirm.
    """
    if out is None:
        out = gpuarray.empty(
            self.shape, allocator=allocator, dtype=self.dtype, order="C"
        )

    if self.nnz == 0:  # weird but happens
        out.fill(0.0, stream=stream)
        return out

    # we need to out-of-place transpose if we want rowmajor outputs
    # thus we need a temporary to store our results
    needs_transpose = out.flags.c_contiguous
    if needs_transpose:
        tmp = gpuarray.empty(
            self.shape, allocator=allocator, dtype=self.dtype, order="C"
        )
    else:
        tmp = out

    try:
        if stream is not None:
            cusparse.cusparseSetStream(cusparse_handle, stream.handle)
            cublas.cublasSetStream(cublas_handle, stream.handle)

        # CSR -> dense into ``tmp`` with leading dimension ``tmp.shape[0]``.
        cusparse.cusparseScsr2dense(
            cusparse_handle,
            self.shape[0],
            self.shape[1],
            self.descr,
            self.data.gpudata,
            self.indptr.gpudata,
            self.indices.gpudata,
            tmp.gpudata,
            tmp.shape[0],
        )

        if needs_transpose:
            # geam computes out = 1.0 * tmp^T + 0.0 * B; the second operand
            # is a null pointer (0) since it is scaled by zero.
            cublas.cublasSgeam(
                cublas_handle,
                1,
                1,
                tmp.shape[1],
                tmp.shape[0],
                1.0,
                tmp.gpudata,
                tmp.shape[0],
                0.0,
                0,
                tmp.shape[0],
                out.gpudata,
                out.shape[1],
            )
    finally:
        # Shared handles must not stay bound to the caller's stream, not
        # even when a library call above raised.
        if stream is not None:
            cusparse.cusparseSetStream(cusparse_handle, 0)
            cublas.cublasSetStream(cublas_handle, 0)

    return out
def test_cusparseSetStream():
    """Run cusparseScsr2dense on a non-default stream and check the result.

    A small random float32 matrix is sparsified on the host, uploaded as
    CSR, converted back to dense on the GPU while the cuSPARSE handle is
    bound to a freshly created stream, and compared with the host matrix.
    """
    host_dense = np.random.laplace(size=(3, 5)).astype(np.float32)
    host_dense[host_dense < 0.1] = 0
    host_csr = sparse.csr_matrix(host_dense, dtype=np.float32)
    host_csr.sort_indices()
    n_rows, n_cols = host_csr.shape

    # Upload the three CSR component arrays.
    dev_values = gpu.to_gpu(host_csr.data)
    dev_indptr = gpu.to_gpu(host_csr.indptr)
    dev_indices = gpu.to_gpu(host_csr.indices)
    # Fortran-ordered result buffer; its row count is the leading dimension.
    dev_dense = gpu.empty((n_rows, n_cols), dtype=host_csr.dtype, order="F")

    handle = cusparse.cusparseCreate()
    descr = cusparse.cusparseCreateMatDescr()
    side_stream = Stream()

    cusparse.cusparseSetStream(handle, side_stream.handle)
    cusparse.cusparseScsr2dense(
        handle,
        n_rows,
        n_cols,
        descr,
        dev_values.gpudata,
        dev_indptr.gpudata,
        dev_indices.gpudata,
        dev_dense.gpudata,
        n_rows,
    )
    # Put the handle back on stream 0, then wait for the queued work.
    cusparse.cusparseSetStream(handle, 0)
    side_stream.synchronize()

    assert_allclose(dev_dense.get(), host_csr.toarray(), rtol=1e-4)