Пример #1
0
    def todense(self, out=None, allocator=mem_alloc, stream=None):
        """Convert this CSR matrix into a dense GPU array.

        Args:
            out: Optional preallocated destination array. When omitted, a
                C-ordered array of ``self.shape`` / ``self.dtype`` is
                allocated with ``allocator``.
            allocator: Device allocator used for ``out`` (when not supplied)
                and for the temporary needed by the row-major path.
            stream: Optional stream. When given, the module-level cuSPARSE and
                cuBLAS handles are bound to ``stream.handle`` for the duration
                of the call and reset to stream 0 afterwards, even if one of
                the library calls raises.

        Returns:
            ``out``, filled with the dense representation of this matrix.

        NOTE(review): the single-precision ``S`` routines are called
        regardless of ``self.dtype`` -- confirm callers only use float32.
        """
        if out is None:
            out = gpuarray.empty(self.shape, allocator=allocator, dtype=self.dtype, order="C")

        if self.nnz == 0:  # weird but happens
            out.fill(0.0, stream=stream)
            return out

        # we need to out-of-place transpose if we want rowmajor outputs
        # thus we need a temporary to store our results
        needs_transpose = out.flags.c_contiguous
        if needs_transpose:
            tmp = gpuarray.empty(self.shape, allocator=allocator, dtype=self.dtype, order="C")
        else:
            tmp = out

        try:
            if stream is not None:
                cusparse.cusparseSetStream(cusparse_handle, stream.handle)
                cublas.cublasSetStream(cublas_handle, stream.handle)

            # Densify into tmp using tmp.shape[0] (number of rows) as the
            # leading dimension.
            cusparse.cusparseScsr2dense(cusparse_handle, self.shape[0],
                self.shape[1], self.descr, self.data.gpudata, self.indptr.gpudata,
                self.indices.gpudata, tmp.gpudata, tmp.shape[0])

            if needs_transpose:
                # out = 1.0 * op(tmp) + 0.0 * op(B); B is passed as a null
                # pointer since beta is 0.0. The op flags of 1 are presumably
                # CUBLAS_OP_T, which makes this the out-of-place transpose.
                cublas.cublasSgeam(cublas_handle, 1, 1, tmp.shape[1], tmp.shape[0],
                               1.0, tmp.gpudata, tmp.shape[0],
                               0.0, 0, tmp.shape[0], out.gpudata, out.shape[1])
        finally:
            # The handles are shared; always detach them from the caller's
            # stream so a failure above cannot leak the stream binding into
            # later, unrelated calls.
            if stream is not None:
                cusparse.cusparseSetStream(cusparse_handle, 0)
                cublas.cublasSetStream(cublas_handle, 0)

        return out
Пример #2
0
    def todense(self, out=None, allocator=mem_alloc, stream=None):
        """Convert this CSR matrix into a dense GPU array.

        Args:
            out: Optional preallocated destination array. When omitted, a
                C-ordered array of ``self.shape`` / ``self.dtype`` is
                allocated with ``allocator``.
            allocator: Device allocator used for ``out`` (when not supplied)
                and for the temporary needed by the row-major path.
            stream: Optional stream. When given, the module-level cuSPARSE and
                cuBLAS handles are bound to ``stream.handle`` for the duration
                of the call and reset to stream 0 afterwards, even if one of
                the library calls raises.

        Returns:
            ``out``, filled with the dense representation of this matrix.

        NOTE(review): the single-precision ``S`` routines are called
        regardless of ``self.dtype`` -- confirm callers only use float32.
        """
        if out is None:
            out = gpuarray.empty(self.shape, allocator=allocator, dtype=self.dtype, order="C")

        if self.nnz == 0:  # weird but happens
            out.fill(0.0, stream=stream)
            return out

        # we need to out-of-place transpose if we want rowmajor outputs
        # thus we need a temporary to store our results
        needs_transpose = out.flags.c_contiguous
        if needs_transpose:
            tmp = gpuarray.empty(self.shape, allocator=allocator, dtype=self.dtype, order="C")
        else:
            tmp = out

        try:
            if stream is not None:
                cusparse.cusparseSetStream(cusparse_handle, stream.handle)
                cublas.cublasSetStream(cublas_handle, stream.handle)

            # Densify into tmp using tmp.shape[0] (number of rows) as the
            # leading dimension.
            cusparse.cusparseScsr2dense(
                cusparse_handle,
                self.shape[0],
                self.shape[1],
                self.descr,
                self.data.gpudata,
                self.indptr.gpudata,
                self.indices.gpudata,
                tmp.gpudata,
                tmp.shape[0],
            )

            if needs_transpose:
                # out = 1.0 * op(tmp) + 0.0 * op(B); B is passed as a null
                # pointer since beta is 0.0. The op flags of 1 are presumably
                # CUBLAS_OP_T, which makes this the out-of-place transpose.
                cublas.cublasSgeam(
                    cublas_handle,
                    1,
                    1,
                    tmp.shape[1],
                    tmp.shape[0],
                    1.0,
                    tmp.gpudata,
                    tmp.shape[0],
                    0.0,
                    0,
                    tmp.shape[0],
                    out.gpudata,
                    out.shape[1],
                )
        finally:
            # The handles are shared; always detach them from the caller's
            # stream so a failure above cannot leak the stream binding into
            # later, unrelated calls.
            if stream is not None:
                cusparse.cusparseSetStream(cusparse_handle, 0)
                cublas.cublasSetStream(cublas_handle, 0)

        return out
Пример #3
0
def test_cusparseSetStream():
    """Check that csr2dense gives the right result when run on a user stream.

    Builds a small random float32 CSR matrix, densifies it on the GPU through
    a cuSPARSE handle bound to a fresh stream, and compares the result with
    the dense form computed on the host.
    """
    A = np.random.laplace(size=(3, 5)).astype(np.float32)
    A[A < 0.1] = 0  # zero out entries below 0.1 so the matrix is actually sparse
    A = sparse.csr_matrix(A, dtype=np.float32)
    A.sort_indices()

    a_data = gpu.to_gpu(A.data)
    a_indptr = gpu.to_gpu(A.indptr)
    a_indices = gpu.to_gpu(A.indices)
    # Column-major output, so no transpose is needed after csr2dense.
    out = gpu.empty((A.shape[0], A.shape[1]), dtype=A.dtype, order="F")

    # NOTE(review): the handle and descriptor are never destroyed here --
    # confirm whether the wrapper exposes destroy functions worth calling.
    h = cusparse.cusparseCreate()
    descrA = cusparse.cusparseCreateMatDescr()

    stream = Stream()
    cusparse.cusparseSetStream(h, stream.handle)
    cusparse.cusparseScsr2dense(h, A.shape[0], A.shape[1],
        descrA, a_data.gpudata, a_indptr.gpudata, a_indices.gpudata,
        out.gpudata, out.shape[0])
    cusparse.cusparseSetStream(h, 0)
    # Wait for the stream before reading the result back.
    stream.synchronize()
    # toarray() replaces the deprecated `.A` shorthand on sparse matrices.
    assert_allclose(out.get(), A.toarray(), rtol=1e-4)