Example #1
def test_cublas_bug():
    '''
    The SGEMM call would cause all calls after it to fail for some unknown
    reason. This is likely caused by swaprows corrupting memory.

    NOTE: this was confirmed by NVIDIA to be a bug within CUDA, and should be
          fixed in CUDA 6.5.
    '''
    import numpy as np
    import pycuda.gpuarray as gpuarray
    from pycuda.driver import Stream
    from skcuda.cublas import cublasSgemm
    from skcuda.misc import _global_cublas_handle as handle

    n = 131

    s = slice(128, n)
    X = gpuarray.to_gpu(np.random.randn(n, 2483).astype(np.float32))
    a = gpuarray.empty((X.shape[1], 3), dtype=np.float32)
    c = gpuarray.empty((a.shape[0], X.shape[1]), dtype=np.float32)
    b = gpuarray.empty_like(X)

    m, n = a.shape[0], b[s].shape[1]
    k = a.shape[1]
    lda = m
    ldb = k
    ldc = m
    #cublasSgemm(handle, 0, 0, m, n, k, 0.0, b.gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc)
    cublasSgemm(handle, 'n', 'n', m, n, k, 1.0, b[s].gpudata, lda, a.gpudata,
                ldb, 0.0, c.gpudata, ldc)
    # print(handle, 'n', 'n', m, n, k, 1.0, b[s].gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc)

    #gpuarray.dot(d, Xoutd[s])
    #op.sgemm(a, b[s], c)

    stream = Stream()
    stream.synchronize()
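The call above relies on cuBLAS being column-major: the two operands are passed in swapped order so that the row-major (C-ordered) gpuarrays come out right. Below is a minimal, self-contained sketch of that convention. It is not part of the original test suite and assumes only pycuda and skcuda.cublas, creating its own handle with cublasCreate instead of using the test module's global handle.

import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates a CUDA context
import pycuda.gpuarray as gpuarray
from skcuda import cublas

handle = cublas.cublasCreate()

A = np.random.randn(3, 4).astype(np.float32)
B = np.random.randn(4, 5).astype(np.float32)
A_gpu, B_gpu = gpuarray.to_gpu(A), gpuarray.to_gpu(B)
C_gpu = gpuarray.empty((3, 5), dtype=np.float32)

# cuBLAS sees the row-major buffers as their transposes, so a row-major
# C = A @ B is requested as "B times A" with the transposed dimensions.
m, n, k = B.shape[1], A.shape[0], A.shape[1]
cublas.cublasSgemm(
    handle, 'n', 'n', m, n, k,
    1.0, B_gpu.gpudata, m,   # first operand: B, lda = m
    A_gpu.gpudata, k,        # second operand: A, ldb = k
    0.0, C_gpu.gpudata, m,   # result buffer C, ldc = m
)

np.testing.assert_allclose(C_gpu.get(), A.dot(B), rtol=1e-5)
cublas.cublasDestroy(handle)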
Example #2
def test_todense_stream():
    '''Test GPUCSRArray.todense().'''
    X = np.random.laplace(size=(600, 300)).astype(np.float32)
    X[X < 0.1] = 0
    X = csr_matrix(X, dtype=np.float32)
    Xd = GPUCSRArray(X)

    stream = Stream()

    Yd = Xd.todense(stream=stream)
    stream.synchronize()
    assert_allclose(Yd.get(), X.A, rtol=1e-3, err_msg="todense")
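The todense(stream=...) / stream.synchronize() pair above follows the usual PyCUDA stream pattern: enqueue asynchronous work on a Stream, then synchronize before touching the result on the host. Here is a standalone sketch of that pattern using only pycuda (GPUCSRArray is not needed for it); the shapes and Laplace data simply mirror the test, and the page-locked host buffer is what allows the copies to be truly asynchronous.

import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.driver as drv
import pycuda.gpuarray as gpuarray

stream = drv.Stream()

# page-locked host memory lets the queued copies overlap with other work
host = drv.pagelocked_empty((600, 300), dtype=np.float32)
host[:] = np.random.laplace(size=host.shape)

dev = gpuarray.to_gpu_async(host, stream=stream)  # host -> device on the stream
out = dev.get_async(stream=stream)                # device -> host on the stream

stream.synchronize()  # wait for the queued copies before reading 'out'
np.testing.assert_allclose(out, host)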
Example #3
def test_cusparseSetStream():
    A = np.random.laplace(size=(3, 5)).astype(np.float32)
    A[A < 0.1] = 0
    A = sparse.csr_matrix(A, dtype=np.float32)
    A.sort_indices()

    a_data = gpu.to_gpu(A.data)
    a_indptr = gpu.to_gpu(A.indptr)
    a_indices = gpu.to_gpu(A.indices)
    out = gpu.empty((A.shape[0], A.shape[1]), dtype=A.dtype, order="F")

    h = cusparse.cusparseCreate()
    descrA = cusparse.cusparseCreateMatDescr()

    stream = Stream()
    cusparse.cusparseSetStream(h, stream.handle)
    cusparse.cusparseScsr2dense(
        h, A.shape[0], A.shape[1], descrA,
        a_data.gpudata, a_indptr.gpudata, a_indices.gpudata,
        out.gpudata, out.shape[0])
    cusparse.cusparseSetStream(h, 0)
    stream.synchronize()
    assert_allclose(out.get(), A.A, rtol=1e-4)
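The three device arrays uploaded above (a_data, a_indptr, a_indices) are simply the standard CSR representation that scipy produces. A small CPU-only sketch of what each of them contains:

import numpy as np
from scipy import sparse

A = sparse.csr_matrix(np.array([[0, 2, 0],
                                [1, 0, 3]], dtype=np.float32))
A.sort_indices()

print(A.data)     # [2. 1. 3.] -- non-zero values, stored row by row
print(A.indptr)   # [0 1 3]    -- row i occupies data[indptr[i]:indptr[i+1]]
print(A.indices)  # [1 0 2]    -- column index of each stored value

cusparseScsr2dense writes its dense output in column-major order, which is why the test allocates out with order="F".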
Example #4
    def test_unary_func_kwargs(self):
        """tests if the kwargs to the unary functions work"""
        from pycuda.driver import Stream

        name, a, b, threshold = ("exp", -3, 3, 1e-5)
        gpu_func = getattr(cumath, name)
        cpu_func = getattr(np, numpy_func_names.get(name, name))
        for s in sizes:
            for dtype in dtypes:
                np.random.seed(1)
                A = (np.random.random(s) * (b - a) + a).astype(dtype)
                args = gpuarray.to_gpu(A)

                # 'out' kw
                gpu_results = gpuarray.empty_like(args)
                gpu_results = gpu_func(args, out=gpu_results).get()
                cpu_results = cpu_func(A)
                max_err = np.max(np.abs(cpu_results - gpu_results))
                assert (max_err <= threshold).all(), (max_err, name, dtype)

                # 'out' position
                gpu_results = gpuarray.empty_like(args)
                gpu_results = gpu_func(args, gpu_results).get()
                cpu_results = cpu_func(A)
                max_err = np.max(np.abs(cpu_results - gpu_results))
                assert (max_err <= threshold).all(), (max_err, name, dtype)

                # 'stream' kw
                mystream = Stream()
                np.random.seed(1)
                A = (np.random.random(s) * (b - a) + a).astype(dtype)
                args = gpuarray.to_gpu(A)
                gpu_results = gpuarray.empty_like(args)
                gpu_results = gpu_func(args, stream=mystream).get()
                cpu_results = cpu_func(A)
                max_err = np.max(np.abs(cpu_results - gpu_results))
                assert (max_err <= threshold).all(), (max_err, name, dtype)

                # 'stream' position
                mystream = Stream()
                np.random.seed(1)
                A = (np.random.random(s) * (b - a) + a).astype(dtype)
                args = gpuarray.to_gpu(A)
                gpu_results = gpuarray.empty_like(args)
                gpu_results = gpu_func(args, mystream).get()
                cpu_results = cpu_func(A)
                max_err = np.max(np.abs(cpu_results - gpu_results))
                assert (max_err <= threshold).all(), (max_err, name, dtype)

                # 'out' and 'stream' kw
                mystream = Stream()
                np.random.seed(1)
                A = (np.random.random(s) * (b - a) + a).astype(dtype)
                args = gpuarray.to_gpu(A)
                gpu_results = gpuarray.empty_like(args)
                gpu_results = gpu_func(args, stream=mystream,
                                       out=gpu_results).get()
                cpu_results = cpu_func(A)
                max_err = np.max(np.abs(cpu_results - gpu_results))
                assert (max_err <= threshold).all(), (max_err, name, dtype)
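For reference, a direct usage sketch of the two keyword arguments the test exercises: 'out' reuses a preallocated result array and 'stream' enqueues the kernel on a given Stream. It assumes a CUDA context from pycuda.autoinit, and cumath.exp stands in for the gpu_func that the test looks up with getattr.

import numpy as np
import pycuda.autoinit  # noqa: F401
import pycuda.gpuarray as gpuarray
import pycuda.cumath as cumath
from pycuda.driver import Stream

a = gpuarray.to_gpu(np.linspace(-3, 3, 16).astype(np.float32))
out = gpuarray.empty_like(a)
stream = Stream()

cumath.exp(a, out=out, stream=stream)  # kernel queued on 'stream', result written to 'out'
stream.synchronize()

np.testing.assert_allclose(out.get(), np.exp(a.get()), rtol=1e-5)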