Example #1
File: test_op.py Project: stachon/binet
def test_cublas_bug():
    '''
    The SGEMM call would cause all calls after it to fail for some unknown
    reason. This is likely caused by swaprows corrupting memory.

    NOTE: this was confirmed by NVIDIA to be a bug within CUDA, and should be
          fixed in CUDA 6.5
    '''
    import numpy as np
    from pycuda import gpuarray
    from pycuda.driver import Stream
    from skcuda.cublas import cublasSgemm
    # _global_cublas_handle is only populated once skcuda.misc.init() has run
    from skcuda.misc import _global_cublas_handle as handle

    n = 131

    s = slice(128, n)
    X = gpuarray.to_gpu(np.random.randn(n, 2483).astype(np.float32))
    a = gpuarray.empty((X.shape[1], 3), dtype=np.float32)
    c = gpuarray.empty((a.shape[0], X.shape[1]), dtype=np.float32)
    b = gpuarray.empty_like(X)

    m, n = a.shape[0], b[s].shape[1]
    k = a.shape[1]
    # cuBLAS expects column-major operands; lda/ldb/ldc are their leading dimensions
    lda = m
    ldb = k
    ldc = m
    #cublasSgemm(handle, 0, 0, m, n, k, 0.0, b.gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc)
    cublasSgemm(handle, 'n', 'n', m, n, k, 1.0, b[s].gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc)
    #print handle, 'n', 'n', m, n, k, 1.0, b[s].gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc

    #gpuarray.dot(d, Xoutd[s])
    #op.sgemm(a, b[s], c)

    stream = Stream()
    stream.synchronize()
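The trailing Stream() / synchronize() pair appears to be there purely to force a round-trip to the CUDA driver, so that any corruption left behind by the SGEMM call surfaces here rather than in an unrelated later test. Below is a minimal sketch of the same kind of health check, assuming a context set up via pycuda.autoinit (not part of the original test):

import numpy as np
import pycuda.autoinit  # noqa: F401 -- creates and activates a CUDA context
from pycuda import gpuarray
from pycuda.driver import Context

# a follow-up kernel launch plus a blocking synchronize will raise if the
# context was corrupted by an earlier call
d = gpuarray.to_gpu(np.ones(16, dtype=np.float32))
d += 1.0
Context.synchronize()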
Example #2
File: test_op.py Project: tvandera/binet
def test_cublas_bug():
    '''
    The SGEMM call would cause all calls after it to fail for some unknown
    reason. This is likely caused by swaprows corrupting memory.

    NOTE: this was confirmed by NVIDIA to be a bug within CUDA, and should be
          fixed in CUDA 6.5
    '''
    import numpy as np
    from pycuda import gpuarray
    from pycuda.driver import Stream
    from skcuda.cublas import cublasSgemm
    # _global_cublas_handle is only populated once skcuda.misc.init() has run
    from skcuda.misc import _global_cublas_handle as handle

    n = 131

    s = slice(128, n)
    X = gpuarray.to_gpu(np.random.randn(n, 2483).astype(np.float32))
    a = gpuarray.empty((X.shape[1], 3), dtype=np.float32)
    c = gpuarray.empty((a.shape[0], X.shape[1]), dtype=np.float32)
    b = gpuarray.empty_like(X)

    m, n = a.shape[0], b[s].shape[1]
    k = a.shape[1]
    # cuBLAS expects column-major operands; lda/ldb/ldc are their leading dimensions
    lda = m
    ldb = k
    ldc = m
    #cublasSgemm(handle, 0, 0, m, n, k, 0.0, b.gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc)
    cublasSgemm(handle, 'n', 'n', m, n, k, 1.0, b[s].gpudata, lda, a.gpudata,
                ldb, 0.0, c.gpudata, ldc)
    #print handle, 'n', 'n', m, n, k, 1.0, b[s].gpudata, lda, a.gpudata, ldb, 0.0, c.gpudata, ldc

    #gpuarray.dot(d, Xoutd[s])
    #op.sgemm(a, b[s], c)

    stream = Stream()
    stream.synchronize()
Example #3
def test_todense_stream():
    ''' Test GPUCSRArray.todense()'''
    import numpy as np
    from scipy.sparse import csr_matrix
    from pycuda.driver import Stream
    from numpy.testing import assert_allclose
    # GPUCSRArray itself is assumed to come from the test module's top-level
    # imports (it is part of the binet project these snippets are taken from)

    X = np.random.laplace(size=(600, 300)).astype(np.float32)
    X[X < 0.1] = 0    # sparsify: drop everything below the 0.1 threshold
    X = csr_matrix(X, dtype=np.float32)
    Xd = GPUCSRArray(X)

    stream = Stream()

    Yd = Xd.todense(stream=stream)
    stream.synchronize()
    assert_allclose(Yd.get(), X.A, rtol=1e-3, err_msg="todense")
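For context on the setup above: np.random.laplace draws from a Laplace(0, 1) distribution, so the X[X < 0.1] = 0 threshold zeroes roughly half of the entries before the CSR conversion, and X.A in the assert is scipy's shorthand for X.toarray(), the dense host-side reference the GPU result is checked against. A small host-only sketch of that setup (the figures are approximate):

import numpy as np
from scipy.sparse import csr_matrix

X = np.random.laplace(size=(600, 300)).astype(np.float32)
X[X < 0.1] = 0                             # about 55% of Laplace(0, 1) mass lies below 0.1
X = csr_matrix(X, dtype=np.float32)

print(X.nnz / float(np.prod(X.shape)))     # roughly 0.45 of the entries remain non-zero
print(np.array_equal(X.A, X.toarray()))    # True: .A is shorthand for toarray()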
Example #4
def test_todense_stream():
    ''' Test GPUCSRArray.todense()'''
    import numpy as np
    from scipy.sparse import csr_matrix
    from pycuda.driver import Stream
    from numpy.testing import assert_allclose
    # GPUCSRArray itself is assumed to come from the test module's top-level
    # imports (it is part of the binet project these snippets are taken from)

    X = np.random.laplace(size=(600, 300)).astype(np.float32)
    X[X < 0.1] = 0    # sparsify: drop everything below the 0.1 threshold
    X = csr_matrix(X, dtype=np.float32)
    Xd = GPUCSRArray(X)

    stream = Stream()

    Yd = Xd.todense(stream=stream)
    stream.synchronize()
    assert_allclose(Yd.get(), X.A, rtol=1e-3, err_msg="todense")
Example #5
def test_cusparseSetStream():
    import numpy as np
    from scipy import sparse
    import pycuda.gpuarray as gpu
    from pycuda.driver import Stream
    from numpy.testing import assert_allclose
    # `cusparse` (the low-level cuSPARSE wrapper used below) is assumed to come
    # from the test module's top-level imports

    A = np.random.laplace(size=(3, 5)).astype(np.float32)
    A[A < 0.1] = 0
    A = sparse.csr_matrix(A, dtype=np.float32)
    A.sort_indices()

    a_data = gpu.to_gpu(A.data)
    a_indptr = gpu.to_gpu(A.indptr)
    a_indices = gpu.to_gpu(A.indices)
    # Scsr2dense writes a column-major dense matrix, hence order="F"
    out = gpu.empty((A.shape[0], A.shape[1]), dtype=A.dtype, order="F")

    h = cusparse.cusparseCreate()
    descrA = cusparse.cusparseCreateMatDescr()

    stream = Stream()
    cusparse.cusparseSetStream(h, stream.handle)
    cusparse.cusparseScsr2dense(h, A.shape[0], A.shape[1],
        descrA, a_data.gpudata, a_indptr.gpudata, a_indices.gpudata,
        out.gpudata, out.shape[0])
    cusparse.cusparseSetStream(h, 0)
    stream.synchronize()
    assert_allclose(out.get(), A.A, rtol=1e-4)
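A note on the stream handling above: cusparseSetStream binds every subsequent cuSPARSE call on the handle to the given stream, and resetting it to 0 (CUDA's default stream) afterwards keeps later users of the shared handle from accidentally running on a stream that may no longer exist. If this pattern is used more widely, the reset can be wrapped in a small helper so it also happens on error; this is only a sketch, assuming the same cusparse wrapper, handle h and Stream class as in the test:

from contextlib import contextmanager

@contextmanager
def cusparse_stream(handle, stream):
    '''Bind all cuSPARSE calls on `handle` to `stream` for the duration of the block.'''
    cusparse.cusparseSetStream(handle, stream.handle)
    try:
        yield stream
    finally:
        cusparse.cusparseSetStream(handle, 0)   # 0 is the default stream

# hypothetical usage, mirroring the test above:
# with cusparse_stream(h, Stream()) as stream:
#     cusparse.cusparseScsr2dense(h, A.shape[0], A.shape[1], descrA,
#                                 a_data.gpudata, a_indptr.gpudata,
#                                 a_indices.gpudata, out.gpudata, out.shape[0])
# stream.synchronize()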