示例#1
0
 def setup(sizes, dtype):
     """Create an OpenCL queue plus matching random host and device arrays.

     Args:
         sizes: Iterable of shapes; each shape is unpacked into
             ``np.random.rand``.
         dtype: Element type the random host data is cast to.

     Returns:
         A ``(queue, host_arrays, device_arrays)`` tuple in which
         ``device_arrays[i]`` was filled from ``host_arrays[i]``.
     """
     context = cl.create_some_context()
     queue = cl.CommandQueue(context)

     host_arrays = []
     device_arrays = []
     for shape in sizes:
         host = np.random.rand(*shape).astype(dtype=dtype)
         # Device array with the same shape/dtype, then filled from the host.
         device = Array(queue, host.shape, host.dtype)
         device.set(host)
         host_arrays.append(host)
         device_arrays.append(device)

     # Drain the queue before handing the arrays back to the caller.
     queue.finish()
     return queue, host_arrays, device_arrays
示例#2
0
def setup_op(queue, A):
    """Wrap the host matrix ``A`` in a ``LinearOperator`` backed by ``blas.gemv``.

    ``A`` is copied once into a device ``Array`` on ``queue``; the returned
    operator's callbacks reuse that device copy.

    Args:
        queue: OpenCL command queue used for the upload and for every gemv.
        A: Host matrix; its ``shape`` and ``dtype`` are passed to the operator.

    Returns:
        ``LinearOperator(A.shape, matvec, rmatvec=..., dtype=A.dtype)``.
        NOTE(review): the callbacks take ``(x, y)`` and return ``None``, so
        this is presumably a project-specific ``LinearOperator`` — confirm.
    """
    device_matrix = Array(queue, A.shape, A.dtype)
    device_matrix.set(A)

    def matvec(x, y):
        # Plain gemv with the stored matrix; output goes to y.
        blas.gemv(queue, device_matrix, x, y)

    def rmatvec(x, y):
        # Same call, but using the transpose of the stored matrix.
        blas.gemv(queue, device_matrix, x, y, transA=True)

    return LinearOperator(A.shape, matvec, rmatvec=rmatvec, dtype=A.dtype)
示例#3
0
import numpy as np
import pyopencl as cl
from pyopencl.array import Array
import pyclblast

# Element type shared by every host and device array in this sample.
dtype = 'float32'

print("# Setting up OpenCL")
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

print("# Setting up Numpy arrays")
# Problem dimensions: a is (m, k), b is (k, n), c is (m, n).
m = 2
n = 3
k = 4
a, b, c = (np.random.rand(*shape).astype(dtype=dtype)
           for shape in ((m, k), (k, n), (m, n)))

print("# Setting up OpenCL arrays")
# Allocate all three device arrays first, then upload the host contents.
cla, clb, clc = (Array(queue, host.shape, host.dtype) for host in (a, b, c))
for device, host in zip((cla, clb, clc), (a, b, c)):
    device.set(host)

print("# Example level-3 operation: GEMM")
# Each leading dimension is the second-axis length of the matching array.
pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n)
print("# Matrix C result: %s" % clc.get())
print("# Expected result: %s" % np.dot(a, b))
示例#4
0
from datetime import datetime

if __name__ == "__main__":
    # OpenCL context and the queue all work is submitted to.
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # Host data for a small single-precision GEMM:
    # a is (m, k), b is (k, n) and c is an uninitialised (m, n) output.
    m, n, k = 2, 3, 4
    a = np.random.rand(m, k).astype(dtype=np.float32)
    b = np.random.rand(k, n).astype(dtype=np.float32)
    c = np.empty((m, n), np.float32)

    # Allocate the device arrays, then upload the host contents.
    cla, clb, clc = (Array(queue, host.shape, host.dtype) for host in (a, b, c))
    for device_array, host in zip((cla, clb, clc), (a, b, c)):
        device_array.set(host)

    # Baseline tuning parameters for the 'Xgemm' kernel; only "MWG" is varied
    # in the loop below.
    params = {
        "KWG": 32,
        "KWI": 2,
        "MDIMA": 8,
        "MDIMC": 8,
        "MWG": 64,
        "NDIMB": 8,
        "NDIMC": 8,
        "NWG": 64,
        "SA": 0,
        "SB": 0,
        "STRM": 0,
        "STRN": 0,
        "VWM": 4,
        "VWN": 1,
    }

    # Neither the target device nor the reference product changes per run.
    device = ctx.devices[0]
    expected = a.dot(b)

    for mwg in (32, 64, 256):
        print("Running sgemm tuned with MWG = %d" % mwg)
        params["MWG"] = mwg
        # NOTE(review): the literal 32 is presumably the precision in bits
        # (single precision) — confirm against the pyclblast documentation.
        pyclblast.override_parameters(device, 'Xgemm', 32, params)
        pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n)
        assert np.allclose(clc.get(), expected), "uh-oh, xgemm isn't behaving correctly"
示例#5
0
from pyopencl.array import arange, Array
from pyopencl.reduction import ReductionKernel
import numpy

# `pyopencl` itself is used below but was never bound in this snippet
# (only names from its submodules were imported).
import pyopencl

ctx = pyopencl.create_some_context()
queue = pyopencl.CommandQueue(ctx)

# Host input: a (100, 1) column of int32 flags in which every fifth row
# (0, 5, ..., 95) is 1, so the reduction below should yield 20.
acpu = numpy.zeros((100, 1), dtype=numpy.int32)
acpu[::5] = 1

# Mirror the host data on the device and wait for the upload to finish.
a = Array(queue, (100, 1), numpy.int32)
a.set(acpu)
queue.finish()

# Sum reduction: every element is mapped to itself ("x[i]") and the mapped
# values are combined with "a+b", starting from the neutral element "0".
krnl = ReductionKernel(
    ctx,
    numpy.int32,
    neutral="0",
    reduce_expr="a+b",
    map_expr="x[i]",
    arguments="__global int *x")

my_sum = krnl(a).get()
queue.finish()
# Function-call form prints the same text on Python 2 and works on Python 3.
print(my_sum)
示例#6
0
# Reproducible host data: an (m, n) matrix plus vectors of length n and m,
# drawn between -1 and 1 in the order A, x, y.
rng = np.random.RandomState(1)  # change the seed to see different data
A = rng.uniform(-1, 1, size=(m, n)).astype(dtype)
x = rng.uniform(-1, 1, size=(n,)).astype(dtype)
y = rng.uniform(-1, 1, size=(m,)).astype(dtype)

# allocate OpenCL memory on the device, one array per host array
clA, clx, cly = (Array(queue, host.shape, host.dtype) for host in (A, x, y))

# copy the inputs of the first product to the device
# (cly is not uploaded yet; it only receives the result)
for device, host in ((clA, A), (clx, x)):
    device.set(host)

# compute a matrix-vector product (gemv); the result is read back from cly
blas.gemv(queue, clA, clx, cly)

# compare against the NumPy reference
print("Expected: ", np.dot(A, x))
print("Actual:   ", cly.get())

# transposed product: upload y, use cly as input and clx as output
cly.set(y)
blas.gemv(queue, clA, cly, clx, transA=True)
print("Expected: ", np.dot(A.T, y))
print("Actual:   ", clx.get())
示例#7
0
import pyclblast

# Sample configuration: element type, matrix dimensions and the two scalars
# that also appear in the reference expression printed at the end.
dtype = 'float32'
m = 4
n = 3
alpha, beta = 1.0, 0.0

print("# Setting up OpenCL")
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

print("# Setting up Numpy arrays")
# a is (m, n), x has length n, y has length m.
a, x, y = (np.random.rand(*shape).astype(dtype=dtype)
           for shape in ((m, n), (n,), (m,)))

print("# Setting up OpenCL arrays")
# Allocate all device arrays first, then upload the host contents.
cla, clx, cly = (Array(queue, host.shape, host.dtype) for host in (a, x, y))
for device, host in zip((cla, clx, cly), (a, x, y)):
    device.set(host)

print("# Example level-2 operation: GEMV")
# a_ld is the second-axis length of a.
pyclblast.gemv(queue, m, n, cla, clx, cly, a_ld=n, alpha=alpha, beta=beta)
queue.finish()
print("# Result for vector y: %s" % cly.get())
print("# Expected result:     %s" % (alpha * np.dot(a, x) + beta * y))
示例#8
0
def to_ocl(a):
    """Return a device ``Array`` holding a copy of the host array ``a``.

    The array is created on the ``queue`` visible from the enclosing scope,
    with the shape and dtype of ``a``, and then filled from ``a``.
    """
    device_copy = Array(queue, a.shape, a.dtype)
    device_copy.set(a)
    return device_copy
示例#9
0
# generate some random data on the CPU
n = 5
# NOTE(review): the previous note here read "also supports 'float64'", which
# repeats the value already in use; presumably 'float32' was meant — confirm.
dtype = 'float64'

# Two length-n vectors drawn between -1 and 1, x first and then y.
rng = np.random.RandomState(1)  # change the seed to see different data
x = rng.uniform(-1, 1, size=(n,)).astype(dtype)
y = rng.uniform(-1, 1, size=(n,)).astype(dtype)

# allocate OpenCL memory on the device; cld is a one-element array that
# receives the dot product
clx, cly = (Array(queue, host.shape, host.dtype) for host in (x, y))
cld = Array(queue, 1, x.dtype)

# copy data to device
for device, host in ((clx, x), (cly, y)):
    device.set(host)

# compute a dot product (dot)
blas.dot(queue, clx, cly, cld)

# compare against the NumPy reference
print("Expected: ", np.dot(x, y))
print("Actual:   ", cld.get()[0])

# tidy up the BLAS
blas.teardown()
示例#10
0
import numpy as np
import pyopencl as cl
from pyopencl.array import Array
import pyclblast

# Sample configuration: element type, scaling factor and vector length.
dtype = 'float32'
alpha = 1.5
n = 4

print("# Setting up OpenCL")
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

print("# Setting up Numpy arrays")
# Two random vectors of length n, x first and then y.
x, y = (np.random.rand(n).astype(dtype=dtype) for _ in range(2))

print("# Setting up OpenCL arrays")
# Allocate both device arrays first, then upload the host contents.
clx, cly = (Array(queue, host.shape, host.dtype) for host in (x, y))
for device, host in zip((clx, cly), (x, y)):
    device.set(host)

print("# Example level-1 operation: AXPY")
pyclblast.axpy(queue, n, clx, cly, alpha=alpha)
queue.finish()
# The result is read back from cly and compared with alpha * x + y.
print("# Result for vector y: %s" % cly.get())
print("# Expected result:     %s" % (alpha * x + y))
示例#11
0
import pyopencl
from pyopencl.array import arange, Array
from pyopencl.reduction import ReductionKernel
import numpy

ctx = pyopencl.create_some_context()
queue = pyopencl.CommandQueue(ctx)

# Host input: a (100, 1) column of int32 flags in which every fifth row
# (0, 5, ..., 95) is 1, so the reduction below should yield 20.
acpu = numpy.zeros((100, 1), dtype=numpy.int32)
acpu[::5] = 1

# Mirror the host data on the device and wait for the upload to finish.
a = Array(queue, (100, 1), numpy.int32)
a.set(acpu)
queue.finish()

# Sum reduction: every element is mapped to itself ("x[i]") and the mapped
# values are combined with "a+b", starting from the neutral element "0".
krnl = ReductionKernel(
    ctx,
    numpy.int32,
    neutral="0",
    reduce_expr="a+b",
    map_expr="x[i]",
    arguments="__global int *x")

my_sum = krnl(a).get()
queue.finish()
# Function-call form prints the same text on Python 2 and works on Python 3.
print(my_sum)


示例#12
0
def to_ocl(a):
    """Upload the host array ``a`` and return the resulting device ``Array``.

    The device array takes its shape and dtype from ``a`` and is created on
    the ``queue`` visible from the enclosing scope.
    """
    uploaded = Array(queue, a.shape, a.dtype)
    uploaded.set(a)
    return uploaded
示例#13
0
# Fill the three right-hand-side vectors in place with values drawn between
# -1 and 1 (x, x1 and x2 are allocated earlier, outside this excerpt).
x[...] = rng.uniform(-1, 1, size=x.shape)
x1[...] = rng.uniform(-1, 1, size=x1.shape)
x2[...] = rng.uniform(-1, 1, size=x2.shape)

# Split A into triangular parts. The upper part must be taken first because
# the next line rebinds A to its lower triangle.
A_upper = np.triu(A)
A = np.tril(A)

# allocate OpenCL memory on the device
# (clA_upper has the same shape and dtype as clA)
clA = Array(queue, A.shape, A.dtype)
clA_upper = Array(queue, A.shape, A.dtype)
clx = Array(queue, x.shape, x.dtype)
clx1 = Array(queue, x1.shape, x1.dtype)
clx2 = Array(queue, x2.shape, x2.dtype)

# copy data to device
# (clx1 is uploaded further down; clx2 is not uploaded in this excerpt)
clA.set(A)
clA_upper.set(A_upper)
clx.set(x)

# compute a triangular solve (trsv); the solution is read back from clx below
blas.trsv(queue, clA, clx)

# check the result against NumPy's general solver
print("Expected: ", np.linalg.solve(A, x))
print("Actual:   ", clx.get())
print()

# try a triangular solve with the transpose
clx1.set(x1)
blas.trsv(queue, clA, clx1, transA=True)
print("Expected: ", np.linalg.solve(A.T, x1))