示例#1
0
            def run(self, queue, atol, rtol):
                """Call ``rkern`` on the compute queue and store what it returns.

                ``x``, ``y`` and ``z`` from the enclosing scope are each wrapped
                in an ``Array`` of ``cnt`` elements of ``dtype`` that reuses the
                operand's existing ``.data``; the result of ``rkern`` is kept in
                ``self._retarr``.
                """
                comp_queue = queue.cl_queue_comp

                # Build the wrappers in the same x, y, z order that rkern
                # expects for its positional arguments.
                xarr, yarr, zarr = [
                    Array(comp_queue, cnt, dtype, data=operand.data)
                    for operand in (x, y, z)
                ]

                self._retarr = rkern(
                    xarr, yarr, zarr, atol, rtol, queue=comp_queue
                )
示例#2
0
def rand(queue, shape, dtype, luxury=None, a=0, b=1):
    """Create an array of `shape` and `dtype` holding uniform random values
    drawn from the half-open interval [a,b).
    """

    from pyopencl.array import Array

    generator = _get_generator(queue, luxury)
    out = Array(queue, shape, dtype)

    # Register the fill with the array so the array keeps track of the
    # event returned by the generator.
    fill_event = generator.fill_uniform(out, a=a, b=b)
    out.add_event(fill_event)
    return out
示例#3
0
文件: clrandom.py 项目: AI42/pyopencl
def rand(queue, shape, dtype, luxury=None, a=0, b=1):
    """Allocate an array with the given `shape` and `dtype` and fill it
    with uniformly distributed random numbers in [a,b).
    """

    from pyopencl.array import Array

    rng = _get_generator(queue, luxury)
    filled = Array(queue, shape, dtype)
    # The event returned by the fill is attached to the array itself.
    filled.add_event(rng.fill_uniform(filled, a=a, b=b))
    return filled
示例#4
0
 def setup(sizes, dtype):
     """Create a context and queue plus matching host and device arrays.

     For every entry of ``sizes`` a random NumPy array of that shape is
     generated, converted to ``dtype`` and copied into a new ``Array`` of
     the same shape and dtype.

     Returns ``(queue, host_arrays, device_arrays)``; both lists follow the
     order of ``sizes``.
     """
     ctx = cl.create_some_context()
     queue = cl.CommandQueue(ctx)

     host_arrays = []
     device_arrays = []
     for dims in sizes:
         host = np.random.rand(*dims).astype(dtype=dtype)
         device = Array(queue, host.shape, host.dtype)
         device.set(host)
         host_arrays.append(host)
         device_arrays.append(device)

     # Let everything queued so far complete before returning the arrays.
     queue.finish()
     return queue, host_arrays, device_arrays
示例#5
0
def setup_op(queue, A):
    """Wrap the matrix ``A`` in a ``LinearOperator`` driven by ``blas.gemv``.

    ``A`` is copied once into an ``Array``.  The operator's ``matvec`` and
    ``rmatvec`` callbacks both call ``blas.gemv`` on that copy, the latter
    with ``transA=True``.  Each callback takes ``(x, y)`` and returns
    ``None``.
    """
    device_matrix = Array(queue, A.shape, A.dtype)
    device_matrix.set(A)

    def apply_plain(x, y):
        blas.gemv(queue, device_matrix, x, y)

    def apply_transposed(x, y):
        blas.gemv(queue, device_matrix, x, y, transA=True)

    return LinearOperator(
        A.shape, apply_plain, rmatvec=apply_transposed, dtype=A.dtype)
示例#6
0
    def __init__(self, sf, omega):
        '''Create the OpenCL context, build the kernel and upload the inputs.

        Param:
             sf: the freeze out hypersf ds0,ds1,ds2,ds3,vx,vy,veta,etas
             omega: omega^tau, x, y, etas
        '''
        self.cwd, _ = os.path.split(__file__)
        self.mass = 1.115
        self.Tfrz = 0.137
        self.ctx = cl.create_some_context()
        self.queue = cl.CommandQueue(self.ctx)

        # Read the kernel source inside a context manager so the file handle
        # is closed as soon as the text has been read.
        # NOTE(review): the path is resolved against the process working
        # directory, not self.cwd -- confirm this is intended.
        with open('kernel_polarization.cl', 'r') as kernel_file:
            src = kernel_file.read()
        self.prg = cl.Program(self.ctx, src).build()

        # calc umu since they are used for each (Y,pt,phi)
        vx = sf[:, 4]
        vy = sf[:, 5]
        vz = sf[:, 6]
        v_sqr = vx * vx + vy * vy + vz * vz
        # Replace squared speeds above 1 so that 1 - v_sqr below stays
        # positive and the square root is real.
        v_sqr[v_sqr > 1.0] = 0.99999
        u0 = 1.0 / np.sqrt(1.0 - v_sqr)

        # Number of rows in sf.
        self.size_sf = len(sf[:, 0])

        # Host-side (u0, u0*vx, u0*vy, u0*vz) per row, stored as float32.
        h_umu = np.zeros((self.size_sf, 4), dtype=np.float32)
        h_umu[:, 0] = u0
        h_umu[:, 1] = u0 * vx
        h_umu[:, 2] = u0 * vy
        h_umu[:, 3] = u0 * vz

        h_smu = sf[:, 0:4].astype(np.float32)
        h_etas = sf[:, 7].astype(np.float32)
        h_omegaY = 0.5 * omega[:, 2].astype(np.float32)

        # Read-only buffers initialised from the host arrays above.
        mf = cl.mem_flags
        self.d_smu = cl.Buffer(self.ctx,
                               mf.READ_ONLY | mf.COPY_HOST_PTR,
                               hostbuf=h_smu)
        self.d_umu = cl.Buffer(self.ctx,
                               mf.READ_ONLY | mf.COPY_HOST_PTR,
                               hostbuf=h_umu)
        self.d_omegaY = cl.Buffer(self.ctx,
                                  mf.READ_ONLY | mf.COPY_HOST_PTR,
                                  hostbuf=h_omegaY)
        self.d_etas = cl.Buffer(self.ctx,
                                mf.READ_ONLY | mf.COPY_HOST_PTR,
                                hostbuf=h_etas)

        # float32 arrays with one entry per row of sf.
        self.d_pol = Array(self.queue, self.size_sf, np.float32)
        self.d_vor = Array(self.queue, self.size_sf, np.float32)
        self.d_rho = Array(self.queue, self.size_sf, np.float32)
示例#7
0
def rand(queue, shape, dtype, luxury=None, a=0, b=1):
    """Allocate an array of `shape` and `dtype` and fill it with uniform
    random values from [a,b).

    Passing anything other than ``None`` for `luxury` only triggers a
    warning; the value is not used otherwise.
    """

    if luxury is not None:
        import warnings
        # stacklevel=2 attributes the warning to the caller of rand().
        warnings.warn(
                "Specifying the 'luxury' argument is deprecated and will stop being "
                "supported in PyOpenCL 2018.x", stacklevel=2)

    from pyopencl.array import Array

    generator = _get_generator(queue.context)
    out = Array(queue, shape, dtype)
    fill_event = generator.fill_uniform(out, a=a, b=b)
    out.add_event(fill_event)
    return out
示例#8
0
    def getitem_device(self, item):
        """Return the entry or entries selected by ``item``.

        A slice is first expanded into an index array.  An iterable
        selection yields a new ``CLRaggedArray`` built with ``from_buffer``
        on the same ``cl_buf``; any other index yields a single 2-D
        ``Array`` that reuses ``cl_buf.data`` at the entry's offset.
        """
        if isinstance(item, slice):
            item = np.arange(len(self))[item]

        if not is_iterable(item):
            # starts and strides are stored in elements; Array wants bytes.
            itemsize = self.dtype.itemsize
            shape = (self.shape0s[item], self.shape1s[item])
            byte_strides = (
                self.stride0s[item] * itemsize,
                self.stride1s[item] * itemsize,
            )
            return Array(
                self.queue,
                shape,
                self.dtype,
                strides=byte_strides,
                data=self.cl_buf.data,
                offset=self.starts[item] * itemsize,
            )

        per_entry = [
            field[item]
            for field in (
                self.starts,
                self.shape0s,
                self.shape1s,
                self.stride0s,
                self.stride1s,
            )
        ]
        return CLRaggedArray.from_buffer(
            self.queue,
            self.cl_buf,
            *per_entry,
            names=[self.names[i] for i in item]
        )
示例#9
0
    def inner_rand(queue, shape, dtype, luxury=None, a=0, b=1):
        """Allocate an array of ``shape``/``dtype`` and fill it with uniform
        random values using bounds ``a`` and ``b``.
        """
        from pyopencl.array import Array

        # NOTE(review): `kwargs` is not defined in this excerpt (presumably it
        # comes from an enclosing function), and the value popped here
        # replaces whatever was passed as the `luxury` parameter -- confirm.
        luxury = kwargs.pop("luxury", None)

        generator = _get_generator(queue, luxury)
        out = Array(queue, shape, dtype)
        generator.fill_uniform(out, a=a, b=b)
        return out
示例#10
0
文件: nn.py 项目: JonnoFTW/nn-cl
    def forward(self, buf: array.Array, verbose: bool = False):
        """Pass ``buf`` through every layer in order and return the result.

        Each layer receives the previous layer's output array.  A host copy
        of every layer input is stored on the layer as ``inputs``.  With
        ``verbose`` set, the inputs, weights and outputs are printed per
        layer together with a NumPy reference computation.
        """
        # Read as in the original code; the value itself is not used.  A
        # sub-region of the input could be taken here instead, but per the
        # original author that only works on pocl, which does not implement
        # CL_MISALIGNED_SUB_BUFFER_OFFSET.
        size = self.layers[0].input_width

        host_in = buf.get()
        for idx, layer in enumerate(self.layers):
            layer.inputs = host_in.copy()
            if verbose:
                print(f"Layer {idx}")
                print(
                    f"Input Batch: rows={layer.batch_size} samples cols={layer.input_width} features \n",
                    host_in)

            buf = layer(buf)
            host_out = buf.get()

            if verbose:
                weights = layer.get_weights()
                bias = layer.get_bias()
                print(
                    f"\nWeights: (rows={layer.units} units, cols={layer.input_width} inputs)\n",
                    weights)
                print(
                    f"\nOutput: (rows={layer.batch_size} batch samples cols={layer.units} units)\n",
                    host_out)

                # Host-side reference for comparison with the layer output.
                expected = (np.dot(weights, host_in.T) + bias).T
                activation = layer.activation
                if activation == 'relu':
                    expected = np.clip(expected, 0, a_max=None)
                elif activation == 'sigmoid':
                    expected = 1 / (np.exp(-expected) + 1)
                elif activation == 'softmax':
                    exps = np.exp(expected)
                    expected = exps / exps.sum(axis=1)[:, None]
                print("Expected:\n", expected)

            # This layer's output is the next layer's input.
            host_in = host_out

        # buf now holds the output of the last layer
        return buf
示例#11
0
def rand(context, queue, shape, dtype):
    """Allocate an array of ``shape``/``dtype`` and fill it via an MD5-based
    elementwise kernel.

    float32, float64, int32 and uint32 are supported; any other ``dtype``
    raises ``NotImplementedError``.  The kernel seed is drawn from
    ``numpy.random.randint``.
    """
    from pyopencl.array import Array
    from pyopencl.elementwise import get_elwise_kernel

    result = Array(context, shape, dtype, queue=queue)

    # Choose the kernel signature, the code appended to md5_code that writes
    # the values a, b, c, d into dest, and the kernel name.
    if dtype == numpy.float32:
        arg_decl = "float *dest, unsigned int seed"
        kernel_name = "md5_rng_float"
        store_code = """
            #define POW_2_M32 (1/4294967296.0f)
            dest[i] = a*POW_2_M32;
            if ((i += gsize) < n)
                dest[i] = b*POW_2_M32;
            if ((i += gsize) < n)
                dest[i] = c*POW_2_M32;
            if ((i += gsize) < n)
                dest[i] = d*POW_2_M32;
            """
    elif dtype == numpy.float64:
        arg_decl = "double *dest, unsigned int seed"
        kernel_name = "md5_rng_float"
        store_code = """
            #define POW_2_M32 (1/4294967296.0)
            #define POW_2_M64 (1/18446744073709551616.)

            dest[i] = a*POW_2_M32 + b*POW_2_M64;

            if ((i += gsize) < n)
            {
              dest[i] = c*POW_2_M32 + d*POW_2_M64;
            }
            """
    elif dtype in [numpy.int32, numpy.uint32]:
        arg_decl = "unsigned int *dest, unsigned int seed"
        kernel_name = "md5_rng_int"
        store_code = """
            dest[i] = a;
            if ((i += gsize) < n)
                dest[i] = b;
            if ((i += gsize) < n)
                dest[i] = c;
            if ((i += gsize) < n)
                dest[i] = d;
            """
    else:
        raise NotImplementedError

    func = get_elwise_kernel(
        context, arg_decl, md5_code + store_code, kernel_name)

    func(queue, result._global_size, result._local_size, result.data,
         numpy.random.randint(2**31 - 1), result.size)

    return result
示例#12
0
    def getitem_device(self, item):
        """Return the entry or entries selected by ``item``.

        A slice is first expanded into an index array.  An iterable
        selection yields a new instance of this class that shares
        ``cl_buf`` and holds the selected starts, shapes, strides and names;
        any other index yields a single 2-D ``Array`` that reuses
        ``cl_buf.data`` at the entry's offset.
        """
        if isinstance(item, slice):
            item = np.arange(len(self))[item]

        if not is_iterable(item):
            # starts and strides are stored in elements; Array wants bytes.
            itemsize = self.dtype.itemsize
            shape = (self.shape0s[item], self.shape1s[item])
            byte_strides = (self.stride0s[item] * itemsize,
                            self.stride1s[item] * itemsize)
            return Array(
                self.queue, shape, self.dtype,
                strides=byte_strides,
                data=self.cl_buf.data,
                offset=self.starts[item] * itemsize)

        # Build the result without running __init__.  Attributes are assigned
        # in the same order as before in case any of them are properties.
        cls = self.__class__
        subset = cls.__new__(cls)
        subset.queue = self.queue
        for field in ("starts", "shape0s", "shape1s", "stride0s", "stride1s"):
            setattr(subset, field, getattr(self, field)[item])
        subset.cl_buf = self.cl_buf
        subset.names = [self.names[i] for i in item]
        return subset
示例#13
0
def rand(context, queue, shape, dtype):
    """Allocate an array of ``shape``/``dtype`` and pass it to ``_rand``
    together with a seed drawn from ``numpy.random.randint``.
    """
    from pyopencl.array import Array

    out = Array(context, shape, dtype, queue=queue)
    seed = numpy.random.randint(2**31 - 1)
    _rand(out, seed)
    return out
示例#14
0
import pyopencl
from pyopencl.array import arange, Array
from pyopencl.reduction import ReductionKernel
import numpy

ctx = pyopencl.create_some_context()
queue = pyopencl.CommandQueue(ctx)

# Host input: a (100, 1) int32 column with every fifth row set to 1.
acpu = numpy.zeros((100, 1), dtype=numpy.int32)
acpu[::5] = 1

# Copy the host data into an Array and wait for the queue to drain.
a = Array(queue, (100, 1), numpy.int32)
a.set(acpu)
queue.finish()

# Sum reduction over a single int buffer: each element maps to itself and
# pairs are combined with "+", starting from the neutral element 0.
krnl = ReductionKernel(
    ctx,
    numpy.int32,
    neutral="0",
    reduce_expr="a+b",
    map_expr="x[i]",
    arguments="__global int *x")

my_sum = krnl(a).get()
queue.finish()
# Function-call form of print works on both Python 2 and Python 3.
print(my_sum)
示例#15
0
# start up the BLAS
# NOTE(review): `blas`, `np`, `queue` and `Array` are not defined in this
# excerpt; they are presumably imported/created earlier in the script.
blas.setup()

# generate some random data on the CPU
n = 5
dtype = 'float64'  # also supports 'float32'

x = np.zeros(n, dtype=dtype)
y = np.zeros(n, dtype=dtype)

rng = np.random.RandomState(1)  # change the seed to see different data
x[...] = rng.uniform(-1, 1, size=x.shape)
y[...] = rng.uniform(-1, 1, size=y.shape)

# allocate OpenCL memory on the device
# (cld is a one-element array that receives the dot product)
clx = Array(queue, x.shape, x.dtype)
cly = Array(queue, y.shape, y.dtype)
cld = Array(queue, 1, x.dtype)

# copy data to device
clx.set(x)
cly.set(y)

# compute a dot product (dot)
blas.dot(queue, clx, cly, cld)

# check the result
print("Expected: ", np.dot(x,y))
print("Actual:   ", cld.get()[0])

# tidy up the BLAS
示例#16
0
# generate some random data on the CPU
# NOTE(review): `np`, `queue`, `Array` and `blas` are not defined in this
# excerpt; they are presumably set up earlier in the script.
m, n = 5, 4
dtype = 'float32'  # also supports 'float64'

# A is m-by-n, x has n entries and y has m entries
A = np.zeros((m, n), dtype=dtype)
x = np.zeros(n, dtype=dtype)
y = np.zeros(m, dtype=dtype)

rng = np.random.RandomState(1)  # change the seed to see different data
A[...] = rng.uniform(-1, 1, size=A.shape)
x[...] = rng.uniform(-1, 1, size=x.shape)
y[...] = rng.uniform(-1, 1, size=y.shape)

# allocate OpenCL memory on the device
clA = Array(queue, A.shape, A.dtype)
clx = Array(queue, x.shape, x.dtype)
cly = Array(queue, y.shape, y.dtype)

# copy data to device
# NOTE(review): only A and x are copied; the host values of y are never
# written to cly before the gemv call -- confirm that is intended.
clA.set(A)
clx.set(x)

# compute a matrix-vector product (gemv)
blas.gemv(queue, clA, clx, cly)

# check the result
print("Expected: ", np.dot(A, x))
print("Actual:   ", cly.get())

# try a matrix-vector product with the transpose
示例#17
0
import numpy as np
import pyopencl as cl
from pyopencl.array import Array
import pyclblast

# Element type shared by all host arrays in this sample
dtype = 'float32'

print("# Setting up OpenCL")
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

print("# Setting up Numpy arrays")
m, n, k = 2, 3, 4
# a is m-by-k, b is k-by-n and c is m-by-n; generated in that order.
a, b, c = (
    np.random.rand(rows, cols).astype(dtype=dtype)
    for rows, cols in ((m, k), (k, n), (m, n))
)

print("# Setting up OpenCL arrays")
# Allocate all three arrays first, then copy the host data across.
cla, clb, clc = (Array(queue, host.shape, host.dtype) for host in (a, b, c))
cla.set(a)
clb.set(b)
clc.set(c)

print("# Example level-3 operation: GEMM")
pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n)
print("# Matrix C result: %s" % clc.get())
print("# Expected result: %s" % (np.dot(a, b)))
示例#18
0
def to_ocl(a):
    """Return a new ``Array`` on the module-level ``queue`` holding ``a``.

    The array is created with the shape and dtype of ``a`` and then filled
    through its ``set`` method.
    """
    device_copy = Array(queue, a.shape, a.dtype)
    device_copy.set(a)
    return device_copy
示例#19
0
# start up the BLAS
# NOTE(review): `blas`, `np`, `queue` and `Array` are not defined in this
# excerpt; they are presumably imported/created earlier in the script.
blas.setup()

# generate some random data on the CPU
n = 5
dtype = 'float64'  # also supports 'float32'

x = np.zeros(n, dtype=dtype)
y = np.zeros(n, dtype=dtype)

rng = np.random.RandomState(1)  # change the seed to see different data
x[...] = rng.uniform(-1, 1, size=x.shape)
y[...] = rng.uniform(-1, 1, size=y.shape)

# allocate OpenCL memory on the device
# (cld is a one-element array that receives the dot product)
clx = Array(queue, x.shape, x.dtype)
cly = Array(queue, y.shape, y.dtype)
cld = Array(queue, 1, x.dtype)

# copy data to device
clx.set(x)
cly.set(y)

# compute a dot product (dot)
blas.dot(queue, clx, cly, cld)

# check the result
print("Expected: ", np.dot(x, y))
print("Actual:   ", cld.get()[0])

# tidy up the BLAS
示例#20
0
# NOTE(review): `dtype`, `batch_count`, `alphas`, `np`, `cl`, `Array` and
# `pyclblast` are not defined in this excerpt; they are presumably set up
# earlier in the script.
# Number of elements per batch
n = 4

print("# Setting up OpenCL")
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

print("# Setting up Numpy arrays")
# All batches are stored in one flat vector of n * batch_count elements
x = np.random.rand(n * batch_count).astype(dtype=dtype)
y = np.random.rand(n * batch_count).astype(dtype=dtype)

print("# Batch offsets: next after each other")
# Two offsets are listed, so the assert below only holds for batch_count == 2
x_offsets = [0, n]
y_offsets = [0, n]

print("# Setting up OpenCL arrays")
clx = Array(queue, x.shape, x.dtype)
cly = Array(queue, y.shape, y.dtype)
clx.set(x)
cly.set(y)

print("# Example level-1 batched operation: AXPY-batched")
assert len(alphas) == len(x_offsets) == len(y_offsets) == batch_count
pyclblast.axpyBatched(queue, n, clx, cly, alphas, x_offsets, y_offsets)
queue.finish()

print("# Full result for vector y: %s" % str(cly.get()))
# Host-side reference: alpha * x + y over each batch's n-element window
for i in range(batch_count):
    result = alphas[i] * x[x_offsets[i]:x_offsets[i] +
                           n] + y[y_offsets[i]:y_offsets[i] + n]
    print("# Expected result batch #%d: %s" % (i, str(result)))
示例#21
0
import pyopencl
from pyopencl.array import arange, Array
from pyopencl.reduction import ReductionKernel
import numpy

ctx = pyopencl.create_some_context()
queue = pyopencl.CommandQueue(ctx)

# Host input: a (100, 1) int32 column with every fifth row set to 1.
acpu = numpy.zeros((100, 1), dtype=numpy.int32)
acpu[::5] = 1

# Copy the host data into an Array and wait for the queue to drain.
a = Array(queue, (100, 1), numpy.int32)
a.set(acpu)
queue.finish()

# Sum reduction over a single int buffer: each element maps to itself and
# pairs are combined with "+", starting from the neutral element 0.
krnl = ReductionKernel(ctx, numpy.int32, neutral="0",
                       reduce_expr="a+b", map_expr="x[i]",
                       arguments="__global int *x")

my_sum = krnl(a).get()
queue.finish()
# Function-call form of print works on both Python 2 and Python 3.
print(my_sum)


示例#22
0
文件: opencl.py 项目: vincefn/pyvkfft
 def fft(self, src: cla.Array, dest: cla.Array = None):
     """
     Run the forward FFT on a pyopencl array.

     :param src: the source pyopencl Array
     :param dest: the destination pyopencl Array. Leave as None for an inplace
         transform
     :return: the transformed array. For a R2C inplace transform, the complex view
         of the array is returned.
     :raises RuntimeError: if ``dest`` does not fit the inplace/out-of-place
         mode of this app (see the messages below).
     """
     # Scalar type applied to the "ortho" scale factor, keyed by self.precision.
     scale_types = {2: np.float16, 4: np.float32, 8: np.float64}
     rescale = self.norm == "ortho" and self.precision in scale_types

     if not self.inplace:
         if dest is None:
             raise RuntimeError(
                 "VkFFTApp.fft: dest is None but this is an out-of-place transform"
             )
         if src.data.int_ptr == dest.data.int_ptr:
             raise RuntimeError(
                 "VkFFTApp.fft: dest and src are identical but this is an out-of-place transform"
             )
         if self.r2c:
             assert (src.size == dest.size // dest.shape[-1] * 2 *
                     (dest.shape[-1] - 1))

         if self.batch_shape is None:
             s, d = src, dest
         else:
             s = src.reshape(self.batch_shape)
             if self.r2c:
                 # The destination's last axis has length N // 2 + 1.
                 c_shape = (tuple(self.batch_shape[:-1]) +
                            (self.batch_shape[-1] // 2 + 1,))
                 d = dest.reshape(c_shape)
             else:
                 d = dest.reshape(self.batch_shape)

         _vkfft_opencl.fft(self.app, int(s.data.int_ptr),
                           int(d.data.int_ptr), int(self.queue.int_ptr))
         if rescale:
             dest *= scale_types[self.precision](self._get_fft_scale(norm=0))
         return dest

     # Inplace transform: dest, when given, must share src's buffer.
     if dest is not None and src.data.int_ptr != dest.data.int_ptr:
         raise RuntimeError(
             "VkFFTApp.fft: dest is not None but this is an inplace transform"
         )

     s = src if self.batch_shape is None else src.reshape(self.batch_shape)
     buffer_ptr = int(s.data.int_ptr)
     _vkfft_opencl.fft(self.app, buffer_ptr, buffer_ptr,
                       int(self.queue.int_ptr))
     if rescale:
         src *= scale_types[self.precision](self._get_fft_scale(norm=0))

     if self.r2c:
         # Return the complex view matching the real source dtype.
         if src.dtype == np.float32:
             return src.view(dtype=np.complex64)
         if src.dtype == np.float64:
             return src.view(dtype=np.complex128)
     return src
from pyopencl.array import Array
import pyclblast
from datetime import datetime

if __name__ == "__main__":

    # Create the OpenCL context and command queue used throughout.
    ctx = cl.create_some_context()
    queue = cl.CommandQueue(ctx)

    # Host data for a small single-precision gemm.
    m, n, k = 2, 3, 4
    a = np.random.rand(m, k).astype(dtype=np.float32)
    b = np.random.rand(k, n).astype(dtype=np.float32)
    c = np.empty((m, n), np.float32)

    # Matching pyopencl arrays, each filled from its host counterpart.
    cla = Array(queue, a.shape, a.dtype)
    cla.set(a)
    clb = Array(queue, b.shape, b.dtype)
    clb.set(b)
    clc = Array(queue, c.shape, c.dtype)
    clc.set(c)

    # Parameter set handed to CLBlast's override; only 'MWG' is varied in
    # the loop below, the other entries stay fixed.
    params = {
        "KWG": 32,
        "KWI": 2,
        "MDIMA": 8,
        "MDIMC": 8,
        "MWG": 64,
        "NDIMB": 8,
        "NDIMC": 8,
        "NWG": 64,
        "SA": 0,
        "SB": 0,
        "STRM": 0,
        "STRN": 0,
        "VWM": 4,
        "VWN": 1,
    }
    device = ctx.devices[0]
    for mwg in (32, 64, 256):
        print("Running sgemm tuned with MWG = %d" % mwg)
        params["MWG"] = mwg
        pyclblast.override_parameters(device, 'Xgemm', 32, params)
        pyclblast.gemm(queue, m, n, k, cla, clb, clc, a_ld=k, b_ld=n, c_ld=n)
示例#24
0
文件: opencl.py 项目: vincefn/pyvkfft
 def ifft(self, src: cla.Array, dest: cla.Array = None):
     """
     Compute the backward FFT
     :param src: the source pyopencl Array
     :param dest: the destination pyopencl Array. Should be None for an inplace
         transform
     :return: the transformed array. For a C2R inplace transform, the float view of the
         array is returned.
     :raises RuntimeError: if ``dest`` does not fit the inplace/out-of-place
         mode of this app (see the messages below).
     """
     if self.inplace:
         if dest is not None:
             if src.data.int_ptr != dest.data.int_ptr:
                 # The message names ifft so the error points at this method.
                 raise RuntimeError(
                     "VkFFTApp.ifft: dest!=src but this is an inplace transform"
                 )
         if self.batch_shape is not None:
             if self.r2c:
                 # The source's last axis has half the batch shape's length.
                 src_shape = tuple(
                     list(self.batch_shape[:-1]) +
                     [self.batch_shape[-1] // 2])
                 s = src.reshape(src_shape)
             else:
                 s = src.reshape(self.batch_shape)
         else:
             s = src
         _vkfft_opencl.ifft(self.app, int(s.data.int_ptr),
                            int(s.data.int_ptr), int(self.queue.int_ptr))
         if self.norm == "ortho":
             if self.precision == 2:
                 src *= np.float16(self._get_ifft_scale(norm=0))
             elif self.precision == 4:
                 src *= np.float32(self._get_ifft_scale(norm=0))
             elif self.precision == 8:
                 src *= np.float64(self._get_ifft_scale(norm=0))
         if self.r2c:
             # Return the real view matching the complex source dtype.
             if src.dtype == np.complex64:
                 return src.view(dtype=np.float32)
             elif src.dtype == np.complex128:
                 return src.view(dtype=np.float64)
         return src
     if not self.inplace:
         if dest is None:
             raise RuntimeError(
                 "VkFFTApp.ifft: dest is None but this is an out-of-place transform"
             )
         elif src.data.int_ptr == dest.data.int_ptr:
             raise RuntimeError(
                 "VkFFTApp.ifft: dest and src are identical but this is an out-of-place transform"
             )
         if self.r2c:
             assert (dest.size == src.size // src.shape[-1] * 2 *
                     (src.shape[-1] - 1))
             # Special case, src and dest buffer sizes are different,
             # VkFFT is configured to go back to the source buffer
             if self.batch_shape is not None:
                 src_shape = tuple(
                     list(self.batch_shape[:-1]) +
                     [self.batch_shape[-1] // 2 + 1])
                 s = src.reshape(src_shape)
                 d = dest.reshape(self.batch_shape)
             else:
                 s, d = src, dest
             # Note the argument order: the destination pointer comes first
             # here, unlike the non-r2c call below.
             _vkfft_opencl.ifft(self.app, int(d.data.int_ptr),
                                int(s.data.int_ptr),
                                int(self.queue.int_ptr))
         else:
             if self.batch_shape is not None:
                 s = src.reshape(self.batch_shape)
                 d = dest.reshape(self.batch_shape)
             else:
                 s, d = src, dest
             _vkfft_opencl.ifft(self.app, int(s.data.int_ptr),
                                int(d.data.int_ptr),
                                int(self.queue.int_ptr))
         if self.norm == "ortho":
             if self.precision == 2:
                 dest *= np.float16(self._get_ifft_scale(norm=0))
             elif self.precision == 4:
                 dest *= np.float32(self._get_ifft_scale(norm=0))
             elif self.precision == 8:
                 dest *= np.float64(self._get_ifft_scale(norm=0))
         return dest
示例#25
0
def to_ocl(a):
    """Return a new ``Array`` on the module-level ``queue`` holding ``a``.

    The array is created with the shape and dtype of ``a`` and then filled
    through its ``set`` method.
    """
    on_device = Array(queue, a.shape, a.dtype)
    on_device.set(a)
    return on_device
示例#26
0
from __future__ import print_function

import numpy as np
import pyopencl as cl
from pyopencl.array import Array
import pyopencl_blas as blas

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

# The library has to be initialised before any BLAS call.
blas.setup()

dtype = 'float32'  # also supports 'float64'
alpha = 0.8
x = np.array([1, 2, 3, 4], dtype=dtype)
y = np.array([4, 3, 2, 1], dtype=dtype)

# Create one Array per operand and fill it from the host data.
clx = Array(queue, x.shape, x.dtype)
clx.set(x)
cly = Array(queue, y.shape, y.dtype)
cly.set(y)

# axpy on the arrays; the result is read back from cly below and compared
# with alpha * x + y computed on the host.
blas.axpy(queue, clx, cly, alpha=alpha)
print("Expected: ", alpha * x + y)
print("Actual:   ", cly.get())

# Release the library once done.
blas.teardown()
示例#27
0
import pyclblast

# Sample configuration: element type, matrix dimensions and gemv scalars
dtype = 'float32'
m, n = 4, 3
alpha, beta = 1.0, 0.0

print("# Setting up OpenCL")
ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

print("# Setting up Numpy arrays")
# a is m-by-n, x has n entries and y has m entries; generated in that order.
a, x, y = (
    np.random.rand(*dims).astype(dtype=dtype)
    for dims in ((m, n), (n,), (m,))
)

print("# Setting up OpenCL arrays")
# Allocate all three arrays first, then copy the host data across.
cla, clx, cly = (Array(queue, host.shape, host.dtype) for host in (a, x, y))
cla.set(a)
clx.set(x)
cly.set(y)

print("# Example level-2 operation: GEMV")
pyclblast.gemv(queue, m, n, cla, clx, cly, a_ld=n, alpha=alpha, beta=beta)
queue.finish()
print("# Result for vector y: %s" % cly.get())
print("# Expected result:     %s" % (alpha * np.dot(a, x) + beta * y))
示例#28
0
# NOTE(review): `n`, `dtype`, `np`, `queue`, `Array` and `blas` are not
# defined in this excerpt; they are presumably set up earlier in the script.
# Host-side operands: an n-by-n matrix and three length-n vectors
A = np.zeros((n, n), dtype=dtype)
x = np.zeros(n, dtype=dtype)
x1 = np.zeros(n, dtype=dtype)
x2 = np.zeros(n, dtype=dtype)

rng = np.random.RandomState(1)  # change the seed to see different data
A[...] = rng.uniform(-1, 1, size=A.shape)
x[...] = rng.uniform(-1, 1, size=x.shape)
x1[...] = rng.uniform(-1, 1, size=x1.shape)
x2[...] = rng.uniform(-1, 1, size=x2.shape)

# Keep the upper triangle separately, then reduce A to its lower triangle
A_upper = np.triu(A)
A = np.tril(A)

# allocate OpenCL memory on the device
clA = Array(queue, A.shape, A.dtype)
clA_upper = Array(queue, A.shape, A.dtype)
clx = Array(queue, x.shape, x.dtype)
clx1 = Array(queue, x1.shape, x1.dtype)
clx2 = Array(queue, x2.shape, x2.dtype)

# copy data to device
# (clx1 and clx2 are not filled within this excerpt)
clA.set(A)
clA_upper.set(A_upper)
clx.set(x)

# compute a triangular solve (trsv)
blas.trsv(queue, clA, clx)

# check the result
print("Expected: ", np.linalg.solve(A, x))