def test_ctype(self):
    """A slice of a device buffer must expose the very same ctype object."""
    parent = cl.empty(ctx, [2], cl.cl_float2)
    tail = parent[1:]
    # Identity (not mere equality) is what is being asserted here.
    self.assertIs(parent.ctype, tail.ctype)
def test_call(self):
    """Call the ``generate_sin`` kernel directly and compare with NumPy.

    The kernel is launched twice: once with an explicit
    ``global_work_size`` keyword and once after a ``global_work_size``
    callable has been attached to the kernel.  Calling it with neither is
    expected to raise ``TypeError``.
    """
    reference = np.zeros([10], dtype=[('x', np.float32), ('y', np.float32)])
    reference['x'] = np.arange(10)
    reference['y'] = np.sin(reference['x'] / 10)

    program = Program(ctx, source=source)
    program.build()

    generate_sin = program.kernel('generate_sin')
    generate_sin.argtypes = [global_memory(), ctypes.c_float]

    buf = empty(ctx, [10], ctype=cl.cl_float2)
    queue = Queue(ctx, ctx.devices[0])
    size = [buf.size]

    def assert_buffer_matches_reference():
        # Map the device buffer to the host and compare both columns.
        with buf.map(queue) as host:
            device_values = np.asarray(host)
            self.assertTrue(np.all(reference['x'] == device_values[:, 0]))
            self.assertTrue(np.allclose(reference['y'], device_values[:, 1]))

    # No work size supplied and none attached to the kernel yet.
    self.assertRaises(TypeError, generate_sin, queue, buf, 1.0)

    generate_sin(queue, buf, 1.0, global_work_size=size)
    assert_buffer_matches_reference()

    # Once a work-size callable is attached the keyword may be omitted.
    generate_sin.global_work_size = lambda a, scale: [a.size]
    generate_sin(queue, buf, 1.0)
    assert_buffer_matches_reference()
def main():
    """Run ``generate_sin`` on a 256-element float2 buffer and print it."""
    ctx = cl.Context()
    device_buf = cl.empty(ctx, [256], cly.float2)
    queue = cl.Queue(ctx)

    generate_sin(queue, device_buf)

    # Map the buffer to the host only for as long as it takes to print it.
    with device_buf.map(queue) as host_view:
        print(np.asarray(host_view))
def test_size(self):
    """Check the size bookkeeping of a freshly allocated buffer of shape [4]."""
    buf = empty(ctx, [4])

    self.assertEqual(buf._refcount, 1)
    self.assertEqual(len(buf), 4 / buf.itemsize)
    self.assertEqual(buf.mem_size, 4)

    info = buf.array_info
    shape_part, stride_part = info[:4], info[4:]
    self.assertEqual(shape_part, [4, 0, 0, 4])   # shape
    self.assertEqual(stride_part, [1, 0, 0, 0])  # strides
def main():
    """Print the generated source of ``setslice`` and a 16-element buffer.

    The buffer comes from ``cl.empty`` and no kernel is launched on it in
    this function (the launches below are disabled), so only the generated
    source is deterministic output.
    """
    ctx = cl.Context(device_type=cl.Device.GPU)
    target = cl.empty(ctx, [16], "l")
    queue = cl.Queue(ctx)

    # source_only=True: request the source rather than a runnable kernel.
    generated = setslice.compile(ctx, a=cl.global_memory("l"), value=c_int, source_only=True)
    print(generated)

    # Disabled kernel launches:
    # print setslice(queue, ret[::2], c_int(6))
    # print setslice(queue, ret[1::2], c_int(5))

    with target.map(queue) as host_view:
        print(np.asarray(host_view))
def __call__(self, x, out=None, queue=None):
    """Apply this unary function element-wise to ``x`` on the device.

    :param x: a ``cl.DeviceMemoryView``; anything else is uploaded with
        ``cl.from_host`` first.
    :param out: optional output buffer; when omitted one is allocated with
        the shape and format of ``x``.
    :param queue: command queue to use; defaults to ``x.queue``.
    :return: ``out`` viewed as a ``CLArray``.
    """
    # NOTE(review): x.queue is read before the host-array conversion below,
    # so a host array presumably needs an explicit queue -- confirm.
    queue = x.queue if queue is None else queue

    if not isinstance(x, cl.DeviceMemoryView):
        x = cl.from_host(queue.context, x)

    if out is None:
        out = cl.empty(queue.context, x.shape, x.format)

    unary_ufunc_kernel(queue, self.device_func, x, out)

    result = CLArray._view_as_this(out)
    result.__array_init__(queue)
    return result
def reduce(self, context, x, out=None, initial=0.0, queue=None):
    """Reduce ``x`` with ``self.device_func`` into a one-element buffer.

    :param context: context handed to ``__array_init__`` of the result.
    :param x: a ``cl.DeviceMemoryView``; anything else is uploaded with
        ``cl.from_host`` first.
    :param out: optional output buffer; defaults to a new ``[1]`` buffer
        with the format of ``x``.
    :param initial: accepted but not read anywhere in this method.
    :param queue: command queue to use; defaults to ``x.queue``.
    :return: ``out`` viewed as a ``CLArray``.
    """
    queue = x.queue if queue is None else queue
    if not isinstance(x, cl.DeviceMemoryView):
        x = cl.from_host(queue.context, x)

    # kernel arguments: output, input, shared, group_size
    size = x.size
    shared = cl.local_memory(x.ctype, ndim=1, shape=[size])

    # Half of the first power of two strictly greater than size // 2,
    # or 512 when size // 2 is already at least 512.
    half = size // 2
    group_size = next(
        (limit // 2
         for limit in (2, 4, 8, 16, 32, 64, 128, 256, 512)
         if half < limit),
        512)

    if out is None:
        out = cl.empty(queue.context, [1], x.format)

    kernel = reduce_kernel.compile(
        queue.context,
        function=self.device_func,
        output=cl.global_memory(out.ctype, flat=True),
        array=cl.global_memory(x.ctype, flat=True),
        shared=shared,
        group_size=cl.cl_uint,
        cly_meta=self.device_func.func_name)

    # Never request more work-items per group than the device reports.
    device_limit = kernel.work_group_size(queue.device)
    group_size = min(device_limit, group_size)

    kernel(queue, out, out.array_info, x, x.array_info,
           shared, shared.local_info, group_size)
    # reduce_kernel(queue, self.device_func, out, x, shared, group_size)

    result = CLArray._view_as_this(out)
    result.__array_init__(context, queue)
    return result
def main():
    """Print the generated source of ``setslice`` and a 16-element buffer.

    No kernel is launched on the buffer in this function (the launches
    below are disabled); it is mapped and printed as allocated.
    """
    ctx = cl.Context(device_type=cl.Device.GPU)
    target = cl.empty(ctx, [16], 'l')
    queue = cl.Queue(ctx)

    # source_only=True: request the source rather than a runnable kernel.
    generated = setslice.compile(ctx, a=cl.global_memory('l'), value=c_int, source_only=True)
    print(generated)

    # Disabled kernel launches:
    # print setslice(queue, ret[::2], c_int(6))
    # print setslice(queue, ret[1::2], c_int(5))

    with target.map(queue) as host_view:
        print(np.asarray(host_view))
def test_set_args(self):
    """Bind kernel arguments with ``set_args`` and enqueue manually.

    Three binding styles are exercised in turn: positional, by keyword
    (after ``argnames`` is set), and keyword with a default taken from
    ``__defaults__``.  Omitting ``scale`` before a default exists is
    expected to raise ``TypeError``.
    """
    program = Program(ctx, source=source)
    program.build()

    generate_sin = program.kernel('generate_sin')
    generate_sin.argtypes = [global_memory(), ctypes.c_float]

    buf = empty(ctx, [10], ctype=cl.cl_float2)
    queue = Queue(ctx, ctx.devices[0])

    reference = np.zeros([10], dtype=[('x', np.float32), ('y', np.float32)])
    reference['x'] = np.arange(10)
    reference['y'] = np.sin(reference['x'] / 10)

    def launch_and_verify():
        # Enqueue with the currently bound arguments, then compare on the host.
        queue.enqueue_nd_range_kernel(generate_sin, 1, global_work_size=[buf.size])
        with buf.map(queue) as host:
            device_values = np.asarray(host)
            self.assertTrue(np.all(reference['x'] == device_values[:, 0]))
            self.assertTrue(np.allclose(reference['y'], device_values[:, 1]))

    # 1. positional arguments
    generate_sin.set_args(buf, 1.0)
    launch_and_verify()

    # 2. keyword arguments, available once the names are declared
    generate_sin.argnames = ['a', 'scale']
    generate_sin.set_args(a=buf, scale=1.0)
    launch_and_verify()

    # 3. a missing argument is an error until a default is provided
    self.assertRaises(TypeError, generate_sin.set_args, a=buf)

    generate_sin.__defaults__ = [1.0]
    generate_sin.set_args(a=buf)
    launch_and_verify()
def reduce(queue, function, input, initial=0.0):
    '''
    reduce(queue, function, input[, initial]) -> one-element device buffer

    Allocate the scratch and result storage and delegate to ``cl_reduce``:
    local memory holding ``input.size`` items of ``input.format``, a ``[1]``
    output buffer of the same format, and a group size of
    ``input.size // 2``.  ``function`` and ``initial`` are forwarded to
    ``cl_reduce`` unchanged.

    The buffer is returned as-is; this function does not copy the result
    back to the host.
    '''
    n_items = input.size
    scratch = cl.local_memory(input.format, [n_items])
    result = cl.empty(queue.context, [1], input.format)
    cl_reduce(queue, function, result, input, scratch, n_items // 2, initial)
    return result
def reduce(queue, function, input, initial=0.0):
    '''
    reduce(queue, function, input[, initial]) -> one-element device buffer

    Thin front end for ``cl_reduce``.  It prepares:

    * local memory for ``input.size`` items of ``input.format``;
    * a ``[1]`` output buffer of ``input.format`` in ``queue.context``;
    * a group size equal to ``input.size // 2``.

    ``function`` and ``initial`` are passed through to ``cl_reduce``.  The
    output buffer itself is returned, not a host-side value.
    '''
    element_count = input.size
    local_scratch = cl.local_memory(input.format, [element_count])
    out_buffer = cl.empty(queue.context, [1], input.format)
    half = element_count // 2
    cl_reduce(queue, function, out_buffer, input, local_scratch, half, initial)
    return out_buffer
def main():
    """Add two random float vectors on the device and print the residual.

    The residual is the sum of ``device_result - (a + b)`` computed on the
    host.
    """
    size = 10
    host_a = np.random.rand(size).astype('f')
    host_b = np.random.rand(size).astype('f')

    ctx = cl.Context()
    queue = cl.Queue(ctx)

    dev_a = cl.from_host(ctx, host_a, copy=True)
    dev_b = cl.from_host(ctx, host_b, copy=True)
    dev_c = cl.empty(ctx, [size], ctype='f')

    kernel_source = """ __kernel void add(__global const float *a, __global const float *b, __global float *c) { int gid = get_global_id(0); c[gid] = a[gid] + b[gid]; } """
    prg = cl.Program(ctx, kernel_source).build()

    add = prg.add
    add.argtypes = tuple(cl.global_memory('f') for _ in range(3))
    add.argnames = ('a', 'b', 'c')
    # The launch size is the shape of the argument named ``a``.
    add.global_work_size = lambda a: a.shape

    add(queue, a=dev_a, b=dev_b, c=dev_c)

    with dev_c.map(queue) as view:
        print("%s %s" % ("view is a python memoryview object", view))
        device_sum = np.asarray(view)
        print("Answer should be zero:")
        print((device_sum - (host_a + host_b)).sum())
def main():
    """Vector-add demo: upload two arrays, run ``add``, check on the host."""
    size = 10
    lhs = np.random.rand(size).astype('f')
    rhs = np.random.rand(size).astype('f')

    ctx = cl.Context()
    queue = cl.Queue(ctx)

    cl_lhs = cl.from_host(ctx, lhs, copy=True)
    cl_rhs = cl.from_host(ctx, rhs, copy=True)
    cl_out = cl.empty(ctx, [size], ctype='f')

    program_text = """ __kernel void add(__global const float *a, __global const float *b, __global float *c) { int gid = get_global_id(0); c[gid] = a[gid] + b[gid]; } """
    prg = cl.Program(ctx, program_text).build()

    add = prg.add
    float_buffer = cl.global_memory
    add.argtypes = (float_buffer('f'), float_buffer('f'), float_buffer('f'))
    add.argnames = ('a', 'b', 'c')
    # Work size is derived from the argument bound to ``a``.
    add.global_work_size = lambda a: a.shape

    add(queue, a=cl_lhs, b=cl_rhs, c=cl_out)

    with cl_out.map(queue) as view:
        print("%s %s" % ("view is a python memoryview object", view))
        result = np.asarray(view)
        print("Answer should be zero:")
        residual = (result - (lhs + rhs)).sum()
        print(residual)
import clyther as cly
import opencl as cl
import clyther.runtime as clrt
import numpy as np


# The launch size is taken from the shape of the kernel's argument.
@cly.global_work_size(lambda a: a.shape)
@cly.kernel
def foo(a):
    # Each work-item writes x + 100 * y at its own (x, y) position.
    x = clrt.get_global_id(0)
    y = clrt.get_global_id(1)
    a[x, y] = x + y * 100


ctx = cl.Context(device_type=cl.Device.CPU)
queue = cl.Queue(ctx)

a = cl.empty(ctx, [4, 4], 'f')

foo(queue, a)

# source_only=True: print the source produced for this argument type.
print(foo._compile(ctx, a=cl.global_memory('f'), source_only=True))

with a.map(queue) as view:
    print(np.asarray(view))
def empty(context, shape, ctype='f', cls=CLArray, queue=None):
    """Allocate a device buffer with ``cl.empty`` and return it as ``cls``.

    :param context: context to allocate in; also passed to
        ``__array_init__``.
    :param shape: shape forwarded to ``cl.empty``.
    :param ctype: element type forwarded to ``cl.empty``.
    :param cls: array class providing ``_view_as_this``.
    :param queue: queue passed to ``__array_init__``.
    :return: the initialised ``cls`` view of the new buffer.
    """
    raw_buffer = cl.empty(context, shape, ctype)
    wrapped = cls._view_as_this(raw_buffer)
    wrapped.__array_init__(context, queue)
    return wrapped
r = c_float(gid) / c_float(n) # sin wave with 8 peaks y = r * c_float(16.0 * 3.1415) # x is a range from -1 to 1 a[gid].x = r * 2.0 - 1.0 # y is sin wave a[gid].y = clrt.native_sin(y) queue = cl.Queue(ctx) a = cl.empty(ctx, [200], cly.float2) event = generate_sin(queue, a) event.wait() print a with a.map(queue) as view: print np.asarray(view) #=============================================================================== # Plotting #=============================================================================== from maka import roo ctx = roo.start()
from OpenGL import GL

# A Qt GL widget is created and made current before the OpenCL context
# is built from the OpenGL properties below.
app = QApplication([])
qgl = QGLWidget()
qgl.makeCurrent()

props = cl.ContextProperties()
cl.gl.set_opengl_properties(props)

ctx = cl.Context(device_type=cl.Device.DEFAULT, properties=props)

#print cl.ImageFormat.supported_formats(ctx)
print(ctx.devices)

# ``view`` comes from cl.gl.empty_gl, ``view2`` from plain cl.empty.
view = cl.gl.empty_gl(ctx, [10], ctype='ff')
view2 = cl.empty(ctx, [10], ctype='ff')

view.shape
print(view)

queue = cl.Queue(ctx)

# The GL-backed buffer is acquired for OpenCL before it is mapped.
with cl.gl.acquire(queue, view), view.map(queue) as buffer:
    print(np.asarray(buffer))

print('')
print('%s %s' % ('cl.gl.is_gl_object: view2', cl.gl.is_gl_object(view2)))
print('%s %s' % ('cl.gl.is_gl_object: view ', cl.gl.is_gl_object(view)))
print('%s %s' % ('cl.gl.get_gl_name', cl.gl.get_gl_name(view)))
print('')
n = clrt.get_global_size(0) r = c_float(gid) / c_float(n) # sin wave with 8 peaks y = r * c_float(16.0 * 3.1415) # x is a range from -1 to 1 a[gid].x = r * 2.0 - 1.0 # y is sin wave a[gid].y = clrt.native_sin(y) queue = cl.Queue(ctx) a = cl.empty(ctx, [200], cly.float2) event = generate_sin(queue, a) event.wait() print a with a.map(queue) as view: print np.asarray(view) #=============================================================================== # Plotting #=============================================================================== from maka import roo ctx = roo.start()
# The launch size is taken from the shape of the kernel's argument.
@cly.global_work_size(lambda a: a.shape)
@cly.kernel
def generate_sin(a):
    gid = clrt.get_global_id(0)
    n = clrt.get_global_size(0)

    # r is this work-item's position as a fraction of the global size
    r = c_float(gid) / c_float(n)

    # sine argument spanning 8 oscillations over the whole range
    y = r * c_float(16.0 * 3.1415)

    # x component runs from -1 to 1
    a[gid].x = r * 2.0 - 1.0

    # y component is the sine wave
    a[gid].y = sin(y)


queue = cl.Queue(ctx)

a = cl.empty(ctx, [200], cl.cl_float2)

# The call returns an event; wait for it before touching the buffer.
event = generate_sin(queue, a)
event.wait()

print(a)

with a.map(queue) as view:
    print(np.asarray(view))
ctx = cl.Context()


# One work-item per element: the launch size is the argument's shape.
@cly.global_work_size(lambda a: a.shape)
@cly.kernel
def generate_sin(a):
    gid = clrt.get_global_id(0)
    n = clrt.get_global_size(0)

    # fractional position of this work-item within the global range
    r = c_float(gid) / c_float(n)

    # 8 oscillations across the buffer
    y = r * c_float(16.0 * 3.1415)

    # x goes from -1 to 1
    a[gid].x = r * 2.0 - 1.0

    # y is the sine of the scaled position
    a[gid].y = sin(y)


queue = cl.Queue(ctx)
a = cl.empty(ctx, [200], cl.cl_float2)

# Block on the returned event so the buffer is ready before it is printed.
event = generate_sin(queue, a)
event.wait()

print(a)

with a.map(queue) as view:
    print(np.asarray(view))