def main_ufunc():
    """Demo: apply the ``add`` ufunc to two device arrays, then reduce one.

    Builds two float arrays on a GPU context, adds them, prints the result,
    and finally prints the host-side and device-side sums of ``a`` so they
    can be compared by eye.
    """
    ctx = cl.Context(device_type=cl.Device.GPU)
    size = 10

    a = ca.arange(ctx, size, ctype=c_float)
    # ``b`` holds the same values reshaped to [size, 1]; adding it to ``a``
    # presumably broadcasts to a [size, size] result -- confirm against ``add``.
    b = ca.arange(ctx, size, ctype=c_float).reshape([size, 1])

    o1 = add(a, b)
    with o1.map() as view:
        print(view)

    # Host-side reference sum of ``a``.
    with a.map() as view:
        print(np.sum(view))

    result = add.reduce(a)
    # Drain the result's queue before reading anything back.
    result.queue.finish()

    with a.map() as view:
        print(view)
        print(view.sum())

    # Device-side sum; should match the host sums printed above.
    print(result.item())
def setUpModule():
    """Create the module-wide OpenCL context shared by the tests.

    The device type name is read from the ``DEVICE_TYPE`` environment
    variable (``'DEFAULT'`` when unset) and looked up as an attribute of
    ``cl.Device``. The resulting context is stored in the global ``ctx``
    and its devices are printed.
    """
    global ctx

    type_name = os.environ.get('DEVICE_TYPE', 'DEFAULT')
    ctx = cl.Context(device_type=getattr(cl.Device, type_name))
    print(ctx.devices)
def main():
    """Run the ``generate_sin`` kernel on a 256-element buffer and print it."""
    ctx = cl.Context()
    a = cl.empty(ctx, [256], cly.float2)
    queue = cl.Queue(ctx)

    generate_sin(queue, a)

    # Print while the buffer is still mapped: ``np.asarray`` wraps the mapped
    # view rather than owning its own data (presumably no copy is made).
    with a.map(queue) as view:
        array = np.asarray(view)
        print(array)
def main_reduce():
    """Demo: compare ``add.reduce`` on the device with a host-side sum.

    Prints one line holding the array size, the host sum and the device sum,
    separated by single spaces.
    """
    ctx = cl.Context(device_type=cl.Device.GPU)
    # Named ``reduce_sum`` so the builtin ``sum`` is not shadowed.
    reduce_sum = add.reduce

    # A single size is exercised; an earlier revision swept range(250, 258).
    size = 1027
    a = ca.arange(ctx, size, ctype=cl.cl_int)

    result = reduce_sum(a)

    with a.map() as view:
        # One formatted string prints identically on Python 2 and 3.
        print("%s %s %s" % (size, view.sum(), result.item()))
def main():
    """Demo: evaluate ``a + a + 1`` on the device with ``ca.blitz``.

    Prints a host-side numpy expression first, then the contents of the
    device output buffer.
    """
    ctx = cl.Context(device_type=cl.Device.GPU)
    queue = cl.Queue(ctx)

    npa = np.arange(1.0 * 12.0, dtype=c_float)
    a = ca.arange(ctx, 12, ctype=c_float)

    out = ca.empty_like(a[:])
    output = cl.broadcast(out, a[:].shape)

    ca.blitz(queue, lambda: a[:] + a[:] + 1, out=output)

    # NOTE(review): this host expression yields 11 values (2*i + 1 for
    # i in 0..10) while the device expression above covers all 12 elements;
    # the overlapping values agree. Confirm which form is intended.
    print(npa[1:] + npa[:-1])

    with out.map() as view:
        print(view)
def main():
    """Print the generated source for ``setslice`` and dump a device buffer."""
    ctx = cl.Context(device_type=cl.Device.GPU)
    ret = cl.empty(ctx, [16], 'l')
    queue = cl.Queue(ctx)

    # ``source_only=True`` asks for the generated kernel source instead of a
    # compiled kernel (presumably -- confirm against ``compile``).
    source = setslice.compile(ctx, a=cl.global_memory('l'), value=c_int,
                              source_only=True)
    print(source)

    # The kernel launches are currently disabled, so ``ret`` is printed
    # without ever being written by ``setslice``:
    # print(setslice(queue, ret[::2], c_int(6)))
    # print(setslice(queue, ret[1::2], c_int(5)))

    with ret.map(queue) as foo:
        print(np.asarray(foo))
def main():
    """Demo: add two random host vectors on the device and check the result.

    Copies ``a`` and ``b`` to the device, runs a hand-written OpenCL ``add``
    kernel, and prints the summed difference between the device result and
    the numpy result (expected to be zero).
    """
    size = 10
    a = np.random.rand(size).astype('f')
    b = np.random.rand(size).astype('f')

    ctx = cl.Context()
    queue = cl.Queue(ctx)

    cla = cl.from_host(ctx, a, copy=True)
    clb = cl.from_host(ctx, b, copy=True)
    clc = cl.empty(ctx, [size], ctype='f')

    # OpenCL C source of the element-wise add kernel (text kept unchanged).
    kernel_source = (
        " __kernel void add(__global const float *a,"
        " __global const float *b, __global float *c)"
        " { int gid = get_global_id(0);"
        " c[gid] = a[gid] + b[gid]; } "
    )
    prg = cl.Program(ctx, kernel_source).build()

    # Describe the kernel's signature so it can be called with keywords and
    # derive its launch size from the ``a`` argument.
    add = prg.add
    add.argtypes = (cl.global_memory('f'), cl.global_memory('f'),
                    cl.global_memory('f'))
    add.argnames = 'a', 'b', 'c'
    add.global_work_size = lambda a: a.shape

    add(queue, a=cla, b=clb, c=clc)

    with clc.map(queue) as view:
        print("view is a python memoryview object %s" % (view,))
        arr = np.asarray(view)
        print("Answer should be zero:")
        # The whole expression is wrapped: ``print(x).sum()`` would call
        # ``.sum()`` on print's return value under Python 3.
        print((arr - (a + b)).sum())
def main():
    """Demo: reduce a device array with ``+`` and ``*`` and compare to numpy.

    Prints the input data, then the host and device sums, then the host and
    device products.
    """
    ctx = cl.Context(device_type=cl.Device.GPU)
    queue = cl.Queue(ctx)

    # Values 1..8, so both the sum and the product are easy to check by eye.
    host_init = np.arange(8, dtype=c_float) + 1
    device_input = cl.from_host(ctx, host_init)

    output = ca.reduce(queue, lambda a, b: a + b, device_input)

    print("-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- ")
    print("data: %s" % (host_init,))
    print("-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- ")
    print("host sum: %s" % (host_init.sum(),))

    with output.map(queue) as view:
        print("device sum: %s" % (np.asarray(view).item(),))

    # The product reduction passes ``initial=1.0``, unlike the sum above.
    output = ca.reduce(queue, lambda a, b: a * b, device_input, initial=1.0)

    print("host product: %s" % (host_init.prod(),))

    with output.map(queue) as view:
        print("device product: %s" % (np.asarray(view).item(),))
def main():
    """Smoke test: build a device ``arange`` on a GPU context."""
    import opencl as cl

    gpu_context = cl.Context(device_type=cl.Device.GPU)
    # The result is not used further; the call itself is the exercise.
    values = arange(gpu_context, 10.0)
'''
Created on Dec 15, 2011

@author: sean
'''
import opencl as cl
import clyther as cly
import clyther.runtime as clrt

#Always have to create a context.
ctx = cl.Context()


# Kernel that writes one point of a sine curve per work item into ``a``.
# The launch size is derived from the argument: one work item per element
# of ``a`` (``[a.size]``).
# NOTE(review): comments are used instead of a docstring because this
# function is processed by ``cly.kernel``; confirm whether a docstring in
# the body would be accepted.
@cly.global_work_size(lambda a: [a.size])
@cly.kernel
def generate_sin(a):
    # Index of this work item and total number of work items, dimension 0.
    gid = clrt.get_global_id(0)
    n = clrt.get_global_size(0)

    # Position of this work item as a fraction of the whole range.
    r = cl.cl_float(gid) / cl.cl_float(n)

    # sin wave with 8 peaks
    y = r * cl.cl_float(16.0 * 3.1415)

    # x is a range from -1 to 1
    a[gid].x = r * 2.0 - 1.0

    # y is sin wave
    # NOTE(review): no statement follows this comment in the visible source,
    # so ``y`` is computed but never stored; the assignment to ``a[gid].y``
    # appears to be missing or cut off -- confirm against the full file.
# IPython log file
# Creates an OpenCL context that shares with the current OpenGL context,
# allocates a GL-backed buffer and prints its mapped contents.
import numpy as np
import opencl as cl
from PySide.QtGui import *
from PySide.QtOpenGL import *
from OpenGL import GL

# Make a GL context current before the OpenCL properties are filled in
# (presumably required by ``set_opengl_properties`` -- confirm).
app = QApplication([])
qgl = QGLWidget()
qgl.makeCurrent()

props = cl.ContextProperties()
cl.gl.set_opengl_properties(props)

ctx = cl.Context(device_type=cl.Device.DEFAULT, properties=props)

# print(cl.ImageFormat.supported_formats(ctx))
print(ctx.devices)

view = cl.gl.empty_gl(ctx, [10], ctype='ff')
view2 = cl.empty(ctx, [10], ctype='ff')

# Bare expression left over from the interactive session; nothing is printed
# when this runs as a script.
view.shape
print(view)

queue = cl.Queue(ctx)

# The GL buffer is acquired for OpenCL use before it is mapped. The mapping
# is named ``mapped`` so the Python 2 builtin ``buffer`` is not shadowed.
with cl.gl.acquire(queue, view), view.map(queue) as mapped:
    print(np.asarray(mapped))
import clyther as cly
import opencl as cl
import clyther.runtime as clrt
import numpy as np


# Kernel that fills a 2-D buffer so each element encodes its own indices:
# a[x, y] = x + y * 100. The launch size is the shape of ``a``.
# NOTE(review): comments are used instead of a docstring because this
# function is processed by ``cly.kernel``; confirm whether a docstring in
# the body would be accepted.
@cly.global_work_size(lambda a: a.shape)
@cly.kernel
def foo(a):
    x = clrt.get_global_id(0)
    y = clrt.get_global_id(1)
    a[x, y] = x + y * 100


ctx = cl.Context(device_type=cl.Device.CPU)
queue = cl.Queue(ctx)

a = cl.empty(ctx, [4, 4], 'f')
foo(queue, a)

# Show the source generated for a float global-memory argument.
print(foo._compile(ctx, a=cl.global_memory('f'), source_only=True))

with a.map(queue) as view:
    print(np.asarray(view))