Example #1
def arange(ctx, *args, **kwargs):
    '''
    arange(ctx, [start,] stop[, step], ctype='f', queue=None)

    Return evenly spaced values within the half-open interval
    [start, stop), computed on the device; analogous to numpy.arange.
    '''
    start = 0.0
    step = 1.0
    
    if len(args) == 1:
        stop = args[0]  
    elif len(args) == 2:
        start = args[0]
        stop = args[1]
    elif len(args) == 3:
        start = args[0]
        stop = args[1]
        step = args[2]
    else:
        # +1 counts the leading `ctx` argument, matching the caller's view
        raise TypeError("wrong number of arguments: expected between 2 and 4 (got %i)" % (len(args) + 1))
    
    # number of elements needed to cover [start, stop) with the given step
    size = int(math.ceil((stop - start) / float(step)))
    
    ctype = kwargs.get('ctype', 'f')
    
    queue = kwargs.get('queue', None)
    if queue is None:
        queue = cl.Queue(ctx) 

    arr = empty(ctx, [size], ctype=ctype, queue=queue)
    
    _arange(queue, arr, start, step)
    
    return arr
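
A minimal usage sketch of the wrapper above, assuming the same `cl` bindings and `numpy as np` are importable in the calling module:

# hypothetical usage of arange as defined above
ctx = cl.Context()
evens = arange(ctx, 0.0, 10.0, 2.0, ctype='f')  # start, stop, step

queue = cl.Queue(ctx)
with evens.map(queue) as view:
    print np.asarray(view)  # -> [ 0.  2.  4.  6.  8.]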
Example #2
 def __init__(self, context, queue=None):
     self._context = context
     self.events = []
     
     # fall back to a fresh queue when the caller does not supply one
     if queue is None:
         queue = cl.Queue(context)
         
     self.queue = queue
Example #3
def main():
    ctx = cl.Context()
    a = cl.empty(ctx, [256], cly.float2)

    queue = cl.Queue(ctx)

    generate_sin(queue, a)

    with a.map(queue) as view:
        array = np.asarray(view)
        print array
Example #4
def linspace(ctx, start, stop, num=50, ctype='f', queue=None):
    '''
    linspace(ctx, start, stop, num=50, ctype='f', queue=None)

    Return `num` evenly spaced samples between `start` and `stop`,
    computed on the device; analogous to numpy.linspace.
    '''
    
    if queue is None:
        queue = cl.Queue(ctx) 

    arr = empty(ctx, [num], ctype=ctype, queue=queue)
    _linspace(queue, arr, float(start), float(stop))
    
    return arr
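
A similar sketch for linspace (same assumptions as the arange example above):

# hypothetical usage of linspace as defined above
ctx = cl.Context()
samples = linspace(ctx, 0.0, 1.0, num=5)

queue = cl.Queue(ctx)
with samples.map(queue) as view:
    print np.asarray(view)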
Example #5
    def test_broadcast_0D(self):

        with self.assertRaises(TypeError):
            cl.broadcast(None, [1])

        one = cl.from_host(ctx, c_int(1))

        # a 0-D scalar broadcast to (10, 10) is represented with zero strides
        a = cl.broadcast(one, [10, 10])
        self.assertEqual(a.shape, (10, 10))
        self.assertEqual(a.strides, (0, 0))

        queue = cl.Queue(ctx)
        with a.map(queue) as view:
            b = np.asarray(view)
            self.assertEqual(b.shape, (10, 10))
            self.assertEqual(b.strides, (0, 0))
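
For comparison, plain NumPy represents the same 0-D broadcast with zero strides; np.broadcast_to (NumPy 1.10+) is used here purely as a host-side analogy:

import numpy as np

one = np.array(1, dtype=np.int32)   # 0-D scalar
b = np.broadcast_to(one, (10, 10))  # a view; no data is copied
print b.shape                        # -> (10, 10)
print b.strides                      # -> (0, 0)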
Example #6
def main():
    ctx = cl.Context(device_type=cl.Device.GPU)
    queue = cl.Queue(ctx)

    npa = np.arange(12, dtype=c_float)
    a = ca.arange(ctx, 12, ctype=c_float)

    out = ca.empty_like(a[:])
    output = cl.broadcast(out, a[:].shape)

    ca.blitz(queue, lambda: a[:] + a[:] + 1, out=output)

    # host-side reference for the device expression computed by blitz above
    print npa + npa + 1

    with out.map() as view:
        print view
Example #7
def initialize():
    global coords_dev, n_vertices

    ctx = cl.gl.context()

    coords_dev = cl.gl.empty_gl(ctx, [n_vertices], ctype=float2)

    glClearColor(1, 1, 1, 1)
    glColor(0, 0, 1)

    queue = cl.Queue(ctx)

    # OpenCL must acquire the shared GL buffer before a kernel may write to it
    with cl.gl.acquire(queue, coords_dev):
        generate_sin(queue, coords_dev)

    glEnableClientState(GL_VERTEX_ARRAY)
Example #8
def main():

    ctx = cl.Context(device_type=cl.Device.GPU)

    ret = cl.empty(ctx, [16], 'l')

    queue = cl.Queue(ctx)

    print setslice.compile(ctx,
                           a=cl.global_memory('l'),
                           value=c_int,
                           source_only=True)

    #    print setslice(queue, ret[::2], c_int(6))
    #    print setslice(queue, ret[1::2], c_int(5))

    # note: the setslice calls above are commented out, so `ret` is uninitialized here
    with ret.map(queue) as view:
        print np.asarray(view)
Example #9
    def test_broadcast_2D(self):

        with self.assertRaises(TypeError):
            cl.broadcast(None, [1])

        npa = np.arange(10, dtype=c_float)
        z = np.zeros([10, 1])

        ten = cl.from_host(ctx, npa)

        # broadcasting the 1-D row across 10 rows: axis 0 gets stride 0
        a = cl.broadcast(ten, [10, 10])
        self.assertEqual(a.shape, (10, 10))
        self.assertEqual(a.strides, (0, sizeof(c_float)))

        queue = cl.Queue(ctx)
        with a.map(queue) as view:
            b = np.asarray(view)
            self.assertEqual(b.shape, (10, 10))
            self.assertEqual(b.strides, (0, sizeof(c_float)))
            self.assertTrue(np.all(b == z + npa))
Example #10
    def test_from_host_no_copy(self):

        a = np.array([[1, 2], [3, 4]])

        refcount = sys.getrefcount(a)

        # with copy=False the device buffer keeps the host array alive,
        # so its refcount rises by one
        clmem = cl.from_host(ctx, a, copy=False)

        #        event = PyEvent()
        #        def set_event(mem):
        #            event.set()

        #        clmem.add_destructor_callback(set_event)

        self.assertEqual(refcount + 1, sys.getrefcount(a))

        del clmem
        gc.collect()

        #        self.assertTrue(event.wait(1), 'event timed out. destructor_callback not called')

        self.assertEqual(refcount, sys.getrefcount(a))

        clmem = cl.from_host(ctx, a, copy=False)

        view_a = memoryview(a)

        self.assertEqual(clmem.format, view_a.format)
        self.assertEqual(clmem.shape, view_a.shape)
        self.assertEqual(clmem.strides, view_a.strides)

        queue = cl.Queue(ctx)

        if queue.device.host_unified_memory:
            a[0, 0] = 100
            with clmem.map(queue) as view:
                b = np.asarray(view)
                self.assertEqual(b[0, 0], 100)
        else:
            #TODO: should there be a test here?
            pass
Example #11
 def __call__(self, queue_or_context, *args, **kwargs):
     '''
     Call this kernel as a function.

     :param queue_or_context: a queue or a context. If a context is given,
         a temporary queue is created and finished before this method
         returns, making the call synchronous.

     :return: an OpenCL event.
     '''
     if isinstance(queue_or_context, cl.Context):
         queue = cl.Queue(queue_or_context)
     else:
         queue = queue_or_context
          
     argnames = self.func.func_code.co_varnames[:self.func.func_code.co_argcount]
     defaults = self.func.func_defaults
     
     kwargs_ = kwargs.copy()
     kwargs_.pop('global_work_size', None)
     kwargs_.pop('global_work_offset', None)
     kwargs_.pop('local_work_size', None)
     
     arglist = cl.kernel.parse_args(self.func.__name__, args, kwargs_, argnames, defaults)
     
     kwarg_types = {argnames[i]:typeof(queue.context, arglist[i]) for i in range(len(argnames))}
     
     cl_kernel = self.compile(queue.context, **kwarg_types)
     
     kernel_args = self._unpack(argnames, arglist, kwarg_types)
         
     event = self.run_kernel(cl_kernel, queue, kernel_args, kwargs)
     
     #FIXME: I don't like that this breaks encapsulation
     if isinstance(event, EventRecord):
         event.set_kernel_args(kernel_args)
         
     if isinstance(queue_or_context, cl.Context):
         queue.finish()
     
     return event
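
A sketch of the two calling conventions this method supports; `my_kernel` and `a` are hypothetical stand-ins for a kernel decorated with this class and its argument:

ctx = cl.Context()
queue = cl.Queue(ctx)

# passing a queue: the call is asynchronous and returns an event
event = my_kernel(queue, a)
event.wait()

# passing a context: a temporary queue is created and finished,
# so the work is complete by the time the call returns
my_kernel(ctx, a)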
Example #12
def main():

    size = 10
    a = np.random.rand(size).astype('f')
    b = np.random.rand(size).astype('f')

    ctx = cl.Context()
    queue = cl.Queue(ctx)

    cla = cl.from_host(ctx, a, copy=True)
    clb = cl.from_host(ctx, b, copy=True)
    clc = cl.empty(ctx, [size], ctype='f')

    prg = cl.Program(
        ctx, """
        __kernel void add(__global const float *a,
        __global const float *b, __global float *c)
        {
          int gid = get_global_id(0);
          c[gid] = a[gid] + b[gid];
        }
        """).build()

    add = prg.add
    # attach argument metadata so the kernel can be called with keywords below
    add.argtypes = (cl.global_memory('f'), cl.global_memory('f'),
                    cl.global_memory('f'))
    add.argnames = 'a', 'b', 'c'
    # launch one work-item per element of `a`
    add.global_work_size = lambda a: a.shape

    add(queue, a=cla, b=clb, c=clc)

    with clc.map(queue) as view:
        print "view is a python memoryview object", view

        arr = np.asarray(view)

        print "Answer should be zero:"
        print (arr - (a + b)).sum()
Example #13
def main():
    
    ctx = cl.Context(device_type=cl.Device.GPU)
    queue = cl.Queue(ctx)
    
    host_init = np.arange(8, dtype=c_float) + 1
    device_input = cl.from_host(ctx, host_init)
    
    # fold the binary lambda across the device array; the result is a
    # one-element device array
    output = ca.reduce(queue, lambda a, b: a + b, device_input)
    
    print "-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "
    print "data:", host_init
    print "-- -- -- -- -- -- -- -- -- -- -- -- -- -- -- "
    print "host   sum:", host_init.sum()
    
    with output.map(queue) as view:
        print "device sum:", np.asarray(view).item()

    output = ca.reduce(queue, lambda a, b: a * b, device_input, initial=1.0)
    
    print "host   product:", host_init.prod()
    
    with output.map(queue) as view:
        print "device product:", np.asarray(view).item()
Example #14
def initialize():
    global generate_sin, coords_dev, n_vertices

    ctx = cl.gl.context()

    # compile the kernel lazily, only on the first call
    if generate_sin is None:
        program = cl.Program(ctx, generate_sin_source).build()
        generate_sin = program.generate_sin

        generate_sin.argnames = 'a',
        generate_sin.argtypes = cl.global_memory(cl.cl_float2),
        generate_sin.global_work_size = lambda a: a.shape

    coords_dev = cl.gl.empty_gl(ctx, [n_vertices], ctype=cl.cl_float2)

    glClearColor(1, 1, 1, 1)
    glColor(0, 0, 1)

    queue = cl.Queue(ctx)

    with cl.gl.acquire(queue, coords_dev):
        generate_sin(queue, coords_dev)

    glEnableClientState(GL_VERTEX_ARRAY)
Example #15
# definition header reconstructed to complete this excerpt; the decorators are
# assumed, following the global_work_size pattern used in Example #12
@cly.global_work_size(lambda a: a.shape)
@cly.kernel
def generate_sin(a):
    gid = clrt.get_global_id(0)
    n = clrt.get_global_size(0)

    r = c_float(gid) / c_float(n)

    # sin wave with 8 peaks
    y = r * c_float(16.0 * 3.1415)

    # x is a range from -1 to 1
    a[gid].x = r * 2.0 - 1.0

    # y is sin wave
    a[gid].y = clrt.native_sin(y)


# a context is needed here; the original excerpt elides its creation
ctx = cl.Context()
queue = cl.Queue(ctx)

a = cl.empty(ctx, [200], cly.float2)

event = generate_sin(queue, a)

event.wait()

print a
with a.map(queue) as view:
    print np.asarray(view)

#===============================================================================
# Plotting
#===============================================================================
from maka import roo
Example #16
    def __init__(self, *args, **kwargs):
        cl.Context.__init__(self, *args, **kwargs)

        # keep a default queue bound to this context
        self._queue = cl.Queue(self)
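
A sketch of this pattern in use; the subclass name `QueuedContext` is hypothetical (the `__init__` above would belong to it), and only calls shown in the examples above are assumed:

# assume the __init__ above belongs to a subclass, say QueuedContext(cl.Context)
ctx = QueuedContext()
arr = cl.empty(ctx, [4], ctype='f')
with arr.map(ctx._queue) as view:
    print np.asarray(view)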