Exemplo n.º 1
0
Arquivo: memory.py Projeto: 2php/veles
 def ocl_map(self, flags):
     if isinstance(self.device, NumpyDevice):
         return
     if self._map_arr_ is not None:
         # already mapped properly, nothing to do
         if self.map_flags != cl.CL_MAP_READ or flags == cl.CL_MAP_READ:
             return
         self.ocl_unmap()
     if (flags == cl.CL_MAP_WRITE_INVALIDATE_REGION and
             self.device.device_info.version < 1.1999):
         # 'cause available only starting with 1.2
         flags = cl.CL_MAP_WRITE
     assert self.devmem is not None
     try:
         ev, self._map_arr_ = self.device.queue_.map_buffer(
             self.devmem, flags, self._mem.nbytes)
         del ev
     except cl.CLRuntimeError as err:
         self.error("Failed to map %d OpenCL bytes: %s(%d)",
                    self._mem.nbytes, str(err), err.code)
         raise
     if (int(cl.get_ffi().cast("size_t", self._map_arr_)) !=
             self._mem.__array_interface__["data"][0]):
         raise RuntimeError("map_buffer returned different pointer")
     self.map_flags = flags
Exemplo n.º 2
0
 def ocl_map(self, flags):
     if isinstance(self.device, NumpyDevice):
         return
     if self._map_arr_ is not None:
         # already mapped properly, nothing to do
         if self.map_flags != cl.CL_MAP_READ or flags == cl.CL_MAP_READ:
             return
         self.ocl_unmap()
     if (flags == cl.CL_MAP_WRITE_INVALIDATE_REGION
             and self.device.device_info.version < 1.1999):
         # 'cause available only starting with 1.2
         flags = cl.CL_MAP_WRITE
     assert self.devmem is not None
     try:
         ev, self._map_arr_ = self.device.queue_.map_buffer(
             self.devmem, flags, self._mem.nbytes)
         del ev
     except cl.CLRuntimeError as err:
         self.error("Failed to map %d OpenCL bytes: %s(%d)",
                    self._mem.nbytes, str(err), err.code)
         raise
     if (int(cl.get_ffi().cast("size_t", self._map_arr_)) !=
             self._mem.__array_interface__["data"][0]):
         raise RuntimeError("map_buffer returned different pointer")
     self.map_flags = flags
Exemplo n.º 3
0
 def test_svm_alloc(self):
     ctx = cl.Platforms().create_some_context()
     if ctx.devices[0].version < 2.0:
         return
     svm = ctx.svm_alloc(cl.CL_MEM_READ_WRITE, 4096)
     svm.release()
     self.assertIsNone(svm.handle)
     del svm
     svm = ctx.svm_alloc(cl.CL_MEM_READ_WRITE, 4096)
     prg = ctx.create_program("""
         __kernel void test(__global void *p) {
             __global int *ptr = (__global int*)p;
             *ptr += 1;
         }
         """,
                              options="-cl-std=CL2.0")
     krn = prg.get_kernel("test")
     krn.set_arg(0, svm)
     krn.set_arg_svm(0, svm)
     queue = ctx.create_queue(ctx.devices[0])
     queue.svm_map(svm, cl.CL_MAP_WRITE_INVALIDATE_REGION, 4)
     p = cl.get_ffi().cast("int*", svm.handle)
     p[0] = 2
     queue.svm_unmap(svm)
     queue.execute_kernel(krn, [1], None)
     queue.svm_map(svm, cl.CL_MAP_READ, 4)
     self.assertEqual(p[0], 3)
     # always ensure that the last unmap had completed before
     # the svm destructor
     queue.svm_unmap(svm).wait()
     try:
         import numpy
         a = numpy.frombuffer(svm.buffer, dtype=numpy.int32)
         queue.execute_kernel(krn, [1], None)
         queue.svm_map(svm, cl.CL_MAP_READ, 4)
         self.assertEqual(a[0], 4)
         queue.svm_unmap(svm).wait()
     except ImportError:
         pass
     del svm  # svm destructor here
Exemplo n.º 4
0
 def test_svm_alloc(self):
     ctx = cl.Platforms().create_some_context()
     if ctx.devices[0].version < 2.0:
         return
     svm = ctx.svm_alloc(cl.CL_MEM_READ_WRITE, 4096)
     svm.release()
     self.assertIsNone(svm.handle)
     del svm
     svm = ctx.svm_alloc(cl.CL_MEM_READ_WRITE, 4096)
     prg = ctx.create_program("""
         __kernel void test(__global void *p) {
             __global int *ptr = (__global int*)p;
             *ptr += 1;
         }
         """, options="-cl-std=CL2.0")
     krn = prg.get_kernel("test")
     krn.set_arg(0, svm)
     krn.set_arg_svm(0, svm)
     queue = ctx.create_queue(ctx.devices[0])
     queue.svm_map(svm, cl.CL_MAP_WRITE_INVALIDATE_REGION, 4)
     p = cl.get_ffi().cast("int*", svm.handle)
     p[0] = 2
     queue.svm_unmap(svm)
     queue.execute_kernel(krn, [1], None)
     queue.svm_map(svm, cl.CL_MAP_READ, 4)
     self.assertEqual(p[0], 3)
     # always ensure that the last unmap had completed before
     # the svm destructor
     queue.svm_unmap(svm).wait()
     try:
         import numpy
         a = numpy.frombuffer(svm.buffer, dtype=numpy.int32)
         queue.execute_kernel(krn, [1], None)
         queue.svm_map(svm, cl.CL_MAP_READ, 4)
         self.assertEqual(a[0], 4)
         queue.svm_unmap(svm).wait()
     except ImportError:
         pass
     del svm  # svm destructor here
Exemplo n.º 5
0
    def test_api_nonumpy(self):
        import math

        # Create platform, context, program, kernel and queue
        platforms = cl.Platforms()
        ctx = platforms.create_some_context()
        prg = ctx.create_program(self.src_test, self.include_dirs)
        krn = prg.get_kernel("test")
        # Create command queue
        queue = ctx.create_queue(ctx.devices[0])

        # Create arrays with some values for testing
        N = 100000
        ffi = cl.get_ffi()
        _a = ffi.new("float[]", N + queue.device.memalign)
        sz = int(ffi.cast("size_t", _a))
        if sz % queue.device.memalign != 0:
            sz += queue.device.memalign - (sz % queue.device.memalign)
            a = ffi.cast("float*", sz)
        else:
            a = _a
        _b = ffi.new("float[]", N + queue.device.memalign)
        sz = int(ffi.cast("size_t", _b))
        if sz % queue.device.memalign != 0:
            sz += queue.device.memalign - (sz % queue.device.memalign)
            b = ffi.cast("float*", sz)
        else:
            b = _b
        c = ffi.new("float[]", 1)
        c[0] = 1.2345
        d = ffi.new("float[]", N)
        sz = ffi.sizeof(d)
        for i, t in enumerate(d):
            a[i] = math.sin(i)
            b[i] = math.cos(i)
            d[i] = a[i] + b[i] * c[0]
        a_copy = ffi.new("float[]", N)
        a_copy[0:N] = a[0:N]

        # Create buffers
        a_ = ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_USE_HOST_PTR,
                               a,
                               size=sz)
        b_ = ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_USE_HOST_PTR,
                               b,
                               size=sz)

        # Set kernel arguments
        krn.set_arg(0, a_)
        krn.set_arg(1, b_)
        krn.set_arg(2, ffi.cast("const void*", c), ffi.sizeof(c))

        # Execute kernel
        global_size = [N]
        local_size = None
        queue.execute_kernel(krn, global_size, local_size, need_event=False)

        # Get results back from the device by map_buffer
        ev, ptr = queue.map_buffer(a_, cl.CL_MAP_READ, sz)
        del ev
        queue.unmap_buffer(a_, ptr).wait()
        mx = 0
        for i, t in enumerate(d):
            mx = max(mx, math.fabs(a[i] - t))
        self.assertLess(mx, 0.0001, "Incorrect result after map_buffer")

        # Get results back from the device by read_buffer
        aa = ffi.new("float[]", N)
        queue.read_buffer(a_, aa, size=sz)
        mx = 0
        for i, t in enumerate(d):
            mx = max(mx, math.fabs(aa[i] - t))
        self.assertLess(mx, 0.0001, "Incorrect result after read_buffer")

        # Refill buffer with stored copy by map_buffer with event
        ev, ptr = queue.map_buffer(
            a_,
            cl.CL_MAP_WRITE if queue.device.version < 1.1999 else
            cl.CL_MAP_WRITE_INVALIDATE_REGION,
            sz,
            blocking=False,
            need_event=True)
        ev.wait()
        a[0:N] = a_copy[0:N]
        ev = queue.unmap_buffer(a_, ptr)

        # Execute kernel
        ev = queue.execute_kernel(krn,
                                  global_size,
                                  local_size,
                                  wait_for=(ev, ))
        # Get results back from the device by map_buffer
        ev, ptr = queue.map_buffer(a_,
                                   cl.CL_MAP_READ,
                                   sz,
                                   wait_for=(ev, ),
                                   need_event=True)
        ev.wait()
        queue.unmap_buffer(a_, ptr).wait()
        mx = 0
        for i, t in enumerate(d):
            mx = max(mx, math.fabs(a[i] - t))
        self.assertLess(mx, 0.0001, "Incorrect result after map_buffer")

        # Refill buffer with stored copy by write_buffer
        ev = queue.write_buffer(a_,
                                a_copy,
                                size=sz,
                                blocking=False,
                                need_event=True)

        # Execute kernel
        ev = queue.execute_kernel(krn,
                                  global_size,
                                  local_size,
                                  wait_for=(ev, ))
        # Get results back from the device by map_buffer
        ev, ptr = queue.map_buffer(a_,
                                   cl.CL_MAP_READ,
                                   sz,
                                   wait_for=(ev, ),
                                   need_event=True)
        ev.wait()
        queue.unmap_buffer(a_, ptr).wait()
        mx = 0
        for i, t in enumerate(d):
            mx = max(mx, math.fabs(a[i] - t))
        self.assertLess(mx, 0.0001, "Incorrect result after map_buffer")

        del _b
        del _a
Exemplo n.º 6
0
    def test_api_nonumpy(self):
        import math

        # Create platform, context, program, kernel and queue
        platforms = cl.Platforms()
        ctx = platforms.create_some_context()
        prg = ctx.create_program(self.src_test, self.include_dirs)
        krn = prg.get_kernel("test")
        # Create command queue
        queue = ctx.create_queue(ctx.devices[0])

        # Create arrays with some values for testing
        N = 100000
        ffi = cl.get_ffi()
        _a = ffi.new("float[]", N + queue.device.memalign)
        sz = int(ffi.cast("size_t", _a))
        if sz % queue.device.memalign != 0:
            sz += queue.device.memalign - (sz % queue.device.memalign)
            a = ffi.cast("float*", sz)
        else:
            a = _a
        _b = ffi.new("float[]", N + queue.device.memalign)
        sz = int(ffi.cast("size_t", _b))
        if sz % queue.device.memalign != 0:
            sz += queue.device.memalign - (sz % queue.device.memalign)
            b = ffi.cast("float*", sz)
        else:
            b = _b
        c = ffi.new("float[]", 1)
        c[0] = 1.2345
        d = ffi.new("float[]", N)
        sz = ffi.sizeof(d)
        for i, t in enumerate(d):
            a[i] = math.sin(i)
            b[i] = math.cos(i)
            d[i] = a[i] + b[i] * c[0]
        a_copy = ffi.new("float[]", N)
        a_copy[0:N] = a[0:N]

        # Create buffers
        a_ = ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_USE_HOST_PTR,
                               a, size=sz)
        b_ = ctx.create_buffer(cl.CL_MEM_READ_WRITE | cl.CL_MEM_USE_HOST_PTR,
                               b, size=sz)

        # Set kernel arguments
        krn.set_arg(0, a_)
        krn.set_arg(1, b_)
        krn.set_arg(2, ffi.cast("const void*", c), ffi.sizeof(c))

        # Execute kernel
        global_size = [N]
        local_size = None
        queue.execute_kernel(krn, global_size, local_size, need_event=False)

        # Get results back from the device by map_buffer
        ev, ptr = queue.map_buffer(a_, cl.CL_MAP_READ, sz)
        del ev
        queue.unmap_buffer(a_, ptr).wait()
        mx = 0
        for i, t in enumerate(d):
            mx = max(mx, math.fabs(a[i] - t))
        self.assertLess(mx, 0.0001, "Incorrect result after map_buffer")

        # Get results back from the device by read_buffer
        aa = ffi.new("float[]", N)
        queue.read_buffer(a_, aa, size=sz)
        mx = 0
        for i, t in enumerate(d):
            mx = max(mx, math.fabs(aa[i] - t))
        self.assertLess(mx, 0.0001, "Incorrect result after read_buffer")

        # Refill buffer with stored copy by map_buffer with event
        ev, ptr = queue.map_buffer(
            a_, cl.CL_MAP_WRITE if queue.device.version < 1.1999
            else cl.CL_MAP_WRITE_INVALIDATE_REGION, sz,
            blocking=False, need_event=True)
        ev.wait()
        a[0:N] = a_copy[0:N]
        ev = queue.unmap_buffer(a_, ptr)

        # Execute kernel
        ev = queue.execute_kernel(krn, global_size, local_size, wait_for=(ev,))
        # Get results back from the device by map_buffer
        ev, ptr = queue.map_buffer(a_, cl.CL_MAP_READ, sz,
                                   wait_for=(ev,), need_event=True)
        ev.wait()
        queue.unmap_buffer(a_, ptr).wait()
        mx = 0
        for i, t in enumerate(d):
            mx = max(mx, math.fabs(a[i] - t))
        self.assertLess(mx, 0.0001, "Incorrect result after map_buffer")

        # Refill buffer with stored copy by write_buffer
        ev = queue.write_buffer(a_, a_copy, size=sz,
                                blocking=False, need_event=True)

        # Execute kernel
        ev = queue.execute_kernel(krn, global_size, local_size, wait_for=(ev,))
        # Get results back from the device by map_buffer
        ev, ptr = queue.map_buffer(a_, cl.CL_MAP_READ, sz,
                                   wait_for=(ev,), need_event=True)
        ev.wait()
        queue.unmap_buffer(a_, ptr).wait()
        mx = 0
        for i, t in enumerate(d):
            mx = max(mx, math.fabs(a[i] - t))
        self.assertLess(mx, 0.0001, "Incorrect result after map_buffer")

        del _b
        del _a