def test_cuda_driver_basic(self):
    """Launch the hello-world PTX kernel on the default stream.

    A 100-element ``c_int`` buffer is copied to the device, the kernel
    ``_Z10helloworldPi`` is launched with one block of 100 threads, and the
    buffer is copied back. The test expects element ``i`` to equal ``i``.
    """
    from ctypes import c_void_p

    module = self.context.create_module_ptx(self.ptx)
    function = module.get_function('_Z10helloworldPi')

    array = (c_int * 100)()
    nbytes = sizeof(array)

    memory = self.context.memalloc(nbytes)
    host_to_device(memory, array, nbytes)

    # launch_kernel is handed ctypes-compatible values, so pass the device
    # pointer held by the allocation rather than the allocation wrapper.
    ptr = memory.device_ctypes_pointer
    stream = 0

    if _driver.USE_NV_BINDING:
        # With the NVIDIA binding the pointer and stream are binding
        # objects; re-wrap them in the form the launch call accepts.
        ptr = c_void_p(int(ptr))
        stream = _driver.binding.CUstream(stream)

    launch_kernel(function.handle,  # Kernel
                  1, 1, 1,          # gx, gy, gz
                  100, 1, 1,        # bx, by, bz
                  0,                # dynamic shared mem
                  stream,           # stream
                  [ptr])            # arguments

    device_to_host(array, memory, nbytes)
    for i, v in enumerate(array):
        self.assertEqual(i, v)

    module.unload()
def test_cuda_driver_basic(self):
    """Round-trip a buffer through the hello-world kernel (default stream).

    Loads the PTX held in ``self.ptx``, runs ``_Z10helloworldPi`` with a
    single block of 100 threads over a 100-element ``c_int`` buffer, and
    checks that after the copy-back element ``i`` equals ``i``.
    """
    ptx_module = self.context.create_module_ptx(self.ptx)
    kernel = ptx_module.get_function('_Z10helloworldPi')

    host_buf = (c_int * 100)()
    nbytes = sizeof(host_buf)

    dev_mem = self.context.memalloc(nbytes)
    host_to_device(dev_mem, host_buf, nbytes)

    # The kernel argument and stream take a different form depending on
    # which driver binding is active.
    if _driver.USE_NV_BINDING:
        kernel_arg = c_void_p(int(dev_mem.device_ctypes_pointer))
        launch_stream = _driver.binding.CUstream(0)
    else:
        kernel_arg = dev_mem.device_ctypes_pointer
        launch_stream = 0

    grid = (1, 1, 1)      # gx, gy, gz
    block = (100, 1, 1)   # bx, by, bz
    launch_kernel(
        kernel.handle,
        *grid,
        *block,
        0,                # dynamic shared mem
        launch_stream,
        [kernel_arg],
    )

    device_to_host(host_buf, dev_mem, nbytes)
    for index, value in enumerate(host_buf):
        self.assertEqual(index, value)

    ptx_module.unload()
def test_cuda_driver_stream_operations(self):
    """Launch the hello-world PTX kernel on an explicitly created stream.

    The host-to-device copy, the kernel launch and the device-to-host copy
    are all issued on a stream obtained from ``self.context``. The test
    expects element ``i`` of the 100-element buffer to equal ``i``.
    """
    from ctypes import c_void_p

    module = self.context.create_module_ptx(self.ptx)
    function = module.get_function('_Z10helloworldPi')

    array = (c_int * 100)()
    nbytes = sizeof(array)

    stream = self.context.create_stream()

    with stream.auto_synchronize():
        memory = self.context.memalloc(nbytes)
        host_to_device(memory, array, nbytes, stream=stream)

        # launch_kernel is handed ctypes-compatible values, so pass the
        # device pointer held by the allocation, not the wrapper itself.
        ptr = memory.device_ctypes_pointer
        if _driver.USE_NV_BINDING:
            # The NVIDIA binding yields a binding pointer object; re-wrap
            # its integer value as a ctypes pointer for the launch call.
            ptr = c_void_p(int(ptr))

        launch_kernel(function.handle,  # Kernel
                      1, 1, 1,          # gx, gy, gz
                      100, 1, 1,        # bx, by, bz
                      0,                # dynamic shared mem
                      stream.handle,    # stream
                      [ptr])            # arguments

        # Queue the copy-back on the same stream before leaving the block;
        # auto_synchronize() is expected to synchronize the stream on exit,
        # so the host buffer is complete before it is inspected below.
        device_to_host(array, memory, nbytes, stream=stream)

    for i, v in enumerate(array):
        self.assertEqual(i, v)
def launch(self, args, griddim, blockdim, stream=0, sharedmem=0):
    """Launch the kernel and, in debug mode, re-raise device-side errors.

    Parameters
    ----------
    args
        Python-level kernel arguments, paired positionally with
        ``self.argument_types``.
    griddim, blockdim
        Iterables unpacked into the grid and block dimension arguments of
        ``driver.launch_kernel``.
    stream
        Stream to launch on; a falsy value selects the zero stream.
    sharedmem
        Dynamic shared memory size forwarded to ``driver.launch_kernel``.

    Raises
    ------
    Exception
        When ``self.debug`` is set and the kernel's ``__errcode__`` global
        is non-zero, the exception class and arguments looked up through
        ``self.call_helper.get_exception`` are raised, with the message
        prefixed by source location (if known) and thread/block indices.
    """
    cufunc = self._codelibrary.get_cufunc()

    if self.debug:
        # Locate the kernel's error-code global and clear it before launch.
        errcode_symbol = cufunc.name + "__errcode__"
        excmem, excsz = cufunc.module.get_global_symbol(errcode_symbol)
        assert excsz == ctypes.sizeof(ctypes.c_int)
        excval = ctypes.c_int()
        excmem.memset(0, stream=stream)

    # _prepare_args receives both lists; they are presumably filled with
    # the low-level kernel arguments and with write-back callables.
    writebacks = []
    kernelargs = []
    for arg_type, arg_value in zip(self.argument_types, args):
        self._prepare_args(arg_type, arg_value, stream, writebacks,
                           kernelargs)

    zero_stream = driver.binding.CUstream(0) if driver.USE_NV_BINDING else None

    # Deliberate and/or chain: a falsy stream *or* a falsy stream.handle
    # both fall through to the zero stream.
    stream_handle = (stream and stream.handle) or zero_stream

    driver.launch_kernel(
        cufunc.handle,
        *griddim,
        *blockdim,
        sharedmem,
        stream_handle,
        kernelargs,
        cooperative=self.cooperative,
    )

    if self.debug:
        driver.device_to_host(ctypes.addressof(excval), excmem, excsz)
        error_code = excval.value
        if error_code != 0:
            def read_int_global(suffix):
                # Copy the int global "<kernel>__<suffix>__" to the host.
                devmem, size = cufunc.module.get_global_symbol(
                    "%s__%s__" % (cufunc.name, suffix))
                holder = ctypes.c_int()
                driver.device_to_host(ctypes.addressof(holder), devmem, size)
                return holder.value

            tid = [read_int_global("tid" + axis) for axis in 'zyx']
            ctaid = [read_int_global("ctaid" + axis) for axis in 'zyx']

            exccls, exc_args, loc = self.call_helper.get_exception(error_code)

            # Source-location part of the message prefix (may be empty).
            if loc is not None:
                sym, filepath, lineno = loc
                filepath = os.path.abspath(filepath)
                locinfo = 'In function %r, file %s, line %s, ' % (
                    sym, filepath, lineno,
                )
            else:
                locinfo = ''

            # Thread-position part of the message prefix.
            prefix = "%stid=%s ctaid=%s" % (locinfo, tid, ctaid)
            if not exc_args:
                exc_args = (prefix,)
            else:
                first_message = "%s: %s" % (prefix, exc_args[0])
                exc_args = (first_message,) + exc_args[1:]
            raise exccls(*exc_args)

    # Run the collected write-back callables now that the kernel was issued.
    for writeback in writebacks:
        writeback()