def init_module():
    """
    Initialise the module-wide CUDA context and pre-compile all the kernels.

    Does nothing if `context_wrapper` is already set. Otherwise this:
    * logs system information and selects a device,
    * creates a context on that device, stored in the `context` global
      and wrapped in the `context_wrapper` global,
    * fills `KERNELS_MAP` with the kernels generated for every entry
      of `COLORSPACES_MAP`,
    * calls `get_CUDA_kernel` for every (src_format, dst_format) key
      of `KERNELS_MAP`.

    The context is popped before returning, including when one of the
    steps above raises, so that a failure does not leave it pushed.
    """
    global context, context_wrapper
    if context_wrapper is not None:
        #already initialised
        return
    log_sys_info()
    device_id, device = select_device()
    context = device.make_context(flags=driver.ctx_flags.SCHED_YIELD | driver.ctx_flags.MAP_HOST)
    #per the PyCUDA documentation, make_context() also makes the new context
    #current, so from here on it must be popped on every exit path:
    try:
        debug("testing with context=%s", context)
        debug("api version=%s", context.get_api_version())
        free, total = driver.mem_get_info()
        debug("using device %s", device_info(device))
        debug("memory: free=%sMB, total=%sMB", int(free / 1024 / 1024), int(total / 1024 / 1024))
        context_wrapper = CudaContextWrapper(context)
        #generate kernel sources:
        for rgb_format, yuv_formats in COLORSPACES_MAP.items():
            m = gen_rgb_to_yuv_kernels(rgb_format, yuv_formats)
            KERNELS_MAP.update(m)
        #the first item of each value is used as the kernel name:
        _kernel_names_ = sorted(set(x[0] for x in KERNELS_MAP.values()))
        log.info("%s csc_nvcuda kernels: %s", len(_kernel_names_), ", ".join(_kernel_names_))
        #now, pre-compile the kernels:
        for src_format, dst_format in KERNELS_MAP.keys():
            get_CUDA_kernel(device_id, src_format, dst_format)
    finally:
        context.pop()
def init_module():
    """
    Initialise the module-wide CUDA context and pre-compile all the kernels.

    Does nothing if `context_wrapper` is already set. Otherwise this:
    * logs system information and selects a device,
    * creates a context on that device, stored in the `context` global
      and wrapped in the `context_wrapper` global,
    * fills `KERNELS_MAP` with the kernels generated for every entry
      of `COLORSPACES_MAP`,
    * calls `get_CUDA_kernel` for every (src_format, dst_format) key
      of `KERNELS_MAP`.

    The context is popped before returning, including when one of the
    steps above raises, so that a failure does not leave it pushed.
    """
    global context, context_wrapper
    if context_wrapper is not None:
        #already initialised
        return
    log_sys_info()
    device_id, device = select_device()
    context = device.make_context(flags=driver.ctx_flags.SCHED_YIELD | driver.ctx_flags.MAP_HOST)
    #per the PyCUDA documentation, make_context() also makes the new context
    #current, so from here on it must be popped on every exit path:
    try:
        debug("testing with context=%s", context)
        debug("api version=%s", context.get_api_version())
        free, total = driver.mem_get_info()
        debug("using device %s", device_info(device))
        debug("memory: free=%sMB, total=%sMB", int(free/1024/1024), int(total/1024/1024))
        context_wrapper = CudaContextWrapper(context)
        #generate kernel sources:
        for rgb_format, yuv_formats in COLORSPACES_MAP.items():
            m = gen_rgb_to_yuv_kernels(rgb_format, yuv_formats)
            KERNELS_MAP.update(m)
        #the first item of each value is used as the kernel name:
        _kernel_names_ = sorted(set(x[0] for x in KERNELS_MAP.values()))
        log.info("%s csc_nvcuda kernels: %s", len(_kernel_names_), ", ".join(_kernel_names_))
        #now, pre-compile the kernels:
        for src_format, dst_format in KERNELS_MAP.keys():
            get_CUDA_kernel(device_id, src_format, dst_format)
    finally:
        context.pop()
def gen_all_kernels():
    """
    Build the source code of every kernel, for each entry of COLORSPACES_MAP.

    The names of the generated kernels are logged (sorted, without duplicates).

    Returns a dictionary:
    * key: (src_format, dst_format)
    * value: (function_name, kernel_src)
    """
    generated = {}
    for rgb_format, yuv_formats in COLORSPACES_MAP.items():
        #merge the kernels generated for this RGB format into the result:
        generated.update(gen_rgb_to_yuv_kernels(rgb_format, yuv_formats))
    #the first item of each value is the kernel's function name:
    unique_names = set(entry[0] for entry in generated.values())
    names = sorted(unique_names)
    log.info("%s csc_nvcuda kernels: %s", len(names), ", ".join(names))
    return generated