def create_kernel_instance(self, kernel_options, params, verbose):
    """Assemble a KernelInstance for a single parameter configuration.

    The thread block and grid dimensions are derived from the problem size,
    the grid divisor lists and the tunable parameters. The kernel source
    (a single source or a list of sources) is then prepared for compilation.

    :param kernel_options: Object carrying the kernel description; this method
        reads ``grid_div_x``, ``grid_div_y``, ``grid_div_z``, ``problem_size``,
        ``block_size_names``, ``kernel_string``, ``kernel_name`` and
        ``arguments`` from it.
    :param params: The tunable parameters of this specific kernel instance.
    :param verbose: When truthy, print a message if the configuration is
        skipped.

    :returns: A KernelInstance, or None when the number of threads per block
        exceeds ``self.dev.max_threads``.
    """
    instance_string = util.get_instance_string(params)

    # derive thread block and grid dimensions for this configuration
    grid_div = (
        kernel_options.grid_div_x,
        kernel_options.grid_div_y,
        kernel_options.grid_div_z,
    )
    threads, grid = util.setup_block_and_grid(
        kernel_options.problem_size,
        grid_div,
        params,
        kernel_options.block_size_names,
    )

    # configurations that do not fit the device's thread limit are skipped
    exceeds_device_limit = numpy.prod(threads) > self.dev.max_threads
    if exceeds_device_limit:
        if verbose:
            print("skipping config", instance_string, "reason: too many threads per block")
        return None

    # a list of sources yields the main kernel string plus additional files,
    # a single source yields just the kernel string
    source = kernel_options.kernel_string
    if isinstance(source, list):
        kernel_string, temp_files = util.prepare_list_of_files(source, params, grid)
    else:
        temp_files = dict()
        kernel_string = util.get_kernel_string(source, params)

    # final preparation of the kernel string for compilation
    name, kernel_string = util.setup_kernel_strings(
        kernel_options.kernel_name, kernel_string, params, grid)

    # bundle everything known about this instance
    return KernelInstance(
        name,
        kernel_string,
        temp_files,
        threads,
        grid,
        params,
        kernel_options.arguments,
    )
def _check_user_input(kernel_name, kernel_string, arguments, block_size_names):
    """Validate the user-supplied kernel arguments and block size names.

    When ``kernel_string`` is callable (a code generator) the argument list
    check is skipped and only a debug message is logged. Otherwise every
    source (a single one, or each entry of a list) is checked against
    ``arguments`` through ``util.check_argument_list``.

    :param kernel_name: Name of the kernel, forwarded to the argument check.
    :param kernel_string: A kernel source, a list of kernel sources, or a
        callable that generates the kernel code.
    :param arguments: The kernel arguments to check against each source.
    :param block_size_names: Forwarded to ``util.check_block_size_names``.
    """
    if callable(kernel_string):
        logging.debug(
            "Checking of arguments list not supported yet for code generators."
        )
    else:
        # treat a single source as a one-element list so both cases share a loop
        sources = kernel_string if isinstance(kernel_string, list) else [kernel_string]
        for source in sources:
            source_code = util.get_kernel_string(source)
            util.check_argument_list(kernel_name, source_code, arguments)

    # check for types and length of block_size_names
    util.check_block_size_names(block_size_names)
def get_kernel_string(self, index=0, params=None):
    """Return the kernel source at position ``index`` as a string.

    The source is looked up in ``self.kernel_sources`` and handed to
    ``util.get_kernel_string()``; see that function for details.

    :param index: Index of the kernel source in the list of sources.
    :type index: int

    :param params: Dictionary containing the tunable parameters for this
        specific kernel instance, only needed when the kernel source is a
        generator.
    :type params: dict

    :returns: A string containing the kernel code.
    :rtype: string
    """
    logging.debug('get_kernel_string called')
    return util.get_kernel_string(self.kernel_sources[index], params)
def cpp(function_name, kernel_source, args, convert_to_array=None):
    """Generate a wrapper to call C++ functions from Python.

    This function allows Kernel Tuner to call templated C++ functions
    that use primitive data types (double, float, int, ...).

    There is support to convert function arguments from plain pointers
    to array references. If this is needed, there should be a True value
    in convert_to_array in the location corresponding to the location in
    the args array.

    For example, a Numpy array argument of type float64 and length 10
    will be cast using:
    ``*reinterpret_cast<double(*)[10]>(arg)``
    which allows it to be used to call a C++ function that is defined as:
    ``template<typename T, int s>void my_function(T (&arg)[s], ...)``

    Arrays of size 1 will be converted to simple non-array references.
    False indicates that no conversion is performed. Conversion
    is only supported for numpy array arguments. If convert_to_array is
    passed it should have the same length as the args array.

    :param function_name: A string containing the name of the C++ function
        to be wrapped
    :type function_name: string

    :param kernel_source: One of the sources for the kernel, could be a
        function that generates the kernel code, a string containing a
        filename that points to the kernel source, or just a string that
        contains the code.
    :type kernel_source: string or callable

    :param args: A list of kernel arguments, use numpy arrays for
        arrays, use numpy.int32 or numpy.float32 for scalars.
    :type args: list

    :param convert_to_array: A list of same length as args, containing
        True or False values indicating whether the corresponding argument
        in args should be cast to a reference to an array or not.
    :type convert_to_array: list (True or False)

    :returns: A string containing the original code extended with the
        wrapper function. The wrapper has "extern C" binding and can be
        passed to other Kernel Tuner functions, for example run_kernel
        with lang="C". The name of the wrapper function will be the name
        of the function with a "_wrapper" postfix.
    :rtype: string

    :raises ValueError: If convert_to_array is given with a length that
        differs from args, if an argument has a dtype that is not one of
        int8, int16, int32, float32 or float64, or if conversion to an
        array reference is requested for an argument that is not a numpy
        array.
    """
    if convert_to_array and len(args) != len(convert_to_array):
        raise ValueError("convert_to_array length should be same as args")

    type_map = {
        "int8": "char",
        "int16": "short",
        "int32": "int",
        "float32": "float",
        "float64": "double",
    }

    def type_str(arg):
        """Return the C++ parameter type (with trailing space) for arg."""
        dtype_name = str(arg.dtype)
        if dtype_name not in type_map:
            raise ValueError(
                "only primitive data types are supported by the C++ wrapper")
        typestring = type_map[dtype_name]
        # arrays are passed to the wrapper as plain pointers
        if isinstance(arg, np.ndarray):
            typestring += " *"
        return typestring + " "

    signature = ",".join(
        type_str(arg) + "arg" + str(i) for i, arg in enumerate(args))

    # Build the call arguments as a list on every path and join exactly once,
    # so the result is well-formed with and without convert_to_array.
    call_args = []
    for i, arg in enumerate(args):
        if not convert_to_array or not convert_to_array[i]:
            # no conversion requested: forward the wrapper argument unchanged
            call_args.append("arg" + str(i))
            continue

        if not isinstance(arg, np.ndarray):
            raise ValueError(
                "conversion to array reference only supported for arguments "
                "that are numpy arrays, use length-1 numpy array to pass a "
                "scalar by reference")

        if arg.size > 1:
            # convert pointer to a reference to an array
            arg_shape = "".join("[%d]" % dim for dim in arg.shape)
            arg_str = ("*reinterpret_cast<" + type_map[str(arg.dtype)]
                       + "(*)" + arg_shape + ">(arg" + str(i) + ")")
        else:
            # a reference is accepted rather than a pointer, just dereference
            arg_str = "*arg" + str(i)
        call_args.append(arg_str)
    call_args_str = ",".join(call_args)

    kernel_string = util.get_kernel_string(kernel_source)

    # The kernel source is followed by a line break so that a trailing line
    # comment or preprocessor directive cannot swallow the wrapper.
    wrapper_template = (
        "\n"
        "%s\n"
        "\n"
        'extern "C"\n'
        "float %s_wrapper(%s) {\n"
        "\n"
        "    %s(%s);\n"
        "\n"
        "    return 0.0f;\n"
        "}"
    )
    return wrapper_template % (kernel_string, function_name, signature,
                               function_name, call_args_str)
def cpp(function_name, kernel_source, args, convert_to_array=None):
    """Generate a wrapper to call C++ functions from Python.

    This function allows Kernel Tuner to call templated C++ functions
    that use primitive data types (double, float, int, ...).

    There is support to convert function arguments from plain pointers
    to array references. If this is needed, there should be a True value
    in convert_to_array in the location corresponding to the location in
    the args array.

    For example, a Numpy array argument of type float64 and length 10
    will be cast using:
    ``*reinterpret_cast<double(*)[10]>(arg)``
    which allows it to be used to call a C++ function that is defined as:
    ``template<typename T, int s>void my_function(T (&arg)[s], ...)``

    Arrays of size 1 will be converted to simple non-array references.
    False indicates that no conversion is performed. Conversion
    is only supported for numpy array arguments. If convert_to_array is
    passed it should have the same length as the args array.

    :param function_name: A string containing the name of the C++ function
        to be wrapped
    :type function_name: string

    :param kernel_source: One of the sources for the kernel, could be a
        function that generates the kernel code, a string containing a
        filename that points to the kernel source, or just a string that
        contains the code.
    :type kernel_source: string or callable

    :param args: A list of kernel arguments, use numpy arrays for
        arrays, use numpy.int32 or numpy.float32 for scalars.
    :type args: list

    :param convert_to_array: A list of same length as args, containing
        True or False values indicating whether the corresponding argument
        in args should be cast to a reference to an array or not.
    :type convert_to_array: list (True or False)

    :returns: A string containing the original code extended with the
        wrapper function. The wrapper has "extern C" binding and can be
        passed to other Kernel Tuner functions, for example run_kernel
        with lang="C". The name of the wrapper function will be the name
        of the function with a "_wrapper" postfix.
    :rtype: string

    :raises ValueError: If convert_to_array is given with a length that
        differs from args, if an argument has a dtype that is not one of
        int8, int16, int32, float32 or float64, or if conversion to an
        array reference is requested for an argument that is not a numpy
        array.
    """
    if convert_to_array and len(args) != len(convert_to_array):
        raise ValueError("convert_to_array length should be same as args")

    type_map = {
        "int8": "char",
        "int16": "short",
        "int32": "int",
        "float32": "float",
        "float64": "double",
    }

    def type_str(arg):
        """Return the C++ parameter type (with trailing space) for arg."""
        dtype_name = str(arg.dtype)
        if dtype_name not in type_map:
            raise ValueError(
                "only primitive data types are supported by the C++ wrapper")
        typestring = type_map[dtype_name]
        # arrays are passed to the wrapper as plain pointers
        if isinstance(arg, np.ndarray):
            typestring += " *"
        return typestring + " "

    signature = ",".join(
        type_str(arg) + "arg" + str(i) for i, arg in enumerate(args))

    # Build the call arguments as a list on every path and join exactly once,
    # so the result is well-formed with and without convert_to_array.
    call_args = []
    for i, arg in enumerate(args):
        if not convert_to_array or not convert_to_array[i]:
            # no conversion requested: forward the wrapper argument unchanged
            call_args.append("arg" + str(i))
            continue

        if not isinstance(arg, np.ndarray):
            raise ValueError(
                "conversion to array reference only supported for arguments "
                "that are numpy arrays, use length-1 numpy array to pass a "
                "scalar by reference")

        if arg.size > 1:
            # convert pointer to a reference to an array
            arg_shape = "".join("[%d]" % dim for dim in arg.shape)
            arg_str = ("*reinterpret_cast<" + type_map[str(arg.dtype)]
                       + "(*)" + arg_shape + ">(arg" + str(i) + ")")
        else:
            # a reference is accepted rather than a pointer, just dereference
            arg_str = "*arg" + str(i)
        call_args.append(arg_str)
    call_args_str = ",".join(call_args)

    kernel_string = util.get_kernel_string(kernel_source)

    # The kernel source is followed by a line break so that a trailing line
    # comment or preprocessor directive cannot swallow the wrapper.
    wrapper_template = (
        "\n"
        "%s\n"
        "\n"
        'extern "C"\n'
        "float %s_wrapper(%s) {\n"
        "\n"
        "    %s(%s);\n"
        "\n"
        "    return 0.0f;\n"
        "}"
    )
    return wrapper_template % (kernel_string, function_name, signature,
                               function_name, call_args_str)