def __call__(self, kernel, codegen_result):
    """
    Generate the wrapping Python invoker for this execution target.

    The wrapper's signature consists of ``self.system_args`` followed by
    one ``<name>=None`` keyword for every entry of the implemented data
    info whose ``arg_class`` is a :class:`KernelArgument` subclass.

    If ``kernel.options.write_wrapper`` is set, the generated source is
    printed (when the option is exactly ``True``) or written to the file
    named by the option, optionally highlighted first when
    ``highlight_wrapper`` is set.

    :arg kernel: the loopy :class:`LoopKernel` to be executed
    :arg codegen_result: the loopy :class:`CodeGenerationResult` created
        by code generation

    :returns: a Python callable that handles execution of this kernel
    """
    options = kernel.options
    data_info = codegen_result.implemented_data_info

    from loopy.kernel.data import KernelArgument

    wrapper_name = "invoke_%s_loopy_kernel" % kernel.name

    # Only genuine kernel arguments become keyword parameters.
    kernel_arg_params = []
    for info in data_info:
        if issubclass(info.arg_class, KernelArgument):
            kernel_arg_params.append("%s=None" % info.name)

    gen = PythonFunctionGenerator(
        wrapper_name, self.system_args + kernel_arg_params)

    # Preamble: target imports, then the host code, each followed by a
    # blank separator line.
    self.target_specific_preamble(gen)
    gen.add_to_preamble("")
    self.generate_host_code(gen, codegen_result)
    gen.add_to_preamble("")

    # Body of the wrapper.
    self.initialize_system_args(gen)

    self.generate_integer_arg_finding_from_shapes(gen, kernel, data_info)
    self.generate_integer_arg_finding_from_offsets(gen, kernel, data_info)
    self.generate_integer_arg_finding_from_strides(gen, kernel, data_info)
    self.generate_value_arg_check(gen, kernel, data_info)

    invocation_args = self.generate_arg_setup(
        gen, kernel, data_info, options)

    self.generate_invocation(
        gen, codegen_result.host_program.name, invocation_args,
        kernel, data_info)

    self.generate_output_handler(gen, options, kernel, data_info)

    if options.write_wrapper:
        wrapper_source = gen.get()
        if options.highlight_wrapper:
            wrapper_source = get_highlighted_python_code(wrapper_source)

        # ``True`` means "dump to stdout"; any other truthy value is used
        # as the output file name.
        if options.write_wrapper is True:
            print(wrapper_source)
        else:
            with open(options.write_wrapper, "w") as outf:
                outf.write(wrapper_source)

    return gen.get_picklable_function()
def generate_invoker(kernel, cl_kernel, impl_arg_info, options):
    """
    Generate the Python wrapper function that invokes *cl_kernel*.

    The generated function takes ``cl_kernel``, ``queue``, ``allocator``,
    ``wait_for`` and ``out_host`` followed by one ``<name>=None`` keyword
    per entry of *impl_arg_info*. It enqueues the kernel and returns
    ``(_lpy_evt, outputs)``, where ``outputs`` is a dict keyed by argument
    name if ``options.return_dict`` is set and a tuple otherwise. Only
    arguments whose ``base_name`` is among the kernel's written variables
    are returned.

    :arg kernel: the kernel providing the name, grid sizes and written
        variables
    :arg cl_kernel: passed through to :func:`generate_value_arg_setup`
    :arg impl_arg_info: iterable of argument descriptors with ``name``
        and ``base_name`` attributes
    :arg options: options object; ``no_numpy``, ``return_dict``,
        ``write_wrapper`` and ``highlight_wrapper`` are read here

    :returns: the result of ``gen.get_function()``
    """
    system_args = [
        "cl_kernel", "queue", "allocator=None", "wait_for=None",
        # ignored if options.no_numpy
        "out_host=None"
        ]

    gen = PythonFunctionGenerator(
            "invoke_%s_loopy_kernel" % kernel.name,
            system_args + ["%s=None" % iai.name for iai in impl_arg_info])

    gen.add_to_preamble("from __future__ import division")
    gen.add_to_preamble("")
    gen.add_to_preamble("import pyopencl as _lpy_cl")
    gen.add_to_preamble("import pyopencl.array as _lpy_cl_array")
    gen.add_to_preamble("import pyopencl.tools as _lpy_cl_tools")
    gen.add_to_preamble("import numpy as _lpy_np")
    gen.add_to_preamble("from struct import pack as _lpy_pack")
    gen.add_to_preamble("")

    gen("if allocator is None:")
    with Indentation(gen):
        gen("allocator = _lpy_cl_tools.DeferredAllocator(queue.context)")
    gen("")

    generate_integer_arg_finding_from_shapes(gen, kernel, impl_arg_info, options)
    generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, options)
    generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, options)

    arg_idx_to_cl_arg_idx = \
            generate_value_arg_setup(gen, kernel, cl_kernel, impl_arg_info, options)
    generate_array_arg_setup(gen, kernel, impl_arg_info, options,
            arg_idx_to_cl_arg_idx)

    # {{{ generate invocation

    from loopy.symbolic import StringifyMapper

    strify = StringifyMapper()
    gsize_expr, lsize_expr = kernel.get_grid_sizes_as_exprs()

    # An empty grid/group size falls back to a single work item.
    if not gsize_expr:
        gsize_expr = (1,)
    if not lsize_expr:
        lsize_expr = (1,)

    def strify_tuple(t):
        return "(%s,)" % (
                ", ".join("int(%s)" % strify(t_i) for t_i in t))

    gen("_lpy_evt = _lpy_cl.enqueue_nd_range_kernel(queue, cl_kernel, "
            "%(gsize)s, %(lsize)s, wait_for=wait_for, g_times_l=True)"
            % dict(
                gsize=strify_tuple(gsize_expr),
                lsize=strify_tuple(lsize_expr)))
    gen("")

    # }}}

    # {{{ output

    # The set of written variables does not change while generating the
    # wrapper, so determine the output arguments once and reuse them for
    # the host transfer and for the return statement.
    written_vars = kernel.get_written_variables()
    out_args = [arg for arg in impl_arg_info if arg.base_name in written_vars]

    if not options.no_numpy:
        gen("if out_host is None and (_lpy_encountered_numpy "
                "and not _lpy_encountered_dev):")
        with Indentation(gen):
            gen("out_host = True")

        gen("if out_host:")
        with Indentation(gen):
            gen("pass")  # if no outputs (?!)
            for arg in out_args:
                gen("%s = %s.get(queue=queue)" % (arg.name, arg.name))

        gen("")

    if options.return_dict:
        gen("return _lpy_evt, {%s}"
                % ", ".join("\"%s\": %s" % (arg.name, arg.name)
                    for arg in out_args))
    elif out_args:
        gen("return _lpy_evt, (%s,)"
                % ", ".join(arg.name for arg in out_args))
    else:
        gen("return _lpy_evt, ()")

    # }}}

    if options.write_wrapper:
        output = gen.get()
        if options.highlight_wrapper:
            output = get_highlighted_python_code(output)

        # ``True`` means "print"; any other truthy value is a file name.
        if options.write_wrapper is True:
            print(output)
        else:
            with open(options.write_wrapper, "w") as outf:
                outf.write(output)

    return gen.get_function()
def _cache_kernel_stats(self, t_unit: lp.TranslationUnit,
                        kwargs: dict) -> tuple:
    """Generate the kernel stats for a program with its args.

    The statistics are stored in ``self.kernel_stats[t_unit][key]``, where
    ``key`` is a tuple with one ``(name, value.shape)`` pair per keyword
    argument that has a ``shape`` attribute and ``(name, value)`` otherwise.
    If an entry for that key already exists nothing is recomputed.

    When ``self.logmgr`` is set and has no ``"<entrypoint>_time"`` quantity
    yet, a :class:`KernelProfile` quantity for the entrypoint is added.

    :arg t_unit: the translation unit whose default entrypoint is profiled
    :arg kwargs: the keyword arguments the program is called with
    :returns: the cache key described above
    """
    args_tuple = tuple(
        (key, value.shape) if hasattr(value, "shape") else (key, value)
        for key, value in kwargs.items())

    # Are kernel stats already in the cache? Only the lookup is guarded so
    # that failures in the computation below are not reported as happening
    # "during handling of" this KeyError.
    try:
        self.kernel_stats[t_unit][args_tuple]
    except KeyError:
        pass
    else:
        return args_tuple

    # If not, calculate and cache the stats
    ep_name = t_unit.default_entrypoint.name
    executor = t_unit.target.get_kernel_executor(t_unit, self.queue,
                                                 entrypoint=ep_name)
    arg_dtypes = executor.arg_to_dtype_set(kwargs)

    info = executor.translation_unit_info(ep_name, arg_dtypes)
    typed_t_unit = executor.get_typed_and_scheduled_translation_unit(
        ep_name, arg_dtypes)
    kernel = typed_t_unit[ep_name]

    idi = info.implemented_data_info

    # Every kernel argument and every implemented argument must be a
    # parameter of the generated function; those not supplied are None.
    param_dict = kwargs.copy()
    param_dict.update(
        {k: None for k in kernel.arg_dict if k not in param_dict})
    param_dict.update(
        {d.name: None for d in idi if d.name not in param_dict})

    # Generate the wrapper code
    wrapper = executor.get_wrapper_generator()

    gen = PythonFunctionGenerator("_mcom_gen_args_profile", list(param_dict))

    wrapper.generate_integer_arg_finding_from_shapes(gen, kernel, idi)
    wrapper.generate_integer_arg_finding_from_offsets(gen, kernel, idi)
    wrapper.generate_integer_arg_finding_from_strides(gen, kernel, idi)

    param_names = kernel.all_params()
    gen("return {%s}" % ", ".join(
        f"{repr(name)}: {name}" for name in param_names))

    # Run the wrapper code, save argument values in domain_params
    domain_params = gen.get_picklable_function()(**param_dict)

    # Get flops/memory statistics
    op_map = lp.get_op_map(typed_t_unit, subgroup_size="guess")
    bytes_accessed = lp.get_mem_access_map(
        typed_t_unit, subgroup_size="guess") \
        .to_bytes().eval_and_sum(domain_params)

    flops = op_map.filter_by(
        dtype=[np.float32, np.float64]).eval_and_sum(domain_params)

    # Footprint gathering is not yet available in loopy with
    # kernel callables:
    # https://github.com/inducer/loopy/issues/399
    footprint_gathering_available = False
    if footprint_gathering_available:
        try:
            footprint = lp.gather_access_footprint_bytes(typed_t_unit)
            footprint_bytes = sum(
                footprint[k].eval_with_dict(domain_params)
                for k in footprint)
        except lp.symbolic.UnableToDetermineAccessRange:
            footprint_bytes = None
    else:
        footprint_bytes = None

    res = SingleCallKernelProfile(time=0, flops=flops,
                                  bytes_accessed=bytes_accessed,
                                  footprint_bytes=footprint_bytes)

    self.kernel_stats.setdefault(t_unit, {})[args_tuple] = res

    if self.logmgr and f"{ep_name}_time" not in self.logmgr.quantity_data:
        self.logmgr.add_quantity(KernelProfile(self, ep_name))

    return args_tuple
def generate_invoker(kernel, codegen_result):
    """
    Generate the Python wrapper function that invokes the host program.

    The generated function takes ``_lpy_cl_kernels``, ``queue``,
    ``allocator``, ``wait_for`` and ``out_host`` followed by one
    ``<name>=None`` keyword for every implemented argument whose
    ``arg_class`` is a :class:`KernelArgument` subclass. It calls the host
    program named ``codegen_result.host_program.name`` and returns
    ``(_lpy_evt, outputs)``, where ``outputs`` is a dict keyed by argument
    name if ``kernel.options.return_dict`` is set and a tuple otherwise.
    Only kernel arguments whose ``base_name`` is among the kernel's written
    variables are returned.

    :arg kernel: the kernel providing name, options and written variables
    :arg codegen_result: provides ``implemented_data_info``,
        ``host_code()`` and ``host_program``

    :returns: the result of ``gen.get_function()``
    """
    options = kernel.options
    implemented_data_info = codegen_result.implemented_data_info
    host_code = codegen_result.host_code()

    system_args = [
        "_lpy_cl_kernels", "queue", "allocator=None", "wait_for=None",
        # ignored if options.no_numpy
        "out_host=None"
        ]

    from loopy.kernel.data import KernelArgument

    # Entries that are not kernel arguments appear neither in the
    # signature nor among the outputs, so filter them out once.
    kernel_args = [
        idi for idi in implemented_data_info
        if issubclass(idi.arg_class, KernelArgument)]

    gen = PythonFunctionGenerator(
            "invoke_%s_loopy_kernel" % kernel.name,
            system_args + ["%s=None" % idi.name for idi in kernel_args])

    gen.add_to_preamble("from __future__ import division")
    gen.add_to_preamble("")
    gen.add_to_preamble("import pyopencl as _lpy_cl")
    gen.add_to_preamble("import pyopencl.array as _lpy_cl_array")
    gen.add_to_preamble("import pyopencl.tools as _lpy_cl_tools")
    gen.add_to_preamble("import numpy as _lpy_np")
    gen.add_to_preamble("")
    gen.add_to_preamble(host_code)
    gen.add_to_preamble("")

    gen("if allocator is None:")
    with Indentation(gen):
        gen("allocator = _lpy_cl_tools.DeferredAllocator(queue.context)")
    gen("")

    generate_integer_arg_finding_from_shapes(gen, kernel, implemented_data_info)
    generate_integer_arg_finding_from_offsets(gen, kernel, implemented_data_info)
    generate_integer_arg_finding_from_strides(gen, kernel, implemented_data_info)
    generate_value_arg_check(gen, kernel, implemented_data_info)

    args = generate_arg_setup(gen, kernel, implemented_data_info, options)

    # {{{ generate invocation

    gen("_lpy_evt = {kernel_name}({args})".format(
        kernel_name=codegen_result.host_program.name,
        args=", ".join(
            ["_lpy_cl_kernels", "queue"] + args + ["wait_for=wait_for"])))

    # }}}

    # {{{ output

    # The written variables are fixed during wrapper generation; compute
    # the list of output arguments once instead of per use.
    written_vars = kernel.get_written_variables()
    out_args = [arg for arg in kernel_args if arg.base_name in written_vars]

    if not options.no_numpy:
        gen("if out_host is None and (_lpy_encountered_numpy "
                "and not _lpy_encountered_dev):")
        with Indentation(gen):
            gen("out_host = True")

        gen("if out_host:")
        with Indentation(gen):
            gen("pass")  # if no outputs (?!)
            for arg in out_args:
                gen("%s = %s.get(queue=queue)" % (arg.name, arg.name))

        gen("")

    if options.return_dict:
        gen("return _lpy_evt, {%s}"
                % ", ".join("\"%s\": %s" % (arg.name, arg.name)
                    for arg in out_args))
    elif out_args:
        gen("return _lpy_evt, (%s,)"
                % ", ".join(arg.name for arg in out_args))
    else:
        gen("return _lpy_evt, ()")

    # }}}

    if options.write_wrapper:
        output = gen.get()
        if options.highlight_wrapper:
            output = get_highlighted_python_code(output)

        # ``True`` means "print"; any other truthy value is a file name.
        if options.write_wrapper is True:
            print(output)
        else:
            with open(options.write_wrapper, "w") as outf:
                outf.write(output)

    return gen.get_function()
def generate_invoker(kernel, codegen_result):
    """
    Build the Python function wrapping a call to the generated host program.

    Signature of the generated function: ``_lpy_cl_kernels``, ``queue``,
    ``allocator=None``, ``wait_for=None``, ``out_host=None`` and then one
    ``<name>=None`` keyword per implemented argument whose ``arg_class``
    derives from :class:`KernelArgument`.

    The generated function returns ``(_lpy_evt, outputs)``. ``outputs``
    holds the kernel arguments whose ``base_name`` is a written variable
    of *kernel*: as a dict keyed by name when ``options.return_dict`` is
    set, as a tuple otherwise.

    :arg kernel: supplies ``name``, ``options`` and
        ``get_written_variables()``
    :arg codegen_result: supplies ``implemented_data_info``,
        ``host_code()`` and ``host_program.name``

    :returns: whatever ``gen.get_function()`` returns
    """
    options = kernel.options
    implemented_data_info = codegen_result.implemented_data_info
    host_code = codegen_result.host_code()

    system_args = [
        "_lpy_cl_kernels", "queue", "allocator=None", "wait_for=None",
        # ignored if options.no_numpy
        "out_host=None"
        ]

    from loopy.kernel.data import KernelArgument

    # Single pass over the data info: everything below that needs "the
    # kernel arguments" reuses this list rather than re-filtering.
    kernel_args = [
        idi for idi in implemented_data_info
        if issubclass(idi.arg_class, KernelArgument)]

    gen = PythonFunctionGenerator(
            "invoke_%s_loopy_kernel" % kernel.name,
            system_args + ["%s=None" % idi.name for idi in kernel_args])

    for preamble_line in (
            "from __future__ import division",
            "",
            "import pyopencl as _lpy_cl",
            "import pyopencl.array as _lpy_cl_array",
            "import pyopencl.tools as _lpy_cl_tools",
            "import numpy as _lpy_np",
            "",
            host_code,
            ""):
        gen.add_to_preamble(preamble_line)

    gen("if allocator is None:")
    with Indentation(gen):
        gen("allocator = _lpy_cl_tools.DeferredAllocator(queue.context)")
    gen("")

    generate_integer_arg_finding_from_shapes(gen, kernel, implemented_data_info)
    generate_integer_arg_finding_from_offsets(gen, kernel, implemented_data_info)
    generate_integer_arg_finding_from_strides(gen, kernel, implemented_data_info)
    generate_value_arg_check(gen, kernel, implemented_data_info)

    args = generate_arg_setup(gen, kernel, implemented_data_info, options)

    # {{{ generate invocation

    gen("_lpy_evt = {kernel_name}({args})"
        .format(
            kernel_name=codegen_result.host_program.name,
            args=", ".join(
                ["_lpy_cl_kernels", "queue"]
                + args
                + ["wait_for=wait_for"])))

    # }}}

    # {{{ output

    # Look up the written variables once; the same output list drives the
    # device-to-host copies and the return statement.
    written_vars = kernel.get_written_variables()
    out_args = [arg for arg in kernel_args if arg.base_name in written_vars]

    if not options.no_numpy:
        gen("if out_host is None and (_lpy_encountered_numpy "
                "and not _lpy_encountered_dev):")
        with Indentation(gen):
            gen("out_host = True")

        gen("if out_host:")
        with Indentation(gen):
            gen("pass")  # if no outputs (?!)
            for arg in out_args:
                gen("%s = %s.get(queue=queue)" % (arg.name, arg.name))

        gen("")

    if options.return_dict:
        gen("return _lpy_evt, {%s}"
                % ", ".join("\"%s\": %s" % (arg.name, arg.name)
                    for arg in out_args))
    elif out_args:
        gen("return _lpy_evt, (%s,)"
                % ", ".join(arg.name for arg in out_args))
    else:
        gen("return _lpy_evt, ()")

    # }}}

    if options.write_wrapper:
        output = gen.get()
        if options.highlight_wrapper:
            output = get_highlighted_python_code(output)

        # ``True`` requests printing; other truthy values name a file.
        if options.write_wrapper is True:
            print(output)
        else:
            with open(options.write_wrapper, "w") as outf:
                outf.write(output)

    return gen.get_function()
def __call__(self, kernel, codegen_result):
    """
    Generate the wrapping Python invoker for this execution target.

    The wrapper takes ``self.system_args`` plus one ``<name>=None``
    keyword for each implemented argument whose ``arg_class`` derives from
    :class:`KernelArgument`. Its preamble starts with
    ``from __future__ import division``, followed by the target-specific
    preamble and the host code.

    When ``kernel.options.write_wrapper`` is truthy the wrapper source is
    printed (option is exactly ``True``) or written to the file the option
    names; ``highlight_wrapper`` requests highlighting of that output.

    :arg kernel: the loopy :class:`LoopKernel` to be executed
    :arg codegen_result: the loopy :class:`CodeGenerationResult` created
        by code generation

    :returns: a Python callable that handles execution of this kernel
    """
    options = kernel.options
    implemented_data_info = codegen_result.implemented_data_info

    from loopy.kernel.data import KernelArgument

    function_name = "invoke_%s_loopy_kernel" % kernel.name
    keyword_params = [
        "%s=None" % info.name
        for info in implemented_data_info
        if issubclass(info.arg_class, KernelArgument)]
    gen = PythonFunctionGenerator(
        function_name, self.system_args + keyword_params)

    gen.add_to_preamble("from __future__ import division")
    gen.add_to_preamble("")
    self.target_specific_preamble(gen)
    gen.add_to_preamble("")
    self.generate_host_code(gen, codegen_result)
    gen.add_to_preamble("")

    self.initialize_system_args(gen)

    # These steps share one call signature and run in this fixed order.
    for generate_step in (
            self.generate_integer_arg_finding_from_shapes,
            self.generate_integer_arg_finding_from_offsets,
            self.generate_integer_arg_finding_from_strides,
            self.generate_value_arg_check):
        generate_step(gen, kernel, implemented_data_info)

    call_args = self.generate_arg_setup(
        gen, kernel, implemented_data_info, options)

    host_program_name = codegen_result.host_program.name
    self.generate_invocation(
        gen, host_program_name, call_args, kernel, implemented_data_info)

    self.generate_output_handler(
        gen, options, kernel, implemented_data_info)

    if options.write_wrapper:
        text = gen.get()
        if options.highlight_wrapper:
            text = get_highlighted_python_code(text)

        if options.write_wrapper is True:
            # Plain ``True``: dump to standard output.
            print(text)
        else:
            # Anything else is taken as the destination file name.
            with open(options.write_wrapper, "w") as outf:
                outf.write(text)

    return gen.get_picklable_function()
def emit_def_begin(self, name):
    """Begin emitting the function for *name*.

    Installs a new :class:`PythonFunctionEmitter` for a function called
    ``"phase_" + name`` that takes ``self`` as its only parameter, then
    calls ``clear_locals()`` on the name manager.
    """
    phase_function_name = "phase_" + name
    self._emitter = PythonFunctionEmitter(phase_function_name, ("self", ))
    self._name_manager.clear_locals()
class CodeGenerator(StructuredCodeGenerator):
    """Generates the source of a Python class from a DAG of phases.

    Each phase becomes a ``phase_<name>`` generator method of the emitted
    class; ``__init__``, ``set_up``, ``run`` and ``run_single_step`` are
    emitted alongside.

    .. automethod:: __init__
    .. automethod:: __call__
    """

    def __init__(self, class_name, class_preamble=None, function_registry=None):
        """
        :arg class_name: The name of the class to generate
        :arg class_preamble: A string to include at the beginning of the
            class (in class scope)
        :arg function_registry: An instance of
            :class:`dagrt.function_registry.FunctionRegistry`. Defaults to
            ``dagrt.function_registry.base_function_registry``.
        """
        if function_registry is None:
            from dagrt.function_registry import base_function_registry
            function_registry = base_function_registry

        from dagrt.codegen.utils import remove_common_indentation
        self.class_preamble = remove_common_indentation(class_preamble)

        self._class_name = class_name
        self._class_emitter = PythonClassEmitter(class_name)

        # Map from variable / RHS names to names in generated code
        self._name_manager = PythonNameManager()

        self._expr_mapper = PythonExpressionMapper(self._name_manager,
                                                   function_registry,
                                                   numpy="self._numpy")

    def __call__(self, dag):
        """
        Verify *dag* and generate code for all of its phases.

        :returns: the generated source code of the class, as returned by
            :meth:`get_code`. Use :meth:`get_class` to obtain the class
            object itself.
        """
        from dagrt.codegen.analysis import verify_code
        verify_code(dag)

        from dagrt.codegen.dag_ast import create_ast_from_phase

        self.begin_emit(dag)
        for phase_name in dag.phases:
            ast = create_ast_from_phase(dag, phase_name)
            self._pre_lower(ast)
            self.lower_function(phase_name, ast)
        self.finish_emit(dag)

        return self.get_code()

    def _pre_lower(self, ast):
        """Record in ``self._has_yield_inst`` whether *ast* contains a
        :class:`YieldState` statement."""
        self._has_yield_inst = False
        from dagrt.language import YieldState
        from dagrt.codegen.dag_ast import get_statements_in_ast
        for inst in get_statements_in_ast(ast):
            if isinstance(inst, YieldState):
                self._has_yield_inst = True
                return

    def lower_function(self, function_name, ast):
        """Emit one complete function: header, lowered *ast*, footer."""
        self.emit_def_begin(function_name)
        self.lower_ast(ast)
        self.emit_def_end()

    def get_class(self, code):
        """Return the compiled Python class for the method."""
        python_code = self(code)
        namespace = exec_in_new_namespace(python_code)
        return namespace[self._class_name]

    def _expr(self, expr):
        """Return *expr* translated by the expression mapper."""
        return self._expr_mapper(expr)

    def _emit(self, line):
        """Emit *line* into the current function emitter, wrapped by
        ``wrap_line`` for the combined class and function nesting level."""
        level = self._class_emitter.level + self._emitter.level
        for wrapped_line in wrap_line(line, level):
            self._emitter(wrapped_line)

    def begin_emit(self, dag):
        """Emit the class preamble (if any) and the inner classes."""
        if self.class_preamble:
            emit = PythonEmitter()
            for line in self.class_preamble:
                emit(line)
            emit("")
            self._class_emitter.incorporate(emit)
        self._emit_inner_classes()

    def _emit_inner_classes(self):
        """Emit the inner classes that describe objects returned by the method.

        Also copies the source of :mod:`dagrt.builtins_python` into the
        class, replacing ``builtin`` with ``_builtin`` on every line and
        marking each ``def builtin...`` as a static method.

        :raises RuntimeError: if the source file of the built-ins module
            cannot be located
        """
        emit = PythonEmitter()
        for line in _inner_class_code.splitlines():
            emit(line)

        from inspect import getsourcefile
        import dagrt.builtins_python as builtins
        builtins_source_file = getsourcefile(builtins)

        if builtins_source_file is None:
            raise RuntimeError(
                "source code for built-in functions cannot be located")

        with open(builtins_source_file) as srcf:
            builtins_source = srcf.read()

        for line in builtins_source.split("\n"):
            if line.startswith("def builtin"):
                emit("@staticmethod")
            emit(line.replace("builtin", "_builtin"))

        self._class_emitter.incorporate(emit)

    def _emit_constructor(self, dag):
        """Emit the constructor."""
        emit = PythonFunctionEmitter("__init__", ("self", "function_map"))
        # Perform necessary imports.
        emit("import numpy")
        emit("self._numpy = numpy")

        # Make function symbols available
        emit("self._functions = self._function_symbol_container()")
        for function_id in self._name_manager.function_map:
            py_function_id = self._name_manager.name_function(function_id)
            emit('{py_function_id} = function_map["{function_id}"]'.format(
                py_function_id=py_function_id,
                function_id=function_id))
        emit("")

        # Maps each phase name to (next phase name, bound phase method).
        emit("self.phase_transition_table = " + repr({
            phase_name: (phase.next_phase,
                         BareExpression("self.phase_" + phase_name))
            for phase_name, phase in dag.phases.items()
            }))
        emit("")

        self._class_emitter.incorporate(emit)

    def _emit_set_up(self, dag):
        """Emit the set_up() method."""
        emit = PythonFunctionEmitter(
            "set_up", ("self", "t_start", "dt_start", "context"))
        emit("self.t = t_start")
        emit("self.dt = dt_start")

        # Save all the context components.
        for component_id in self._name_manager.get_global_ids():
            component = self._name_manager.name_global(component_id)
            if not component_id.startswith("<state>"):
                continue
            # Strip the "<state>" prefix (7 characters).
            component_id = component_id[7:]
            emit('{component} = context.get("{component_id}")'.format(
                component=component,
                component_id=component_id))

        emit("self.next_phase = " + repr(dag.initial_phase))
        emit("")

        self._class_emitter.incorporate(emit)

    def _emit_run(self):
        """Emit the run() generator method driving the time stepping."""
        emit = PythonFunctionEmitter("run",
                                     ("self", "t_end=None", "max_steps=None"))
        emit("""
            n_steps = 0
            while True:
                if t_end is not None and self.t >= t_end:
                    return

                if max_steps is not None and n_steps >= max_steps:
                    return

                cur_phase = self.next_phase
                try:
                    for evt in self.run_single_step():
                        yield evt

                except self.FailStepException:
                    yield self.StepFailed(t=self.t)
                    continue

                except self.TransitionEvent as evt:
                    self.next_phase = evt.next_phase

                yield self.StepCompleted(dt=self.dt, t=self.t,
                    current_phase=cur_phase, next_phase=self.next_phase)

                n_steps += 1
            """)
        self._class_emitter.incorporate(emit)

    def _emit_run_single_step(self):
        """Emit the run_single_step() generator method."""
        emit = PythonFunctionEmitter("run_single_step", ("self", ))
        emit("""
            self.next_phase, phase_func = (
                self.phase_transition_table[self.next_phase])
            for evt in phase_func():
                yield evt
            """)
        self._class_emitter.incorporate(emit)

    def finish_emit(self, dag):
        """Emit the constructor, set_up(), run() and run_single_step()."""
        self._emit_constructor(dag)
        self._emit_set_up(dag)
        self._emit_run()
        self._emit_run_single_step()

    def get_code(self):
        """Return the source accumulated in the class emitter."""
        return self._class_emitter.get()

    def emit_def_begin(self, name):
        """Start the ``phase_<name>`` method and reset local names."""
        self._emitter = PythonFunctionEmitter("phase_" + name, ("self", ))
        self._name_manager.clear_locals()

    def emit_def_end(self):
        """Finish the current method and add it to the class."""
        self._emit("")
        self._class_emitter.incorporate(self._emitter)
        del self._emitter

    def emit_if_begin(self, expr):
        self._emit(f"if {self._expr(expr)}:")
        self._emitter.indent()

    def emit_if_end(self):
        self._emitter.dedent()

    def emit_for_begin(self, loop_var_name, lbound, ubound):
        self._emit(f"for {self._name_manager[loop_var_name]} in "
                   f"range({self._expr(lbound)}, {self._expr(ubound)}):")
        self._emitter.indent()

    def emit_for_end(self, loop_var_name):
        self._emitter.dedent()

    def emit_else_begin(self):
        # Leave the "if" body before opening the "else" body.
        self._emitter.dedent()
        self._emit("else:")
        self._emitter.indent()

    def emit_return(self):
        self._emit("return")
        # Ensure that Python recognizes this method as a generator function by
        # adding a yield statement. Otherwise, calling methods that do not
        # yield any values may result in raising a naked StopIteration instead
        # of the creation of a generator, which does not interact well with the
        # run() implementation.
        #
        # TODO: Python 3.3+ has "yield from ()" which results in slightly less
        # awkward syntax.
        if not self._has_yield_inst:
            self._emit("yield")

    # {{{ statements

    def emit_inst_Assign(self, inst):
        """Emit an assignment, wrapped in one ``for`` loop per entry of
        ``inst.loops``; the loop variables are deleted afterwards."""
        emitter = self._emitter

        for ident, start, stop in inst.loops:
            managed_ident = self._name_manager[ident]
            emitter("for {ident} in range({start}, {stop}):".format(
                ident=managed_ident,
                start=self._expr(start),
                stop=self._expr(stop)))
            emitter.indent()

        if inst.assignee_subscript:
            subscript_code = "[%s]" % (", ".join(
                self._expr(sub_i) for sub_i in inst.assignee_subscript))
        else:
            subscript_code = ""

        self._emit("{name}{sub} = {expr}".format(
            name=self._name_manager[inst.assignee],
            sub=subscript_code,
            expr=self._expr(inst.expression)))

        # Close every loop opened above.
        for _ident, _start, _stop in inst.loops:
            emitter.dedent()

        for ident, _start, _stop in inst.loops:
            managed_ident = self._name_manager[ident]
            emitter(f"del {managed_ident}")

    def emit_inst_AssignFunctionCall(self, inst):
        """Emit a function call, assigning its result(s) to
        ``inst.assignees`` if there are any."""
        if len(inst.assignees) == 0:
            assign_code = ""
        else:
            assign_code = (", ".join(self._name_manager[n]
                                     for n in inst.assignees) + " = ")

        from pymbolic import var
        self._emit("{assign_code}{expr}".format(
            assign_code=assign_code,
            expr=self._expr_mapper.map_generic_call(var(inst.function_id),
                                                    inst.parameters,
                                                    inst.kw_parameters)))

    def emit_inst_YieldState(self, inst):
        self._emit("yield self.StateComputed(t={t}, time_id={time_id}, "
                   "component_id={component_id}, "
                   "state_component={state_component})".format(
                       t=self._expr(inst.time),
                       time_id=repr(inst.time_id),
                       component_id=repr(inst.component_id),
                       state_component=self._expr(inst.expression)))

    def emit_inst_Raise(self, inst):
        self._emit("raise self.StepError({condition}, {message})".format(
            condition=repr(inst.error_condition.__name__),
            message=repr(inst.error_message)))

        if not self._has_yield_inst:
            self._emit("yield")

    def emit_inst_FailStep(self, inst):
        self._emit("raise self.FailStepException()")

        if not self._has_yield_inst:
            self._emit("yield")

    def emit_inst_SwitchPhase(self, inst):
        # repr() produces a correctly quoted and escaped string literal for
        # any phase name, matching how phase names are written into
        # phase_transition_table.
        self._emit("raise self.TransitionEvent(" + repr(inst.next_phase) + ")")

        if not self._has_yield_inst:
            self._emit("yield")
def _cache_kernel_stats(self, program: lp.kernel.LoopKernel,
                        kwargs: dict) -> tuple:
    """Generate the kernel stats for a program with its args.

    Results are stored in ``self.kernel_stats[program][key]``. ``key`` is a
    tuple holding ``(name, value.shape)`` for each keyword argument with a
    ``shape`` attribute and ``(name, value)`` for the others. Nothing is
    recomputed if an entry for that key already exists.

    :arg program: the kernel to gather statistics for
    :arg kwargs: the keyword arguments the program is called with
    :returns: the cache key described above
    """
    args_tuple = tuple(
        (key, value.shape) if hasattr(value, "shape") else (key, value)
        for key, value in kwargs.items())

    # Are kernel stats already in the cache? Keep only the lookup inside
    # the try block so errors from the computation below are not chained
    # to this KeyError.
    try:
        self.kernel_stats[program][args_tuple]
    except KeyError:
        pass
    else:
        return args_tuple

    # If not, calculate and cache the stats
    executor = program.target.get_kernel_executor(program, self.queue)
    arg_dtypes = executor.arg_to_dtype_set(kwargs)

    info = executor.kernel_info(arg_dtypes)
    kernel = executor.get_typed_and_scheduled_kernel(arg_dtypes)

    idi = info.implemented_data_info

    # Arguments that carry a dtype other than object; handed to
    # add_and_infer_dtypes below.
    types = {
        k: v for k, v in kwargs.items()
        if hasattr(v, "dtype") and v.dtype != object
    }

    # Every kernel argument and every implemented argument must be a
    # parameter of the generated function; those not supplied are None.
    param_dict = kwargs.copy()
    param_dict.update(
        {k: None for k in kernel.arg_dict if k not in param_dict})
    param_dict.update(
        {d.name: None for d in idi if d.name not in param_dict})

    # Generate the wrapper code
    wrapper = executor.get_wrapper_generator()

    gen = PythonFunctionGenerator("_mcom_gen_args_profile", list(param_dict))

    wrapper.generate_integer_arg_finding_from_shapes(gen, kernel, idi)
    wrapper.generate_integer_arg_finding_from_offsets(gen, kernel, idi)
    wrapper.generate_integer_arg_finding_from_strides(gen, kernel, idi)

    param_names = program.all_params()
    gen("return {%s}" % ", ".join(
        f"{repr(name)}: {name}" for name in param_names))

    # Run the wrapper code, save argument values in domain_params
    domain_params = gen.get_picklable_function()(**param_dict)

    # Get flops/memory statistics
    kernel = lp.add_and_infer_dtypes(kernel, types)
    op_map = lp.get_op_map(kernel, subgroup_size="guess")
    bytes_accessed = lp.get_mem_access_map(kernel, subgroup_size="guess") \
        .to_bytes().eval_and_sum(domain_params)

    flops = op_map.filter_by(
        dtype=[np.float32, np.float64]).eval_and_sum(domain_params)

    try:
        footprint = lp.gather_access_footprint_bytes(kernel)
        footprint_bytes = sum(
            footprint[k].eval_with_dict(domain_params) for k in footprint)
    except lp.symbolic.UnableToDetermineAccessRange:
        # The footprint is optional; record its absence.
        footprint_bytes = None

    res = ProfileResult(time=0, flops=flops, bytes_accessed=bytes_accessed,
                        footprint_bytes=footprint_bytes)

    self.kernel_stats.setdefault(program, {})[args_tuple] = res

    return args_tuple