示例#1
0
    def __call__(self, kernel, codegen_result):
        """
        Generate the wrapping Python invoker for this execution target.

        :arg kernel: the loopy :class:`LoopKernel`(s) to be executed
        :arg codegen_result: the loopy :class:`CodeGenerationResult` created
            by code generation

        :returns: A python callable that handles execution of this
            kernel
        """
        from loopy.kernel.data import KernelArgument

        opts = kernel.options
        data_info = codegen_result.implemented_data_info

        # Every implemented kernel argument becomes an optional keyword
        # parameter of the generated function, after the system arguments.
        invoker_name = "invoke_%s_loopy_kernel" % kernel.name
        kernel_arg_params = []
        for info in data_info:
            if issubclass(info.arg_class, KernelArgument):
                kernel_arg_params.append("%s=None" % info.name)

        fn_gen = PythonFunctionGenerator(
            invoker_name, self.system_args + kernel_arg_params)

        # Preamble: target-specific part, then the host code, each followed
        # by an empty preamble line.
        self.target_specific_preamble(fn_gen)
        fn_gen.add_to_preamble("")
        self.generate_host_code(fn_gen, codegen_result)
        fn_gen.add_to_preamble("")

        self.initialize_system_args(fn_gen)

        # Argument handling inside the generated function body.
        self.generate_integer_arg_finding_from_shapes(
            fn_gen, kernel, data_info)
        self.generate_integer_arg_finding_from_offsets(
            fn_gen, kernel, data_info)
        self.generate_integer_arg_finding_from_strides(
            fn_gen, kernel, data_info)
        self.generate_value_arg_check(fn_gen, kernel, data_info)

        call_args = self.generate_arg_setup(fn_gen, kernel, data_info, opts)

        self.generate_invocation(
            fn_gen, codegen_result.host_program.name, call_args,
            kernel, data_info)

        self.generate_output_handler(fn_gen, opts, kernel, data_info)

        # Optionally dump the generated wrapper: ``write_wrapper is True``
        # prints it, any other truthy value is used as a file name.
        if opts.write_wrapper:
            wrapper_source = fn_gen.get()
            if opts.highlight_wrapper:
                wrapper_source = get_highlighted_python_code(wrapper_source)

            if opts.write_wrapper is not True:
                with open(opts.write_wrapper, "w") as outf:
                    outf.write(wrapper_source)
            else:
                print(wrapper_source)

        return fn_gen.get_picklable_function()
示例#2
0
文件: compiled.py 项目: navjotk/loopy
def generate_invoker(kernel, cl_kernel, impl_arg_info, options):
    """Generate the Python wrapper function that enqueues *kernel*.

    The wrapper is assembled line by line with a
    :class:`PythonFunctionGenerator`. Its parameters are the system
    arguments (``cl_kernel``, ``queue``, ``allocator``, ``wait_for``,
    ``out_host``) followed by one ``<name>=None`` keyword per entry of
    *impl_arg_info*.

    :arg kernel: the kernel being wrapped; supplies the name, the grid
        sizes and the set of written variables
    :arg cl_kernel: forwarded to :func:`generate_value_arg_setup`
    :arg impl_arg_info: iterable of argument descriptors; ``name`` and
        ``base_name`` are read here
    :arg options: options object; ``no_numpy``, ``return_dict``,
        ``write_wrapper`` and ``highlight_wrapper`` are read here

    :returns: the result of ``gen.get_function()`` for the generated wrapper
    """
    system_args = [
            "cl_kernel", "queue", "allocator=None", "wait_for=None",
            # ignored if options.no_numpy
            "out_host=None"
            ]

    gen = PythonFunctionGenerator(
            "invoke_%s_loopy_kernel" % kernel.name,
            system_args + ["%s=None" % iai.name for iai in impl_arg_info])

    for preamble_line in (
            "from __future__ import division",
            "",
            "import pyopencl as _lpy_cl",
            "import pyopencl.array as _lpy_cl_array",
            "import pyopencl.tools as _lpy_cl_tools",
            "import numpy as _lpy_np",
            "from struct import pack as _lpy_pack",
            ""):
        gen.add_to_preamble(preamble_line)

    gen("if allocator is None:")
    with Indentation(gen):
        gen("allocator = _lpy_cl_tools.DeferredAllocator(queue.context)")
    gen("")

    generate_integer_arg_finding_from_shapes(gen, kernel, impl_arg_info, options)
    generate_integer_arg_finding_from_offsets(gen, kernel, impl_arg_info, options)
    generate_integer_arg_finding_from_strides(gen, kernel, impl_arg_info, options)

    arg_idx_to_cl_arg_idx = \
            generate_value_arg_setup(gen, kernel, cl_kernel, impl_arg_info, options)
    generate_array_arg_setup(gen, kernel, impl_arg_info, options,
            arg_idx_to_cl_arg_idx)

    # {{{ generate invocation

    from loopy.symbolic import StringifyMapper

    strify = StringifyMapper()
    gsize_expr, lsize_expr = kernel.get_grid_sizes_as_exprs()

    # An empty grid size is replaced by a single-entry one.
    if not gsize_expr:
        gsize_expr = (1,)
    if not lsize_expr:
        lsize_expr = (1,)

    def strify_tuple(t):
        # Render *t* as the source text of a tuple of int(...) expressions.
        return "(%s,)" % (
                ", ".join("int(%s)" % strify(t_i) for t_i in t))

    gen("_lpy_evt = _lpy_cl.enqueue_nd_range_kernel(queue, cl_kernel, "
            "%(gsize)s, %(lsize)s,  wait_for=wait_for, g_times_l=True)"
            % dict(
                gsize=strify_tuple(gsize_expr),
                lsize=strify_tuple(lsize_expr)))
    gen("")

    # }}}

    # {{{ output

    # Determine the written arguments once; both the host-transfer code and
    # the return statement below are built from this same list.
    written_vars = kernel.get_written_variables()
    written_args = [arg
            for arg in impl_arg_info
            if arg.base_name in written_vars]

    if not options.no_numpy:
        gen("if out_host is None and (_lpy_encountered_numpy "
                "and not _lpy_encountered_dev):")
        with Indentation(gen):
            gen("out_host = True")

        gen("if out_host:")
        with Indentation(gen):
            gen("pass")  # if no outputs (?!)
            for arg in written_args:
                gen("%s = %s.get(queue=queue)" % (arg.name, arg.name))

        gen("")

    if options.return_dict:
        gen("return _lpy_evt, {%s}"
                % ", ".join("\"%s\": %s" % (arg.name, arg.name)
                    for arg in written_args))
    elif written_args:
        gen("return _lpy_evt, (%s,)"
                % ", ".join(arg.name for arg in written_args))
    else:
        gen("return _lpy_evt, ()")

    # }}}

    # Optionally dump the generated wrapper: ``write_wrapper is True`` prints
    # it, any other truthy value is used as a file name.
    if options.write_wrapper:
        output = gen.get()
        if options.highlight_wrapper:
            output = get_highlighted_python_code(output)

        if options.write_wrapper is True:
            print(output)
        else:
            with open(options.write_wrapper, "w") as outf:
                outf.write(output)

    return gen.get_function()
示例#3
0
    def _cache_kernel_stats(self, t_unit: lp.TranslationUnit,
                            kwargs: dict) -> tuple:
        """Generate the kernel stats for a program with its args.

        The statistics are stored in ``self.kernel_stats[t_unit][args_tuple]``
        and are only computed when that entry does not exist yet.

        :arg t_unit: the translation unit whose default entrypoint is profiled
        :arg kwargs: the keyword arguments the program is called with
        :returns: the cache key derived from *kwargs*: a tuple of
            ``(name, value.shape)`` for values that have a ``shape``
            attribute and ``(name, value)`` otherwise
        """
        args_tuple = tuple(
            (key, value.shape) if hasattr(value, "shape") else (key, value)
            for key, value in kwargs.items())

        # Are kernel stats already in the cache?  Only the lookup is guarded,
        # so errors raised while computing the stats are not chained to the
        # cache-miss KeyError.
        try:
            self.kernel_stats[t_unit][args_tuple]
        except KeyError:
            pass
        else:
            return args_tuple

        # If not, calculate and cache the stats
        ep_name = t_unit.default_entrypoint.name
        executor = t_unit.target.get_kernel_executor(t_unit,
                                                     self.queue,
                                                     entrypoint=ep_name)

        # Computed once and shared by both executor queries below.
        arg_dtypes = executor.arg_to_dtype_set(kwargs)
        info = executor.translation_unit_info(ep_name, arg_dtypes)
        typed_t_unit = executor.get_typed_and_scheduled_translation_unit(
            ep_name, arg_dtypes)
        kernel = typed_t_unit[ep_name]

        idi = info.implemented_data_info

        # Every kernel argument and implemented-data entry that the caller
        # did not pass is given the value None.
        param_dict = kwargs.copy()
        for arg_name in kernel.arg_dict:
            param_dict.setdefault(arg_name, None)
        for data_info in idi:
            param_dict.setdefault(data_info.name, None)

        # Generate the wrapper code
        wrapper = executor.get_wrapper_generator()

        gen = PythonFunctionGenerator("_mcom_gen_args_profile",
                                      list(param_dict))

        wrapper.generate_integer_arg_finding_from_shapes(gen, kernel, idi)
        wrapper.generate_integer_arg_finding_from_offsets(gen, kernel, idi)
        wrapper.generate_integer_arg_finding_from_strides(gen, kernel, idi)

        param_names = kernel.all_params()
        gen("return {%s}" % ", ".join(f"{name!r}: {name}"
                                      for name in param_names))

        # Run the wrapper code, save argument values in domain_params
        domain_params = gen.get_picklable_function()(**param_dict)

        # Get flops/memory statistics
        op_map = lp.get_op_map(typed_t_unit, subgroup_size="guess")
        bytes_accessed = lp.get_mem_access_map(
            typed_t_unit, subgroup_size="guess") \
                        .to_bytes().eval_and_sum(domain_params)

        flops = op_map.filter_by(
            dtype=[np.float32, np.float64]).eval_and_sum(domain_params)

        # Footprint gathering is not yet available in loopy with
        # kernel callables:
        # https://github.com/inducer/loopy/issues/399
        if 0:
            try:
                footprint = lp.gather_access_footprint_bytes(typed_t_unit)
                footprint_bytes = sum(
                    footprint[k].eval_with_dict(domain_params)
                    for k in footprint)

            except lp.symbolic.UnableToDetermineAccessRange:
                footprint_bytes = None
        else:
            footprint_bytes = None

        # The profile is created with time=0.
        res = SingleCallKernelProfile(time=0,
                                      flops=flops,
                                      bytes_accessed=bytes_accessed,
                                      footprint_bytes=footprint_bytes)

        self.kernel_stats.setdefault(t_unit, {})[args_tuple] = res

        # Register a timing quantity for this entrypoint unless the log
        # manager already has one.
        if (self.logmgr
                and f"{ep_name}_time" not in self.logmgr.quantity_data):
            self.logmgr.add_quantity(KernelProfile(self, ep_name))

        return args_tuple
示例#4
0
def generate_invoker(kernel, codegen_result):
    """Generate the Python wrapper function that invokes *kernel*'s host code.

    The wrapper is assembled line by line with a
    :class:`PythonFunctionGenerator`. Its parameters are the system
    arguments (``_lpy_cl_kernels``, ``queue``, ``allocator``, ``wait_for``,
    ``out_host``) followed by one ``<name>=None`` keyword for every entry of
    ``codegen_result.implemented_data_info`` whose ``arg_class`` is a
    :class:`KernelArgument` subclass.

    :arg kernel: the kernel being wrapped; supplies the name, the options
        and the set of written variables
    :arg codegen_result: the code generation result; supplies the
        implemented data info, the host code and the host program name

    :returns: the result of ``gen.get_function()`` for the generated wrapper
    """
    options = kernel.options
    implemented_data_info = codegen_result.implemented_data_info
    host_code = codegen_result.host_code()

    system_args = [
        "_lpy_cl_kernels",
        "queue",
        "allocator=None",
        "wait_for=None",
        # ignored if options.no_numpy
        "out_host=None"
    ]

    from loopy.kernel.data import KernelArgument
    gen = PythonFunctionGenerator(
        "invoke_%s_loopy_kernel" % kernel.name, system_args + [
            "%s=None" % idi.name for idi in implemented_data_info
            if issubclass(idi.arg_class, KernelArgument)
        ])

    for preamble_line in (
            "from __future__ import division",
            "",
            "import pyopencl as _lpy_cl",
            "import pyopencl.array as _lpy_cl_array",
            "import pyopencl.tools as _lpy_cl_tools",
            "import numpy as _lpy_np",
            "",
            host_code,
            ""):
        gen.add_to_preamble(preamble_line)

    gen("if allocator is None:")
    with Indentation(gen):
        gen("allocator = _lpy_cl_tools.DeferredAllocator(queue.context)")
    gen("")

    generate_integer_arg_finding_from_shapes(gen, kernel,
                                             implemented_data_info)
    generate_integer_arg_finding_from_offsets(gen, kernel,
                                              implemented_data_info)
    generate_integer_arg_finding_from_strides(gen, kernel,
                                              implemented_data_info)
    generate_value_arg_check(gen, kernel, implemented_data_info)

    args = generate_arg_setup(gen, kernel, implemented_data_info, options)

    # {{{ generate invocation

    gen("_lpy_evt = {kernel_name}({args})".format(
        kernel_name=codegen_result.host_program.name,
        args=", ".join(["_lpy_cl_kernels", "queue"] + args +
                       ["wait_for=wait_for"])))

    # }}}

    # {{{ output

    # Determine the written kernel arguments once; both the host-transfer
    # code and the return statement below are built from this same list.
    written_vars = kernel.get_written_variables()
    written_args = [
        arg for arg in implemented_data_info
        if issubclass(arg.arg_class, KernelArgument)
        and arg.base_name in written_vars
    ]

    if not options.no_numpy:
        gen("if out_host is None and (_lpy_encountered_numpy "
            "and not _lpy_encountered_dev):")
        with Indentation(gen):
            gen("out_host = True")

        gen("if out_host:")
        with Indentation(gen):
            gen("pass")  # if no outputs (?!)
            for arg in written_args:
                gen("%s = %s.get(queue=queue)" % (arg.name, arg.name))

        gen("")

    if options.return_dict:
        gen("return _lpy_evt, {%s}" %
            ", ".join("\"%s\": %s" % (arg.name, arg.name)
                      for arg in written_args))
    elif written_args:
        gen("return _lpy_evt, (%s,)" % ", ".join(arg.name
                                                 for arg in written_args))
    else:
        gen("return _lpy_evt, ()")

    # }}}

    # Optionally dump the generated wrapper: ``write_wrapper is True`` prints
    # it, any other truthy value is used as a file name.
    if options.write_wrapper:
        output = gen.get()
        if options.highlight_wrapper:
            output = get_highlighted_python_code(output)

        if options.write_wrapper is True:
            print(output)
        else:
            with open(options.write_wrapper, "w") as outf:
                outf.write(output)

    return gen.get_function()
示例#5
0
def generate_invoker(kernel, codegen_result):
    """Generate the Python wrapper function that invokes *kernel*'s host code.

    The wrapper is assembled line by line with a
    :class:`PythonFunctionGenerator`. Its parameters are the system
    arguments (``_lpy_cl_kernels``, ``queue``, ``allocator``, ``wait_for``,
    ``out_host``) followed by one ``<name>=None`` keyword for every entry of
    ``codegen_result.implemented_data_info`` whose ``arg_class`` is a
    :class:`KernelArgument` subclass.

    :arg kernel: the kernel being wrapped; supplies the name, the options
        and the set of written variables
    :arg codegen_result: the code generation result; supplies the
        implemented data info, the host code and the host program name

    :returns: the result of ``gen.get_function()`` for the generated wrapper
    """
    options = kernel.options
    implemented_data_info = codegen_result.implemented_data_info
    host_code = codegen_result.host_code()

    system_args = [
            "_lpy_cl_kernels", "queue", "allocator=None", "wait_for=None",
            # ignored if options.no_numpy
            "out_host=None"
            ]

    from loopy.kernel.data import KernelArgument
    gen = PythonFunctionGenerator(
            "invoke_%s_loopy_kernel" % kernel.name,
            system_args + [
                "%s=None" % idi.name
                for idi in implemented_data_info
                if issubclass(idi.arg_class, KernelArgument)
                ])

    for preamble_line in (
            "from __future__ import division",
            "",
            "import pyopencl as _lpy_cl",
            "import pyopencl.array as _lpy_cl_array",
            "import pyopencl.tools as _lpy_cl_tools",
            "import numpy as _lpy_np",
            "",
            host_code,
            ""):
        gen.add_to_preamble(preamble_line)

    gen("if allocator is None:")
    with Indentation(gen):
        gen("allocator = _lpy_cl_tools.DeferredAllocator(queue.context)")
    gen("")

    generate_integer_arg_finding_from_shapes(gen, kernel, implemented_data_info)
    generate_integer_arg_finding_from_offsets(gen, kernel, implemented_data_info)
    generate_integer_arg_finding_from_strides(gen, kernel, implemented_data_info)
    generate_value_arg_check(gen, kernel, implemented_data_info)

    args = generate_arg_setup(gen, kernel, implemented_data_info, options)

    # {{{ generate invocation

    gen("_lpy_evt = {kernel_name}({args})"
            .format(
                kernel_name=codegen_result.host_program.name,
                args=", ".join(
                    ["_lpy_cl_kernels", "queue"]
                    + args
                    + ["wait_for=wait_for"])))

    # }}}

    # {{{ output

    # Determine the written kernel arguments once; both the host-transfer
    # code and the return statement below are built from this same list.
    written_vars = kernel.get_written_variables()
    written_args = [arg
            for arg in implemented_data_info
            if issubclass(arg.arg_class, KernelArgument)
            and arg.base_name in written_vars]

    if not options.no_numpy:
        gen("if out_host is None and (_lpy_encountered_numpy "
                "and not _lpy_encountered_dev):")
        with Indentation(gen):
            gen("out_host = True")

        gen("if out_host:")
        with Indentation(gen):
            gen("pass")  # if no outputs (?!)
            for arg in written_args:
                gen("%s = %s.get(queue=queue)" % (arg.name, arg.name))

        gen("")

    if options.return_dict:
        gen("return _lpy_evt, {%s}"
                % ", ".join("\"%s\": %s" % (arg.name, arg.name)
                    for arg in written_args))
    elif written_args:
        gen("return _lpy_evt, (%s,)"
                % ", ".join(arg.name for arg in written_args))
    else:
        gen("return _lpy_evt, ()")

    # }}}

    # Optionally dump the generated wrapper: ``write_wrapper is True`` prints
    # it, any other truthy value is used as a file name.
    if options.write_wrapper:
        output = gen.get()
        if options.highlight_wrapper:
            output = get_highlighted_python_code(output)

        if options.write_wrapper is True:
            print(output)
        else:
            with open(options.write_wrapper, "w") as outf:
                outf.write(output)

    return gen.get_function()
示例#6
0
    def __call__(self, kernel, codegen_result):
        """
        Generate the wrapping Python invoker for this execution target.

        :arg kernel: the loopy :class:`LoopKernel`(s) to be executed
        :arg codegen_result: the loopy :class:`CodeGenerationResult` created
            by code generation

        :returns: A python callable that handles execution of this
            kernel
        """
        from loopy.kernel.data import KernelArgument

        opts = kernel.options
        data_info = codegen_result.implemented_data_info

        # Every implemented kernel argument becomes an optional keyword
        # parameter of the generated function, after the system arguments.
        invoker_name = "invoke_%s_loopy_kernel" % kernel.name
        kernel_arg_params = []
        for info in data_info:
            if issubclass(info.arg_class, KernelArgument):
                kernel_arg_params.append("%s=None" % info.name)

        fn_gen = PythonFunctionGenerator(
                invoker_name, self.system_args + kernel_arg_params)

        # Preamble: __future__ import, target-specific part, then the host
        # code, each followed by an empty preamble line.
        fn_gen.add_to_preamble("from __future__ import division")
        fn_gen.add_to_preamble("")
        self.target_specific_preamble(fn_gen)
        fn_gen.add_to_preamble("")
        self.generate_host_code(fn_gen, codegen_result)
        fn_gen.add_to_preamble("")

        self.initialize_system_args(fn_gen)

        # Argument handling inside the generated function body.
        self.generate_integer_arg_finding_from_shapes(fn_gen, kernel, data_info)
        self.generate_integer_arg_finding_from_offsets(fn_gen, kernel, data_info)
        self.generate_integer_arg_finding_from_strides(fn_gen, kernel, data_info)
        self.generate_value_arg_check(fn_gen, kernel, data_info)

        call_args = self.generate_arg_setup(fn_gen, kernel, data_info, opts)

        self.generate_invocation(
                fn_gen, codegen_result.host_program.name, call_args,
                kernel, data_info)

        self.generate_output_handler(fn_gen, opts, kernel, data_info)

        # Optionally dump the generated wrapper: ``write_wrapper is True``
        # prints it, any other truthy value is used as a file name.
        if opts.write_wrapper:
            wrapper_source = fn_gen.get()
            if opts.highlight_wrapper:
                wrapper_source = get_highlighted_python_code(wrapper_source)

            if opts.write_wrapper is not True:
                with open(opts.write_wrapper, "w") as outf:
                    outf.write(wrapper_source)
            else:
                print(wrapper_source)

        return fn_gen.get_picklable_function()
示例#7
0
 def emit_def_begin(self, name):
     """Begin emitting the function for the phase called *name*.

     Installs a new function emitter for ``phase_<name>`` (taking only
     ``self``) as ``self._emitter`` and then calls ``clear_locals()`` on
     the name manager.
     """
     phase_function_name = "phase_" + name
     self._emitter = PythonFunctionEmitter(phase_function_name, ("self", ))
     self._name_manager.clear_locals()
示例#8
0
class CodeGenerator(StructuredCodeGenerator):
    """Generate the Python source of a stepper class from a DAG.

    Each phase of the DAG is lowered into a ``phase_<name>`` method of the
    generated class; a constructor and ``set_up``, ``run`` and
    ``run_single_step`` methods are emitted alongside.

    .. automethod:: __init__
    .. automethod:: __call__
    """
    def __init__(self,
                 class_name,
                 class_preamble=None,
                 function_registry=None):
        """
        :arg class_name: The name of the class to generate
        :arg class_preamble: A string to include at the beginning of the
            the class (in class scope)
        :arg function_registry: An instance of
            :class:`dagrt.function_registry.FunctionRegistry`. Defaults to
            :data:`dagrt.function_registry.base_function_registry`.
        """
        if function_registry is None:
            from dagrt.function_registry import base_function_registry
            function_registry = base_function_registry

        from dagrt.codegen.utils import remove_common_indentation
        # NOTE(review): this is also called for the default
        # class_preamble=None; confirm remove_common_indentation accepts None.
        self.class_preamble = remove_common_indentation(class_preamble)

        self._class_name = class_name
        self._class_emitter = PythonClassEmitter(class_name)

        # Map from variable / RHS names to names in generated code
        self._name_manager = PythonNameManager()

        self._expr_mapper = PythonExpressionMapper(self._name_manager,
                                                   function_registry,
                                                   numpy="self._numpy")

    def __call__(self, dag):
        """Generate code for *dag*.

        The DAG is verified first, then every phase is lowered into a method
        of the generated class.

        :returns: the generated source code, as returned by the class
            emitter, of a class adhering to :class:`StepperInterface`.
            Use :meth:`get_class` to obtain the class itself.
        """

        from dagrt.codegen.analysis import verify_code
        verify_code(dag)

        from dagrt.codegen.dag_ast import create_ast_from_phase

        self.begin_emit(dag)
        for phase_name in dag.phases.keys():
            ast = create_ast_from_phase(dag, phase_name)
            self._pre_lower(ast)
            self.lower_function(phase_name, ast)
        self.finish_emit(dag)

        return self.get_code()

    def _pre_lower(self, ast):
        """Record in ``self._has_yield_inst`` whether *ast* contains a
        :class:`YieldState` statement."""
        self._has_yield_inst = False
        from dagrt.language import YieldState
        from dagrt.codegen.dag_ast import get_statements_in_ast
        for inst in get_statements_in_ast(ast):
            if isinstance(inst, YieldState):
                self._has_yield_inst = True
                return

    def lower_function(self, function_name, ast):
        """Emit one complete phase function: header, lowered *ast*, footer."""
        self.emit_def_begin(function_name)
        self.lower_ast(ast)
        self.emit_def_end()

    def get_class(self, code):
        """Return the compiled Python class for the method.

        :arg code: the DAG to generate code for (passed to :meth:`__call__`)
        """
        python_code = self(code)
        namespace = exec_in_new_namespace(python_code)
        return namespace[self._class_name]

    def _expr(self, expr):
        """Return the generated-code text for *expr*."""
        return self._expr_mapper(expr)

    def _emit(self, line):
        """Emit *line* into the current function emitter.

        The line is passed through :func:`wrap_line` together with the
        combined indentation level of the class and function emitters.
        """
        level = self._class_emitter.level + self._emitter.level
        for wrapped_line in wrap_line(line, level):
            self._emitter(wrapped_line)

    def begin_emit(self, dag):
        """Emit the class preamble (if any) followed by the inner classes."""
        if self.class_preamble:
            emit = PythonEmitter()
            for line in self.class_preamble:
                emit(line)
            emit("")
            self._class_emitter.incorporate(emit)

        self._emit_inner_classes()

    def _emit_inner_classes(self):
        """Emit the inner classes that describe objects returned by the method.

        The source of :mod:`dagrt.builtins_python` is copied into the class
        as well: every occurrence of ``builtin`` is renamed to ``_builtin``
        and each line starting with ``def builtin`` is preceded by
        ``@staticmethod``.

        :raises RuntimeError: if the source file of the built-ins module
            cannot be located
        """
        emit = PythonEmitter()

        for line in _inner_class_code.splitlines():
            emit(line)

        from inspect import getsourcefile
        import dagrt.builtins_python as builtins
        builtins_source_file = getsourcefile(builtins)

        if builtins_source_file is None:
            raise RuntimeError(
                "source code for built-in functions cannot be located")

        with open(builtins_source_file) as srcf:
            builtins_source = srcf.read()

        for line in builtins_source.split("\n"):
            if line.startswith("def builtin"):
                emit("@staticmethod")
            emit(line.replace("builtin", "_builtin"))

        self._class_emitter.incorporate(emit)

    def _emit_constructor(self, dag):
        """Emit the constructor."""
        emit = PythonFunctionEmitter("__init__", ("self", "function_map"))
        # Perform necessary imports.
        emit("import numpy")
        emit("self._numpy = numpy")

        # Make function symbols available
        emit("self._functions = self._function_symbol_container()")
        for function_id in self._name_manager.function_map:
            py_function_id = self._name_manager.name_function(function_id)
            emit('{py_function_id} = function_map["{function_id}"]'.format(
                py_function_id=py_function_id, function_id=function_id))
        emit("")
        # Maps each phase name to (next phase, bound phase method).
        emit("self.phase_transition_table = " + repr({
            phase_name: (phase.next_phase,
                         BareExpression("self.phase_" + phase_name))
            for phase_name, phase in dag.phases.items()
        }))
        emit("")

        self._class_emitter.incorporate(emit)

    def _emit_set_up(self, dag):
        """Emit the set_up() method."""
        emit = PythonFunctionEmitter(
            "set_up", ("self", "t_start", "dt_start", "context"))
        emit("self.t = t_start")
        emit("self.dt = dt_start")
        # Save all the context components.
        for component_id in self._name_manager.get_global_ids():
            component = self._name_manager.name_global(component_id)
            if not component_id.startswith("<state>"):
                continue
            # Strip the "<state>" prefix (7 characters).
            component_id = component_id[7:]
            emit('{component} = context.get("{component_id}")'.format(
                component=component, component_id=component_id))

        emit("self.next_phase = " + repr(dag.initial_phase))

        emit("")
        self._class_emitter.incorporate(emit)

    def _emit_run(self):
        """Emit the run() generator method of the generated class."""
        emit = PythonFunctionEmitter("run",
                                     ("self", "t_end=None", "max_steps=None"))
        emit("""
            n_steps = 0
            while True:
                if t_end is not None and self.t >= t_end:
                    return

                if max_steps is not None and n_steps >= max_steps:
                    return

                cur_phase = self.next_phase
                try:
                    for evt in self.run_single_step():
                        yield evt

                except self.FailStepException:
                    yield self.StepFailed(t=self.t)
                    continue

                except self.TransitionEvent as evt:
                    self.next_phase = evt.next_phase

                yield self.StepCompleted(dt=self.dt, t=self.t,
                    current_phase=cur_phase, next_phase=self.next_phase)

                n_steps += 1
            """)

        self._class_emitter.incorporate(emit)

    def _emit_run_single_step(self):
        """Emit the run_single_step() generator method of the generated
        class."""
        emit = PythonFunctionEmitter("run_single_step", ("self", ))

        emit("""
            self.next_phase, phase_func = (
                self.phase_transition_table[self.next_phase])

            for evt in phase_func():
                yield evt
            """)
        self._class_emitter.incorporate(emit)

    def finish_emit(self, dag):
        """Emit the constructor and the set_up/run/run_single_step methods."""
        self._emit_constructor(dag)
        self._emit_set_up(dag)
        self._emit_run()
        self._emit_run_single_step()

    def get_code(self):
        """Return the code accumulated by the class emitter."""
        return self._class_emitter.get()

    def emit_def_begin(self, name):
        """Begin the ``phase_<name>`` function and clear the name manager's
        locals."""
        self._emitter = PythonFunctionEmitter("phase_" + name, ("self", ))
        self._name_manager.clear_locals()

    def emit_def_end(self):
        """Finish the current function and add it to the class."""
        self._emit("")
        self._class_emitter.incorporate(self._emitter)
        del self._emitter

    def emit_if_begin(self, expr):
        """Emit ``if <expr>:`` and indent."""
        self._emit(f"if {self._expr(expr)}:")
        self._emitter.indent()

    def emit_if_end(self):
        """Close an ``if``/``else`` block by dedenting."""
        self._emitter.dedent()

    def emit_for_begin(self, loop_var_name, lbound, ubound):
        """Emit ``for <var> in range(<lbound>, <ubound>):`` and indent."""
        self._emit(f"for {self._name_manager[loop_var_name]} in "
                   f"range({self._expr(lbound)}, {self._expr(ubound)}):")
        self._emitter.indent()

    def emit_for_end(self, loop_var_name):
        """Close a ``for`` block by dedenting."""
        self._emitter.dedent()

    def emit_else_begin(self):
        """Dedent out of the ``if`` body, emit ``else:`` and indent again."""
        self._emitter.dedent()
        self._emit("else:")
        self._emitter.indent()

    def emit_return(self):
        """Emit ``return`` (plus a ``yield`` if the phase has none)."""
        self._emit("return")
        # Ensure that Python recognizes this method as a generator function by
        # adding a yield statement. Otherwise, calling methods that do not
        # yield any values may result in raising a naked StopIteration instead
        # of the creation of a generator, which does not interact well with the
        # run() implementation.
        #
        # TODO: Python 3.3+ has "yield from ()" which results in slightly less
        # awkward syntax.
        if not self._has_yield_inst:
            self._emit("yield")

    # {{{ statements

    def emit_inst_Assign(self, inst):
        """Emit an assignment, wrapped in the ``for`` loops of ``inst.loops``.

        After the loops are closed, a ``del`` is emitted for each loop
        variable.
        """
        emitter = self._emitter
        for ident, start, stop in inst.loops:
            managed_ident = self._name_manager[ident]
            emitter("for {ident} in range({start}, {stop}):".format(
                ident=managed_ident,
                start=self._expr(start),
                stop=self._expr(stop)))
            emitter.indent()

        if inst.assignee_subscript:
            subscript_code = "[%s]" % (", ".join(
                self._expr(sub_i) for sub_i in inst.assignee_subscript))
        else:
            subscript_code = ""

        self._emit("{name}{sub} = {expr}".format(
            name=self._name_manager[inst.assignee],
            sub=subscript_code,
            expr=self._expr(inst.expression)))

        for _ident, _start, _stop in inst.loops:
            emitter.dedent()

        for ident, _start, _stop in inst.loops:
            managed_ident = self._name_manager[ident]
            emitter(f"del {managed_ident}")

    def emit_inst_AssignFunctionCall(self, inst):
        """Emit a function call, assigning its result to ``inst.assignees``
        when there are any."""
        if len(inst.assignees) == 0:
            assign_code = ""
        else:
            assign_code = (", ".join(self._name_manager[n]
                                     for n in inst.assignees) + " = ")

        from pymbolic import var
        self._emit("{assign_code}{expr}".format(
            assign_code=assign_code,
            expr=self._expr_mapper.map_generic_call(var(inst.function_id),
                                                    inst.parameters,
                                                    inst.kw_parameters)))

    def emit_inst_YieldState(self, inst):
        """Emit a ``yield self.StateComputed(...)`` statement."""
        self._emit("yield self.StateComputed(t={t}, time_id={time_id}, "
                   "component_id={component_id}, "
                   "state_component={state_component})".format(
                       t=self._expr(inst.time),
                       time_id=repr(inst.time_id),
                       component_id=repr(inst.component_id),
                       state_component=self._expr(inst.expression)))

    def emit_inst_Raise(self, inst):
        """Emit a ``raise self.StepError(...)`` statement (plus a ``yield``
        if the phase has none)."""
        self._emit("raise self.StepError({condition}, {message})".format(
            condition=repr(inst.error_condition.__name__),
            message=repr(inst.error_message)))
        if not self._has_yield_inst:
            self._emit("yield")

    def emit_inst_FailStep(self, inst):
        """Emit ``raise self.FailStepException()`` (plus a ``yield`` if the
        phase has none)."""
        self._emit("raise self.FailStepException()")
        if not self._has_yield_inst:
            self._emit("yield")

    def emit_inst_SwitchPhase(self, inst):
        """Emit ``raise self.TransitionEvent(<next phase>)`` (plus a
        ``yield`` if the phase has none)."""
        # repr() produces a correctly quoted and escaped string literal for
        # any phase name. The previous hand-built literal used double quotes
        # while only guarding against single quotes in the name.
        self._emit("raise self.TransitionEvent("
                   + repr(inst.next_phase) + ")")
        if not self._has_yield_inst:
            self._emit("yield")
示例#9
0
    def _cache_kernel_stats(self, program: lp.kernel.LoopKernel,
                            kwargs: dict) -> tuple:
        """Generate the kernel stats for a program with its args.

        The statistics are stored in ``self.kernel_stats[program][args_tuple]``
        and are only computed when that entry does not exist yet.

        :arg program: the kernel to profile
        :arg kwargs: the keyword arguments the program is called with
        :returns: the cache key derived from *kwargs*: a tuple of
            ``(name, value.shape)`` for values that have a ``shape``
            attribute and ``(name, value)`` otherwise
        """
        args_tuple = tuple(
            (key, value.shape) if hasattr(value, "shape") else (key, value)
            for key, value in kwargs.items())

        # Are kernel stats already in the cache?  Only the lookup is guarded,
        # so errors raised while computing the stats are not chained to the
        # cache-miss KeyError.
        try:
            self.kernel_stats[program][args_tuple]
        except KeyError:
            pass
        else:
            return args_tuple

        # If not, calculate and cache the stats
        executor = program.target.get_kernel_executor(program, self.queue)

        # Computed once and shared by both executor queries below.
        arg_dtypes = executor.arg_to_dtype_set(kwargs)
        info = executor.kernel_info(arg_dtypes)
        kernel = executor.get_typed_and_scheduled_kernel(arg_dtypes)

        idi = info.implemented_data_info

        # Arguments that carry a non-object dtype; used for dtype inference
        # below.
        types = {
            k: v
            for k, v in kwargs.items()
            if hasattr(v, "dtype") and not v.dtype == object
        }

        # Every kernel argument and implemented-data entry that the caller
        # did not pass is given the value None.
        param_dict = kwargs.copy()
        for arg_name in kernel.arg_dict:
            param_dict.setdefault(arg_name, None)
        for data_info in idi:
            param_dict.setdefault(data_info.name, None)

        # Generate the wrapper code
        wrapper = executor.get_wrapper_generator()

        gen = PythonFunctionGenerator("_mcom_gen_args_profile",
                                      list(param_dict))

        wrapper.generate_integer_arg_finding_from_shapes(gen, kernel, idi)
        wrapper.generate_integer_arg_finding_from_offsets(gen, kernel, idi)
        wrapper.generate_integer_arg_finding_from_strides(gen, kernel, idi)

        param_names = program.all_params()
        gen("return {%s}" % ", ".join(f"{name!r}: {name}"
                                      for name in param_names))

        # Run the wrapper code, save argument values in domain_params
        domain_params = gen.get_picklable_function()(**param_dict)

        # Get flops/memory statistics
        kernel = lp.add_and_infer_dtypes(kernel, types)
        op_map = lp.get_op_map(kernel, subgroup_size="guess")
        bytes_accessed = lp.get_mem_access_map(kernel, subgroup_size="guess") \
          .to_bytes().eval_and_sum(domain_params)

        flops = op_map.filter_by(
            dtype=[np.float32, np.float64]).eval_and_sum(domain_params)

        # The footprint is left as None when loopy cannot determine the
        # access range.
        try:
            footprint = lp.gather_access_footprint_bytes(kernel)
            footprint_bytes = sum(
                footprint[k].eval_with_dict(domain_params)
                for k in footprint)

        except lp.symbolic.UnableToDetermineAccessRange:
            footprint_bytes = None

        # The result is created with time=0.
        res = ProfileResult(time=0,
                            flops=flops,
                            bytes_accessed=bytes_accessed,
                            footprint_bytes=footprint_bytes)

        self.kernel_stats.setdefault(program, {})[args_tuple] = res
        return args_tuple