示例#1
0
def test_latex_mapper():
    """Check that the output of ``LaTeXMapper`` is accepted by ``latex``.

    A collection of expressions is rendered with ``LaTeXMapper``; each
    rendering is wrapped in a display-math environment and followed by a TeX
    comment holding the ``StringifyMapper`` form of the same expression. The
    equations are substituted into the module-level ``LATEX_TEMPLATE``,
    written to ``input.tex`` in a scratch directory and compiled with the
    ``latex`` executable.

    The test is skipped if launching ``latex`` raises :class:`OSError`, and
    fails with the captured output if ``latex`` exits with a non-zero status.
    The scratch directory is removed in every case.

    Relies on ``prim``, ``LATEX_TEMPLATE`` and ``pytest`` being available at
    module level.
    """
    from pymbolic import parse
    from pymbolic.mapper.stringifier import LaTeXMapper, StringifyMapper

    tm = LaTeXMapper()
    sm = StringifyMapper()

    equations = []

    def add(expr):
        # Add an equation to the list of tests.
        equations.append(r"\[%s\] %% from: %s" % (tm(expr), sm(expr)))

    add(parse("a * b + c"))
    add(parse("f(a,b,c)"))
    add(parse("a ** b ** c"))
    add(parse("(a | b) ^ ~c"))
    add(parse("a << b"))
    add(parse("a >> b"))
    add(parse("a[i,j,k]"))
    add(parse("a[1:3]"))
    add(parse("a // b"))
    add(parse("not (a or b) and c"))
    add(parse("(a % b) % c"))
    add(parse("(a >= b) or (b <= c)"))
    add(prim.Min((1,)) + prim.Max((1, 2)))
    add(prim.Substitution(prim.Variable("x") ** 2, ("x",), (2,)))
    add(prim.Derivative(parse("x**2"), ("x",)))

    # Run LaTeX and ensure the file compiles.
    import os
    import tempfile
    import subprocess
    import shutil

    latex_dir = tempfile.mkdtemp("pymbolic")

    try:
        tex_file_path = os.path.join(latex_dir, "input.tex")

        with open(tex_file_path, "w") as tex_file:
            contents = LATEX_TEMPLATE % "\n".join(equations)
            tex_file.write(contents)

        try:
            subprocess.check_output(
                    ["latex",
                     "-interaction=nonstopmode",
                     "-output-directory=%s" % latex_dir,
                     tex_file_path],
                    universal_newlines=True)
        except OSError:  # FIXME: Should be FileNotFoundError on Py3
            pytest.skip("latex command not found")
        except subprocess.CalledProcessError as err:
            # Raise explicitly instead of 'assert False': assert statements
            # are removed under 'python -O', which would let a failed LaTeX
            # run pass silently.
            raise AssertionError(str(err.output))

    finally:
        shutil.rmtree(latex_dir)
示例#2
0
    def get_temporary_decls(self, codegen_state, schedule_index):
        """Build the declarations for the kernel's temporary variables.

        Temporaries are visited in order of their name. A temporary without
        ``base_storage`` gets one declaration per entry of its ``decl_info``
        (with an initializer if it has one), unless its scope is global, in
        which case nothing is emitted for it here. A temporary with
        ``base_storage`` is declared as a ``_ConstRestrictPointer`` that is
        initialized to point into a ``char`` array named after the base
        storage. Each such array is sized to the largest ``nbytes`` among the
        temporaries sharing it and carries the largest alignment computed for
        any of them.

        :arg codegen_state: supplies the kernel (``codegen_state.kernel``)
            and is forwarded to the helpers that build declarations.
        :arg schedule_index: forwarded to :meth:`get_temporary_decl`.
        :returns: a list with the base storage declarations first, then the
            temporary declarations, followed by a ``cgen.Line`` if the list
            is non-empty.
        """
        from cgen import ArrayOf, Initializer, AlignedAttribute, Value, Line
        from loopy.kernel.array import VectorArrayDimTag
        from loopy.kernel.data import temp_var_scope
        from pytools import product, single_valued

        kernel = codegen_state.kernel

        storage_decls = []
        tv_decls = []

        # {{{ declare temporaries

        # Keyed by base storage name; one list entry per temporary using it.
        sizes_by_storage = {}
        scopes_by_storage = {}
        aligns_by_storage = {}

        temporaries = sorted(six.itervalues(kernel.temporary_variables),
                             key=lambda tv: tv.name)

        for tv in temporaries:
            decl_info = tv.decl_info(self.target,
                                     index_dtype=kernel.index_dtype)

            if tv.base_storage:
                assert tv.initializer is None

                sizes_by_storage.setdefault(
                    tv.base_storage, []).append(tv.nbytes)
                scopes_by_storage.setdefault(
                    tv.base_storage, []).append(tv.scope)

                # Alignment is the item size scaled by the length of every
                # vector-tagged axis.
                align_bytes = tv.dtype.itemsize
                for dim_tag, axis_len in zip(tv.dim_tags, tv.shape):
                    if isinstance(dim_tag, VectorArrayDimTag):
                        align_bytes *= axis_len

                aligns_by_storage.setdefault(
                    tv.base_storage, []).append(align_bytes)

                byte_offset = 0
                for idi in decl_info:
                    # The 'restrict' part of this is a complete lie--of course
                    # all these temporaries are aliased. But we're promising to
                    # not use them to shovel data from one representation to the
                    # other. That counts, right?
                    cast_decl = _ConstRestrictPointer(
                        self.wrap_temporary_decl(
                            POD(self, idi.dtype, ""), tv.scope))
                    ptr_decl = _ConstRestrictPointer(
                        self.wrap_temporary_decl(
                            POD(self, idi.dtype, idi.name), tv.scope))

                    cast_tp, cast_d = cast_decl.get_decl_pair()
                    init_expr = "(%s %s) (%s + %s)" % (
                        " ".join(cast_tp), cast_d, tv.base_storage,
                        byte_offset)
                    tv_decls.append(Initializer(ptr_decl, init_expr))

                    byte_offset += idi.dtype.itemsize * product(idi.shape)

            else:
                for idi in decl_info:
                    # global temp vars are mapped to arguments or global
                    # declarations
                    if tv.scope == temp_var_scope.GLOBAL:
                        continue

                    decl = self.wrap_temporary_decl(
                        self.get_temporary_decl(
                            codegen_state, schedule_index, tv, idi),
                        tv.scope)

                    if tv.initializer is not None:
                        assert tv.read_only
                        literal = generate_array_literal(
                            codegen_state, tv, tv.initializer)
                        decl = Initializer(decl, literal)

                    tv_decls.append(decl)

        ecm = self.get_expression_to_code_mapper(codegen_state)

        for bs_name, bs_sizes in sorted(six.iteritems(sizes_by_storage)):
            storage_scope = single_valued(scopes_by_storage[bs_name])
            storage_decl = self.wrap_temporary_decl(
                Value("char", bs_name), storage_scope)

            # FIXME: Could try to use isl knowledge to simplify max.
            if all(isinstance(bs, int) for bs in bs_sizes):
                max_nbytes = max(bs_sizes)
            else:
                max_nbytes = p.Max(tuple(bs_sizes))

            sized_decl = ArrayOf(storage_decl, ecm(max_nbytes))
            alignment = max(aligns_by_storage[bs_name])

            storage_decls.append(AlignedAttribute(alignment, sized_decl))

        # }}}

        all_decls = storage_decls + tv_decls

        if all_decls:
            all_decls.append(Line())

        return all_decls
示例#3
0
    def get_temporary_decls(self, codegen_state, schedule_index):
        """Generate the statements that allocate the global temporaries.

        Every distinct ``base_storage`` name among the global temporaries is
        assigned one ``allocator(...)`` call sized to the maximum ``nbytes``
        of the temporaries sharing it (a symbolic ``Max`` when not all sizes
        are plain numbers). A temporary with base storage is then assigned
        its base storage name, while a temporary without one gets its own
        ``allocator(...)`` call. Finally ``_global_temporaries`` is assigned
        the list of names that received an allocation, i.e. the base storage
        names and the temporaries without base storage.

        :arg codegen_state: forwarded to the expression-to-code mapper and to
            :meth:`_get_global_temporaries`.
        :arg schedule_index: unused by this implementation.
        :returns: a list of ``genpy`` nodes, or an empty list if there are no
            global temporaries.
        """
        from genpy import Assign, Comment, Line
        from collections import defaultdict
        from numbers import Number
        import pymbolic.primitives as prim

        from pymbolic.mapper.stringifier import PREC_NONE
        ecm = self.get_expression_to_code_mapper(codegen_state)

        global_temporaries = self._get_global_temporaries(codegen_state)
        if not global_temporaries:
            return []

        # {{{ allocate space for the base_storage

        # base storage name -> set of byte sizes requested by its users
        base_storage_sizes = defaultdict(set)

        for tv in global_temporaries:
            if tv.base_storage:
                base_storage_sizes[tv.base_storage].add(tv.nbytes)

        # }}}

        allocated_var_names = []
        code_lines = []
        code_lines.append(Line())
        code_lines.append(Comment("{{{ allocate global temporaries"))
        code_lines.append(Line())

        for name, sizes in base_storage_sizes.items():
            # Numeric sizes can be reduced right away; otherwise the maximum
            # is left as an expression for the code mapper to render.
            if all(isinstance(s, Number) for s in sizes):
                size = max(sizes)
            else:
                size = prim.Max(tuple(sizes))

            allocated_var_names.append(name)
            code_lines.append(
                Assign(name, f"allocator({ecm(size, PREC_NONE, 'i')})"))

        for tv in global_temporaries:
            if tv.base_storage:
                assert tv.base_storage in base_storage_sizes
                code_lines.append(Assign(tv.name, tv.base_storage))
            else:
                nbytes_str = ecm(tv.nbytes, PREC_NONE, "i")
                allocated_var_names.append(tv.name)
                code_lines.append(Assign(tv.name, f"allocator({nbytes_str})"))

        code_lines.append(
            Assign(
                "_global_temporaries",
                "[{tvs}]".format(tvs=", ".join(allocated_var_names))))

        code_lines.append(Line())
        code_lines.append(Comment("}}}"))
        code_lines.append(Line())

        return code_lines
示例#4
0
    def get_temporary_decls(self, codegen_state, schedule_index):
        """Build the temporary-variable declarations for one sub-kernel.

        The sub-kernel is the one named by
        ``kernel.linearization[schedule_index].kernel_name``. Temporaries are
        visited in order of their name:

        * A temporary without ``base_storage`` gets one declaration per entry
          of its ``decl_info`` (with an initializer if it has one), provided
          it is not in the global address space and is read or written in
          the sub-kernel.
        * A temporary with ``base_storage`` is declared as a pointer that is
          initialized to point into a ``char`` array named after the base
          storage. The pointer type is ``_ConstPointer`` if
          ``tv._base_storage_access_may_be_aliasing`` is set and
          ``_ConstRestrictPointer`` otherwise. Such temporaries are skipped
          entirely when they are global and device code is being generated.

        Each base storage array is sized to the largest ``nbytes`` among the
        temporaries sharing it and carries the largest alignment computed for
        any of them.

        :arg codegen_state: supplies the kernel and the
            ``is_generating_device_code`` flag, and is forwarded to the
            helpers that build declarations.
        :arg schedule_index: index into ``kernel.linearization`` selecting
            the sub-kernel; also forwarded to :meth:`get_temporary_decl`.
        :returns: a list with the base storage declarations first, then the
            temporary declarations, followed by a ``cgen.Line`` if the list
            is non-empty.
        """
        from cgen import ArrayOf, Initializer, AlignedAttribute, Value, Line
        from loopy.kernel.array import VectorArrayDimTag
        from loopy.kernel.data import AddressSpace
        from loopy.schedule.tools import (temporaries_read_in_subkernel,
                                          temporaries_written_in_subkernel)
        from pytools import product, single_valued

        kernel = codegen_state.kernel

        storage_decls = []
        tv_decls = []

        # {{{ declare temporaries

        # Keyed by base storage name; one list entry per temporary using it.
        sizes_by_storage = {}
        spaces_by_storage = {}
        aligns_by_storage = {}

        # Getting the temporary variables that are needed for the current
        # sub-kernel.
        subkernel = kernel.linearization[schedule_index].kernel_name
        read_temps = temporaries_read_in_subkernel(kernel, subkernel)
        written_temps = temporaries_written_in_subkernel(kernel, subkernel)
        sub_knl_temps = read_temps | written_temps

        temporaries = sorted(kernel.temporary_variables.values(),
                             key=lambda tv: tv.name)

        for tv in temporaries:
            decl_info = tv.decl_info(self.target,
                                     index_dtype=kernel.index_dtype)

            if tv.base_storage:
                assert tv.initializer is None

                if (tv.address_space == AddressSpace.GLOBAL
                        and codegen_state.is_generating_device_code):
                    # global temps trigger no codegen in the device code
                    continue

                sizes_by_storage.setdefault(
                    tv.base_storage, []).append(tv.nbytes)
                spaces_by_storage.setdefault(
                    tv.base_storage, []).append(tv.address_space)

                # Alignment is the item size scaled by the length of every
                # vector-tagged axis.
                align_bytes = tv.dtype.itemsize
                for dim_tag, axis_len in zip(tv.dim_tags, tv.shape):
                    if isinstance(dim_tag, VectorArrayDimTag):
                        align_bytes *= axis_len

                aligns_by_storage.setdefault(
                    tv.base_storage, []).append(align_bytes)

                byte_offset = 0
                for idi in decl_info:
                    # The 'restrict' part of the non-aliasing case is a
                    # complete lie--of course all these temporaries are
                    # aliased. But we're promising to not use them to shovel
                    # data from one representation to the other. That
                    # counts, right?
                    ptrtype = (
                        _ConstPointer
                        if tv._base_storage_access_may_be_aliasing
                        else _ConstRestrictPointer)

                    cast_decl = ptrtype(
                        self.wrap_temporary_decl(
                            POD(self, idi.dtype, ""), tv.address_space))
                    ptr_decl = ptrtype(
                        self.wrap_temporary_decl(
                            POD(self, idi.dtype, idi.name),
                            tv.address_space))

                    cast_tp, cast_d = cast_decl.get_decl_pair()
                    init_expr = "({} {}) ({} + {})".format(
                        " ".join(cast_tp), cast_d, tv.base_storage,
                        byte_offset)
                    tv_decls.append(Initializer(ptr_decl, init_expr))

                    byte_offset += idi.dtype.itemsize * product(idi.shape)

            else:
                for idi in decl_info:
                    # global temp vars are mapped to arguments or global
                    # declarations
                    if (tv.address_space == AddressSpace.GLOBAL
                            or tv.name not in sub_knl_temps):
                        continue

                    decl = self.wrap_temporary_decl(
                        self.get_temporary_decl(
                            codegen_state, schedule_index, tv, idi),
                        tv.address_space)

                    if tv.initializer is not None:
                        assert tv.read_only
                        literal = generate_array_literal(
                            codegen_state, tv, tv.initializer)
                        decl = Initializer(decl, literal)

                    tv_decls.append(decl)

        ecm = self.get_expression_to_code_mapper(codegen_state)

        for bs_name, bs_sizes in sorted(sizes_by_storage.items()):
            storage_space = single_valued(spaces_by_storage[bs_name])
            storage_decl = self.wrap_temporary_decl(
                Value("char", bs_name), storage_space)

            # FIXME: Could try to use isl knowledge to simplify max.
            if all(isinstance(bs, int) for bs in bs_sizes):
                max_nbytes = max(bs_sizes)
            else:
                max_nbytes = p.Max(tuple(bs_sizes))

            sized_decl = ArrayOf(storage_decl, ecm(max_nbytes))
            alignment = max(aligns_by_storage[bs_name])

            storage_decls.append(AlignedAttribute(alignment, sized_decl))

        # }}}

        all_decls = storage_decls + tv_decls

        if all_decls:
            all_decls.append(Line())

        return all_decls