Exemplo n.º 1
0
def test_shape_translation_through_sub_array_ref(ctx_factory, inline):
    """Run three callees through sub-array refs whose shapes differ from
    the shapes of the caller-side arrays.

    ``callee_fn1`` (1D, 6 entries) is fed a ``(3, 2)`` array, ``callee_fn2``
    (2D, 3x2) is fed a length-6 array, and ``callee_fn3`` (1D, 6 entries)
    is fed the diagonal of a ``(6, 6)`` array. When *inline* is true, every
    callee is inlined into the caller before execution.
    """
    cl_ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_ctx)

    # Device-side random inputs, one per callee.
    x1 = cl.clrandom.rand(cmd_queue, (3, 2), dtype=np.float64)
    x2 = cl.clrandom.rand(cmd_queue, (6, ), dtype=np.float64)
    x3 = cl.clrandom.rand(cmd_queue, (6, 6), dtype=np.float64)

    callees = [
        lp.make_function(
            "{[i]: 0<=i<6}",
            """
            b[i] = 2*abs(a[i])
            """,
            name="callee_fn1"),
        lp.make_function(
            "{[i, j]: 0<=i<3 and 0 <= j < 2}",
            """
            b[i, j] = 3*a[i, j]
            """,
            name="callee_fn2"),
        lp.make_function(
            "{[i]: 0<=i<6}",
            """
            b[i] = 5*a[i]
            """,
            name="callee_fn3"),
    ]

    t_unit = lp.make_kernel(
        "{[i, j, k, l]:  0<= i < 6 and 0 <= j < 3 and 0 <= k < 2 and 0<=l<6}",
        """
            [i]: y1[i//2, i%2] = callee_fn1([i]: x1[i//2, i%2])
            [j, k]: y2[2*j+k] = callee_fn2([j, k]: x2[2*j+k])
            [l]: y3[l, l] = callee_fn3([l]: x3[l, l])
            """)

    # Attach the callees one at a time, in definition order.
    for callee in callees:
        t_unit = lp.merge([t_unit, callee])

    if inline:
        for callee_name in ("callee_fn1", "callee_fn2", "callee_fn3"):
            t_unit = lp.inline_callable_kernel(t_unit, callee_name)

    for option in ("write_cl", "return_dict"):
        t_unit = lp.set_options(t_unit, option)
    _, results = t_unit(cmd_queue, x1=x1, x2=x2, x3=x3)

    y1_host = results["y1"].get()
    y2_host = results["y2"].get()
    y3_host = results["y3"].get()

    err1 = np.linalg.norm(y1_host - 2 * x1.get())
    err2 = np.linalg.norm(y2_host - 3 * x2.get())
    # Only the diagonal of y3 is written by the kernel.
    err3 = np.linalg.norm(np.diag(y3_host - 5 * x3.get()))

    assert err1 < 1e-15
    assert err2 < 1e-15
    assert err3 < 1e-15
Exemplo n.º 2
0
def test_inlining_with_indirections(ctx_factory):
    """Inline a callee that writes through an index array.

    The callee zeroes ``x`` and then sets ``x[map[j]]`` to one; with
    ``mymap = [0, 1, 2]`` the first three of the six outputs must be one
    and the remaining three zero.
    """
    cl_ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_ctx)

    callee = lp.make_function(
        "{[i, j]: 0<=i<6 and 0<=j<3}",
        """
            x[i] = 0.0f
            ...gbarrier
            x[map[j]] = 1.0f
            """,
        seq_dependencies=True,
        name="ones_and_zeros")

    caller_args = [
        lp.GlobalArg("y", shape=6, dtype=lp.auto),
        lp.GlobalArg("mymap", dtype=np.int32, shape=3),
    ]
    caller = lp.make_kernel(
        "{ : }", """
            y[:] = ones_and_zeros(mymap[:])
            """, caller_args)

    merged = lp.merge([caller, callee])
    inlined = lp.inline_callable_kernel(merged, "ones_and_zeros")

    index_map = np.arange(3).astype(np.int32)

    _, (result, ) = inlined(cmd_queue, mymap=index_map)

    reference = np.array([1, 1, 1, 0, 0, 0]).astype(np.float32)
    assert (reference == result).all()
Exemplo n.º 3
0
def test_array_inputs_to_callee_kernels(ctx_factory, inline):
    """Pass whole arrays (no explicit sub-array ref) to a callee kernel.

    The callee ``linear_combo`` computes ``2*e + 3*f``; the caller hands it
    ``x`` and ``y`` directly and the output is compared with the same
    expression evaluated by numpy. With *inline* set, the callee is
    inlined into the caller before running.
    """
    cl_ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_ctx)
    n = 2**3

    x = np.random.rand(n, n)
    y = np.random.rand(n, n)

    callee = lp.make_function(
        "{[i, j]:0<=i, j < 8}",
        """
            g[i, j] = 2*e[i, j] + 3*f[i, j]
            """,
        name="linear_combo")

    caller_args = [
        lp.GlobalArg(name="x, y, z", dtype=np.float64, shape=(n, n)),
        ...,
    ]
    caller = lp.make_kernel(
        "{:}",
        """
            z[:, :] = linear_combo(x, y)
            """,
        kernel_data=caller_args)

    t_unit = lp.merge([caller, callee])
    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "linear_combo")

    _, (out, ) = t_unit(cmd_queue, x=x, y=y)

    # Relative error against the numpy reference.
    reference = 2 * x + 3 * y
    rel_err = np.linalg.norm(reference - out) / np.linalg.norm(reference)
    assert rel_err < 1e-15
Exemplo n.º 4
0
def test_empty_sub_array_refs(ctx_factory, inline):
    """Call a callee through sub-array refs swept by a length-one iname.

    The callee ``wence_function`` subtracts ``b`` from ``a`` over a domain
    of a single point; the caller applies it per entry of ``x`` and ``y``
    and the result must equal ``x - y``.
    """
    # See: https://github.com/OP2/PyOP2/pull/559#discussion_r272208618
    cl_ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_ctx)

    x = np.random.randn(10)
    y = np.random.randn(10)

    callee_knl = lp.make_function(
        "{[d]:0<=d<1}",
        """
            c[d] = a[d] - b[d]
            """,
        name="wence_function")

    caller_args = [
        lp.GlobalArg("x, y", dtype=np.float64, shape=(10, )),
        ...,
    ]
    caller_knl = lp.make_kernel(
        "{[i,k]: 0<=i<10 and 0<=k<1}", """
            [k]:z[i+k] = wence_function([k]:x[i+k], [k]:y[i+k])
            """, caller_args)

    t_unit = lp.merge([caller_knl, callee_knl])
    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "wence_function")

    _, (result, ) = t_unit(cmd_queue, x=x, y=y)
    assert np.allclose(result, x - y)
Exemplo n.º 5
0
def test_slices_with_negative_step(ctx_factory, inline):
    """Write a callee's output through a slice with a negative step.

    The caller stores the result of ``linear_combo`` into
    ``z[i, 3:-1:-1, k, :, m]``, so the second axis of the output is
    reversed relative to ``2*x + 3*y``; the check flips that axis back
    before comparing.
    """
    cl_ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_ctx)
    n = 4

    x = np.random.rand(n, n, n, n, n)
    y = np.random.rand(n, n, n, n, n)

    callee = lp.make_function(
        "{[i, j]:0<=i, j < 4}",
        """
            g[i, j] = 2*e[i, j] + 3*f[i, j]
            """,
        name="linear_combo")

    caller_args = [
        lp.GlobalArg(name="x, y, z",
                     dtype=np.float64,
                     shape=(n, n, n, n, n)),
        ...,
    ]
    caller = lp.make_kernel(
        "{[i, k, m]: 0<=i, k, m<4}",
        """
            z[i, 3:-1:-1, k, :, m] = linear_combo(x[i, :, k, :, m],
                                                   y[i, :, k, :, m])
            """,
        kernel_data=caller_args)

    t_unit = lp.merge([caller, callee])
    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "linear_combo")

    _, (out, ) = t_unit(cmd_queue, x=x, y=y)

    reference = 2 * x + 3 * y
    # Undo the reversal along axis 1 introduced by the negative-step slice.
    unflipped = out[:, ::-1, :, :, :]
    rel_err = np.linalg.norm(reference - unflipped) / np.linalg.norm(reference)
    assert rel_err < 1e-15
Exemplo n.º 6
0
def test_double_hw_axes_used_in_knl_call(inline):
    """Code generation must fail when caller and callee both tag an iname
    with the same hardware axis (``l.0``).

    The test expects ``lp.generate_code_v2`` to raise ``LoopyError``,
    whether or not the callee is inlined first.
    """
    from loopy.diagnostic import LoopyError

    callee = lp.make_function(
        "{[i]: 0<=i<10}",
        """
            y[i] = 2*x[i]
            """,
        name="twice")

    caller_args = [
        lp.GlobalArg("x", shape=(10, 10), dtype=float),
        lp.GlobalArg("y", shape=(10, 10)),
    ]
    caller = lp.make_kernel(
        "{[i]: 0<=i<10}",
        """
            y[:, i] = twice(x[:, i])
            """,
        caller_args,
        name="outer")

    # Same local axis on both sides of the call.
    hw_tags = {"i": "l.0"}
    callee = lp.tag_inames(callee, hw_tags)
    caller = lp.tag_inames(caller, hw_tags)
    t_unit = lp.merge([caller, callee])

    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "twice")

    with pytest.raises(LoopyError):
        lp.generate_code_v2(t_unit)
Exemplo n.º 7
0
def test_unused_hw_axes_in_callee(ctx_factory, inline):
    """Caller and callee use different hardware axes.

    The callee's iname is tagged ``l.1`` while the caller's is tagged
    ``l.0``. The merged (and optionally inlined) kernel is then passed to
    ``lp.auto_test_vs_ref`` with itself as the reference.
    """
    cl_ctx = ctx_factory()

    callee = lp.make_function(
        "{[i]: 0<=i<10}",
        """
            y[i] = 2*x[i]
            """,
        name="twice")

    caller_args = [
        lp.GlobalArg("x", shape=(10, 10), dtype=float),
        lp.GlobalArg("y", shape=(10, 10)),
    ]
    caller = lp.make_kernel(
        "{[i]: 0<=i<10}",
        """
            y[:, i] = twice(x[:, i])
            """,
        caller_args,
        name="outer")

    callee = lp.tag_inames(callee, {"i": "l.1"})
    caller = lp.tag_inames(caller, {"i": "l.0"})
    t_unit = lp.merge([caller, callee])

    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "twice")

    lp.auto_test_vs_ref(t_unit, cl_ctx, t_unit)
Exemplo n.º 8
0
def test_packing_unpacking(ctx_factory, inline):
    """Apply ``lp.pack_and_unpack_args_for_call`` to two calls and check
    the numerical results.

    ``callee_fn1`` doubles and ``callee_fn2`` triples its input; packing
    is requested for both calls before the optional inlining step.
    """
    cl_ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_ctx)

    x1 = cl.clrandom.rand(cmd_queue, (3, 2), dtype=np.float64)
    x2 = cl.clrandom.rand(cmd_queue, (6, ), dtype=np.float64)

    doubler = lp.make_function(
        "{[i]: 0<=i<6}",
        """
            b[i] = 2*a[i]
            """,
        name="callee_fn1")

    tripler = lp.make_function(
        "{[i, j]: 0<=i<2 and 0 <= j < 3}",
        """
            b[i, j] = 3*a[i, j]
            """,
        name="callee_fn2")

    t_unit = lp.make_kernel(
        "{[i, j, k]:  0<= i < 3 and 0 <= j < 2 and 0 <= k < 6}", """
            [i, j]: y1[i, j] = callee_fn1([i, j]: x1[i, j])
            [k]: y2[k] = callee_fn2([k]: x2[k])
            """)

    callee_names = ("callee_fn1", "callee_fn2")

    for callee in (doubler, tripler):
        t_unit = lp.merge([t_unit, callee])

    for callee_name in callee_names:
        t_unit = lp.pack_and_unpack_args_for_call(t_unit, callee_name)

    if inline:
        for callee_name in callee_names:
            t_unit = lp.inline_callable_kernel(t_unit, callee_name)

    for option in ("write_cl", "return_dict"):
        t_unit = lp.set_options(t_unit, option)
    _, results = t_unit(cmd_queue, x1=x1, x2=x2)

    y1_host = results["y1"].get()
    y2_host = results["y2"].get()

    # Relative errors against the host-side references.
    rel_err1 = (np.linalg.norm(2 * x1.get() - y1_host)
                / np.linalg.norm(2 * x1.get()))
    rel_err2 = (np.linalg.norm(3 * x2.get() - y2_host)
                / np.linalg.norm(3 * x2.get()))
    assert rel_err1 < 1e-15
    assert rel_err2 < 1e-15
Exemplo n.º 9
0
def test_register_knl(ctx_factory, inline):
    """Exercise a two-level call chain: parent -> linear_combo2 ->
    linear_combo1.

    The innermost callee computes ``2*a + 3*b``; the result of the parent
    kernel is compared against ``2*x + 3*y``. With *inline* set,
    ``linear_combo2`` is inlined first and ``linear_combo1`` afterwards.
    """
    cl_ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_ctx)
    n = 4

    x = np.random.rand(n, n, n, n, n)
    y = np.random.rand(n, n, n, n, n)

    innermost = lp.make_function(
        "{[i, j]:0<= i, j< 4}",
        """
            c[i, j] = 2*a[i, j] + 3*b[i, j]
            """,
        name="linear_combo1")

    middle = lp.make_function(
        "{[i, j]:0<=i, j < 4}",
        """
            [i, j]: g[i, j] = linear_combo1([i, j]: e[i, j], [i, j]: f[i, j])
            """,
        name="linear_combo2")

    outer_args = [
        lp.GlobalArg(name="x, y",
                     dtype=np.float64,
                     shape=(n, n, n, n, n)),
        ...,
    ]
    outer = lp.make_kernel(
        "{[i, j, k, l, m]: 0<=i, j, k, l, m<4}",
        """
            [j, l]: z[i, j, k, l, m] = linear_combo2([j, l]: x[i, j, k, l, m],
                                                     [j, l]: y[i, j, k, l, m])
            """,
        kernel_data=outer_args)

    t_unit = lp.merge([innermost, middle, outer])

    if inline:
        # Outer call first, then the call it exposes.
        for callee_name in ("linear_combo2", "linear_combo1"):
            t_unit = lp.inline_callable_kernel(t_unit, callee_name)

    _, (out, ) = t_unit(cmd_queue, x=x, y=y)

    reference = 2 * x + 3 * y
    rel_err = np.linalg.norm(reference - out) / np.linalg.norm(reference)
    assert rel_err < 1e-15
Exemplo n.º 10
0
def test_register_knl_with_hw_axes(ctx_factory, inline):
    """Call a callee whose iname is split onto ``g.0``/``l.0`` from a
    caller whose iname is split onto ``g.1``/``l.1``.

    The output ``z`` is compared against ``2*x + 3*y`` computed on the
    host.
    """
    cl_ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_ctx)

    n = 4

    x_dev = cl.clrandom.rand(cmd_queue, (n, n, n, n, n), np.float64)
    y_dev = cl.clrandom.rand(cmd_queue, (n, n, n, n, n), np.float64)

    callee = lp.make_function(
        "{[i, j]:0<=i, j < 4}",
        """
            g[i, j] = 2*e[i, j] + 3*f[i, j]
            """,
        name="linear_combo")
    callee = lp.split_iname(
        callee, "i", 1, inner_tag="l.0", outer_tag="g.0")

    caller = lp.make_kernel(
        "{[i, j, k, l, m]: 0<=i, j, k, l, m<4}",
        """
            [j, l]: z[i, j, k, l, m] = linear_combo([j, l]: x[i, j, k, l, m],
                                                     [j, l]: y[i, j, k, l, m])
            """,
        name="caller")
    caller = lp.split_iname(
        caller, "i", 4, inner_tag="l.1", outer_tag="g.1")

    t_unit = lp.merge([caller, callee])
    t_unit = lp.set_options(t_unit, "return_dict")

    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "linear_combo")

    _, results = t_unit(cmd_queue, x=x_dev, y=y_dev)

    x_host = x_dev.get()
    y_host = y_dev.get()

    reference = 2 * x_host + 3 * y_host
    rel_err = (np.linalg.norm(reference - results["z"].get())
               / np.linalg.norm(reference))
    assert rel_err < 1e-15
Exemplo n.º 11
0
def test_inlining_with_callee_domain_param(ctx_factory):
    """Inline a callee whose loop domain is bounded by a parameter ``n``.

    The caller passes the literal ``10`` for that parameter; every entry
    of the result must equal 2.
    """
    cl_ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_ctx)

    callee = lp.make_function(
        "{[i]: 0<=i<n}",
        """
            y[i] = 2.0
            """,
        name="fill2")

    t_unit = lp.make_kernel(
        "{[i]: 0<=i<10}", """
            [i]: res[i] = fill2(10)
            """)

    t_unit = lp.inline_callable_kernel(lp.merge([t_unit, callee]), "fill2")
    _, (result, ) = t_unit(cmd_queue)

    assert (result == 2).all()
Exemplo n.º 12
0
def test_valueargs_being_mapped_in_inling(ctx_factory):
    """Inline a callee that uses its value argument ``n`` both as a loop
    bound and inside the instruction (``y[i] = n*x[i]``).

    The inlined kernel is passed to ``lp.auto_test_vs_ref`` with itself
    as the reference.
    """
    callee_args = [lp.ValueArg("n", dtype=np.int32), ...]
    callee = lp.make_function(
        "{[i]: 0<=i<n}",
        """
            y[i] = n*x[i]
            """,
        callee_args,
        name="doublify",
    )

    caller_args = [lp.GlobalArg("foo", dtype=float, shape=lp.auto), ...]
    caller = lp.make_kernel(
        "{[i, j]: 0<=i, j<10}",
        """
            [i]: bar[i] = doublify(10, [j]: foo[j])
            """,
        caller_args,
    )

    t_unit = lp.merge([caller, callee])
    t_unit = lp.inline_callable_kernel(t_unit, "doublify")

    # The context is only created once the kernel has been built.
    cl_ctx = ctx_factory()
    lp.auto_test_vs_ref(t_unit, cl_ctx, t_unit)
Exemplo n.º 13
0
def test_passing_and_getting_scalar_in_clbl_knl(ctx_factory, inline):
    """Pass a scalar into a callee kernel and read a scalar back.

    The callee ``call_sin`` computes ``y = sin(x)``; the caller stores the
    result in the zero-dimensional ``real_y``. With *inline* set, the
    callee is inlined before execution. The returned value is compared
    against ``np.sin`` of the same input.
    """
    # Build the context from the fixture, as every other test in this file
    # does; previously the fixture was accepted but ignored in favour of
    # cl.create_some_context().
    ctx = ctx_factory()
    cq = cl.CommandQueue(ctx)

    call_sin = lp.make_function("{:}",
                                """
        y = sin(x)
        """,
                                name="call_sin")

    knl = lp.make_kernel(
        "{:}", """
        []: real_y[()] = call_sin(real_x)
        """)

    knl = lp.merge([knl, call_sin])
    knl = lp.set_options(knl, "write_cl")
    if inline:
        knl = lp.inline_callable_kernel(knl, "call_sin")

    evt, (out, ) = knl(cq, real_x=np.asarray(3.0, dtype=float))

    # Check the value, not just that the kernel ran.
    assert np.allclose(out, np.sin(3.0))
Exemplo n.º 14
0
def test_simplify_indices(ctx_factory):
    """Check that ``lp.simplify_indices`` removes floor divisions left
    behind by inlining.

    After inlining, at least one instruction expression must contain a
    floor division; after simplification, none may. The simplified kernel
    is then compared with the unsimplified one via
    ``lp.auto_test_vs_ref``.
    """
    cl_ctx = ctx_factory()
    callee = lp.make_function(
        "{[i, j]: 0<=i<10 and 0<=j<4}",
        """
        y[i,j] = 2*x[i,j]
        """,
        name="zerozerozeroonezeroify")

    caller = lp.make_kernel(
        "{:}", """
        Y[:,:] = zerozerozeroonezeroify(X[:,:])
        """, [lp.GlobalArg("X,Y", shape=(10, 4), dtype=np.float64)])

    class FloorDivFinder(lp.symbolic.CombineMapper):
        """Mapper returning True iff an expression contains a floor div."""

        def combine(self, values):
            return any(values)

        def map_floor_div(self, expr):
            return True

        def map_variable(self, expr):
            return False

        def map_constant(self, expr):
            return False

    inlined = lp.inline_callable_kernel(
        lp.merge([caller, callee]), "zerozerozeroonezeroify")
    simplified = lp.simplify_indices(inlined)
    finder = FloorDivFinder()

    def floordiv_flags(t_unit):
        # Lazily yield one flag per assignment-like instruction so that
        # any() stops at the first hit.
        return (finder(insn.expression)
                for insn in t_unit.default_entrypoint.instructions
                if isinstance(insn, lp.MultiAssignmentBase))

    assert any(floordiv_flags(inlined))
    assert not any(floordiv_flags(simplified))

    lp.auto_test_vs_ref(inlined, cl_ctx, simplified)
Exemplo n.º 15
0
def test_non1_step_slices(ctx_factory, start, inline):
    """Store a callee's output through slices with steps of 3 and -3.

    ``X`` is pre-filled with 42 and ``Y`` with 1729; the callee
    ``squared_arange`` then overwrites ``X[start:40:3]`` and
    ``Y[39:start:-3]`` with ``i**2``. The expected arrays are built with
    the same slices in numpy.
    """
    # See https://github.com/inducer/loopy/pull/222#discussion_r645905188

    cl_ctx = ctx_factory()
    cmd_queue = cl.CommandQueue(cl_ctx)

    callee = lp.make_function(
        "{[i]: 0<=i<n}",
        """
            y[i] = i**2
            """,
        [lp.ValueArg("n"), ...],
        name="squared_arange")

    t_unit = lp.make_kernel(
        "{[i_init, j_init]: 0<=i_init, j_init<40}",
        f"""
            X[i_init] = 42
            X[{start}:40:3] = squared_arange({len(range(start, 40, 3))})

            Y[j_init] = 1729
            Y[39:{start}:-3] = squared_arange({len(range(39, start, -3))})
            """,
        [lp.GlobalArg("X,Y", shape=40)],
        seq_dependencies=True)

    # Number of entries touched by the forward and the backward slice.
    n_forward = len(range(start, 40, 3))
    n_backward = len(range(39, start, -3))

    expected_x = np.full(40, 42, dtype=np.int64)
    expected_x[start:40:3] = np.arange(n_forward)**2

    expected_y = np.full(40, 1729, dtype=np.int64)
    expected_y[39:start:-3] = np.arange(n_backward)**2

    t_unit = lp.merge([t_unit, callee])
    t_unit = lp.set_options(t_unit, "return_dict")

    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "squared_arange")

    _, results = t_unit(cmd_queue)

    np.testing.assert_allclose(results["X"].get(), expected_x)
    np.testing.assert_allclose(results["Y"].get(), expected_y)
Exemplo n.º 16
0
def test_kc_with_floor_div_in_expr(ctx_factory, inline):
    """Call an in-place-updating callee through a sub-array ref whose
    index expression contains floor division and modulo.

    The merged (and optionally inlined) kernel is passed to
    ``lp.auto_test_vs_ref`` with itself as the reference.
    """
    # See https://github.com/inducer/loopy/issues/366
    import loopy as lp

    cl_ctx = ctx_factory()
    updater = lp.make_function(
        "{[i]: 0<=i<10}",
        """
            x[i] = 2*x[i]
            """,
        name="callee_with_update")

    t_unit = lp.make_kernel(
        "{[i]: 0<=i<10}", """
            [i]: x[2*(i//2) + (i%2)] = callee_with_update([i]: x[i])
            """)

    t_unit = lp.merge([t_unit, updater])

    if inline:
        t_unit = lp.inline_callable_kernel(t_unit, "callee_with_update")

    lp.auto_test_vs_ref(t_unit, cl_ctx, t_unit)
Exemplo n.º 17
0
def generate(builder, wrapper_name=None):
    """Build the loopy wrapper kernel described by *builder*.

    The function emits the builder's instructions, replaces materialise
    nodes, merges indices, renumbers node names, works out instruction
    dependencies and loop nesting, converts the instructions to loopy
    statements, and creates a kernel with ``loopy.make_kernel`` for
    ``loopy.CTarget``. The user kernel (``builder.kernel``) is then either
    registered as a callable loopy kernel and inlined, or, when it is not
    a ``loopy.LoopKernel``, looked up by name with its source appended to
    the preamble.

    :arg builder: object supplying ``layer_index``, ``_loop_index``,
        ``emit_instructions()``, ``wrapper_args``, ``layer_extents``,
        ``kernel``, ``single_cell``, ``extruded`` and
        ``argument_accesses``.
    :arg wrapper_name: name of the generated kernel; defaults to
        ``"wrap_" + builder.kernel.name``.
    :returns: the loopy kernel produced by the steps above.
    """
    # The outermost loop index (plus the layer index, if there is one)
    # must be known to loop_nesting below.
    if builder.layer_index is not None:
        outer_inames = frozenset([builder._loop_index.name,
                                  builder.layer_index.name])
    else:
        outer_inames = frozenset([builder._loop_index.name])

    instructions = list(builder.emit_instructions())

    # Mutable bag of state; it is handed to statement() through `context`.
    parameters = Bag()
    parameters.domains = OrderedDict()
    parameters.assumptions = OrderedDict()
    parameters.wrapper_arguments = builder.wrapper_args
    parameters.layer_start = builder.layer_extents[0].name
    parameters.layer_end = builder.layer_extents[1].name
    parameters.conditions = []
    # One slot per wrapper argument; slots still None after statement
    # generation are filled with plain GlobalArgs further down.
    parameters.kernel_data = list(None for _ in parameters.wrapper_arguments)
    parameters.temporaries = OrderedDict()
    parameters.kernel_name = builder.kernel.name

    # replace Materialise
    mapper = Memoizer(replace_materialise)
    mapper.initialisers = []
    instructions = list(mapper(i) for i in instructions)

    # merge indices
    merger = index_merger(instructions)
    instructions = list(merger(i) for i in instructions)
    initialiser = list(itertools.chain(*mapper.initialisers))
    # From here on `merger` is the one built from the initialisers; it is
    # also the one applied to mapper.initialisers below.
    merger = index_merger(initialiser)
    initialiser = list(merger(i) for i in initialiser)
    instructions = instructions + initialiser
    mapper.initialisers = [tuple(merger(i) for i in inits) for inits in mapper.initialisers]

    # rename indices and nodes (so that the counters start from zero)
    # Names of the form <letters/underscores><digits>[_offset] are renumbered.
    pattern = re.compile(r"^([a-zA-Z_]+)([0-9]+)(_offset)?$")
    replacements = {}
    # A separate counter per (prefix, postfix) pair.
    counter = defaultdict(itertools.count)
    for node in traversal(instructions):
        if isinstance(node, (Index, RuntimeIndex, Variable, Argument, NamedLiteral)):
            match = pattern.match(node.name)
            if match is None:
                continue
            prefix, _, postfix = match.groups()
            if postfix is None:
                postfix = ""
            replacements[node] = "%s%d%s" % (prefix, next(counter[(prefix, postfix)]), postfix)

    instructions = rename_nodes(instructions, replacements)
    mapper.initialisers = [rename_nodes(inits, replacements) for inits in mapper.initialisers]
    parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments, replacements)
    # The layer extents go through the same mapper and renaming, so the
    # names stored on `parameters` are refreshed here.
    s, e = rename_nodes([mapper(e) for e in builder.layer_extents], replacements)
    parameters.layer_start = s.name
    parameters.layer_end = e.name

    # scheduling and loop nesting
    deps = instruction_dependencies(instructions, mapper.initialisers)
    within_inames = loop_nesting(instructions, deps, outer_inames, parameters.kernel_name)

    # generate loopy
    context = Bag()
    context.parameters = parameters
    context.within_inames = within_inames
    context.conditions = []
    context.index_ordering = []
    context.instruction_dependencies = deps

    statements = list(statement(insn, context) for insn in instructions)
    # remove the dummy instructions (they were only used to ensure
    # that the kernel knows about the outer inames).
    statements = list(s for s in statements if not isinstance(s, DummyInstruction))

    domains = list(parameters.domains.values())
    if builder.single_cell:
        # Pin the domain whose first set dimension is the outer loop index
        # to a single point.
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder._loop_index.name:
                # n = start
                new_domains.append(d.add_constraint(isl.Constraint.eq_from_names(d.space, {"n": 1, "start": -1})))
            else:
                new_domains.append(d)
        domains = new_domains
        if builder.extruded:
            # Likewise pin the layer domain.
            new_domains = []
            for d in domains:
                if d.get_dim_name(isl.dim_type.set, 0) == builder.layer_index.name:
                    # layer = t1 - 1
                    t1 = parameters.layer_end
                    new_domains.append(d.add_constraint(isl.Constraint.eq_from_names(d.space, {"layer": 1, t1: -1, 1: 1})))
                else:
                    new_domains.append(d)
        domains = new_domains

    # Intersect all collected assumptions; the single-target unpacking
    # requires the result to consist of exactly one basic set.
    assumptions, = reduce(operator.and_,
                          parameters.assumptions.values()).params().get_basic_sets()
    options = loopy.Options(check_dep_resolution=True, ignore_boostable_into=True)

    # sometimes masks are not used, but we still need to create the function arguments
    for i, arg in enumerate(parameters.wrapper_arguments):
        if parameters.kernel_data[i] is None:
            arg = loopy.GlobalArg(arg.name, dtype=arg.dtype, shape=arg.shape)
            parameters.kernel_data[i] = arg

    if wrapper_name is None:
        wrapper_name = "wrap_%s" % builder.kernel.name

    # Add ordering constraints on the iteration bounds to the assumptions.
    pwaffd = isl.affs_from_space(assumptions.get_space())
    # start >= pwaffd[0] (presumably the constant zero -- TODO confirm)
    assumptions = assumptions & pwaffd["start"].ge_set(pwaffd[0])
    if builder.single_cell:
        # strict: start < end
        assumptions = assumptions & pwaffd["start"].lt_set(pwaffd["end"])
    else:
        # start <= end
        assumptions = assumptions & pwaffd["start"].le_set(pwaffd["end"])
    if builder.extruded:
        # layer_start <= layer_end
        assumptions = assumptions & pwaffd[parameters.layer_start].le_set(pwaffd[parameters.layer_end])
    assumptions = reduce(operator.and_, assumptions.get_basic_sets())

    wrapper = loopy.make_kernel(domains,
                                statements,
                                kernel_data=parameters.kernel_data,
                                target=loopy.CTarget(),
                                temporary_variables=parameters.temporaries,
                                symbol_manglers=[symbol_mangler],
                                options=options,
                                assumptions=assumptions,
                                lang_version=(2018, 2),
                                name=wrapper_name)

    # prioritize loops
    for indices in context.index_ordering:
        wrapper = loopy.prioritize_loops(wrapper, indices)

    # register kernel
    kernel = builder.kernel
    headers = set(kernel._headers)
    headers = headers | set(["#include <math.h>"])
    # Sorted so that the preamble text is deterministic.
    preamble = "\n".join(sorted(headers))

    from coffee.base import Node

    if isinstance(kernel._code, loopy.LoopKernel):
        # The user kernel is itself a loopy kernel: register it, match
        # argument dimensions, then inline it into the wrapper.
        knl = kernel._code
        wrapper = loopy.register_callable_kernel(wrapper, knl)
        from loopy.transform.callable import _match_caller_callee_argument_dimension_
        wrapper = _match_caller_callee_argument_dimension_(wrapper, knl.name)
        wrapper = loopy.inline_callable_kernel(wrapper, knl.name)
    else:
        # kernel is a string, add it to preamble
        if isinstance(kernel._code, Node):
            code = kernel._code.gencode()
        else:
            code = kernel._code
        wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
            wrapper,
            PyOP2KernelLookup(kernel.name, code, tuple(builder.argument_accesses)))
        preamble = preamble + "\n" + code

    wrapper = loopy.register_preamble_generators(wrapper, [_PreambleGen(preamble)])

    # register petsc functions
    wrapper = loopy.register_function_id_to_in_knl_callable_mapper(wrapper, petsc_function_lookup)

    return wrapper
Exemplo n.º 18
0
def generate(builder, wrapper_name=None):
    """Build the loopy wrapper kernel described by *builder*.

    The function emits the builder's instructions, replaces materialise
    nodes, merges indices, renumbers node names, works out instruction
    dependencies and loop nesting, converts the instructions to loopy
    statements, and creates a kernel with ``loopy.make_kernel`` for
    ``loopy.CTarget``. The user kernel (``builder.kernel``) is then either
    registered as a callable loopy kernel and inlined, or, when it is not
    a ``loopy.LoopKernel``, looked up by name with its source appended to
    the preamble. The preamble always includes ``math.h``, ``complex.h``
    and ``petsc.h`` in addition to the kernel's own headers.

    :arg builder: object supplying ``layer_index``, ``_loop_index``,
        ``emit_instructions()``, ``wrapper_args``, ``layer_extents``,
        ``kernel``, ``single_cell``, ``extruded`` and
        ``argument_accesses``.
    :arg wrapper_name: name of the generated kernel; defaults to
        ``"wrap_" + builder.kernel.name``.
    :returns: the loopy kernel produced by the steps above.
    """
    # The outermost loop index (plus the layer index, if there is one)
    # must be known to loop_nesting below.
    if builder.layer_index is not None:
        outer_inames = frozenset(
            [builder._loop_index.name, builder.layer_index.name])
    else:
        outer_inames = frozenset([builder._loop_index.name])

    instructions = list(builder.emit_instructions())

    # Mutable bag of state; it is handed to statement() through `context`.
    parameters = Bag()
    parameters.domains = OrderedDict()
    parameters.assumptions = OrderedDict()
    parameters.wrapper_arguments = builder.wrapper_args
    parameters.layer_start = builder.layer_extents[0].name
    parameters.layer_end = builder.layer_extents[1].name
    parameters.conditions = []
    # One slot per wrapper argument; slots still None after statement
    # generation are filled with plain GlobalArgs further down.
    parameters.kernel_data = list(None for _ in parameters.wrapper_arguments)
    parameters.temporaries = OrderedDict()
    parameters.kernel_name = builder.kernel.name

    # replace Materialise
    mapper = Memoizer(replace_materialise)
    mapper.initialisers = []
    instructions = list(mapper(i) for i in instructions)

    # merge indices
    merger = index_merger(instructions)
    instructions = list(merger(i) for i in instructions)
    initialiser = list(itertools.chain(*mapper.initialisers))
    # From here on `merger` is the one built from the initialisers; it is
    # also the one applied to mapper.initialisers below.
    merger = index_merger(initialiser)
    initialiser = list(merger(i) for i in initialiser)
    instructions = instructions + initialiser
    mapper.initialisers = [
        tuple(merger(i) for i in inits) for inits in mapper.initialisers
    ]

    # rename indices and nodes (so that the counters start from zero)
    # Names of the form <letters/underscores><digits>[_offset] are renumbered.
    pattern = re.compile(r"^([a-zA-Z_]+)([0-9]+)(_offset)?$")
    replacements = {}
    # A separate counter per (prefix, postfix) pair.
    counter = defaultdict(itertools.count)
    for node in traversal(instructions):
        if isinstance(node,
                      (Index, RuntimeIndex, Variable, Argument, NamedLiteral)):
            match = pattern.match(node.name)
            if match is None:
                continue
            prefix, _, postfix = match.groups()
            if postfix is None:
                postfix = ""
            replacements[node] = "%s%d%s" % (
                prefix, next(counter[(prefix, postfix)]), postfix)

    instructions = rename_nodes(instructions, replacements)
    mapper.initialisers = [
        rename_nodes(inits, replacements) for inits in mapper.initialisers
    ]
    parameters.wrapper_arguments = rename_nodes(parameters.wrapper_arguments,
                                                replacements)
    # The layer extents go through the same mapper and renaming, so the
    # names stored on `parameters` are refreshed here.
    s, e = rename_nodes([mapper(e) for e in builder.layer_extents],
                        replacements)
    parameters.layer_start = s.name
    parameters.layer_end = e.name

    # scheduling and loop nesting
    deps = instruction_dependencies(instructions, mapper.initialisers)
    within_inames = loop_nesting(instructions, deps, outer_inames,
                                 parameters.kernel_name)

    # generate loopy
    context = Bag()
    context.parameters = parameters
    context.within_inames = within_inames
    context.conditions = []
    context.index_ordering = []
    context.instruction_dependencies = deps

    statements = list(statement(insn, context) for insn in instructions)
    # remove the dummy instructions (they were only used to ensure
    # that the kernel knows about the outer inames).
    statements = list(s for s in statements
                      if not isinstance(s, DummyInstruction))

    domains = list(parameters.domains.values())
    if builder.single_cell:
        # Pin the domain whose first set dimension is the outer loop index
        # to a single point.
        new_domains = []
        for d in domains:
            if d.get_dim_name(isl.dim_type.set, 0) == builder._loop_index.name:
                # n = start
                new_domains.append(
                    d.add_constraint(
                        isl.Constraint.eq_from_names(d.space, {
                            "n": 1,
                            "start": -1
                        })))
            else:
                new_domains.append(d)
        domains = new_domains
        if builder.extruded:
            # Likewise pin the layer domain.
            new_domains = []
            for d in domains:
                if d.get_dim_name(isl.dim_type.set,
                                  0) == builder.layer_index.name:
                    # layer = t1 - 1
                    t1 = parameters.layer_end
                    new_domains.append(
                        d.add_constraint(
                            isl.Constraint.eq_from_names(
                                d.space, {
                                    "layer": 1,
                                    t1: -1,
                                    1: 1
                                })))
                else:
                    new_domains.append(d)
        domains = new_domains

    # Intersect all collected assumptions; the single-target unpacking
    # requires the result to consist of exactly one basic set.
    assumptions, = reduce(
        operator.and_,
        parameters.assumptions.values()).params().get_basic_sets()
    options = loopy.Options(check_dep_resolution=True,
                            ignore_boostable_into=True)

    # sometimes masks are not used, but we still need to create the function arguments
    for i, arg in enumerate(parameters.wrapper_arguments):
        if parameters.kernel_data[i] is None:
            arg = loopy.GlobalArg(arg.name, dtype=arg.dtype, shape=arg.shape)
            parameters.kernel_data[i] = arg

    if wrapper_name is None:
        wrapper_name = "wrap_%s" % builder.kernel.name

    # Add ordering constraints on the iteration bounds to the assumptions.
    pwaffd = isl.affs_from_space(assumptions.get_space())
    # start >= pwaffd[0] (presumably the constant zero -- TODO confirm)
    assumptions = assumptions & pwaffd["start"].ge_set(pwaffd[0])
    if builder.single_cell:
        # strict: start < end
        assumptions = assumptions & pwaffd["start"].lt_set(pwaffd["end"])
    else:
        # start <= end
        assumptions = assumptions & pwaffd["start"].le_set(pwaffd["end"])
    if builder.extruded:
        # layer_start <= layer_end
        assumptions = assumptions & pwaffd[parameters.layer_start].le_set(
            pwaffd[parameters.layer_end])
    assumptions = reduce(operator.and_, assumptions.get_basic_sets())

    wrapper = loopy.make_kernel(domains,
                                statements,
                                kernel_data=parameters.kernel_data,
                                target=loopy.CTarget(),
                                temporary_variables=parameters.temporaries,
                                symbol_manglers=[symbol_mangler],
                                options=options,
                                assumptions=assumptions,
                                lang_version=(2018, 2),
                                name=wrapper_name)

    # prioritize loops
    for indices in context.index_ordering:
        wrapper = loopy.prioritize_loops(wrapper, indices)

    # register kernel
    kernel = builder.kernel
    headers = set(kernel._headers)
    headers = headers | set(
        ["#include <math.h>", "#include <complex.h>", "#include <petsc.h>"])
    # Sorted so that the preamble text is deterministic.
    preamble = "\n".join(sorted(headers))

    from coffee.base import Node

    if isinstance(kernel._code, loopy.LoopKernel):
        # The user kernel is itself a loopy kernel: register it, match
        # argument dimensions, then inline it into the wrapper.
        knl = kernel._code
        wrapper = loopy.register_callable_kernel(wrapper, knl)
        from loopy.transform.callable import _match_caller_callee_argument_dimension_
        wrapper = _match_caller_callee_argument_dimension_(wrapper, knl.name)
        wrapper = loopy.inline_callable_kernel(wrapper, knl.name)
    else:
        # kernel is a string, add it to preamble
        if isinstance(kernel._code, Node):
            code = kernel._code.gencode()
        else:
            code = kernel._code
        wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
            wrapper,
            PyOP2KernelLookup(kernel.name, code,
                              tuple(builder.argument_accesses)))
        preamble = preamble + "\n" + code

    wrapper = loopy.register_preamble_generators(wrapper,
                                                 [_PreambleGen(preamble)])

    # register petsc functions
    wrapper = loopy.register_function_id_to_in_knl_callable_mapper(
        wrapper, petsc_function_lookup)

    return wrapper