Exemplo n.º 1
0
def test_magma_fermi_matrix_mul(ctx_factory):
    """Check a tiled, image-backed matrix multiply against its plain form.

    An ``n x n`` float32 matmul kernel reading ``a`` and ``b`` as image
    arguments is built, then transformed by a series of ``split_iname``
    calls: ``i`` and ``j`` are each split by ``4 * 16`` and then by ``4``,
    and ``k`` is split by ``16`` and then by ``8``.  The transformed kernel
    is handed to ``lp.auto_test_vs_ref`` together with the untransformed one.

    The test is skipped when the first device reports no image support, when
    its platform is named "Portable Computing Language", or when the
    single-channel float 2D image format is not among the supported
    read-only formats.

    :arg ctx_factory: callable returning the OpenCL context to test on.
    """
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    n = get_suitable_size(ctx)

    # Both conditions share one skip message, exactly as a single
    # "not image_support or pocl" test would report it.
    device = ctx.devices[0]
    if not device.image_support:
        pytest.skip("crashes on pocl")
    if device.platform.name == "Portable Computing Language":
        pytest.skip("crashes on pocl")

    wanted_format = cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT)
    supported_formats = cl.get_supported_image_formats(
        ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
    if wanted_format not in supported_formats:
        pytest.skip("image format not supported")

    domain = "{[i,j,k]: 0<=i,j,k<%d}" % n
    instructions = ["c[i, j] = sum(k, a[i, k]*b[k, j])"]
    kernel_args = [
        lp.ImageArg("a", dtype, shape=(n, n)),
        lp.ImageArg("b", dtype, shape=(n, n)),
        lp.GlobalArg("c", dtype, shape=(n, n), order=order),
    ]
    # The untransformed kernel doubles as the reference for the comparison.
    seq_knl = lp.make_kernel(domain, instructions, kernel_args, name="matmul")

    i_reg = j_reg = 4
    i_chunks = j_chunks = 16

    # (iname, inner split size, chunk count, outer-split tag, inner-split tag)
    tiling = (
        ("i", i_reg, i_chunks, "g.0", "l.0"),
        ("j", j_reg, j_chunks, "g.1", "l.1"),
    )

    knl = seq_knl
    for iname, reg, chunks, group_tag, local_tag in tiling:
        knl = lp.split_iname(knl, iname, reg * chunks, outer_tag=group_tag)
        knl = lp.split_iname(
            knl, iname + "_inner", reg, outer_tag=local_tag, inner_tag="ilp")

    knl = lp.split_iname(knl, "k", 16)
    knl = lp.split_iname(knl, "k_inner", 8, outer_tag="unr")

    # FIXME: the prefetches below are currently disabled.
    #knl = lp.add_prefetch(knl, 'a', ["k_inner", "i_inner_inner", "i_inner_outer"],
    #           default_tag="l.auto")
    #knl = lp.add_prefetch(knl, 'b',
    #    ["k_inner", ("j_inner_inner", "j_inner_outer"),], default_tag="l.auto")

    gflop_count = 2 * n**3 / 1e9
    lp.auto_test_vs_ref(
        seq_knl,
        ctx,
        knl,
        op_count=[gflop_count],
        op_label=["GFlops"],
        parameters={},
        blacklist_ref_vendors="pocl",
    )
Exemplo n.º 2
0
def no_test_image_matrix_mul_ilp(ctx_factory):
    """Check an image-backed matmul with an ILP-tagged split against its plain form.

    An ``n x n`` float32 matmul kernel reading ``a`` and ``b`` as image
    arguments is built.  ``i`` is split by 2, ``j`` by ``4 * 4`` and then by
    4 (the outer part of that second split is tagged ``"ilp"``), and ``k``
    by 2; prefetches of ``a`` and ``b`` are then added.  The result is passed
    to ``lp.auto_test_vs_ref`` alongside the untransformed kernel.

    The function is skipped under the same conditions it checks up front:
    no image support on the first device, a platform named "Portable
    Computing Language", or a missing single-channel float 2D image format.

    NOTE(review): the ``no_test_`` prefix presumably keeps pytest from
    collecting this function -- confirm against the project's pytest setup.

    :arg ctx_factory: callable returning the OpenCL context to test on.
    """
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    # Both conditions share one skip message, exactly as a single
    # "not image_support or pocl" test would report it.
    device = ctx.devices[0]
    if not device.image_support:
        pytest.skip("crashes on pocl")
    if device.platform.name == "Portable Computing Language":
        pytest.skip("crashes on pocl")

    wanted_format = cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT)
    supported_formats = cl.get_supported_image_formats(
        ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
    if wanted_format not in supported_formats:
        pytest.skip("image format not supported")

    n = get_suitable_size(ctx)

    domain = "{[i,j,k]: 0<=i,j,k<%d}" % n
    instructions = ["c[i, j] = sum(k, a[i, k]*b[k, j])"]
    kernel_args = [
        lp.ImageArg("a", dtype, shape=(n, n)),
        lp.ImageArg("b", dtype, shape=(n, n)),
        lp.GlobalArg("c", dtype, shape=(n, n), order=order),
    ]
    # The untransformed kernel doubles as the reference for the comparison.
    seq_knl = lp.make_kernel(domain, instructions, kernel_args, name="matmul")

    ilp = 4
    j_inner_split = 4

    knl = lp.split_iname(seq_knl, "i", 2, outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", ilp * j_inner_split, outer_tag="g.1")
    knl = lp.split_iname(
        knl, "j_inner", j_inner_split, outer_tag="ilp", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 2)

    # conflict-free?
    prefetches = (
        ("a", ["i_inner", "k_inner"]),
        ("b", ["j_inner_outer", "j_inner_inner", "k_inner"]),
    )
    for array_name, sweep_inames in prefetches:
        knl = lp.add_prefetch(
            knl, array_name, sweep_inames, default_tag="l.auto")

    gflop_count = 2 * n**3 / 1e9
    lp.auto_test_vs_ref(
        seq_knl,
        ctx,
        knl,
        op_count=[gflop_count],
        op_label=["GFlops"],
        parameters={},
    )