示例#1
0
文件: data.py 项目: shwina/loopy
def remove_unused_arguments(knl):
    """Return a copy of *knl* that keeps only the arguments that are referenced.

    A name counts as referenced when it is one of ``knl.all_params()``, when
    an instruction of the substitution-expanded kernel lists it among its
    dependency names, or when it occurs in the shape, the offset or a fixed
    stride of any array argument or temporary variable.
    """
    import loopy as lp
    expanded = lp.expand_subst(knl)

    used_names = set(knl.all_params())
    for insn in expanded.instructions:
        used_names.update(insn.dependency_names())

    from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag
    from loopy.symbolic import get_dependencies
    from itertools import chain

    def deps_or_empty(expr):
        # None and lp.auto are treated as contributing no names.
        return (
                set()
                if expr is None or expr is lp.auto
                else get_dependencies(expr))

    for ary in chain(knl.args, six.itervalues(knl.temporary_variables)):
        if not isinstance(ary, ArrayBase):
            continue

        used_names.update(
                deps_or_empty(ary.shape) | deps_or_empty(ary.offset))

        for dim_tag in ary.dim_tags:
            if isinstance(dim_tag, FixedStrideArrayDimTag):
                used_names.update(deps_or_empty(dim_tag.stride))

    kept_args = [arg for arg in knl.args if arg.name in used_names]
    return knl.copy(args=kept_args)
示例#2
0
def test_complicated_subst(ctx_factory):
    """Check how many substitution rules remain after a selective expansion.

    After ``expand_subst`` is applied with a ``within`` expression, the
    kernel is expected to hold one rule whose name starts with ``f``, one
    starting with ``g`` and two starting with ``h``.
    """
    # ctx_factory is left unused: the kernel is only transformed and
    # inspected here.
    insns = """
                f(x) := x*a[x]
                g(x) := 12 + f(x)
                h(x) := 1 + g(x) + 20*g$two(x)

                a[i] = h$one(i) * h$two(i)
                """
    knl = lp.make_kernel("{[i]: 0<=i<n}", insns)

    knl = lp.expand_subst(knl, "... > id:h and tag:two > id:g and tag:two")

    print(knl)

    remaining_rules = list(knl.substitutions.keys())
    expected_counts = (("f", 1), ("g", 1), ("h", 2))
    for prefix, expected in expected_counts:
        matching = [name for name in remaining_rules
                if name.startswith(prefix)]
        assert len(matching) == expected
示例#3
0
文件: data.py 项目: cmsquared/loopy
def remove_unused_arguments(knl):
    """Drop every argument of *knl* that nothing refers to.

    The set of referenced names is seeded with ``knl.all_params()`` and then
    extended by the dependency names of all instructions (taken from the
    kernel with substitution rules expanded) and by the names appearing in
    the shape, offset and fixed strides of array arguments and temporaries.
    The result is ``knl.copy(args=...)`` with the surviving arguments.
    """
    import loopy as lp
    subst_free_knl = lp.expand_subst(knl)

    referenced = set(knl.all_params())
    for insn in subst_free_knl.instructions:
        referenced.update(insn.dependency_names())

    from loopy.kernel.array import ArrayBase, FixedStrideArrayDimTag
    from loopy.symbolic import get_dependencies
    from itertools import chain

    def names_in(expr):
        # Expressions that are None or lp.auto yield an empty set.
        if expr is not None and expr is not lp.auto:
            return get_dependencies(expr)
        return set()

    arrays = (
            ary
            for ary in chain(
                knl.args, six.itervalues(knl.temporary_variables))
            if isinstance(ary, ArrayBase))

    for ary in arrays:
        referenced.update(names_in(ary.shape) | names_in(ary.offset))

        fixed_stride_tags = [
                tag for tag in ary.dim_tags
                if isinstance(tag, FixedStrideArrayDimTag)]
        for tag in fixed_stride_tags:
            referenced.update(names_in(tag.stride))

    return knl.copy(
            args=[arg for arg in knl.args if arg.name in referenced])
示例#4
0
 def variant_gpu(knl):
     """Return *knl* after the transformation sequence of this variant.

     The steps are, in order: expand substitution rules, split ``i`` and
     ``j`` by 256, prefetch ``x[j,k]`` and tag its fetch inames, prefetch
     ``x[i,k]``, and finally call ``prioritize_loops`` for ``j_outer`` and
     ``j_inner``.
     """
     knl = lp.expand_subst(knl)

     # Only the split of "i" assigns tags to the resulting inames.
     knl = lp.split_iname(knl, "i", 256, outer_tag="g.0", inner_tag="l.0")
     knl = lp.split_iname(knl, "j", 256)

     # The fetch inames are named explicitly so they can be tagged afterwards.
     fetch_inames = ["x_fetch_j", "x_fetch_k"]
     knl = lp.add_prefetch(
             knl, "x[j,k]", ["j_inner", "k"], fetch_inames,
             default_tag=None)
     knl = lp.tag_inames(knl, {"x_fetch_k": "unr", "x_fetch_j": "l.0"})

     knl = lp.add_prefetch(knl, "x[i,k]", ["k"], default_tag=None)
     return lp.prioritize_loops(knl, ["j_outer", "j_inner"])
示例#5
0
def remove_unused_inames(knl, inames=None):
    """Delete those among *inames* that are unused, i.e. project them
    out of the domain. If these inames pose implicit restrictions on
    other inames, these restrictions will persist as existentially
    quantified variables.

    :arg inames: may be an iterable of inames or a string of comma-separated
        inames. Whitespace around the names in such a string is ignored and
        empty entries are skipped. If *None*, all inames of *knl* are
        considered.
    :returns: a copy of *knl* with the unused inames projected out of their
        domains, or *knl* itself if no iname had to be removed.
    """

    # {{{ normalize arguments

    if inames is None:
        inames = knl.all_inames()
    elif isinstance(inames, str):
        # "i, j" has to name the same inames as "i,j". Without stripping,
        # " j" would never match a used iname and would be handed to
        # DomainChanger as if it were an unused iname of the kernel.
        inames = [
                name.strip()
                for name in inames.split(",")
                if name.strip()]

    # }}}

    # {{{ check which inames are unused

    import loopy as lp
    # Usage is determined on the kernel with substitution rules expanded.
    exp_knl = lp.expand_subst(knl)

    inames = set(inames)
    used_inames = set()
    for insn in exp_knl.instructions:
        used_inames.update(
            exp_knl.insn_inames(insn.id)
            | insn.reduction_inames())

    unused_inames = inames - used_inames

    # }}}

    # {{{ remove them

    from loopy.kernel.tools import DomainChanger

    # Iterate in sorted order so that the sequence of projections (and hence
    # the resulting kernel) does not depend on set iteration order.
    for iname in sorted(unused_inames):
        domch = DomainChanger(knl, (iname, ))

        dom = domch.domain
        dt, idx = dom.get_var_dict()[iname]
        dom = dom.project_out(dt, idx, 1)

        knl = knl.copy(domains=domch.get_domains_with(dom))

    # }}}

    return knl
示例#6
0
def test_nested_substs_in_insns(ctx_factory):
    """Expand nested substitution rule calls and compare with the original.

    The expanded kernel must hold no substitution rules, and it is checked
    against the unexpanded kernel with ``lp.auto_test_vs_ref``.
    """
    ctx = ctx_factory()
    import loopy as lp

    insns = """
        a(x) := 2 * x
        b(x) := x**2
        c(x) := 7 * x
        f[i] = c(b(a(i)))
        """
    ref_knl = lp.make_kernel("{[i]: 0<=i<10}", insns)

    expanded_knl = lp.expand_subst(ref_knl)
    remaining_rules = expanded_knl.substitutions
    assert not remaining_rules

    lp.auto_test_vs_ref(ref_knl, ctx, expanded_knl)
示例#7
0
def test_nested_substs_in_insns(ctx_factory):
    """Expand nested substitution rule calls and compare with the original.

    After ``expand_subst``, no entry of the result's ``callables_table`` may
    have a subkernel with substitution rules left. The result is then
    checked against the unexpanded program with ``lp.auto_test_vs_ref``.
    """
    ctx = ctx_factory()
    import loopy as lp

    insns = """
        a(x) := 2 * x
        b(x) := x**2
        c(x) := 7 * x
        f[i] = c(b(a(i)))
        """
    ref_prg = lp.make_kernel("{[i]: 0<=i<10}", insns)

    t_unit = lp.expand_subst(ref_prg)
    with_leftover_rules = [
            cknl for cknl in t_unit.callables_table.values()
            if cknl.subkernel.substitutions]
    assert not with_leftover_rules

    lp.auto_test_vs_ref(ref_prg, ctx, t_unit)
示例#8
0
def test_nested_substs_in_insns(ctx_factory):
    """Verify that nested substitution rule invocations are fully expanded.

    Builds a kernel whose single instruction calls ``c(b(a(i)))``, expands
    all substitution rules, asserts that none remain, and compares the
    expanded kernel with the original through ``lp.auto_test_vs_ref``.
    """
    ctx = ctx_factory()
    import loopy as lp

    domain = "{[i]: 0<=i<10}"
    rules_and_insn = """
        a(x) := 2 * x
        b(x) := x**2
        c(x) := 7 * x
        f[i] = c(b(a(i)))
        """
    ref_knl = lp.make_kernel(domain, rules_and_insn)

    knl = lp.expand_subst(ref_knl)
    if knl.substitutions:
        raise AssertionError

    lp.auto_test_vs_ref(ref_knl, ctx, knl)
示例#9
0
def test_complicated_subst(ctx_factory):
    """Count the substitution rules left after a selective ``expand_subst``.

    The kernel defines rules ``f``, ``g`` and ``h``; after expansion with a
    ``within`` expression, the remaining rule names are expected to include
    one starting with ``f``, one with ``g`` and two with ``h``.
    """
    # ctx_factory is not called; nothing in this test executes the kernel.
    domain = "{[i]: 0<=i<n}"
    rules_and_insn = """
                f(x) := x*a[x]
                g(x) := 12 + f(x)
                h(x) := 1 + g(x) + 20*g$two(x)

                a[i] = h$one(i) * h$two(i)
                """
    knl = lp.make_kernel(domain, rules_and_insn)

    within = "... > id:h and tag:two > id:g and tag:two"
    knl = lp.expand_subst(knl, within)

    print(knl)

    rule_names = list(knl.substitutions.keys())
    for letter, how_many in (("f", 1), ("g", 1), ("h", 2)):
        # Each True from startswith counts as 1 in the sum.
        found = sum(name.startswith(letter) for name in rule_names)
        assert found == how_many
示例#10
0
 def variant_cpu(knl):
     """Return *knl* after the transformation sequence of this variant.

     Substitution rules are expanded, ``i`` is split by 1024 (outer iname
     tagged ``g.0``, ``slabs=(0, 1)``), and ``x[i,k]`` is prefetched over
     ``k`` with ``default_tag=None``.
     """
     expanded = lp.expand_subst(knl)
     split = lp.split_iname(
             expanded, "i", 1024, outer_tag="g.0", slabs=(0, 1))
     return lp.add_prefetch(split, "x[i,k]", ["k"], default_tag=None)
示例#11
0
def test_lbm(ctx_factory):
    """Compare a transformed lattice Boltzmann kernel with its reference.

    The kernel is built from three instruction groups (``init_m``,
    ``update_m`` and the final ``f_new`` writes), the dtype of ``f`` is set
    to ``float32``, and the untransformed kernel is kept as the reference.
    The transformed version splits ``ii`` and ``jj`` by 16, expands the
    substitution rules ``i`` and ``j``, and prefetches ``f``. Both are
    compared via ``lp.auto_test_vs_ref`` with ``nx = ny = 20``.
    """
    ctx = ctx_factory()

    # D2Q4Q4Q4 lattice Boltzmann scheme for the shallow water equations
    # Example by Loic Gouarin <*****@*****.**>
    knl = lp.make_kernel(
        "{[ii,jj]:0<=ii<nx-2 and 0<=jj<ny-2}",
        """  # noqa (silences flake8 line length warning)
        i := ii + 1
        j := jj + 1
        for ii, jj
            with {id_prefix=init_m}
                <> m[0] =   +    f[i-1, j, 0] +    f[i, j-1, 1] + f[i+1, j, 2] +  f[i, j+1, 3]
                m[1] =   + 4.*f[i-1, j, 0] - 4.*f[i+1, j, 2]
                m[2] =   + 4.*f[i, j-1, 1] - 4.*f[i, j+1, 3]
                m[3] =   +    f[i-1, j, 0] -    f[i, j-1, 1] + f[i+1, j, 2] -  f[i, j+1, 3]
                m[4] =   +    f[i-1, j, 4] +    f[i, j-1, 5] + f[i+1, j, 6] +  f[i, j+1, 7]
                m[5] =   + 4.*f[i-1, j, 4] - 4.*f[i+1, j, 6]
                m[6] =   + 4.*f[i, j-1, 5] - 4.*f[i, j+1, 7]
                m[7] =   +    f[i-1, j, 4] -    f[i, j-1, 5] + f[i+1, j, 6] -  f[i, j+1, 7]
                m[8] =   +    f[i-1, j, 8] +    f[i, j-1, 9] + f[i+1, j, 10] + f[i, j+1, 11]
                m[9] =   + 4.*f[i-1, j, 8] - 4.*f[i+1, j, 10]
                m[10] =  + 4.*f[i, j-1, 9] - 4.*f[i, j+1, 11]
                m[11] =  +    f[i-1, j, 8] -    f[i, j-1, 9] + f[i+1, j, 10] - f[i, j+1, 11]
            end

            with {id_prefix=update_m,dep=init_m*}
                m[1] = m[1] + 2.*(m[4] - m[1])
                m[2] = m[2] + 2.*(m[8] - m[2])
                m[3] = m[3]*(1. - 1.5)
                m[5] = m[5] + 1.5*(0.5*(m[0]*m[0]) + (m[4]*m[4])/m[0] - m[5])
                m[6] = m[6] + 1.5*(m[4]*m[8]/m[0] - m[6])
                m[7] = m[7]*(1. - 1.2000000000000000)
                m[9] = m[9] + 1.5*(m[4]*m[8]/m[0] - m[9])
                m[10] = m[10] + 1.5*(0.5*(m[0]*m[0]) + (m[8]*m[8])/m[0] - m[10])
                m[11] = m[11]*(1. - 1.2)
            end

            with {dep=update_m*}
                f_new[i, j, 0] =  + 0.25*m[0] + 0.125*m[1] + 0.25*m[3]
                f_new[i, j, 1] =  + 0.25*m[0] + 0.125*m[2] - 0.25*m[3]
                f_new[i, j, 2] =  + 0.25*m[0] - 0.125*m[1] + 0.25*m[3]
                f_new[i, j, 3] =  + 0.25*m[0] - 0.125*m[2] - 0.25*m[3]
                f_new[i, j, 4] =  + 0.25*m[4] + 0.125*m[5] + 0.25*m[7]
                f_new[i, j, 5] =  + 0.25*m[4] + 0.125*m[6] - 0.25*m[7]
                f_new[i, j, 6] =  + 0.25*m[4] - 0.125*m[5] + 0.25*m[7]
                f_new[i, j, 7] =  + 0.25*m[4] - 0.125*m[6] - 0.25*m[7]
                f_new[i, j, 8] =  + 0.25*m[8] + 0.125*m[9] + 0.25*m[11]
                f_new[i, j, 9] =  + 0.25*m[8] + 0.125*m[10] - 0.25*m[11]
                f_new[i, j, 10] =  + 0.25*m[8] - 0.125*m[9] + 0.25*m[11]
                f_new[i, j, 11] =  + 0.25*m[8] - 0.125*m[10] - 0.25*m[11]
           end
        end
        """)

    # Only the dtype of "f" is supplied explicitly.
    knl = lp.add_and_infer_dtypes(knl, {"f": np.float32})

    # Keep the kernel as it is before any of the transformations below; it
    # serves as the reference in the final comparison.
    ref_knl = knl

    # Split both inames by 16 and tag the resulting outer/inner inames.
    knl = lp.split_iname(knl, "ii", 16, outer_tag="g.1", inner_tag="l.1")
    knl = lp.split_iname(knl, "jj", 16, outer_tag="g.0", inner_tag="l.0")
    # Expand the substitution rules (i, j) before the prefetch of "f".
    knl = lp.expand_subst(knl)
    knl = lp.add_prefetch(knl, "f", "ii_inner,jj_inner", fetch_bounding_box=True)

    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters={"nx": 20, "ny": 20})
示例#12
0
def test_lbm(ctx_factory):
    """Compare a transformed lattice Boltzmann kernel with its reference.

    The kernel is built from three instruction groups (``init_m``,
    ``update_m`` and the final ``f_new`` writes), the dtype of ``f`` is set
    to ``float32``, and the untransformed kernel is kept as the reference.
    The transformed version splits ``ii`` and ``jj`` by 16, expands the
    substitution rules ``i`` and ``j``, and prefetches ``f`` with
    ``default_tag="l.auto"``. Both are compared via ``lp.auto_test_vs_ref``
    with ``nx = ny = 20``.
    """
    ctx = ctx_factory()

    # D2Q4Q4Q4 lattice Boltzmann scheme for the shallow water equations
    # Example by Loic Gouarin <*****@*****.**>
    knl = lp.make_kernel(
        "{[ii,jj]:0<=ii<nx-2 and 0<=jj<ny-2}",
        """  # noqa (silences flake8 line length warning)
        i := ii + 1
        j := jj + 1
        for ii, jj
            with {id_prefix=init_m}
                <> m[0] =   +    f[i-1, j, 0] +    f[i, j-1, 1] + f[i+1, j, 2] +  f[i, j+1, 3]
                m[1] =   + 4.*f[i-1, j, 0] - 4.*f[i+1, j, 2]
                m[2] =   + 4.*f[i, j-1, 1] - 4.*f[i, j+1, 3]
                m[3] =   +    f[i-1, j, 0] -    f[i, j-1, 1] + f[i+1, j, 2] -  f[i, j+1, 3]
                m[4] =   +    f[i-1, j, 4] +    f[i, j-1, 5] + f[i+1, j, 6] +  f[i, j+1, 7]
                m[5] =   + 4.*f[i-1, j, 4] - 4.*f[i+1, j, 6]
                m[6] =   + 4.*f[i, j-1, 5] - 4.*f[i, j+1, 7]
                m[7] =   +    f[i-1, j, 4] -    f[i, j-1, 5] + f[i+1, j, 6] -  f[i, j+1, 7]
                m[8] =   +    f[i-1, j, 8] +    f[i, j-1, 9] + f[i+1, j, 10] + f[i, j+1, 11]
                m[9] =   + 4.*f[i-1, j, 8] - 4.*f[i+1, j, 10]
                m[10] =  + 4.*f[i, j-1, 9] - 4.*f[i, j+1, 11]
                m[11] =  +    f[i-1, j, 8] -    f[i, j-1, 9] + f[i+1, j, 10] - f[i, j+1, 11]
            end

            with {id_prefix=update_m,dep=init_m*}
                m[1] = m[1] + 2.*(m[4] - m[1])
                m[2] = m[2] + 2.*(m[8] - m[2])
                m[3] = m[3]*(1. - 1.5)
                m[5] = m[5] + 1.5*(0.5*(m[0]*m[0]) + (m[4]*m[4])/m[0] - m[5])
                m[6] = m[6] + 1.5*(m[4]*m[8]/m[0] - m[6])
                m[7] = m[7]*(1. - 1.2000000000000000)
                m[9] = m[9] + 1.5*(m[4]*m[8]/m[0] - m[9])
                m[10] = m[10] + 1.5*(0.5*(m[0]*m[0]) + (m[8]*m[8])/m[0] - m[10])
                m[11] = m[11]*(1. - 1.2)
            end

            with {dep=update_m*}
                f_new[i, j, 0] =  + 0.25*m[0] + 0.125*m[1] + 0.25*m[3]
                f_new[i, j, 1] =  + 0.25*m[0] + 0.125*m[2] - 0.25*m[3]
                f_new[i, j, 2] =  + 0.25*m[0] - 0.125*m[1] + 0.25*m[3]
                f_new[i, j, 3] =  + 0.25*m[0] - 0.125*m[2] - 0.25*m[3]
                f_new[i, j, 4] =  + 0.25*m[4] + 0.125*m[5] + 0.25*m[7]
                f_new[i, j, 5] =  + 0.25*m[4] + 0.125*m[6] - 0.25*m[7]
                f_new[i, j, 6] =  + 0.25*m[4] - 0.125*m[5] + 0.25*m[7]
                f_new[i, j, 7] =  + 0.25*m[4] - 0.125*m[6] - 0.25*m[7]
                f_new[i, j, 8] =  + 0.25*m[8] + 0.125*m[9] + 0.25*m[11]
                f_new[i, j, 9] =  + 0.25*m[8] + 0.125*m[10] - 0.25*m[11]
                f_new[i, j, 10] =  + 0.25*m[8] - 0.125*m[9] + 0.25*m[11]
                f_new[i, j, 11] =  + 0.25*m[8] - 0.125*m[10] - 0.25*m[11]
           end
        end
        """)

    # Only the dtype of "f" is supplied explicitly.
    knl = lp.add_and_infer_dtypes(knl, {"f": np.float32})

    # Keep the kernel as it is before any of the transformations below; it
    # serves as the reference in the final comparison.
    ref_knl = knl

    # Split both inames by 16 and tag the resulting outer/inner inames.
    knl = lp.split_iname(knl, "ii", 16, outer_tag="g.1", inner_tag="l.1")
    knl = lp.split_iname(knl, "jj", 16, outer_tag="g.0", inner_tag="l.0")
    # Expand the substitution rules (i, j) before the prefetch of "f".
    knl = lp.expand_subst(knl)
    # The tag for the prefetch inames is passed explicitly as "l.auto".
    knl = lp.add_prefetch(knl, "f", "ii_inner,jj_inner", fetch_bounding_box=True,
            default_tag="l.auto")

    lp.auto_test_vs_ref(ref_knl, ctx, knl, parameters={"nx": 20, "ny": 20})