def _make_slab_set(iname, size):
    """Return the ISL basic set describing ``0 <= iname < size``.

    :arg iname: name of the single set variable
    :arg size: exclusive upper bound added to the zero expression
    :returns: the one basic set making up the resulting set
    """
    space_vars = isl.make_zero_and_vars([iname])
    zero = space_vars[0]
    axis = space_vars[iname]

    lower_bound = zero.le_set(axis)
    upper_bound = axis.lt_set(zero + size)

    # Unpacking into a 1-tuple fails loudly unless exactly one basic set
    # comes back.
    (slab,) = (lower_bound & upper_bound).get_basic_sets()
    return slab
def _get_scalar_func_loopy_program(self, name, nargs, naxes):
    """Build a loopy program applying the scalar function *name* entrywise.

    The generated instruction is
    ``out[i0, ...] = name(inp0[i0, ...], ..., inp<nargs-1>[i0, ...])`` over
    the domain ``0 <= i<k> < n<k>`` for each of the *naxes* axes.

    :arg name: function name; ``"arctan2"`` is translated to ``"atan2"``,
        while passing ``"atan2"`` directly emits a
        :class:`DeprecationWarning`
    :arg nargs: number of input arrays ``inp0``, ``inp1``, ...
    :arg naxes: number of array axes (and of loop variables)
    :returns: the result of ``make_loopy_program``, named
        ``actx_special_<name>``
    """
    if name == "atan2":
        from warnings import warn
        warn(
            "'atan2' in ArrayContext.np is deprecated. Use 'arctan2', "
            "as in numpy2. This will be disallowed in 2021.",
            DeprecationWarning, stacklevel=3)
    elif name == "arctan2":
        # The generated code uses the C-style spelling.
        name = "atan2"

    from pymbolic import var

    axes = range(naxes)
    var_names = ["i%d" % axis for axis in axes]
    size_names = ["n%d" % axis for axis in axes]
    subscript = tuple(var(vname) for vname in var_names)

    from islpy import make_zero_and_vars

    v = make_zero_and_vars(var_names, params=size_names)
    zero = v[0]

    # Start from the zero expression's domain and intersect in one
    # half-open range per axis.
    domain = zero.domain()
    for vname, sname in zip(var_names, size_names):
        axis_var = v[vname]
        domain = domain & zero.le_set(axis_var) & axis_var.lt_set(v[sname])

    (domain_bset,) = domain.get_basic_sets()

    target = var("out")[subscript]
    operands = [var("inp%d" % i)[subscript] for i in range(nargs)]
    assignment = lp.Assignment(target, var(name)(*operands))

    return make_loopy_program(
        [domain_bset],
        [assignment],
        name="actx_special_%s" % name)
def expression_kernel(expr, args):
    r"""Build a :class:`pyop2.Kernel` for the processed UFL expression.

    :arg expr: the processed UFL expression
    :arg args: kernel arguments; ``args[0].function`` supplies the function
        space and every ``arg.arg`` is handed to loopy as kernel data
    :returns: an ``op2.Kernel`` named ``"expression"``, or ``None`` when
        *expr* is exactly a ``Zero``
    """
    # A Zero is an empty slot (assignment to an indexed LHS): nothing to do.
    if type(expr) is Zero:
        return None

    fs = args[0].function.function_space()

    import islpy as isl

    kernel_name = "expression"
    iname = "d"

    # Single loop 0 <= d < cdim.
    v = isl.make_zero_and_vars([iname])
    lower = v[0].le_set(v[iname])
    upper = v[iname].lt_set(v[0] + fs.dof_dset.cdim)
    domain = lower & upper

    context = Bag()
    context.within_inames = frozenset([iname])
    context.indices = (p.Variable(iname),)

    instruction = loopy_instructions(expr, context)
    kernel_data = [arg.arg for arg in args]
    knl = loopy.make_function(
        [domain],
        [instruction],
        kernel_data,
        name=kernel_name,
        silenced_warnings=["summing_if_branches_ops"])

    return op2.Kernel(knl, kernel_name)
def _get_lp_domains(_inames, _extents): domains = [] for idx, extent in zip(_inames, _extents): inames = isl.make_zero_and_vars([idx]) domains.append(((inames[0].le_set(inames[idx])) & (inames[idx].lt_set(inames[0] + extent)))) return domains
def expression_index(expr, parameters):
    """Translate an index into a pymbolic variable, registering its domain.

    On first sight of ``expr.name`` the set ``0 <= name < expr.extent`` is
    stored in ``parameters.domains``; later calls leave the stored domain
    untouched.

    :arg expr: index object providing ``name`` and ``extent``
    :arg parameters: object whose ``domains`` mapping collects the ISL sets
    :returns: ``pym.Variable`` carrying the index name
    """
    name = expr.name
    if name not in parameters.domains:
        isl_vars = isl.make_zero_and_vars([name])
        zero = isl_vars[0]
        iname = isl_vars[name]
        lower = iname.ge_set(zero)
        upper = iname.lt_set(zero + expr.extent)
        parameters.domains[name] = lower & upper
    return pym.Variable(name)
def test_make_zero_and_vars():
    """Combine several affine constraints built from ``make_zero_and_vars``.

    Builds ``0 <= i + j < n`` together with ``0 <= i <= 13 + n`` and prints
    the resulting set.
    """
    v = isl.make_zero_and_vars("i,j,k", "n")
    zero, i, j, n = v[0], v["i"], v["j"], v["n"]

    sum_nonnegative = zero.le_set(i + j)
    sum_below_n = (i + j).lt_set(n)
    i_nonnegative = zero.le_set(i)
    i_bounded = i.le_set(13 + n)

    myset = sum_nonnegative & sum_below_n & i_nonnegative & i_bounded

    print(myset)
def generate(impero_c, args, precision, scalar_type,
             kernel_name="loopy_kernel", index_names=()):
    """Generates loopy code.

    :arg impero_c: ImperoC tuple with Impero AST and other data
    :arg args: list of loopy.GlobalArgs
    :arg precision: floating-point precision for printing
    :arg scalar_type: type of scalars as C typename string
    :arg kernel_name: function name of the kernel
    :arg index_names: pre-assigned index names, in any form accepted by
        the :class:`dict` constructor (mapping or iterable of pairs)
    :returns: loopy kernel
    """
    ctx = LoopyContext()
    ctx.indices = impero_c.indices
    # Indices without a pre-assigned name fall back to the prefix "i".
    # The default for index_names is an immutable empty tuple rather than
    # a shared list object.
    ctx.index_names = defaultdict(lambda: "i", index_names)
    ctx.precision = precision
    ctx.scalar_type = scalar_type
    ctx.epsilon = 10.0 ** (-precision)

    # Create arguments
    data = list(args)
    for i, temp in enumerate(impero_c.temporaries):
        name = "t%d" % i
        if isinstance(temp, gem.Constant):
            # Constants become read-only temporaries initialised from
            # their array data.
            data.append(lp.TemporaryVariable(
                name,
                shape=temp.shape,
                dtype=temp.array.dtype,
                initializer=temp.array,
                address_space=lp.AddressSpace.LOCAL,
                read_only=True))
        else:
            # Free-index extents come first, then the temporary's own shape.
            # NOTE(review): dtype is fixed to float64 here regardless of
            # scalar_type -- confirm this is intended.
            shape = tuple(index.extent
                          for index in ctx.indices[temp]) + temp.shape
            data.append(lp.TemporaryVariable(
                name,
                shape=shape,
                dtype=numpy.float64,
                initializer=None,
                address_space=lp.AddressSpace.LOCAL,
                read_only=False))
        ctx.gem_to_pymbolic[temp] = p.Variable(name)

    # Create instructions
    instructions = statement(impero_c.tree, ctx)

    # Create domains: one set 0 <= idx < extent per index seen while
    # generating the instructions.
    domains = []
    for idx, extent in ctx.index_extent.items():
        inames = isl.make_zero_and_vars([idx])
        domains.append(inames[0].le_set(inames[idx])
                       & inames[idx].lt_set(inames[0] + extent))

    if not domains:
        # No loops at all: pass a single parameter-free, zero-dimensional
        # set instead of an empty list.
        domains = [isl.BasicSet("[] -> {[]}")]

    # Create loopy kernel
    knl = lp.make_function(domains, instructions, data, name=kernel_name,
                           target=lp.CTarget(), seq_dependencies=True,
                           silenced_warnings=["summing_if_branches_ops"])

    # Prevent loop interchange by loopy
    knl = lp.prioritize_loops(knl, ",".join(ctx.index_extent.keys()))

    # Help loopy in scheduling by assigning priority to instructions:
    # earlier instructions receive the larger priority value.
    n_insns = len(knl.instructions)
    insn_new = [insn.copy(priority=n_insns - i)
                for i, insn in enumerate(knl.instructions)]
    knl = knl.copy(instructions=insn_new)

    return knl
def expression_runtimeindex(expr, parameters):
    """Translate a runtime-bounded index into a pymbolic variable.

    The children of *expr* are ``(lo, hi, constraint)``.  On first sight of
    ``expr.name`` the set ``lo <= name < hi`` is stored in
    ``parameters.domains``, with every ``Argument``/``Variable`` found in the
    bounds used as an ISL parameter.  A non-``None`` constraint is
    translated as well and stored in ``parameters.assumptions``.

    :arg expr: index object providing ``name`` and ``children``
    :arg parameters: object holding the ``domains`` and ``assumptions``
        mappings
    :returns: ``pym.Variable`` carrying the index name
    """
    # Comparison operator -> name of the ISL set-building method.
    comparison_methods = {
        ">": "gt_set",
        ">=": "ge_set",
        "==": "eq_set",
        "!=": "ne_set",
        "<": "lt_set",
        "<=": "le_set",
    }

    @singledispatch
    def translate(node, isl_vars):
        raise AssertionError("Unhandled type '%s' in domain translation" % type(node))

    @translate.register(Sum)
    def translate_sum(node, isl_vars):
        return operator.add(*(translate(child, isl_vars)
                              for child in node.children))

    @translate.register(Argument)
    def translate_argument(node, isl_vars):
        # Run the argument through the regular expression translation and
        # look the resulting name up among the ISL variables.
        return isl_vars[expression(node, parameters).name]

    @translate.register(Variable)
    def translate_variable(node, isl_vars):
        return isl_vars[node.name]

    @translate.register(Zero)
    def translate_zero(node, isl_vars):
        assert node.shape == ()
        return isl_vars[0]

    @translate.register(LogicalAnd)
    def translate_logicaland(node, isl_vars):
        lhs, rhs = (translate(child, isl_vars) for child in node.children)
        return lhs & rhs

    @translate.register(Comparison)
    def translate_comparison(node, isl_vars):
        lhs, rhs = (translate(child, isl_vars) for child in node.children)
        method_name = comparison_methods[node.operator]
        return getattr(lhs, method_name)(rhs)

    name = expr.name
    if name not in parameters.domains:
        lo, hi, constraint = expr.children
        params = [v.name for v in traversal([lo, hi])
                  if isinstance(v, (Argument, Variable))]
        isl_vars = isl.make_zero_and_vars([name], params)

        lower = isl_vars[name].ge_set(translate(lo, isl_vars))
        upper = isl_vars[name].lt_set(translate(hi, isl_vars))
        parameters.domains[name] = lower & upper

        if constraint is not None:
            parameters.assumptions[name] = translate(constraint, isl_vars)

    return pym.Variable(name)
def test_make_zero_and_vars():
    """Combine several affine constraints built from ``make_zero_and_vars``.

    Builds ``0 <= i + j < n`` together with ``0 <= i <= 13 + n`` and prints
    the resulting set.
    """
    v = isl.make_zero_and_vars("i,j,k", "n")
    zero, i, j, n = v[0], v["i"], v["j"], v["n"]

    sum_nonnegative = zero.le_set(i + j)
    sum_below_n = (i + j).lt_set(n)
    i_nonnegative = zero.le_set(i)
    i_bounded = i.le_set(13 + n)

    myset = sum_nonnegative & sum_below_n & i_nonnegative & i_bounded

    print(myset)
def expression_runtimeindex(expr, parameters):
    """Translate a runtime-bounded index into a pymbolic variable.

    The children of *expr* are ``(lo, hi, constraint)``.  On first sight of
    ``expr.name`` the set ``lo <= name < hi`` is stored in
    ``parameters.domains``, with every ``Argument``/``Variable`` found in the
    bounds used as an ISL parameter.  A non-``None`` constraint is
    translated as well and stored in ``parameters.assumptions``.

    :arg expr: index object providing ``name`` and ``children``
    :arg parameters: object holding the ``domains`` and ``assumptions``
        mappings
    :returns: ``pym.Variable`` carrying the index name
    """
    # Comparison operator -> name of the ISL set-building method.
    comparison_methods = {
        ">": "gt_set",
        ">=": "ge_set",
        "==": "eq_set",
        "!=": "ne_set",
        "<": "lt_set",
        "<=": "le_set",
    }

    @singledispatch
    def translate(node, isl_vars):
        raise AssertionError("Unhandled type '%s' in domain translation" % type(node))

    @translate.register(Sum)
    def translate_sum(node, isl_vars):
        return operator.add(*(translate(child, isl_vars)
                              for child in node.children))

    @translate.register(Argument)
    def translate_argument(node, isl_vars):
        # Run the argument through the regular expression translation and
        # look the resulting name up among the ISL variables.
        return isl_vars[expression(node, parameters).name]

    @translate.register(Variable)
    def translate_variable(node, isl_vars):
        return isl_vars[node.name]

    @translate.register(Zero)
    def translate_zero(node, isl_vars):
        assert node.shape == ()
        return isl_vars[0]

    @translate.register(LogicalAnd)
    def translate_logicaland(node, isl_vars):
        lhs, rhs = (translate(child, isl_vars) for child in node.children)
        return lhs & rhs

    @translate.register(Comparison)
    def translate_comparison(node, isl_vars):
        lhs, rhs = (translate(child, isl_vars) for child in node.children)
        method_name = comparison_methods[node.operator]
        return getattr(lhs, method_name)(rhs)

    name = expr.name
    if name not in parameters.domains:
        lo, hi, constraint = expr.children
        params = [v.name for v in traversal([lo, hi])
                  if isinstance(v, (Argument, Variable))]
        isl_vars = isl.make_zero_and_vars([name], params)

        lower = isl_vars[name].ge_set(translate(lo, isl_vars))
        upper = isl_vars[name].lt_set(translate(hi, isl_vars))
        parameters.domains[name] = lower & upper

        if constraint is not None:
            parameters.assumptions[name] = translate(constraint, isl_vars)

    return pym.Variable(name)
def create_domains(indices):
    """Create ISL domains from indices.

    :arg indices: iterable of (index_name, extent) pairs
    :returns: a list with one ISL set ``0 <= index_name < extent`` per pair;
        when *indices* yields nothing, a list holding the single
        parameter-free, zero-dimensional set ``[] -> {[]}``
    """
    def _half_open_range(name, extent):
        v = isl.make_zero_and_vars([name])
        return v[0].le_set(v[name]) & v[name].lt_set(v[0] + extent)

    domains = [_half_open_range(idx, extent) for idx, extent in indices]
    return domains or [isl.BasicSet("[] -> {[]}")]
def _get_scalar_func_loopy_program(self, c_name, nargs, naxes):
    """Build a loopy program applying the scalar function *c_name* entrywise.

    The generated instruction is
    ``out[i0, ...] = c_name(inp0[i0, ...], ..., inp<nargs-1>[i0, ...])`` over
    the domain ``0 <= i<k> < n<k>`` for each of the *naxes* axes.

    :arg c_name: name of the function to call in the generated code
    :arg nargs: number of input arrays ``inp0``, ``inp1``, ...
    :arg naxes: number of array axes (and of loop variables)
    :returns: the result of ``make_loopy_program``, named
        ``actx_special_<c_name>``
    """
    from pymbolic import var

    axes = range(naxes)
    var_names = ["i%d" % axis for axis in axes]
    size_names = ["n%d" % axis for axis in axes]
    subscript = tuple(var(vname) for vname in var_names)

    from islpy import make_zero_and_vars

    v = make_zero_and_vars(var_names, params=size_names)
    zero = v[0]

    # Start from the zero expression's domain and intersect in one
    # half-open range per axis.
    domain = zero.domain()
    for vname, sname in zip(var_names, size_names):
        axis_var = v[vname]
        domain = domain & zero.le_set(axis_var) & axis_var.lt_set(v[sname])

    (domain_bset,) = domain.get_basic_sets()

    target = var("out")[subscript]
    operands = [var("inp%d" % i)[subscript] for i in range(nargs)]
    assignment = lp.Assignment(target, var(c_name)(*operands))

    return make_loopy_program(
        [domain_bset],
        [assignment],
        name="actx_special_%s" % c_name)
def expression_kernel(expr, args):
    r"""Build a :class:`pyop2.Kernel` for the processed UFL expression.

    :arg expr: the processed UFL expression
    :arg args: kernel arguments; ``args[0].function`` supplies the function
        space and every ``arg.arg`` is handed to loopy as kernel data
    :returns: an ``op2.Kernel`` named ``"expression"``, or ``None`` when
        *expr* is exactly a ``Zero``
    """
    # A Zero is an empty slot (assignment to an indexed LHS): nothing to do.
    if type(expr) is Zero:
        return None

    fs = args[0].function.function_space()

    import islpy as isl

    kernel_name = "expression"
    iname = "d"

    # Single loop 0 <= d < cdim.
    v = isl.make_zero_and_vars([iname])
    lower = v[0].le_set(v[iname])
    upper = v[iname].lt_set(v[0] + fs.dof_dset.cdim)
    domain = lower & upper

    context = Bag()
    context.within_inames = frozenset([iname])
    context.indices = (p.Variable(iname),)

    instruction = loopy_instructions(expr, context)
    kernel_data = [arg.arg for arg in args]
    knl = loopy.make_function(
        [domain],
        [instruction],
        kernel_data,
        name=kernel_name,
        silenced_warnings=["summing_if_branches_ops"])

    return op2.Kernel(knl, kernel_name)
def __generate_loopy(self, knl_name: str, verbose: bool = False, **kwargs):
    """Generate cell kernel for the Laplace operator using Loopy.

    The kernel performs ``A_T[i,j] = sum(k, A0[i,j,k] * G_T[k])`` with
    ``0 <= i, j < n`` and ``0 <= k < m``; ``n`` is fixed to ``self.n_dof``
    and ``m`` to ``self.n_dim**2`` before code generation.

    :arg knl_name: name given to the generated kernel
    :arg verbose: if true, print the loop domains, the assignment, the
        kernel and the generated code
    :arg kwargs: accepted but not used by this method
    :returns: tuple ``(knl_c, knl_h)`` of kernel source and header text,
        with ``__restrict__`` replaced by ``restrict``
    """
    n_dof, n_dim = self.n_dof, self.n_dim

    # Inputs to the kernel
    arg_names = ["A_T", "A0", "G_T"]
    # Kernel parameters that will be fixed later
    param_names = ["n", "m"]
    # Tuples of inames and extents of their loops
    loops = [("i", "n"), ("j", "n"), ("k", "m")]

    # Generate the domains for the loops
    isl_domains = []
    for idx, extent in loops:
        # Dict of the zero expression, the loop variable (iname) and the
        # extent parameter
        vs = isl.make_zero_and_vars([idx], [extent])
        # Loop domain 0 <= idx < extent ('<=' and '<' restrictions)
        isl_domains.append(
            vs[0].le_set(vs[idx]) & vs[idx].lt_set(vs[0] + vs[extent]))

    if verbose:
        print("ISL loop domains:")
        print(isl_domains)
        print("")

    # Generate pymbolic variables for all used symbols
    args = {arg: pb.Variable(arg) for arg in arg_names}
    params = {param: pb.Variable(param) for param in param_names}
    inames = {iname: pb.Variable(iname) for iname, _ in loops}

    # Input arguments for the loopy kernel
    n, m = params["n"], params["m"]
    lp_args = {
        "A_T": lp.GlobalArg("A_T", dtype=np.double, shape=(n, n)),
        "A0": lp.GlobalArg("A0", dtype=np.double, shape=(n, n, m)),
        # One-dimensional shape spelled as a real 1-tuple.
        "G_T": lp.GlobalArg("G_T", dtype=np.double, shape=(m,)),
    }

    # Arguments first, then the value parameters
    data = list(lp_args.values())
    data += [lp.ValueArg(param) for param in param_names]

    # Build the kernel instruction: computation and assignment of the
    # element matrix
    def build_ass():
        # A_T[i,j] = sum(k, A0[i,j,k] * G_T[k]);
        i, j, k = inames["i"], inames["j"], inames["k"]
        A_T, A0, G_T = args["A_T"], args["A0"], args["G_T"]

        # The target of the assignment
        target = pb.Subscript(A_T, (i, j))

        # The rhs expression: Frobenius inner product <A0[i,j],G_T>
        reduce_op = lp.library.reduction.SumReductionOperation()
        reduce_expr = pb.Subscript(A0, (i, j, k)) * pb.Subscript(G_T, (k,))
        expr = lp.Reduction(reduce_op, k, reduce_expr)

        return lp.Assignment(target, expr)

    ass = build_ass()

    if verbose:
        print("Assignment expression:")
        print(ass)
        print("")

    instructions = [ass]

    # Construct the kernel
    knl = lp.make_kernel(isl_domains, instructions, data, name=knl_name,
                         target=lp.CTarget(),
                         lang_version=lp.MOST_RECENT_LANGUAGE_VERSION)

    knl = lp.fix_parameters(knl, n=n_dof, m=n_dim**2)
    knl = lp.prioritize_loops(knl, "i,j")

    if verbose:
        print("")
        print(knl)
        print("")

    # Generate kernel code
    knl_c = lp.generate_code_v2(knl).device_code()
    knl_h = str(lp.generate_header(knl)[0])

    if verbose:
        print(knl_c)
        print("")

    # Postprocess kernel code: use the C99 keyword spelling
    knl_c = knl_c.replace("__restrict__", "restrict")
    knl_h = knl_h.replace("__restrict__", "restrict")

    return knl_c, knl_h
def build_loopy_kernel_A_auto():
    """Build the loopy kernel ``A[i,j] = c*sum(k, B[k,i]*B[k,j])``.

    The loops run over ``0 <= i, j < n`` and ``0 <= k < m``; the parameters
    are then fixed to ``n=3`` and ``m=2``.  The loop domains, the
    assignment, the kernel and the generated code are printed along the way.

    :returns: tuple ``(knl_name, knl_call, knl_c, knl_h)``: the kernel name,
        a call statement string, and the generated source and header text
        with ``__restrict__`` replaced by ``restrict``
    """
    knl_name = "kernel_tensor_A"

    # Kernel inputs, parameters fixed later, and (iname, extent) loop pairs
    arg_names = ["A", "B", "c"]
    param_names = ["n", "m"]
    loops = [("i", "n"), ("j", "n"), ("k", "m")]

    # One domain 0 <= iname < extent per loop
    isl_domains = []
    for iname, extent in loops:
        vs = isl.make_zero_and_vars([iname], [extent])
        lower = vs[0].le_set(vs[iname])
        upper = vs[iname].lt_set(vs[0] + vs[extent])
        isl_domains.append(lower & upper)

    print("ISL loop domains:")
    print(isl_domains)
    print("")

    # Pymbolic variables for all used symbols
    args = {name: pb.Variable(name) for name in arg_names}
    params = {name: pb.Variable(name) for name in param_names}
    inames = {name: pb.Variable(name) for name, _ in loops}

    n, m = params["n"], params["m"]
    i, j, k = inames["i"], inames["j"], inames["k"]

    # Input arguments for the loopy kernel
    lp_args = {
        "A": lp.GlobalArg("A", dtype=np.double, shape=(n, n)),
        "B": lp.GlobalArg("B", dtype=np.double, shape=(m, n)),
        "c": lp.ValueArg("c", dtype=np.double),
    }

    # Everything handed to loopy: the arguments followed by the parameters
    data = []
    data += list(lp_args.values())
    data += [lp.ValueArg(param) for param in ["n", "m"]]

    # Kernel instruction: A[i,j] = c*sum(k, B[k,i]*B[k,j])
    target = pb.Subscript(args["A"], (i, j))
    reduce_op = lp.library.reduction.SumReductionOperation()
    column_product = (pb.Subscript(args["B"], (k, i))
                      * pb.Subscript(args["B"], (k, j)))
    rhs = args["c"] * lp.Reduction(reduce_op, k, column_product)
    ass = lp.Assignment(target, rhs)

    print("Assignment expression:")
    print(ass)
    print("")

    instructions = [ass]

    # Construct the kernel
    knl = lp.make_kernel(isl_domains, instructions, data, name=knl_name,
                         target=lp.CTarget(),
                         lang_version=lp.MOST_RECENT_LANGUAGE_VERSION)
    knl = lp.fix_parameters(knl, n=3, m=2)
    knl = lp.prioritize_loops(knl, "i,j")

    print(knl)
    print("")

    # Generate kernel code
    knl_c = lp.generate_code_v2(knl).device_code()
    knl_h = str(lp.generate_header(knl)[0])

    print(knl_c)
    print("")

    # Postprocess kernel code
    replacements = [("__restrict__", "restrict")]
    knl_c = utils.replace_strings(knl_c, replacements)
    knl_h = utils.replace_strings(knl_h, replacements)

    knl_call = "kernel_tensor_A(A, &B[0][0], 1.0/(2.0*Ae));"

    return knl_name, knl_call, knl_c, knl_h