def get_kernel_call(self, codegen_state, name, gsize, lsize, extra_args):
    """Build the statements that launch the kernel function *name*.

    The returned ``cgen.Block`` contains, in order:

    * when *lsize* is non-empty, an ``assert(programCount == ...)``
      statement comparing against the code generated for ``lsize[0]``;
    * a ``launch`` statement, carrying a ``[...]`` launch specification
      built from every entry of *gsize* when *gsize* is non-empty, that
      calls *name* with the argument names reported by
      ``self._arg_names_and_decls``.

    *extra_args* is accepted but not used by this implementation.
    """
    to_code = self.get_expression_to_code_mapper(codegen_state)

    from pymbolic.mapper.stringifier import PREC_COMPARISON, PREC_NONE
    from cgen import Statement as S, Block

    statements = []

    if lsize:
        # Only the first local-size entry takes part in the check.
        width_code = to_code(lsize[0], PREC_COMPARISON)
        statements.append(S("assert(programCount == %s)" % width_code))

    if gsize:
        extent_codes = [to_code(extent, PREC_NONE) for extent in gsize]
        launch_spec = "[%s]" % ", ".join(extent_codes)
    else:
        launch_spec = ""

    # The declarations half of the pair is not needed for the call itself.
    arg_names, _ = self._arg_names_and_decls(codegen_state)

    call_text = "launch%s %s(%s)" % (launch_spec, name, ", ".join(arg_names))
    statements.append(S(call_text))

    return Block(statements)
def get_kernel_call(self, codegen_state, name, gsize, lsize, extra_args):
    """Build the statements that launch the kernel function *name*.

    The returned ``cgen.Block`` contains, in order:

    * when *lsize* is non-empty, an ``assert(programCount == (...))``
      statement comparing against the code generated for ``lsize[0]``;
    * a ``cgen.ispc.ISPCLaunch`` node built from the code generated for
      every entry of *gsize* and a call expression that passes the
      argument names reported by ``self._arg_names_and_decls``.

    *extra_args* is accepted but not used by this implementation.
    """
    to_code = self.get_expression_to_code_mapper(codegen_state)

    from pymbolic.mapper.stringifier import PREC_NONE
    from cgen import Statement as S, Block

    body = []

    if lsize:
        # Only the first local-size entry takes part in the check.
        width_code = to_code(lsize[0], PREC_NONE)
        body.append(S("assert(programCount == (%s))" % width_code))

    # The declarations half of the pair is not needed for the call itself.
    arg_names, _ = self._arg_names_and_decls(codegen_state)

    from cgen.ispc import ISPCLaunch

    grid_codes = tuple(to_code(extent, PREC_NONE) for extent in gsize)
    invocation = "%s(%s)" % (name, ", ".join(arg_names))
    body.append(ISPCLaunch(grid_codes, invocation))

    return Block(body)
def generate_assignment_instruction_code(codegen_state, insn):
    """Generate code for the assignment instruction *insn*.

    The assignment itself comes from
    ``codegen_state.ast_builder.emit_assignment(codegen_state, insn)``.
    When the kernel options enable ``trace_assignments`` or
    ``trace_assignment_values``, a ``printf`` statement describing the
    assignment is combined with it in a ``cgen.Block``.

    Raises ``Unvectorizable`` while vectorizing if the instruction is
    atomic, if the LHS is scalar while the RHS is a vector, or if tracing
    is requested for a vector assignment. Raises ``RuntimeError`` if the
    assignee (after unwrapping a ``Lookup``) is not a ``Variable``,
    ``Subscript`` or ``LinearSubscript``.
    """
    kernel = codegen_state.kernel
    ecm = codegen_state.expression_to_code_mapper

    from loopy.expression import VectorizabilityChecker

    # {{{ vectorization handling

    vinfo = codegen_state.vectorization_info
    is_vector = False

    if vinfo:
        if insn.atomicity:
            raise Unvectorizable("atomic operation")

        is_vectorizable = VectorizabilityChecker(
            kernel, vinfo.iname, vinfo.length)
        lhs_vec = is_vectorizable(insn.assignee)
        rhs_vec = is_vectorizable(insn.expression)

        if rhs_vec and not lhs_vec:
            raise Unvectorizable("LHS is scalar, RHS is vector, cannot assign")

        is_vector = lhs_vec

    # }}}

    # {{{ find the assigned variable's name and indices

    from pymbolic.primitives import Variable, Subscript, Lookup
    from loopy.symbolic import LinearSubscript

    target = insn.assignee
    if isinstance(target, Lookup):
        # Look through the attribute lookup to the expression underneath.
        target = target.aggregate

    if isinstance(target, Variable):
        assignee_var_name, assignee_indices = target.name, ()
    elif isinstance(target, Subscript):
        assignee_var_name = target.aggregate.name
        assignee_indices = target.index_tuple
    elif isinstance(target, LinearSubscript):
        assignee_var_name = target.aggregate.name
        assignee_indices = (target.index, )
    else:
        raise RuntimeError("invalid lvalue '%s'" % target)

    # }}}

    result = codegen_state.ast_builder.emit_assignment(codegen_state, insn)

    # {{{ tracing

    lhs_dtype = codegen_state.kernel.get_var_descriptor(
        assignee_var_name).dtype

    options = kernel.options
    if not (options.trace_assignments or options.trace_assignment_values):
        return result

    if vinfo and is_vector:
        raise Unvectorizable("tracing does not support vectorization")

    from pymbolic.mapper.stringifier import PREC_NONE
    lhs_code = codegen_state.expression_to_code_mapper(
        insn.assignee, PREC_NONE)

    from cgen import Statement as S  # noqa

    gs, ls = kernel.get_grid_size_upper_bounds()
    gid_axes = range(len(gs))
    lid_axes = range(len(ls))

    printf_format = "{}.{}[{}][{}]: {}".format(
        kernel.name,
        insn.id,
        ", ".join("gid%d=%%d" % axis for axis in gid_axes),
        ", ".join("lid%d=%%d" % axis for axis in lid_axes),
        assignee_var_name)

    printf_args = ["gid(%d)" % axis for axis in gid_axes]
    printf_args += ["lid(%d)" % axis for axis in lid_axes]

    if assignee_indices:
        printf_format += "[%s]" % ",".join(["%d"] * len(assignee_indices))
        for index_expr in assignee_indices:
            printf_args.append(
                ecm(index_expr, prec=PREC_NONE, type_context="i"))

    trace_values = options.trace_assignment_values

    if trace_values:
        kind = lhs_dtype.numpy_dtype.kind
        if kind in ("i", "f"):
            printf_format += " = %d" if kind == "i" else " = %g"
            printf_args.append(lhs_code)
        elif kind == "c":
            printf_format += " = %g + %gj"
            printf_args.extend(["(%s).x" % lhs_code, "(%s).y" % lhs_code])
        # Any other dtype kind: no value is appended to the trace line.

    printf_args_str = (
        ", " + ", ".join(str(arg) for arg in printf_args)
        if printf_args else "")

    printf_insn = S('printf("{}\\n"{})'.format(printf_format, printf_args_str))

    from cgen import Block
    if trace_values:
        # The printed value is read back from the LHS, so assign first.
        pieces = [result, printf_insn]
    else:
        # print first, execute later -> helps find segfaults
        pieces = [printf_insn, result]

    # }}}

    return Block(pieces)
def generate_assignment_instruction_code(codegen_state, insn):
    """Generate code for the assignment instruction *insn*.

    A plain assignment is produced through
    ``codegen_state.ast_builder.emit_assignment``. If *insn* carries an
    ``AtomicUpdate`` entry for the assigned variable,
    ``generate_atomic_update`` is used instead and the LHS dtype is
    recorded in ``codegen_state.seen_atomic_dtypes``. When the kernel
    options enable ``trace_assignments`` or ``trace_assignment_values``,
    a ``printf`` statement describing the assignment is combined with
    the result in a ``cgen.Block``.

    Raises ``Unvectorizable`` while vectorizing if the instruction is
    atomic, if the LHS is scalar while the RHS is a vector, or if tracing
    is requested for a vector assignment. Raises ``RuntimeError`` for an
    assignee that is not a ``Variable``, ``Subscript`` or
    ``LinearSubscript``, ``NotImplementedError`` for ``AtomicInit``, and
    ``ValueError`` for any other atomicity object.
    """
    kernel = codegen_state.kernel
    ecm = codegen_state.expression_to_code_mapper

    from loopy.expression import dtype_to_type_context, VectorizabilityChecker

    # {{{ vectorization handling

    vinfo = codegen_state.vectorization_info
    is_vector = False

    if vinfo:
        if insn.atomicity:
            raise Unvectorizable("atomic operation")

        is_vectorizable = VectorizabilityChecker(
            kernel, vinfo.iname, vinfo.length)
        lhs_vec = is_vectorizable(insn.assignee)
        rhs_vec = is_vectorizable(insn.expression)

        if rhs_vec and not lhs_vec:
            raise Unvectorizable("LHS is scalar, RHS is vector, cannot assign")

        is_vector = lhs_vec

    # }}}

    # {{{ find the assigned variable's name and indices

    from pymbolic.primitives import Variable, Subscript
    from loopy.symbolic import LinearSubscript

    target = insn.assignee

    if isinstance(target, Variable):
        assignee_var_name, assignee_indices = target.name, ()
    elif isinstance(target, Subscript):
        assignee_var_name = target.aggregate.name
        assignee_indices = target.index_tuple
    elif isinstance(target, LinearSubscript):
        assignee_var_name = target.aggregate.name
        assignee_indices = (target.index, )
    else:
        raise RuntimeError("invalid lvalue '%s'" % target)

    # }}}

    lhs_var = kernel.get_var_descriptor(assignee_var_name)
    lhs_dtype = lhs_var.dtype

    # {{{ pick out the atomicity entry (if any) for the assigned variable

    lhs_atomicity = None
    if insn.atomicity is not None:
        matching = [
            entry for entry in insn.atomicity
            if entry.var_name == assignee_var_name]
        assert len(matching) <= 1
        if matching:
            lhs_atomicity, = matching

    # }}}

    from loopy.kernel.data import AtomicInit, AtomicUpdate

    lhs_code = ecm(insn.assignee, prec=PREC_NONE, type_context=None)
    rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)

    if lhs_atomicity is None:
        result = codegen_state.ast_builder.emit_assignment(
            codegen_state,
            lhs_code,
            ecm(insn.expression,
                prec=PREC_NONE,
                type_context=rhs_type_context,
                needed_dtype=lhs_dtype))

    elif isinstance(lhs_atomicity, AtomicInit):
        raise NotImplementedError("atomic init")

    elif isinstance(lhs_atomicity, AtomicUpdate):
        codegen_state.seen_atomic_dtypes.add(lhs_dtype)
        result = codegen_state.ast_builder.generate_atomic_update(
            kernel, codegen_state, lhs_atomicity, lhs_var,
            insn.assignee, insn.expression,
            lhs_dtype, rhs_type_context)

    else:
        raise ValueError("unexpected lhs atomicity type: %s"
                         % type(lhs_atomicity).__name__)

    # {{{ tracing

    options = kernel.options
    if not (options.trace_assignments or options.trace_assignment_values):
        return result

    if vinfo and is_vector:
        raise Unvectorizable("tracing does not support vectorization")

    from cgen import Statement as S  # noqa

    gs, ls = kernel.get_grid_size_upper_bounds()
    gid_axes = range(len(gs))
    lid_axes = range(len(ls))

    printf_format = "%s.%s[%s][%s]: %s" % (
        kernel.name,
        insn.id,
        ", ".join("gid%d=%%d" % axis for axis in gid_axes),
        ", ".join("lid%d=%%d" % axis for axis in lid_axes),
        assignee_var_name)

    printf_args = ["gid(%d)" % axis for axis in gid_axes]
    printf_args += ["lid(%d)" % axis for axis in lid_axes]

    if assignee_indices:
        printf_format += "[%s]" % ",".join(["%d"] * len(assignee_indices))
        for index_expr in assignee_indices:
            printf_args.append(
                ecm(index_expr, prec=PREC_NONE, type_context="i"))

    trace_values = options.trace_assignment_values

    if trace_values:
        kind = lhs_dtype.numpy_dtype.kind
        if kind in ("i", "f"):
            printf_format += " = %d" if kind == "i" else " = %g"
            printf_args.append(lhs_code)
        elif kind == "c":
            printf_format += " = %g + %gj"
            printf_args.extend(["(%s).x" % lhs_code, "(%s).y" % lhs_code])
        # Any other dtype kind: no value is appended to the trace line.

    printf_args_str = (", " + ", ".join(printf_args)) if printf_args else ""

    printf_insn = S("printf(\"%s\\n\"%s)" % (printf_format, printf_args_str))

    from cgen import Block
    if trace_values:
        # The printed value is read back from the LHS, so assign first.
        pieces = [result, printf_insn]
    else:
        # print first, execute later -> helps find segfaults
        pieces = [printf_insn, result]

    # }}}

    return Block(pieces)