Пример #1
0
    def get_kernel_call(self, codegen_state, name, gsize, lsize, extra_args):
        """Build the statements that launch the kernel called *name*.

        The returned block contains, in order:

        * an ``assert(programCount == ...)`` statement comparing against the
          code generated for ``lsize[0]`` (only when *lsize* is non-empty),
        * a ``launch[...] name(args)`` statement whose bracketed part lists
          the code generated for every entry of *gsize* (the brackets are
          omitted entirely when *gsize* is empty).

        NOTE(review): ``programCount``/``launch`` look like ISPC constructs;
        confirm against the enclosing target class.

        :arg codegen_state: passed to the expression mapper factory and to
            ``_arg_names_and_decls``.
        :arg name: name of the kernel being launched.
        :arg gsize: sequence of expressions placed in the launch spec.
        :arg lsize: sequence of expressions; only the first entry is used.
        :arg extra_args: accepted for interface compatibility; not used here.
        :returns: a :class:`cgen.Block` holding the generated statements.
        """
        to_code = self.get_expression_to_code_mapper(codegen_state)

        from pymbolic.mapper.stringifier import PREC_COMPARISON, PREC_NONE
        from cgen import Block, Statement

        statements = []

        if lsize:
            width_code = to_code(lsize[0], PREC_COMPARISON)
            statements.append(
                    Statement("assert(programCount == %s)" % width_code))

        # No grid extents means a bare "launch" without a bracketed spec.
        launch_spec = ""
        if gsize:
            extent_codes = [to_code(extent, PREC_NONE) for extent in gsize]
            launch_spec = "[%s]" % ", ".join(extent_codes)

        # Only the argument names are needed to spell out the call.
        arg_names, _ = self._arg_names_and_decls(codegen_state)
        call_args = ", ".join(arg_names)

        statements.append(
                Statement("launch%s %s(%s)" % (launch_spec, name, call_args)))

        return Block(statements)
Пример #2
0
    def get_kernel_call(self, codegen_state, name, gsize, lsize, extra_args):
        """Build the statements that launch the kernel called *name*.

        When *lsize* is non-empty, an ``assert(programCount == (...))``
        statement comparing against the code generated for ``lsize[0]`` is
        emitted first. It is followed by an :class:`cgen.ispc.ISPCLaunch`
        node built from the code generated for every entry of *gsize* and
        the call text ``name(arg, ...)``.

        :arg codegen_state: passed to the expression mapper factory and to
            ``_arg_names_and_decls``.
        :arg name: name of the kernel being launched.
        :arg gsize: iterable of expressions forming the launch grid.
        :arg lsize: sequence of expressions; only the first entry is used.
        :arg extra_args: accepted for interface compatibility; not used here.
        :returns: a :class:`cgen.Block` holding the generated nodes.
        """
        to_code = self.get_expression_to_code_mapper(codegen_state)

        from pymbolic.mapper.stringifier import PREC_NONE
        from cgen import Block, Statement

        statements = []

        if lsize:
            width_code = to_code(lsize[0], PREC_NONE)
            statements.append(
                Statement("assert(programCount == (%s))" % width_code))

        # Only the argument names are needed to spell out the call.
        arg_names, _ = self._arg_names_and_decls(codegen_state)

        from cgen.ispc import ISPCLaunch

        grid = tuple([to_code(extent, PREC_NONE) for extent in gsize])
        call_text = "%s(%s)" % (name, ", ".join(arg_names))
        statements.append(ISPCLaunch(grid, call_text))

        return Block(statements)
Пример #3
0
def generate_assignment_instruction_code(codegen_state, insn):
    """Generate code for the assignment instruction *insn*.

    The assignment itself is produced by
    ``codegen_state.ast_builder.emit_assignment(codegen_state, insn)``. If
    either of the kernel options ``trace_assignments`` or
    ``trace_assignment_values`` is set, a ``printf`` statement describing the
    assignment is generated as well and combined with the assignment in a
    :class:`cgen.Block`.

    :arg codegen_state: supplies the kernel, the expression-to-code mapper,
        the vectorization info and the AST builder.
    :arg insn: the instruction; its ``assignee``, ``expression``,
        ``atomicity`` and ``id`` attributes are read.
    :returns: the result of ``emit_assignment``, or a :class:`cgen.Block`
        pairing it with the tracing ``printf``.
    :raises Unvectorizable: while vectorizing, if the instruction has
        atomicity, if a vector right-hand side is assigned to a scalar
        left-hand side, or if tracing is requested for a vector assignee.
    :raises RuntimeError: if the assignee is not a variable, subscript or
        linear subscript (after unwrapping one ``Lookup``).
    """
    kernel = codegen_state.kernel
    to_code = codegen_state.expression_to_code_mapper

    from loopy.expression import VectorizabilityChecker

    # {{{ vectorization handling

    # Only becomes truthy when vectorization is active and the assignee
    # was found to be a vector.
    assignee_is_vector = False

    if codegen_state.vectorization_info:
        if insn.atomicity:
            raise Unvectorizable("atomic operation")

        vec_info = codegen_state.vectorization_info
        is_vectorized = VectorizabilityChecker(
            kernel, vec_info.iname, vec_info.length)
        assignee_is_vector = is_vectorized(insn.assignee)
        expression_is_vector = is_vectorized(insn.expression)

        if not assignee_is_vector and expression_is_vector:
            raise Unvectorizable("LHS is scalar, RHS is vector, cannot assign")

    # }}}

    # {{{ determine assignee name and indices

    from pymbolic.primitives import Variable, Subscript, Lookup
    from loopy.symbolic import LinearSubscript

    target = insn.assignee
    if isinstance(target, Lookup):
        # Classify what the lookup is applied to.
        target = target.aggregate

    if isinstance(target, Variable):
        assignee_var_name, assignee_indices = target.name, ()
    elif isinstance(target, Subscript):
        assignee_var_name = target.aggregate.name
        assignee_indices = target.index_tuple
    elif isinstance(target, LinearSubscript):
        assignee_var_name = target.aggregate.name
        assignee_indices = (target.index, )
    else:
        raise RuntimeError("invalid lvalue '%s'" % target)

    # }}}

    result = codegen_state.ast_builder.emit_assignment(codegen_state, insn)

    # {{{ tracing

    lhs_dtype = codegen_state.kernel.get_var_descriptor(
        assignee_var_name).dtype

    options = kernel.options
    if not (options.trace_assignments or options.trace_assignment_values):
        return result

    if assignee_is_vector:
        raise Unvectorizable("tracing does not support vectorization")

    from pymbolic.mapper.stringifier import PREC_NONE
    lhs_code = codegen_state.expression_to_code_mapper(
        insn.assignee, PREC_NONE)

    from cgen import Statement as S  # noqa

    gs, ls = kernel.get_grid_size_upper_bounds()
    group_axes = range(len(gs))
    local_axes = range(len(ls))

    gid_fields = ", ".join("gid%d=%%d" % axis for axis in group_axes)
    lid_fields = ", ".join("lid%d=%%d" % axis for axis in local_axes)
    printf_format = "{}.{}[{}][{}]: {}".format(
        kernel.name, insn.id, gid_fields, lid_fields, assignee_var_name)

    printf_args = ["gid(%d)" % axis for axis in group_axes]
    printf_args += ["lid(%d)" % axis for axis in local_axes]

    if assignee_indices:
        index_slots = ",".join(["%d"] * len(assignee_indices))
        printf_format += "[%s]" % index_slots
        printf_args += [
            to_code(index, prec=PREC_NONE, type_context="i")
            for index in assignee_indices]

    if options.trace_assignment_values:
        scalar_formats = {"i": " = %d", "f": " = %g"}
        kind = lhs_dtype.numpy_dtype.kind
        if kind in scalar_formats:
            printf_format += scalar_formats[kind]
            printf_args.append(lhs_code)
        elif kind == "c":
            printf_format += " = %g + %gj"
            printf_args += ["(%s).x" % lhs_code, "(%s).y" % lhs_code]
        # Any other dtype kind: the value is left out of the trace.

    printf_args_str = (
        ", " + ", ".join(str(arg) for arg in printf_args)
        if printf_args else "")

    printf_insn = S('printf("{}\\n"{})'.format(printf_format,
                                               printf_args_str))

    from cgen import Block
    if options.trace_assignment_values:
        # The value can only be printed once it has been assigned.
        return Block([result, printf_insn])

    # print first, execute later -> helps find segfaults
    return Block([printf_insn, result])

    # }}}
Пример #4
0
def generate_assignment_instruction_code(codegen_state, insn):
    """Generate code for the assignment instruction *insn*.

    A plain assignment is produced through
    ``codegen_state.ast_builder.emit_assignment``. If the instruction
    carries an ``AtomicUpdate`` entry for the assigned variable,
    ``generate_atomic_update`` is used instead and the assignee dtype is
    recorded in ``codegen_state.seen_atomic_dtypes``. If either of the kernel
    options ``trace_assignments`` or ``trace_assignment_values`` is set, a
    ``printf`` statement describing the assignment is generated as well and
    combined with the assignment in a :class:`cgen.Block`.

    NOTE(review): ``PREC_NONE`` and ``Unvectorizable`` are not imported in
    this function; they are assumed to be available at module scope.

    :arg codegen_state: supplies the kernel, the expression-to-code mapper,
        the vectorization info and the AST builder.
    :arg insn: the instruction; its ``assignee``, ``expression``,
        ``atomicity`` and ``id`` attributes are read.
    :returns: the generated assignment, or a :class:`cgen.Block` pairing it
        with the tracing ``printf``.
    :raises Unvectorizable: while vectorizing, if the instruction has
        atomicity, if a vector right-hand side is assigned to a scalar
        left-hand side, or if tracing is requested for a vector assignee.
    :raises RuntimeError: if the assignee is not a variable, subscript or
        linear subscript.
    :raises NotImplementedError: if the assignee's atomicity is an
        ``AtomicInit``.
    :raises ValueError: if the assignee's atomicity has any other
        unexpected type.
    """
    kernel = codegen_state.kernel
    to_code = codegen_state.expression_to_code_mapper

    from loopy.expression import dtype_to_type_context, VectorizabilityChecker

    # {{{ vectorization handling

    # Only becomes truthy when vectorization is active and the assignee
    # was found to be a vector.
    assignee_is_vector = False

    if codegen_state.vectorization_info:
        if insn.atomicity:
            raise Unvectorizable("atomic operation")

        vec_info = codegen_state.vectorization_info
        is_vectorized = VectorizabilityChecker(
            kernel, vec_info.iname, vec_info.length)
        assignee_is_vector = is_vectorized(insn.assignee)
        expression_is_vector = is_vectorized(insn.expression)

        if not assignee_is_vector and expression_is_vector:
            raise Unvectorizable("LHS is scalar, RHS is vector, cannot assign")

    # }}}

    # {{{ determine assignee name and indices

    from pymbolic.primitives import Variable, Subscript
    from loopy.symbolic import LinearSubscript

    target = insn.assignee
    if isinstance(target, Variable):
        assignee_var_name, assignee_indices = target.name, ()
    elif isinstance(target, Subscript):
        assignee_var_name = target.aggregate.name
        assignee_indices = target.index_tuple
    elif isinstance(target, LinearSubscript):
        assignee_var_name = target.aggregate.name
        assignee_indices = (target.index, )
    else:
        raise RuntimeError("invalid lvalue '%s'" % target)

    # }}}

    lhs_var = kernel.get_var_descriptor(assignee_var_name)
    lhs_dtype = lhs_var.dtype

    # {{{ find the atomicity entry (if any) that names the assignee

    lhs_atomicity = None
    if insn.atomicity is not None:
        matching = [
            entry for entry in insn.atomicity
            if entry.var_name == assignee_var_name
        ]
        assert len(matching) <= 1
        if matching:
            lhs_atomicity, = matching

    # }}}

    from loopy.kernel.data import AtomicInit, AtomicUpdate

    lhs_code = to_code(insn.assignee, prec=PREC_NONE, type_context=None)
    rhs_type_context = dtype_to_type_context(kernel.target, lhs_dtype)

    # {{{ emit the assignment

    if lhs_atomicity is None:
        result = codegen_state.ast_builder.emit_assignment(
            codegen_state,
            lhs_code,
            to_code(
                insn.expression,
                prec=PREC_NONE,
                type_context=rhs_type_context,
                needed_dtype=lhs_dtype))
    else:
        if isinstance(lhs_atomicity, AtomicInit):
            raise NotImplementedError("atomic init")

        if not isinstance(lhs_atomicity, AtomicUpdate):
            raise ValueError("unexpected lhs atomicity type: %s" %
                             type(lhs_atomicity).__name__)

        codegen_state.seen_atomic_dtypes.add(lhs_dtype)
        result = codegen_state.ast_builder.generate_atomic_update(
            kernel, codegen_state, lhs_atomicity, lhs_var, insn.assignee,
            insn.expression, lhs_dtype, rhs_type_context)

    # }}}

    # {{{ tracing

    options = kernel.options
    if not (options.trace_assignments or options.trace_assignment_values):
        return result

    if assignee_is_vector:
        raise Unvectorizable("tracing does not support vectorization")

    from cgen import Statement as S  # noqa

    gs, ls = kernel.get_grid_size_upper_bounds()
    group_axes = range(len(gs))
    local_axes = range(len(ls))

    gid_fields = ", ".join("gid%d=%%d" % axis for axis in group_axes)
    lid_fields = ", ".join("lid%d=%%d" % axis for axis in local_axes)
    printf_format = "%s.%s[%s][%s]: %s" % (
        kernel.name, insn.id, gid_fields, lid_fields, assignee_var_name)

    printf_args = ["gid(%d)" % axis for axis in group_axes]
    printf_args += ["lid(%d)" % axis for axis in local_axes]

    if assignee_indices:
        index_slots = ",".join(["%d"] * len(assignee_indices))
        printf_format += "[%s]" % index_slots
        printf_args += [
            to_code(index, prec=PREC_NONE, type_context="i")
            for index in assignee_indices]

    if options.trace_assignment_values:
        scalar_formats = {"i": " = %d", "f": " = %g"}
        kind = lhs_dtype.numpy_dtype.kind
        if kind in scalar_formats:
            printf_format += scalar_formats[kind]
            printf_args.append(lhs_code)
        elif kind == "c":
            printf_format += " = %g + %gj"
            printf_args += ["(%s).x" % lhs_code, "(%s).y" % lhs_code]
        # Any other dtype kind: the value is left out of the trace.

    printf_args_str = ", " + ", ".join(printf_args) if printf_args else ""

    printf_insn = S("printf(\"%s\\n\"%s)" %
                    (printf_format, printf_args_str))

    from cgen import Block
    if options.trace_assignment_values:
        # The value can only be printed once it has been assigned.
        return Block([result, printf_insn])

    # print first, execute later -> helps find segfaults
    return Block([printf_insn, result])

    # }}}