Exemplo n.º 1
0
def get_put_kernel(context, dtype, idx_dtype, vec_count=1):
    """Build the elementwise "put" kernel with fill and wrap-around sources.

    For every work item ``i`` the generated body reads
    ``dest_idx = gmem_dest_idx[i]`` and then, for each of the *vec_count*
    vector pairs ``k``, stores into ``dest<k>[dest_idx]`` either ``src<k>[0]``
    (when ``use_fill[k]`` is nonzero) or ``src<k>[i % val_ary_lengths[k]]``.

    :arg context: object whose ``devices[0]`` is used to build the struct
        preamble for *dtype*; also forwarded to ``get_elwise_kernel``.
    :arg dtype: element dtype of the ``dest``/``src`` vectors.
    :arg idx_dtype: dtype of the ``gmem_dest_idx`` index vector.
    :arg vec_count: number of ``dest``/``src`` vector pairs.
    :returns: whatever ``get_elwise_kernel`` returns for the kernel "put".
    """
    type_names = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }
    vec_ids = range(vec_count)

    # Argument order is part of the kernel signature: all destinations, the
    # index vector, all sources, then the two per-vector control arrays.
    args = []
    args.extend(
        VectorArg(dtype, "dest%d" % k, with_offset=True) for k in vec_ids)
    args.append(VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True))
    args.extend(
        VectorArg(dtype, "src%d" % k, with_offset=True) for k in vec_ids)
    args.append(VectorArg(np.uint8, "use_fill", with_offset=True))
    args.append(VectorArg(np.int64, "val_ary_lengths", with_offset=True))

    # Only ``{i}`` is substituted below; the bare ``i`` in ``i % ...`` is the
    # elementwise work-item index in the generated C code.
    store_template = (
        "dest{i}[dest_idx] = (use_fill[{i}] ? src{i}[0] : "
        "src{i}[i % val_ary_lengths[{i}]]);"
    )
    stores = [store_template.format(i=k) for k in vec_ids]
    index_load = "%(idx_tp)s dest_idx = gmem_dest_idx[i];\n" % type_names
    body = index_load + "\n".join(stores)

    struct_preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, args, body, preamble=struct_preamble, name="put")
Exemplo n.º 2
0
def get_fill_kernel(context, dtype):
    """Build the elementwise "fill" kernel performing ``z[i] = a``.

    :arg context: object whose ``devices[0]`` is used to build the struct
        preamble for *dtype*; also forwarded to ``get_elwise_kernel``.
    :arg dtype: dtype of both the output vector ``z`` and the scalar ``a``.
    :returns: whatever ``get_elwise_kernel`` returns for the kernel "fill".
    """
    c_type = dtype_to_ctype(dtype)
    arguments = "{tp} *z, {tp} a".format(tp=c_type)
    struct_preamble = dtype_to_c_struct(context.devices[0], dtype)

    return get_elwise_kernel(
        context, arguments, "z[i] = a",
        preamble=struct_preamble, name="fill")
Exemplo n.º 3
0
    def _get_kernel(self, dtype, src_index_dtype, dst_index_dtype,
                    have_src_indices, have_dst_indices, map_values):
        """Build the "gappy_copy_and_map" elementwise kernel.

        The kernel always takes ``input_ary`` and ``output_ary`` (both of
        *dtype*). ``from_indices``, ``to_indices`` and ``value_map`` are
        appended, in that order, only when the matching flag is true. The
        kernel source is rendered from ``GAPPY_COPY_TPL`` with the same flags.

        :arg dtype: element dtype of the input, output and value map arrays.
        :arg src_index_dtype: dtype of ``from_indices``.
        :arg dst_index_dtype: dtype of ``to_indices``.
        :arg have_src_indices: whether ``from_indices`` is a kernel argument.
        :arg have_dst_indices: whether ``to_indices`` is a kernel argument.
        :arg map_values: whether ``value_map`` is a kernel argument.
        :returns: an ``ElementwiseKernel`` built on ``self.context``.
        """
        from boxtree.tools import VectorArg

        # (enabled, dtype, name) for each optional trailing argument.
        optional_args = (
            (have_src_indices, src_index_dtype, "from_indices"),
            (have_dst_indices, dst_index_dtype, "to_indices"),
            (map_values, dtype, "value_map"),
        )

        args = [VectorArg(dtype, "input_ary"), VectorArg(dtype, "output_ary")]
        for enabled, arg_dtype, arg_name in optional_args:
            if enabled:
                args.append(VectorArg(arg_dtype, arg_name))

        from pyopencl.tools import dtype_to_ctype
        render_kwargs = {
            "dtype": dtype,
            "dtype_to_ctype": dtype_to_ctype,
            "from_dtype": src_index_dtype,
            "to_dtype": dst_index_dtype,
            "from_indices": have_src_indices,
            "to_indices": have_dst_indices,
            "map_values": map_values,
        }
        kernel_source = str(GAPPY_COPY_TPL.render(**render_kwargs))

        from pyopencl.elementwise import ElementwiseKernel
        struct_preamble = dtype_to_c_struct(self.context.devices[0], dtype)
        return ElementwiseKernel(
            self.context, args, kernel_source,
            preamble=struct_preamble, name="gappy_copy_and_map")
Exemplo n.º 4
0
def get_take_put_kernel(context, dtype, idx_dtype, with_offsets, vec_count=1):
    """Build the elementwise "take_put" kernel (gather, then scatter).

    For every work item ``i`` the generated body loads
    ``src_idx = gmem_src_idx[i]`` and ``dest_idx = gmem_dest_idx[i]`` and,
    for each vector pair ``k``, stores ``src<k>[src_idx]`` (or
    ``src<k>[src_idx+offset<k>]`` when *with_offsets* is true) into
    ``dest<k>[dest_idx]``.

    :arg context: object whose ``devices[0]`` is used to build the struct
        preamble for *dtype*; also forwarded to ``get_elwise_kernel``.
    :arg dtype: element dtype of the ``dest``/``src`` vectors.
    :arg idx_dtype: dtype of both index vectors and of the scalar offsets.
    :arg with_offsets: if true, one ``offset<k>`` scalar argument is added
        per vector pair and added to the source index.
    :arg vec_count: number of ``dest``/``src`` vector pairs.
    :returns: whatever ``get_elwise_kernel`` returns for "take_put".
    """
    type_names = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }
    vec_ids = range(vec_count)

    # Note: the destination vectors are declared without ``with_offset``.
    args = [VectorArg(dtype, "dest%d" % k) for k in vec_ids]
    args.append(VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True))
    args.append(VectorArg(idx_dtype, "gmem_src_idx", with_offset=True))
    args.extend(
        VectorArg(dtype, "src%d" % k, with_offset=True) for k in vec_ids)
    if with_offsets:
        args.extend(ScalarArg(idx_dtype, "offset%d" % k) for k in vec_ids)

    if with_offsets:
        copy_lines = [
            "dest%d[dest_idx] = src%d[src_idx+offset%d];" % (k, k, k)
            for k in vec_ids
        ]
    else:
        copy_lines = [
            "dest%d[dest_idx] = src%d[src_idx];" % (k, k) for k in vec_ids
        ]

    index_loads = (
        "%(idx_tp)s src_idx = gmem_src_idx[i];\n"
        "%(idx_tp)s dest_idx = gmem_dest_idx[i];\n"
    ) % type_names
    body = index_loads + "\n".join(copy_lines)

    struct_preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, args, body, preamble=struct_preamble, name="take_put")
Exemplo n.º 5
0
def get_put_kernel(context, dtype, idx_dtype, vec_count=1):
    """Build the elementwise "put" (scatter) kernel.

    For every work item ``i`` the generated body loads
    ``dest_idx = gmem_dest_idx[i]`` and stores ``src<k>[i]`` into
    ``dest<k>[dest_idx]`` for each of the *vec_count* vector pairs.

    :arg context: object whose ``devices[0]`` is used to build the struct
        preamble for *dtype*; also forwarded to ``get_elwise_kernel``.
    :arg dtype: element dtype of the ``dest``/``src`` vectors.
    :arg idx_dtype: dtype of the ``gmem_dest_idx`` index vector.
    :arg vec_count: number of ``dest``/``src`` vector pairs.
    :returns: whatever ``get_elwise_kernel`` returns for the kernel "put".
    """
    type_names = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }
    vec_ids = range(vec_count)

    # Signature order: destinations, index vector, sources.
    dest_args = [
        VectorArg(dtype, "dest%d" % k, with_offset=True) for k in vec_ids
    ]
    index_arg = VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True)
    src_args = [
        VectorArg(dtype, "src%d" % k, with_offset=True) for k in vec_ids
    ]
    args = dest_args + [index_arg] + src_args

    stores = []
    for k in vec_ids:
        stores.append("dest%d[dest_idx] = src%d[i];" % (k, k))
    index_load = "%(idx_tp)s dest_idx = gmem_dest_idx[i];\n" % type_names
    body = index_load + "\n".join(stores)

    struct_preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, args, body, preamble=struct_preamble, name="put")
Exemplo n.º 6
0
    def _get_kernel(self, dtype, src_index_dtype, dst_index_dtype,
                    have_src_indices, have_dst_indices, map_values):
        """Build the "gappy_copy_and_map" elementwise kernel.

        ``input_ary`` and ``output_ary`` (both of *dtype*) are always kernel
        arguments. ``from_indices``, ``to_indices`` and ``value_map`` follow,
        in that order, each only when its flag is true. Every vector argument
        is declared with ``with_offset=True``. The source is rendered from
        ``GAPPY_COPY_TPL`` using the same flags.

        :arg dtype: element dtype of the input, output and value map arrays.
        :arg src_index_dtype: dtype of ``from_indices``.
        :arg dst_index_dtype: dtype of ``to_indices``.
        :arg have_src_indices: whether ``from_indices`` is a kernel argument.
        :arg have_dst_indices: whether ``to_indices`` is a kernel argument.
        :arg map_values: whether ``value_map`` is a kernel argument.
        :returns: an ``ElementwiseKernel`` built on ``self.context``.
        """
        from pyopencl.tools import VectorArg

        def offset_vector(arg_dtype, arg_name):
            # All array arguments of this kernel accept an offset.
            return VectorArg(arg_dtype, arg_name, with_offset=True)

        # Mandatory arguments first, then the enabled optional ones.
        arg_specs = [(dtype, "input_ary"), (dtype, "output_ary")]
        if have_src_indices:
            arg_specs.append((src_index_dtype, "from_indices"))
        if have_dst_indices:
            arg_specs.append((dst_index_dtype, "to_indices"))
        if map_values:
            arg_specs.append((dtype, "value_map"))
        args = [offset_vector(tp, name) for tp, name in arg_specs]

        from pyopencl.tools import dtype_to_ctype
        rendered = GAPPY_COPY_TPL.render(
            dtype=dtype,
            dtype_to_ctype=dtype_to_ctype,
            from_dtype=src_index_dtype,
            to_dtype=dst_index_dtype,
            from_indices=have_src_indices,
            to_indices=have_dst_indices,
            map_values=map_values,
        )

        from pyopencl.elementwise import ElementwiseKernel
        struct_preamble = dtype_to_c_struct(self.context.devices[0], dtype)
        return ElementwiseKernel(
            self.context, args, str(rendered),
            preamble=struct_preamble, name="gappy_copy_and_map")
Exemplo n.º 7
0
def get_fill_kernel(context, dtype):
    """Build the elementwise "fill" kernel performing ``z[i] = a``.

    :arg context: object whose ``devices[0]`` is used to build the struct
        preamble for *dtype*; also forwarded to ``get_elwise_kernel``.
    :arg dtype: dtype of both the output vector ``z`` and the scalar ``a``.
    :returns: whatever ``get_elwise_kernel`` returns for the kernel "fill".
    """
    type_names = {"tp": dtype_to_ctype(dtype)}
    signature = "%(tp)s *z, %(tp)s a" % type_names
    operation = "z[i] = a"

    return get_elwise_kernel(
        context,
        signature,
        operation,
        preamble=dtype_to_c_struct(context.devices[0], dtype),
        name="fill",
    )
Exemplo n.º 8
0
def get_take_kernel(context, dtype, idx_dtype, vec_count=1):
    """Build the elementwise "take" (gather) kernel.

    For every work item ``i`` the generated body loads ``src_idx = idx[i]``
    and stores ``src<k>[src_idx]`` into ``dest<k>[i]`` for each of the
    *vec_count* vector pairs.

    :arg context: object whose ``devices[0]`` is used to build the struct
        preamble for *dtype*; also forwarded to ``get_elwise_kernel``.
    :arg dtype: element dtype of the ``dest``/``src`` vectors.
    :arg idx_dtype: dtype of the ``idx`` index vector.
    :arg vec_count: number of ``dest``/``src`` vector pairs.
    :returns: whatever ``get_elwise_kernel`` returns for the kernel "take".
    """
    type_names = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }
    vec_ids = range(vec_count)

    # Signature order: destinations, sources, then the index vector last.
    args = []
    args.extend(
        VectorArg(dtype, "dest" + str(k), with_offset=True) for k in vec_ids)
    args.extend(
        VectorArg(dtype, "src" + str(k), with_offset=True) for k in vec_ids)
    args.append(VectorArg(idx_dtype, "idx", with_offset=True))

    gathers = ["dest%d[i] = src%d[src_idx];" % (k, k) for k in vec_ids]
    index_load = "%(idx_tp)s src_idx = idx[i];\n" % type_names
    body = index_load + "\n".join(gathers)

    struct_preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, args, body, preamble=struct_preamble, name="take")
Exemplo n.º 9
0
def get_put_kernel(context, dtype, idx_dtype, vec_count=1):
    """Build the elementwise "put" (scatter) kernel.

    For every work item ``i`` the generated body loads
    ``dest_idx = gmem_dest_idx[i]`` and stores ``src<k>[i]`` into
    ``dest<k>[dest_idx]`` for each of the *vec_count* vector pairs.

    :arg context: object whose ``devices[0]`` is used to build the struct
        preamble for *dtype*; also forwarded to ``get_elwise_kernel``.
    :arg dtype: element dtype of the ``dest``/``src`` vectors.
    :arg idx_dtype: dtype of the ``gmem_dest_idx`` index vector.
    :arg vec_count: number of ``dest``/``src`` vector pairs.
    :returns: whatever ``get_elwise_kernel`` returns for the kernel "put".
    """
    type_names = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }

    def offset_vector(arg_dtype, arg_name):
        # Every array argument of this kernel accepts an offset.
        return VectorArg(arg_dtype, arg_name, with_offset=True)

    vec_ids = range(vec_count)

    # Signature order: destinations, index vector, sources.
    args = [offset_vector(dtype, "dest%d" % k) for k in vec_ids]
    args.append(offset_vector(idx_dtype, "gmem_dest_idx"))
    args += [offset_vector(dtype, "src%d" % k) for k in vec_ids]

    scatter_lines = "\n".join(
        "dest%d[dest_idx] = src%d[i];" % (k, k) for k in vec_ids)
    body = (
        "%(idx_tp)s dest_idx = gmem_dest_idx[i];\n" % type_names
    ) + scatter_lines

    return get_elwise_kernel(
        context,
        args,
        body,
        preamble=dtype_to_c_struct(context.devices[0], dtype),
        name="put",
    )
Exemplo n.º 10
0
def get_copy_kernel(context, dtype_dest, dtype_src):
    """Build the elementwise "copy" kernel computing ``dest[i] = <src>``.

    ``<src>`` starts out as ``src[i]`` and is wrapped in conversion calls
    when the destination is complex (dtype kind ``"c"``):

    * source not complex: wrapped in ``<name>_fromreal(...)``;
    * source dtype different from the destination dtype: wrapped in
      ``<name>_cast(...)``;

    where ``<name>`` is ``complex_dtype_to_name(dtype_dest)``.

    :arg context: object whose ``devices[0]`` is used to build the struct
        preamble for *dtype_dest*; also forwarded to ``get_elwise_kernel``.
    :arg dtype_dest: dtype of the ``dest`` vector.
    :arg dtype_src: dtype of the ``src`` vector.
    :returns: whatever ``get_elwise_kernel`` returns for the kernel "copy".
    :raises TypeError: if the two dtypes differ and at least one of them is
        a struct type (dtype kind ``"V"``).
    """
    src = "src[i]"
    dest_is_complex = dtype_dest.kind == "c"

    # Spelled out instead of the chained ``a == "c" != b`` comparison.
    if dest_is_complex and dtype_src.kind != "c":
        src = "%s_fromreal(%s)" % (complex_dtype_to_name(dtype_dest), src)

    # NOTE(review): for a real source both branches apply, so the expression
    # becomes ``<name>_cast(<name>_fromreal(src[i]))`` -- confirm intended.
    if dest_is_complex and dtype_src != dtype_dest:
        # Keep ``src`` a plain string. A stray trailing comma previously made
        # it a 1-tuple, which only worked because ``%`` accepts a tuple.
        src = "%s_cast(%s)" % (complex_dtype_to_name(dtype_dest), src)

    if dtype_dest != dtype_src and (dtype_dest.kind == "V" or dtype_src.kind == "V"):
        raise TypeError("copying between non-identical struct types")

    return get_elwise_kernel(
        context,
        "%(tp_dest)s *dest, %(tp_src)s *src"
        % {"tp_dest": dtype_to_ctype(dtype_dest), "tp_src": dtype_to_ctype(dtype_src)},
        "dest[i] = %s" % src,
        preamble=dtype_to_c_struct(context.devices[0], dtype_dest),
        name="copy",
    )
Exemplo n.º 11
0
def get_copy_kernel(context, dtype_dest, dtype_src):
    """Build the elementwise "copy" kernel computing ``dest[i] = <src>``.

    ``<src>`` starts out as ``src[i]`` and is wrapped in conversion calls
    when the destination is complex (dtype kind ``"c"``):

    * source not complex: wrapped in ``<name>_fromreal(...)``;
    * source dtype different from the destination dtype: wrapped in
      ``<name>_cast(...)``;

    where ``<name>`` is ``complex_dtype_to_name(dtype_dest)``.

    :arg context: object whose ``devices[0]`` is used to build the struct
        preamble for *dtype_dest*; also forwarded to ``get_elwise_kernel``.
    :arg dtype_dest: dtype of the ``dest`` vector.
    :arg dtype_src: dtype of the ``src`` vector.
    :returns: whatever ``get_elwise_kernel`` returns for the kernel "copy".
    :raises TypeError: if the two dtypes differ and at least one of them is
        a struct type (dtype kind ``"V"``).
    """
    src = "src[i]"
    dest_is_complex = dtype_dest.kind == "c"

    # Spelled out instead of the chained ``a == "c" != b`` comparison.
    if dest_is_complex and dtype_src.kind != "c":
        src = "%s_fromreal(%s)" % (complex_dtype_to_name(dtype_dest), src)

    # NOTE(review): for a real source both branches apply, so the expression
    # becomes ``<name>_cast(<name>_fromreal(src[i]))`` -- confirm intended.
    if dest_is_complex and dtype_src != dtype_dest:
        # Keep ``src`` a plain string. A stray trailing comma previously made
        # it a 1-tuple, which only worked because ``%`` accepts a tuple.
        src = "%s_cast(%s)" % (complex_dtype_to_name(dtype_dest), src)

    if dtype_dest != dtype_src and (
            dtype_dest.kind == "V" or dtype_src.kind == "V"):
        raise TypeError("copying between non-identical struct types")

    return get_elwise_kernel(context,
            "%(tp_dest)s *dest, %(tp_src)s *src" % {
                "tp_dest": dtype_to_ctype(dtype_dest),
                "tp_src": dtype_to_ctype(dtype_src),
                },
            "dest[i] = %s" % src,
            preamble=dtype_to_c_struct(context.devices[0], dtype_dest),
            name="copy")
Exemplo n.º 12
0
def get_take_kernel(context, dtype, idx_dtype, vec_count=1):
    """Build the elementwise "take" (gather) kernel.

    For every work item ``i`` the generated body loads ``src_idx = idx[i]``
    and stores ``src<k>[src_idx]`` into ``dest<k>[i]`` for each of the
    *vec_count* vector pairs.

    :arg context: object whose ``devices[0]`` is used to build the struct
        preamble for *dtype*; also forwarded to ``get_elwise_kernel``.
    :arg dtype: element dtype of the ``dest``/``src`` vectors.
    :arg idx_dtype: dtype of the ``idx`` index vector.
    :arg vec_count: number of ``dest``/``src`` vector pairs.
    :returns: whatever ``get_elwise_kernel`` returns for the kernel "take".
    """
    type_names = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }

    def offset_vector(arg_dtype, arg_name):
        # Every array argument of this kernel accepts an offset.
        return VectorArg(arg_dtype, arg_name, with_offset=True)

    vec_ids = range(vec_count)

    # Signature order: destinations, sources, then the index vector last.
    dest_args = [offset_vector(dtype, "dest" + str(k)) for k in vec_ids]
    src_args = [offset_vector(dtype, "src" + str(k)) for k in vec_ids]
    args = dest_args + src_args + [offset_vector(idx_dtype, "idx")]

    gather_lines = []
    for k in vec_ids:
        gather_lines.append("dest%d[i] = src%d[src_idx];" % (k, k))
    body = ("%(idx_tp)s src_idx = idx[i];\n" % type_names) \
        + "\n".join(gather_lines)

    struct_preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, args, body, preamble=struct_preamble, name="take")
Exemplo n.º 13
0
def get_copy_kernel(context, dtype_dest, dtype_src):
    """Build the elementwise "copy" kernel computing ``dest[i] = <src>``.

    ``<src>`` starts out as ``src[i]`` and is wrapped in conversion calls
    when the destination is complex (dtype kind ``"c"``):

    * source not complex: wrapped in ``<name>_fromreal(...)``;
    * source dtype different from the destination dtype: wrapped in
      ``<name>_cast(...)``;

    where ``<name>`` is ``complex_dtype_to_name(dtype_dest)``.

    :arg context: object whose ``devices[0]`` is used to build the struct
        preamble for *dtype_dest*; also forwarded to ``get_elwise_kernel``.
    :arg dtype_dest: dtype of the ``dest`` vector.
    :arg dtype_src: dtype of the ``src`` vector.
    :returns: whatever ``get_elwise_kernel`` returns for the kernel "copy".
    :raises TypeError: if the two dtypes differ and at least one of them is
        a struct type (dtype kind ``"V"``).
    """
    src = "src[i]"
    dest_is_complex = dtype_dest.kind == "c"

    # Spelled out instead of the chained ``a == "c" != b`` comparison.
    if dest_is_complex and dtype_src.kind != "c":
        src = "{}_fromreal({})".format(complex_dtype_to_name(dtype_dest), src)

    # NOTE(review): for a real source both branches apply, so the expression
    # becomes ``<name>_cast(<name>_fromreal(src[i]))`` -- confirm intended.
    if dest_is_complex and dtype_src != dtype_dest:
        # Keep ``src`` a plain string. A stray trailing comma previously made
        # it a 1-tuple, which only worked because ``%`` accepts a tuple.
        src = "{}_cast({})".format(complex_dtype_to_name(dtype_dest), src)

    if dtype_dest != dtype_src and (
            dtype_dest.kind == "V" or dtype_src.kind == "V"):
        raise TypeError("copying between non-identical struct types")

    return get_elwise_kernel(context,
            "{tp_dest} *dest, {tp_src} *src".format(
                tp_dest=dtype_to_ctype(dtype_dest),
                tp_src=dtype_to_ctype(dtype_src),
                ),
            "dest[i] = %s" % src,
            preamble=dtype_to_c_struct(context.devices[0], dtype_dest),
            name="copy")
Exemplo n.º 14
0
def get_take_put_kernel(context, dtype, idx_dtype, with_offsets, vec_count=1):
    """Build the elementwise "take_put" kernel (gather, then scatter).

    For every work item ``i`` the generated body loads
    ``src_idx = gmem_src_idx[i]`` and ``dest_idx = gmem_dest_idx[i]`` and,
    for each vector pair ``k``, stores ``src<k>[src_idx]`` (or
    ``src<k>[src_idx+offset<k>]`` when *with_offsets* is true) into
    ``dest<k>[dest_idx]``.

    :arg context: object whose ``devices[0]`` is used to build the struct
        preamble for *dtype*; also forwarded to ``get_elwise_kernel``.
    :arg dtype: element dtype of the ``dest``/``src`` vectors.
    :arg idx_dtype: dtype of both index vectors and of the scalar offsets.
    :arg with_offsets: if true, one ``offset<k>`` scalar argument is added
        per vector pair and added to the source index.
    :arg vec_count: number of ``dest``/``src`` vector pairs.
    :returns: whatever ``get_elwise_kernel`` returns for "take_put".
    """
    type_names = {
        "idx_tp": dtype_to_ctype(idx_dtype),
        "tp": dtype_to_ctype(dtype),
    }
    vec_ids = range(vec_count)

    # Note: the destination vectors are declared without ``with_offset``.
    dest_args = [VectorArg(dtype, "dest%d" % k) for k in vec_ids]
    index_args = [
        VectorArg(idx_dtype, "gmem_dest_idx", with_offset=True),
        VectorArg(idx_dtype, "gmem_src_idx", with_offset=True),
    ]
    src_args = [
        VectorArg(dtype, "src%d" % k, with_offset=True) for k in vec_ids
    ]
    offset_args = (
        [ScalarArg(idx_dtype, "offset%d" % k) for k in vec_ids]
        if with_offsets else []
    )
    args = dest_args + index_args + src_args + offset_args

    def copy_statement(k):
        # One scatter statement per vector pair; the offset form is used
        # only when the kernel was requested with per-vector offsets.
        if with_offsets:
            return "dest%d[dest_idx] = src%d[src_idx+offset%d];" % (k, k, k)
        return "dest%d[dest_idx] = src%d[src_idx];" % (k, k)

    index_loads = (
        "%(idx_tp)s src_idx = gmem_src_idx[i];\n"
        "%(idx_tp)s dest_idx = gmem_dest_idx[i];\n"
    ) % type_names
    body = index_loads + "\n".join(copy_statement(k) for k in vec_ids)

    struct_preamble = dtype_to_c_struct(context.devices[0], dtype)
    return get_elwise_kernel(
        context, args, body, preamble=struct_preamble, name="take_put")