Пример #1
0
def are_images_supported():
    """Return True when the default context offers INTENSITY|FLOAT 2D images.

    The read-only 2D image formats of ``cfg.OPENCL.ctx`` are queried and the
    INTENSITY/FLOAT combination is looked up in the result.
    """
    wanted = cl.ImageFormat(cl.channel_order.INTENSITY, cl.channel_type.FLOAT)
    available = cl.get_supported_image_formats(
        cfg.OPENCL.ctx,
        cl.mem_flags.READ_ONLY,
        cl.mem_object_type.IMAGE2D,
    )
    return wanted in available
Пример #2
0
 def __determine_image_types(context):
     """Pick one read-write 2D image format per integer channel type.

     For each of UNSIGNED_INT8, SIGNED_INT16 and UNSIGNED_INT32 an RGB format
     is preferred; an RGBA format is used only while nothing has been picked
     for that channel type yet.

     Returns a 3-tuple ``(uint8_format, int16_format, uint32_format)``; an
     entry is None when no matching format was reported for *context*.
     """
     candidates = cl.get_supported_image_formats(
         context, cl.mem_flags.READ_WRITE, cl.mem_object_type.IMAGE2D)

     # Slot order fixes the order of the returned tuple.
     wanted_types = (
         cl.channel_type.UNSIGNED_INT8,
         cl.channel_type.SIGNED_INT16,
         cl.channel_type.UNSIGNED_INT32,
     )
     chosen = [None] * len(wanted_types)

     for candidate in candidates:
         try:
             data_type = candidate.channel_data_type
             for slot, wanted in enumerate(wanted_types):
                 if data_type != wanted:
                     continue
                 order = candidate.channel_order
                 if order == cl.channel_order.RGB:
                     # Ideal: always wins, even over an earlier RGBA pick.
                     chosen[slot] = candidate
                 elif chosen[slot] is None and order == cl.channel_order.RGBA:
                     # Non-ideal, but acceptable as long as nothing is picked.
                     chosen[slot] = candidate
                 break
         except cl.LogicError:
             # Formats whose attributes cannot be read are simply ignored.
             continue

     return tuple(chosen)
Пример #3
0
    def init_cl(self,
                id_platform=-1,
                id_device=-1,
                use_gpu=True,
                print_info=False,
                context_properties=None):
        """Select an OpenCL device and create context, queue and format list.

        All GPU and CPU devices of every platform are enumerated.  The device
        at ``(id_platform, id_device)`` of the requested type (GPU when
        *use_gpu* is true, CPU otherwise) is used if it exists; otherwise a
        warning is logged and the device ranked highest by
        ``OCLDevice.device_priority`` is taken.

        Sets ``self.context``, ``self.device``, ``self.queue`` (profiling
        enabled) and ``self.imageformats`` (read-write 3D image formats), and
        calls ``self.print_info()`` when *print_info* is true.

        Raises ``Exception`` when no platform or no device can be found.
        """
        platforms = pyopencl.get_platforms()
        if not platforms:
            raise Exception("Failed to find any OpenCL platforms.")

        # Map (platform index, device index, device type) -> device.
        candidates = {}
        for plat_idx, platform in enumerate(platforms):
            for dev_type in (pyopencl.device_type.GPU,
                             pyopencl.device_type.CPU):
                for dev_idx, dev in enumerate(platform.get_devices(dev_type)):
                    candidates[(plat_idx, dev_idx, dev_type)] = dev

        if not candidates:
            raise Exception("Failed to find any OpenCL platform or device.")

        if use_gpu:
            device_type = pyopencl.device_type.GPU
        else:
            device_type = pyopencl.device_type.CPU

        device = None
        preferred_key = (id_platform, id_device, device_type)

        if preferred_key in candidates:
            device = candidates[preferred_key]
        else:
            logger.warning(
                "prefered platform/device (%s/%s) not available (device type = %s) \n"
                "...choosing the best from the rest" %
                (id_platform, id_device, device_type))
            # Rank every (device, type) pair and keep the best device.
            ranked = ((dev, key[2]) for key, dev in candidates.items())
            device, _ = max(ranked, key=OCLDevice.device_priority)

        if device is None:
            raise Exception("Failed to find a valid device")

        self.context = pyopencl.Context(devices=[device],
                                        properties=context_properties)
        self.device = device

        profiling = pyopencl.command_queue_properties.PROFILING_ENABLE
        self.queue = pyopencl.CommandQueue(self.context, properties=profiling)

        self.imageformats = pyopencl.get_supported_image_formats(
            self.context,
            pyopencl.mem_flags.READ_WRITE,
            pyopencl.mem_object_type.IMAGE3D)

        logger.info("intialized, device: {}".format(self.device))
        if print_info:
            self.print_info()
Пример #4
0
def test_magma_fermi_matrix_mul(ctx_factory):
    """Auto-test an image-argument matmul kernel after a fixed set of splits.

    The test is skipped when the first device has no image support, when the
    platform is pocl, or when R|FLOAT read-only 2D images are unavailable.
    The untransformed kernel serves as the reference implementation.
    """
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    n = get_suitable_size(ctx)

    first_dev = ctx.devices[0]
    if not (first_dev.image_support
            and first_dev.platform.name != "Portable Computing Language"):
        pytest.skip("crashes on pocl")

    wanted_fmt = cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT)
    readable_2d = cl.get_supported_image_formats(
        ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
    if wanted_fmt not in readable_2d:
        pytest.skip("image format not supported")

    domain = "{[i,j,k]: 0<=i,j,k<%d}" % n
    instructions = ["c[i, j] = sum(k, a[i, k]*b[k, j])"]
    kernel_args = [
        lp.ImageArg("a", dtype, shape=(n, n)),
        lp.ImageArg("b", dtype, shape=(n, n)),
        lp.GlobalArg("c", dtype, shape=(n, n), order=order),
    ]
    knl = lp.make_kernel(domain, instructions, kernel_args, name="matmul")

    # The unsplit kernel is what the transformed one is compared against.
    seq_knl = knl

    i_reg, j_reg = 4, 4
    i_chunks, j_chunks = 16, 16

    # i goes to group/local axis 0, j to axis 1; the innermost part is ILP.
    for iname, reg, chunks, axis in (("i", i_reg, i_chunks, 0),
                                     ("j", j_reg, j_chunks, 1)):
        knl = lp.split_iname(knl, iname, reg * chunks,
                             outer_tag="g.%d" % axis)
        knl = lp.split_iname(knl, iname + "_inner", reg,
                             outer_tag="l.%d" % axis, inner_tag="ilp")

    knl = lp.split_iname(knl, "k", 16)
    knl = lp.split_iname(knl, "k_inner", 8, outer_tag="unr")
    # FIXME: prefetching is currently disabled
    #knl = lp.add_prefetch(knl, 'a', ["k_inner", "i_inner_inner", "i_inner_outer"],
    #           default_tag="l.auto")
    #knl = lp.add_prefetch(knl, 'b',
    #    ["k_inner", ("j_inner_inner", "j_inner_outer"),], default_tag="l.auto")

    giga_ops = 2 * n**3 / 1e9
    lp.auto_test_vs_ref(seq_knl, ctx, knl,
                        op_count=[giga_ops],
                        op_label=["GFlops"],
                        parameters={},
                        blacklist_ref_vendors="pocl")
Пример #5
0
    def test_nonempty_supported_image_formats(self, device, ctx_getter):
        """Image-capable devices must report at least one read-only 2D format.

        Devices without image support cause the test to be skipped.
        """
        context = ctx_getter()

        if not device.image_support:
            from py.test import skip
            skip("images not supported on %s" % device.name)
        else:
            formats = cl.get_supported_image_formats(
                context, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
            assert len(formats) > 0
Пример #6
0
    def init_cl(self,
                id_platform=-1,
                id_device=-1,
                use_gpu=True,
                print_info=False,
                context_properties=None):
        """Select an OpenCL device and create context, queue and format list.

        All GPU and CPU devices of every platform are enumerated.  The device
        at ``(id_platform, id_device)`` of the requested type (GPU when
        *use_gpu* is true, CPU otherwise) is used if it exists; otherwise a
        warning is logged and the device ranked highest by
        ``OCLDevice.device_priority`` is taken.

        Sets ``self.context``, ``self.device``, ``self.queue`` (profiling
        enabled) and ``self.imageformats`` (read-write 3D image formats),
        prints the chosen device, and calls ``self.print_info()`` when
        *print_info* is true.

        Raises ``Exception`` when no platform or no device can be found.
        """
        platforms = pyopencl.get_platforms()
        if len(platforms) == 0:
            raise Exception("Failed to find any OpenCL platforms.")

        device_types = [pyopencl.device_type.GPU, pyopencl.device_type.CPU]

        # Map (platform index, device index, device type) -> device.
        all_platforms_devs = {
            (_ip, _id, t): d
            for _ip, p in enumerate(platforms)
            for t in device_types
            for _id, d in enumerate(p.get_devices(t))
        }

        if len(all_platforms_devs) == 0:
            raise Exception("Failed to find any OpenCL platform or device.")

        device_type = pyopencl.device_type.GPU if use_gpu else pyopencl.device_type.CPU

        device = None

        # try to get the preferred platform...
        # otherwise choose the best one
        try:
            device = all_platforms_devs[(id_platform, id_device, device_type)]
        except KeyError:
            logger.warning("prefered platform/device (%s/%s) not available (device type = %s) \n"
                           "...choosing the best from the rest"%
                           (id_platform, id_device, device_type))
            # get the best available device
            # NOTE: .items() instead of .iteritems(), which only exists on
            # Python 2 dicts and raised AttributeError here on Python 3.
            device, _ = max([(d, t) for (_ip, _id, t), d in all_platforms_devs.items()],
                            key=OCLDevice.device_priority)

        if device is None:
            raise Exception("Failed to find a valid device")

        self.context = pyopencl.Context(devices=[device],
                                        properties=context_properties)

        self.device = device

        self.queue = pyopencl.CommandQueue(self.context,
                                           properties=pyopencl.command_queue_properties.PROFILING_ENABLE)

        self.imageformats = pyopencl.get_supported_image_formats(self.context,
                                                                 pyopencl.mem_flags.READ_WRITE,
                                                                 pyopencl.mem_object_type.IMAGE3D)

        print(self.device)
        if print_info:
            self.print_info()
Пример #7
0
    def test_nonempty_supported_image_formats(self, device, ctx_getter):
        """Assert that a device with image support lists read-only 2D formats.

        When *device* reports no image support the test is skipped instead.
        """
        context = ctx_getter()
        has_images = device.image_support

        if has_images:
            readable_2d = cl.get_supported_image_formats(
                context,
                cl.mem_flags.READ_ONLY,
                cl.mem_object_type.IMAGE2D,
            )
            assert len(readable_2d) > 0
        else:
            from py.test import skip
            skip("images not supported on %s" % device.name)
Пример #8
0
def test_nonempty_supported_image_formats(ctx_factory):
    """The first device of the context must list read-only 2D image formats.

    Skipped when that device does not support images at all.
    """
    context = ctx_factory()
    device = context.devices[0]

    if not device.image_support:
        from pytest import skip
        skip("images not supported on %s" % device.name)
    else:
        formats = cl.get_supported_image_formats(
            context, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
        assert len(formats) > 0
Пример #9
0
def _debug_context(ctx):
    """Print image-related information about *ctx*.

    First the read-write 3D image formats of the context are printed, then,
    for each device, every attribute whose name starts with ``image``.
    """
    formats_3d = cl.get_supported_image_formats(
        ctx, cl.mem_flags.READ_WRITE, cl.mem_object_type.IMAGE3D)
    print(formats_3d)

    for device in ctx.devices:
        print("DEVICE: ", device)
        image_attrs = (name for name in dir(device)
                       if name.startswith("image"))
        for name in image_attrs:
            print(f" {name}", getattr(device, name))
Пример #10
0
def test_nonempty_supported_image_formats(ctx_factory):
    """Check that an image-capable device reports read-only 2D image formats.

    The first device of the context created by *ctx_factory* is inspected;
    the test is skipped if it has no image support.
    """
    context = ctx_factory()
    device = context.devices[0]
    has_images = device.image_support

    if has_images:
        readable_2d = cl.get_supported_image_formats(
            context,
            cl.mem_flags.READ_ONLY,
            cl.mem_object_type.IMAGE2D,
        )
        assert len(readable_2d) > 0
    else:
        from pytest import skip
        skip("images not supported on %s" % device.name)
Пример #11
0
def no_test_image_matrix_mul_ilp(ctx_factory):
    """Auto-test an image-argument matmul with an ILP-tagged split and prefetch.

    Skipped when the first device has no image support, when the platform is
    pocl, or when R|FLOAT read-only 2D images are unavailable.  The
    untransformed kernel is used as the reference.
    """
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    first_dev = ctx.devices[0]
    if not (first_dev.image_support
            and first_dev.platform.name != "Portable Computing Language"):
        pytest.skip("crashes on pocl")

    wanted_fmt = cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT)
    readable_2d = cl.get_supported_image_formats(
        ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
    if wanted_fmt not in readable_2d:
        pytest.skip("image format not supported")

    n = get_suitable_size(ctx)

    domain = "{[i,j,k]: 0<=i,j,k<%d}" % n
    instructions = ["c[i, j] = sum(k, a[i, k]*b[k, j])"]
    kernel_args = [
        lp.ImageArg("a", dtype, shape=(n, n)),
        lp.ImageArg("b", dtype, shape=(n, n)),
        lp.GlobalArg("c", dtype, shape=(n, n), order=order),
    ]
    knl = lp.make_kernel(domain, instructions, kernel_args, name="matmul")

    # The unsplit kernel is what the transformed one is compared against.
    seq_knl = knl

    ilp = 4
    j_inner_split = 4

    knl = lp.split_iname(knl, "i", 2, outer_tag="g.0", inner_tag="l.1")
    knl = lp.split_iname(knl, "j", ilp * j_inner_split, outer_tag="g.1")
    knl = lp.split_iname(knl, "j_inner", j_inner_split,
                         outer_tag="ilp", inner_tag="l.0")
    knl = lp.split_iname(knl, "k", 2)

    # conflict-free?
    prefetches = (
        ("a", ["i_inner", "k_inner"]),
        ("b", ["j_inner_outer", "j_inner_inner", "k_inner"]),
    )
    for array_name, sweep_inames in prefetches:
        knl = lp.add_prefetch(knl, array_name, sweep_inames,
                              default_tag="l.auto")

    giga_ops = 2 * n**3 / 1e9
    lp.auto_test_vs_ref(seq_knl, ctx, knl,
                        op_count=[giga_ops],
                        op_label=["GFlops"],
                        parameters={})
Пример #12
0
def test_magma_fermi_matrix_mul(ctx_factory):
    """Run loopy's auto test on an image-based matmul with tiled i/j/k loops.

    Skips on devices without image support, on pocl, and when the R|FLOAT
    read-only 2D image format is missing.  The kernel before any split is
    kept as the reference for the comparison.
    """
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    n = get_suitable_size(ctx)

    dev = ctx.devices[0]
    if not dev.image_support:
        pytest.skip("crashes on pocl")
    if dev.platform.name == "Portable Computing Language":
        pytest.skip("crashes on pocl")

    supported = cl.get_supported_image_formats(
        ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
    if cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT) not in supported:
        pytest.skip("image format not supported")

    shape = (n, n)
    knl = lp.make_kernel(
        "{[i,j,k]: 0<=i,j,k<%d}" % n,
        ["c[i, j] = sum(k, a[i, k]*b[k, j])"],
        [
            lp.ImageArg("a", dtype, shape=shape),
            lp.ImageArg("b", dtype, shape=shape),
            lp.GlobalArg("c", dtype, shape=shape, order=order),
        ],
        name="matmul")

    # Reference: the kernel before any transformation.
    seq_knl = knl

    i_reg = j_reg = 4
    i_chunks = j_chunks = 16

    # Each entry is (iname, split length, tag keyword arguments), applied in order.
    split_plan = [
        ("i", i_reg * i_chunks, {"outer_tag": "g.0"}),
        ("i_inner", i_reg, {"outer_tag": "l.0", "inner_tag": "ilp"}),
        ("j", j_reg * j_chunks, {"outer_tag": "g.1"}),
        ("j_inner", j_reg, {"outer_tag": "l.1", "inner_tag": "ilp"}),
        ("k", 16, {}),
        ("k_inner", 8, {"outer_tag": "unr"}),
    ]
    for iname, length, tags in split_plan:
        knl = lp.split_iname(knl, iname, length, **tags)

    # FIXME: prefetching is currently disabled
    #knl = lp.add_prefetch(knl, 'a', ["k_inner", "i_inner_inner", "i_inner_outer"],
    #           default_tag="l.auto")
    #knl = lp.add_prefetch(knl, 'b',
    #    ["k_inner", ("j_inner_inner", "j_inner_outer"),], default_tag="l.auto")

    lp.auto_test_vs_ref(
        seq_knl, ctx, knl,
        op_count=[2 * n**3 / 1e9],
        op_label=["GFlops"],
        parameters={},
        blacklist_ref_vendors="pocl")
Пример #13
0
def test_int_ptr(ctx_factory):
    """Round-trip OpenCL objects through ``int_ptr`` / ``from_int_ptr``.

    Each object is rebuilt from its integer pointer and must compare equal to
    the original and have exactly the same type.  Sampler and image objects
    are only exercised on image-capable devices whose platform vendor is
    neither Intel nor pocl.
    """
    def check_roundtrip(obj):
        rebuilt = type(obj).from_int_ptr(obj.int_ptr)
        assert obj == rebuilt
        assert type(obj) is type(rebuilt)

    ctx = ctx_factory()
    (device,) = ctx.devices
    platform = device.platform
    for cl_obj in (device, platform, ctx):
        check_roundtrip(cl_obj)

    queue = cl.CommandQueue(ctx)
    check_roundtrip(queue)

    evt = cl.enqueue_marker(queue)
    check_roundtrip(evt)

    kernel_source = """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """
    prg = cl.Program(ctx, kernel_source).build()

    check_roundtrip(prg)
    check_roundtrip(prg.sum)

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n * 4)
    check_roundtrip(a_buf)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    excluded_vendors = [
        "Intel(R) Corporation",
        "The pocl project",
    ]
    if device.image_support and platform.vendor not in excluded_vendors:
        smp = cl.Sampler(ctx, False, cl.addressing_mode.CLAMP,
                         cl.filter_mode.NEAREST)
        check_roundtrip(smp)

        readable_2d = cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
        img_format = readable_2d[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        check_roundtrip(img)
Пример #14
0
def get_image(data, access=cl.mem_flags.READ_ONLY, queue=None):
    """Get pyopencl.Image from *data* which can be a numpy array, a pyopencl.array.Array or a
    pyopencl.Image. The image channel order is pyopencl.channel_order.INTENSITY and channel_type is
    pyopencl.channel_type.FLOAT. *access* is either pyopencl.mem_flags.READ_ONLY or
    pyopencl.mem_flags.WRITE_ONLY. *queue* is an OpenCL command queue; when it is not given,
    ``cfg.OPENCL.queue`` is used.

    The image is created in the context of the queue that is used, so that the format check,
    the image and any device-to-image copy all refer to the same context.

    Array input is converted to float32 before the image is created; an input that already is a
    pyopencl.Image is returned unchanged.

    Raises RuntimeError if the INTENSITY|FLOAT format is not supported for *access* and TypeError
    for complex-valued or otherwise unsupported *data*.
    """
    if not queue:
        queue = cfg.OPENCL.queue
    # Previously the image was always created in cfg.OPENCL.ctx, even when the
    # caller supplied a queue belonging to a different context.
    ctx = queue.context

    fmt = cl.ImageFormat(cl.channel_order.INTENSITY, cl.channel_type.FLOAT)
    mf = cl.mem_flags

    if fmt not in cl.get_supported_image_formats(ctx, access,
                                                 cl.mem_object_type.IMAGE2D):
        raise RuntimeError(
            "INTENSITY|FLOAT image format not supported by this platform")

    if isinstance(data, cl.Image):
        return data

    if not isinstance(data, (cl_array.Array, np.ndarray)):
        raise TypeError("Unsupported data type {}".format(type(data)))
    if data.dtype.kind == "c":
        raise TypeError("Complex values are not supported")
    data = data.astype(np.float32)

    # The image shape is the array shape reversed.
    image_shape = data.shape[::-1]

    if isinstance(data, cl_array.Array):
        # Device array: allocate the image, then copy buffer -> image.
        result = cl.Image(ctx, access, fmt, shape=image_shape)
        cl.enqueue_copy(queue,
                        result,
                        data.data,
                        offset=0,
                        origin=(0, 0),
                        region=result.shape)
    else:
        # Host array: let OpenCL copy the host memory at creation time.
        result = cl.Image(ctx,
                          access | mf.COPY_HOST_PTR,
                          fmt,
                          shape=image_shape,
                          hostbuf=data)

    return result
Пример #15
0
def test_int_ptr(ctx_factory):
    """Verify that ``from_int_ptr(obj.int_ptr)`` reproduces each OpenCL object.

    The reconstructed object has to be equal to, and of the same type as, the
    original.  Samplers and images are tested only when the device supports
    images and the platform vendor is not Intel or pocl.
    """
    def assert_same_after_roundtrip(original):
        cls = type(original)
        clone = cls.from_int_ptr(original.int_ptr)
        assert original == clone
        assert cls is type(clone)

    ctx = ctx_factory()
    [device] = ctx.devices
    platform = device.platform
    assert_same_after_roundtrip(device)
    assert_same_after_roundtrip(platform)
    assert_same_after_roundtrip(ctx)

    queue = cl.CommandQueue(ctx)
    assert_same_after_roundtrip(queue)

    marker_evt = cl.enqueue_marker(queue)
    assert_same_after_roundtrip(marker_evt)

    source = """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """
    prg = cl.Program(ctx, source).build()

    assert_same_after_roundtrip(prg)
    assert_same_after_roundtrip(prg.sum)

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n*4)
    assert_same_after_roundtrip(a_buf)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    skipped_vendors = ["Intel(R) Corporation", "The pocl project"]
    if device.image_support and platform.vendor not in skipped_vendors:
        sampler = cl.Sampler(
            ctx, False, cl.addressing_mode.CLAMP, cl.filter_mode.NEAREST)
        assert_same_after_roundtrip(sampler)

        formats = cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
        img_format = formats[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        assert_same_after_roundtrip(img)
Пример #16
0
def _get_image_format(ctx, num_channels, dtype, ndim, mode="rw"):
    """Maximize chance of finding a supported image format.

    *mode* is one of ``"rw"``, ``"r"`` or ``"w"`` and *ndim* one of 1, 2 or 3.

    Returns ``(image_format, flag)``.  For a single channel, INTENSITY, R and
    Rx are tried in that order and the flag is 0; if none of them is
    supported but RGBA is, the RGBA format is returned with flag 1.  For 2, 3
    or 4 channels the RG / RGB / RGBA format is returned with flag 0 without
    being checked against the supported formats.

    Raises ValueError for an invalid *mode* or *ndim*, or when no
    single-channel format is supported.  Raises KeyError when *dtype* is not
    in ``cl.DTYPE_TO_CHANNEL_TYPE`` or *num_channels* is not 1, 2, 3 or 4.
    """
    if mode == "rw":
        mode_flag = cl.mem_flags.READ_WRITE
    elif mode == "r":
        mode_flag = cl.mem_flags.READ_ONLY
    elif mode == "w":
        mode_flag = cl.mem_flags.WRITE_ONLY
    else:
        raise ValueError("invalid value '%s' for 'mode'" % mode)

    if ndim == 3:
        _dim = cl.mem_object_type.IMAGE3D
    elif ndim == 2:
        _dim = cl.mem_object_type.IMAGE2D
    elif ndim == 1:
        _dim = cl.mem_object_type.IMAGE1D
    else:
        raise ValueError(f"Unsupported number of image dimensions: {ndim}")

    supported_formats = cl.get_supported_image_formats(ctx, mode_flag, _dim)
    channel_type = cl.DTYPE_TO_CHANNEL_TYPE[dtype]

    if num_channels == 1:
        for order in [
                cl.channel_order.INTENSITY,
                cl.channel_order.R,
                cl.channel_order.Rx,
        ]:
            fmt = cl.ImageFormat(order, channel_type)
            if fmt in supported_formats:
                return fmt, 0
        # Last resort for one channel: RGBA, signalled by flag 1.
        fmt = cl.ImageFormat(cl.channel_order.RGBA, channel_type)
        if fmt in supported_formats:
            return fmt, 1
        # One message string: a stray comma used to turn this into two
        # exception arguments that printed as a tuple.
        raise ValueError(
            f"No supported ImageFormat found for dtype {dtype} with 1 channel\n"
            f"Supported formats include: {supported_formats}"
        )
    img_format = {
        2: cl.channel_order.RG,
        3: cl.channel_order.RGB,
        4: cl.channel_order.RGBA,
    }[num_channels]

    return cl.ImageFormat(img_format, channel_type), 0
Пример #17
0
def no_test_image_matrix_mul_ilp(ctx_factory):
    """Compare an ILP-split, prefetching image matmul with its plain version.

    Skips on devices without image support, on pocl, and when the R|FLOAT
    read-only 2D image format is missing.  The kernel before any
    transformation is kept as the reference for loopy's auto test.
    """
    dtype = np.float32
    ctx = ctx_factory()
    order = "C"

    dev = ctx.devices[0]
    if not dev.image_support:
        pytest.skip("crashes on pocl")
    if dev.platform.name == "Portable Computing Language":
        pytest.skip("crashes on pocl")

    supported = cl.get_supported_image_formats(
        ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)
    if cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT) not in supported:
        pytest.skip("image format not supported")

    n = get_suitable_size(ctx)
    shape = (n, n)

    knl = lp.make_kernel(
        "{[i,j,k]: 0<=i,j,k<%d}" % n,
        ["c[i, j] = sum(k, a[i, k]*b[k, j])"],
        [
            lp.ImageArg("a", dtype, shape=shape),
            lp.ImageArg("b", dtype, shape=shape),
            lp.GlobalArg("c", dtype, shape=shape, order=order),
        ],
        name="matmul")

    # Reference: the kernel before any transformation.
    seq_knl = knl

    ilp = 4
    j_inner_split = 4

    # Each entry is (iname, split length, tag keyword arguments), applied in order.
    split_plan = [
        ("i", 2, {"outer_tag": "g.0", "inner_tag": "l.1"}),
        ("j", ilp*j_inner_split, {"outer_tag": "g.1"}),
        ("j_inner", j_inner_split, {"outer_tag": "ilp", "inner_tag": "l.0"}),
        ("k", 2, {}),
    ]
    for iname, length, tags in split_plan:
        knl = lp.split_iname(knl, iname, length, **tags)

    # conflict-free?
    a_sweep = ["i_inner", "k_inner"]
    b_sweep = ["j_inner_outer", "j_inner_inner", "k_inner"]
    knl = lp.add_prefetch(knl, "a", a_sweep, default_tag="l.auto")
    knl = lp.add_prefetch(knl, "b", b_sweep, default_tag="l.auto")

    lp.auto_test_vs_ref(
        seq_knl, ctx, knl,
        op_count=[2*n**3/1e9],
        op_label=["GFlops"],
        parameters={})
Пример #18
0
    def init_cl(self,
                useDevice=0,
                useGPU=True,
                printInfo=False,
                context_properties=None):
        """Create context, queue and 3D image-format list on the first platform.

        Devices are taken from ``platforms[0]`` only: GPU devices when
        *useGPU* is true, CPU devices otherwise.  There is no fallback from
        one device type to the other.  *useDevice* indexes into that list.

        On success ``self.context``, ``self.device``, ``self.queue``
        (profiling enabled) and ``self.imageformats`` (read-write 3D image
        formats) are set, and ``self.printInfo()`` is called when *printInfo*
        is true.

        Returns None in every case; when no device of the requested type
        exists a warning is logged and nothing is initialised.

        Raises ``Exception`` when no OpenCL platform is available.
        """
        platforms = pyopencl.get_platforms()
        if len(platforms) == 0:
            # An unreachable "return None" used to follow this raise.
            raise Exception("Failed to find any OpenCL platforms.")

        devices = []
        if useGPU:
            devices = platforms[0].get_devices(pyopencl.device_type.GPU)
            if len(devices) == 0:
                logger.warning("Could not find GPU device...")
        else:
            devices = platforms[0].get_devices(pyopencl.device_type.CPU)
            if len(devices) == 0:
                logger.warning("Could neither find GPU nor CPU device....")

        if len(devices) == 0:
            logger.warning("couldnt find any devices...")
            return None
        else:
            logger.info("using device: %s" % devices[useDevice].name)

        # Create a context using the nth device
        self.context = pyopencl.Context(devices=[devices[useDevice]],
                                        properties=context_properties)

        self.device = devices[useDevice]

        self.queue = pyopencl.CommandQueue(
            self.context,
            properties=pyopencl.command_queue_properties.PROFILING_ENABLE)

        self.imageformats = pyopencl.get_supported_image_formats(
            self.context, pyopencl.mem_flags.READ_WRITE,
            pyopencl.mem_object_type.IMAGE3D)

        if printInfo:
            self.printInfo()
Пример #19
0
    def init_cl(self, useDevice=0, useGPU=True, printInfo=False, context_properties=None):
        """Set up an OpenCL context and queue on a device of the first platform.

        Only ``platforms[0]`` is consulted.  GPU devices are requested when
        *useGPU* is true and CPU devices otherwise; the other type is never
        tried as a fallback.  The device at index *useDevice* is selected.

        On success this sets ``self.context``, ``self.device``,
        ``self.queue`` (created with profiling enabled) and
        ``self.imageformats`` (the read-write 3D image formats), then calls
        ``self.printInfo()`` if *printInfo* is true.

        Returns None in every case; if the requested device type has no
        devices a warning is logged and the object is left uninitialised.

        Raises ``Exception`` when no OpenCL platform is available.
        """
        platforms = pyopencl.get_platforms()
        if len(platforms) == 0:
            # The dead "return None" that followed this raise was removed.
            raise Exception("Failed to find any OpenCL platforms.")

        devices = []
        if useGPU:
            devices = platforms[0].get_devices(pyopencl.device_type.GPU)
            if len(devices) == 0:
                logger.warning("Could not find GPU device...")
        else:
            devices = platforms[0].get_devices(pyopencl.device_type.CPU)
            if len(devices) == 0:
                logger.warning("Could neither find GPU nor CPU device....")

        if len(devices) == 0:
            logger.warning("couldnt find any devices...")
            return None
        else:
            logger.info("using device: %s"%devices[useDevice].name)

        # Create a context using the nth device
        self.context = pyopencl.Context(devices=[devices[useDevice]],
                                        properties=context_properties)

        self.device = devices[useDevice]

        self.queue = pyopencl.CommandQueue(
            self.context,
            properties=pyopencl.command_queue_properties.PROFILING_ENABLE)

        self.imageformats = pyopencl.get_supported_image_formats(
            self.context,
            pyopencl.mem_flags.READ_WRITE,
            pyopencl.mem_object_type.IMAGE3D)

        if printInfo:
            self.printInfo()
Пример #20
0
def _get_image_format(ctx: cl.Context,
                      num_channels: int,
                      dtype: np.dtype,
                      ndim: int,
                      mode: str = "rw") -> Tuple[cl.ImageFormat, bool]:
    """Maximize chance of finding a supported image format for the current device.

    Parameters
    ----------
    ctx : cl.Context
        The Context object creating the image
    num_channels : int
        Number of channels in the image
    dtype : np.dtype
        Image type
    ndim : int (must be 1, 2, or 3)
        Number of dimensions in the array.
    mode : {'rw', 'r', 'w'}, optional
        The memory mode, by default "rw"

    Returns
    -------
    tuple
        A tuple of (format, bool) with an cl.ImageFormat suitable for this image,
        and a "reshape" flag indicating that Device support forced reshaping of
        single channel array to RGBA.  (The actual reshaping is handled in
        _image_from_array)

        For 2, 3 or 4 channels the RG / RGB / RGBA format is returned without
        being checked against the formats supported by *ctx*.

    Raises
    ------
    ValueError
        If mode is not one of {'rw', 'r', 'w'}
        If the number of dimensions is not 1, 2, or 3
        If the dtype is not supported
        If no single-channel format (nor the RGBA fallback) is supported
        If num_channels is not 1, 2, 3 or 4
    """
    if mode == "rw":
        mode_flag = cl.mem_flags.READ_WRITE
    elif mode == "r":
        mode_flag = cl.mem_flags.READ_ONLY
    elif mode == "w":
        mode_flag = cl.mem_flags.WRITE_ONLY
    else:
        raise ValueError(f"invalid value {mode!r} for 'mode'")

    if ndim == 3:
        _dim = cl.mem_object_type.IMAGE3D
    elif ndim == 2:
        _dim = cl.mem_object_type.IMAGE2D
    elif ndim == 1:
        _dim = cl.mem_object_type.IMAGE1D
    else:
        raise ValueError(f"Unsupported number of image dimensions: {ndim}")

    supported_formats = cl.get_supported_image_formats(ctx, mode_flag, _dim)
    try:
        channel_type = cl.DTYPE_TO_CHANNEL_TYPE[dtype]
    except KeyError as err:
        # Chain explicitly so the traceback shows the lookup as the cause.
        raise ValueError(f"Unsupported dtype for image: {dtype}") from err

    if num_channels == 1:
        for order in [
                cl.channel_order.INTENSITY,
                cl.channel_order.R,
                cl.channel_order.Rx,
        ]:
            fmt = cl.ImageFormat(order, channel_type)
            if fmt in supported_formats:
                return fmt, False

        # Last resort for one channel: RGBA, signalled by the reshape flag.
        fmt = cl.ImageFormat(cl.channel_order.RGBA, channel_type)
        if fmt in supported_formats:
            return fmt, True
        # One message string: a stray comma used to turn this into two
        # exception arguments that printed as a tuple.
        raise ValueError(
            f"No supported ImageFormat found for dtype {dtype} with 1 channel\n"
            f"Supported formats include: {supported_formats!r}"
        )
    img_format = {
        2: cl.channel_order.RG,
        3: cl.channel_order.RGB,
        4: cl.channel_order.RGBA,
    }
    if num_channels not in img_format:
        raise ValueError(f"Cannot handle image with {num_channels} channels.")

    return cl.ImageFormat(img_format[num_channels], channel_type), False
Пример #21
0
            print(75 * "-")
        print(device)
        if not options.short:
            print(75 * "-")
            print_info(device, cl.device_info)
            ctx = cl.Context([device])
            for mf in [
                    cl.mem_flags.READ_ONLY,
                    #cl.mem_flags.READ_WRITE,
                    #cl.mem_flags.WRITE_ONLY
            ]:
                for itype in [
                        cl.mem_object_type.IMAGE2D, cl.mem_object_type.IMAGE3D
                ]:
                    try:
                        formats = cl.get_supported_image_formats(
                            ctx, mf, itype)
                    except:
                        formats = "<error>"
                    else:

                        def str_chd_type(chdtype):
                            """Return a shortened name for a channel data type.

                            The name from ``cl.channel_type.to_string`` is
                            abbreviated by a fixed list of substitutions.
                            """
                            text = cl.channel_type.to_string(
                                chdtype, "<unknown channel data type %d>")

                            # Applied in order: "UNSIGNED" has to be replaced
                            # before "SIGNED", which it contains.
                            substitutions = (
                                ("_INT", ""),
                                ("UNSIGNED", "U"),
                                ("SIGNED", "S"),
                                ("NORM", "N"),
                                ("FLOAT", "F"),
                            )
                            for long_form, short_form in substitutions:
                                text = text.replace(long_form, short_form)
                            return text
Пример #22
0
    def test_get_info(self, platform, device):
        failure_count = [0]

        CRASH_QUIRKS = [
            (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.0 CUDA 3.0.1"), [
                (cl.Event, cl.event_info.COMMAND_QUEUE),
            ]),
        ]
        QUIRKS = []

        plat_quirk_key = (platform.vendor, platform.name, platform.version)

        def find_quirk(quirk_list, cl_obj, info):
            """Return True if *quirk_list* has an entry for this platform that
            matches the class of *cl_obj* and the *info* constant.

            Each element of *quirk_list* is ``(platform_key, quirks)`` where
            *quirks* is a sequence of ``(class, info)`` pairs.
            """
            for platform_key, entries in quirk_list:
                if platform_key != plat_quirk_key:
                    continue
                for cls, known_info in entries:
                    if isinstance(cl_obj, cls) and known_info == info:
                        return True

            return False

        def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
            """Query every info constant of *info_cls* on *cl_obj*.

            *func* is called with each constant and defaults to
            ``cl_obj.get_info``.  When *try_attr_form* is true the lower-case
            attribute of the same name is read as well.  Constants listed in
            CRASH_QUIRKS for this platform are not queried at all.  Each
            failure is reported with print and, unless it is listed in
            QUIRKS, counted in ``failure_count[0]``.
            """
            if func is None:

                def func(info):
                    cl_obj.get_info(info)

            for info_name in dir(info_cls):
                if info_name.startswith("_") or info_name == "to_string":
                    continue
                info = getattr(info_cls, info_name)

                # print(...) with one pre-formatted string gives the same
                # output on Python 2 and Python 3.
                if find_quirk(CRASH_QUIRKS, cl_obj, info):
                    print("not executing get_info %s %s"
                          % (type(cl_obj), info_name))
                    print("(known crash quirk for %s)" % platform.name)
                    continue

                try:
                    func(info)
                except Exception:
                    # The message used to be built as a tuple, extended with
                    # "+= <str>" (a TypeError) and never shown; report it the
                    # same way as the attribute-based branch below.
                    print("failed get_info %s %s"
                          % (type(cl_obj), info_name))

                    if find_quirk(QUIRKS, cl_obj, info):
                        print("(known quirk for %s)" % platform.name)
                    else:
                        failure_count[0] += 1

                if try_attr_form:
                    try:
                        getattr(cl_obj, info_name.lower())
                    except Exception:
                        print("failed attr-based get_info %s %s"
                              % (type(cl_obj), info_name))

                        if find_quirk(QUIRKS, cl_obj, info):
                            print("(known quirk for %s)" % platform.name)
                        else:
                            failure_count[0] += 1

        do_test(platform, cl.platform_info)

        do_test(device, cl.device_info)

        ctx = cl.Context([device])
        do_test(ctx, cl.context_info)

        props = 0
        if (device.queue_properties
                & cl.command_queue_properties.PROFILING_ENABLE):
            profiling = True
            props = cl.command_queue_properties.PROFILING_ENABLE
        queue = cl.CommandQueue(ctx, properties=props)
        do_test(queue, cl.command_queue_info)

        prg = cl.Program(
            ctx, """
            __kernel void sum(__global float *a)
            { a[get_global_id(0)] *= 2; }
            """).build()
        do_test(prg, cl.program_info)
        do_test(prg,
                cl.program_build_info,
                lambda info: prg.get_build_info(device, info),
                try_attr_form=False)

        cl.unload_compiler()  # just for the heck of it

        mf = cl.mem_flags
        n = 2000
        a_buf = cl.Buffer(ctx, 0, n * 4)

        do_test(a_buf, cl.mem_info)

        kernel = prg.sum
        do_test(kernel, cl.kernel_info)

        evt = kernel(queue, (n, ), None, a_buf)
        do_test(evt, cl.event_info)

        if profiling:
            evt.wait()
            do_test(evt,
                    cl.profiling_info,
                    lambda info: evt.get_profiling_info(info),
                    try_attr_form=False)

        if device.image_support:
            smp = cl.Sampler(ctx, True, cl.addressing_mode.CLAMP,
                             cl.filter_mode.NEAREST)
            do_test(smp, cl.sampler_info)

            img_format = cl.get_supported_image_formats(
                ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

            img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
            assert img.shape == (128, 256)

            img.depth
            img.image.depth
            do_test(img, cl.image_info, lambda info: img.get_image_info(info))

        if failure_count[0]:
            raise RuntimeError(
                "get_info testing had %d errors "
                "(If you compiled against OpenCL 1.1 but are testing a 1.0 "
                "implementation, you can safely ignore this.)" %
                failure_count[0])
Пример #23
0
    def test_get_info(self, platform, device):
        """Smoke-test every ``get_info`` query on each kind of OpenCL object.

        For the platform, device, context, queue, program, buffer, kernel,
        event and (when the device reports image support) sampler and image,
        every public constant of the matching ``cl.*_info`` class is queried,
        both through the ``get_info``-style call and, where requested, through
        the lower-case attribute of the same name.  Failing queries are
        printed and counted instead of being raised.

        :arg platform: platform under test; its ``vendor``, ``name`` and
            ``version`` select the applicable quirks.
        :arg device: device the context and command queue are created for.
        """
        # One-element list so the nested helpers can update the counter.
        # NOTE(review): the count is tallied but never checked at the end of
        # the test -- confirm whether a non-zero count should fail the test.
        failure_count = [0]

        # Queries that are skipped entirely on the listed platform.
        CRASH_QUIRKS = [
                (("NVIDIA Corporation", "NVIDIA CUDA",
                    "OpenCL 1.0 CUDA 3.0.1"),
                    [
                    (cl.Event, cl.event_info.COMMAND_QUEUE),
                    ]),
                ]
        # Queries whose failure is reported but not counted.
        QUIRKS = []

        plat_quirk_key = (platform.vendor, platform.name, platform.version)

        def find_quirk(quirk_list, cl_obj, info):
            """Return True if *info* on *cl_obj* is listed for this platform."""
            for entry_plat_key, quirks in quirk_list:
                if entry_plat_key != plat_quirk_key:
                    continue
                for quirk_cls, quirk_info in quirks:
                    if isinstance(cl_obj, quirk_cls) and quirk_info == info:
                        return True

            return False

        def report_failure(description, cl_obj, info_name, info):
            """Print a failed query and count it unless it is a known quirk."""
            print(description, type(cl_obj), info_name)

            if find_quirk(QUIRKS, cl_obj, info):
                print("(known quirk for %s)" % platform.name)
            else:
                failure_count[0] += 1

        def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
            """Run every query named by *info_cls* against *cl_obj*.

            *func* performs the query for one info constant and defaults to
            ``cl_obj.get_info``.  With *try_attr_form*, the lower-case
            attribute named after the constant is read as well.
            """
            if func is None:
                def func(info):
                    cl_obj.get_info(info)

            for info_name in dir(info_cls):
                if info_name.startswith("_") or info_name == "to_string":
                    continue

                info = getattr(info_cls, info_name)

                if find_quirk(CRASH_QUIRKS, cl_obj, info):
                    print("not executing get_info", type(cl_obj), info_name)
                    print("(known crash quirk for %s)" % platform.name)
                    continue

                try:
                    func(info)
                except Exception:
                    report_failure("failed get_info", cl_obj, info_name, info)

                if try_attr_form:
                    try:
                        getattr(cl_obj, info_name.lower())
                    except Exception:
                        report_failure("failed attr-based get_info",
                                cl_obj, info_name, info)

        do_test(platform, cl.platform_info)

        do_test(device, cl.device_info)

        ctx = cl.Context([device])
        do_test(ctx, cl.context_info)

        # Must be bound on every path: it is read again after the kernel runs.
        profiling = False
        props = 0
        if (device.queue_properties
                & cl.command_queue_properties.PROFILING_ENABLE):
            profiling = True
            props = cl.command_queue_properties.PROFILING_ENABLE
        queue = cl.CommandQueue(ctx,
                properties=props)
        do_test(queue, cl.command_queue_info)

        prg = cl.Program(ctx, """
            __kernel void sum(__global float *a)
            { a[get_global_id(0)] *= 2; }
            """).build()
        do_test(prg, cl.program_info)
        do_test(prg, cl.program_build_info,
                lambda info: prg.get_build_info(device, info),
                try_attr_form=False)

        cl.unload_compiler()  # just for the heck of it

        n = 2000
        a_buf = cl.Buffer(ctx, 0, n*4)

        do_test(a_buf, cl.mem_info)

        kernel = prg.sum
        do_test(kernel, cl.kernel_info)

        evt = kernel(queue, (n,), None, a_buf)
        do_test(evt, cl.event_info)

        if profiling:
            # Wait for the kernel before querying its profiling info.
            evt.wait()
            do_test(evt, cl.profiling_info,
                    lambda info: evt.get_profiling_info(info),
                    try_attr_form=False)

        if device.image_support:
            smp = cl.Sampler(ctx, True,
                    cl.addressing_mode.CLAMP,
                    cl.filter_mode.NEAREST)
            do_test(smp, cl.sampler_info)

            img_format = cl.get_supported_image_formats(
                    ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

            img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
            assert img.shape == (128, 256)

            # Bare attribute reads: they only need to not raise.
            img.depth
            img.image.depth
            do_test(img, cl.image_info,
                    lambda info: img.get_image_info(info))
Пример #24
0
def gen_rgb_to_yuv():
    """Build the table of RGB to YUV conversion kernels for the current context.

    Every (rgb mode, channel order) pair of IN_CHANNEL_ORDER whose channel
    order is available as a READ_ONLY UNSIGNED_INT8 2D image with both
    COPY_HOST_PTR and USE_HOST_PTR gets kernels generated for "RGBX".
    For the remaining modes, an already registered upload mode made of the
    same channels is reused, and the kernels are generated for a "virtual"
    rgb mode so that the kernel itself does the byteswapping.

    Returns a dict mapping (src_rgb_mode, dst) to
    (kernel name, upload_rgb_mode, channel_order, kernel source), where dst is
    the second element of the keys returned by gen_rgb_to_yuv_kernels.
    The dict is empty when the R channel order is not available in both modes.
    """
    from xpra.codecs.csc_opencl.opencl_kernels import gen_rgb_to_yuv_kernels, rgb_mode_to_indexes, indexes_to_rgb_mode
    #for RGB to YUV support we need to be able to handle the channel_order,
    #with READ_ONLY and both with COPY_HOST_PTR and USE_HOST_PTR since we
    #do not know in advance which one we can use..
    RGB_to_YUV_KERNELS = {}
    sif_copy = pyopencl.get_supported_image_formats(context, mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR,  pyopencl.mem_object_type.IMAGE2D)
    log("get_supported_image_formats(READ_ONLY | COPY_HOST_PTR, IMAGE2D)=%s", sif_copy)
    sif_use = pyopencl.get_supported_image_formats(context, mem_flags.READ_ONLY | mem_flags.USE_HOST_PTR,  pyopencl.mem_object_type.IMAGE2D)
    log("get_supported_image_formats(READ_ONLY | USE_HOST_PTR, IMAGE2D)=%s", sif_use)
    if not has_image_format(sif_copy, pyopencl.channel_order.R, pyopencl.channel_type.UNSIGNED_INT8) or \
       not has_image_format(sif_use, pyopencl.channel_order.R, pyopencl.channel_type.UNSIGNED_INT8):
        log.error("cannot convert to YUV without support for READ_ONLY R channel with both COPY_HOST_PTR and USE_HOST_PTR")
        return  {}
    missing = []
    found_rgb = set()
    def add_rgb_to_yuv(src_rgb_mode, kernel_rgb_mode, upload_rgb_mode, channel_order):
        """ add the kernels converting rgb-to-yuv for the rgb mode given (and record the channel order)"""
        log("add_rgb_to_yuv%s", (src_rgb_mode, kernel_rgb_mode, upload_rgb_mode, CHANNEL_ORDER_TO_STR.get(channel_order)))
        kernels = gen_rgb_to_yuv_kernels(kernel_rgb_mode)
        for (krgb_mode, dst), (kname, ksrc) in kernels.items():
            assert krgb_mode==kernel_rgb_mode
            RGB_to_YUV_KERNELS[(src_rgb_mode, dst)] = (kname, upload_rgb_mode, channel_order, ksrc)
            found_rgb.add(src_rgb_mode)
    for src_rgb_mode, channel_order in IN_CHANNEL_ORDER:
        errs = []
        if not has_image_format(sif_copy, channel_order, pyopencl.channel_type.UNSIGNED_INT8):
            errs.append("COPY_HOST_PTR")
        if not has_image_format(sif_use, channel_order, pyopencl.channel_type.UNSIGNED_INT8):
            errs.append("USE_HOST_PTR")
        if errs:
            log("RGB 2 YUV: channel order %s is not supported in READ_ONLY mode(s): %s", src_rgb_mode, " or ".join(errs))
            missing.append((src_rgb_mode, channel_order))
            continue
        #OpenCL handles this rgb mode natively,
        #so we can generate the kernel for RGB(x) format:
        #(and let the copy to device deal natively with the format given)
        add_rgb_to_yuv(src_rgb_mode, "RGBX", src_rgb_mode, channel_order)
    if missing:
        log("RGB 2 YUV: trying to find alternatives for: %s", missing)
        #now look for rgb byte order workarounds (doing the byteswapping ourselves):
        for src_rgb_mode, _ in missing:
            if src_rgb_mode in found_rgb:
                #we already have an alternative channel_order for this rgb mode
                #ie: RGBx and RGBA both map to "RGBX" or "RGBA"
                log("%s already found", src_rgb_mode)
                continue
            #we want a mode which is supported and has the same component channels
            #(iterate over a snapshot: add_rgb_to_yuv inserts into this dict)
            for _, upload_rgb_mode, channel_order, _ in list(RGB_to_YUV_KERNELS.values()):
                if len(upload_rgb_mode)!=len(src_rgb_mode):
                    #skip mode if the number of channels differs (could drop one we need)
                    log("skipping %s (number of channels different from %s)", upload_rgb_mode, src_rgb_mode)
                    continue
                ok = has_same_channels(upload_rgb_mode, src_rgb_mode)
                log("testing %s as byteswap alternative to %s : %s", upload_rgb_mode, src_rgb_mode, ok)
                if not ok:
                    continue
                log("RGB 2 YUV: using upload mode %s to support %s via generated CL kernel byteswapping", upload_rgb_mode, src_rgb_mode)
                #we just need to work out the starting positions of the RGB pixels:
                spos = rgb_mode_to_indexes(src_rgb_mode)     #ie: BGRX -> [2,1,0,3]
                uli = rgb_mode_to_indexes(upload_rgb_mode)   #ie: RGBX -> [0,1,2,3]
                virt_mode = indexes_to_rgb_mode([uli[x] for x in spos])   #ie: [2,1,0,3]
                log("RGB 2 YUV: virtual mode for %s: %s", src_rgb_mode, virt_mode)
                add_rgb_to_yuv(src_rgb_mode, virt_mode, upload_rgb_mode, channel_order)
                break
            if src_rgb_mode not in found_rgb:
                #not matched:
                log("RGB 2 YUV: channel order %s is not supported: we don't have a byteswapping alternative", src_rgb_mode)

    log("RGB 2 YUV conversions=%s", sorted(RGB_to_YUV_KERNELS.keys()))
    #only the distinct kernel names are logged, the kernel sources are too verbose
    log("RGB 2 YUV kernels=%s", sorted(set(x[0] for x in RGB_to_YUV_KERNELS.values())))
    return RGB_to_YUV_KERNELS
Пример #25
0
def test_get_info(ctx_factory):
    """Smoke-test every ``get_info`` query on each kind of OpenCL object.

    For the platform, device, context, queue, program, buffer, kernel, event
    and (on platforms where it is attempted) sampler and image, every public
    constant of the matching ``cl.*_info`` class is queried, both through the
    ``get_info``-style call and, where requested, through the lower-case
    attribute of the same name.  Failing queries are printed and counted
    instead of being raised.

    :arg ctx_factory: callable returning the context to test; the context
        must hold exactly one device.
    """
    ctx = ctx_factory()
    device, = ctx.devices
    platform = device.platform

    # One-element list so the nested helpers can update the counter.
    # NOTE(review): the count is tallied but never checked at the end of the
    # test -- confirm whether a non-zero count should fail the test.
    failure_count = [0]

    pocl_quirks = [
        (cl.Buffer, cl.mem_info.OFFSET),
        (cl.Program, cl.program_info.BINARIES),
        (cl.Program, cl.program_info.BINARY_SIZES),
    ]
    if ctx._get_cl_version() >= (1, 2) and cl.get_cl_header_version() >= (1, 2):
        pocl_quirks.extend([
            (cl.Program, cl.program_info.KERNEL_NAMES),
            (cl.Program, cl.program_info.NUM_KERNELS),
        ])
    # Queries that are skipped entirely on the listed platforms.  The version
    # string is matched as a prefix of ``platform.version``.
    CRASH_QUIRKS = [  # noqa
            (("NVIDIA Corporation", "NVIDIA CUDA",
                "OpenCL 1.0 CUDA 3.0.1"),
                [
                    (cl.Event, cl.event_info.COMMAND_QUEUE),
                    ]),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.8-pre"),
                pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.8"),
                pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.9-pre"),
                pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.9"),
                pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.10-pre"),
                pocl_quirks),
            (("The pocl project", "Portable Computing Language",
                "OpenCL 1.2 pocl 0.10"),
                pocl_quirks),
            (("Apple", "Apple",
                "OpenCL 1.2"),
                [
                    (cl.Program, cl.program_info.SOURCE),
                    ]),
            ]
    # Queries whose failure is reported but not counted.
    QUIRKS = []  # noqa

    def find_quirk(quirk_list, cl_obj, info):
        """Return True if *info* on *cl_obj* is listed for this platform."""
        for (vendor, name, version), quirks in quirk_list:
            if not (
                    vendor == platform.vendor
                    and name == platform.name
                    and platform.version.startswith(version)):
                continue
            for quirk_cls, quirk_info in quirks:
                if isinstance(cl_obj, quirk_cls) and quirk_info == info:
                    return True

        return False

    def report_failure(description, cl_obj, info_name, info):
        """Print a failed query and count it unless it is a known quirk."""
        print(description, type(cl_obj), info_name)

        if find_quirk(QUIRKS, cl_obj, info):
            print("(known quirk for %s)" % platform.name)
        else:
            failure_count[0] += 1

    def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
        """Run every query named by *info_cls* against *cl_obj*.

        *func* performs the query for one info constant and defaults to
        ``cl_obj.get_info``.  With *try_attr_form*, the lower-case attribute
        named after the constant is read as well.
        """
        if func is None:
            def func(info):
                cl_obj.get_info(info)

        for info_name in dir(info_cls):
            if info_name.startswith("_") or info_name == "to_string":
                continue

            print(info_cls, info_name)
            info = getattr(info_cls, info_name)

            if find_quirk(CRASH_QUIRKS, cl_obj, info):
                print("not executing get_info", type(cl_obj), info_name)
                print("(known crash quirk for %s)" % platform.name)
                continue

            try:
                func(info)
            except Exception:
                report_failure("failed get_info", cl_obj, info_name, info)

            if try_attr_form:
                try:
                    getattr(cl_obj, info_name.lower())
                except Exception:
                    report_failure("failed attr-based get_info",
                            cl_obj, info_name, info)

    do_test(platform, cl.platform_info)
    do_test(device, cl.device_info)
    do_test(ctx, cl.context_info)

    # Must be bound on every path: it is read again after the kernel runs.
    profiling = False
    props = 0
    if (device.queue_properties
            & cl.command_queue_properties.PROFILING_ENABLE):
        profiling = True
        props = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(ctx,
            properties=props)
    do_test(queue, cl.command_queue_info)

    prg = cl.Program(ctx, """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()
    do_test(prg, cl.program_info)
    do_test(prg, cl.program_build_info,
            lambda info: prg.get_build_info(device, info),
            try_attr_form=False)

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n*4)

    do_test(a_buf, cl.mem_info)

    kernel = prg.sum
    do_test(kernel, cl.kernel_info)

    evt = kernel(queue, (n,), None, a_buf)
    do_test(evt, cl.event_info)

    if profiling:
        # Wait for the kernel before querying its profiling info.
        evt.wait()
        do_test(evt, cl.profiling_info,
                lambda info: evt.get_profiling_info(info),
                try_attr_form=False)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    if device.image_support and platform.vendor not in [
            "Intel(R) Corporation",
            "The pocl project",
            ]:
        smp = cl.Sampler(ctx, False,
                cl.addressing_mode.CLAMP,
                cl.filter_mode.NEAREST)
        do_test(smp, cl.sampler_info)

        img_format = cl.get_supported_image_formats(
                ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        assert img.shape == (128, 256)

        # Bare attribute reads: they only need to not raise.
        img.depth
        img.image.depth
        do_test(img, cl.image_info,
                lambda info: img.get_image_info(info))
Пример #26
0
        print(device)
        if not options.short:
            print(75*"-")
            print_info(device, cl.device_info)
            ctx = cl.Context([device])
            for mf in [
                    cl.mem_flags.READ_ONLY,
                    #cl.mem_flags.READ_WRITE,
                    #cl.mem_flags.WRITE_ONLY
                    ]:
                for itype in [
                        cl.mem_object_type.IMAGE2D,
                        cl.mem_object_type.IMAGE3D
                        ]:
                    try:
                        formats = cl.get_supported_image_formats(ctx, mf, itype)
                    except:
                        formats = "<error>"
                    else:
                        def str_chd_type(chdtype):
                            result = cl.channel_type.to_string(chdtype,
                                    "<unknown channel data type %d>")

                            result = result.replace("_INT", "")
                            result = result.replace("UNSIGNED", "U")
                            result = result.replace("SIGNED", "S")
                            result = result.replace("NORM", "N")
                            result = result.replace("FLOAT", "F")
                            return result

                        formats = ", ".join(
Пример #27
0
def test_get_info(ctx_factory):
    """Smoke-test every ``get_info`` query on each kind of OpenCL object.

    For the platform, device, context, queue, program, buffer, kernel, event
    and (on platforms where it is attempted) sampler and image, every public
    constant of the matching ``cl.*_info`` class is queried, both through the
    ``get_info``-style call and, where requested, through the lower-case
    attribute of the same name.  Failing queries are printed and counted
    instead of being raised.

    :arg ctx_factory: callable returning the context to test; the context
        must hold exactly one device.
    """
    ctx = ctx_factory()
    device, = ctx.devices
    platform = device.platform

    # One-element list so the nested helpers can update the counter.
    # NOTE(review): the count is tallied but never checked at the end of the
    # test -- confirm whether a non-zero count should fail the test.
    failure_count = [0]

    pocl_quirks = [
        (cl.Buffer, cl.mem_info.OFFSET),
        (cl.Program, cl.program_info.BINARIES),
        (cl.Program, cl.program_info.BINARY_SIZES),
    ]
    if (ctx._get_cl_version() >= (1, 2)
            and cl.get_cl_header_version() >= (1, 2)):
        pocl_quirks.extend([
            (cl.Program, cl.program_info.KERNEL_NAMES),
            (cl.Program, cl.program_info.NUM_KERNELS),
        ])
    # Queries that are skipped entirely on the listed platforms.  The version
    # string is matched as a prefix of ``platform.version``.
    CRASH_QUIRKS = [  # noqa
        (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.0 CUDA 3.0.1"), [
            (cl.Event, cl.event_info.COMMAND_QUEUE),
        ]),
        (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.2 CUDA 7.5"), [
            # The constant is looked up defensively because it may be absent.
            (cl.Buffer, getattr(cl.mem_info, "USES_SVM_POINTER", None)),
        ]),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.8-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.8"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.9-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.9"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.10-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.10"), pocl_quirks),
        (("Apple", "Apple", "OpenCL 1.2"), [
            (cl.Program, cl.program_info.SOURCE),
        ]),
    ]
    # Queries whose failure is reported but not counted.
    QUIRKS = []  # noqa

    def find_quirk(quirk_list, cl_obj, info):
        """Return True if *info* on *cl_obj* is listed for this platform."""
        for (vendor, name, version), quirks in quirk_list:
            if not (vendor == platform.vendor and name == platform.name
                    and platform.version.startswith(version)):
                continue
            for quirk_cls, quirk_info in quirks:
                if isinstance(cl_obj, quirk_cls) and quirk_info == info:
                    return True

        return False

    def report_failure(description, cl_obj, info_name, info):
        """Print a failed query and count it unless it is a known quirk."""
        print(description, type(cl_obj), info_name)

        if find_quirk(QUIRKS, cl_obj, info):
            print("(known quirk for %s)" % platform.name)
        else:
            failure_count[0] += 1

    def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
        """Run every query named by *info_cls* against *cl_obj*.

        *func* performs the query for one info constant and defaults to
        ``cl_obj.get_info``.  With *try_attr_form*, the lower-case attribute
        named after the constant is read as well.
        """
        if func is None:

            def func(info):
                cl_obj.get_info(info)

        for info_name in dir(info_cls):
            if info_name.startswith("_") or info_name == "to_string":
                continue

            print(info_cls, info_name)
            info = getattr(info_cls, info_name)

            if find_quirk(CRASH_QUIRKS, cl_obj, info):
                print("not executing get_info", type(cl_obj), info_name)
                print("(known crash quirk for %s)" % platform.name)
                continue

            try:
                func(info)
            except Exception:
                report_failure("failed get_info", cl_obj, info_name, info)

            if try_attr_form:
                try:
                    getattr(cl_obj, info_name.lower())
                except Exception:
                    report_failure("failed attr-based get_info", cl_obj,
                                   info_name, info)

    do_test(platform, cl.platform_info)
    do_test(device, cl.device_info)
    do_test(ctx, cl.context_info)

    # Must be bound on every path: it is read again after the kernel runs.
    profiling = False
    props = 0
    if (device.queue_properties
            & cl.command_queue_properties.PROFILING_ENABLE):
        profiling = True
        props = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(ctx, properties=props)
    do_test(queue, cl.command_queue_info)

    prg = cl.Program(
        ctx, """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()
    do_test(prg, cl.program_info)
    do_test(prg,
            cl.program_build_info,
            lambda info: prg.get_build_info(device, info),
            try_attr_form=False)

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n * 4)

    do_test(a_buf, cl.mem_info)

    kernel = prg.sum
    do_test(kernel, cl.kernel_info)

    evt = kernel(queue, (n, ), None, a_buf)
    do_test(evt, cl.event_info)

    if profiling:
        # Wait for the kernel before querying its profiling info.
        evt.wait()
        do_test(evt,
                cl.profiling_info,
                lambda info: evt.get_profiling_info(info),
                try_attr_form=False)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    if device.image_support and platform.vendor not in [
            "Intel(R) Corporation",
            "The pocl project",
    ]:
        smp = cl.Sampler(ctx, False, cl.addressing_mode.CLAMP,
                         cl.filter_mode.NEAREST)
        do_test(smp, cl.sampler_info)

        img_format = cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        assert img.shape == (128, 256)

        # Bare attribute reads: they only need to not raise.
        img.depth
        img.image.depth
        do_test(img, cl.image_info, lambda info: img.get_image_info(info))
Пример #28
0
def gen_yuv_to_rgb():
    """Build the table of YUV to RGB conversion kernels for the current context.

    Returns a dict mapping (yuv_mode, dst_rgb_mode) to
    (kernel name, download_rgb_mode, channel_order, kernel source).
    An empty dict is returned when the selected platform name contains "CUDA"
    and NVIDIA_YUV2RGB is not set.
    """
    global context, selected_platform
    from xpra.codecs.csc_opencl.opencl_kernels import gen_yuv_to_rgb_kernels, rgb_mode_to_indexes, indexes_to_rgb_mode

    if selected_platform and selected_platform.name:
        if "CUDA" in selected_platform.name and not NVIDIA_YUV2RGB:
            log.warn("CUDA device detected, YUV to RGB disabled")
            return {}

    YUV_to_RGB_KERNELS = {}

    #the result of the CSC is downloaded from a WRITE_ONLY image,
    #so that is the mode the channel_order must be supported in:
    write_formats = pyopencl.get_supported_image_formats(context, mem_flags.WRITE_ONLY,  pyopencl.mem_object_type.IMAGE2D)
    log("get_supported_image_formats(WRITE_ONLY, IMAGE2D)=%s", write_formats)
    unsupported = []        #(rgb_mode, channel_order) pairs without native support
    handled_modes = set()   #rgb modes which have at least one kernel registered

    def add_yuv_to_rgb(dst_rgb_mode, kernel_rgb_mode, download_rgb_mode, channel_order):
        """ register the yuv-to-rgb kernels generated for kernel_rgb_mode under dst_rgb_mode """
        log("add_yuv_to_rgb%s", (dst_rgb_mode, kernel_rgb_mode, download_rgb_mode, CHANNEL_ORDER_TO_STR.get(channel_order)))
        for kernel_key, kernel_def in gen_yuv_to_rgb_kernels(kernel_rgb_mode).items():
            yuv_mode, krgb_mode = kernel_key
            kname, ksrc = kernel_def
            assert krgb_mode==kernel_rgb_mode
            YUV_to_RGB_KERNELS[(yuv_mode, dst_rgb_mode)] = (kname, download_rgb_mode, channel_order, ksrc)
            handled_modes.add(dst_rgb_mode)

    for rgb_mode, channel_order in IN_CHANNEL_ORDER:
        #RGBX is never used as download mode, even when it is listed as supported
        #(the original author notes that it does not work)
        if has_image_format(write_formats, channel_order, pyopencl.channel_type.UNSIGNED_INT8) and rgb_mode!="RGBX":
            #native support: generate the kernels for the "RGB(X)" format
            #OpenCL delivers the image in, and download to that same mode
            add_yuv_to_rgb(rgb_mode, "RGBX", rgb_mode, channel_order)
        else:
            log("YUV 2 RGB: channel order %s is not supported directly in WRITE_ONLY + UNSIGNED_INT8 mode", CHANNEL_ORDER_TO_STR.get(channel_order))
            unsupported.append((rgb_mode, channel_order))

    if YUV_to_RGB_KERNELS and unsupported:
        log("YUV 2 RGB: trying to find alternatives for: %s", unsupported)
        #byte order workarounds: the generated kernel does the byteswapping
        for dst_rgb_mode, _ in unsupported:
            if dst_rgb_mode in handled_modes:
                #another channel_order already covers this rgb mode
                #ie: RGBx and RGBA both map to "RGBX" or "RGBA"
                log("%s already found", dst_rgb_mode)
                continue
            #look for a registered download mode made of the same channels
            for _, download_rgb_mode, channel_order, _ in YUV_to_RGB_KERNELS.values():
                if len(download_rgb_mode)!=len(dst_rgb_mode):
                    #a different number of channels could drop one we need
                    log("skipping %s (number of channels different from %s)", download_rgb_mode, dst_rgb_mode)
                    continue
                same_channels = has_same_channels(download_rgb_mode, dst_rgb_mode)
                log("testing %s as byteswap alternative to %s : %s", download_rgb_mode, dst_rgb_mode, same_channels)
                if not same_channels:
                    continue
                log("YUV 2 RGB: using download mode %s to support %s via generated CL kernel byteswapping", download_rgb_mode, dst_rgb_mode)
                #the kernel must write the pixels so that they come out as
                #dst_rgb_mode once they have been downloaded as download_rgb_mode
                #ie: BGRX is wanted, but the pixels are downloaded as RGBX (or the other way around)
                download_indexes = rgb_mode_to_indexes(download_rgb_mode)   #ie: BGRX -> [2,1,0,3]
                wanted_indexes = rgb_mode_to_indexes(dst_rgb_mode)          #ie: RGBX -> [0,1,2,3]
                #key: position of each channel index in the download mode, value: that channel index
                position_map = dict((download_indexes.index(i), i) for i in range(4))     #ie: {2:0, 1:1, 0:2, 3:3}
                log("YUV 2 RGB: reverse map for download mode %s (%s): %s", download_rgb_mode, download_indexes, position_map)
                virt_mode = indexes_to_rgb_mode([position_map[x] for x in wanted_indexes])
                log("YUV 2 RGB: virtual mode for %s (%s): %s", dst_rgb_mode, wanted_indexes, virt_mode)
                add_yuv_to_rgb(dst_rgb_mode, virt_mode, download_rgb_mode, channel_order)
                break
            if dst_rgb_mode not in handled_modes:
                #nothing was registered for this mode:
                log("YUV 2 RGB: channel order %s is not supported: we don't have a byteswapping alternative", dst_rgb_mode)
    log("YUV 2 RGB conversions=%s", sorted(YUV_to_RGB_KERNELS.keys()))
    #only the distinct kernel names are logged here
    kernel_names = set(entry[0] for entry in YUV_to_RGB_KERNELS.values())
    log("YUV 2 RGB kernels=%s", sorted(kernel_names))
    return YUV_to_RGB_KERNELS