Пример #1
0
    def setUpClass(self):

        self.queue = get_queue()
        self.shape = (4, 5, 6)
        self.size = 4 * 5 * 6
        self.values = {
            'shape_x': self.shape[2],
            'shape_y': self.shape[1],
            'shape_z': self.shape[0],
            'llength': 2,
        }
        self.k = CLKernels(self.queue.context, self.values)
        self.grid = np.zeros(self.shape, dtype=np.float64)
        self.grid[0, 0, 0] = 1
        self.grid[0, 0, 1] = 1
        self.grid[0, 1, 1] = 1
        self.grid[0, 0, 2] = 1
        self.grid[0, 0, -1] = 1
        self.grid[-1, 0, 0] = 1
        self.cl_image = cl.image_from_array(self.queue.context,
                                            self.grid.astype(np.float32))
        self.sampler_linear = cl.Sampler(self.queue.context, True,
                                         cl.addressing_mode.REPEAT,
                                         cl.filter_mode.LINEAR)
        self.sampler_nearest = cl.Sampler(self.queue.context, False,
                                          cl.addressing_mode.CLAMP,
                                          cl.filter_mode.NEAREST)
        self.out = np.zeros(self.shape, dtype=np.float64)
        self.cl_out = cl_array.zeros(self.queue, self.shape, dtype=np.float32)
Пример #2
0
        def __init__(self, ctx, values):
            self.sampler_nearest = cl.Sampler(ctx, True,
                                              cl.addressing_mode.REPEAT,
                                              cl.filter_mode.NEAREST)
            self.sampler_linear = cl.Sampler(ctx, True,
                                             cl.addressing_mode.REPEAT,
                                             cl.filter_mode.LINEAR)
            self.multiply = ElementwiseKernel(ctx,
                                              "float *x, float *y, float *z",
                                              "z[i] = x[i] * y[i];")
            self.conj_multiply = ElementwiseKernel(
                ctx, "cfloat_t *x, cfloat_t *y, cfloat_t *z",
                "z[i] = cfloat_mul(cfloat_conj(x[i]), y[i]);")
            self.calc_lcc_and_take_best = ElementwiseKernel(
                ctx, """float *gcc, float *ave, float *ave2, int *mask,
                       float norm_factor, int nrot, float *lcc, int *grot""",
                """float _lcc;
                       if (mask[i] > 0) {
                           _lcc = gcc[i] / sqrt(ave2[i] * norm_factor - ave[i] * ave[i]);
                           if (_lcc > lcc[i]) {
                               lcc[i] = _lcc;
                               grot[i] = nrot;
                           };
                       };
                    """)

            kernel_file = os.path.join(os.path.dirname(__file__), 'kernels.cl')
            with open(kernel_file) as f:
                t = Template(f.read()).substitute(**values)

            self._program = cl.Program(ctx, t).build()
            self._gws_rotate_grid3d = (96, 64, 1)
Пример #3
0
 def setUp(self):
     p = cl.get_platforms()[0]
     devs = p.get_devices()
     self.ctx = cl.Context(devices=devs)
     self.queue = cl.CommandQueue(self.ctx, device=devs[0])
     self.k = CLKernels(self.ctx)
     self.k = CLKernels(self.ctx)
     self.s_linear = cl.Sampler(self.ctx, False, cl.addressing_mode.CLAMP,
                                cl.filter_mode.LINEAR)
     self.s_nearest = cl.Sampler(self.ctx, False, cl.addressing_mode.CLAMP,
                                 cl.filter_mode.NEAREST)
Пример #4
0
def main():
    # setup OpenCL
    platforms = cl.get_platforms(
    )  # a platform corresponds to a driver (e.g. AMD, NVidia, Intel)
    platform = platforms[0]  # take first platform
    devices = platform.get_devices(
        cl.device_type.GPU)  # get GPU devices of selected platform
    device = devices[0]  # take first GPU
    context = cl.Context([device])  # put selected GPU into context object
    queue = cl.CommandQueue(
        context, device)  # create command queue for selected GPU and context

    # prepare data
    imgIn = cv2.imread('photographer.png', cv2.IMREAD_GRAYSCALE)

    rotation_angle = np.pi / 4
    cos_theta = np.cos(rotation_angle)
    sin_theta = np.sin(rotation_angle)

    # setup sampler
    sampler = cl.Sampler(context, True, cl.addressing_mode.REPEAT,
                         cl.filter_mode.NEAREST)

    # get shape of input image, allocate memory for output to which result can be copied to
    shape = imgIn.T.shape
    imgOut = np.empty_like(imgIn)

    # create image buffers which hold images for OpenCL
    imgInBuf = cl.image_from_array(context,
                                   ary=imgIn,
                                   mode="r",
                                   norm_int=True,
                                   num_channels=1)
    imgOutBuf = cl.image_from_array(context,
                                    ary=imgOut,
                                    mode="w",
                                    norm_int=True,
                                    num_channels=1)

    # load, compile and execute OpenCL program
    program = cl.Program(context, open('kernel.cl').read()).build()
    program.img_rotate(queue, shape, None, sampler, imgInBuf, imgOutBuf,
                       np.double(sin_theta), np.double(cos_theta))
    cl.enqueue_copy(
        queue,
        imgOut,
        imgOutBuf,
        origin=(0, 0),
        region=shape,
        is_blocking=True
    )  # wait until finished copying resulting image back from GPU to CPU

    # write output image
    cv2.imwrite('photographer_rotated.png', imgOut)

    # show images
    fig, ax = plt.subplots(1, 2)
    ax[0].imshow(imgIn, cmap='gray')
    ax[1].imshow(imgOut, cmap='gray')
    plt.show()
Пример #5
0
 def init_with_device(self):
     global context, program
     self.context = context
     self.program = program
     self.queue = pyopencl.CommandQueue(self.context)
     fm = pyopencl.filter_mode.NEAREST
     self.sampler = pyopencl.Sampler(self.context, False,
                                     pyopencl.addressing_mode.CLAMP_TO_EDGE,
                                     fm)
     k_def = KERNELS_DEFS.get((self.src_format, self.dst_format))
     assert k_def, "no kernel found for %s to %s" % (self.src_format,
                                                     self.dst_format)
     self.kernel_function_name, _, self.channel_order, src = k_def
     if self.src_format.endswith("P"):
         #yuv 2 rgb:
         self.do_convert_image = self.convert_image_yuv
     else:
         #rgb 2 yuv:
         self.do_convert_image = self.convert_image_rgb
     log("init_context(..) kernel source=%s", src)
     self.kernel_function = getattr(self.program, self.kernel_function_name)
     log("init_context(..) channel order=%s, filter mode=%s",
         CHANNEL_ORDER_TO_STR.get(self.channel_order, self.channel_order),
         FILTER_MODE_TO_STR.get(fm, fm))
     log("init_context(..) kernel_function %s: %s",
         self.kernel_function_name, self.kernel_function)
     assert self.kernel_function
Пример #6
0
    def test_rotate_image3d_1(self):

        shape = (8, 6, 5)
        np_image = np.zeros(shape, dtype=np.float32)
        np_image[0, 0, 0] = 1
        np_image[0, 0, 1] = 1
        np_image[0, 0, 2] = 1

        # 90 degree rotation around z-axis
        rotmat = [[0, 1, 0], [1, 0, 0], [0, 0, 1]]

        np_out = np.zeros_like(np_image)

        expected = np.zeros_like(np_image)
        expected[0, 0, 0] = 1
        expected[0, 1, 0] = 1
        expected[0, 2, 0] = 1

        cl_image = cl.image_from_array(self.queue.context, np_image)
        cl_out = cl_array.to_device(self.queue, np_out)
        cl_sampler = cl.Sampler(self.queue.context, False,
                                cl.addressing_mode.CLAMP,
                                cl.filter_mode.LINEAR)

        self.kernels.rotate_image3d(self.queue, cl_sampler, cl_image, rotmat,
                                    cl_out)

        self.assertTrue(np.allclose(expected, cl_out.get()))
Пример #7
0
def rescale(image, shape, sampler=None, queue=None, out=None, block=False):
    """Rescale *image* to *shape* and use *sampler* which is a :class:`pyopencl.Sampler` instance.
    Use OpenCL *queue* and *out* pyopencl Array. If *block* is True, wait for the copy to finish.
    """
    if cfg.PRECISION.cl_float == 8:
        raise TypeError("Double precision mode not supported")
    shape = make_tuple(shape)
    # OpenCL order
    factor = float(shape[1]) / image.shape[1], float(shape[0]) / image.shape[0]
    LOG.debug("rescale, shape: %s, final_shape: %s, factor: %s", image.shape, shape, factor)

    if queue is None:
        queue = cfg.OPENCL.queue
    if out is None:
        out = cl.array.Array(queue, shape, dtype=cfg.PRECISION.np_float)

    if not sampler:
        sampler = cl.Sampler(
            cfg.OPENCL.ctx, False, cl.addressing_mode.CLAMP_TO_EDGE, cl.filter_mode.LINEAR
        )
    image = g_util.get_image(image)

    ev = cfg.OPENCL.programs["improc"].rescale(
        queue, shape[::-1], None, image, out.data, sampler, g_util.make_vfloat2(*factor)
    )
    if block:
        ev.wait()

    return out
Пример #8
0
def _varconvolve_2d_parametrized(
    image, parameters, kernel_name, sampler=None, queue=None, out=None, block=False
):
    """Variable convolution of *image* with *parameters*, use OpoenCL kernel *kernel_name*,
    *sampler*, *queue*, *out* and wait if *block* is True. Return *out*.
    """
    if queue is None:
        queue = cfg.OPENCL.queue
    if out is None:
        out = cl.array.Array(queue, image.shape, dtype=cfg.PRECISION.np_float)
    if sampler is None:
        sampler = cl.Sampler(
            queue.context, False, cl.addressing_mode.CLAMP_TO_EDGE, cl.filter_mode.NEAREST
        )
    if not isinstance(parameters, cl_array.Array):
        params_host = np.empty(parameters[0].shape, dtype=cfg.PRECISION.vfloat2)
        params_host["y"] = g_util.get_host(parameters[0])
        params_host["x"] = g_util.get_host(parameters[1])
        parameters = cl_array.to_device(queue, params_host)
    if parameters.shape != image.shape:
        raise ValueError(
            "Parameters shape '{}' differs from image shape '{}'".format(
                parameters.shape, image.shape
            )
        )
    image = g_util.get_image(image, queue=queue)
    args = (image, out.data, sampler, cl_array.vec.make_int2(0, 0), parameters.data)

    varconvolve(kernel_name, image.shape[::-1], args, queue=queue, block=block)

    return out
Пример #9
0
    def compute(self):
        self.width, self.height = self.input(0).size
        self.devOutBuffer = cl.Image(self.engine.ctx,
                                     self.engine.mf.READ_WRITE,
                                     self.image_format,
                                     shape=(self.width, self.height))

        sampler = cl.Sampler(
            self.engine.ctx,
            True,  #  Normalized coordinates
            cl.addressing_mode.CLAMP_TO_EDGE,
            cl.filter_mode.LINEAR)

        exec_evt = self.program.run_add(
            self.engine.queue,
            self.size,
            None,
            self.input(0).getOutDevBuffer(),
            self.input(1).getOutDevBuffer(),
            self.devOutBuffer,
            sampler,
            numpy.int32(self.width),
            numpy.int32(self.height),
        )
        exec_evt.wait()
Пример #10
0
 def setupClQueue(self, ctx):
     self.ctx = ctx
     self.queue = cl.CommandQueue(self.ctx,
                                  properties=cl.command_queue_properties.
                                  OUT_OF_ORDER_EXEC_MODE_ENABLE)
     self.mf = cl.mem_flags
     self.sampler = cl.Sampler(self.ctx, True, cl.addressing_mode.REPEAT,
                               cl.filter_mode.LINEAR)
     pass
Пример #11
0
Файл: lab7.py Проект: obask/CL
def main():
    imageObjects = [0, 0]
    # Main
    if len(sys.argv) != 3:
        print "USAGE: " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
        return 1
    # Create an OpenCL context on first available platform
    context, device = CreateContext()
    if context == None:
        print "Failed to create OpenCL context."
        return 1

    # Create a command-queue on the first device available
    commandQueue = cl.CommandQueue(context, device)

    # Make sure the device supports images, otherwise exit
    if not device.get_info(cl.device_info.IMAGE_SUPPORT):
        print "OpenCL device does not support images."
        return 1

    # Load input image from file and load it into
    # an OpenCL image object
    imageObjects[0], imgSize = LoadImage(context, sys.argv[1])
    # Create ouput image object
    clImageFormat = cl.ImageFormat(cl.channel_order.RGBA,
                                   cl.channel_type.UNORM_INT8)
    imageObjects[1] = cl.Image(context, cl.mem_flags.WRITE_ONLY, clImageFormat,
                               imgSize)
    # Create sampler for sampling image object
    sampler = cl.Sampler(
        context,
        False,  # Non-normalized coordinates
        cl.addressing_mode.CLAMP_TO_EDGE,
        cl.filter_mode.NEAREST)

    # Create OpenCL program
    program = CreateProgram(context, device, "ImageFilter2D.cl")

    # Call the kernel directly
    localWorkSize = (16, 16)
    globalWorkSize = (RoundUp(localWorkSize[0],
                              imgSize[0]), RoundUp(localWorkSize[1],
                                                   imgSize[1]))
    program.gaussian_filter(commandQueue, globalWorkSize, localWorkSize,
                            imageObjects[0], imageObjects[1], sampler,
                            numpy.int32(imgSize[0]), numpy.int32(imgSize[1]))

    # Read the output buffer back to the Host
    buffer = numpy.zeros(imgSize[0] * imgSize[1] * 4, numpy.uint8)
    origin = (0, 0, 0)
    region = (imgSize[0], imgSize[1], 1)

    cl.enqueue_read_image(commandQueue, imageObjects[1], origin, region,
                          buffer).wait()
    print "Executed program successfully."
    # Save the image to disk
    SaveImage(sys.argv[2], buffer, imgSize)
Пример #12
0
 def set_envmap(self, envmap):
     fmt = cl.ImageFormat(cl.channel_order.RGBA, cl.channel_type.FLOAT)
     em = np.zeros(envmap.shape[:2] + (4, ), dtype=np.float32)
     em[:, :, :3] = envmap
     em[:, :, 3] = 1
     self.envmap = cl.Image(context,
                            mf.READ_ONLY | mf.COPY_HOST_PTR,
                            fmt,
                            shape=em.shape[:2],
                            hostbuf=em)
     self.sampler = cl.Sampler(context, True, cl.addressing_mode.CLAMP,
                               cl.filter_mode.LINEAR)
Пример #13
0
    def test_image_2d(self, device, ctx_getter):
        context = ctx_getter()

        if not device.image_support:
            from py.test import skip
            skip("images not supported on %s" % device)

        prg = cl.Program(
            context, """
            __kernel void copy_image(
              __global float4 *dest,
              __read_only image2d_t src,
              sampler_t samp,
              int width)
            {
              int x = get_global_id(0);
              int y = get_global_id(1);
              /*
              const sampler_t samp =
                CLK_NORMALIZED_COORDS_FALSE
                | CLK_ADDRESS_CLAMP
                | CLK_FILTER_NEAREST;
                */
              dest[x + width*y] = read_imagef(src, samp, (float2)(x, y));
              // dest[x + width*y] = get_image_height(src);
            }
            """).build()

        a = numpy.random.rand(1024, 1024, 4).astype(numpy.float32)
        queue = cl.CommandQueue(context)
        mf = cl.mem_flags
        a_img = cl.Image(context,
                         mf.READ_ONLY | mf.COPY_HOST_PTR,
                         cl.ImageFormat(cl.channel_order.RGBA,
                                        cl.channel_type.FLOAT),
                         shape=a.shape[:2],
                         hostbuf=a)
        a_dest = cl.Buffer(context, mf.READ_WRITE, a.nbytes)

        samp = cl.Sampler(context, False, cl.addressing_mode.CLAMP,
                          cl.filter_mode.NEAREST)
        prg.copy_image(queue, a.shape, None, a_dest, a_img, samp,
                       numpy.int32(a.shape[0]))

        a_result = numpy.empty_like(a)
        cl.enqueue_read_buffer(queue, a_dest, a_result, is_blocking=True)
        print a_result.dtype

        assert la.norm(a_result - a) == 0
Пример #14
0
    def _transfer_real(
        self,
        shape,
        center,
        pixel_size,
        energy,
        exponent,
        compute_phase,
        is_parabola,
        out,
        queue,
        block,
        flux=1,
    ):
        flux = (self.get_flux(energy, None, pixel_size).rescale(
            1 / q.s).magnitude.astype(cfg.PRECISION.np_float))
        cl_image = gutil.get_image(flux, queue=queue)

        sampler = cl.Sampler(cfg.OPENCL.ctx, False, cl.addressing_mode.CLAMP,
                             cl.filter_mode.LINEAR)

        cl_center = gutil.make_vfloat3(*center)
        cl_ps = gutil.make_vfloat2(*pixel_size.simplified.magnitude[::-1])
        cl_input_ps = gutil.make_vfloat2(
            *self._pixel_size.simplified.magnitude[::-1])
        z_sample = self.sample_distance.simplified.magnitude
        lam = energy_to_wavelength(energy).simplified.magnitude
        kernel = cfg.OPENCL.programs["physics"].make_flat_from_2D_profile

        ev = kernel(
            queue,
            shape[::-1],
            None,
            out.data,
            cl_image,
            sampler,
            cl_center,
            cl_ps,
            cl_input_ps,
            cfg.PRECISION.np_float(z_sample),
            cfg.PRECISION.np_float(lam),
            np.int32(exponent),
            np.int32(compute_phase),
            np.int32(is_parabola),
        )

        if block:
            ev.wait()
Пример #15
0
def test_int_ptr(ctx_factory):
    def do_test(obj):
        new_obj = type(obj).from_int_ptr(obj.int_ptr)
        assert obj == new_obj
        assert type(obj) is type(new_obj)

    ctx = ctx_factory()
    device, = ctx.devices
    platform = device.platform
    do_test(device)
    do_test(platform)
    do_test(ctx)

    queue = cl.CommandQueue(ctx)
    do_test(queue)

    evt = cl.enqueue_marker(queue)
    do_test(evt)

    prg = cl.Program(
        ctx, """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()

    do_test(prg)
    do_test(prg.sum)

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n * 4)
    do_test(a_buf)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    if device.image_support and platform.vendor not in [
            "Intel(R) Corporation",
            "The pocl project",
    ]:
        smp = cl.Sampler(ctx, False, cl.addressing_mode.CLAMP,
                         cl.filter_mode.NEAREST)
        do_test(smp)

        img_format = cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        do_test(img)
Пример #16
0
    def parallel_prediction_errors(self, image):
        """ Get the MILC prediction errors for a 3D image by means of OpenCL accelerated computation

            Keyword arguments:
            image --  a 3D numpy array (bitmap image)

            Return:
            a 3D numpy array of the same shape of "image", containing the prediction errors
        """

        mf = cl.mem_flags
        # Define the image format for the prediction errors
        err_format = cl.ImageFormat(channel_order=cl.channel_order.R,
                                    channel_type=DataType.CL_ERR.value)

        # Define the input image from the numpy 3D array
        source_image = cl.image_from_array(self.ctx, image)

        original_shape = numpy.shape(image)
        cl_shape = list(
            reversed(original_shape))  # inverted shape (pyOpenCL bug?)

        # output image
        output_image = cl.Image(self.ctx,
                                mf.WRITE_ONLY,
                                err_format,
                                shape=cl_shape)

        # sampler. pixels out of range have a value of '0'
        sampler = cl.Sampler(self.ctx, False, cl.addressing_mode.CLAMP,
                             cl.filter_mode.NEAREST)

        # enqueue kernel
        self.program.image_test(self.queue, original_shape, None, source_image,
                                output_image, sampler)

        # read the resulting image into a numpy array
        output_data = numpy.empty(shape=cl_shape, dtype=DataType.ERR.value)
        cl.enqueue_read_image(self.queue, output_image, (0, 0, 0), cl_shape,
                              output_data)

        return output_data.reshape(original_shape)
Пример #17
0
    def setUp(self):

        self.pdb1 = disvis.PDB.fromfile(
            os.path.join(os.path.dirname(__file__), 'data', 'O14250.pdb'))
        self.pdb2 = disvis.PDB.fromfile(
            os.path.join(os.path.dirname(__file__), 'data', 'Q9UT97.pdb'))
        q, w, a = disvis.rotations.proportional_orientations(10)
        self.rotations = disvis.rotations.quat_to_rotmat(q)
        self.vlength = int(np.linalg.norm(self.pdb2.coor - self.pdb2.center, axis=1).max() +\
                       3 + 1.5)/1.0

        self.shape = disvis.disvis.grid_shape(self.pdb1.coor, self.pdb2.coor,
                                              1)
        radii = np.zeros(self.pdb1.coor.shape[0], dtype=np.float64)
        radii.fill(1.5 + 3)
        self.rsurf = disvis.disvis.rsurface(self.pdb1.coor, radii, self.shape,
                                            1)
        self.rcore = disvis.volume.erode(self.rsurf, 3)
        self.origin = self.rsurf.origin
        self.voxelspacing = 1

        radii = np.zeros(self.pdb2.coor.shape[0], dtype=np.float64)
        radii.fill(1.5)
        self.im_lsurf = disvis.points.dilate_points(self.pdb2.coor - self.pdb2.center \
            + self.pdb1.center, radii, disvis.volume.zeros_like(self.rcore))
        self.im_center = np.asarray(
            (self.pdb1.center - self.rcore.origin) / 1.0, dtype=np.float64)

        self.queue = disvis.helpers.get_queue()
        self.sampler = cl.Sampler(self.queue.context, False,
                                  cl.addressing_mode.CLAMP,
                                  cl.filter_mode.LINEAR)
        self.kernels = disvis.kernels.Kernels(self.queue.context)
        self.ft_shape = list(self.shape)
        self.ft_shape[0] = self.ft_shape[0] // 2 + 1
        self.ft_shape = tuple(self.ft_shape)
        self.kernels.rfftn = disvis.pyclfft.RFFTn(self.queue.context,
                                                  self.shape)
        self.kernels.irfftn = disvis.pyclfft.iRFFTn(self.queue.context,
                                                    self.shape)
Пример #18
0
        if not info_name.startswith("_") and info_name != "to_string":
            info = getattr(info_cls, info_name)
            try:
                info_value = obj.get_info(info)
            except:
                info_value = "<error>"

            print "%s: %s" % (info_name, info_value)


platform = cl.get_platforms()[0]
device = platform.get_devices()[0]
context = cl.Context(devices=[device])
queue = cl.CommandQueue(context)
mf = cl.mem_flags
sampler = cl.Sampler(context, True, cl.addressing_mode.CLAMP,
                     cl.filter_mode.LINEAR)


def normal_maker(name, mat, matw, matr):
    def tup(i):
        return '(float4)' + repr(tuple(mat[i].tolist()))

    def tupw(i):
        return '(float4)' + repr(tuple(matw[i].tolist()))

    def tupr(i):
        return '(float4)' + repr(tuple(matr[i].tolist()))

    return """
  inline float4 matmul3_%s(const float4 r1) {
    return (float4)(dot(%s,r1),dot(%s,r1),dot(%s,r1),0);
Пример #19
0
def dilate():
  #headURI = 'http://www.slicer.org/slicerWiki/images/4/43/MR-head.nrrd'
  #labelURI = 'http://boggs.bwh.harvard.edu/tmp/MRHead-label.nrrd'
  base = '/tmp/hoot/'
  headURI = base + 'MR-head.nrrd'
  labelURI = base + 'MR-head-label.nrrd'

  print("Starting...")
  if not slicer.util.getNode('MR-head*'):
    print("Downloading...")
    vl = slicer.modules.volumes.logic()
    name = 'MR-head'
    volumeNode = vl.AddArchetypeVolume(headURI, name, 0)
    name = 'MR-head-label'
    labelNode = vl.AddArchetypeVolume(labelURI, name, 1)
    if volumeNode:
      storageNode = volumeNode.GetStorageNode()
      if storageNode:
        # Automatically select the volume to display
        appLogic = slicer.app.applicationLogic()
        selNode = appLogic.GetSelectionNode()
        selNode.SetReferenceActiveVolumeID(volumeNode.GetID())
        selNode.SetReferenceActiveLabelVolumeID(labelNode.GetID())
        appLogic.PropagateVolumeSelection(1)

  node = slicer.util.getNode('MR-head')
  volume = slicer.util.array('MR-head')
  oneOverVolumeMax = 1. / volume.max()
  labelNode = slicer.util.getNode('MR-head-label')
  labelVolume = slicer.util.array('MR-head-label')

  print("Creating Context...")
  ctx = None
  for platform in cl.get_platforms():
      for device in platform.get_devices():
          print(cl.device_type.to_string(device.type))
          if cl.device_type.to_string(device.type) == "GPU":
             ctx = cl.Context([device])
             break;

  if not ctx:
    print ("no GPU context available")
    ctx = cl.create_some_context()
  print("Creating Queue...")
  queue = cl.CommandQueue(ctx)

  print("Copying volumes...")
  mf = cl.mem_flags
  volume_dev = cl_array.to_device(queue, volume)
  volume_image_dev = cl.image_from_array(ctx, volume,1)
  label_dev = cl.array.to_device(queue, labelVolume)
  theta = numpy.zeros_like(volume)
  theta_dev = cl.array.to_device(queue,theta)
  thetaNext = numpy.zeros_like(volume)
  thetaNext_dev = cl.array.to_device(queue,thetaNext)
  dest_dev = cl_array.empty_like(volume_dev)

  sampler = cl.Sampler(ctx,False,cl.addressing_mode.REPEAT,cl.filter_mode.LINEAR)

  print("Building program...")
  slices,rows,columns = volume.shape
  prg = cl.Program(ctx, """
      #pragma OPENCL EXTENSION cl_khr_fp64: enable

      __kernel void copy(
          __global short source[{slices}][{rows}][{columns}],
          __global short destination[{slices}][{rows}][{columns}])
      {{
        size_t slice = get_global_id(0);
        size_t column = get_global_id(1);
        size_t row = get_global_id(2);

        if (slice < {slices} && row < {rows} && column < {columns})
        {{
          destination[slice][row][column] = source [slice][row][column];
        }}
      }}

      __kernel void dilate(
          __read_only image3d_t volume,
          __global short label[{slices}][{rows}][{columns}],
          sampler_t volumeSampler,
          __global short dest[{slices}][{rows}][{columns}])
      {{
        size_t slice = get_global_id(0);
        size_t column = get_global_id(1);
        size_t row = get_global_id(2);

        if (slice >= {slices} || row >= {rows} || column >= {columns})
        {{
          return;
        }}

        int size = 1;

        int sliceOff, rowOff, columnOff;
        unsigned int sampleSlice, sampleRow, sampleColumn;

        short samples = 0;
        float4 samplePosition;
        for (sliceOff = -size; sliceOff <= size; sliceOff++)
        {{
          sampleSlice = slice + sliceOff;
          if (sampleSlice < 0 || sampleSlice >= {slices}) continue;
          for (rowOff = -size; rowOff <= size; rowOff++)
          {{
          sampleRow = row + rowOff;
          if (sampleRow < 0 || sampleRow >= {rows}) continue;
            for (columnOff = -size; columnOff <= size; columnOff++)
            {{
              sampleColumn = column + columnOff;
              if (sampleColumn < 0 || sampleColumn >= {columns}) continue;
              if (label[sampleSlice][sampleRow][sampleColumn] != 0)
              {{
                samples++;
              }}
            }}
          }}
        }}
        dest[slice][row][column] = samples;
      }}
      """.format(slices=slices,rows=rows,columns=columns)).build()

  def iterate(iterations=10):
    print("Running!")
    for iteration in xrange(iterations):
      prg.dilate(queue, volume.shape, None, volume_image_dev, label_dev.data, sampler, dest_dev.data)
      prg.copy(queue, volume.shape, None, dest_dev.data, label_dev.data)

    print("Getting data...")
    labelVolume[:] = dest_dev.get()
    print("Rendering...")
    labelNode.GetImageData().Modified()
    node.GetImageData().Modified()

    print("Done!")

  def grow(iterations=10):
    for iteration in xrange(iterations):
      iterate(1)
      slicer.app.processEvents()
Пример #20
0
def imageBlur():
  print("Starting...")
  if not slicer.util.getNode('MRHead*'):
    print("Downloading...")
    vl = slicer.modules.volumes.logic()
    uri = 'http://www.slicer.org/slicerWiki/images/4/43/MR-head.nrrd'
    name = 'MRHead'
    volumeNode = vl.AddArchetypeVolume(uri, name, 0)
    if volumeNode:
      storageNode = volumeNode.GetStorageNode()
      if storageNode:
        # Automatically select the volume to display
        appLogic = slicer.app.applicationLogic()
        selNode = appLogic.GetSelectionNode()
        selNode.SetReferenceActiveVolumeID(volumeNode.GetID())
        appLogic.PropagateVolumeSelection(1)

  node = slicer.util.getNode('MRHead*')
  volume = slicer.util.array('MRHead*')

  print("Creating Context...")
  ctx = None
  for platform in cl.get_platforms():
      for device in platform.get_devices():
          print(cl.device_type.to_string(device.type))
          if cl.device_type.to_string(device.type) == "GPU":
             ctx = cl.Context([device])
             break;

  if not ctx:
    print ("no GPU context available")
    ctx = cl.create_some_context()
  print("Creating Queue...")
  queue = cl.CommandQueue(ctx)

  print("Copying volume...")
  mf = cl.mem_flags
  volume_dev = cl_array.to_device(queue, volume)
  volume_image_dev = cl.image_from_array(ctx, volume,1)
  dest_dev = cl_array.empty_like(volume_dev)

  sampler = cl.Sampler(ctx,False,cl.addressing_mode.REPEAT,cl.filter_mode.LINEAR)

  print("Building program...")
  slices,rows,columns = volume.shape
  prg = cl.Program(ctx, """
      #pragma OPENCL EXTENSION cl_khr_fp64: enable
      __kernel void blur(
          __read_only image3d_t volume,
          sampler_t volumeSampler,
          __global short dest[{slices}][{rows}][{columns}])
      {{
        size_t slice = get_global_id(0);
        size_t column = get_global_id(1);
        size_t row = get_global_id(2);

        int size = 10;

        int sliceOff, rowOff, columnOff;
        unsigned int sampleSlice, sampleRow, sampleColumn;

        float sum = 0;
        unsigned int samples = 0;
        float4 samplePosition;
        int4 sample;
        for (sliceOff = -size; sliceOff <= size; sliceOff++)
        {{
          sampleSlice = slice + sliceOff;
          if (sampleSlice < 0 || sampleSlice >= {slices}) continue;
          for (rowOff = -size; rowOff <= size; rowOff++)
          {{
          sampleRow = row + rowOff;
          if (sampleRow < 0 || sampleRow >= {rows}) continue;
            for (columnOff = -size; columnOff <= size; columnOff++)
            {{
            sampleColumn = column + columnOff;
            if (sampleColumn < 0 || sampleColumn >= {columns}) continue;
            samplePosition.x = sampleColumn;
            samplePosition.y = sampleRow;
            samplePosition.z = sampleSlice;
            sample = read_imagei(volume, volumeSampler, samplePosition);
            //sum += sampleSlice+sampleRow+sampleColumn;
            sum += sample.x;
            samples++;
            }}
          }}
        }}
        dest[slice][row][column] = (short) (sum / samples);
      }}
      """.format(slices=slices,rows=rows,columns=columns)).build()

  print("Running!")
  prg.blur(queue, volume.shape, None, volume_image_dev, label_image_dev, sampler, dest_dev.data)

  print("Getting data...")
  volume[:] = dest_dev.get()
  print("Rendering...")
  node.GetImageData().Modified()

  print("Done!")
Пример #21
0
import pyopencl, pyopencl.array
import numpy
ctx = pyopencl.create_some_context()
queue = pyopencl.CommandQueue(
    ctx, properties=pyopencl.command_queue_properties.PROFILING_ENABLE)
x, y, z = numpy.ogrid[-10:10:0.05, -10:10:0.05, -10:10:0.05]
r = numpy.sqrt(x * x + y * y + z * z)
data = ((x * x - y * y + z * z) * numpy.exp(-r)).astype("float32")
gpu_vol = pyopencl.image_from_array(ctx, data, 1)
shape = (500, 500)
img = numpy.empty(shape, dtype=numpy.float32)
gpu_img = pyopencl.array.empty(queue, shape, numpy.float32)
prg = open("interpolation.cl").read()
sampler = pyopencl.Sampler(
    ctx,
    True,  # normalized coordinates
    pyopencl.addressing_mode.CLAMP_TO_EDGE,
    pyopencl.filter_mode.LINEAR)

prg = pyopencl.Program(ctx, prg).build()
n = pyopencl.array.to_device(queue, numpy.array([1, 1, 1],
                                                dtype=numpy.float32))
c = pyopencl.array.to_device(queue,
                             numpy.array([0.5, 0.5, 0.5], dtype=numpy.float32))
prg.interpolate(queue, (512, 512), (16, 16), gpu_vol, sampler, gpu_img.data,
                numpy.int32(shape[1]), numpy.int32(shape[1]), c.data, n.data)
img = gpu_img.get()

#timing:
evt = []
evt.append(
Пример #22
0
def test_image_2d(ctx_factory):
    context = ctx_factory()

    device, = context.devices

    if not device.image_support:
        from pytest import skip
        skip("images not supported on %s" % device)

    if "Intel" in device.vendor and "31360.31426" in device.version:
        from pytest import skip
        skip("images crashy on %s" % device)
    _xfail_if_pocl(device.platform, None, "pocl does not support CL_ADDRESS_CLAMP")

    prg = cl.Program(context, """
        __kernel void copy_image(
          __global float *dest,
          __read_only image2d_t src,
          sampler_t samp,
          int stride0)
        {
          int d0 = get_global_id(0);
          int d1 = get_global_id(1);
          /*
          const sampler_t samp =
            CLK_NORMALIZED_COORDS_FALSE
            | CLK_ADDRESS_CLAMP
            | CLK_FILTER_NEAREST;
            */
          dest[d0*stride0 + d1] = read_imagef(src, samp, (float2)(d1, d0)).x;
        }
        """).build()

    num_channels = 1
    a = np.random.rand(1024, 512, num_channels).astype(np.float32)
    if num_channels == 1:
        a = a[:, :, 0]

    queue = cl.CommandQueue(context)
    try:
        a_img = cl.image_from_array(context, a, num_channels)
    except cl.RuntimeError:
        import sys
        exc = sys.exc_info()[1]
        if exc.code == cl.status_code.IMAGE_FORMAT_NOT_SUPPORTED:
            from pytest import skip
            skip("required image format not supported on %s" % device.name)
        else:
            raise

    a_dest = cl.Buffer(context, cl.mem_flags.READ_WRITE, a.nbytes)

    samp = cl.Sampler(context, False,
            cl.addressing_mode.CLAMP,
            cl.filter_mode.NEAREST)
    prg.copy_image(queue, a.shape, None, a_dest, a_img, samp,
            np.int32(a.strides[0]/a.dtype.itemsize))

    a_result = np.empty_like(a)
    cl.enqueue_copy(queue, a_result, a_dest)

    good = la.norm(a_result - a) == 0
    if not good:
        if queue.device.type & cl.device_type.CPU:
            assert good, ("The image implementation on your CPU CL platform '%s' "
                    "returned bad values. This is bad, but common."
                    % queue.device.platform)
        else:
            assert good
def main():
    
    os.environ['PYOPENCL_COMPILER_OUTPUT'] = '1'

    imageObjects = [ 0, 0, 0 ]
            
    # Main
    if len(sys.argv) != 4:
        print "USAGE: " + sys.argv[0] + " <source> <palette> <output>"
        return 1
	
    # Create an OpenCL context on first available platform
    context, device = CreateContext();
    if context == None:
        print "Failed to create OpenCL context."
        return 1
        
    # Create a command-queue on the first device available
    # on the created context
    commandQueue = cl.CommandQueue(context, device)
    
    # Make sure the device supports images, otherwise exit
    if not device.get_info(cl.device_info.IMAGE_SUPPORT):
        print "OpenCL device does not support images."
        return 1
    
    # Load input image from file and load it into
    # an OpenCL image object
    imageObjects[0], srcSize = LoadImage(context, sys.argv[1])
    imageObjects[1], palSize = LoadImage(context, sys.argv[2])

    pixels = srcSize[0] * srcSize[1]

    if (palSize[0] * palSize[1] != pixels):
        print "Images do not contain the same number of pixels."
        return 1

#########################################
###
###
### TODO Rearrange the palette to the correct dimensions here
###
###
#########################################

    # Create ouput image object
    clImageFormat = cl.ImageFormat(cl.channel_order.RGBA, 
                                   cl.channel_type.UNORM_INT8)
    bufferObject = cl.Buffer(context,
                             cl.mem_flags.READ_WRITE,
			     pixels * 4 * 4)
    imageObjects[2] = cl.Image(context,
                               cl.mem_flags.WRITE_ONLY,
                               clImageFormat,
                               srcSize)

    # Create sampler for sampling image object
    sampler = cl.Sampler(context,
                         False, #  Non-normalized coordinates
                         cl.addressing_mode.CLAMP_TO_EDGE,
                         cl.filter_mode.NEAREST)

    # Create OpenCL program
    program = CreateProgram(context, device, "quad_swap.cl")
    
    # Call the kernel directly
    localWorkSize = (16,)
    globalWorkSize = ( RoundUp(localWorkSize[0], srcSize[0]*srcSize[1]/2), )

    program.quad_swap(commandQueue,
                            globalWorkSize,
                            localWorkSize,
                            imageObjects[0],
                            imageObjects[1],
			    bufferObject,
                            imageObjects[2],
                            sampler,
                            numpy.int32(srcSize[0]),
                            numpy.int32(srcSize[1]))
        
    # Read the output buffer back to the Host
    buffer = numpy.zeros(srcSize[0] * srcSize[1] * 4, numpy.uint8)
    origin = ( 0, 0, 0 )
    region = ( srcSize[0], srcSize[1], 1 )
    
    cl.enqueue_read_image(commandQueue, imageObjects[2], origin, region, buffer).wait()
    
    # Save the image to disk
    SaveImage(sys.argv[3], buffer, srcSize)
Пример #24
0
    def test_get_info(self, platform, device):
        failure_count = [0]

        CRASH_QUIRKS = [
            (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.0 CUDA 3.0.1"), [
                (cl.Event, cl.event_info.COMMAND_QUEUE),
            ]),
        ]
        QUIRKS = []

        plat_quirk_key = (platform.vendor, platform.name, platform.version)

        def find_quirk(quirk_list, cl_obj, info):
            for entry_plat_key, quirks in quirk_list:
                if entry_plat_key == plat_quirk_key:
                    for quirk_cls, quirk_info in quirks:
                        if (isinstance(cl_obj, quirk_cls)
                                and quirk_info == info):
                            return True

            return False

        def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
            if func is None:

                def func(info):
                    cl_obj.get_info(info)

            for info_name in dir(info_cls):
                if not info_name.startswith("_") and info_name != "to_string":
                    info = getattr(info_cls, info_name)

                    if find_quirk(CRASH_QUIRKS, cl_obj, info):
                        print "not executing get_info", type(cl_obj), info_name
                        print "(known crash quirk for %s)" % platform.name
                        continue

                    try:
                        func(info)
                    except:
                        msg = "failed get_info", type(cl_obj), info_name

                        if find_quirk(QUIRKS, cl_obj, info):
                            msg += ("(known quirk for %s)" % platform.name)
                        else:
                            failure_count[0] += 1

                    if try_attr_form:
                        try:
                            getattr(cl_obj, info_name.lower())
                        except:
                            print "failed attr-based get_info", type(
                                cl_obj), info_name

                            if find_quirk(QUIRKS, cl_obj, info):
                                print "(known quirk for %s)" % platform.name
                            else:
                                failure_count[0] += 1

        do_test(platform, cl.platform_info)

        do_test(device, cl.device_info)

        ctx = cl.Context([device])
        do_test(ctx, cl.context_info)

        props = 0
        if (device.queue_properties
                & cl.command_queue_properties.PROFILING_ENABLE):
            profiling = True
            props = cl.command_queue_properties.PROFILING_ENABLE
        queue = cl.CommandQueue(ctx, properties=props)
        do_test(queue, cl.command_queue_info)

        prg = cl.Program(
            ctx, """
            __kernel void sum(__global float *a)
            { a[get_global_id(0)] *= 2; }
            """).build()
        do_test(prg, cl.program_info)
        do_test(prg,
                cl.program_build_info,
                lambda info: prg.get_build_info(device, info),
                try_attr_form=False)

        cl.unload_compiler()  # just for the heck of it

        mf = cl.mem_flags
        n = 2000
        a_buf = cl.Buffer(ctx, 0, n * 4)

        do_test(a_buf, cl.mem_info)

        kernel = prg.sum
        do_test(kernel, cl.kernel_info)

        evt = kernel(queue, (n, ), None, a_buf)
        do_test(evt, cl.event_info)

        if profiling:
            evt.wait()
            do_test(evt,
                    cl.profiling_info,
                    lambda info: evt.get_profiling_info(info),
                    try_attr_form=False)

        if device.image_support:
            smp = cl.Sampler(ctx, True, cl.addressing_mode.CLAMP,
                             cl.filter_mode.NEAREST)
            do_test(smp, cl.sampler_info)

            img_format = cl.get_supported_image_formats(
                ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

            img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
            assert img.shape == (128, 256)

            img.depth
            img.image.depth
            do_test(img, cl.image_info, lambda info: img.get_image_info(info))

        if failure_count[0]:
            raise RuntimeError(
                "get_info testing had %d errors "
                "(If you compiled against OpenCL 1.1 but are testing a 1.0 "
                "implementation, you can safely ignore this.)" %
                failure_count[0])
Пример #25
0
    def test_findMembranePositionUsingMaxIncline(self):
        inputPath = 'C:/Private/PhD_Publications/Publication_of_Algorithm/Code/TrackingAlgorithm/TrackingAlgorithm/TestData/ReferenceDataForTests/UnitTests/OpenClKernels/findMembranePositionUsingMaxIncline_000/input'
        referencePath = 'C:/Private/PhD_Publications/Publication_of_Algorithm/Code/TrackingAlgorithm/TrackingAlgorithm/TestData/ReferenceDataForTests/UnitTests/OpenClKernels/findMembranePositionUsingMaxIncline_000/output'
        referenceVariableName1 = 'dev_membraneCoordinates'
        referenceVariableName2 = 'dev_membraneNormalVectors'
        referenceVariableName3 = 'dev_fitInclines'

        self.loadHostVariable('linFitSearchRangeXvalues', inputPath)
        self.setupTest("maximumIntensityIncline")

        self.nrOfLocalAngleSteps = 64
        self.detectionKernelStrideSize = 2048
        self.nrOfStrides = 1
        self.nrOfDetectionAngleSteps = np.float64(
            self.nrOfStrides * self.detectionKernelStrideSize)
        self.sampler = cl.Sampler(self.ctx, True, cl.addressing_mode.REPEAT,
                                  cl.filter_mode.LINEAR)

        self.loadHostVariable('trackingGlobalSize', inputPath)
        self.loadHostVariable('trackingWorkGroupSize', inputPath)
        self.loadHostVariable('host_Img', inputPath)
        self.dev_Img = cl.image_from_array(self.ctx,
                                           ary=self.host_Img,
                                           mode="r",
                                           norm_int=False,
                                           num_channels=1)
        self.loadHostVariable('imgSizeX', inputPath)
        self.loadHostVariable('imgSizeY', inputPath)
        #self.saveDeviceVariable('buf_localRotationMatrices',inputPath)
        self.loadHostVariable('localRotationMatrices', inputPath)
        self.buf_localRotationMatrices = cl.Buffer(
            self.ctx,
            self.mf.READ_ONLY | self.mf.COPY_HOST_PTR,
            hostbuf=self.localRotationMatrices)
        #self.saveDeviceVariable('buf_linFitSearchRangeXvalues',inputPath)
        self.buf_linFitSearchRangeXvalues = cl.Buffer(
            self.ctx,
            self.mf.READ_ONLY | self.mf.COPY_HOST_PTR,
            hostbuf=self.linFitSearchRangeXvalues)
        self.loadHostVariable('linFitParameter', inputPath)
        self.loadHostVariable('fitIntercept_memSize', inputPath)
        self.fitIncline_memSize = self.fitIntercept_memSize
        self.loadHostVariable('rotatedUnitVector_memSize', inputPath)
        self.loadHostVariable('meanParameter', inputPath)
        #self.saveDeviceVariable('buf_meanRangeXvalues',inputPath)
        self.loadHostVariable('meanRangeXvalues', inputPath)
        self.buf_meanRangeXvalues = cl.Buffer(self.ctx,
                                              self.mf.READ_ONLY
                                              | self.mf.COPY_HOST_PTR,
                                              hostbuf=self.meanRangeXvalues)
        self.loadHostVariable('meanRangePositionOffset', inputPath)
        self.loadHostVariable('localMembranePositions_memSize', inputPath)
        self.loadDeviceVariable('dev_membraneCoordinates', inputPath)
        self.loadDeviceVariable('dev_membraneNormalVectors', inputPath)
        self.loadDeviceVariable('dev_fitInclines', inputPath)
        self.loadHostVariable('inclineTolerance', inputPath)
        self.inclineRefinementRange = np.int32(2)
        self.setWorkGroupSizes()

        for strideNr in range(self.nrOfStrides):
            # set the starting index of the coordinate array for each kernel instance
            kernelCoordinateStartingIndex = np.int32(
                strideNr * self.detectionKernelStrideSize)

            self.prg.findMembranePosition(self.queue, self.trackingGlobalSize, self.trackingWorkGroupSize, self.sampler, \
                     self.dev_Img, self.imgSizeX, self.imgSizeY, \
                     self.buf_localRotationMatrices, \
                     self.buf_linFitSearchRangeXvalues, \
                     self.linFitParameter, \
                     cl.LocalMemory(self.fitIntercept_memSize), cl.LocalMemory(self.fitIncline_memSize), \
                     cl.LocalMemory(self.rotatedUnitVector_memSize), \
                     self.meanParameter, \
                     self.buf_meanRangeXvalues, self.meanRangePositionOffset, \
                     cl.LocalMemory(self.localMembranePositions_memSize), \
                     self.dev_membraneCoordinates.data, \
                     self.dev_membraneNormalVectors.data, \
                     self.dev_fitInclines.data, \
                     kernelCoordinateStartingIndex, \
                     self.inclineTolerance, \
                     self.inclineRefinementRange)

            barrierEvent = cl.enqueue_barrier(self.queue)

        self.assertVector2EqualsExpectedResult(
            self.dev_membraneCoordinates,
            referencePath + '/' + referenceVariableName1 + '.npy')
        self.assertVector2EqualsExpectedResult(
            self.dev_membraneNormalVectors,
            referencePath + '/' + referenceVariableName2 + '.npy')
        self.assertVectorEqualsExpectedResult(
            self.dev_fitInclines,
            referencePath + '/' + referenceVariableName3 + '.npy')
        pass
Пример #26
0
    def _gpu_init(self):
        """Method to initialize all the data for GPU-accelerate search"""

        self.gpu_data = {}
        g = self.gpu_data
        d = self.data
        q = self.queue

        # move data to the GPU. All should be float32, as these is the native
        # lenght for GPUs
        g['rcore'] = cl_array.to_device(q, float32array(d['rcore'].array))
        g['rsurf'] = cl_array.to_device(q, float32array(d['rsurf'].array))
        # Make the scanning chain object an Image, as this is faster to rotate
        g['im_lsurf'] = cl.image_from_array(q.context,
                                            float32array(d['lsurf'].array))
        g['sampler'] = cl.Sampler(q.context, False, cl.addressing_mode.CLAMP,
                                  cl.filter_mode.LINEAR)

        if self.distance_restraints:
            g['restraints'] = cl_array.to_device(q,
                                                 float32array(d['restraints']))

        # Allocate arrays on the GPU
        g['lsurf'] = cl_array.zeros_like(g['rcore'])
        g['clashvol'] = cl_array.zeros_like(g['rcore'])
        g['intervol'] = cl_array.zeros_like(g['rcore'])
        g['interspace'] = cl_array.zeros(q, d['shape'], dtype=np.int32)
        g['restspace'] = cl_array.zeros_like(g['interspace'])
        g['access_interspace'] = cl_array.zeros_like(g['interspace'])
        g['best_access_interspace'] = cl_array.zeros_like(g['interspace'])

        # arrays for counting
        # Reductions are typically tedious on GPU, and we need to define the
        # workgroupsize to allocate the correct amount of data
        WORKGROUPSIZE = 32
        nsubhists = int(np.ceil(g['rcore'].size / WORKGROUPSIZE))
        g['subhists'] = cl_array.zeros(q, (nsubhists, d['nrestraints'] + 1),
                                       dtype=np.float32)
        g['viol_counter'] = cl_array.zeros(
            q, (nsubhists, d['nrestraints'], d['nrestraints']),
            dtype=np.float32)

        # complex arrays
        g['ft_shape'] = list(d['shape'])
        g['ft_shape'][0] = d['shape'][0] // 2 + 1
        g['ft_rcore'] = cl_array.zeros(q, g['ft_shape'], dtype=np.complex64)
        g['ft_rsurf'] = cl_array.zeros_like(g['ft_rcore'])
        g['ft_lsurf'] = cl_array.zeros_like(g['ft_rcore'])
        g['ft_clashvol'] = cl_array.zeros_like(g['ft_rcore'])
        g['ft_intervol'] = cl_array.zeros_like(g['ft_rcore'])

        # other miscellanious data
        g['nrot'] = d['nrot']
        g['max_clash'] = d['max_clash']
        g['min_interaction'] = d['min_interaction']

        # kernels
        g['k'] = Kernels(q.context)
        g['k'].rfftn = pyclfft.RFFTn(q.context, d['shape'])
        g['k'].irfftn = pyclfft.iRFFTn(q.context, d['shape'])

        # initial calculations
        g['k'].rfftn(q, g['rcore'], g['ft_rcore'])
        g['k'].rfftn(q, g['rsurf'], g['ft_rsurf'])
Пример #27
0
def test_image_3d(ctx_factory):
    #test for image_from_array for 3d image of float2
    context = ctx_factory()

    device, = context.devices

    if not device.image_support:
        from pytest import skip
        skip("images not supported on %s" % device)

    if device.platform.vendor == "Intel(R) Corporation":
        from pytest import skip
        skip("images crashy on %s" % device)
    _skip_if_pocl(device.platform, 'pocl does not support CL_ADDRESS_CLAMP')

    prg = cl.Program(
        context, """
        __kernel void copy_image_plane(
          __global float2 *dest,
          __read_only image3d_t src,
          sampler_t samp,
          int stride0,
          int stride1)
        {
          int d0 = get_global_id(0);
          int d1 = get_global_id(1);
          int d2 = get_global_id(2);
          /*
          const sampler_t samp =
            CLK_NORMALIZED_COORDS_FALSE
            | CLK_ADDRESS_CLAMP
            | CLK_FILTER_NEAREST;
            */
          dest[d0*stride0 + d1*stride1 + d2] = read_imagef(
                src, samp, (float4)(d2, d1, d0, 0)).xy;
        }
        """).build()

    num_channels = 2
    shape = (3, 4, 2)
    a = np.random.random(shape + (num_channels, )).astype(np.float32)

    queue = cl.CommandQueue(context)
    try:
        a_img = cl.image_from_array(context, a, num_channels)
    except cl.RuntimeError:
        import sys
        exc = sys.exc_info()[1]
        if exc.code == cl.status_code.IMAGE_FORMAT_NOT_SUPPORTED:
            from pytest import skip
            skip("required image format not supported on %s" % device.name)
        else:
            raise

    a_dest = cl.Buffer(context, cl.mem_flags.READ_WRITE, a.nbytes)

    samp = cl.Sampler(context, False, cl.addressing_mode.CLAMP,
                      cl.filter_mode.NEAREST)
    prg.copy_image_plane(
        queue,
        shape,
        None,
        a_dest,
        a_img,
        samp,
        np.int32(a.strides[0] / a.itemsize / num_channels),
        np.int32(a.strides[1] / a.itemsize / num_channels),
    )

    a_result = np.empty_like(a)
    cl.enqueue_copy(queue, a_result, a_dest)

    good = la.norm(a_result - a) == 0
    if not good:
        if queue.device.type & cl.device_type.CPU:
            assert good, (
                "The image implementation on your CPU CL platform '%s' "
                "returned bad values. This is bad, but common." %
                queue.device.platform)
        else:
            assert good
Пример #28
0
    def setup_device(self, imshape):

        print('Setting up with imshape = %s' % (str(imshape)))

        self.cached_shape = imshape

        self.clIm = cla.Array(self.q, imshape, np.float32)
        self.clm = cla.empty_like(self.clIm)
        self.clx = cla.empty_like(self.clIm)
        self.cly = cla.empty_like(self.clIm)
        self.clO = cla.zeros_like(self.clIm)
        self.clM = cla.zeros_like(self.clIm)
        self.clF = cla.empty_like(self.clIm)
        self.clS = cla.empty_like(self.clIm)
        self.clThisS = cla.empty_like(self.clIm)
        self.clScratch = cla.empty_like(self.clIm)

        self.radial_prg = pyopencl.Program(self.ctx, RADIAL_PROGRAM).build()

        self.sobel = Sobel(self.ctx, self.q)

        #self.sepcorr2d = NaiveSeparableCorrelation(self.ctx, self.q)
        self.sepcorr2d = LocalMemorySeparableCorrelation(self.ctx, self.q)

        self.accum = ElementwiseKernel(self.ctx,
                                       'float *a, float *b',
                                       'a[i] += b[i]')

        self.norm_s = ElementwiseKernel(self.ctx,
                                        'float *s, const float nRadii',
                                        's[i] = -1 * s[i] / nRadii',
                                        'norm_s')

        self.accum_s = ElementwiseKernel(self.ctx,
                                         'float *a, float *b, const float nr',
                                         'a[i] -= b[i] / nr')

        self.gaussians = {}
        self.gaussian_prgs = {}

        self.minmax = MinMaxKernel(self.ctx, self.q)

        # starburst storage

        clImageFormat = cl.ImageFormat(cl.channel_order.R,
                                       cl.channel_type.FLOAT)

        self.clIm2D = cl.Image(self.ctx,
                               mf.READ_ONLY,
                               clImageFormat,
                               imshape)

        # Create sampler for sampling image object
        self.imSampler = cl.Sampler(self.ctx,
                                    False,  # Non-normalized coordinates
                                    cl.addressing_mode.CLAMP_TO_EDGE,
                                    cl.filter_mode.LINEAR)

        self.cl_find_ray_boundaries = FindRayBoundaries(self.ctx, self.q)

        self.calcF = self.radial_prg.calcF
        self.calcOM = self.radial_prg.calcOM
Пример #29
0
def test_get_info(ctx_factory):
    ctx = ctx_factory()
    device, = ctx.devices
    platform = device.platform

    failure_count = [0]

    pocl_quirks = [
        (cl.Buffer, cl.mem_info.OFFSET),
        (cl.Program, cl.program_info.BINARIES),
        (cl.Program, cl.program_info.BINARY_SIZES),
    ]
    if ctx._get_cl_version() >= (1, 2) and cl.get_cl_header_version() >= (1,
                                                                          2):
        pocl_quirks.extend([
            (cl.Program, cl.program_info.KERNEL_NAMES),
            (cl.Program, cl.program_info.NUM_KERNELS),
        ])
    CRASH_QUIRKS = [  # noqa
        (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.0 CUDA 3.0.1"), [
            (cl.Event, cl.event_info.COMMAND_QUEUE),
        ]),
        (("NVIDIA Corporation", "NVIDIA CUDA", "OpenCL 1.2 CUDA 7.5"), [
            (cl.Buffer, getattr(cl.mem_info, "USES_SVM_POINTER", None)),
        ]),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.8-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.8"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.9-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.9"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.10-pre"), pocl_quirks),
        (("The pocl project", "Portable Computing Language",
          "OpenCL 1.2 pocl 0.10"), pocl_quirks),
        (("Apple", "Apple", "OpenCL 1.2"), [
            (cl.Program, cl.program_info.SOURCE),
        ]),
    ]
    QUIRKS = []  # noqa

    def find_quirk(quirk_list, cl_obj, info):
        for (vendor, name, version), quirks in quirk_list:
            if (vendor == platform.vendor and name == platform.name
                    and platform.version.startswith(version)):
                for quirk_cls, quirk_info in quirks:
                    if (isinstance(cl_obj, quirk_cls) and quirk_info == info):
                        return True

        return False

    def do_test(cl_obj, info_cls, func=None, try_attr_form=True):
        if func is None:

            def func(info):
                cl_obj.get_info(info)

        for info_name in dir(info_cls):
            if not info_name.startswith("_") and info_name != "to_string":
                print(info_cls, info_name)
                info = getattr(info_cls, info_name)

                if find_quirk(CRASH_QUIRKS, cl_obj, info):
                    print("not executing get_info", type(cl_obj), info_name)
                    print("(known crash quirk for %s)" % platform.name)
                    continue

                try:
                    func(info)
                except:
                    msg = "failed get_info", type(cl_obj), info_name

                    if find_quirk(QUIRKS, cl_obj, info):
                        msg += ("(known quirk for %s)" % platform.name)
                    else:
                        failure_count[0] += 1

                if try_attr_form:
                    try:
                        getattr(cl_obj, info_name.lower())
                    except:
                        print("failed attr-based get_info", type(cl_obj),
                              info_name)

                        if find_quirk(QUIRKS, cl_obj, info):
                            print("(known quirk for %s)" % platform.name)
                        else:
                            failure_count[0] += 1

    do_test(platform, cl.platform_info)
    do_test(device, cl.device_info)
    do_test(ctx, cl.context_info)

    props = 0
    if (device.queue_properties
            & cl.command_queue_properties.PROFILING_ENABLE):
        profiling = True
        props = cl.command_queue_properties.PROFILING_ENABLE
    queue = cl.CommandQueue(ctx, properties=props)
    do_test(queue, cl.command_queue_info)

    prg = cl.Program(
        ctx, """
        __kernel void sum(__global float *a)
        { a[get_global_id(0)] *= 2; }
        """).build()
    do_test(prg, cl.program_info)
    do_test(prg,
            cl.program_build_info,
            lambda info: prg.get_build_info(device, info),
            try_attr_form=False)

    n = 2000
    a_buf = cl.Buffer(ctx, 0, n * 4)

    do_test(a_buf, cl.mem_info)

    kernel = prg.sum
    do_test(kernel, cl.kernel_info)

    evt = kernel(queue, (n, ), None, a_buf)
    do_test(evt, cl.event_info)

    if profiling:
        evt.wait()
        do_test(evt,
                cl.profiling_info,
                lambda info: evt.get_profiling_info(info),
                try_attr_form=False)

    # crashes on intel...
    # and pocl does not support CL_ADDRESS_CLAMP
    if device.image_support and platform.vendor not in [
            "Intel(R) Corporation",
            "The pocl project",
    ]:
        smp = cl.Sampler(ctx, False, cl.addressing_mode.CLAMP,
                         cl.filter_mode.NEAREST)
        do_test(smp, cl.sampler_info)

        img_format = cl.get_supported_image_formats(
            ctx, cl.mem_flags.READ_ONLY, cl.mem_object_type.IMAGE2D)[0]

        img = cl.Image(ctx, cl.mem_flags.READ_ONLY, img_format, (128, 256))
        assert img.shape == (128, 256)

        img.depth
        img.image.depth
        do_test(img, cl.image_info, lambda info: img.get_image_info(info))
Пример #30
0
def gpu_filter(in_put='in.jpg'):
    gpu_filter_ = GpuFilter()
    image_objects = [0, 0]
    # if len(sys.argv) != 3:
    #     print "  : " + sys.argv[0] + " <inputImageFile> <outputImageFile>"
    #     exit()

    # Create an OpenCL context on first available platform
    context, device = gpu_filter_.create_context()

    if context is None:
        print "Failed to create OpenCL context."
        exit()

    # Create a command-queue on the first device available on the context that has been created
    command_queue = cl.CommandQueue(context, device)

    # Make sure the device supports images, otherwise exit
    if not device.get_info(cl.device_info.IMAGE_SUPPORT):
        print "OpenCL device does not support images."
        exit()

    # Load input image from file and load it into an OpenCL image object
    image_objects[0], img_size = gpu_filter_.load_image(context, in_put)
    # print image_objects[0], img_size

    # Create output image object
    cl_image_format = cl.ImageFormat(cl.channel_order.RGBA,
                                     cl.channel_type.UNORM_INT8)

    image_objects[1] = cl.Image(context, cl.mem_flags.WRITE_ONLY,
                                cl_image_format, img_size)

    # Create sampler for sampling image object
    sampler = cl.Sampler(
        context,
        False,  # Non-normalized coordinates
        cl.addressing_mode.CLAMP,
        cl.filter_mode.NEAREST)

    # Create OpenCL program
    program = gpu_filter_.create_program(context, device, "ImageFilter2D.cl")

    # Call the kernel directly
    local_work_size = (16, 16)
    global_work_size = (gpu_filter_.round_up(local_work_size[0], img_size[0]),
                        gpu_filter_.round_up(local_work_size[1], img_size[1]))
    program.gaussian_filter(command_queue, global_work_size, local_work_size,
                            image_objects[0], image_objects[1], sampler,
                            numpy.int32(img_size[0]), numpy.int32(img_size[1]))

    # Read the output buffer back to the Host
    cl_buffer = numpy.zeros(img_size[0] * img_size[1] * 4, numpy.uint8)
    origin = (0, 0, 0)
    region = (img_size[0], img_size[1], 1)
    cl.enqueue_read_image(command_queue, image_objects[1], origin, region,
                          cl_buffer).wait()
    print "Executed program successfully."

    # return the image matrix
    return gpu_filter_.image_convert(cl_buffer, img_size)