def findSample(self, meanN, nMax):
    """Run the ``countEmissions`` kernel for *meanN* and return its integer output array.

    An auxiliary float32 array with *nMax* entries per element of *meanN* is filled by
    ``self.generator.fill_uniform`` and handed to the kernel together with *meanN*. The
    returned array is int32 with one entry per element along the first axis of *meanN*.
    """
    n_entries = meanN.shape[0]

    # Random numbers consumed by the kernel: nMax of them for every entry
    random_draws = cl_array.Array(self.queue, [n_entries, nMax], numpy.float32)
    self.generator.fill_uniform(random_draws)

    counts = cl_array.Array(self.queue, [n_entries], dtype=numpy.int32)
    self.program.countEmissions(
        self.queue, (meanN.size, ), None,
        meanN.data, random_draws.data, numpy.int32(nMax),
        counts.data, numpy.int32(n_entries))
    return counts
示例#2
0
def propagate(samples, shape, energies, distance, pixel_size, region=None,
              apply_phase_factor=False, mollified=True, detector=None, offset=None,
              queue=None, out=None, t=None, check=True, block=False):
    """Propagate *samples* (:class:`syris.opticalelements.OpticalElement` instances) with
    *shape* given as (y, x) at *energies* to *distance* and return the accumulated intensity.

    Use *pixel_size*, limit coherence to *region*, apply the Fresnel approximation phase
    factor if *apply_phase_factor* is True, and pass *mollified* on to the propagator
    computation. *offset* is the sample offset and *t* is handed to the transfer computation.
    If *detector* is given, the intensity of every energy goes through its ``convert`` method
    before being accumulated. *queue* is an OpenCL command queue (the configured default is
    used when it is None). If *block* is True, wait for the kernels to finish. If *check* is
    True, check the transmission function sampling.

    NOTE: *out* is accepted but not used by this function.
    """
    queue = cfg.OPENCL.queue if queue is None else queue
    wavefield = cl_array.Array(queue, shape, dtype=cfg.PRECISION.np_cplx)
    total = cl_array.zeros(queue, shape, cfg.PRECISION.np_float)

    for energy in energies:
        wavefield.fill(0)
        # transfer_many returns a new array, hence the rebinding
        wavefield = transfer_many(samples, shape, pixel_size, energy, offset=offset,
                                  queue=queue, out=wavefield, t=t, check=check, block=block)
        if distance != 0 * q.m:
            wavelength = energy_to_wavelength(energy)
            kernel = compute_propagator(wavefield.shape[0], distance, wavelength, pixel_size,
                                        region=region,
                                        apply_phase_factor=apply_phase_factor,
                                        mollified=mollified, queue=queue, block=block)
            # Multiply by the propagator in Fourier space
            fft_2(wavefield, queue=queue, block=block)
            wavefield *= kernel
            ifft_2(wavefield, queue=queue, block=block)

        contribution = abs(wavefield) ** 2
        if detector:
            contribution = detector.convert(contribution, energy)
        total += contribution

    return total
示例#3
0
def get_array(bh_ary, queue):
    """Return a :class:`pyopencl.array.Array` on *queue* with the shape and dtype of *bh_ary*,
    using the result of ``get_buffer(bh_ary)`` as its data.
    """
    _import_pyopencl_module()
    from pyopencl import array as clarray

    return clarray.Array(
        queue,
        bh_ary.shape,
        bh_ary.dtype,
        data=get_buffer(bh_ary),
    )
示例#4
0
def transfer_many(objects, shape, pixel_size, energy, exponent=False, offset=None, queue=None,
                  out=None, t=None, check=True, block=False):
    """Compute the combined transmission of several *objects*.

    The exponents of all objects are summed into *out* (allocated and zeroed if None). If
    *exponent* is True, only that summed exponent is returned; if it is False, the exponential
    of the sum is evaluated and returned. Use *shape* (y, x), *pixel_size*, *energy*, *offset*
    as (y, x), OpenCL command *queue* (the configured default when None) and time *t*. If
    *check* is True, the summed exponent is checked for sufficient sampling and an error is
    logged on failure. *block* is passed on to every object's ``transfer`` call.

    When the exponential is applied, the returned array is different from the input *out*
    because of the pyopencl.clmath behavior.
    """
    if queue is None:
        queue = cfg.OPENCL.queue
    if out is None:
        out = cl_array.zeros(queue, shape, cfg.PRECISION.np_cplx)
    # Scratch array handed to every object as its own output buffer
    u_sample = cl_array.Array(queue, shape, cfg.PRECISION.np_cplx)

    for sample in objects:
        # Sampling is checked once on the sum below, not per object
        out += sample.transfer(shape, pixel_size, energy, exponent=True, offset=offset, t=t,
                               queue=queue, out=u_sample, check=False, block=block)

    if check and not is_wavefield_sampling_ok(out, queue=queue):
        LOG.error('Insufficient transmission function sampling')

    # Apply the exponent
    if not exponent:
        out = clmath.exp(out, queue=queue)

    return out
示例#5
0
def test_negative_dim_rejection(ctx_factory):
    """Check that constructing an Array with a negative dimension raises ValueError."""
    queue = cl.CommandQueue(ctx_factory())

    # Scalar shape, 1-tuple, then a negative entry on either side of a 2-tuple
    bad_shapes = [-10, (-10, )]
    bad_shapes.extend((left_dim, -1) for left_dim in (-1, 0, 1))
    bad_shapes.extend((-1, right_dim) for right_dim in (-1, 0, 1))

    for bad_shape in bad_shapes:
        with pytest.raises(ValueError):
            cl_array.Array(queue, shape=bad_shape, dtype=np.float64)
示例#6
0
def project_metaballs_naive(metaballs,
                            shape,
                            pixel_size,
                            offset=None,
                            z_step=None,
                            queue=None,
                            out=None,
                            block=False):
    """Project a list of :class:`.MetaBall` on an image plane with *shape*, *pixel_size*. *z_step*
    is the physical step in the z-dimension, if not specified it is the same as *pixel_size*.
    *offset* is the physical spatial body offset as (y, x). Use OpenCL *queue* (the configured
    default when not given) and *out* pyopencl Array instance for returning the result (a new
    array is allocated when None). If *block* is True, wait for the kernel to finish.
    """
    def get_extrema(sgn):
        # Extreme z value over all balls, pushed outwards by 2 * radius plus one x pixel;
        # sgn > 0 gives the maximum, otherwise the minimum.
        func = np.max if sgn > 0 else np.min
        x_ps = util.make_tuple(pixel_size)[1]
        res = [(ball.position[2] + sgn *
                (2 * ball.radius + x_ps)).simplified.magnitude
               for ball in metaballs]

        return func(res)

    if offset is None:
        offset = (0, 0) * q.m
    if not queue:
        queue = cfg.OPENCL.queue
    if out is None:
        out = cl_array.Array(queue, shape, cfg.PRECISION.np_float)

    packed = b"".join([body.pack() for body in metaballs])
    # np.fromstring is deprecated for binary input; frombuffer interprets the same bytes
    data = np.frombuffer(packed, dtype=np.float32)
    data = cl_array.to_device(queue, data)
    n, m = shape
    ps = util.make_tuple(pixel_size.simplified.magnitude)
    z_step = ps[1] if z_step is None else z_step.simplified.magnitude

    z_range = get_extrema(-1), get_extrema(1)
    # Kernel vectors are ordered (x, y), hence the reversals of the (y, x) inputs
    offset = g_util.make_vfloat2(*offset.simplified.magnitude[::-1])

    # Launch on the same queue the buffers were created on, not the global default
    ev = cfg.OPENCL.programs["geometry"].naive_metaballs(
        queue,
        (m, n),
        None,
        out.data,
        data.data,
        np.int32(len(metaballs)),
        offset,
        g_util.make_vfloat2(*z_range),
        cfg.PRECISION.np_float(z_step),
        g_util.make_vfloat2(*ps[::-1]),
        np.int32(True),
    )
    if block:
        ev.wait()

    return out
示例#7
0
def test_vector_fill(ctx_factory):
    """Fill an array of float4 vectors and check that the dtype survives the round trip."""
    queue = cl.CommandQueue(ctx_factory())
    vec_dtype = cltypes.float4

    filled = cl_array.Array(queue, 100, dtype=vec_dtype)
    filled.fill(cltypes.make_float4(0.0, 0.0, 1.0, 0.0))
    host_copy = filled.get()
    assert host_copy.dtype == vec_dtype

    # Only exercises zeros() with a vector dtype; the result is not inspected
    filled = cl_array.zeros(queue, 100, dtype=vec_dtype)
示例#8
0
def compute_propagator(size, distance, lam, pixel_size, fresnel=True, region=None,
                       apply_phase_factor=False, mollified=True, queue=None, block=False):
    """Create a propagator with (*size*, *size*) dimensions for propagation *distance*, wavelength
    *lam* and *pixel_size*. If *fresnel* is True, use the Fresnel approximation, if it is False, use
    the full propagator (don't approximate the square root). *region* is the diameter of the
    wavefront area which is capable of interference. If *apply_phase_factor* is True, apply the
    phase factor defined by Fresnel approximation. If *mollified* is True the aliased frequencies
    are suppressed. If command *queue* is specified, execute the kernel on it. If *block* is True,
    wait for the kernel to finish.

    Raise :class:`ValueError` if *size* is odd. Insufficient sampling is only logged as an
    error, the propagator is computed regardless.
    """
    if size % 2:
        raise ValueError('Only even sizes are supported')
    if queue is None:
        queue = cfg.OPENCL.queue

    # Check the sampling
    r_cutoff = compute_aliasing_limit(size, lam, pixel_size, distance, fov=region, fourier=False)
    min_n = 4
    if r_cutoff < min_n:
        LOG.error('Propagator too narrow, propagation distance too small or pixel size too large')
    f_cutoff = compute_aliasing_limit(size, lam, pixel_size, distance, fov=region, fourier=True)
    if f_cutoff < min_n:
        LOG.error('Propagator too wide, propagation distance too large or pixel size too small')

    out = cl_array.Array(queue, (size, size), cfg.PRECISION.np_cplx)
    if apply_phase_factor:
        phase_factor = np.exp(2 * np.pi * distance.simplified / lam.simplified * 1j)
    else:
        phase_factor = 0 + 0j

    # Floor division keeps the global size integral under Python 3 (size is even here, so the
    # value is the same as with true division).
    half = size // 2 + 1
    ev = cfg.OPENCL.programs['physics'].propagator(queue,
                                                   (half, half),
                                                   None,
                                                   out.data,
                                                   cfg.PRECISION.np_float(distance.simplified),
                                                   cfg.PRECISION.np_float(lam.simplified),
                                                   cfg.PRECISION.np_float(pixel_size.simplified),
                                                   g_util.make_vcomplex(phase_factor),
                                                   np.int32(fresnel))
    if block:
        ev.wait()

    if mollified:
        # Use the tighter of the full-field and the region-limited aliasing limits
        fwtm = compute_aliasing_limit(size, lam, pixel_size, distance,
                                      fov=size * pixel_size, fourier=True)
        if region is not None:
            fwtm_region = compute_aliasing_limit(size, lam, pixel_size, distance, region,
                                                 fourier=True)
            fwtm = min(fwtm_region, fwtm)

        sigma = fwnm_to_sigma(fwtm, n=10)
        mollifier = get_gauss_2d(size, sigma, fourier=False, queue=queue, block=block)
        out = out * mollifier

    return out
示例#9
0
def project_metaballs(metaballs,
                      shape,
                      pixel_size,
                      offset=None,
                      queue=None,
                      out=None,
                      block=False):
    """Project a list of :class:`.MetaBall` on an image plane with *shape*, *pixel_size*.  *offset*
    is the physical spatial body offset as (y, x). Use OpenCL *queue* (the configured default
    when not given) and *out* pyopencl Array instance for returning the result (a new array is
    allocated when None). If *block* is True, wait for the kernel to finish.
    """
    packed = b"".join([body.pack() for body in metaballs])
    n, m = shape
    ps = pixel_size.simplified.magnitude
    if offset is None:
        offset = (0, 0) * q.m
    if not queue:
        queue = cfg.OPENCL.queue

    # Packed bodies plus scratch buffers for the kernel, sized per pixel and per body
    bodies_mem = cl.Buffer(cfg.OPENCL.ctx,
                           cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR,
                           hostbuf=packed)
    pbodies_mem = cl.Buffer(cfg.OPENCL.ctx,
                            cl.mem_flags.READ_WRITE,
                            size=m * n * cfg.MAX_META_BODIES * 4 * 7)
    left_mem = cl.Buffer(cfg.OPENCL.ctx,
                         cl.mem_flags.READ_WRITE,
                         size=m * n * 2 * cfg.MAX_META_BODIES)
    right_mem = cl.Buffer(cfg.OPENCL.ctx,
                          cl.mem_flags.READ_WRITE,
                          size=m * n * 2 * cfg.MAX_META_BODIES)
    # Kernel vectors are ordered (x, y), hence the reversal of the (y, x) offset
    offset = g_util.make_vfloat2(*offset.simplified.magnitude[::-1])
    if out is None:
        out = cl_array.Array(queue, shape, cfg.PRECISION.np_float)

    # Launch on the requested queue (the one *out* lives on), not the global default
    ev = cfg.OPENCL.programs["geometry"].metaballs(
        queue,
        (m, n),
        None,
        out.data,
        bodies_mem,
        pbodies_mem,
        left_mem,
        right_mem,
        np.int32(len(metaballs)),
        offset,
        cl_array.vec.make_int2(0, 0),
        cl_array.vec.make_int4(0, 0, m, n),
        g_util.make_vfloat2(ps[1], ps[0]),
        np.int32(True),
    )
    if block:
        ev.wait()

    return out
示例#10
0
 def compute_RHS(self, line_da, f, dx):
     """Run ``compute_RHS_kernel`` on *f* and return the resulting device array.

     *f* is first scattered into a local vector created by *line_da*, copied to the device
     and passed to the kernel together with *dx* and the rank and size of *line_da*. The
     returned float64 array has shape (nz, ny, nx).
     """
     local_f = line_da.create_local_vector()
     line_da.global_to_local(f, local_f)
     f_dev = cl_array.to_device(self.queue, local_f)

     nx, ny, nz = line_da.nx, line_da.ny, line_da.nz
     rhs_dev = cl_array.Array(self.queue, (nz, ny, nx), dtype=np.float64)
     self.compute_RHS_kernel(
         self.queue, (nx, ny, nz), None,
         f_dev.data, rhs_dev.data,
         np.float64(dx), np.int32(line_da.rank), np.int32(line_da.size))
     return rhs_dev
示例#11
0
    def make_sequence(
        self,
        t_start,
        t_end,
        shape=None,
        shot_noise=True,
        amplifier_noise=True,
        source_blur=True,
        queue=None,
    ):
        """Generate camera images for frames starting between *t_start* and *t_end*.

        Frames are spaced by the inverse of the camera frame rate. *shape* defaults to the
        camera shape; the pixel size is scaled by the ratio of the camera height to the
        requested height. Within every frame the intensity is accumulated over the
        sub-intervals given by ``get_next_time``. If *source_blur* is True, the accumulated
        image is multiplied by the source blur kernel in Fourier space. *shot_noise* and
        *amplifier_noise* are passed to the camera's ``get_image``. *queue* defaults to the
        configured OpenCL queue.
        """
        if queue is None:
            queue = cfg.OPENCL.queue

        camera_shape = self.detector.camera.shape
        if shape is None:
            shape = camera_shape
        ps = camera_shape[0] / float(shape[0]) * self.detector.pixel_size
        frame_time = 1 / self.detector.camera.fps
        frame_starts = np.arange(
            t_start.simplified.magnitude,
            t_end.simplified.magnitude,
            frame_time.simplified.magnitude,
        ) * q.s
        image = cl_array.Array(queue, shape, dtype=cfg.PRECISION.np_float)
        if source_blur:
            source_blur_kernel = self.make_source_blur(shape, ps, queue=queue, block=False)
        else:
            source_blur_kernel = None

        LOG.debug(
            "Making sequence with shape {} and pixel size {} from {} to {}".format(
                shape, ps, t_start, t_end))

        for t_0 in frame_starts:
            frame_end = t_0 + frame_time
            image.fill(0)

            # Accumulate intensity over the sub-intervals that fit into this frame
            t = t_0
            t_next = self.get_next_time(t, ps)
            while t_next < frame_end:
                LOG.debug("Motion blur: {} -> {}".format(t, t_next))
                image += self.compute_intensity(t, t_next, shape, ps)
                t, t_next = t_next, self.get_next_time(t_next, ps)
            # Remainder up to the end of the frame
            image += self.compute_intensity(t, frame_end, shape, ps)

            if source_blur:
                blurred = ip.fft_2(image) * source_blur_kernel
                image = ip.ifft_2(blurred).real
            camera_image = self.detector.camera.get_image(
                image, shot_noise=shot_noise, amplifier_noise=amplifier_noise)
            LOG.debug("Image: {} -> {}".format(t_0, frame_end))
            yield camera_image
示例#12
0
def transfer(thickness, refractive_index, wavelength, exponent=False, queue=None, out=None,
             check=True, block=False):
    """Compute the transmission function of *thickness* with *refractive_index* at *wavelength*.

    *thickness* is either a pyopencl array, used as is, or a quantity which is converted to
    the configured float precision and copied to the device. If *exponent* is True, only the
    exponent of the transmission function is computed and returned. If *check* is True, the
    exponent is checked for sufficient sampling and an error is logged when the check fails.
    Use command *queue* (the configured default when None) and *out* pyopencl array (allocated
    when None). If *block* is True, wait for the kernel to finish. When the exponential is
    applied via pyopencl.clmath, the returned array is different from the input *out*.
    """
    queue = cfg.OPENCL.queue if queue is None else queue

    if isinstance(thickness, cl_array.Array):
        thickness_mem = thickness
    else:
        host_thickness = thickness.simplified.magnitude.astype(cfg.PRECISION.np_float)
        thickness_mem = cl_array.to_device(queue, host_thickness)

    if out is None:
        out = cl_array.Array(queue, thickness_mem.shape, cfg.PRECISION.np_cplx)

    # Kernels are launched with the reversed array shape as global size
    global_size = thickness_mem.shape[::-1]
    physics = cfg.OPENCL.programs['physics']

    if not (exponent or check):
        # Neither the bare exponent nor a sampling check is needed: direct kernel
        ev = physics.transfer(queue,
                              global_size,
                              None,
                              out.data,
                              thickness_mem.data,
                              cfg.PRECISION.np_cplx(refractive_index),
                              cfg.PRECISION.np_float(wavelength.simplified.magnitude))
    else:
        wavenumber = cfg.PRECISION.np_float(2 * np.pi / wavelength.simplified.magnitude)
        ev = physics.transmission_add(queue,
                                      global_size,
                                      None,
                                      out.data,
                                      thickness_mem.data,
                                      cfg.PRECISION.np_cplx(refractive_index),
                                      wavenumber,
                                      np.int32(1))
        if check and not is_wavefield_sampling_ok(out, queue=queue):
            LOG.error('Insufficient transmission function sampling')
        if not exponent:
            # Apply the exponent
            out = clmath.exp(out, queue=queue)

    if block:
        ev.wait()

    return out
示例#13
0
def empty(shape,
          dtype,
          cq: CommandQueue = None,
          order="C",
          allocator=None,
          data=None,
          offset=0,
          strides=None,
          events=None,
          _flags=None):
    """Create an uninitialized array of *shape* and *dtype* wrapped by ``Array.from_array``.

    *cq* defaults to the result of ``get_current_queue()``. All remaining arguments are
    forwarded to the ``cl_array.Array`` constructor; previously *data*, *offset*, *strides*,
    *events* and *_flags* were accepted but silently ignored. With their default values the
    behavior is unchanged.
    """
    cq = get_current_queue() if cq is None else cq
    backing = cl_array.Array(cq,
                             shape,
                             dtype,
                             order=order,
                             allocator=allocator,
                             data=data,
                             offset=offset,
                             strides=strides,
                             events=events,
                             _flags=_flags)
    res = Array.from_array(backing)
    res.add_latest_event('empty')
    return res
示例#14
0
    def __init__(self,
                 size,
                 dim,
                 mask=(OCLC_X | OCLC_V | OCLC_A | OCLC_M),
                 dtype=np.float32):
        """Create an OpenCL context and queue and allocate the arrays selected by *mask*.

        The V, X and A arrays have shape (*size*, *dim*), the M array has shape (*size*, 1);
        all use *dtype*. An array whose flag is not set in *mask* is stored as None. The
        command queue is created with profiling enabled.
        """
        self.__dtype = dtype
        self.__size = size
        self.__dim = dim
        self.__opt_arrays = {}

        self.__cl_context = cl.create_some_context()
        self.__cl_queue = cl.CommandQueue(
            self.__cl_context,
            properties=cl.command_queue_properties.PROFILING_ENABLE)

        queue = self.__cl_queue
        vector_shape = (size, dim)

        self.__V_cla = cla.Array(queue, vector_shape, dtype) if mask & OCLC_V else None
        self.__X_cla = cla.Array(queue, vector_shape, dtype) if mask & OCLC_X else None
        self.__A_cla = cla.Array(queue, vector_shape, dtype) if mask & OCLC_A else None
        self.__M_cla = cla.Array(queue, (size, 1), dtype) if mask & OCLC_M else None
示例#15
0
 def forward(ctx, input):
     """Save *input* for the backward pass and return the result of a sum-reduction kernel.

     The reduction kernel (neutral element "0", reduce expression "a+b") is built for
     float32 on ``ctx.cl_ctx`` and applied to a pyopencl array created on ``ctx.cl_queue``
     with *input* as its data. The ``data`` attribute of the kernel result is returned after
     its ``shape`` is set to (1, ) and its ``dtype`` to float32.
     """
     ctx.save_for_backward(input)
     sum_kernel = cl_reduct_krnl_build(
         ctx.cl_ctx,
         np.float32,
         neutral="0",
         reduce_expr="a+b",
         map_expr="x[i]",
         arguments="__global float *x",
     )
     device_input = pycl_array.Array(ctx.cl_queue,
                                     input.size,
                                     dtype=np.float32,
                                     data=input)
     ret = sum_kernel(device_input).data
     ret.shape = (1, )
     ret.dtype = np.float32
     return ret
    def computeAcc(self, xd, yd, zd, vxd, vyd, vzd, qd, md, axd, ayd, azd, t,
                   dt):
        """Launch the photon-scattering kernels and the recoil-kick kernel for one step *dt*.

        *xd*, *yd*, *zd* and *vxd*, *vyd*, *vzd* are device arrays passed to the
        mean-scattered-photons kernel; *md* and the output arrays *axd*, *ayd*, *azd* are
        passed to the ``computeKicks`` kernel. The homogeneous-beam kernel is used when
        ``self.sigma`` is None, the Gaussian-beam kernel otherwise. *qd* and *t* are not used
        by this method. Nothing is returned.
        """

        # Compute average numbers of scattered photons
        nbars = cl_array.zeros_like(xd)
        # Identity test: ``== None`` would compare element-wise if sigma were an array
        if self.sigma is None:
            self.program.compute_mean_scattered_photons_homogeneous_beam(
                self.queue, (xd.size, ), None, xd.data,
                yd.data, zd.data, vxd.data, vyd.data, vzd.data,
                numpy.float32(self.k0[0]), numpy.float32(self.k0[1]),
                numpy.float32(self.k0[2]), numpy.float32(self.gamma),
                numpy.float32(self.delta0), numpy.float32(self.S),
                numpy.float32(dt), numpy.int32(xd.size), nbars.data)
        else:
            self.program.compute_mean_scattered_photons_gaussian_beam(
                self.queue, (xd.size, ), None, xd.data,
                yd.data, zd.data, vxd.data, vyd.data, vzd.data,
                numpy.float32(self.k0[0]), numpy.float32(self.k0[1]),
                numpy.float32(self.k0[2]), numpy.float32(self.x0[0]),
                numpy.float32(self.x0[1]), numpy.float32(self.x0[2]),
                numpy.float32(self.sigma), numpy.float32(self.gamma),
                numpy.float32(self.delta0), numpy.float32(self.S),
                numpy.float32(dt), numpy.int32(xd.size), nbars.data)

        # Compute scattered photons and associated recoil kicks
        # nMax is the number of random draws and recoil directions allocated per entry
        nMax = int(
            math.ceil(10.0 * self.S * (self.gamma / 2.0 / numpy.pi) * dt))
        actualNs = self.findSample(nbars, nMax)
        recoilDirectionsD = cl_array.Array(self.queue, [nbars.size, nMax, 3],
                                           dtype=numpy.float32)
        self.generator.fill_normal(recoilDirectionsD)

        # apply recoil kicks to particles
        recoilMomentum = numpy.linalg.norm(
            self.k0) * self._PlanckConstantReduced
        self.program.computeKicks(self.queue, (xd.size, ),
                                  None, md.data, actualNs.data,
                                  numpy.int32(nMax), recoilDirectionsD.data,
                                  numpy.float32(self.k0[0]),
                                  numpy.float32(self.k0[1]),
                                  numpy.float32(self.k0[2]),
                                  numpy.float32(recoilMomentum),
                                  numpy.float32(dt), axd.data, ayd.data,
                                  azd.data, numpy.int32(xd.shape[0]))
示例#17
0
文件: sources.py 项目: ufo-kit/syris
    def _transfer(
        self,
        shape,
        pixel_size,
        energy,
        offset,
        exponent=False,
        t=None,
        queue=None,
        out=None,
        check=True,
        block=False,
    ):
        """Compute the flat field wavefield for *shape*, *pixel_size* and *energy*.

        The source position is the first trajectory control point when *t* is None, otherwise
        the trajectory point at *t*; *offset* (y, x) is added to it. If *exponent* is True,
        only the exponent is returned. If *check* is True and the phase profile is not
        "plane", the exponent is checked for sufficient sampling and an error is logged on
        failure. When the exponential is applied, the returned array is different from the
        input *out*.
        """
        queue = cfg.OPENCL.queue if queue is None else queue
        if out is None:
            out = cl_array.Array(queue, shape, dtype=cfg.PRECISION.np_cplx)

        ps = make_tuple(pixel_size)
        if t is None:
            position = self.trajectory.control_points.simplified.magnitude[0]
        else:
            position = self.trajectory.get_point(t).simplified.magnitude
        x, y, z = position
        # offset is (y, x)
        center = (
            x + offset[1].simplified.magnitude,
            y + offset[0].simplified.magnitude,
            z,
        )

        has_phase = self.phase_profile != "plane"
        is_parabola = self.phase_profile == "parabola"
        # The exponent is needed when requested or when it must be checked
        compute_exponent = exponent or (check and has_phase)

        self._transfer_real(shape, center, ps, energy, compute_exponent, has_phase,
                            is_parabola, out, queue, block)

        if not compute_exponent:
            return out

        if check and has_phase and not is_wavefield_sampling_ok(out, queue=queue):
            LOG.error("Insufficient beam phase sampling")
        if not exponent:
            out = clmath.exp(out, queue=queue)

        return out
示例#18
0
def try_offset(ctx):
    """Probe which byte offsets into a uint8 array can be accessed as a sub-buffer.

    For each offset the sliced array's base buffer is indexed over the slice's byte range;
    a line reporting success or failure is printed per offset.
    """
    queue = cl.CommandQueue(ctx)
    a = cl_array.Array(queue, shape=(2048,), dtype="uint8")
    b = a[3:]

    try:
        b.data
    except Exception:
        # According to the original author this always fails with ArrayHasOffsetError
        pass

    for offset in (3, 4, 8, 16, 32, 64, 128):
        try:
            b = a[offset:]
            start = b.offset
            b.base_data[start : start + b.nbytes]
            print(f"offset={b.offset} OK")
        except Exception:
            # Fails on some platforms due to clCreateSubBuffer MISALIGNED_SUB_BUFFER_OFFSET
            print(f"offset={b.offset} not OK")
示例#19
0
    def add_array_by_name(self, key, size=None, dim=None, dtype=None):
        """
        Allocate a new (size, dim) array and store it under *key*.

        :param key: The key (or name) of the new array
        :param size: (Default: current) The size of the new array; the current size is used if not specified
        :param dim: (Default: current) The dim of the new array; the current dim is used if not specified
        :param dtype: (Default: current) The dtype of the new array; the context dtype is used if not specified
        """
        dim = self.__dim if dim is None else dim
        size = self.__size if size is None else size
        dtype = self.dtype if dtype is None else dtype

        new_array = cla.Array(self.__cl_queue, (size, dim), dtype=dtype)
        self.__opt_arrays[key] = new_array
示例#20
0
    def alloc_buf(self, size=None, like=None, wrap_in_array=True):
        """Allocate a read-write OpenCL buffer and return it.

        Without *like*, a plain buffer of *size* is created. With *like*, the buffer is
        initialized from ``like.base`` when that is not None, otherwise from *like* itself;
        the host object is remembered in ``self.buffers`` and ``self.to_buf`` is called on
        the new buffer. If *wrap_in_array* is True, an array with the shape and dtype of
        *like* backed by the buffer is stored in ``self.arrays``.
        """
        mf = cl.mem_flags
        if like is None:
            return cl.Buffer(self.ctx, mf.READ_WRITE, size)

        hbuf = like if like.base is None else like.base
        buf = cl.Buffer(self.ctx,
                        mf.READ_WRITE | mf.COPY_HOST_PTR,
                        hostbuf=hbuf)
        self.buffers[buf] = hbuf
        self.to_buf(buf)
        if wrap_in_array:
            wrapped = clarray.Array(self.ctx, like.shape, like.dtype, data=buf)
            self.arrays[buf] = wrapped

        return buf
示例#21
0
    def setup_device(self, imshape):
        """Allocate the float32 device buffers for *imshape* and create the kernels.

        Records *imshape* in ``self.imshape``, allocates ``self.clIm`` and the buffers shaped
        like it (``clO`` and ``clM`` are zero-initialized, the rest uninitialized), builds
        the OpenCL program and creates the helper kernel objects and empty Gaussian caches.
        """
        print('Setting up with imshape = %s' % str(imshape))

        self.imshape = imshape

        # Reference buffer; all other buffers copy its shape and dtype
        self.clIm = cla.Array(self.q, imshape, numpy.float32)
        for name in ('clm', 'clx', 'cly'):
            setattr(self, name, cla.empty_like(self.clIm))
        for name in ('clO', 'clM'):
            setattr(self, name, cla.zeros_like(self.clIm))
        for name in ('clF', 'clS', 'clThisS', 'clScratch'):
            setattr(self, name, cla.empty_like(self.clIm))

        self.radial_prg = pyopencl.Program(self.ctx, PROGRAM).build()

        self.sobel = Sobel(self.ctx, self.q)
        self.sepcorr2d = LocalMemorySeparableCorrelation(self.ctx, self.q)

        # Small element-wise helper kernels
        self.accum = ElementwiseKernel(
            self.ctx,
            'float *a, float *b',
            'a[i] += b[i]')
        self.norm_s = ElementwiseKernel(
            self.ctx,
            'float *s, const float nRadii',
            's[i] = -1 * s[i] / nRadii',
            'norm_s')
        self.accum_s = ElementwiseKernel(
            self.ctx,
            'float *a, float *b, const float nr',
            'a[i] -= b[i] / nr')

        self.gaussians = {}
        self.gaussian_prgs = {}

        self.minmax = MinMaxKernel(self.ctx, self.q)
示例#22
0
    def setup_device(self, imshape):
        """Allocate the float32 device buffers for *imshape* and create the kernels.

        Records *imshape* in ``self.cached_shape``, allocates ``self.clIm`` and the buffers
        shaped like it (``clO`` and ``clM`` are zero-initialized, the rest uninitialized),
        builds the radial OpenCL program, creates the helper kernel objects, and sets up the
        image object, sampler and ray-boundary finder used for the starburst step.
        """
        print('Setting up with imshape = %s' % str(imshape))

        self.cached_shape = imshape

        # Reference buffer; all other buffers copy its shape and dtype
        self.clIm = cla.Array(self.q, imshape, np.float32)
        for name in ('clm', 'clx', 'cly'):
            setattr(self, name, cla.empty_like(self.clIm))
        for name in ('clO', 'clM'):
            setattr(self, name, cla.zeros_like(self.clIm))
        for name in ('clF', 'clS', 'clThisS', 'clScratch'):
            setattr(self, name, cla.empty_like(self.clIm))

        self.radial_prg = pyopencl.Program(self.ctx, RADIAL_PROGRAM).build()

        self.sobel = Sobel(self.ctx, self.q)
        self.sepcorr2d = LocalMemorySeparableCorrelation(self.ctx, self.q)

        # Small element-wise helper kernels
        self.accum = ElementwiseKernel(
            self.ctx,
            'float *a, float *b',
            'a[i] += b[i]')
        self.norm_s = ElementwiseKernel(
            self.ctx,
            'float *s, const float nRadii',
            's[i] = -1 * s[i] / nRadii',
            'norm_s')
        self.accum_s = ElementwiseKernel(
            self.ctx,
            'float *a, float *b, const float nr',
            'a[i] -= b[i] / nr')

        self.gaussians = {}
        self.gaussian_prgs = {}

        self.minmax = MinMaxKernel(self.ctx, self.q)

        # starburst storage: single-channel float image, read-only for kernels
        image_format = cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT)
        self.clIm2D = cl.Image(self.ctx, mf.READ_ONLY, image_format, imshape)

        # Sampler for the image object: non-normalized coordinates, clamped to the edge,
        # linear filtering
        self.imSampler = cl.Sampler(
            self.ctx,
            False,
            cl.addressing_mode.CLAMP_TO_EDGE,
            cl.filter_mode.LINEAR)

        self.cl_find_ray_boundaries = FindRayBoundaries(self.ctx, self.q)

        # Shortcuts to kernels of the radial program
        self.calcF = self.radial_prg.calcF
        self.calcOM = self.radial_prg.calcOM
示例#23
0
def arr_from_np(queue, nparr):
    """Copy a NumPy array into a fresh device buffer and wrap it as a PyOpenCL array.

    :param queue: command queue handed to the returned array.
    :param nparr: host array; an object-dtype array (an array of arrays) is
        first flattened with ``np.concatenate``.
    :returns: a ``clarray.Array`` with the shape and dtype of the (possibly
        concatenated) host array, backed by the newly created buffer.
    """
    # ``np.object`` was only an alias of the builtin ``object``; it was
    # deprecated in NumPy 1.20 and removed in 1.24, so compare against the
    # builtin directly (a dtype compares equal to the type it was built from).
    if nparr.dtype == object:
        nparr = np.concatenate(nparr)
    # NOTE(review): ``ctx`` is not a parameter here -- presumably a
    # module-level OpenCL context; confirm it matches ``queue``'s context.
    buf = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=nparr)
    return clarray.Array(queue, nparr.shape, nparr.dtype, data=buf)
示例#24
0
    def _transfer(self,
                  shape,
                  pixel_size,
                  energy,
                  offset,
                  exponent=False,
                  t=None,
                  queue=None,
                  out=None,
                  check=True,
                  block=False):
        """Evaluate the flat field wavefield on a grid of *shape* for *energy*.

        The beam centre is the first trajectory control point (or the
        trajectory point at time *t* when given), shifted by *offset* which is
        indexed as (y, x). If *queue* is None the default queue from the
        configuration is used, and if *out* is None a new complex array is
        allocated. When *check* is True and the phase profile is not 'plane',
        the sampling of the computed exponent is verified and an error is
        logged if it is insufficient. If *exponent* is True the exponent is
        returned as computed by the kernel. If *block* is True, wait for the
        kernel to finish.

        Whenever the exponent had to be computed only for the check, the
        returned array is a new one produced by exponentiation, i.e. it is not
        the *out* instance passed in.
        """
        if queue is None:
            queue = cfg.OPENCL.queue

        ps = make_tuple(pixel_size)
        real = cfg.PRECISION.np_float

        # Beam centre in simplified units, shifted by the requested offset.
        if t is None:
            point = self.trajectory.control_points.simplified.magnitude[0]
        else:
            point = self.trajectory.get_point(t).simplified.magnitude
        x, y, z = point
        x += offset[1].simplified.magnitude
        y += offset[0].simplified.magnitude
        cl_center = gutil.make_vfloat3(x, y, z)
        # NOTE(review): this indexes *pixel_size* itself rather than the tuple
        # made above -- presumably it is expected to be a 2-element quantity.
        cl_ps = gutil.make_vfloat2(*pixel_size.simplified.magnitude[::-1])

        # Vertical profile: one value per row, from the angle of each row.
        fov = np.arange(shape[0]) * ps[0] - y * q.m
        angles = np.arctan((fov / self.sample_distance).simplified)
        host_profile = self._create_vertical_profile(energy, angles, ps[0])
        host_profile = host_profile.rescale(1 / q.s).magnitude
        profile = cl_array.to_device(queue, host_profile.astype(real))

        if out is None:
            out = cl_array.Array(queue, shape, dtype=cfg.PRECISION.np_cplx)

        z_sample = self.sample_distance.simplified.magnitude
        lam = energy_to_wavelength(energy).simplified.magnitude
        phase = self.phase_profile != 'plane'
        parabola = self.phase_profile == 'parabola'

        # The kernel must produce the exponent either because the caller asked
        # for it or because the sampling check below operates on it.
        want_exponent = exponent or (check and phase)
        if want_exponent:
            exponent_flag = np.int32(True)
        else:
            exponent_flag = np.int32(exponent)

        make_flat = cfg.OPENCL.programs['physics'].make_flat
        ev = make_flat(queue, shape[::-1], None,
                       out.data, profile.data, cl_center, cl_ps,
                       real(z_sample), real(lam),
                       exponent_flag, np.int32(phase), np.int32(parabola))

        if want_exponent:
            if check and phase and not is_wavefield_sampling_ok(out,
                                                                queue=queue):
                LOG.error('Insufficient beam phase sampling')
            if not exponent:
                # Caller wants the wavefield itself, not its exponent.
                out = clmath.exp(out, queue=queue)

        if block:
            ev.wait()

        return out
示例#25
0
    def solve_reduced_system(self, line_da, x_UH, x_LH, x_R_d, reduced_solver):
        """Assemble and solve the reduced system along one line of ranks.

        Every rank contributes the first and last entries of *x_UH* and
        *x_LH* plus two faces of the device array *x_R_d*. Rank 0 of the line
        builds three coefficient arrays of length ``2 * line_size`` from the
        gathered values, hands them to ``reduced_solver.solve`` together with
        the gathered faces as right-hand side, and scatters the result back.

        :param line_da: object exposing ``nz``/``ny``/``nx``, ``rank``,
            ``size``, ``mx``, ``npx`` and ``gather``/``gatherv``/``scatterv``
            (presumably a decomposition wrapper around an MPI communicator --
            TODO confirm).
        :param x_UH: host array; only its first and last entries are used.
        :param x_LH: host array; only its first and last entries are used.
        :param x_R_d: device array whose faces are extracted by
            ``self.copy_faces_kernel``.
        :param reduced_solver: object whose ``solve`` method is called on
            rank 0 with five device arrays.
        :returns: tuple ``(alpha, beta)`` of host arrays of shape ``(nz, ny)``,
            copies of the two slices received by this rank.
        """
        nz, ny, nx = line_da.nz, line_da.ny, line_da.nx
        line_rank = line_da.rank
        line_size = line_da.size

        # Collect the two end values of x_UH and x_LH from every rank; each
        # rank contributes 2 doubles, hence buffers of length 2 * line_size.
        x_UH_line = np.zeros(2 * line_size, dtype=np.float64)
        x_LH_line = np.zeros(2 * line_size, dtype=np.float64)
        line_da.gather([np.array([x_UH[0], x_UH[-1]]), 2, MPI.DOUBLE],
                       [x_UH_line, 2, MPI.DOUBLE])
        line_da.gather([np.array([x_LH[0], x_LH[-1]]), 2, MPI.DOUBLE],
                       [x_LH_line, 2, MPI.DOUBLE])

        # One (nz, ny, 2) sub-block per rank inside a (nz, ny, 2 * line_size)
        # array, with blocks starting every 2 entries along the last axis.
        lengths = np.ones(line_size)
        displacements = np.arange(0, 2 * line_size, 2)
        start_z, start_y, start_x = 0, 0, displacements[line_rank]
        subarray_aux = MPI.DOUBLE.Create_subarray([nz, ny, 2 * line_size],
                                                  [nz, ny, 2],
                                                  [start_z, start_y, start_x])
        # Extent is reset to 8 bytes -- presumably so that the displacements
        # above are counted in units of one double (TODO confirm).
        subarray = subarray_aux.Create_resized(0, 8)
        subarray.Commit()

        # Extract the faces of x_R_d on the device, bring them to the host and
        # gather them from all ranks into one array.
        x_R_faces_d = cl_array.Array(self.queue, (nz, ny, 2), np.float64)
        self.copy_faces_kernel(self.queue, [1, ny, nz], None, x_R_d.data,
                               x_R_faces_d.data, np.int32(nx), np.int32(ny),
                               np.int32(nz), np.int32(line_da.mx),
                               np.int32(line_da.npx))
        x_R_faces = x_R_faces_d.get()
        x_R_faces_line = np.zeros([nz, ny, 2 * line_size], dtype=np.float64)
        line_da.gatherv([x_R_faces, MPI.DOUBLE],
                        [x_R_faces_line, lengths, displacements, subarray])

        if line_rank == 0:
            # Coefficient arrays of the reduced system (presumably the sub-,
            # main- and super-diagonal of a tridiagonal matrix). Even and odd
            # rows are filled from the gathered end values; the statements
            # below then overwrite individual entries, so order matters.
            a_reduced = np.zeros(2 * line_size, dtype=np.float64)
            b_reduced = np.zeros(2 * line_size, dtype=np.float64)
            c_reduced = np.zeros(2 * line_size, dtype=np.float64)
            a_reduced[0::2] = -1.
            a_reduced[1::2] = x_UH_line[1::2]
            b_reduced[0::2] = x_UH_line[0::2]
            b_reduced[1::2] = x_LH_line[1::2]
            c_reduced[0::2] = x_LH_line[0::2]
            c_reduced[1::2] = -1.
            # First and last rows become identity rows (b = 1, a = c = 0) ...
            a_reduced[0], c_reduced[0] = 0.0, 0.0
            b_reduced[0] = 1.0
            a_reduced[-1], c_reduced[-1] = 0.0, 0.0
            b_reduced[-1] = 1.0
            # ... and the entries of the adjacent rows that would reference
            # them are zeroed as well.
            a_reduced[1] = 0.
            c_reduced[-2] = 0.
            a_reduced_d = cl_array.to_device(self.queue, a_reduced)
            b_reduced_d = cl_array.to_device(self.queue, b_reduced)
            c_reduced_d = cl_array.to_device(self.queue, c_reduced)
            # Second device copy of c_reduced -- presumably scratch space
            # required by the solver (TODO confirm).
            c2_reduced_d = cl_array.to_device(self.queue, c_reduced)
            d_reduced_d = cl_array.to_device(self.queue, x_R_faces_line)
            reduced_solver.solve(a_reduced_d, b_reduced_d, c_reduced_d,
                                 c2_reduced_d, d_reduced_d)
            # The result is read back from the array passed as right-hand side.
            params = d_reduced_d.get()
        else:
            # Only rank 0 provides the send buffer for the scatter below.
            params = None

        # Hand each rank its own (nz, ny, 2) block using the same layout as
        # the gather above.
        params_local = np.zeros([nz, ny, 2], dtype=np.float64)
        line_da.scatterv([params, lengths, displacements, subarray],
                         [params_local, MPI.DOUBLE])
        alpha = params_local[:, :, 0].copy()
        beta = params_local[:, :, 1].copy()
        return alpha, beta
示例#26
0
 def volume_empty(self):
     """Return a new float32 device array of shape ``(K, block_size)``.

     The array is only constructed here; nothing in this method writes to it.
     """
     ctx, queue = self.ctx, self.queue
     return cl_array.Array(ctx,
                           queue=queue,
                           shape=(self.K, self.block_size),
                           dtype=np.float32)
示例#27
0
def im2col_old(img, rec_field, n_filters, stride=1, zero_pad=0, wait_for=None):
    """Rearrange image patches into columns on the device (im2col).

    Each output column holds one receptive-field window of *img*; each output
    row corresponds to one (channel, ky, kx) position inside the window, with
    the row index laid out as ``(channel * kh + ky) * kw + kx``. Positions
    that fall outside the image (because of *zero_pad*) are written as 0.

    :param img: device array of shape ``(depth, height, width)``; float32
        selects a ``float`` kernel, any other dtype a ``double`` kernel.
    :param rec_field: ``(kh, kw)`` size of the receptive field.
    :param n_filters: unused, kept for interface compatibility.
    :type stride: int
    :param stride: step between neighbouring windows.
    :type zero_pad: int
    :param zero_pad: number of implicit zero pixels around the image.
    :param wait_for: events forwarded to the kernel launch.
    :returns: ``(col, evt)`` -- the output array of shape
        ``(kh * kw * depth, h2 * w2)`` and the event of the kernel launch.
    """
    dtype = 'float' if img.dtype == np.float32 else 'double'
    q = clplatf.qs[0]
    d1, h1, w1 = img.shape

    kh, kw = rec_field
    out_h = kh * kw * d1

    w2 = (w1 - kw + 2 * zero_pad) // stride + 1
    h2 = (h1 - kh + 2 * zero_pad) // stride + 1
    # TODO check if w2 or h2 is not int and raise something or zeropad...
    out_w = w2 * h2
    # alloc output
    col = clarray.Array(q, (out_h, out_w), img.dtype)

    # The row index inside one channel is ky * kw + kx, so the kernel row is
    # recovered by dividing by kw (dividing by kh is only right when the
    # receptive field is square).
    prg = cl.Program(
        clplatf.ctx, """
    __kernel void im2col_k(__global %(dtype)s *img,
                            int h,
                            int w,
                            int o_h,
                            int o_w,
                            int kh,
                            int kw,
                            int stride,
                            int padding,
                            __global %(dtype)s *out) {
        int gid = get_global_id(0);
        int out_w = o_w * o_h;
        int out_h = kw * kh;
        int out_x = gid %% out_w;
        int out_y = gid / out_w %% out_h;
        int kx = out_y %% kw;
        int ky = (out_y / kw) %% kh;
        int ch = gid / (kh * kw * o_h * o_w);

        int in_x = kx + (out_x %% o_w)*stride - padding;
        int in_y = ky + (out_x / o_w)*stride - padding;

        if (in_x >= 0 && in_x < w && in_y >= 0 && in_y < h) {
            out[gid] = img[(h * ch + in_y) * w + in_x];
        } else {
            out[gid] = 0;
        }
    }
    """ % {'dtype': dtype}).build()

    # One work item per output element.
    evt = prg.im2col_k(q, (out_h * out_w, ),
                       None,
                       img.data,
                       np.int32(h1),
                       np.int32(w1),
                       np.int32(h2),
                       np.int32(w2),
                       np.int32(kh),
                       np.int32(kw),
                       np.int32(stride),
                       np.int32(zero_pad),
                       col.data,
                       wait_for=wait_for)

    return col, evt
示例#28
0
文件: ocl.py 项目: ringw/reikna
 def array(self, shape, dtype, strides=None, allocator=None):
     """Create a PyOpenCL array of *shape* and *dtype* on this object's queue.

     *strides* and *allocator* are forwarded unchanged to the array
     constructor.
     """
     options = {'strides': strides, 'allocator': allocator}
     return clarray.Array(self._queue, shape, dtype, **options)
示例#29
0
    def make_tomography(self,
                        projections,
                        rotation,
                        pause,
                        num_ref_per_block=1,
                        num_proj_per_block=1,
                        num_dark_img=0,
                        start_frame=0,
                        shape=None,
                        shot_noise=True,
                        amplifier_noise=True,
                        source_blur=True,
                        queue=None):
        """Make sequence of *projections* projection images over 0 to *rotation* degrees. *pause*
        after each image. Proceed in image blocks, with *num_proj_per_block* projections followed
        by *num_ref_per_block* flatfields per block. Make *num_dark_img* dark images at the
        beginning. Start with *start_frame* (must be less or equal total number of images).

        This is a generator. For every frame before *start_frame* only the clock and the
        per-type counters are advanced and ``(None, None)`` is yielded; for every other frame
        ``(camera_image, image_name)`` is yielded, where *camera_image* comes from the detector
        camera (with *shot_noise* and *amplifier_noise* forwarded to it) and *image_name* is
        ``dark_NNNNN.tif``, ``proj_NNNNN.tif`` or ``ref_NNNNN.tif``.

        *shape* defaults to the camera shape; the pixel size is scaled by the ratio of the camera
        height to the requested height. *queue* defaults to the configured OpenCL queue and is
        used for the accumulation image. *source_blur* is accepted but currently has no effect.

        NOTE(review): the frame schedule only contains whole blocks, so *projections* should be
        a multiple of *num_proj_per_block*; otherwise the loop runs past the schedule.
        """

        if queue is None:
            queue = cfg.OPENCL.queue
        shape_0 = self.detector.camera.shape
        if shape is None:
            shape = shape_0
        ps_0 = self.detector.pixel_size
        ps = shape_0[0] / float(shape[0]) * ps_0

        image = cl_array.Array(queue, shape, dtype=cfg.PRECISION.np_float)

        angles = np.linspace(0, rotation, num=projections) * q.deg
        # Floor division keeps the block count an integer on Python 3 as well;
        # np.tile and the frame indexing below both require an integer.
        num_blocks = projections // num_proj_per_block
        overall_no_images = num_dark_img + projections + \
            num_blocks * num_ref_per_block

        DARK_IMAGE = 0
        PROJECTION = 1
        FLATFIELD = 2

        # Frame schedule: all darks first, then repeated blocks of
        # projections followed by flatfields.
        darks = np.repeat(DARK_IMAGE, num_dark_img)
        blocks = np.repeat(PROJECTION, num_proj_per_block)
        blocks = np.append(blocks, np.repeat(FLATFIELD, num_ref_per_block))
        blocks = np.tile(blocks, num_blocks)
        image_type = np.append(darks, blocks)

        self.clock = 0 * q.s
        exptime = self.detector.camera._exp_time

        counter_darkimages = 0
        counter_projections = 0
        counter_flatfields = 0

        for i in range(overall_no_images):

            if start_frame > i:
                # Skipped frame: keep time and numbering consistent without
                # computing an image.
                self.clock += exptime + pause
                if image_type[i] == DARK_IMAGE:
                    counter_darkimages += 1
                elif image_type[i] == PROJECTION:
                    counter_projections += 1
                elif image_type[i] == FLATFIELD:
                    counter_flatfields += 1
                yield None, None

            else:
                # The accumulation buffer is reused for every frame.
                image.fill(0)

                t_0 = self.clock
                t_next = self.get_next_time(self.clock, ps)
                image_name = None

                # Dark images:
                if image_type[i] == DARK_IMAGE:
                    image_name = 'dark_{:>05}.tif'.format(counter_darkimages)
                    counter_darkimages += 1
                    self.clock += exptime + pause

                # Projections:
                elif image_type[i] == PROJECTION:
                    # Turn sample
                    self.tomo_rotate(angles[counter_projections])
                    # Integrate piecewise over the exposure, splitting it at
                    # every time returned by get_next_time.
                    while t_next < t_0 + exptime:
                        LOG.debug('Motion blur: {} -> {}'.format(t_0, t_next))
                        image += self.compute_intensity(
                            self.clock, t_next, shape, ps)
                        self.clock = t_next
                        t_next = self.get_next_time(self.clock, ps)
                    image += self.compute_intensity(self.clock, t_0 + exptime,
                                                    shape, ps)
                    self.clock = t_0 + exptime + pause

                    image_name = 'proj_{:>05}.tif'.format(counter_projections)
                    counter_projections += 1
                    LOG.debug('Projection: {} -> {}'.format(
                        t_0, t_0 + exptime))

                # Flatfields:
                elif image_type[i] == FLATFIELD:
                    while t_next < t_0 + exptime:
                        LOG.debug('Motion blur: {} -> {}'.format(t_0, t_next))
                        image += self.compute_intensity(self.clock,
                                                        t_next,
                                                        shape,
                                                        ps,
                                                        flat=True)
                        self.clock = t_next
                        t_next = self.get_next_time(self.clock, ps)
                    image += self.compute_intensity(self.clock,
                                                    t_0 + exptime,
                                                    shape,
                                                    ps,
                                                    flat=True)
                    self.clock = t_0 + exptime + pause
                    image_name = 'ref_{:>05}.tif'.format(counter_flatfields)
                    counter_flatfields += 1

                else:
                    raise ValueError("Unknow image type requested. "
                                     "Options are: Dark image, projection, flatfield.")

                camera_image = self.detector.camera.get_image(
                    image,
                    shot_noise=shot_noise,
                    amplifier_noise=amplifier_noise)

                yield camera_image, image_name
示例#30
0
    def compute_intensity(self,
                          t_0,
                          t_1,
                          shape,
                          pixel_size,
                          queue=None,
                          block=False,
                          flat=False):
        """Return the intensity accumulated between times *t_0* and *t_1*.

        For every energy the wavefield starts as 1 and is multiplied by the
        transfer function of each optical element evaluated at *t_0*. After
        every element which is not the source and which has a nonzero distance
        assigned, the wavefield is multiplied in Fourier space by a Gaussian
        and by the propagator for that distance. The detector converts the
        squared modulus into intensity; the sum over energies is scaled by the
        exposure time ``t_1 - t_0``. If *flat* is True the sample is left out.
        *queue* defaults to the configured OpenCL queue, *block* is forwarded
        to the individual operations.
        """

        duration = (t_1 - t_0).simplified.magnitude

        if queue is None:
            queue = cfg.OPENCL.queue
        complex_type = cfg.PRECISION.np_cplx
        u = cl_array.Array(queue, shape, dtype=complex_type)
        u_sample = cl_array.zeros(queue, shape, complex_type)
        intensity = cl_array.zeros(queue, shape, cfg.PRECISION.np_float)

        for energy in self.energies:
            u.fill(1)
            for index, element in enumerate(self.oe):

                if flat and element == self.sample:
                    continue

                # u_sample serves as the output buffer for every element.
                u *= element.transfer(shape, pixel_size, energy, t=t_0,
                                      queue=queue, out=u_sample,
                                      check=False, block=block)

                # Propagate and blur optical element when not source
                if not (self.distances[index] != 0 * q.m
                        and element != self.source):
                    continue

                lam = energy_to_wavelength(energy)
                propagator = compute_propagator(u.shape[0],
                                                self.distances[index],
                                                lam,
                                                pixel_size,
                                                queue=queue,
                                                block=block,
                                                mollified=True)

                ip.fft_2(u, queue=queue, block=block)

                # Gaussian width from the source size scaled by the ratio of
                # this distance to the summed distances up to this element.
                summed_distance = np.sum(self.distances[:index + 1])
                fwhm = (self.distances[index] * self.source.size /
                        summed_distance).simplified
                sigma = smath.fwnm_to_sigma(fwhm, n=2)
                psf = ip.get_gauss_2d(shape,
                                      sigma,
                                      pixel_size=pixel_size,
                                      fourier=True,
                                      queue=queue,
                                      block=block)
                u *= psf
                u *= propagator
                ip.ifft_2(u, queue=queue, block=block)

            intensity += self.detector.convert(abs(u)**2, energy)

        return intensity * duration