def findSample(self, meanN, nMax):
    """Run the ``countEmissions`` kernel for every entry of *meanN*.

    A device array with *nMax* uniform random numbers per entry is
    generated and passed to the kernel together with *meanN*.  Return
    the ``int32`` device array (one element per entry) handed to the
    kernel as its last buffer argument, presumably the per-entry
    emission counts.
    """
    numEntries = meanN.shape[0]

    # Auxiliary uniform random numbers, nMax draws per entry.
    uniformDraws = cl_array.Array(self.queue, [numEntries, nMax],
                                  numpy.float32)
    self.generator.fill_uniform(uniformDraws)

    counts = cl_array.Array(self.queue, [numEntries, ], dtype=numpy.int32)

    self.program.countEmissions(
        self.queue, (meanN.size, ), None,
        meanN.data,
        uniformDraws.data,
        numpy.int32(nMax),
        counts.data,
        numpy.int32(numEntries))
    return counts
def propagate(samples, shape, energies, distance, pixel_size, region=None,
              apply_phase_factor=False, mollified=True, detector=None, offset=None,
              queue=None, out=None, t=None, check=True, block=False):
    """Propagate *samples* and return the intensity summed over *energies*.

    *samples* are :class:`syris.opticalelements.OpticalElement` instances,
    *shape* is (y, x). For every energy the combined transmission of the
    samples is computed and, if *distance* is not zero, propagated to
    *distance*. Use *pixel_size*, limit coherence to *region*,
    *apply_phase_factor* is as by the Fresnel approximation phase factor,
    *mollified* is forwarded to the propagator computation, *offset* is the
    sample offset and *t* the time passed to the samples. If *detector* is
    given, the intensity of every energy is converted by it before being
    accumulated. *queue* is an OpenCL command queue (the configured default
    is used when None). *out* is accepted but not used by this function. If
    *block* is True, wait for the kernels to finish. If *check* is True, check
    the transmission function sampling.
    """
    if queue is None:
        queue = cfg.OPENCL.queue

    wavefield = cl_array.Array(queue, shape, dtype=cfg.PRECISION.np_cplx)
    total = cl_array.zeros(queue, shape, cfg.PRECISION.np_float)

    for energy in energies:
        wavefield.fill(0)
        # transfer_many returns a (possibly new) array, hence the rebinding.
        wavefield = transfer_many(samples, shape, pixel_size, energy, offset=offset,
                                  queue=queue, out=wavefield, t=t, check=check,
                                  block=block)

        if distance != 0 * q.m:
            wavelength = energy_to_wavelength(energy)
            kernel = compute_propagator(wavefield.shape[0], distance, wavelength,
                                        pixel_size, region=region,
                                        apply_phase_factor=apply_phase_factor,
                                        mollified=mollified, queue=queue, block=block)
            # Convolution with the propagator carried out in Fourier space.
            fft_2(wavefield, queue=queue, block=block)
            wavefield *= kernel
            ifft_2(wavefield, queue=queue, block=block)

        current = abs(wavefield) ** 2
        if detector:
            current = detector.convert(current, energy)
        total += current

    return total
def get_array(bh_ary, queue):
    """Return a PyOpenCL array on *queue* built from *bh_ary*.

    The new array takes shape and dtype from *bh_ary* and uses the object
    returned by ``get_buffer(bh_ary)`` as its data.
    """
    _import_pyopencl_module()
    import pyopencl.array

    shape, dtype = bh_ary.shape, bh_ary.dtype
    return pyopencl.array.Array(queue, shape, dtype, data=get_buffer(bh_ary))
def transfer_many(objects, shape, pixel_size, energy, exponent=False, offset=None, queue=None,
                  out=None, t=None, check=True, block=False):
    """Compute the combined transmission of several *objects*.

    The exponents of all objects are summed up into *out*. If *exponent* is
    True, return only that summed exponent; if it is False, evaluate the
    exponential of it. Use *shape* (y, x), *pixel_size*, *energy*, *offset* as
    (y, x), OpenCL command *queue* (the configured default when None), *out*
    array (a zero-filled one is created when None), time *t*, check the
    sampling if *check* is True and wait for OpenCL kernels if *block* is
    True.

    When the exponential is evaluated, the returned array is different from
    the input *out* because of the pyopencl.clmath behavior.
    """
    if queue is None:
        queue = cfg.OPENCL.queue
    if out is None:
        out = cl_array.zeros(queue, shape, cfg.PRECISION.np_cplx)
    # Scratch array handed to every object as its output buffer.
    u_sample = cl_array.Array(queue, shape, cfg.PRECISION.np_cplx)

    for sample in objects:
        # Sampling is checked once on the sum below, not per object.
        out += sample.transfer(shape, pixel_size, energy, exponent=True, offset=offset, t=t,
                               queue=queue, out=u_sample, check=False, block=block)

    if check and not is_wavefield_sampling_ok(out, queue=queue):
        LOG.error('Insufficient transmission function sampling')

    # Apply the exponent
    if not exponent:
        out = clmath.exp(out, queue=queue)

    return out
def test_negative_dim_rejection(ctx_factory):
    """Array construction must raise ValueError for shapes with a negative entry."""
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    def assert_rejected(bad_shape):
        with pytest.raises(ValueError):
            cl_array.Array(queue, shape=bad_shape, dtype=np.float64)

    # Scalar and 1-tuple shapes.
    assert_rejected(-10)
    assert_rejected((-10, ))

    # Negative trailing dimension combined with any leading one.
    for left_dim in (-1, 0, 1):
        assert_rejected((left_dim, -1))

    # Negative leading dimension combined with any trailing one.
    for right_dim in (-1, 0, 1):
        assert_rejected((-1, right_dim))
def project_metaballs_naive(metaballs, shape, pixel_size, offset=None, z_step=None, queue=None,
                            out=None, block=False):
    """Project a list of :class:`.MetaBall` on an image plane with *shape*, *pixel_size*.

    *z_step* is the physical step in the z-dimension, if not specified it is
    the same as *pixel_size*. *offset* is the physical spatial body offset as
    (y, x). Use OpenCL *queue* (the configured default when None) and *out*
    pyopencl Array instance for returning the result (a new one is allocated
    when None). If *block* is True, wait for the kernel to finish.
    """
    def get_extrema(sgn):
        # Extreme z-coordinate over all balls: position +/- (2 * radius + x pixel size).
        func = np.max if sgn > 0 else np.min
        x_ps = util.make_tuple(pixel_size)[1]
        res = [(ball.position[2] + sgn * (2 * ball.radius + x_ps)).simplified.magnitude
               for ball in metaballs]

        return func(res)

    if offset is None:
        offset = (0, 0) * q.m
    if queue is None:
        queue = cfg.OPENCL.queue
    if out is None:
        out = cl_array.Array(queue, shape, cfg.PRECISION.np_float)

    string = b"".join([body.pack() for body in metaballs])
    # np.fromstring on binary data is deprecated; frombuffer gives a read-only
    # view, so copy to keep the writable array fromstring used to return.
    data = np.frombuffer(string, dtype=np.float32).copy()
    data = cl_array.to_device(queue, data)

    n, m = shape
    ps = util.make_tuple(pixel_size.simplified.magnitude)
    z_step = ps[1] if z_step is None else z_step.simplified.magnitude
    z_range = get_extrema(-1), get_extrema(1)
    # The kernel takes (x, y) ordered vectors, the Python side uses (y, x).
    offset = g_util.make_vfloat2(*offset.simplified.magnitude[::-1])

    # Enqueue on the requested queue, the same one *out* and *data* live on.
    ev = cfg.OPENCL.programs["geometry"].naive_metaballs(
        queue,
        (m, n),
        None,
        out.data,
        data.data,
        np.int32(len(metaballs)),
        offset,
        g_util.make_vfloat2(*z_range),
        cfg.PRECISION.np_float(z_step),
        g_util.make_vfloat2(*ps[::-1]),
        np.int32(True),
    )
    if block:
        ev.wait()

    return out
def test_vector_fill(ctx_factory):
    """Filling and zero-creating arrays of a vector dtype (float4) must work."""
    context = ctx_factory()
    queue = cl.CommandQueue(context)

    vec_dtype = cltypes.float4
    a_gpu = cl_array.Array(queue, 100, dtype=vec_dtype)
    fill_value = cltypes.make_float4(0.0, 0.0, 1.0, 0.0)
    a_gpu.fill(fill_value)

    host_copy = a_gpu.get()
    assert host_copy.dtype == cltypes.float4

    # Only checks that creation with a vector dtype does not raise.
    a_gpu = cl_array.zeros(queue, 100, dtype=vec_dtype)
def compute_propagator(size, distance, lam, pixel_size, fresnel=True, region=None,
                       apply_phase_factor=False, mollified=True, queue=None, block=False):
    """Create a propagator with (*size*, *size*) dimensions for propagation
    *distance*, wavelength *lam* and *pixel_size*. If *fresnel* is True, use
    the Fresnel approximation, if it is False, use the full propagator (don't
    approximate the square root). *region* is the diameter of the wavefront
    area which is capable of interference. If *apply_phase_factor* is True,
    apply the phase factor defined by Fresnel approximation. If *mollified* is
    True the aliased frequencies are suppressed. If command *queue* is
    specified, execute the kernel on it. If *block* is True, wait for the
    kernel to finish.

    Raise :class:`ValueError` if *size* is odd. Insufficient sampling is only
    logged as an error, it does not stop the computation.
    """
    if size % 2:
        raise ValueError('Only even sizes are supported')
    if queue is None:
        queue = cfg.OPENCL.queue

    # Check the sampling
    r_cutoff = compute_aliasing_limit(size, lam, pixel_size, distance, fov=region,
                                      fourier=False)
    min_n = 4
    if r_cutoff < min_n:
        LOG.error('Propagator too narrow, propagation distance too small or pixel size too large')
    f_cutoff = compute_aliasing_limit(size, lam, pixel_size, distance, fov=region,
                                      fourier=True)
    if f_cutoff < min_n:
        LOG.error('Propagator too wide, propagation distance too large or pixel size too small')

    out = cl_array.Array(queue, (size, size), cfg.PRECISION.np_cplx)
    if apply_phase_factor:
        phase_factor = np.exp(2 * np.pi * distance.simplified / lam.simplified * 1j)
    else:
        phase_factor = 0 + 0j

    # Global work size must be integral; `/` would yield floats on Python 3.
    # *size* is even here, so floor division gives the same value.
    work_size = size // 2 + 1
    ev = cfg.OPENCL.programs['physics'].propagator(queue,
                                                   (work_size, work_size),
                                                   None,
                                                   out.data,
                                                   cfg.PRECISION.np_float(distance.simplified),
                                                   cfg.PRECISION.np_float(lam.simplified),
                                                   cfg.PRECISION.np_float(pixel_size.simplified),
                                                   g_util.make_vcomplex(phase_factor),
                                                   np.int32(fresnel))
    if block:
        ev.wait()

    if mollified:
        fwtm = compute_aliasing_limit(size, lam, pixel_size, distance,
                                      fov=size * pixel_size, fourier=True)
        if region is not None:
            # The coherence region may restrict the limit further.
            fwtm_region = compute_aliasing_limit(size, lam, pixel_size, distance, region,
                                                 fourier=True)
            fwtm = min(fwtm_region, fwtm)

        sigma = fwnm_to_sigma(fwtm, n=10)
        mollifier = get_gauss_2d(size, sigma, fourier=False, queue=queue, block=block)
        out = out * mollifier

    return out
def project_metaballs(metaballs, shape, pixel_size, offset=None, queue=None, out=None,
                      block=False):
    """Project a list of :class:`.MetaBall` on an image plane with *shape*, *pixel_size*.

    *pixel_size* is indexed as (y, x). *offset* is the physical spatial body
    offset as (y, x). Use OpenCL *queue* (the configured default when None)
    and *out* pyopencl Array instance for returning the result (a new one is
    allocated when None). If *block* is True, wait for the kernel to finish.
    """
    string = b"".join([body.pack() for body in metaballs])
    n, m = shape
    ps = pixel_size.simplified.magnitude

    if offset is None:
        offset = (0, 0) * q.m
    if queue is None:
        queue = cfg.OPENCL.queue

    mem_flags = cl.mem_flags
    bodies_mem = cl.Buffer(cfg.OPENCL.ctx, mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR,
                           hostbuf=string)
    # Scratch buffers for the kernel, sized per pixel and per possible body.
    pbodies_mem = cl.Buffer(cfg.OPENCL.ctx, mem_flags.READ_WRITE,
                            size=m * n * cfg.MAX_META_BODIES * 4 * 7)
    left_mem = cl.Buffer(cfg.OPENCL.ctx, mem_flags.READ_WRITE,
                         size=m * n * 2 * cfg.MAX_META_BODIES)
    right_mem = cl.Buffer(cfg.OPENCL.ctx, mem_flags.READ_WRITE,
                          size=m * n * 2 * cfg.MAX_META_BODIES)

    # The kernel takes (x, y) ordered vectors, the Python side uses (y, x).
    offset = g_util.make_vfloat2(*offset.simplified.magnitude[::-1])

    if out is None:
        out = cl_array.Array(queue, shape, cfg.PRECISION.np_float)

    # Enqueue on the requested queue, the same one *out* was allocated on.
    ev = cfg.OPENCL.programs["geometry"].metaballs(
        queue,
        (m, n),
        None,
        out.data,
        bodies_mem,
        pbodies_mem,
        left_mem,
        right_mem,
        np.int32(len(metaballs)),
        offset,
        cl_array.vec.make_int2(0, 0),
        cl_array.vec.make_int4(0, 0, m, n),
        g_util.make_vfloat2(ps[1], ps[0]),
        np.int32(True),
    )
    if block:
        ev.wait()

    return out
def compute_RHS(self, line_da, f, dx):
    """Run the RHS kernel on *f* and return the resulting device array.

    *f* is first transferred into a local vector created by *line_da*, then
    copied to the device. The kernel receives that input, a float64 output
    array of shape (nz, ny, nx), the spacing *dx* and the rank and size of
    *line_da*.
    """
    local_f = line_da.create_local_vector()
    line_da.global_to_local(f, local_f)

    input_d = cl_array.to_device(self.queue, local_f)
    out_shape = (line_da.nz, line_da.ny, line_da.nx)
    output_d = cl_array.Array(self.queue, out_shape, dtype=np.float64)

    # Global work size is ordered (x, y, z), the reverse of the array shape.
    global_size = (line_da.nx, line_da.ny, line_da.nz)
    self.compute_RHS_kernel(
        self.queue, global_size, None,
        input_d.data, output_d.data,
        np.float64(dx),
        np.int32(line_da.rank), np.int32(line_da.size))
    return output_d
def make_sequence(
    self,
    t_start,
    t_end,
    shape=None,
    shot_noise=True,
    amplifier_noise=True,
    source_blur=True,
    queue=None,
):
    """Make images between times *t_start* and *t_end*.

    This is a generator yielding one camera image per frame, the frame
    duration being the inverse of the camera's fps. *shape* defaults to the
    camera shape; the pixel size is scaled by the ratio of the camera height
    to the requested height. *shot_noise* and *amplifier_noise* are forwarded
    to the camera. If *source_blur* is True, every frame is multiplied by the
    source blur kernel in Fourier space. *queue* is the OpenCL command queue
    (the configured default when None).
    """
    if queue is None:
        queue = cfg.OPENCL.queue

    camera = self.detector.camera
    shape_0 = camera.shape
    if shape is None:
        shape = shape_0
    ps = shape_0[0] / float(shape[0]) * self.detector.pixel_size

    frame_time = 1 / camera.fps
    frame_starts = np.arange(
        t_start.simplified.magnitude,
        t_end.simplified.magnitude,
        frame_time.simplified.magnitude,
    ) * q.s

    image = cl_array.Array(queue, shape, dtype=cfg.PRECISION.np_float)
    source_blur_kernel = (
        self.make_source_blur(shape, ps, queue=queue, block=False) if source_blur else None
    )

    fmt = "Making sequence with shape {} and pixel size {} from {} to {}"
    LOG.debug(fmt.format(shape, ps, t_start, t_end))

    for t_0 in frame_starts:
        frame_end = t_0 + frame_time
        image.fill(0)

        # Accumulate sub-intervals given by get_next_time within the frame.
        t = t_0
        t_next = self.get_next_time(t, ps)
        while t_next < frame_end:
            LOG.debug("Motion blur: {} -> {}".format(t, t_next))
            image += self.compute_intensity(t, t_next, shape, ps)
            t = t_next
            t_next = self.get_next_time(t, ps)
        # Remainder up to the end of the frame.
        image += self.compute_intensity(t, frame_end, shape, ps)

        if source_blur:
            blurred = ip.fft_2(image) * source_blur_kernel
            image = ip.ifft_2(blurred).real

        camera_image = camera.get_image(
            image, shot_noise=shot_noise, amplifier_noise=amplifier_noise)
        LOG.debug("Image: {} -> {}".format(t_0, frame_end))
        yield camera_image
def transfer(thickness, refractive_index, wavelength, exponent=False, queue=None, out=None,
             check=True, block=False):
    """Transfer *thickness* (can be either a numpy or pyopencl array) with
    *refractive_index* and given *wavelength*. If *exponent* is True, only the
    exponent of the transmission function is computed and returned. Use
    command *queue* for computation (the configured default when None) and
    *out* pyopencl array (allocated when None). If *block* is True, wait for
    the kernel to finish. If *check* is True, the function is checked for
    aliasing artefacts, which are only logged as an error.

    Returned *out* array is different from the input one because of the
    pyopencl.clmath behavior.
    """
    if queue is None:
        queue = cfg.OPENCL.queue

    if not isinstance(thickness, cl_array.Array):
        # Host data: strip units, cast to working precision and upload.
        host_thickness = thickness.simplified.magnitude.astype(cfg.PRECISION.np_float)
        thickness_mem = cl_array.to_device(queue, host_thickness)
    else:
        thickness_mem = thickness

    if out is None:
        out = cl_array.Array(queue, thickness_mem.shape, cfg.PRECISION.np_cplx)

    physics = cfg.OPENCL.programs['physics']
    if not (exponent or check):
        # Neither exponent nor check requested: single kernel call.
        ev = physics.transfer(queue,
                              thickness_mem.shape[::-1],
                              None,
                              out.data,
                              thickness_mem.data,
                              cfg.PRECISION.np_cplx(refractive_index),
                              cfg.PRECISION.np_float(wavelength.simplified.magnitude))
    else:
        wavenumber = cfg.PRECISION.np_float(2 * np.pi / wavelength.simplified.magnitude)
        ev = physics.transmission_add(queue,
                                      thickness_mem.shape[::-1],
                                      None,
                                      out.data,
                                      thickness_mem.data,
                                      cfg.PRECISION.np_cplx(refractive_index),
                                      wavenumber,
                                      np.int32(1))
        if check and not is_wavefield_sampling_ok(out, queue=queue):
            LOG.error('Insufficient transmission function sampling')
        if not exponent:
            # Apply the exponent
            out = clmath.exp(out, queue=queue)

    if block:
        ev.wait()

    return out
def empty(shape, dtype, cq: CommandQueue = None, order="C", allocator=None, data=None,
          offset=0, strides=None, events=None, _flags=None):
    """Create a new array of *shape* and *dtype* on queue *cq*.

    When *cq* is None the current queue is used. *order* and *allocator* are
    forwarded to the underlying ``cl_array.Array``; *data*, *offset*,
    *strides*, *events* and *_flags* are accepted but not used here. The
    result is wrapped with ``Array.from_array`` and tagged with an ``'empty'``
    latest event.
    """
    if cq is None:
        cq = get_current_queue()

    raw = cl_array.Array(cq, shape, dtype, order, allocator)
    wrapped = Array.from_array(raw)
    wrapped.add_latest_event('empty')
    return wrapped
def __init__(self, size, dim, mask=(OCLC_X | OCLC_V | OCLC_A | OCLC_M), dtype=np.float32):
    """Create an OpenCL context and queue and allocate the arrays selected by *mask*.

    *size* and *dim* give the shape (size, dim) of the X, V and A arrays; the
    M array has shape (size, 1). Every array whose ``OCLC_*`` bit is not set
    in *mask* is stored as None. *dtype* is the element type of all arrays.
    The queue is created with profiling enabled.
    """
    self.__dtype = dtype
    self.__size = size
    self.__dim = dim
    self.__opt_arrays = dict()

    self.__cl_context = cl.create_some_context()
    queue = cl.CommandQueue(
        self.__cl_context,
        properties=cl.command_queue_properties.PROFILING_ENABLE)
    self.__cl_queue = queue

    def allocate(flag, columns):
        # Allocate a (size, columns) array only if *flag* is requested.
        if mask & flag:
            return cla.Array(queue, (size, columns), dtype)
        return None

    self.__V_cla = allocate(OCLC_V, dim)
    self.__X_cla = allocate(OCLC_X, dim)
    self.__A_cla = allocate(OCLC_A, dim)
    self.__M_cla = allocate(OCLC_M, 1)
def forward(ctx, input):
    """Sum-reduce *input* with an OpenCL reduction kernel.

    *input* is saved on *ctx* for the backward pass. The kernel is built with
    neutral element ``0`` and reduce expression ``a+b`` over float elements.
    The ``data`` of the kernel result is returned after its shape is set to
    ``(1, )`` and its dtype to float32.
    """
    ctx.save_for_backward(input)

    sum_kernel = cl_reduct_krnl_build(
        ctx.cl_ctx,
        np.float32,
        neutral="0",
        reduce_expr="a+b",
        map_expr="x[i]",
        arguments="__global float *x")

    device_input = pycl_array.Array(ctx.cl_queue, input.size, dtype=np.float32, data=input)
    ret = sum_kernel(device_input).data
    ret.shape = (1, )
    ret.dtype = np.float32
    return ret
def computeAcc(self, xd, yd, zd, vxd, vyd, vzd, qd, md, axd, ayd, azd, t, dt):
    """Run the photon-scattering kernels and the recoil-kick kernel.

    *xd*, *yd*, *zd* and *vxd*, *vyd*, *vzd* are the position and velocity
    device arrays, *md* the mass array, and *axd*, *ayd*, *azd* the arrays
    handed to the ``computeKicks`` kernel (presumably updated in place).
    *dt* is the time step. *qd* and *t* are accepted but not used here.

    If ``self.sigma`` is None the homogeneous-beam kernel is used, otherwise
    the gaussian-beam kernel, which additionally receives ``self.x0`` and
    ``self.sigma``.
    """
    # Compute average numbers of scattered photons
    nbars = cl_array.zeros_like(xd)
    # Identity test: `== None` would compare elementwise for array-like sigma.
    if self.sigma is None:
        self.program.compute_mean_scattered_photons_homogeneous_beam(
            self.queue, (xd.size, ), None,
            xd.data, yd.data, zd.data,
            vxd.data, vyd.data, vzd.data,
            numpy.float32(self.k0[0]),
            numpy.float32(self.k0[1]),
            numpy.float32(self.k0[2]),
            numpy.float32(self.gamma),
            numpy.float32(self.delta0),
            numpy.float32(self.S),
            numpy.float32(dt),
            numpy.int32(xd.size),
            nbars.data)
    else:
        self.program.compute_mean_scattered_photons_gaussian_beam(
            self.queue, (xd.size, ), None,
            xd.data, yd.data, zd.data,
            vxd.data, vyd.data, vzd.data,
            numpy.float32(self.k0[0]),
            numpy.float32(self.k0[1]),
            numpy.float32(self.k0[2]),
            numpy.float32(self.x0[0]),
            numpy.float32(self.x0[1]),
            numpy.float32(self.x0[2]),
            numpy.float32(self.sigma),
            numpy.float32(self.gamma),
            numpy.float32(self.delta0),
            numpy.float32(self.S),
            numpy.float32(dt),
            numpy.int32(xd.size),
            nbars.data)

    # Compute scattered photons and associated recoil kicks
    nMax = int(
        math.ceil(10.0 * self.S * (self.gamma / 2.0 / numpy.pi) * dt))
    actualNs = self.findSample(nbars, nMax)

    # Three normally distributed numbers per particle and possible photon.
    recoilDirectionsD = cl_array.Array(self.queue, [nbars.size, nMax, 3],
                                       dtype=numpy.float32)
    self.generator.fill_normal(recoilDirectionsD)

    # apply recoil kicks to particles
    recoilMomentum = numpy.linalg.norm(
        self.k0) * self._PlanckConstantReduced
    self.program.computeKicks(self.queue, (xd.size, ), None,
                              md.data,
                              actualNs.data,
                              numpy.int32(nMax),
                              recoilDirectionsD.data,
                              numpy.float32(self.k0[0]),
                              numpy.float32(self.k0[1]),
                              numpy.float32(self.k0[2]),
                              numpy.float32(recoilMomentum),
                              numpy.float32(dt),
                              axd.data, ayd.data, azd.data,
                              numpy.int32(xd.shape[0]))
def _transfer(
    self,
    shape,
    pixel_size,
    energy,
    offset,
    exponent=False,
    t=None,
    queue=None,
    out=None,
    check=True,
    block=False,
):
    """Compute the flat field wavefield.

    The center is the first trajectory control point when *t* is None,
    otherwise the trajectory point at time *t*, shifted by *offset* given as
    (y, x). If *exponent* is True, only the exponent is returned. If *check*
    is True and the phase profile is not ``"plane"``, the sampling is checked
    and a failure is logged as an error.

    Returned *out* array is different from the input one.
    """
    if queue is None:
        queue = cfg.OPENCL.queue
    if out is None:
        out = cl_array.Array(queue, shape, dtype=cfg.PRECISION.np_cplx)

    ps = make_tuple(pixel_size)

    if t is not None:
        point = self.trajectory.get_point(t).simplified.magnitude
    else:
        point = self.trajectory.control_points.simplified.magnitude[0]
    x, y, z = point
    x = x + offset[1].simplified.magnitude
    y = y + offset[0].simplified.magnitude
    center = (x, y, z)

    phase = self.phase_profile != "plane"
    parabola = self.phase_profile == "parabola"
    # The exponent is needed either on request or for the sampling check.
    compute_exponent = exponent or check and phase

    self._transfer_real(shape, center, ps, energy, compute_exponent, phase, parabola,
                        out, queue, block)

    if not compute_exponent:
        return out

    if check and phase and not is_wavefield_sampling_ok(out, queue=queue):
        LOG.error("Insufficient beam phase sampling")
    if not exponent:
        out = clmath.exp(out, queue=queue)

    return out
def try_offset(ctx):
    """Probe which byte offsets into a device array can be accessed on *ctx*.

    A 2048-element uint8 array is sliced at several offsets; for each one the
    corresponding range of ``base_data`` is taken and the outcome printed.
    """
    queue = cl.CommandQueue(ctx)
    a = cl_array.Array(queue, shape=(2048,), dtype="uint8")

    b = a[3:]
    try:
        b.data
    except Exception:
        # Always fails due to ArrayHasOffsetError; deliberately ignored.
        pass

    candidate_offsets = (3, 4, 8, 16, 32, 64, 128)
    for offset in candidate_offsets:
        try:
            b = a[offset:]
            start = b.offset
            b.base_data[start : start + b.nbytes]
            print(f"offset={b.offset} OK")
        except Exception:
            # Fails in some platform due to clCreateSubBuffer
            # MISALIGNED_SUB_BUFFER_OFFSET
            print(f"offset={b.offset} not OK")
def add_array_by_name(self, key, size=None, dim=None, dtype=None):
    """
    Add a new array by name.

    The array has shape (size, dim) and is stored under *key*, replacing any
    array previously stored under the same key.

    :param key: The key (or name) of the new array
    :param size: (Default: current) The size of the new array; if not
                 specified, the current size is used
    :param dim: (Default: current) The dim of the new array; if not
                specified, the current dim is used
    :param dtype: (Default: current) The dtype of the new array; if not
                  specified, the context dtype is used
    """
    dim = self.__dim if dim is None else dim
    size = self.__size if size is None else size
    dtype = self.dtype if dtype is None else dtype

    new_array = cla.Array(self.__cl_queue, (size, dim), dtype=dtype)
    self.__opt_arrays[key] = new_array
def alloc_buf(self, size=None, like=None, wrap_in_array=True):
    """Allocate a read-write OpenCL buffer and return it.

    With *like* given, the buffer is created from the host memory of *like*
    (its ``base`` if it has one), registered in ``self.buffers``, passed to
    ``self.to_buf`` and, if *wrap_in_array* is True, wrapped in an array of
    *like*'s shape and dtype stored in ``self.arrays``. Without *like*, a
    buffer of *size* bytes is allocated.
    """
    mf = cl.mem_flags

    if like is None:
        return cl.Buffer(self.ctx, mf.READ_WRITE, size)

    # Use the owning array's memory when *like* is a view.
    hbuf = like if like.base is None else like.base
    buf = cl.Buffer(self.ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=hbuf)
    self.buffers[buf] = hbuf
    self.to_buf(buf)

    if wrap_in_array:
        self.arrays[buf] = clarray.Array(self.ctx, like.shape, like.dtype, data=buf)
    return buf
def setup_device(self, imshape):
    """Allocate device arrays and build the kernels for images of *imshape*.

    All work arrays are float32 and shaped like the image array ``clIm``;
    ``clO`` and ``clM`` are zero-initialised, the others are left
    uninitialised. Also resets the ``gaussians`` and ``gaussian_prgs`` caches.
    """
    print('Setting up with imshape = %s' % (str(imshape)))
    self.imshape = imshape

    template = cla.Array(self.q, imshape, numpy.float32)
    self.clIm = template

    # Work arrays, allocated in the same order as they are named.
    self.clm, self.clx, self.cly = [cla.empty_like(template) for _ in range(3)]
    self.clO, self.clM = [cla.zeros_like(template) for _ in range(2)]
    self.clF, self.clS, self.clThisS, self.clScratch = [
        cla.empty_like(template) for _ in range(4)
    ]

    self.radial_prg = pyopencl.Program(self.ctx, PROGRAM).build()

    self.sobel = Sobel(self.ctx, self.q)

    # self.sepcorr2d = NaiveSeparableCorrelation(self.ctx, self.q)
    self.sepcorr2d = LocalMemorySeparableCorrelation(self.ctx, self.q)

    # Small elementwise helpers: accumulate, normalise, subtract-scaled.
    self.accum = ElementwiseKernel(
        self.ctx,
        'float *a, float *b',
        'a[i] += b[i]')
    self.norm_s = ElementwiseKernel(
        self.ctx,
        'float *s, const float nRadii',
        's[i] = -1 * s[i] / nRadii',
        'norm_s')
    self.accum_s = ElementwiseKernel(
        self.ctx,
        'float *a, float *b, const float nr',
        'a[i] -= b[i] / nr')

    self.gaussians = {}
    self.gaussian_prgs = {}

    self.minmax = MinMaxKernel(self.ctx, self.q)
def setup_device(self, imshape):
    """Allocate device arrays and build the kernels for images of *imshape*.

    All work arrays are float32 and shaped like the image array ``clIm``;
    ``clO`` and ``clM`` are zero-initialised, the others are left
    uninitialised. Additionally creates the single-channel float image,
    sampler and ray-boundary finder used by the starburst step, and caches
    the ``calcF`` and ``calcOM`` kernels of the radial program.
    """
    print('Setting up with imshape = %s' % (str(imshape)))
    self.cached_shape = imshape

    template = cla.Array(self.q, imshape, np.float32)
    self.clIm = template

    # Work arrays, allocated in the same order as they are named.
    self.clm, self.clx, self.cly = [cla.empty_like(template) for _ in range(3)]
    self.clO, self.clM = [cla.zeros_like(template) for _ in range(2)]
    self.clF, self.clS, self.clThisS, self.clScratch = [
        cla.empty_like(template) for _ in range(4)
    ]

    self.radial_prg = pyopencl.Program(self.ctx, RADIAL_PROGRAM).build()

    self.sobel = Sobel(self.ctx, self.q)

    # self.sepcorr2d = NaiveSeparableCorrelation(self.ctx, self.q)
    self.sepcorr2d = LocalMemorySeparableCorrelation(self.ctx, self.q)

    # Small elementwise helpers: accumulate, normalise, subtract-scaled.
    self.accum = ElementwiseKernel(
        self.ctx,
        'float *a, float *b',
        'a[i] += b[i]')
    self.norm_s = ElementwiseKernel(
        self.ctx,
        'float *s, const float nRadii',
        's[i] = -1 * s[i] / nRadii',
        'norm_s')
    self.accum_s = ElementwiseKernel(
        self.ctx,
        'float *a, float *b, const float nr',
        'a[i] -= b[i] / nr')

    self.gaussians = {}
    self.gaussian_prgs = {}

    self.minmax = MinMaxKernel(self.ctx, self.q)

    # starburst storage
    image_format = cl.ImageFormat(cl.channel_order.R, cl.channel_type.FLOAT)
    self.clIm2D = cl.Image(self.ctx, mf.READ_ONLY, image_format, imshape)

    # Create sampler for sampling image object
    use_normalized_coords = False  # Non-normalized coordinates
    self.imSampler = cl.Sampler(self.ctx,
                                use_normalized_coords,
                                cl.addressing_mode.CLAMP_TO_EDGE,
                                cl.filter_mode.LINEAR)

    self.cl_find_ray_boundaries = FindRayBoundaries(self.ctx, self.q)

    radial = self.radial_prg
    self.calcF = radial.calcF
    self.calcOM = radial.calcOM
def arr_from_np(queue, nparr):
    """Copy the NumPy array *nparr* into a device buffer and wrap it in an array on *queue*.

    An object-dtype *nparr* (an array of arrays) is first flattened by
    concatenating its elements. The buffer is created read-write in the
    enclosing scope's ``ctx`` with the host data copied in.
    """
    # `np.object` was removed from NumPy; the builtin `object` is the dtype.
    if nparr.dtype == object:
        nparr = np.concatenate(nparr)
    buf = cl.Buffer(ctx, mf.READ_WRITE | mf.COPY_HOST_PTR, hostbuf=nparr)
    return clarray.Array(queue, nparr.shape, nparr.dtype, data=buf)
def _transfer(self, shape, pixel_size, energy, offset, exponent=False, t=None, queue=None,
              out=None, check=True, block=False):
    """Compute the flat field wavefield.

    The center is the first trajectory control point when *t* is None,
    otherwise the trajectory point at time *t*, shifted by *offset* given as
    (y, x). A vertical profile is created for the angles spanned by the rows
    of *shape* and uploaded to the device. If *exponent* is True, only the
    exponent is returned. If *check* is True and the phase profile is not
    ``'plane'``, the sampling is checked and a failure is logged as an error.
    If *block* is True, wait for the kernel to finish.

    Returned *out* array is different from the input one.
    """
    if queue is None:
        queue = cfg.OPENCL.queue

    ps = make_tuple(pixel_size)

    if t is not None:
        point = self.trajectory.get_point(t).simplified.magnitude
    else:
        point = self.trajectory.control_points.simplified.magnitude[0]
    x, y, z = point
    x = x + offset[1].simplified.magnitude
    y = y + offset[0].simplified.magnitude
    center = (x, y, z)

    cl_center = gutil.make_vfloat3(*center)
    # The kernel takes the pixel size ordered (x, y).
    cl_ps = gutil.make_vfloat2(*pixel_size.simplified.magnitude[::-1])

    # Vertical positions of the rows relative to the center and their angles.
    fov = np.arange(0, shape[0]) * ps[0] - y * q.m
    angles = np.arctan((fov / self.sample_distance).simplified)
    host_profile = self._create_vertical_profile(energy, angles, ps[0])
    host_profile = host_profile.rescale(1 / q.s).magnitude
    profile = cl_array.to_device(queue, host_profile.astype(cfg.PRECISION.np_float))

    if out is None:
        out = cl_array.Array(queue, shape, dtype=cfg.PRECISION.np_cplx)

    z_sample = self.sample_distance.simplified.magnitude
    lam = energy_to_wavelength(energy).simplified.magnitude
    phase = self.phase_profile != 'plane'
    parabola = self.phase_profile == 'parabola'

    def enqueue_make_flat(exponent_flag):
        # Both branches below differ only in the exponent flag.
        return cfg.OPENCL.programs['physics'].make_flat(
            queue,
            shape[::-1],
            None,
            out.data,
            profile.data,
            cl_center,
            cl_ps,
            cfg.PRECISION.np_float(z_sample),
            cfg.PRECISION.np_float(lam),
            np.int32(exponent_flag),
            np.int32(phase),
            np.int32(parabola))

    if exponent or check and phase:
        # The exponent is needed either on request or for the sampling check.
        ev = enqueue_make_flat(True)
        if check and phase and not is_wavefield_sampling_ok(out, queue=queue):
            LOG.error('Insufficient beam phase sampling')
        if not exponent:
            out = clmath.exp(out, queue=queue)
    else:
        ev = enqueue_make_flat(exponent)

    if block:
        ev.wait()

    return out
def solve_reduced_system(self, line_da, x_UH, x_LH, x_R_d, reduced_solver):
    """Assemble and solve the reduced tridiagonal system along *line_da*.

    The first and last entries of *x_UH* and *x_LH* and the two faces of
    *x_R_d* are gathered from all ranks of the line. Rank 0 builds the three
    diagonals of size ``2 * line_size``, solves the system on the device with
    *reduced_solver* and scatters the solution back. Return ``(alpha, beta)``,
    the two (nz, ny) slices of this rank's part of the solution.
    """
    nz, ny, nx = line_da.nz, line_da.ny, line_da.nx
    line_rank = line_da.rank
    line_size = line_da.size
    n_reduced = 2 * line_size

    # Collect the end values of the UH and LH vectors of every rank.
    x_UH_line = np.zeros(n_reduced, dtype=np.float64)
    x_LH_line = np.zeros(n_reduced, dtype=np.float64)
    uh_ends = np.array([x_UH[0], x_UH[-1]])
    line_da.gather([uh_ends, 2, MPI.DOUBLE], [x_UH_line, 2, MPI.DOUBLE])
    lh_ends = np.array([x_LH[0], x_LH[-1]])
    line_da.gather([lh_ends, 2, MPI.DOUBLE], [x_LH_line, 2, MPI.DOUBLE])

    # Derived datatype describing one rank's (nz, ny, 2) slab inside the
    # (nz, ny, 2 * line_size) line array.
    lengths = np.ones(line_size)
    displacements = np.arange(0, 2 * line_size, 2)
    starts = [0, 0, displacements[line_rank]]
    subarray_aux = MPI.DOUBLE.Create_subarray([nz, ny, n_reduced], [nz, ny, 2], starts)
    subarray = subarray_aux.Create_resized(0, 8)
    subarray.Commit()

    # Extract the two faces of the right hand side on the device.
    x_R_faces_d = cl_array.Array(self.queue, (nz, ny, 2), np.float64)
    self.copy_faces_kernel(self.queue, [1, ny, nz], None,
                           x_R_d.data, x_R_faces_d.data,
                           np.int32(nx), np.int32(ny), np.int32(nz),
                           np.int32(line_da.mx), np.int32(line_da.npx))
    x_R_faces = x_R_faces_d.get()

    x_R_faces_line = np.zeros([nz, ny, n_reduced], dtype=np.float64)
    line_da.gatherv([x_R_faces, MPI.DOUBLE],
                    [x_R_faces_line, lengths, displacements, subarray])

    params = None
    if line_rank == 0:
        # Lower diagonal.
        a_reduced = np.zeros(n_reduced, dtype=np.float64)
        a_reduced[0::2] = -1.
        a_reduced[1::2] = x_UH_line[1::2]
        a_reduced[[0, -1, 1]] = 0.

        # Main diagonal; first and last rows are identity rows.
        b_reduced = np.zeros(n_reduced, dtype=np.float64)
        b_reduced[0::2] = x_UH_line[0::2]
        b_reduced[1::2] = x_LH_line[1::2]
        b_reduced[[0, -1]] = 1.0

        # Upper diagonal.
        c_reduced = np.zeros(n_reduced, dtype=np.float64)
        c_reduced[0::2] = x_LH_line[0::2]
        c_reduced[1::2] = -1.
        c_reduced[[0, -1, -2]] = 0.

        a_reduced_d = cl_array.to_device(self.queue, a_reduced)
        b_reduced_d = cl_array.to_device(self.queue, b_reduced)
        c_reduced_d = cl_array.to_device(self.queue, c_reduced)
        # The solver takes a second, separate device copy of the upper diagonal.
        c2_reduced_d = cl_array.to_device(self.queue, c_reduced)
        d_reduced_d = cl_array.to_device(self.queue, x_R_faces_line)
        reduced_solver.solve(a_reduced_d, b_reduced_d, c_reduced_d, c2_reduced_d,
                             d_reduced_d)
        # The solution is read back from the right hand side array.
        params = d_reduced_d.get()

    params_local = np.zeros([nz, ny, 2], dtype=np.float64)
    line_da.scatterv([params, lengths, displacements, subarray],
                     [params_local, MPI.DOUBLE])

    alpha = params_local[:, :, 0].copy()
    beta = params_local[:, :, 1].copy()
    return alpha, beta
def volume_empty(self):
    """Return a newly allocated float32 device array of shape (K, block_size)."""
    volume_shape = (self.K, self.block_size)
    return cl_array.Array(
        self.ctx,
        queue=self.queue,
        shape=volume_shape,
        dtype=np.float32,
    )
def im2col_old(img, rec_field, n_filters, stride=1, zero_pad=0, wait_for=None):
    """Rearrange image patches of *img* into the columns of a 2D device array.

    *img* is a device array of shape (depth, height, width) and *rec_field*
    the receptive field as (kernel height, kernel width). Patches are taken
    every *stride* pixels with *zero_pad* zeros assumed around the image. The
    result has ``kh * kw * depth`` rows and one column per patch position.
    *n_filters* is accepted but not used here. *wait_for* is forwarded to the
    kernel call.

    Return ``(col, evt)``: the output array and the kernel's event.

    :type stride: int
    :type zero_pad: int
    """
    # Anything but float32 is treated as double in the kernel source.
    c_type = 'float' if img.dtype == np.float32 else 'double'
    q = clplatf.qs[0]

    d1, h1, w1 = img.shape
    kh, kw = rec_field
    out_h = kh * kw * d1
    w2 = (w1 - kw + 2 * zero_pad) // stride + 1
    h2 = (h1 - kh + 2 * zero_pad) // stride + 1
    # TODO check if w2 or h2 is not int and raise something or zeropad...
    out_w = w2 * h2

    # alloc output
    col = clarray.Array(q, (out_h, out_w), img.dtype)

    # Within a channel the row index is ky * kw + kx, hence ky = out_y / kw
    # (dividing by kh was only correct for square receptive fields).
    source = """
    __kernel void im2col_k(__global %(dtype)s *img,
                           int h, int w, int o_h, int o_w,
                           int kh, int kw, int stride, int padding,
                           __global %(dtype)s *out)
    {
        int gid = get_global_id(0);
        int out_w = o_w * o_h;
        int out_h = kw * kh;
        int out_x = gid %% out_w;
        int out_y = gid / out_w %% out_h;
        int kx = out_y %% kw;
        int ky = (out_y / kw) %% kh;
        int ch = gid / (kh * kw * o_h * o_w);
        int in_x = kx + (out_x %% o_w)*stride - padding;
        int in_y = ky + (out_x / o_w)*stride - padding;
        if (in_x >= 0 && in_x < w && in_y >= 0 && in_y < h) {
            out[gid] = img[(h * ch + in_y) * w + in_x];
        } else {
            out[gid] = 0;
        }
    }
    """ % {'dtype': c_type}
    prg = cl.Program(clplatf.ctx, source).build()

    # One work item per output element.
    evt = prg.im2col_k(q, (out_h * out_w, ), None,
                       img.data,
                       np.int32(h1), np.int32(w1),
                       np.int32(h2), np.int32(w2),
                       np.int32(kh), np.int32(kw),
                       np.int32(stride), np.int32(zero_pad),
                       col.data,
                       wait_for=wait_for)
    return col, evt
def array(self, shape, dtype, strides=None, allocator=None):
    """Return a new device array of *shape* and *dtype* on this object's queue.

    *strides* and *allocator* are forwarded to the array constructor.
    """
    options = {"strides": strides, "allocator": allocator}
    return clarray.Array(self._queue, shape, dtype, **options)
def make_tomography(self, projections, rotation, pause, num_ref_per_block=1,
                    num_proj_per_block=1, num_dark_img=0, start_frame=0, shape=None,
                    shot_noise=True, amplifier_noise=True, source_blur=True, queue=None):
    """Make sequence of *projections* projection images over 0 to *rotation*
    degrees. *pause* after each image. Proceed in image blocks, with
    *num_ref_per_block* flatfields and *num_proj_per_block* projections per
    block. Make *num_dark_img* dark images at the beginning. Start with
    *start_frame* (must be less or equal total number of images).

    This is a generator yielding ``(camera_image, image_name)`` for every
    image. For frames before *start_frame* only the clock and the counters
    are advanced and ``(None, None)`` is yielded. *shape* defaults to the
    camera shape, *shot_noise* and *amplifier_noise* are forwarded to the
    camera and *queue* is the OpenCL command queue (the configured default
    when None). *source_blur* currently has no effect because the blurring
    code is disabled.

    *projections* is expected to be a multiple of *num_proj_per_block*.
    """
    if queue is None:
        queue = cfg.OPENCL.queue
    shape_0 = self.detector.camera.shape
    if shape is None:
        shape = shape_0
    ps_0 = self.detector.pixel_size
    ps = shape_0[0] / float(shape[0]) * ps_0

    image = cl_array.Array(queue, shape, dtype=cfg.PRECISION.np_float)

    # NOTE: source blur is disabled.
    # if source_blur:
    #     source_blur_kernel = self.make_source_blur(shape, ps, queue=queue, block=False)

    angles = np.linspace(0, rotation, num=projections) * q.deg

    # Integer division: the block count is used as a repetition count and for
    # indexing, both of which need integers.
    num_blocks = projections // num_proj_per_block
    overall_no_images = num_dark_img + projections + num_blocks * num_ref_per_block

    DARK_IMAGE = 0
    PROJECTION = 1
    FLATFIELD = 2

    # Image type of every frame: darks first, then blocks of projections
    # followed by flatfields.
    darks = np.repeat(DARK_IMAGE, num_dark_img)
    blocks = np.repeat(PROJECTION, num_proj_per_block)
    blocks = np.append(blocks, np.repeat(FLATFIELD, num_ref_per_block))
    blocks = np.tile(blocks, num_blocks)
    image_type = np.append(darks, blocks)

    self.clock = 0 * q.s
    exptime = self.detector.camera._exp_time

    counter_darkimages = 0
    counter_projections = 0
    counter_flatfields = 0

    for i in range(overall_no_images):
        if start_frame > i:
            # Skipped frame: keep clock and counters in step with a full run.
            self.clock += exptime + pause
            if image_type[i] == DARK_IMAGE:
                counter_darkimages += 1
            elif image_type[i] == PROJECTION:
                counter_projections += 1
            elif image_type[i] == FLATFIELD:
                counter_flatfields += 1
            yield None, None
        else:
            image.fill(0)
            t_0 = self.clock
            t_next = self.get_next_time(self.clock, ps)
            image_name = None

            # Dark images:
            if image_type[i] == DARK_IMAGE:
                image_name = 'dark_{:>05}.tif'.format(counter_darkimages)
                counter_darkimages += 1
                self.clock += exptime + pause

            # Projections:
            elif image_type[i] == PROJECTION:
                # Turn sample
                self.tomo_rotate(angles[counter_projections])

                while t_next < t_0 + exptime:
                    LOG.debug('Motion blur: {} -> {}'.format(t_0, t_next))
                    image += self.compute_intensity(
                        self.clock, t_next, shape, ps)
                    self.clock = t_next
                    t_next = self.get_next_time(self.clock, ps)
                image += self.compute_intensity(self.clock, t_0 + exptime, shape, ps)
                self.clock = t_0 + exptime + pause

                # NOTE: source blur is disabled.
                # if source_blur:
                #     image = ip.ifft_2(ip.fft_2(image) * source_blur_kernel).real

                image_name = 'proj_{:>05}.tif'.format(counter_projections)
                counter_projections += 1
                LOG.debug('Projection: {} -> {}'.format(
                    t_0, t_0 + exptime))

            # Flatfields:
            elif image_type[i] == FLATFIELD:
                while t_next < t_0 + exptime:
                    LOG.debug('Motion blur: {} -> {}'.format(t_0, t_next))
                    image += self.compute_intensity(self.clock, t_next, shape, ps,
                                                    flat=True)
                    self.clock = t_next
                    t_next = self.get_next_time(self.clock, ps)
                image += self.compute_intensity(self.clock, t_0 + exptime, shape, ps,
                                                flat=True)
                self.clock = t_0 + exptime + pause

                image_name = 'ref_{:>05}.tif'.format(counter_flatfields)
                counter_flatfields += 1

            else:
                raise ValueError("Unknow image type requested. "
                                 "Options are: Dark image, projection, flatfield.")

            camera_image = self.detector.camera.get_image(
                image, shot_noise=shot_noise, amplifier_noise=amplifier_noise)

            yield camera_image, image_name
def compute_intensity(self, t_0, t_1, shape, pixel_size, queue=None, block=False,
                      flat=False):
    """Compute intensity between times *t_0* and *t_1*.

    For every energy the wavefield starts at one and is multiplied by the
    transmission of each optical element evaluated at *t_0*. After every
    element other than the source that has a non-zero distance, the wavefield
    is propagated and blurred in Fourier space. If *flat* is True, the sample
    is skipped. The detector-converted intensities of all energies are summed
    up and scaled by the exposure time ``t_1 - t_0``. *queue* is the OpenCL
    command queue (the configured default when None); if *block* is True, wait
    for the kernels to finish.
    """
    exp_time = (t_1 - t_0).simplified.magnitude

    if queue is None:
        queue = cfg.OPENCL.queue

    wavefield = cl_array.Array(queue, shape, dtype=cfg.PRECISION.np_cplx)
    # Scratch output array handed to every optical element.
    element_out = cl_array.zeros(queue, shape, cfg.PRECISION.np_cplx)
    total = cl_array.zeros(queue, shape, cfg.PRECISION.np_float)

    for energy in self.energies:
        wavefield.fill(1)

        for index, element in enumerate(self.oe):
            if flat and element == self.sample:
                continue

            wavefield *= element.transfer(shape, pixel_size, energy, t=t_0, queue=queue,
                                          out=element_out, check=False, block=block)

            # Propagate and blur optical element when not source
            distance = self.distances[index]
            if distance != 0 * q.m and element != self.source:
                wavelength = energy_to_wavelength(energy)
                propagator = compute_propagator(wavefield.shape[0], distance, wavelength,
                                                pixel_size, queue=queue, block=block,
                                                mollified=True)

                ip.fft_2(wavefield, queue=queue, block=block)

                # Blur width from the source size scaled by the ratio of this
                # distance to the summed distance up to this element.
                sdistance = np.sum(self.distances[:index + 1])
                fwhm = (distance * self.source.size / sdistance).simplified
                sigma = smath.fwnm_to_sigma(fwhm, n=2)
                psf = ip.get_gauss_2d(shape, sigma, pixel_size=pixel_size, fourier=True,
                                      queue=queue, block=block)

                wavefield *= psf
                wavefield *= propagator
                ip.ifft_2(wavefield, queue=queue, block=block)

        total += self.detector.convert(abs(wavefield)**2, energy)

    return total * exp_time