def test_kernel_cpu(): ctx = xo.ContextCpu() src_code = r""" double my_mul(const int n, const double* x1, const double* x2) { int tid; double y =0; for (tid=0; tid<n; tid++){ y+= x1[tid] * x2[tid]; } return y; } """ kernel_descriptions = { "my_mul": xo.Kernel( args=[ xo.Arg(xo.Int32, name="n"), xo.Arg(xo.Float64, pointer=True, name="x1"), xo.Arg(xo.Float64, pointer=True, name="x2"), ], ret=xo.Arg(xo.Float64), ) } ctx.add_kernels(sources=[src_code], kernels=kernel_descriptions) a1 = np.arange(10.0) a2 = np.arange(10.0) y = ctx.kernels.my_mul(n=len(a1), x1=a1, x2=a2) assert y == 285.0
class Base(xo.UnionRef): _reftypes = (Triangle, Square) _methods = [ xo.Method( c_name="compute_area", args=[xo.Arg(xo.Float64, name="scale")], ret=xo.Arg(xo.Float64), ) ]
def test_kernels(): for ctx in xo.context.get_test_contexts(): src_code = """ /*gpufun*/ void myfun(double x, double y, double* z){ z[0] = x * y; } /*gpukern*/ void my_mul(const int n, /*gpuglmem*/ const double* x1, /*gpuglmem*/ const double* x2, /*gpuglmem*/ double* y) { int tid = 0 //vectorize_over tid n double z; myfun(x1[tid], x2[tid], &z); y[tid] = z; //end_vectorize } """ kernel_descriptions = { "my_mul": xo.Kernel( args=[ xo.Arg(xo.Int32, name="n"), xo.Arg(xo.Float64, pointer=True, const=True, name="x1"), xo.Arg(xo.Float64, pointer=True, const=True, name="x2"), xo.Arg(xo.Float64, pointer=True, const=False, name="y"), ], n_threads="n", ), } # Import kernel in context ctx.add_kernels( sources=[src_code], kernels=kernel_descriptions, # save_src_as=f'_test_{name}.c') save_source_as=None, ) x1_host = np.array([1.0, 2.0, 3.0], dtype=np.float64) x2_host = np.array([7.0, 8.0, 9.0], dtype=np.float64) x1_dev = ctx.nparray_to_context_array(x1_host) x2_dev = ctx.nparray_to_context_array(x2_host) y_dev = ctx.zeros(shape=x1_host.shape, dtype=x1_host.dtype) ctx.kernels.my_mul(n=len(x1_host), x1=x1_dev, x2=x2_dev, y=y_dev) y_host = ctx.nparray_from_context_array(y_dev) assert np.allclose(y_host, x1_host * x2_host)
def test_ref_c_api(): for context in xo.context.get_test_contexts(): print(f"Test {context}") class MyStruct(xo.Struct): a = xo.Float64[:] class MyStruct2(xo.Struct): a = xo.Float64[:] sr = xo.Ref(MyStruct) ms = MyStruct(a=[1, 2, 3], _context=context) ms2 = MyStruct2(_buffer=ms._buffer, sr=ms, a=[0, 0, 0]) src = """ /*gpukern*/ void cp_sra_to_a(MyStruct2 ms, int64_t n){ for(int64_t ii=0; ii<n; ii++){ //vectorize_over ii n double const val = MyStruct2_get_sr_a(ms, ii); MyStruct2_set_a(ms, ii, val); }//end_vectorize } """ context.add_kernels( sources=[src], kernels={ "cp_sra_to_a": xo.Kernel( args=[ xo.Arg(MyStruct2, name="ms"), xo.Arg(xo.Int64, name="n"), ], n_threads="n", ) }, ) context.kernels.cp_sra_to_a(ms=ms2, n=len(ms.a)) for vv, ww in zip(ms2.a, ms2.sr.a): assert vv == ww
def test_unionref(): class Triangle(xo.Struct): b = xo.Float64 h = xo.Float64 _extra_c_source = """ /*gpufun*/ double Triangle_compute_area(Triangle tr, double scale){ double b = Triangle_get_b(tr); double h = Triangle_get_h(tr); return 0.5*b*h*scale; } """ class Square(xo.Struct): a = xo.Float64 _extra_c_source = """ /*gpufun*/ double Square_compute_area(Square sq, double scale){ double a = Square_get_a(sq); return a*a*scale; } """ class Base(xo.UnionRef): _reftypes = (Triangle, Square) _methods = [ xo.Method( c_name="compute_area", args=[xo.Arg(xo.Float64, name="scale")], ret=xo.Arg(xo.Float64), ) ] class Prism(xo.Struct): base = Base height = xo.Float64 volume = xo.Float64 _extra_c_source = """ /*gpukern*/ void Prism_compute_volume(Prism pr){ Base base = Prism_getp_base(pr); double height = Prism_get_height(pr); double base_area = Base_compute_area(base, 3.); printf("base_area = %e", base_area); Prism_set_volume(pr, base_area*height); } """ for context in xo.context.get_test_contexts(): print(f"Test {context}") context.add_kernels( kernels={ "Prism_compute_volume": xo.Kernel( args=[xo.Arg(Prism, name="prism")] ) } ) triangle = Triangle(b=2, h=3, _context=context) prism_triangle = Prism(base=triangle, height=5, _context=context) square = Square(a=2, _context=context) prism_square = Prism(base=square, height=10, _context=context) context.kernels.Prism_compute_volume(prism=prism_triangle) context.kernels.Prism_compute_volume(prism=prism_square) assert prism_triangle.volume == 45 assert prism_square.volume == 120 assert prism_triangle._has_refs
class BeamBeamBiGaussian3D(xt.BeamElement): _xofields = { '_sin_phi': xo.Float64, '_cos_phi': xo.Float64, '_tan_phi': xo.Float64, '_sin_alpha': xo.Float64, '_cos_alpha': xo.Float64, 'ref_shift_x': xo.Float64, 'ref_shift_px': xo.Float64, 'ref_shift_y': xo.Float64, 'ref_shift_py': xo.Float64, 'ref_shift_zeta': xo.Float64, 'ref_shift_pzeta': xo.Float64, 'other_beam_shift_x': xo.Float64, 'other_beam_shift_px': xo.Float64, 'other_beam_shift_y': xo.Float64, 'other_beam_shift_py': xo.Float64, 'other_beam_shift_zeta': xo.Float64, 'other_beam_shift_pzeta': xo.Float64, 'post_subtract_x': xo.Float64, 'post_subtract_px': xo.Float64, 'post_subtract_y': xo.Float64, 'post_subtract_py': xo.Float64, 'post_subtract_zeta': xo.Float64, 'post_subtract_pzeta': xo.Float64, 'other_beam_q0': xo.Float64, 'num_slices_other_beam': xo.Int64, 'slices_other_beam_num_particles': xo.Float64[:], 'slices_other_beam_x_center_star': xo.Float64[:], 'slices_other_beam_px_center_star': xo.Float64[:], 'slices_other_beam_y_center_star': xo.Float64[:], 'slices_other_beam_py_center_star': xo.Float64[:], 'slices_other_beam_zeta_center_star': xo.Float64[:], 'slices_other_beam_pzeta_center_star': xo.Float64[:], 'slices_other_beam_Sigma_11_star': xo.Float64[:], 'slices_other_beam_Sigma_12_star': xo.Float64[:], 'slices_other_beam_Sigma_13_star': xo.Float64[:], 'slices_other_beam_Sigma_14_star': xo.Float64[:], 'slices_other_beam_Sigma_22_star': xo.Float64[:], 'slices_other_beam_Sigma_23_star': xo.Float64[:], 'slices_other_beam_Sigma_24_star': xo.Float64[:], 'slices_other_beam_Sigma_33_star': xo.Float64[:], 'slices_other_beam_Sigma_34_star': xo.Float64[:], 'slices_other_beam_Sigma_44_star': xo.Float64[:], 'min_sigma_diff': xo.Float64, 'threshold_singular': xo.Float64, } _extra_c_sources = [ _pkg_root.joinpath('headers/constants.h'), _pkg_root.joinpath('headers/sincos.h'), _pkg_root.joinpath('headers/power_n.h'), _pkg_root.joinpath( 'fieldmaps/bigaussian_src/complex_error_function.h'), '#define NOFIELDMAP', #TODO Remove this workaround _pkg_root.joinpath('fieldmaps/bigaussian_src/bigaussian.h'), '#undef NOFIELDMAP', #TODO Remove this workaround _pkg_root.joinpath( 'beam_elements/beambeam_src/beambeam3d_transport_sigmas.h'), _pkg_root.joinpath( 'beam_elements/beambeam_src/beambeam3d_ref_frame_changes.h'), _pkg_root.joinpath('beam_elements/beambeam_src/beambeam3d.h'), _pkg_root.joinpath( 'beam_elements/beambeam_src/beambeam3d_methods_for_strongstrong.h' ), ] _per_particle_kernels = { 'synchro_beam_kick': xo.Kernel( c_name='BeamBeam3D_selective_apply_synchrobeam_kick_local_particle', args=[ xo.Arg(xo.Int64, pointer=True, name='i_slice_for_particles') ]), 'change_ref_frame': xo.Kernel( c_name='BeamBeamBiGaussian3D_change_ref_frame_local_particle', args=[]), 'change_back_ref_frame_and_subtract_dipolar': xo.Kernel( c_name= 'BeamBeamBiGaussian3D_change_back_ref_frame_and_subtract_dipolar_local_particle', args=[]), } def __init__(self, phi=None, alpha=None, other_beam_q0=None, slices_other_beam_num_particles=None, slices_other_beam_x_center=0., slices_other_beam_px_center=0., slices_other_beam_y_center=0., slices_other_beam_py_center=0., slices_other_beam_zeta_center=None, slices_other_beam_pzeta_center=0., slices_other_beam_x_center_star=None, slices_other_beam_px_center_star=None, slices_other_beam_y_center_star=None, slices_other_beam_py_center_star=None, slices_other_beam_zeta_center_star=None, slices_other_beam_pzeta_center_star=None, slices_other_beam_Sigma_11=None, slices_other_beam_Sigma_12=None, slices_other_beam_Sigma_13=None, slices_other_beam_Sigma_14=None, slices_other_beam_Sigma_22=None, slices_other_beam_Sigma_23=None, slices_other_beam_Sigma_24=None, slices_other_beam_Sigma_33=None, slices_other_beam_Sigma_34=None, slices_other_beam_Sigma_44=None, slices_other_beam_Sigma_11_star=None, slices_other_beam_Sigma_12_star=None, slices_other_beam_Sigma_13_star=None, slices_other_beam_Sigma_14_star=None, slices_other_beam_Sigma_22_star=None, slices_other_beam_Sigma_23_star=None, slices_other_beam_Sigma_24_star=None, slices_other_beam_Sigma_33_star=None, slices_other_beam_Sigma_34_star=None, slices_other_beam_Sigma_44_star=None, ref_shift_x=0, ref_shift_px=0, ref_shift_y=0, ref_shift_py=0, ref_shift_zeta=0, ref_shift_pzeta=0, other_beam_shift_x=0, other_beam_shift_px=0, other_beam_shift_y=0, other_beam_shift_py=0, other_beam_shift_zeta=0, other_beam_shift_pzeta=0, post_subtract_x=0, post_subtract_px=0, post_subtract_y=0, post_subtract_py=0, post_subtract_zeta=0, post_subtract_pzeta=0, min_sigma_diff=1e-10, threshold_singular=1e-28, old_interface=None, config_for_update=None, _sin_phi=None, _cos_phi=None, _tan_phi=None, _sin_alpha=None, _cos_alpha=None, **kwargs): if '_xobject' in kwargs.keys(): self.xoinitialize(**kwargs) return # Collective mode (pipeline update) if config_for_update is not None: self.config_for_update = config_for_update self.iscollective = True self.track = self._track_collective # switch to specific track method assert slices_other_beam_zeta_center is not None assert not np.isscalar(slices_other_beam_num_particles) # Some dummy values just to initialize the object if slices_other_beam_Sigma_11 is None: slices_other_beam_Sigma_11 = 1. if slices_other_beam_Sigma_12 is None: slices_other_beam_Sigma_12 = 1. if slices_other_beam_Sigma_22 is None: slices_other_beam_Sigma_22 = 1. if slices_other_beam_Sigma_33 is None: slices_other_beam_Sigma_33 = 1. if slices_other_beam_Sigma_34 is None: slices_other_beam_Sigma_34 = 1. if slices_other_beam_Sigma_44 is None: slices_other_beam_Sigma_44 = 1. if slices_other_beam_num_particles is None: slices_other_beam_num_particles = np.zeros_like( slices_other_beam_zeta_center) if old_interface is not None: self._init_from_old_interface(old_interface=old_interface, **kwargs) return assert (slices_other_beam_zeta_center is not None or slices_other_beam_zeta_center_star is not None) assert slices_other_beam_num_particles is not None assert not np.isscalar(slices_other_beam_num_particles), ( 'slices_other_beam_num_particles must be an array') if slices_other_beam_zeta_center is not None: assert not np.isscalar(slices_other_beam_zeta_center), ( 'slices_other_beam_zeta_center must be an array') assert (len(slices_other_beam_zeta_center) == len( slices_other_beam_num_particles)) if slices_other_beam_zeta_center_star is not None: assert not np.isscalar(slices_other_beam_zeta_center_star), ( 'slices_other_beam_zeta_center_star must be an array') assert (len(slices_other_beam_zeta_center_star) == len( slices_other_beam_num_particles)) n_slices = len(slices_other_beam_num_particles) self._allocate_xobject(n_slices, **kwargs) if self.iscollective: if not isinstance(self._buffer.context, xo.ContextCpu): raise NotImplementedError( 'BeamBeamBiGaussian3D only works with CPU context for now') if phi is None: assert _sin_phi is not None and _cos_phi is not None and _tan_phi is not None, ( 'phi must be specified if _sin_phi, _cos_phi, _tan_phi are not' ) self._sin_phi = _sin_phi self._cos_phi = _cos_phi self._tan_phi = _tan_phi else: self._sin_phi = np.sin(phi) self._cos_phi = np.cos(phi) self._tan_phi = np.tan(phi) if alpha is None: assert _sin_alpha is not None and _cos_alpha is not None, ( 'alpha must be specified if _sin_alpha, _cos_alpha are not') self._sin_alpha = _sin_alpha self._cos_alpha = _cos_alpha else: self._sin_alpha = np.sin(alpha) self._cos_alpha = np.cos(alpha) self.num_slices_other_beam = n_slices self.slices_other_beam_num_particles = self._arr2ctx( np.array(slices_other_beam_num_particles)) # Trigger properties to set corresponding starred quantities self._init_Sigmas( slices_other_beam_Sigma_11, slices_other_beam_Sigma_12, slices_other_beam_Sigma_13, slices_other_beam_Sigma_14, slices_other_beam_Sigma_22, slices_other_beam_Sigma_23, slices_other_beam_Sigma_24, slices_other_beam_Sigma_33, slices_other_beam_Sigma_34, slices_other_beam_Sigma_44, slices_other_beam_Sigma_11_star, slices_other_beam_Sigma_12_star, slices_other_beam_Sigma_13_star, slices_other_beam_Sigma_14_star, slices_other_beam_Sigma_22_star, slices_other_beam_Sigma_23_star, slices_other_beam_Sigma_24_star, slices_other_beam_Sigma_33_star, slices_other_beam_Sigma_34_star, slices_other_beam_Sigma_44_star, ) # Initialize slice positions in the boosted frame self._init_starred_positions( slices_other_beam_x_center, slices_other_beam_px_center, slices_other_beam_y_center, slices_other_beam_py_center, slices_other_beam_zeta_center, slices_other_beam_pzeta_center, slices_other_beam_x_center_star, slices_other_beam_px_center_star, slices_other_beam_y_center_star, slices_other_beam_py_center_star, slices_other_beam_zeta_center_star, slices_other_beam_pzeta_center_star) assert other_beam_q0 is not None self.other_beam_q0 = other_beam_q0 self.ref_shift_x = ref_shift_x self.ref_shift_px = ref_shift_px self.ref_shift_y = ref_shift_y self.ref_shift_py = ref_shift_py self.ref_shift_zeta = ref_shift_zeta self.ref_shift_pzeta = ref_shift_pzeta self.other_beam_shift_x = other_beam_shift_x self.other_beam_shift_px = other_beam_shift_px self.other_beam_shift_y = other_beam_shift_y self.other_beam_shift_py = other_beam_shift_py self.other_beam_shift_zeta = other_beam_shift_zeta self.other_beam_shift_pzeta = other_beam_shift_pzeta self.post_subtract_x = post_subtract_x self.post_subtract_px = post_subtract_px self.post_subtract_y = post_subtract_y self.post_subtract_py = post_subtract_py self.post_subtract_zeta = post_subtract_zeta self.post_subtract_pzeta = post_subtract_pzeta self.min_sigma_diff = min_sigma_diff self.threshold_singular = threshold_singular def _allocate_xobject(self, n_slices, **kwargs): self.xoinitialize(slices_other_beam_Sigma_11_star=n_slices, slices_other_beam_Sigma_12_star=n_slices, slices_other_beam_Sigma_13_star=n_slices, slices_other_beam_Sigma_14_star=n_slices, slices_other_beam_Sigma_22_star=n_slices, slices_other_beam_Sigma_23_star=n_slices, slices_other_beam_Sigma_24_star=n_slices, slices_other_beam_Sigma_33_star=n_slices, slices_other_beam_Sigma_34_star=n_slices, slices_other_beam_Sigma_44_star=n_slices, slices_other_beam_num_particles=n_slices, slices_other_beam_x_center_star=n_slices, slices_other_beam_px_center_star=n_slices, slices_other_beam_y_center_star=n_slices, slices_other_beam_py_center_star=n_slices, slices_other_beam_zeta_center_star=n_slices, slices_other_beam_pzeta_center_star=n_slices, **kwargs) def _init_from_old_interface(self, old_interface, **kwargs): params = old_interface n_slices = len(params["charge_slices"]) self._allocate_xobject(n_slices, **kwargs) self.other_beam_q0 = 1., # TODO: handle ions phi = params["phi"] alpha = params["alpha"] self._sin_phi = np.sin(phi) self._cos_phi = np.cos(phi) self._tan_phi = np.tan(phi) self._sin_alpha = np.sin(alpha) self._cos_alpha = np.cos(alpha) self.slices_other_beam_Sigma_11 = self._arr2ctx(params['sigma_11']) self.slices_other_beam_Sigma_12 = self._arr2ctx(params['sigma_12']) self.slices_other_beam_Sigma_13 = self._arr2ctx(params['sigma_13']) self.slices_other_beam_Sigma_14 = self._arr2ctx(params['sigma_14']) self.slices_other_beam_Sigma_22 = self._arr2ctx(params['sigma_22']) self.slices_other_beam_Sigma_23 = self._arr2ctx(params['sigma_23']) self.slices_other_beam_Sigma_24 = self._arr2ctx(params['sigma_24']) self.slices_other_beam_Sigma_33 = self._arr2ctx(params['sigma_33']) self.slices_other_beam_Sigma_34 = self._arr2ctx(params['sigma_34']) self.slices_other_beam_Sigma_44 = self._arr2ctx(params['sigma_44']) # Sort according to z, head at the first position in the arrays z_slices = np.array(params["zeta_slices"]).copy() N_part_per_slice = params["charge_slices"].copy() ind_sorted = np.argsort(z_slices)[::-1] z_slices = np.take(z_slices, ind_sorted) N_part_per_slice = np.take(N_part_per_slice, ind_sorted) ( x_slices_star, px_slices_star, y_slices_star, py_slices_star, zeta_slices_star, pzeta_slices_star, ) = _python_boost( x=0 * z_slices, px=0 * z_slices, y=0 * z_slices, py=0 * z_slices, zeta=z_slices, pzeta=0 * z_slices, sphi=self.sin_phi, cphi=self.cos_phi, tphi=self.tan_phi, salpha=self.sin_alpha, calpha=self.cos_alpha, ) self.slices_other_beam_num_particles = self._arr2ctx(N_part_per_slice) self.slices_other_beam_x_center_star = self._arr2ctx(x_slices_star) self.slices_other_beam_px_center_star = self._arr2ctx(px_slices_star) self.slices_other_beam_y_center_star = self._arr2ctx(y_slices_star) self.slices_other_beam_py_center_star = self._arr2ctx(py_slices_star) self.slices_other_beam_zeta_center_star = self._arr2ctx( zeta_slices_star) self.slices_other_beam_pzeta_center_star = self._arr2ctx( pzeta_slices_star) self.ref_shift_x = params['x_co'] self.ref_shift_px = params['px_co'] self.ref_shift_y = params['y_co'] self.ref_shift_py = params['py_co'] self.ref_shift_zeta = params['zeta_co'] self.ref_shift_pzeta = params['delta_co'] self.other_beam_shift_x = params["x_bb_co"] self.other_beam_shift_px = 0 self.other_beam_shift_y = params["y_bb_co"] self.other_beam_shift_py = 0 self.other_beam_shift_zeta = 0 self.other_beam_shift_pzeta = 0 self.post_subtract_x = params['d_x'] self.post_subtract_px = params['d_px'] self.post_subtract_y = params['d_y'] self.post_subtract_py = params['d_py'] self.post_subtract_zeta = params['d_zeta'] self.post_subtract_pzeta = params['d_delta'] self.min_sigma_diff = 1e-10 self.threshold_singular = 1e-28 self.num_slices_other_beam = len(params["charge_slices"]) def _track_collective(self, particles, _force_suspend=False): if self.config_for_update._working_on_bunch is not None: # I am resuming a suspended calculation assert self.config_for_update._working_on_bunch == particles.name # Beam beam interaction in the boosted frame ret = self._apply_bb_kicks_in_boosted_frame(particles) if ret is not None: return ret # PipelineStatus else: # Back to line reference frame self.change_back_ref_frame_and_subtract_dipolar(particles) return None else: # I am working on a new bunch if particles._num_active_particles == 0: return # All particles are lost # Check that the element is not occupied by a bunch assert self.config_for_update._i_step == 0 assert self.config_for_update._particles_slice_index is None assert self.config_for_update._working_on_bunch is None assert particles.name in self.config_for_update.collision_schedule.keys( ) self.config_for_update._working_on_bunch = particles.name # Slice bunch (in the lab frame) self.config_for_update._particles_slice_index = ( self.config_for_update.slicer.get_slice_indices(particles)) self.config_for_update._other_beam_slice_index_for_particles = np.zeros_like( self.config_for_update._particles_slice_index) # Handle update frequency at_turn = particles._xobject.at_turn[ 0] # On CPU there is always an active particle in position 0 if (self.config_for_update.update_every is not None and at_turn % self.config_for_update.update_every == 0): self.config_for_update._do_update = True else: self.config_for_update._do_update = False # Change reference frame self.change_ref_frame(particles) # Can be used to test the resume without pipeline if _force_suspend: return xt.PipelineStatus(on_hold=True) # Beam beam interaction in the boosted frame ret = self._apply_bb_kicks_in_boosted_frame(particles) if ret is not None: return ret # PipelineStatus else: # Back to line reference frame self.change_back_ref_frame_and_subtract_dipolar(particles) return None def _apply_bb_kicks_in_boosted_frame(self, particles): n_slices_self_beam = self.config_for_update.slicer.num_slices while True: if self.config_for_update._do_update: pipeline_manager = self.config_for_update.pipeline_manager # Pipeline update --> still to be developed # # Compute momenta # momenta_self = self.config_for_update.compute_slice_momenta( # particles, self.slice_index) # # Send momenta (I invent a bit for now...) # pipeline_manager.send_data(momenta_self, etc___) # # Receive momenta (I invent a bit for now...) # data_received = self.pipeline_manager.receive_data(etc___) # if data_received == 'not_ready': # return xt.PipelineStatus(on_hold=True) # else: # # Remember that here one needs to make a transformation between # # the coordinates of the the two beams (positions and sigma matrix) # # Here how it is done in the mask: https://github.com/lhcopt/lhcmask/blob/865eaf9d7b9b888c6486de00214c0c24ac93cfd3/pymask/beambeam.py#L310 # self.config_for_update._update_from_received_data( # beambeam_element=self, # data_received=data_received) # Method to be written self.config_for_update._other_beam_slice_index_for_particles[:] = ( self.config_for_update._i_step - self.config_for_update._particles_slice_index) self.config_for_update._other_beam_slice_index_for_particles[ self.config_for_update._particles_slice_index < 0] = -1 self.synchro_beam_kick( particles=particles, i_slice_for_particles=self.config_for_update. _other_beam_slice_index_for_particles) self.config_for_update._i_step += 1 if self.config_for_update._i_step == (n_slices_self_beam + self.num_slices_other_beam): self.config_for_update._i_step = 0 self.config_for_update._working_on_bunch = None break return None @property def sin_phi(self): return self._sin_phi @property def cos_phi(self): return self._cos_phi @property def tan_phi(self): return self._tan_phi @property def sin_alpha(self): return self._sin_alpha @property def cos_alpha(self): return self._cos_alpha @property def phi(self): return np.arctan2(self.sin_phi, self.cos_phi) @phi.setter def phi(self, value): raise NotImplementedError("Setting phi is not implemented yet") @property def alpha(self): return np.arctan2(self.sin_alpha, self.cos_alpha) @alpha.setter def alpha(self, value): raise NotImplementedError("Setting alpha is not implemented yet") def _inv_boost_slice_centers(self): x_star_slices = self.slices_other_beam_x_center_star px_star_slices = self.slices_other_beam_px_center_star y_star_slices = self.slices_other_beam_y_center_star py_star_slices = self.slices_other_beam_py_center_star zeta_star_slices = self.slices_other_beam_zeta_center_star pzeta_star_slices = self.slices_other_beam_pzeta_center_star ( x_slices, px_slices, y_slices, py_slices, zeta_slices, pzeta_slices, ) = _python_inv_boost( x_st=x_star_slices, px_st=px_star_slices, y_st=y_star_slices, py_st=py_star_slices, zeta_st=zeta_star_slices, pzeta_st=pzeta_star_slices, sphi=self.sin_phi, cphi=self.cos_phi, tphi=self.tan_phi, salpha=self.sin_alpha, calpha=self.cos_alpha, ) return x_slices, px_slices, y_slices, py_slices, zeta_slices, pzeta_slices def _init_Sigmas( self, slices_other_beam_Sigma_11, slices_other_beam_Sigma_12, slices_other_beam_Sigma_13, slices_other_beam_Sigma_14, slices_other_beam_Sigma_22, slices_other_beam_Sigma_23, slices_other_beam_Sigma_24, slices_other_beam_Sigma_33, slices_other_beam_Sigma_34, slices_other_beam_Sigma_44, slices_other_beam_Sigma_11_star, slices_other_beam_Sigma_12_star, slices_other_beam_Sigma_13_star, slices_other_beam_Sigma_14_star, slices_other_beam_Sigma_22_star, slices_other_beam_Sigma_23_star, slices_other_beam_Sigma_24_star, slices_other_beam_Sigma_33_star, slices_other_beam_Sigma_34_star, slices_other_beam_Sigma_44_star, ): # Mandatory sigmas assert (slices_other_beam_Sigma_11 is not None or slices_other_beam_Sigma_11_star is not None), ("`slices_other_beam_Sigma_11` must be provided") assert (slices_other_beam_Sigma_12 is not None or slices_other_beam_Sigma_12_star is not None), ("`slices_other_beam_Sigma_12` must be provided") assert (slices_other_beam_Sigma_22 is not None or slices_other_beam_Sigma_22_star is not None), ("`slices_other_beam_Sigma_22` must be provided") assert (slices_other_beam_Sigma_33 is not None or slices_other_beam_Sigma_33_star is not None), ("`slices_other_beam_Sigma_33` must be provided") assert (slices_other_beam_Sigma_34 is not None or slices_other_beam_Sigma_34_star is not None), ("`slices_other_beam_Sigma_34` must be provided") assert (slices_other_beam_Sigma_44 is not None or slices_other_beam_Sigma_44_star is not None), ("`slices_other_beam_Sigma_44` must be provided") # Coupling between transverse planes if slices_other_beam_Sigma_13 is None and slices_other_beam_Sigma_13_star is None: slices_other_beam_Sigma_13 = 0 if slices_other_beam_Sigma_14 is None and slices_other_beam_Sigma_14_star is None: slices_other_beam_Sigma_14 = 0 if slices_other_beam_Sigma_23 is None and slices_other_beam_Sigma_23_star is None: slices_other_beam_Sigma_23 = 0 if slices_other_beam_Sigma_24 is None and slices_other_beam_Sigma_24_star is None: slices_other_beam_Sigma_24 = 0 if slices_other_beam_Sigma_11 is not None: self.slices_other_beam_Sigma_11 = self._arr2ctx( slices_other_beam_Sigma_11) else: self.slices_other_beam_Sigma_11_star = self._arr2ctx( slices_other_beam_Sigma_11_star) if slices_other_beam_Sigma_12 is not None: self.slices_other_beam_Sigma_12 = self._arr2ctx( slices_other_beam_Sigma_12) else: self.slices_other_beam_Sigma_12_star = self._arr2ctx( slices_other_beam_Sigma_12_star) if slices_other_beam_Sigma_13 is not None: self.slices_other_beam_Sigma_13 = self._arr2ctx( slices_other_beam_Sigma_13) else: self.slices_other_beam_Sigma_13_star = self._arr2ctx( slices_other_beam_Sigma_13_star) if slices_other_beam_Sigma_14 is not None: self.slices_other_beam_Sigma_14 = self._arr2ctx( slices_other_beam_Sigma_14) else: self.slices_other_beam_Sigma_14_star = self._arr2ctx( slices_other_beam_Sigma_14_star) if slices_other_beam_Sigma_22 is not None: self.slices_other_beam_Sigma_22 = self._arr2ctx( slices_other_beam_Sigma_22) else: self.slices_other_beam_Sigma_22_star = self._arr2ctx( slices_other_beam_Sigma_22_star) if slices_other_beam_Sigma_23 is not None: self.slices_other_beam_Sigma_23 = self._arr2ctx( slices_other_beam_Sigma_23) else: self.slices_other_beam_Sigma_23_star = self._arr2ctx( slices_other_beam_Sigma_23_star) if slices_other_beam_Sigma_24 is not None: self.slices_other_beam_Sigma_24 = self._arr2ctx( slices_other_beam_Sigma_24) else: self.slices_other_beam_Sigma_24_star = self._arr2ctx( slices_other_beam_Sigma_24_star) if slices_other_beam_Sigma_33 is not None: self.slices_other_beam_Sigma_33 = self._arr2ctx( slices_other_beam_Sigma_33) else: self.slices_other_beam_Sigma_33_star = self._arr2ctx( slices_other_beam_Sigma_33_star) if slices_other_beam_Sigma_34 is not None: self.slices_other_beam_Sigma_34 = self._arr2ctx( slices_other_beam_Sigma_34) else: self.slices_other_beam_Sigma_34_star = self._arr2ctx( slices_other_beam_Sigma_34_star) if slices_other_beam_Sigma_44 is not None: self.slices_other_beam_Sigma_44 = self._arr2ctx( slices_other_beam_Sigma_44) else: self.slices_other_beam_Sigma_44_star = self._arr2ctx( slices_other_beam_Sigma_44_star) def _init_starred_positions( self, slices_other_beam_x_center, slices_other_beam_px_center, slices_other_beam_y_center, slices_other_beam_py_center, slices_other_beam_zeta_center, slices_other_beam_pzeta_center, slices_other_beam_x_center_star, slices_other_beam_px_center_star, slices_other_beam_y_center_star, slices_other_beam_py_center_star, slices_other_beam_zeta_center_star, slices_other_beam_pzeta_center_star): if slices_other_beam_zeta_center is not None: # Check correct according to z, head at the first position in the arrays assert np.all( slices_other_beam_zeta_center[:-1] >= slices_other_beam_zeta_center[1:] ), ('slices_other_beam_zeta_center must be sorted from to tail (descending zeta)' ) ( x_slices_star, px_slices_star, y_slices_star, py_slices_star, zeta_slices_star, pzeta_slices_star, ) = _python_boost( x=slices_other_beam_x_center, px=slices_other_beam_px_center, y=slices_other_beam_y_center, py=slices_other_beam_py_center, zeta=slices_other_beam_zeta_center, pzeta=slices_other_beam_pzeta_center, sphi=self.sin_phi, cphi=self.cos_phi, tphi=self.tan_phi, salpha=self.sin_alpha, calpha=self.cos_alpha, ) # User-provided value has priority if slices_other_beam_x_center_star is not None: self.slices_other_beam_x_center_star = slices_other_beam_x_center_star else: self.slices_other_beam_x_center_star = self._arr2ctx(x_slices_star) if slices_other_beam_px_center_star is not None: self.slices_other_beam_px_center_star = slices_other_beam_px_center_star else: self.slices_other_beam_px_center_star = self._arr2ctx( px_slices_star) if slices_other_beam_y_center_star is not None: self.slices_other_beam_y_center_star = slices_other_beam_y_center_star else: self.slices_other_beam_y_center_star = self._arr2ctx(y_slices_star) if slices_other_beam_py_center_star is not None: self.slices_other_beam_py_center_star = slices_other_beam_py_center_star else: self.slices_other_beam_py_center_star = self._arr2ctx( py_slices_star) if slices_other_beam_zeta_center_star is not None: self.slices_other_beam_zeta_center_star = slices_other_beam_zeta_center_star else: self.slices_other_beam_zeta_center_star = self._arr2ctx( zeta_slices_star) if slices_other_beam_pzeta_center_star is not None: self.slices_other_beam_pzeta_center_star = slices_other_beam_pzeta_center_star else: self.slices_other_beam_pzeta_center_star = self._arr2ctx( pzeta_slices_star) # The following properties are generate by this code: ## for nn in 'x px y py zeta pzeta'.split(): ## print(f''' ## @property ## def slices_other_beam_{nn}_center(self): ## (x_slices, px_slices, y_slices, py_slices, ## zeta_slices, pzeta_slices) = self._inv_boost_slice_centers() ## return self._buffer.context.linked_array_type.from_array( ## {nn}_slices, ## mode="readonly") ## @slices_other_beam_{nn}_center.setter ## def slices_other_beam_{nn}_center(self, value): ## raise NotImplementedError( ## "Setting slices_other_beam_{nn}_center is not implemented yet")\n''') @property def slices_other_beam_x_center(self): (x_slices, px_slices, y_slices, py_slices, zeta_slices, pzeta_slices) = self._inv_boost_slice_centers() return self._buffer.context.linked_array_type.from_array( x_slices, mode="readonly") @slices_other_beam_x_center.setter def slices_other_beam_x_center(self, value): raise NotImplementedError( "Setting slices_other_beam_x_center is not implemented yet") @property def slices_other_beam_px_center(self): (x_slices, px_slices, y_slices, py_slices, zeta_slices, pzeta_slices) = self._inv_boost_slice_centers() return self._buffer.context.linked_array_type.from_array( px_slices, mode="readonly") @slices_other_beam_px_center.setter def slices_other_beam_px_center(self, value): raise NotImplementedError( "Setting slices_other_beam_px_center is not implemented yet") @property def slices_other_beam_y_center(self): (x_slices, px_slices, y_slices, py_slices, zeta_slices, pzeta_slices) = self._inv_boost_slice_centers() return self._buffer.context.linked_array_type.from_array( y_slices, mode="readonly") @slices_other_beam_y_center.setter def slices_other_beam_y_center(self, value): raise NotImplementedError( "Setting slices_other_beam_y_center is not implemented yet") @property def slices_other_beam_py_center(self): (x_slices, px_slices, y_slices, py_slices, zeta_slices, pzeta_slices) = self._inv_boost_slice_centers() return self._buffer.context.linked_array_type.from_array( py_slices, mode="readonly") @slices_other_beam_py_center.setter def slices_other_beam_py_center(self, value): raise NotImplementedError( "Setting slices_other_beam_py_center is not implemented yet") @property def slices_other_beam_zeta_center(self): (x_slices, px_slices, y_slices, py_slices, zeta_slices, pzeta_slices) = self._inv_boost_slice_centers() return self._buffer.context.linked_array_type.from_array( zeta_slices, mode="readonly") @slices_other_beam_zeta_center.setter def slices_other_beam_zeta_center(self, value): raise NotImplementedError( "Setting slices_other_beam_zeta_center is not implemented yet") @property def slices_other_beam_pzeta_center(self): (x_slices, px_slices, y_slices, py_slices, zeta_slices, pzeta_slices) = self._inv_boost_slice_centers() return self._buffer.context.linked_array_type.from_array( pzeta_slices, mode="readonly") @slices_other_beam_pzeta_center.setter def slices_other_beam_pzeta_center(self, value): raise NotImplementedError( "Setting slices_other_beam_pzeta_center is not implemented yet") # The following properties are generate by this code: ## for nn, factor in ( ## ('11', '1.'), ## ('12', 'self.cos_phi'), ## ('13', '1.'), ## ('14', 'self.cos_phi'), ## ('22', '(self.cos_phi * self.cos_phi)'), ## ('23', 'self.cos_phi'), ## ('24', '(self.cos_phi * self.cos_phi)'), ## ('33', '1.'), ## ('34', 'self.cos_phi'), ## ('44', '(self.cos_phi * self.cos_phi)')): ## print(f""" ## @property ## def slices_other_beam_Sigma_{nn}(self): ## return self._buffer.context.linked_array_type.from_array( ## self.slices_other_beam_Sigma_{nn}_star * {factor}, ## mode='setitem_from_container', ## container=self, ## container_setitem_name='_Sigma_{nn}_setitem') ## def _Sigma_{nn}_setitem(self, indx, val): ## self.slices_other_beam_Sigma_{nn}_star[indx] = val / {factor} ## ## @slices_other_beam_Sigma_{nn}.setter ## def slices_other_beam_Sigma_{nn}(self, value): ## self.slices_other_beam_Sigma_{nn}[:] = value\n""") @property def slices_other_beam_Sigma_11(self): return self._buffer.context.linked_array_type.from_array( self.slices_other_beam_Sigma_11_star * 1., mode='setitem_from_container', container=self, container_setitem_name='_Sigma_11_setitem') def _Sigma_11_setitem(self, indx, val): self.slices_other_beam_Sigma_11_star[indx] = val / 1. @slices_other_beam_Sigma_11.setter def slices_other_beam_Sigma_11(self, value): self.slices_other_beam_Sigma_11[:] = value @property def slices_other_beam_Sigma_12(self): return self._buffer.context.linked_array_type.from_array( self.slices_other_beam_Sigma_12_star * self.cos_phi, mode='setitem_from_container', container=self, container_setitem_name='_Sigma_12_setitem') def _Sigma_12_setitem(self, indx, val): self.slices_other_beam_Sigma_12_star[indx] = val / self.cos_phi @slices_other_beam_Sigma_12.setter def slices_other_beam_Sigma_12(self, value): self.slices_other_beam_Sigma_12[:] = value @property def slices_other_beam_Sigma_13(self): return self._buffer.context.linked_array_type.from_array( self.slices_other_beam_Sigma_13_star * 1., mode='setitem_from_container', container=self, container_setitem_name='_Sigma_13_setitem') def _Sigma_13_setitem(self, indx, val): self.slices_other_beam_Sigma_13_star[indx] = val / 1. @slices_other_beam_Sigma_13.setter def slices_other_beam_Sigma_13(self, value): self.slices_other_beam_Sigma_13[:] = value @property def slices_other_beam_Sigma_14(self): return self._buffer.context.linked_array_type.from_array( self.slices_other_beam_Sigma_14_star * self.cos_phi, mode='setitem_from_container', container=self, container_setitem_name='_Sigma_14_setitem') def _Sigma_14_setitem(self, indx, val): self.slices_other_beam_Sigma_14_star[indx] = val / self.cos_phi @slices_other_beam_Sigma_14.setter def slices_other_beam_Sigma_14(self, value): self.slices_other_beam_Sigma_14[:] = value @property def slices_other_beam_Sigma_22(self): return self._buffer.context.linked_array_type.from_array( self.slices_other_beam_Sigma_22_star * (self.cos_phi * self.cos_phi), mode='setitem_from_container', container=self, container_setitem_name='_Sigma_22_setitem') def _Sigma_22_setitem(self, indx, val): self.slices_other_beam_Sigma_22_star[indx] = val / (self.cos_phi * self.cos_phi) @slices_other_beam_Sigma_22.setter def slices_other_beam_Sigma_22(self, value): self.slices_other_beam_Sigma_22[:] = value @property def slices_other_beam_Sigma_23(self): return self._buffer.context.linked_array_type.from_array( self.slices_other_beam_Sigma_23_star * self.cos_phi, mode='setitem_from_container', container=self, container_setitem_name='_Sigma_23_setitem') def _Sigma_23_setitem(self, indx, val): self.slices_other_beam_Sigma_23_star[indx] = val / self.cos_phi @slices_other_beam_Sigma_23.setter def slices_other_beam_Sigma_23(self, value): self.slices_other_beam_Sigma_23[:] = value @property def slices_other_beam_Sigma_24(self): return self._buffer.context.linked_array_type.from_array( self.slices_other_beam_Sigma_24_star * (self.cos_phi * self.cos_phi), mode='setitem_from_container', container=self, container_setitem_name='_Sigma_24_setitem') def _Sigma_24_setitem(self, indx, val): self.slices_other_beam_Sigma_24_star[indx] = val / (self.cos_phi * self.cos_phi) @slices_other_beam_Sigma_24.setter def slices_other_beam_Sigma_24(self, value): self.slices_other_beam_Sigma_24[:] = value @property def slices_other_beam_Sigma_33(self): return self._buffer.context.linked_array_type.from_array( self.slices_other_beam_Sigma_33_star * 1., mode='setitem_from_container', container=self, container_setitem_name='_Sigma_33_setitem') def _Sigma_33_setitem(self, indx, val): self.slices_other_beam_Sigma_33_star[indx] = val / 1. @slices_other_beam_Sigma_33.setter def slices_other_beam_Sigma_33(self, value): self.slices_other_beam_Sigma_33[:] = value @property def slices_other_beam_Sigma_34(self): return self._buffer.context.linked_array_type.from_array( self.slices_other_beam_Sigma_34_star * self.cos_phi, mode='setitem_from_container', container=self, container_setitem_name='_Sigma_34_setitem') def _Sigma_34_setitem(self, indx, val): self.slices_other_beam_Sigma_34_star[indx] = val / self.cos_phi @slices_other_beam_Sigma_34.setter def slices_other_beam_Sigma_34(self, value): self.slices_other_beam_Sigma_34[:] = value @property def slices_other_beam_Sigma_44(self): return self._buffer.context.linked_array_type.from_array( self.slices_other_beam_Sigma_44_star * (self.cos_phi * self.cos_phi), mode='setitem_from_container', container=self, container_setitem_name='_Sigma_44_setitem') def _Sigma_44_setitem(self, indx, val): self.slices_other_beam_Sigma_44_star[indx] = val / (self.cos_phi * self.cos_phi) @slices_other_beam_Sigma_44.setter def slices_other_beam_Sigma_44(self, value): self.slices_other_beam_Sigma_44[:] = value
fa = xo.Float64 fb = xo.Int64[:] source = """ void k1(StructA obj){ printf("hello\\n"); printf("fa=%g\\n", StructA_get_fa(obj)); printf("len(fb)=%ld\\n", StructA_len_fb(obj)); printf("fb[3]=%ld\\n", StructA_get_fb(obj,3)); }; double k2(StructA obj){ return StructA_get_fa(obj)*3; }; """ ks = { "k1": xo.Kernel([xo.Arg(StructA, name="obj")]), "k2": xo.Kernel([xo.Arg(StructA, name="obj")], ret=xo.Arg(xo.Float64)), } ctx = xo.ContextCpu() ctx.add_kernels([source], ks, extra_headers=["#include <stdio.h>"]) s1 = StructA(fa=0, fb=4, _context=ctx) s1.fa = 2.3 s1.fb[3] = 1 ctx.kernels.k1(obj=s1) ctx.kernels.k2(obj=s1)
def test_cerrf_q1(): ctx = xo.ContextCpu(omp_num_threads=2) xx = np.logspace(-8, +8, 51, dtype=np.float64) yy = np.logspace(-8, +8, 51, dtype=np.float64) n_re = len(xx) n_im = len(yy) n_z = len(yy) * len(xx) re_absc = np.arange(n_z, dtype=np.float64).reshape(n_im, n_re) im_absc = np.arange(n_z, dtype=np.float64).reshape(n_im, n_re) wz_cmp_re = np.arange(n_z, dtype=np.float64).reshape(n_im, n_re) wz_cmp_im = np.arange(n_z, dtype=np.float64).reshape(n_im, n_re) for jj, y in enumerate(yy): re_absc[jj, :] = xx[:] for ii, x in enumerate(xx): im_absc[:, ii] = yy[:] # Using scipy's wofz implemenation of the Faddeeva method. This is # (at the time of this writing in 2021) based on the MIT ab-initio # implementation using a combination of Algorithm 680 for large |z| and # Algorithm 916 for the remainder fo C. It claims a relative accuracy of # 1e-13 across the whole of C and is thus suitable to check the accuracy # of the cerrf_q1 implementation which has a target accuracy of 10^{-10} # in the *absolute* error. for jj, y in enumerate(yy): for ii, x in enumerate(xx): z = x + 1.0j * y wz = wofz_scipy(z) wz_cmp_re[jj, ii] = wz.real wz_cmp_im[jj, ii] = wz.imag src_code = """ /*gpukern*/ void eval_cerrf_q1( const int n, /*gpuglmem*/ double const* /*restrict*/ re, /*gpuglmem*/ double const* /*restrict*/ im, /*gpuglmem*/ double* /*restrict*/ wz_re, /*gpuglmem*/ double* /*restrict*/ wz_im ) { int tid = 0; for( ; tid < n ; ++tid ) { //autovectorized if( tid < n ) { double const x = re[ tid ]; double const y = im[ tid ]; double wz_x, wz_y; cerrf_q1( x, y, &wz_x, &wz_y ); wz_re[ tid ] = wz_x; wz_im[ tid ] = wz_y; } } } """ kernel_descriptions = { "eval_cerrf_q1": xo.Kernel( args=[ xo.Arg(xo.Int32, name="n"), xo.Arg(xo.Float64, name="re", const=True, pointer=True), xo.Arg(xo.Float64, name="im", const=True, pointer=True), xo.Arg(xo.Float64, name="wz_re", pointer=True), xo.Arg(xo.Float64, name="wz_im", pointer=True), ], n_threads="n", ), } headers = [ _pkg_root.joinpath("headers/constants.h"), _pkg_root.joinpath("headers/sincos.h"), _pkg_root.joinpath("headers/power_n.h"), _pkg_root.joinpath( "fieldmaps/bigaussian_src/complex_error_function.h"), ] wz_re = np.arange(n_z, dtype=np.float64) wz_im = np.arange(n_z, dtype=np.float64) re_absc_dev = ctx.nparray_to_context_array(re_absc.reshape(n_z)) im_absc_dev = ctx.nparray_to_context_array(im_absc.reshape(n_z)) wz_re_dev = ctx.nparray_to_context_array(wz_re) wz_im_dev = ctx.nparray_to_context_array(wz_im) ctx.add_kernels(sources=[src_code], kernels=kernel_descriptions, extra_headers=headers) ctx.kernels.eval_cerrf_q1(n=n_z, re=re_absc_dev, im=im_absc_dev, wz_re=wz_re_dev, wz_im=wz_im_dev) wz_re = ctx.nparray_from_context_array(wz_re_dev).reshape(n_im, n_re) wz_im = ctx.nparray_from_context_array(wz_im_dev).reshape(n_im, n_re) d_abs_re = np.fabs(wz_re - wz_cmp_re) d_abs_im = np.fabs(wz_im - wz_cmp_im) # NOTE: target accuracy of cerrf_q1 is 0.5e-10 but the algorithm does # not converge to within target accuracy for all arguments in C, # especially close to the real axis. We therfore require that # d_abs_re.max(), d_abs_im.max() < 0.5e-9 assert d_abs_re.max() < 0.5e-9 assert d_abs_im.max() < 0.5e-9
_sigma_z = xo.Float64 _q_param = xo.Float64 _cq_param = xo.Float64 _beta_param = xo.Float64 _sqrt_beta_param = xo.Float64 _support_min = xo.Float64 _support_max = xo.Float64 LongitudinalProfileQGaussianData.extra_sources = [ _pkg_root.joinpath('longitudinal_profiles/qgaussian_src/qgaussian.h') ] LongitudinalProfileQGaussianData.custom_kernels = { 'line_density_qgauss': xo.Kernel(args=[ xo.Arg(LongitudinalProfileQGaussianData, name='prof'), xo.Arg(xo.Int64, name='n'), xo.Arg(xo.Float64, pointer=True, name='z'), xo.Arg(xo.Float64, pointer=True, name='res') ], n_threads='n') } class LongitudinalProfileQGaussian(xo.dress(LongitudinalProfileQGaussianData)): @staticmethod def cq_from_q(q, q_tol): cq = sqrt(pi) if q >= (1 + q_tol): cq *= gamma((3 - q) / (2 * q - 2)) cq /= sqrt((q - 1)) * gamma(1 / (q - 1))
# This file is part of the Xfields Package. # # Copyright (c) CERN, 2021. # # ########################################### # import numpy as np import xobjects as xo import xpart as xp from .interpolated import _configure_grid from ..general import _pkg_root _TriCubicInterpolatedFieldMap_kernels = { 'central_diff': xo.Kernel( args=[ xo.Arg(xo.Int32, pointer=False, name='nelem'), xo.Arg(xo.Int32, pointer=False, name='row_size'), xo.Arg(xo.Int32, pointer=False, name='stride_in_dbl'), xo.Arg(xo.Float64, pointer=False, name='factor'), xo.Arg(xo.Int8, pointer=True, name='matrix_buffer'), xo.Arg(xo.Int64, pointer=False, name='matrix_offset'), xo.Arg(xo.Int8, pointer=True, name='res_buffer'), xo.Arg(xo.Int64, pointer=False, name='res_offset'), ], n_threads='nelem' ), 'p2m_rectmesh3d_xparticles': xo.Kernel( args=[ xo.Arg(xo.Int32, pointer=False, name='nparticles'), xo.Arg(xp.Particles, pointer=False, name='particles'), xo.Arg(xo.Float64, pointer=False, name='x0'),
_extra_c_source = """ /*gpukern*/ void Prism_compute_volume(Prism pr){ Base base = Prism_getp_base(pr); double height = Prism_get_height(pr); double base_area = Base_compute_area(base, 3.); Prism_set_volume(pr, base_area*height); } """ context = xo.ContextCpu() context.add_kernels( kernels={ "Prism_compute_volume": xo.Kernel(args=[xo.Arg(Prism, name="prism")]) }) triangle = Triangle(b=2, h=3) prism_triangle = Prism(base=triangle, height=5) square = Square(a=2) prism_square = Prism(base=square, height=10) context.kernels.Prism_compute_volume(prism=prism_triangle) context.kernels.Prism_compute_volume(prism=prism_square) assert prism_triangle.volume == 45 assert prism_square.volume == 120 # OpenCL context = xo.ContextPyopencl()
class LongitudinalProfileQGaussian(xo.HybridClass): _xofields = { 'number_of_particles': xo.Float64, '_q_tol': xo.Float64, '_z0': xo.Float64, '_sigma_z': xo.Float64, '_q_param': xo.Float64, '_cq_param': xo.Float64, '_beta_param': xo.Float64, '_sqrt_beta_param': xo.Float64, '_support_min': xo.Float64, '_support_max': xo.Float64, } _extra_c_sources = [ _pkg_root.joinpath('longitudinal_profiles/qgaussian_src/qgaussian.h') ] _kernels = {'line_density_qgauss': xo.Kernel(args=[xo.Arg(xo.ThisClass, name='prof'), xo.Arg(xo.Int64, name='n'), xo.Arg(xo.Float64, pointer=True, name='z'), xo.Arg(xo.Float64, pointer=True, name='res')], n_threads='n')} @staticmethod def cq_from_q(q, q_tol): cq = sqrt(pi) if q >= (1 + q_tol): cq *= gamma((3 - q) / (2 * q - 2)) cq /= sqrt((q - 1)) * gamma(1 / (q - 1)) elif q <= (1 - q_tol): cq *= 2 * gamma(1 / (1 - q)) cq /= ( (3 - q) * sqrt(1 - q) * gamma((3 - q) / (2 - 2 * q)) ) return cq def __init__(self, _context=None, _buffer=None, _offset=None, _xobject=None, number_of_particles=None, sigma_z=None, z0=0., q_parameter=1., z_min = -1e10, z_max = 1e10, q_tol=1e-6): if _xobject is not None: self.xoinitialize(_context=_context, _buffer=_buffer, _offset=_offset, _xobject=_xobject) return assert number_of_particles is not None assert sigma_z is not None self.xoinitialize(_context=_context, _buffer=_buffer, _offset=_offset) self._z_min = z_min self._z_max = z_max self.number_of_particles = number_of_particles self.sigma_z = sigma_z self.z0 = z0 self.q_tol = q_tol self.q_parameter = q_parameter def _recompute_beta_param(self): self._beta_param = 1./(self.sigma_z*self.sigma_z*(5.-3.*self.q_parameter)) self._sqrt_beta_param = np.sqrt(self._beta_param) def _recompute_support(self): support_min = self._z_min support_max = self._z_max # Handle limited support if self.q_parameter < (1. - self.q_tol): rng = 1./sqrt(self.beta_param*(1-self.q_parameter)) allowed_min = self.z0 - rng allowed_max = self.z0 + rng if support_min < allowed_min: support_min = allowed_min if support_max > allowed_max: support_max = allowed_max self._support_min = support_min self._support_max = support_max @property def sigma_z(self): return self._sigma_z @sigma_z.setter def sigma_z(self, value): self._sigma_z = value self._recompute_beta_param() self._recompute_support() @property def z0(self): return self._z0 @z0.setter def z0(self, value): self._z0 = value self._recompute_support() @property def q_parameter(self): return self._q_param @q_parameter.setter def q_parameter(self, value): if value >= 5./3.: raise NotImplementedError self._q_param = value self._cq_param = self.__class__.cq_from_q(value, self.q_tol) self._recompute_beta_param() self._recompute_support() @property def q_tol(self): return self._q_tol @q_tol.setter def q_tol(self, value): self._q_tol = value self._recompute_support() @property def beta_param(self): return self._beta_param @property def z_min(self): return self._z_min @z_min.setter def z_min(self, value): self._z_min = value self._recompute_support() @property def z_max(self): return self._z_max @z_max.setter def z_max(self, value): self._z_max = value self._recompute_support() def line_density(self, z): context = self._buffer.context res = context.zeros(len(z), dtype=np.float64) if 'line_density_q_gauss' not in context.kernels.keys(): self.compile_kernels() context.kernels.line_density_qgauss(prof=self._xobject, n=len(z), z=z, res=res) return res
source = """ #include <math.h> //only_for_context cpu #define SQ(x) x*x /*gpukern*/ void length(/*gpugblmem*/ ArrNPoint pp, double* res){ /*interate over ii*/ for (int ii=1; ii<ArrNPoint_len1(pp); ii++){ res=sqrt(SQ(ArrNPoint_get_x(pp,ii-1)-ArrNPoint_get_x(pp,ii))+ SQ(ArrNPoint_get_y(pp,ii-1)-ArrNPoint_get_y(pp,ii))); /*iterate over ii*/} """ points = ArrNPoint(10) # instantiate Array for ii in range(len(points)): # fill Array points[ii].x = ii points[ii].y = ii ctx = xo.ContextCpu() kernels = { "length": xo.Kernel([xo.Arg(ArrNPoint, name="obj"), xo.Arg(xo.Float64[:])]) } ctx.add_kernels(sources=[source], kernels=kernels) res = np.zeros(len(points))
ms2 = MyStruct2(_buffer=ms._buffer, sr=ms, a=[0, 0, 0]) src = """ /*gpukern*/ void cp_sra_to_a(MyStruct2 ms, int64_t n){ for(int64_t ii=0; ii<n; ii++){ //vectorize_over ii n double const val = MyStruct2_get_sr_a(ms, ii); MyStruct2_set_a(ms, ii, val); }//end_vectorize } """ context.add_kernels( sources=[src], kernels={ "cp_sra_to_a": xo.Kernel( args=[xo.Arg(MyStruct2, name="ms"), xo.Arg(xo.Int64, name="n")], n_threads="n", ) }, ) context.kernels.cp_sra_to_a(ms=ms2, n=len(ms.a)) for vv, ww in zip(ms2.a, ms2.sr.a): assert vv == ww
class Ref(xo.UnionRef): _reftypes = (StructA, StructB) ArrNRef = Ref[:] ctx = xo.ContextCpu() ar = ArrNRef(5, _context=ctx) ar[0] = StructA(fa=3.1) ar[1] = StructB(fa=5) ks = { "kr": xo.Kernel([xo.Arg(ArrNRef, name="obj")]), } source = """ void kr(ArrNRef obj){ printf("hello\\n"); printf("typeid(ar[0])=%ld\\n", ArrNRef_typeid(obj,0)); printf("typeid(ar[1])=%ld\\n", ArrNRef_typeid(obj,1)); StructA s1 = (StructA) ArrNRef_member(obj,0); StructB s2 = (StructB) ArrNRef_member(obj,1); printf("ar[0].fa=%g\\n", StructA_get_fa(s1)); printf("ar[1].fa=%ld\\n", StructB_get_fa(s2)); }; """
def test_cerrf_all_quadrants(): x0 = 5.33 y0 = 4.29 num_args = 10000 if xo.ContextCpu not in available: return ctx = xo.ContextCpu(omp_num_threads=2) re_max = np.float64(np.sqrt(2.0) * x0) im_max = np.float64(np.sqrt(2.0) * y0) # Extending the sampled area symmetrically into Q3 and Q4 would # get the zeros of w(z) into the fold which are located close to the # first medians of these quadrants at Im(z) = \pm Re(z) for Re(z) > 1.99146 # # This would lead to a degradation in the accuracy by at least an order # of magnitude due to cancellation effects and could distort the test -> # By excluding anything with an imaginary part < -1.95, this should be on # the safe side. np.random.seed(20210811) im_min = np.float64(-1.95) re_min = -re_max re_absc = np.random.uniform(re_min, re_max, num_args) im_absc = np.random.uniform(im_min, im_max, num_args) wz_cmp_re = np.arange(num_args, dtype=np.float64) wz_cmp_im = np.arange(num_args, dtype=np.float64) # Create comparison data for veryfing the correctness of cerrf(). # Cf. the comments about scipy's wofz implementation in test_cerrf_q1() # for details! for ii, (x, y) in enumerate(zip(re_absc, im_absc)): wz = wofz_scipy(x + 1.0j * y) wz_cmp_re[ii] = wz.real wz_cmp_im[ii] = wz.imag src_code = """ /*gpukern*/ void eval_cerrf_all_quadrants( const int n, /*gpuglmem*/ double const* /*restrict*/ re, /*gpuglmem*/ double const* /*restrict*/ im, /*gpuglmem*/ double* /*restrict*/ wz_re, /*gpuglmem*/ double* /*restrict*/ wz_im ) { int tid = 0; for( ; tid < n ; ++tid ) { //autovectorized if( tid < n ) { double const x = re[ tid ]; double const y = im[ tid ]; double wz_x, wz_y; cerrf( x, y, &wz_x, &wz_y ); wz_re[ tid ] = wz_x; wz_im[ tid ] = wz_y; } } } """ kernel_descriptions = { "eval_cerrf_all_quadrants": xo.Kernel( args=[ xo.Arg(xo.Int32, name="n"), xo.Arg(xo.Float64, name="re", const=True, pointer=True), xo.Arg(xo.Float64, name="im", const=True, pointer=True), xo.Arg(xo.Float64, name="wz_re", pointer=True), xo.Arg(xo.Float64, name="wz_im", pointer=True), ], n_threads="n", ), } headers = [ _pkg_root.joinpath("headers/constants.h"), _pkg_root.joinpath("headers/sincos.h"), _pkg_root.joinpath("headers/power_n.h"), _pkg_root.joinpath( "fieldmaps/bigaussian_src/complex_error_function.h"), ] wz_re = np.arange(num_args, dtype=np.float64) wz_im = np.arange(num_args, dtype=np.float64) re_absc_dev = ctx.nparray_to_context_array(re_absc) im_absc_dev = ctx.nparray_to_context_array(im_absc) wz_re_dev = ctx.nparray_to_context_array(wz_re) wz_im_dev = ctx.nparray_to_context_array(wz_im) ctx.add_kernels(sources=[src_code], kernels=kernel_descriptions, extra_headers=headers) ctx.kernels.eval_cerrf_all_quadrants( n=num_args, re=re_absc_dev, im=im_absc_dev, wz_re=wz_re_dev, wz_im=wz_im_dev, ) wz_re = ctx.nparray_from_context_array(wz_re_dev) wz_im = ctx.nparray_from_context_array(wz_im_dev) d_abs_re = np.fabs(wz_re - wz_cmp_re) d_abs_im = np.fabs(wz_im - wz_cmp_im) assert d_abs_re.max() < 0.5e-9 assert d_abs_im.max() < 0.5e-9
class StructB(xo.Struct): fa = xo.Float64 fb = xo.Ref[xo.Int64[:]] ctx = xo.ContextCpu() sa = StructA(fa=3, fb=4, _context=ctx) sb = StructB(fa=3, _context=ctx) sb.fb = [1, 2, 3, 4] sa.fb[3] = 1 sb.fb[3] = 1 ks = { "ka": xo.Kernel([xo.Arg(StructA, name="obj")]), "kb": xo.Kernel([xo.Arg(StructB, name="obj")]), } source = """ void ka(StructA obj){ printf("hello\\n"); printf("fa=%g\\n", StructA_get_fa(obj)); printf("len(fb)=%ld\\n", StructA_len_fb(obj)); printf("fb[3]=%ld\\n", StructA_get_fb(obj,3)); }; void kb(StructB obj){ printf("hello\\n"); printf("fa=%g\\n", StructB_get_fa(obj)); printf("len(fb)=%ld\\n", StructB_len_fb(obj)); printf("fb[3]=%ld\\n", StructB_get_fb(obj,3));
double wz_x, wz_y; cerrf( x, y, &wz_x, &wz_y ); wz_re[ tid ] = wz_x; wz_im[ tid ] = wz_y; } } } """ kernel_descriptions = { "eval_cerrf_q1": xo.Kernel( args=[ xo.Arg(xo.Int32, name="n"), xo.Arg(xo.Float64, name="re", const=True, pointer=True), xo.Arg(xo.Float64, name="im", const=True, pointer=True), xo.Arg(xo.Float64, name="wz_re", pointer=True), xo.Arg(xo.Float64, name="wz_im", pointer=True), ], n_threads="n", ), } headers = [ _pkg_root.joinpath("headers/constants.h"), _pkg_root.joinpath("headers/sincos.h"), _pkg_root.joinpath("headers/power_n.h"), _pkg_root.joinpath("fieldmaps/bigaussian_src/complex_error_function.h"), ]
dphi_dx = xo.Float64[:] dphi_dy = xo.Float64[:] dphi_dz = xo.Float64[:] TriLinearInterpolatedFieldMapData.extra_sources = [ _pkg_root.joinpath('headers/constants.h'), _pkg_root.joinpath('fieldmaps/interpolated_src/central_diff.h'), _pkg_root.joinpath('fieldmaps/interpolated_src/linear_interpolators.h'), _pkg_root.joinpath('fieldmaps/interpolated_src/charge_deposition.h'), ] TriLinearInterpolatedFieldMapData.custom_kernels = { 'central_diff': xo.Kernel(args=[ xo.Arg(xo.Int32, pointer=False, name='nelem'), xo.Arg(xo.Int32, pointer=False, name='row_size'), xo.Arg(xo.Int32, pointer=False, name='stride_in_dbl'), xo.Arg(xo.Float64, pointer=False, name='factor'), xo.Arg(xo.Int8, pointer=True, name='matrix_buffer'), xo.Arg(xo.Int64, pointer=False, name='matrix_offset'), xo.Arg(xo.Int8, pointer=True, name='res_buffer'), xo.Arg(xo.Int64, pointer=False, name='res_offset'), ], n_threads='nelem'), 'p2m_rectmesh3d_xparticles': xo.Kernel(args=[ xo.Arg(xo.Int32, pointer=False, name='nparticles'), xo.Arg(xp.Particles.XoStruct, pointer=False, name='particles'), xo.Arg(xo.Float64, pointer=False, name='x0'), xo.Arg(xo.Float64, pointer=False, name='y0'),