def _compile_macroscopic_setter_and_getter(self):
    lb_method = self.method
    cqc = lb_method.conserved_quantity_computation
    pdf_field = self._data_handling.fields[self._pdf_arr_name]
    rho_field = self._data_handling.fields[self.density_data_name]
    rho_field = rho_field.center if self.density_data_index is None else rho_field(self.density_data_index)
    vel_field = self._data_handling.fields[self.velocity_data_name]

    getter_eqs = cqc.output_equations_from_pdfs(pdf_field.center_vector,
                                                {'density': rho_field, 'velocity': vel_field})
    getter_kernel = create_kernel(getter_eqs, target=Target.CPU,
                                  cpu_openmp=self._config.cpu_openmp).compile()

    setter_eqs = pdf_initialization_assignments(lb_method, rho_field,
                                                vel_field.center_vector, pdf_field.center_vector)
    setter_eqs = create_simplification_strategy(lb_method)(setter_eqs)
    setter_kernel = create_kernel(setter_eqs, target=Target.CPU,
                                  cpu_openmp=self._config.cpu_openmp).compile()
    return getter_kernel, setter_kernel
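# Usage sketch (hypothetical, assuming `step` is the lbmpy scenario/step object this
# method belongs to): the setter initializes the pdf array from the macroscopic
# density/velocity fields, the getter recomputes them from the pdfs.
#
#     getter_kernel, setter_kernel = step._compile_macroscopic_setter_and_getter()
#     step._data_handling.run_kernel(setter_kernel)  # pdfs <- (density, velocity)
#     step._data_handling.run_kernel(getter_kernel)  # (density, velocity) <- pdfs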
def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), keys=(0, 0), offset_values=None):
    if (target in ['neon', 'vsx', 'rvv'] or target.startswith('sve')) and rng == 'aesni':
        pytest.xfail('AES not yet implemented for this architecture')
    cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True, 'instruction_set': target}

    dh = ps.create_data_handling((131, 131), default_ghost_layers=0, default_target=Target.CPU)
    f = dh.add_array("f", values_per_cell=4 if precision == 'float' else 2,
                     dtype=np.float32 if dtype == 'float' else np.float64, alignment=True)
    dh.fill(f.name, 42.0)
    ref = dh.add_array("ref", values_per_cell=4 if precision == 'float' else 2)

    rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
    assignments = [rng_node] + [ps.Assignment(ref(i), s) for i, s in enumerate(rng_node.result_symbols)]
    kernel = ps.create_kernel(assignments, target=dh.default_target).compile()

    kwargs = {'time_step': t}
    if offset_values is not None:
        kwargs.update({k.name: v for k, v in zip(offsets, offset_values)})
    dh.run_kernel(kernel, **kwargs)

    rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
    assignments = [rng_node] + [ps.Assignment(f(i), s) for i, s in enumerate(rng_node.result_symbols)]
    kernel = ps.create_kernel(assignments, target=dh.default_target,
                              cpu_vectorize_info=cpu_vectorize_info).compile()
    dh.run_kernel(kernel, **kwargs)

    ref_data = dh.gather_array(ref.name)
    data = dh.gather_array(f.name)

    assert np.allclose(ref_data, data)
def test_tensorflow_jit_cpu():
    pytest.importorskip('tensorflow')

    module_name = "Ololol"
    target = 'cpu'

    z, y, x = pystencils.fields("z, y, x: [20,40]")
    a = sympy.Symbol('a')

    forward_assignments = pystencils.AssignmentCollection(
        {z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0])})
    backward_assignments = create_backward_assignments(forward_assignments)

    forward_ast = pystencils.create_kernel(forward_assignments, target)
    forward_ast.function_name = 'forward_jit'
    backward_ast = pystencils.create_kernel(backward_assignments, target)
    backward_ast.function_name = 'backward_jit'
    module = TensorflowModule(module_name, [forward_ast, backward_ast])

    lib = pystencils_autodiff.tensorflow_jit.compile_sources_and_load([str(module)])
    assert 'call_forward_jit' in dir(lib)
    assert 'call_backward_jit' in dir(lib)

    lib = module.compile()
    assert 'call_forward_jit' in dir(lib)
    assert 'call_backward_jit' in dir(lib)
def test_full_scalar_field():
    """Tests fully (un)packing a scalar field (from)to a buffer."""
    fields = _generate_fields()
    for (src_arr, dst_arr, buffer_arr) in fields:
        src_field = Field.create_from_numpy_array("src_field", src_arr)
        dst_field = Field.create_from_numpy_array("dst_field", dst_arr)
        buffer = Field.create_generic("buffer", spatial_dimensions=1,
                                      field_type=FieldType.BUFFER, dtype=src_arr.dtype)

        pack_eqs = [Assignment(buffer.center(), src_field.center())]
        pack_code = create_kernel(pack_eqs,
                                  data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
        pack_kernel = pack_code.compile()
        pack_kernel(buffer=buffer_arr, src_field=src_arr)

        unpack_eqs = [Assignment(dst_field.center(), buffer.center())]
        unpack_code = create_kernel(unpack_eqs,
                                    data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
        unpack_kernel = unpack_code.compile()
        unpack_kernel(dst_field=dst_arr, buffer=buffer_arr)

        np.testing.assert_equal(src_arr, dst_arr)
def test_strided(instruction_set, dtype):
    f, g = ps.fields(f"f, g : float{64 if dtype == 'double' else 32}[2D]")
    update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
    if 'storeS' not in get_vector_instruction_set(dtype, instruction_set) \
            and instruction_set not in ['avx512', 'rvv'] and not instruction_set.startswith('sve'):
        with pytest.warns(UserWarning) as warn:
            config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
            ast = ps.create_kernel(update_rule, config=config)
        assert 'Could not vectorize loop' in warn[0].message.args[0]
    else:
        with pytest.warns(None) as warn:
            config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
            ast = ps.create_kernel(update_rule, config=config)
        assert len(warn) == 0

    func = ast.compile()
    ref_func = ps.create_kernel(update_rule).compile()

    arr = np.random.random((23 + 2, 17 + 2)).astype(np.float64 if dtype == 'double' else np.float32)
    dst = np.zeros_like(arr, dtype=np.float64 if dtype == 'double' else np.float32)
    ref = np.zeros_like(arr, dtype=np.float64 if dtype == 'double' else np.float32)

    func(g=dst, f=arr)
    ref_func(g=ref, f=arr)
    np.testing.assert_almost_equal(dst, ref, 13 if dtype == 'double' else 5)
def test_dynamic_matrix_location_dependent():
    try:
        from pystencils.data_types import TypedMatrixSymbol
    except ImportError:
        import pytest
        pytest.skip()

    x, y = pystencils.fields('x, y: float32[3d]')

    A = TypedMatrixSymbol('A', 3, 1, create_type('double'), CustomCppType('Vector3<double>'))

    my_fun_call = DynamicFunction(
        TypedSymbol('my_fun', 'std::function<Vector3<double>(int, int, int)>'),
        A.dtype, *pystencils.x_vector(3))
    assignments = pystencils.AssignmentCollection({
        A: my_fun_call,
        y.center: A[0] + A[1] + A[2]
    })
    ast = pystencils.create_kernel(assignments)
    pystencils.show_code(ast, custom_backend=FrameworkIntegrationPrinter())

    my_fun_call = DynamicFunction(
        TypedSymbol('my_fun', TemplateType('Functor_T')),
        A.dtype, *pystencils.x_vector(3))
    assignments = pystencils.AssignmentCollection({
        A: my_fun_call,
        y.center: A[0] + A[1] + A[2]
    })
    ast = pystencils.create_kernel(assignments)
    pystencils.show_code(ast, custom_backend=FrameworkIntegrationPrinter())
def test_logical_operators(instruction_set=instruction_set):
    arr = np.zeros((22, 22))

    @ps.kernel
    def kernel_and(s):
        f, g = ps.fields(f=arr, g=arr)
        s.c @= sp.And(f[0, 1] < 0.0, f[1, 0] < 0.0)
        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])

    ast = ps.create_kernel(kernel_and)
    vectorize(ast, instruction_set=instruction_set)
    ast.compile()

    @ps.kernel
    def kernel_or(s):
        f, g = ps.fields(f=arr, g=arr)
        s.c @= sp.Or(f[0, 1] < 0.0, f[1, 0] < 0.0)
        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])

    ast = ps.create_kernel(kernel_or)
    vectorize(ast, instruction_set=instruction_set)
    ast.compile()

    @ps.kernel
    def kernel_equal(s):
        f, g = ps.fields(f=arr, g=arr)
        s.c @= sp.Eq(f[0, 1], 2.0)
        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])

    ast = ps.create_kernel(kernel_equal)
    vectorize(ast, instruction_set=instruction_set)
    ast.compile()
def test_native_tensorflow_compilation_cpu():
    tf = pytest.importorskip('tensorflow')

    module_name = "Ololol"
    target = 'cpu'

    z, y, x = pystencils.fields("z, y, x: [20,40]")
    a = sympy.Symbol('a')

    forward_assignments = pystencils.AssignmentCollection({
        z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0])
    })
    backward_assignments = create_backward_assignments(forward_assignments)

    forward_ast = pystencils.create_kernel(forward_assignments, target)
    forward_ast.function_name = 'forward'
    backward_ast = pystencils.create_kernel(backward_assignments, target)
    backward_ast.function_name = 'backward'
    module = TensorflowModule(module_name, [forward_ast, backward_ast])
    print(module)

    # temp_file = write_cached_content(str(module), '.cpp')
    # command = ['c++', '-fPIC', temp_file, '-O2', '-shared',
    #            '-o', 'foo.so'] + compile_flags + link_flags + extra_flags
    # print(command)
    # subprocess.check_call(command, env=_compile_env)

    lib = module.compile()
    assert 'call_forward' in dir(lib)
    assert 'call_backward' in dir(lib)
def test_pybind11_compilation_cpu(with_python_bindings):
    pytest.importorskip('pybind11')
    pytest.importorskip('cppimport')

    module_name = "Olololsada"
    target = 'cpu'

    z, y, x = pystencils.fields("z, y, x: [20,40]")
    a = sympy.Symbol('a')

    forward_assignments = pystencils.AssignmentCollection(
        {z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0])})
    backward_assignments = create_backward_assignments(forward_assignments)

    forward_ast = pystencils.create_kernel(forward_assignments, target)
    forward_ast.function_name = 'forward'
    backward_ast = pystencils.create_kernel(backward_assignments, target)
    backward_ast.function_name = 'backward'
    module = PybindModule(module_name, [forward_ast, backward_ast],
                          with_python_bindings=with_python_bindings)
    print(module)

    if with_python_bindings:
        pybind_extension = module.compile()
        assert pybind_extension is not None
        assert 'call_forward' in dir(pybind_extension)
        assert 'call_backward' in dir(pybind_extension)
def test_advection(dim):
    L = (8, ) * dim
    dh = ps.create_data_handling(L, periodicity=True, default_target=ps.Target.CPU)
    c = dh.add_array('c', values_per_cell=1)
    j = dh.add_array('j', values_per_cell=3**dh.dim // 2, field_type=ps.FieldType.STAGGERED_FLUX)
    u = dh.add_array('u', values_per_cell=dh.dim)

    dh.cpu_arrays[c.name][:] = np.random.random([l + 2 for l in L])
    dh.cpu_arrays[u.name][:] = (np.random.random([l + 2 for l in L] + [dim]) - 0.5) / 5

    vof1 = ps.create_kernel(ps.fd.VOF(j, u, c)).compile()
    dh.fill(j.name, np.nan, ghost_layers=True)
    dh.run_kernel(vof1)
    j1 = dh.gather_array(j.name).copy()

    vof2 = ps.create_kernel(VOF2(j, u, c, simplify=False)).compile()
    dh.fill(j.name, np.nan, ghost_layers=True)
    dh.run_kernel(vof2)
    j2 = dh.gather_array(j.name)

    assert np.allclose(j1, j2)
def test_sqrt_of_integer():
    """Regression test for bug where sqrt(3) was classified as integer"""
    f = ps.fields("f: [1D]")
    tmp = sp.symbols("tmp")

    assignments = [ps.Assignment(tmp, sp.sqrt(3)),
                   ps.Assignment(f[0], tmp)]
    arr_double = np.array([1], dtype=np.float64)
    kernel = ps.create_kernel(assignments).compile()
    kernel(f=arr_double)
    assert 1.7 < arr_double[0] < 1.8

    f = ps.fields("f: float32[1D]")
    tmp = sp.symbols("tmp")

    assignments = [ps.Assignment(tmp, sp.sqrt(3)),
                   ps.Assignment(f[0], tmp)]
    arr_single = np.array([1], dtype=np.float32)
    config = ps.CreateKernelConfig(data_type="float32")
    kernel = ps.create_kernel(assignments, config=config).compile()
    kernel(f=arr_single)

    code = ps.get_code_str(kernel.ast)
    # ps.show_code(kernel.ast)
    # 1.7320508075688772935 --> it is actually correct to round to ...773. This was wrong before !282
    assert "1.7320508075688773f" in code
    assert 1.7 < arr_single[0] < 1.8
def test_torch_native_compilation_cpu():
    from torch.utils.cpp_extension import load

    module_name = "Ololol"
    target = 'cpu'

    z, y, x = pystencils.fields("z, y, x: [20,40]")
    a = sympy.Symbol('a')

    forward_assignments = pystencils.AssignmentCollection(
        {z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0])})
    backward_assignments = create_backward_assignments(forward_assignments)

    forward_ast = pystencils.create_kernel(forward_assignments, target)
    forward_ast.function_name = 'forward'
    backward_ast = pystencils.create_kernel(backward_assignments, target)
    backward_ast.function_name = 'backward'
    module = TorchModule(module_name, [forward_ast, backward_ast])
    print(module)

    temp_file = write_cached_content(str(module), '.cpp')
    torch_extension = load(module_name, [temp_file])
    assert torch_extension is not None
    assert 'call_forward' in dir(torch_extension)
    assert 'call_backward' in dir(torch_extension)

    torch_extension = module.compile()
    assert torch_extension is not None
    assert 'call_forward' in dir(torch_extension)
    assert 'call_backward' in dir(torch_extension)
def test_reproducability():
    from sympy.core.cache import clear_cache

    output_0 = None
    for i in range(10):
        module_name = "Ololol"
        target = 'cpu'

        z, y, x = pystencils.fields("z, y, x: [20,40]")
        a = sympy.Symbol('a')

        forward_assignments = pystencils.AssignmentCollection(
            {z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0])})
        backward_assignments = create_backward_assignments(forward_assignments)

        forward_ast = pystencils.create_kernel(forward_assignments, target)
        forward_ast.function_name = 'forward'
        backward_ast = pystencils.create_kernel(backward_assignments, target)
        backward_ast.function_name = 'backward'
        new_output = str(TorchModule(module_name, [forward_ast, backward_ast]))
        TorchModule(module_name, [forward_ast, backward_ast]).compile()

        clear_cache()

        if not output_0:
            output_0 = new_output

        assert output_0 == new_output
def test_fixed_constant_bh(num_ghost_layers):
    ndim = 2
    offsets = list(itertools.product(range(num_ghost_layers + 1), repeat=ndim))

    x, y = pystencils.fields(f'x, y: float64[{ndim}d]')

    assignments = pystencils.AssignmentCollection({
        y.center: sp.Add(*[x.__getitem__(o) for o in offsets]) / len(offsets)
    })

    kernel = pystencils.create_kernel(assignments).compile()
    print(kernel.code)

    bh_assignments = add_fixed_constant_boundary_handling(assignments, num_ghost_layers)

    bh_kernel = pystencils.create_kernel(bh_assignments, ghost_layers=0).compile()
    print(bh_kernel.code)

    noise = np.random.rand(*[20, 30, 40][:ndim])
    out1 = np.zeros_like(noise)
    out2 = np.zeros_like(noise)

    kernel(x=noise, y=out1)
    bh_kernel(x=noise, y=out2)
def test_module_printing_parameter():
    module_name = "Ololol"

    for target in ('cpu', 'gpu'):
        z, y, x = pystencils.fields("z, y, x: [20,40]")
        a = sympy.Symbol('a')

        forward_assignments = pystencils.AssignmentCollection(
            {z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0])})
        backward_assignments = create_backward_assignments(forward_assignments)

        forward_ast = pystencils.create_kernel(forward_assignments, target)
        forward_ast.function_name = 'forward'
        backward_ast = pystencils.create_kernel(backward_assignments, target)
        backward_ast.function_name = 'backward'

        module = TorchModule(module_name, [forward_ast, backward_ast])
        print(module)

        module = TensorflowModule(module_name, {forward_ast: backward_ast})
        print(module)

        if target == 'cpu':
            module = PybindModule(module_name, [forward_ast, backward_ast])
            print(module)
            module = PybindModule(module_name, forward_ast)
            print(module)
def test_kernel_decorator_config():
    config = ps.CreateKernelConfig()
    a, b, c = ps.fields(a=np.ones(100), b=np.ones(100), c=np.ones(100))

    @ps.kernel_config(config)
    def test():
        a[0] @= b[0] + c[0]

    # kernel_config turns the decorated function into a mapping of
    # create_kernel keyword arguments, hence the ** unpacking
    ps.create_kernel(**test)
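# For comparison, a minimal sketch of the same kernel built without the decorator,
# using only the explicit pystencils API (assumed equivalent to the test above):
def kernel_decorator_config_explicit():
    a, b, c = ps.fields(a=np.ones(100), b=np.ones(100), c=np.ones(100))
    # the @= line above corresponds to this plain assignment
    update = [ps.Assignment(a[0], b[0] + c[0])]
    ps.create_kernel(update, config=ps.CreateKernelConfig())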
def test_native_tensorflow_compilation_gpu():
    tf = pytest.importorskip('tensorflow')

    module_name = "Ololol"
    target = 'gpu'

    z, y, x = pystencils.fields("z, y, x: [20,40]")
    a = sympy.Symbol('a')

    forward_assignments = pystencils.AssignmentCollection({
        z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0])
    })
    backward_assignments = create_backward_assignments(forward_assignments)

    forward_ast = pystencils.create_kernel(forward_assignments, target)
    forward_ast.function_name = 'forward2'
    backward_ast = pystencils.create_kernel(backward_assignments, target)
    backward_ast.function_name = 'backward2'
    module = TensorflowModule(module_name, [forward_ast, backward_ast])
    print(str(module))

    # temp_file = write_cached_content(str(module), '.cu')
    # if 'tensorflow_host_compiler' not in get_compiler_config():
    #     get_compiler_config()['tensorflow_host_compiler'] = get_compiler_config()['command']
    # # on my machine g++-6 and clang-7 are working
    # command = ['nvcc',
    #            temp_file.name,
    #            '--expt-relaxed-constexpr',
    #            # '-ccbin',
    #            # 'g++-6',
    #            '-ccbin',
    #            get_compiler_config()['tensorflow_host_compiler'],
    #            '-std=c++14',
    #            '-x', 'cu',
    #            '-Xcompiler', '-fPIC',
    #            '-c',
    #            '-o', 'foo_gpu.o'] + compile_flags + extra_flags
    # subprocess.check_call(command)
    # command = ['c++', '-fPIC', 'foo_gpu.o',
    #            '-shared', '-o', 'foo_gpu.so'] + link_flags
    # subprocess.check_call(command)

    lib = module.compile()
    assert 'call_forward2' in dir(lib)
    # assert 'call_backward2' in dir(lib)
def generate_lattice_model(generation_context, class_name, collision_rule,
                           refinement_scaling=None, **create_kernel_params):
    # usually a numpy layout is chosen by default i.e. xyzf - which is bad for waLBerla
    # where at least the spatial coordinates should be ordered in reverse direction i.e. zyx
    is_float = not generation_context.double_accuracy
    dtype = np.float32 if is_float else np.float64
    lb_method = collision_rule.method

    q = len(lb_method.stencil)
    dim = lb_method.dim

    create_kernel_params = default_create_kernel_parameters(generation_context, create_kernel_params)
    if create_kernel_params['target'] == 'gpu':
        raise ValueError("Lattice Models can only be generated for CPUs. "
                         "To generate LBM on GPUs use sweeps directly")

    src_field = ps.Field.create_generic('pdfs', dim, dtype, index_dimensions=1,
                                        layout='fzyx', index_shape=(q, ))
    dst_field = ps.Field.create_generic('pdfs_tmp', dim, dtype, index_dimensions=1,
                                        layout='fzyx', index_shape=(q, ))

    stream_collide_update_rule = create_lbm_kernel(collision_rule, src_field, dst_field,
                                                   StreamPullTwoFieldsAccessor())
    stream_collide_ast = create_kernel(stream_collide_update_rule, **create_kernel_params)
    stream_collide_ast.function_name = 'kernel_streamCollide'

    collide_update_rule = create_lbm_kernel(collision_rule, src_field, dst_field,
                                            CollideOnlyInplaceAccessor())
    collide_ast = create_kernel(collide_update_rule, **create_kernel_params)
    collide_ast.function_name = 'kernel_collide'

    stream_update_rule = create_stream_pull_only_kernel(lb_method.stencil, None,
                                                        'pdfs', 'pdfs_tmp', 'fzyx', dtype)
    stream_ast = create_kernel(stream_update_rule, **create_kernel_params)
    stream_ast.function_name = 'kernel_stream'

    __lattice_model(generation_context, class_name, lb_method,
                    stream_collide_ast, collide_ast, stream_ast, refinement_scaling)
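# Typical use is inside a waLBerla code generation script, roughly along these lines
# (a sketch only; CodeGeneration and create_lb_collision_rule are assumed to come
# from pystencils_walberla / lbmpy, and the collision-rule arguments are elided):
#
#     with CodeGeneration() as ctx:
#         collision_rule = create_lb_collision_rule(...)  # any lbmpy collision rule
#         generate_lattice_model(ctx, 'GeneratedLatticeModel', collision_rule)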
def test_fixed_size_mismatch_check():
    """Create kernel with two differently sized but constant fields"""
    src = np.zeros((20, 21, 9))
    dst = np.zeros((21, 21, 9))
    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
    sym_dst = Field.create_from_numpy_array("dst", dst, index_dimensions=1)
    update_rule = Assignment(sym_dst(0), sym_src[-1, 1](1) + sym_src[1, -1](2))

    with pytest.raises(ValueError) as e:
        create_kernel([update_rule])
    assert 'Differently sized field accesses' in str(e.value)
def test_timeloop():
    dh = create_data_handling(domain_size=(2, 2), periodicity=True)

    pre = dh.add_array('pre_run_field', values_per_cell=1)
    dh.fill("pre_run_field", 0.0, ghost_layers=True)
    f = dh.add_array('field', values_per_cell=1)
    dh.fill("field", 0.0, ghost_layers=True)
    post = dh.add_array('post_run_field', values_per_cell=1)
    dh.fill("post_run_field", 0.0, ghost_layers=True)
    single_step = dh.add_array('single_step_field', values_per_cell=1)
    dh.fill("single_step_field", 0.0, ghost_layers=True)

    pre_assignments = Assignment(pre.center, pre.center + 1)
    pre_kernel = create_kernel(pre_assignments).compile()
    assignments = Assignment(f.center, f.center + 1)
    kernel = create_kernel(assignments).compile()
    post_assignments = Assignment(post.center, post.center + 1)
    post_kernel = create_kernel(post_assignments).compile()
    single_step_assignments = Assignment(single_step.center, single_step.center + 1)
    single_step_kernel = create_kernel(single_step_assignments).compile()

    fixed_steps = 2
    timeloop = TimeLoop(steps=fixed_steps)
    assert timeloop.fixed_steps == fixed_steps

    def pre_run():
        dh.run_kernel(pre_kernel)

    def post_run():
        dh.run_kernel(post_kernel)

    def single_step_run():
        dh.run_kernel(single_step_kernel)

    timeloop.add_pre_run_function(pre_run)
    timeloop.add_post_run_function(post_run)
    timeloop.add_single_step_function(single_step_run)
    timeloop.add_call(kernel, {'field': dh.cpu_arrays["field"]})

    # the timeloop is initialised with 2 steps. This means a single time step consists of two steps.
    # Therefore, we have 2 main iterations and one single step iteration in this configuration
    timeloop.run(time_steps=5)
    assert np.all(dh.cpu_arrays["pre_run_field"] == 1.0)
    assert np.all(dh.cpu_arrays["field"] == 2.0)
    assert np.all(dh.cpu_arrays["single_step_field"] == 1.0)
    assert np.all(dh.cpu_arrays["post_run_field"] == 1.0)

    seconds = 2
    start = time.perf_counter()
    timeloop.run_time_span(seconds=seconds)
    end = time.perf_counter()

    np.testing.assert_almost_equal(seconds, end - start, decimal=2)
def test_fixed_and_variable_field_check():
    """Create kernel mixing a fixed-size field with a variable-sized field"""
    src = np.zeros((20, 21, 9))
    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
    sym_dst = Field.create_generic("dst", spatial_dimensions=2, index_dimensions=1)
    update_rule = Assignment(sym_dst(0), sym_src[-1, 1](1) + sym_src[1, -1](2))

    with pytest.raises(ValueError) as e:
        create_kernel(update_rule)
    assert 'Mixing fixed-shaped and variable-shape fields' in str(e.value)
def test_loop_independence_checks():
    f, g = fields("f, g : double[2D]")
    v = fields("v(2) : double[2D]")

    with pytest.raises(ValueError) as e:
        create_kernel([Assignment(g[0, 1], f[0, 1]),
                       Assignment(g[0, 0], f[1, 0])])
    assert 'Field g is written at two different locations' in str(e.value)

    # This is allowed - because only one element of g is accessed
    create_kernel([Assignment(g[0, 2], f[0, 1]),
                   Assignment(g[0, 2], 2 * g[0, 2])])

    create_kernel([Assignment(v[0, 2](1), f[0, 1]),
                   Assignment(v[0, 1](0), 4),
                   Assignment(v[0, 2](1), 2 * v[0, 2](1))])

    with pytest.raises(ValueError) as e:
        create_kernel([Assignment(g[0, 1], 3),
                       Assignment(f[0, 1], 2 * g[0, 2])])
    assert 'Field g is read at (0, 2) and written at (0, 1)' in str(e.value)
def test_subset_cell_values():
    """Tests (un)packing a subset of cell values of a field (from)to a buffer."""
    num_cell_values = 19
    # Cell indices of the field to be (un)packed (from)to the buffer
    cell_indices = [1, 5, 7, 8, 10, 12, 13]
    fields = _generate_fields(num_directions=num_cell_values)
    for (src_arr, dst_arr, bufferArr) in fields:
        src_field = Field.create_from_numpy_array("src_field", src_arr, index_dimensions=1)
        dst_field = Field.create_from_numpy_array("dst_field", dst_arr, index_dimensions=1)
        buffer = Field.create_generic("buffer", spatial_dimensions=1, index_dimensions=1,
                                      field_type=FieldType.BUFFER, dtype=src_arr.dtype)

        pack_eqs = []
        # Only a subset of the cell values is packed, so the (dense) buffer index
        # and the (sparse) field index differ
        for buffer_idx, cell_idx in enumerate(cell_indices):
            eq = Assignment(buffer(buffer_idx), src_field(cell_idx))
            pack_eqs.append(eq)

        pack_code = create_kernel(pack_eqs,
                                  data_type={'src_field': src_arr.dtype, 'buffer': buffer.dtype})
        pack_kernel = pack_code.compile()
        pack_kernel(buffer=bufferArr, src_field=src_arr)

        unpack_eqs = []
        for buffer_idx, cell_idx in enumerate(cell_indices):
            eq = Assignment(dst_field(cell_idx), buffer(buffer_idx))
            unpack_eqs.append(eq)

        unpack_code = create_kernel(unpack_eqs,
                                    data_type={'dst_field': dst_arr.dtype, 'buffer': buffer.dtype})
        unpack_kernel = unpack_code.compile()
        unpack_kernel(buffer=bufferArr, dst_field=dst_arr)

        mask_arr = np.ma.masked_where((src_arr - dst_arr) != 0, src_arr)
        np.testing.assert_equal(dst_arr, mask_arr.filled(int(0)))
def test_address_of_with_cse():
    x, y = pystencils.fields('x,y: int64[2d]')
    s = pystencils.TypedSymbol('s', PointerType(create_type('int64')))

    assignments = pystencils.AssignmentCollection({
        y[0, 0]: cast_func(address_of(x[0, 0]), create_type('int64')) + s,
        x[0, 0]: cast_func(address_of(x[0, 0]), create_type('int64')) + 1
    }, {})

    ast = pystencils.create_kernel(assignments)
    pystencils.show_code(ast)
    assignments_cse = sympy_cse(assignments)

    ast = pystencils.create_kernel(assignments_cse)
    pystencils.show_code(ast)
def pystencils_2d_cpu_impl(x, y, coef, N, I=1):
    if x.dtype == np.dtype('f4'):
        src, dst = ps.fields('src, dst: float32[2D]', src=x, dst=y)
    elif x.dtype == np.dtype('f8'):
        src, dst = ps.fields('src, dst: double[2D]', src=x, dst=y)
    else:
        raise TypeError

    if N == 1:
        update_rule = make_2d_update_rule_1(src, dst, coef)
    elif N == 6:
        update_rule = make_2d_update_rule_6(src, dst, coef)
    else:
        raise ValueError

    kernel = ps.create_kernel(update_rule, cpu_openmp=True).compile()

    s = time.time()
    for i in range(I):
        # swap source and destination every iteration
        if i % 2 == 0:
            kernel(src=x, dst=y)
        else:
            kernel(src=y, dst=x)
    e = time.time()

    # the final result lives in whichever array was written last
    res = y if (I - 1) % 2 == 0 else x
    return e - s, res
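# Example invocation of the benchmark above (a sketch: the update-rule helpers
# make_2d_update_rule_1/make_2d_update_rule_6 and the coefficient vector `coef`
# are assumed to be defined elsewhere in the benchmark suite):
#
#     x = np.random.rand(512, 512).astype('f8')
#     y = np.zeros_like(x)
#     elapsed, result = pystencils_2d_cpu_impl(x, y, coef, N=1, I=100)
#     print(f"{elapsed:.3f} s for 100 sweeps")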
def test_vectorization_fixed_size():
    configurations = []
    # Fixed size - multiple of four
    arr = np.ones((20 + 2, 24 + 2)) * 5.0
    f, g = ps.fields(f=arr, g=arr)
    configurations.append((arr, f, g))
    # Fixed size - no multiple of four
    arr = np.ones((21 + 2, 25 + 2)) * 5.0
    f, g = ps.fields(f=arr, g=arr)
    configurations.append((arr, f, g))
    # Fixed size - different remainder
    arr = np.ones((23 + 2, 17 + 2)) * 5.0
    f, g = ps.fields(f=arr, g=arr)
    configurations.append((arr, f, g))

    for arr, f, g in configurations:
        update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]
        ast = ps.create_kernel(update_rule)
        vectorize(ast)

        func = ast.compile()
        dst = np.zeros_like(arr)
        func(g=dst, f=arr)
        np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
def test_sum_use_float():
    sum = sympy.Sum(k, (k, 1, 100))
    expanded_sum = sum.doit()

    print(sum)
    print(expanded_sum)

    x = pystencils.fields('x: float32[1d]')

    assignments = pystencils.AssignmentCollection({x.center(): sum})

    ast = pystencils.create_kernel(assignments, data_type=create_type('float32'))
    code = str(pystencils.show_code(ast))
    kernel = ast.compile()
    print(code)
    print(pystencils.show_code(ast))
    assert 'float sum' in code

    array = np.zeros((10, ), np.float32)

    kernel(x=array)

    assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
def test_vec_all(instruction_set, dtype):
    if instruction_set in ['sve', 'rvv']:
        width = 1000  # we don't know the actual value, need something guaranteed larger than vector
    else:
        width = get_vector_instruction_set(dtype, instruction_set)['width']
    data_arr = np.zeros((4 * width, 4 * width), dtype=np.float64 if dtype == 'double' else np.float32)

    data_arr[3:9, 1:3 * width - 1] = 1.0
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)

    c = [Conditional(vec_all(data.center() > 0.0),
                     Block([ps.Assignment(data.center(), 2.0)]))]
    ast = ps.create_kernel(c, target=Target.CPU,
                           cpu_vectorize_info={'instruction_set': instruction_set})
    kernel = ast.compile()
    kernel(data=data_arr)
    if instruction_set in ['sve', 'rvv']:
        # we only know that some values in the middle have been replaced
        assert np.all(data_arr[3:9, :2] <= 1.0)
        assert np.any(data_arr[3:9, 2:] == 2.0)
    else:
        np.testing.assert_equal(data_arr[3:9, :1], 0.0)
        np.testing.assert_equal(data_arr[3:9, 1:width], 1.0)
        np.testing.assert_equal(data_arr[3:9, width:2 * width], 2.0)
        np.testing.assert_equal(data_arr[3:9, 2 * width:3 * width - 1], 1.0)
        np.testing.assert_equal(data_arr[3:9, 3 * width - 1:], 0.0)
def test_vec_any(instruction_set, dtype):
    if instruction_set in ['sve', 'rvv']:
        width = 4  # we don't know the actual value
    else:
        width = get_vector_instruction_set(dtype, instruction_set)['width']
    data_arr = np.zeros((4 * width, 4 * width), dtype=np.float64 if dtype == 'double' else np.float32)

    data_arr[3:9, 1:3 * width - 1] = 1.0
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)

    c = [ps.Assignment(sp.Symbol("t1"), vec_any(data.center() > 0.0)),
         Conditional(vec_any(data.center() > 0.0),
                     Block([ps.Assignment(data.center(), 2.0)]))]
    ast = ps.create_kernel(c, target=ps.Target.CPU,
                           cpu_vectorize_info={'instruction_set': instruction_set})
    kernel = ast.compile()
    kernel(data=data_arr)
    if instruction_set in ['sve', 'rvv']:
        # we only know that the first value has changed
        np.testing.assert_equal(data_arr[3:9, :3 * width - 1], 2.0)
    else:
        np.testing.assert_equal(data_arr[3:9, :3 * width], 2.0)
def test_prod_var_limit():
    k = pystencils.TypedSymbol('k', create_type('int64'))
    limit = pystencils.TypedSymbol('limit', create_type('int64'))

    sum = sympy.Sum(k, (k, 1, limit))
    expanded_sum = sum.replace(limit, 100).doit()

    print(sum)
    print(expanded_sum)

    x = pystencils.fields('x: int64[1d]')

    assignments = pystencils.AssignmentCollection({x.center(): sum})

    ast = pystencils.create_kernel(assignments)
    code = str(pystencils.show_code(ast))
    kernel = ast.compile()
    print(code)

    array = np.zeros((10, ), np.int64)

    kernel(x=array, limit=100)

    assert np.allclose(array, int(expanded_sum) * np.ones_like(array))