def test_symbol_renaming():
    """Check that clashing hoisted definitions of one symbol are kept apart.

    Two loops each assign a different right-hand side to the same symbol ``c``.
    After ``move_constants_before_loop`` both definitions are expected in front
    of the loops, so the two hoisted assignments must carry different names.
    """
    src, dst = ps.fields("f, g : double[2D]")
    a, b, c = (TypedSymbol(name, np.float64) for name in ('a', 'b', 'c'))

    def make_loop(c_rhs):
        # Loop over coordinate 0 from 0 to 10 that first defines ``c`` and then uses it.
        loop_body = Block([
            SympyAssignment(c, c_rhs),
            SympyAssignment(dst[0, 0], src[0, 0] + c),
        ])
        return LoopOverCoordinate(loop_body, 0, 0, 10)

    first_loop = make_loop(a + b)
    second_loop = make_loop(a ** 2 + b ** 2)
    outer_block = Block([first_loop, second_loop])

    move_constants_before_loop(outer_block)

    found_loops = outer_block.atoms(LoopOverCoordinate)
    assert len(found_loops) == 2
    for current in found_loops:
        # Only the field update is left inside each loop.
        assert len(current.body.args) == 1
        siblings = current.parent.args
        assert len(siblings) == 4  # 2 loops + 2 subexpressions
        assert siblings[0].lhs.name != siblings[1].lhs.name
def test_jacobi_variable_field_size():
    """Compare a compiled 3D Jacobi kernel on generic fields with a NumPy reference.

    The kernel averages the four neighbours in the first two coordinate
    directions; the reference result is computed on the interior cells only.
    """
    size = (3, 3, 3)

    # Build the kernel from fields created with ``Field.create_generic``.
    f = Field.create_generic("f", 3)
    d = Field.create_generic("d", 3)
    stencil_sum = f[1, 0, 0] + f[-1, 0, 0] + f[0, 1, 0] + f[0, -1, 0]
    update = SympyAssignment(d[0, 0, 0], stencil_sum / 4)
    loop_node, gl_info = make_loop_over_domain(Block([update]))
    ast_node = KernelFunction(loop_node, 'cpu', 'c', make_python_function, ghost_layers=gl_info)
    resolve_field_accesses(ast_node)
    move_constants_before_loop(ast_node)

    src_field_c = np.random.rand(*size)
    src_field_py = np.copy(src_field_c)
    dst_field_c = np.zeros(size)
    dst_field_py = np.zeros(size)

    # Reference result on the interior cells, written with slices instead of explicit loops.
    core = slice(1, -1)
    minus = slice(None, -2)
    plus = slice(2, None)
    dst_field_py[core, core, core] = 0.25 * (
        src_field_py[minus, core, core] + src_field_py[plus, core, core]
        + src_field_py[core, minus, core] + src_field_py[core, plus, core]
    )

    compiled = ast_node.compile()
    compiled(f=src_field_c, d=dst_field_c)

    total_abs_error = np.sum(np.abs(dst_field_py - dst_field_c))
    np.testing.assert_allclose(total_abs_error, 0.0, atol=1e-13)
def test_staggered_iteration_manual():
    """Cut loops by hand on a staggered kernel and check the result is unchanged.

    A kernel with one ``Conditional`` per staggered direction is run once as
    generated. Its innermost and outermost loops are then cut at 4, the
    conditionals are simplified, and the transformed kernel must contain no
    ``Conditional`` any more and still reproduce the reference output.
    """
    dim = 2
    shape = [5] * dim
    f_arr = np.arange(5 ** dim).reshape(shape)
    s_arr = np.ones(shape + [dim]) * 1234
    s_arr_ref = s_arr.copy()

    f = Field.create_from_numpy_array('f', f_arr)
    s = Field.create_from_numpy_array('s', s_arr, index_dimensions=1)

    # One upper-bound condition per coordinate: loop counter < extent - 1.
    conditions = [
        LoopOverCoordinate.get_loop_counter_symbol(i) < f.shape[i] - 1
        for i in range(dim)
    ]

    def guarded_update(d):
        # Difference of the two plane sums for direction d, guarded by the
        # conditions of all other coordinates.
        plane_difference = (sum(f[o] for o in offsets_in_plane(d, 0, dim))
                            - sum(f[o] for o in offsets_in_plane(d, -1, dim)))
        update = SympyAssignment(s(d), plane_difference)
        guard = sp.And(*[conditions[i] for i in range(dim) if d != i])
        return Conditional(guard, update)

    eqs = [guarded_update(d) for d in range(dim)]

    kernel_ast = create_kernel(eqs, ghost_layers=[(1, 0), (1, 0), (1, 0)])

    # Reference run with the untransformed kernel.
    reference_func = make_python_function(kernel_ast)
    reference_func(f=f_arr, s=s_arr_ref)

    innermost_loops = [n for n in kernel_ast.atoms(ast.LoopOverCoordinate) if n.is_innermost_loop]
    cut_loop(innermost_loops[0], [4])
    # The outermost loop is looked up only after the inner cut has changed the tree.
    outermost_loops = [n for n in kernel_ast.atoms(ast.LoopOverCoordinate) if n.is_outermost_loop]
    cut_loop(outermost_loops[0], [4])

    kernel_body = kernel_ast.body
    simplify_conditionals(kernel_body, loop_counter_simplification=True)
    cleanup_blocks(kernel_body)
    move_constants_before_loop(kernel_body)
    cleanup_blocks(kernel_body)

    assert not kernel_ast.atoms(Conditional), "Loop cutting optimization did not work"

    func_optimized = make_python_function(kernel_ast)
    func_optimized(f=f_arr, s=s_arr)
    np.testing.assert_almost_equal(s_arr_ref, s_arr)
def test_jacobi_fixed_field_size():
    """Compare a compiled 2D Jacobi kernel on fixed-size fields with a NumPy reference.

    The fields are created from concrete arrays. Besides the numerical check,
    the output of ``show_code`` must contain a ``for`` in both its string and
    its HTML form.
    """
    size = (30, 20)

    src_field_c = np.random.rand(*size)
    src_field_py = np.copy(src_field_c)
    dst_field_c = np.zeros(size)
    dst_field_py = np.zeros(size)

    f = Field.create_from_numpy_array("f", src_field_c)
    d = Field.create_from_numpy_array("d", dst_field_c)

    neighbour_sum = f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1]
    update = SympyAssignment(d[0, 0], neighbour_sum / 4)
    loop_node, gl_info = make_loop_over_domain(Block([update]))
    ast_node = KernelFunction(loop_node, 'cpu', 'c', make_python_function, ghost_layers=gl_info)
    resolve_field_accesses(ast_node)
    move_constants_before_loop(ast_node)

    # Reference result on the interior cells, written with slices instead of explicit loops.
    core = slice(1, -1)
    minus = slice(None, -2)
    plus = slice(2, None)
    dst_field_py[core, core] = 0.25 * (
        src_field_py[minus, core] + src_field_py[plus, core]
        + src_field_py[core, minus] + src_field_py[core, plus]
    )

    compiled = ast_node.compile()
    compiled(f=src_field_c, d=dst_field_c)

    total_abs_error = np.sum(np.abs(dst_field_py - dst_field_c))
    np.testing.assert_allclose(total_abs_error, 0.0, atol=1e-13)

    # The displayed code must contain at least one loop in both representations.
    code_display = show_code(ast_node)
    assert 'for' in str(code_display)
    assert 'for' in code_display._repr_html_()
def create_staggered_kernel(staggered_field, expressions, subexpressions=(), target='cpu',
                            gpu_exclusive_conditions=False, **kwargs):
    """Build a kernel that writes the values of a staggered field.

    .. image:: /img/staggered_grid.svg

    Args:
        staggered_field: field whose first index coordinate selects the staggered position.
                         It may have one or two index coordinates. With two, a vector is stored at
                         each staggered position and ``expressions`` has to be a sequence of
                         sequences. E.g. ``f[0,0](0)`` is read as the value at the left cell
                         boundary, ``f[1,0](0)`` as the right cell boundary and ``f[0,0](1)`` as
                         the southern cell boundary etc.
        expressions: sequence of length dim with the update expressions for the west, southern,
                     (bottom) cell boundary.
        subexpressions: optional sequence of Assignments defining subexpressions that the main
                        expressions refer to
        target: 'cpu' or 'gpu'
        gpu_exclusive_conditions: build an if/else chain so that each of the 2**dim code paths has
                                  exactly one code block
        kwargs: forwarded to create_kernel; ``iteration_slice`` and ``ghost_layers`` must not be
                given

    Returns:
        AST, see `create_kernel`
    """
    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
    assert staggered_field.index_dimensions in (1, 2), 'Staggered field must have one or two index dimensions'
    dim = staggered_field.spatial_dimensions

    # Per coordinate: loop counter < extent - 1.
    conditions = [
        LoopOverCoordinate.get_loop_counter_symbol(i) < staggered_field.shape[i] - 1
        for i in range(dim)
    ]
    assert len(expressions) == dim
    if staggered_field.index_dimensions == 2:
        assert all(len(sublist) == len(expressions[0]) for sublist in expressions), \
            "If staggered field has two index dimensions expressions has to be a sequence of sequences of all the " \
            "same length."

    final_assignments = []
    last_conditional = None

    def condition_for(d):
        # Conjunction of the conditions of every coordinate except d.
        return sp.And(*[conditions[i] for i in range(dim) if d != i])

    def add(condition, dimensions, as_else_block=False):
        nonlocal last_conditional
        if staggered_field.index_dimensions == 1:
            assignments = [Assignment(staggered_field(d), expressions[d]) for d in dimensions]
            a_coll = AssignmentCollection(assignments, list(subexpressions))
            a_coll = a_coll.new_filtered([staggered_field(d) for d in dimensions])
        elif staggered_field.index_dimensions == 2:
            assert staggered_field.has_fixed_index_shape
            assignments = [
                Assignment(staggered_field(d, i), expr)
                for d in dimensions
                for i, expr in enumerate(expressions[d])
            ]
            a_coll = AssignmentCollection(assignments, list(subexpressions))
            a_coll = a_coll.new_filtered([
                staggered_field(d, i)
                for i in range(staggered_field.index_shape[1])
                for d in dimensions
            ])

        guarded_block = Block([SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments])
        new_conditional = Conditional(condition, guarded_block)
        if as_else_block and last_conditional:
            # Chain the new conditional into the else branch of the previous one.
            last_conditional.false_block = Block([new_conditional])
        else:
            final_assignments.append(new_conditional)
        last_conditional = new_conditional

    if target == 'cpu' or not gpu_exclusive_conditions:
        # One independent conditional per staggered direction.
        for d in range(dim):
            add(condition_for(d), [d])
    elif target == 'gpu':
        full_conditions = [condition_for(d) for d in range(dim)]
        # Enumerate every combination of "direction included / not included".
        for include in itertools.product([1, 0], repeat=dim):
            case_conditions = sp.And(*[
                c if value else sp.Not(c)
                for c, value in zip(full_conditions, include)
            ])
            dimensions_to_include = [i for i, flag in enumerate(include) if flag]
            if dimensions_to_include:
                add(case_conditions, dimensions_to_include, True)

    ghost_layers = [(1, 0)] * dim

    def take_if_set(key):
        # Read an option and withhold it from create_kernel only when it is truthy.
        value = kwargs.get(key, None)
        if value:
            del kwargs[key]
        return value

    blocking = take_if_set('cpu_blocking')
    cpu_vectorize_info = take_if_set('cpu_vectorize_info')
    openmp = take_if_set('cpu_openmp')

    kernel_ast = create_kernel(final_assignments, ghost_layers=ghost_layers, target=target, **kwargs)

    if target == 'cpu':
        # The withheld CPU options are applied here, after the conditionals have been removed.
        remove_conditionals_in_staggered_kernel(kernel_ast)
        move_constants_before_loop(kernel_ast)
        omp_collapse = None
        if blocking:
            omp_collapse = loop_blocking(kernel_ast, blocking)
        if openmp:
            from pystencils.cpu import add_openmp
            add_openmp(kernel_ast, num_threads=openmp, collapse=omp_collapse, assume_single_outer_loop=False)
        if cpu_vectorize_info is True:
            vectorize(kernel_ast)
        elif isinstance(cpu_vectorize_info, dict):
            vectorize(kernel_ast, **cpu_vectorize_info)
    return kernel_ast
def create_kernel(assignments: AssignmentOrAstNodeList, function_name: str = "kernel", type_info='double',
                  split_groups=(), iteration_slice=None, ghost_layers=None,
                  skip_independence_check=False) -> KernelFunction:
    """Creates an abstract syntax tree for a kernel function, by taking a list of update rules.

    Loops are created according to the field accesses in the equations.

    Args:
        assignments: list of sympy equations, containing accesses to :class:`pystencils.field.Field`.
                     Defining the update rules of the kernel
        function_name: name of the generated function - only important if generated code is written out
        type_info: either a single type specifier (e.g. the string ``'double'``) that is used for all
                   symbols, or a map from symbol name to a C type specifier. If not specified all
                   symbols are assumed to be of type 'double' except symbols which occur on the left
                   hand side of equations where the right hand side is a sympy Boolean which are
                   assumed to be 'bool' .
        split_groups: Specification on how to split up inner loop into multiple loops. For details see
                      transformation :func:`pystencils.transformation.split_inner_loop`
        iteration_slice: if not None, iteration is done only over this slice of the field
        ghost_layers: a sequence of pairs for each coordinate with lower and upper nr of ghost layers
                      if None, the number of ghost layers is determined automatically and assumed to be
                      equal for all dimensions
        skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
                                 periodicity kernel, that access the field outside the iteration bounds.
                                 Use with care!

    Returns:
        AST node representing a function, that can be printed as C or CUDA code

    Raises:
        ValueError: if an entry of ``split_groups`` is neither a field access nor a symbol
    """

    def type_symbol(term):
        # Turn an entry of split_groups into a typed symbol; typed terms pass through unchanged.
        if isinstance(term, (Field.Access, TypedSymbol)):
            return term
        if isinstance(term, sp.Symbol):
            # A plain string such as the default 'double' also provides __getitem__, so it has to be
            # excluded explicitly; otherwise it would be indexed with the symbol name and raise TypeError.
            if isinstance(type_info, str) or not hasattr(type_info, '__getitem__'):
                return TypedSymbol(term.name, create_type(type_info))
            return TypedSymbol(term.name, type_info[term.name])
        raise ValueError("Term has to be field access or symbol")

    fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check)
    all_fields = fields_read.union(fields_written)
    read_only_fields = {f.name for f in fields_read - fields_written}

    # Buffer fields are left out of the loop-order choice and get their own base pointer spec below.
    buffers = {f for f in all_fields if FieldType.is_buffer(f)}
    fields_without_buffers = all_fields - buffers

    body = ast.Block(assignments)
    loop_order = get_optimal_loop_ordering(fields_without_buffers)
    loop_node, ghost_layer_info = make_loop_over_domain(body, iteration_slice=iteration_slice,
                                                        ghost_layers=ghost_layers, loop_order=loop_order)
    ast_node = KernelFunction(loop_node, 'cpu', 'c', compile_function=make_python_function,
                              ghost_layers=ghost_layer_info, function_name=function_name)

    if split_groups:
        typed_split_groups = [[type_symbol(s) for s in split_group] for split_group in split_groups]
        split_inner_loop(ast_node, typed_split_groups)

    base_pointer_spec = [['spatialInner0'], ['spatialInner1']] if len(loop_order) >= 2 else [['spatialInner0']]
    base_pointer_info = {
        field.name: parse_base_pointer_info(base_pointer_spec, loop_order,
                                            field.spatial_dimensions, field.index_dimensions)
        for field in fields_without_buffers
    }
    buffer_base_pointer_info = {
        field.name: parse_base_pointer_info([['spatialInner0']], [0],
                                            field.spatial_dimensions, field.index_dimensions)
        for field in buffers
    }
    base_pointer_info.update(buffer_base_pointer_info)

    # Buffer accesses are resolved before the ordinary field accesses.
    if buffers:
        resolve_buffer_accesses(ast_node, get_base_buffer_index(ast_node), read_only_fields)
    resolve_field_accesses(ast_node, read_only_fields, field_to_base_pointer_info=base_pointer_info)
    move_constants_before_loop(ast_node)
    return ast_node
def create_indexed_kernel(assignments: AssignmentOrAstNodeList, index_fields, function_name="kernel",
                          type_info=None, coordinate_names=('x', 'y', 'z')) -> KernelFunction:
    """
    Variant of :func:`create_kernel` that updates only the cells listed in an index field instead of
    all cells of a field. This kind of traversal can e.g. be used for boundary handling.

    The cell coordinates live in a separate index_field, a one dimensional array with struct data type.
    The struct has to provide fields named 'x', 'y' and for 3D fields ('z'); these names can be changed
    with the 'coordinate_names' parameter. The struct may contain further fields that the kernel can
    read and write, for example boundary parameters.

    The ``field_type`` of every passed index field is set to ``FieldType.INDEXED``.

    Args:
        assignments: list of assignments
        index_fields: list of index fields, i.e. 1D fields with struct data type
        type_info: see documentation of :func:`create_kernel`
        function_name: see documentation of :func:`create_kernel`
        coordinate_names: name of the coordinate fields in the struct data type

    Raises:
        ValueError: if a required coordinate name is not an element of any index field's struct type
    """
    fields_read, fields_written, assignments = add_types(assignments, type_info,
                                                         check_independence_condition=False)
    all_fields = fields_read.union(fields_written)

    for index_field in index_fields:
        index_field.field_type = FieldType.INDEXED
        assert FieldType.is_indexed(index_field)
        assert index_field.spatial_dimensions == 1, "Index fields have to be 1D"

    non_index_fields = [f for f in all_fields if f not in index_fields]
    distinct_dimensions = {f.spatial_dimensions for f in non_index_fields}
    assert len(distinct_dimensions) == 1, "Non-index fields do not have the same number of spatial coordinates"
    spatial_coordinates = list(distinct_dimensions)[0]

    def get_coordinate_symbol_assignment(name):
        # Use the first index field whose struct type has an element called `name`.
        for idx_field in index_fields:
            struct_type = idx_field.dtype
            assert isinstance(struct_type, StructType), "Index fields have to have a struct data type"
            if not struct_type.has_element(name):
                continue
            rhs = idx_field[0](name)
            lhs = TypedSymbol(name, BasicType(struct_type.get_element_type(name)))
            return SympyAssignment(lhs, rhs)
        raise ValueError("Index %s not found in any of the passed index fields" % (name, ))

    coordinate_symbol_assignments = [
        get_coordinate_symbol_assignment(coordinate_name)
        for coordinate_name in coordinate_names[:spatial_coordinates]
    ]
    coordinate_typed_symbols = [eq.lhs for eq in coordinate_symbol_assignments]
    # The coordinate look-ups come before the user assignments inside the loop body.
    assignments = coordinate_symbol_assignments + assignments

    # make 1D loop over index fields
    loop_body = Block([])
    loop_node = LoopOverCoordinate(loop_body, coordinate_to_loop_over=0, start=0,
                                   stop=index_fields[0].shape[0])
    for node in assignments:
        loop_body.append(node)

    ast_node = KernelFunction(Block([loop_node]), "cpu", "c", make_python_function,
                              ghost_layers=None, function_name=function_name)

    # Every non-index field is accessed at the coordinates read from the index field.
    fixed_coordinate_mapping = {f.name: coordinate_typed_symbols for f in non_index_fields}
    read_only_fields = {f.name for f in fields_read - fields_written}
    resolve_field_accesses(ast_node, read_only_fields, field_to_fixed_coordinates=fixed_coordinate_mapping)
    move_constants_before_loop(ast_node)
    return ast_node