def test_match_for_assignment_collection():
    """Check wildcard ``match`` and ``find`` on an ``AssignmentCollection``."""
    x, y = pystencils.fields('x, y: float32[3d]')
    a, b, c, d = sp.symbols('a, b, c, d')
    x_center = x.center()

    assignments = pystencils.AssignmentCollection({
        a: sp.floor(1),
        b: 2,
        c: a + c,
        y.center(): sp.ceiling(x_center) + sp.floor(x_center),
    })

    w1, w2, w3 = (sp.Wild(name) for name in ('w1', 'w2', 'w3'))
    wild_ceiling = sp.ceiling(w1)
    wild_addition = w1 + w2

    # The pattern "anything = ceiling(w1) + w2" only fits the field assignment.
    first_match = assignments.match(pystencils.Assignment(w3, wild_ceiling + w2))
    assert first_match[w1] == x_center

    second_match = assignments.match(pystencils.Assignment(w3, wild_ceiling + w2))
    expected_bindings = {
        w3: y.center(),
        w2: sp.floor(x_center),
        w1: x_center,
    }
    assert second_match == expected_bindings

    assert assignments.find(wild_ceiling) == {sp.ceiling(x_center)}

    additions = [hit for hit in assignments.find(wild_addition) if isinstance(hit, sp.Add)]
    assert len(additions) == 2
def test_free_and_bound_symbols():
    """Check that a written field access is bound and a read-only one is free.

    Relies on ``a``, ``b``, ``c``, ``d`` and ``f`` being defined outside this
    function.
    """
    subexpression = ps.Assignment(a, d[0, 0](0))
    main_assignment = ps.Assignment(f[0, 0](1), b * c)
    collection = ps.AssignmentCollection([main_assignment], subexpressions=[subexpression])

    written_access = f[0, 0](1)
    read_access = d[0, 0](0)
    assert written_access in collection.bound_symbols
    assert read_access in collection.free_symbols
def test_staggered(vectorized):
    """Make sure that the RNG counter can be substituted during loop cutting"""
    dh = ps.create_data_handling((8, 8), default_ghost_layers=0, default_target=Target.CPU)
    j = dh.add_array("j", values_per_cell=dh.dim, field_type=ps.FieldType.STAGGERED_FLUX)

    zero_fluxes = [ps.Assignment(j.staggered_access(direction), 0) for direction in j.staggered_stencil]
    a = ps.AssignmentCollection(zero_fluxes)

    # Replace the right-hand side of the first main assignment by a random symbol.
    rng_symbol_gen = random_symbol(a.subexpressions, dim=dh.dim, rng_node=PhiloxTwoDoubles)
    first_lhs = a.main_assignments[0].lhs
    a.main_assignments[0] = ps.Assignment(first_lhs, next(rng_symbol_gen))

    kernel = ps.create_staggered_kernel(a, target=dh.default_target).compile()

    # The scalar variant only has to compile; the rest compares against a vectorized kernel.
    if not vectorized:
        return
    if not instruction_sets:
        pytest.skip("cannot detect CPU instruction set")
    pytest.importorskip('islpy')

    cpu_vectorize_info = {
        'assume_inner_stride_one': True,
        'assume_aligned': False,
        'instruction_set': instruction_sets[-1],
    }

    dh.fill(j.name, 867)
    dh.run_kernel(kernel, seed=5, time_step=309)
    ref_data = dh.gather_array(j.name)

    kernel2 = ps.create_staggered_kernel(
        a, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()

    dh.fill(j.name, 867)
    dh.run_kernel(kernel2, seed=5, time_step=309)
    data = dh.gather_array(j.name)

    assert np.allclose(ref_data, data)
def test_assignment_collection_dict_conversion():
    """Collections built from a dict must agree with ones built from a list."""
    x, y = pystencils.fields('x,y: [2D]')

    # Case 1: a single main assignment.
    collection_normal = pystencils.AssignmentCollection(
        [pystencils.Assignment(x.center(), y[1, 0] + y[0, 0])], [])
    collection_dict = pystencils.AssignmentCollection(
        {x.center(): y[1, 0] + y[0, 0]}, {})

    assert str(collection_normal) == str(collection_dict)
    assert collection_dict.main_assignments_dict == {x.center(): y[1, 0] + y[0, 0]}
    assert collection_dict.subexpressions_dict == {}

    # Case 2: two main assignments, given in the same order in both forms.
    pairs = [(y[1, 0], x.center()), (y[0, 0], x.center())]
    collection_normal = pystencils.AssignmentCollection(
        [pystencils.Assignment(lhs, rhs) for lhs, rhs in pairs], [])
    collection_dict = pystencils.AssignmentCollection(dict(pairs), {})

    assert str(collection_normal) == str(collection_dict)
    assert collection_dict.main_assignments_dict == {
        y[1, 0]: x.center(),
        y[0, 0]: x.center(),
    }
    assert collection_dict.subexpressions_dict == {}
def test_sqrt_of_integer():
    """Regression test for bug where sqrt(3) was classified as integer"""

    def build_assignments(field_description):
        # tmp = sqrt(3); f[0] = tmp
        field = ps.fields(field_description)
        tmp = sp.symbols("tmp")
        return [ps.Assignment(tmp, sp.sqrt(3)), ps.Assignment(field[0], tmp)]

    # Double precision (default kernel configuration).
    assignments = build_assignments("f: [1D]")
    arr_double = np.array([1], dtype=np.float64)
    kernel = ps.create_kernel(assignments).compile()
    kernel(f=arr_double)
    assert 1.7 < arr_double[0] < 1.8

    # Single precision.
    assignments = build_assignments("f: float32[1D]")
    arr_single = np.array([1], dtype=np.float32)
    config = ps.CreateKernelConfig(data_type="float32")
    kernel = ps.create_kernel(assignments, config=config).compile()
    kernel(f=arr_single)

    code = ps.get_code_str(kernel.ast)
    # ps.show_code(kernel.ast)
    # 1.7320508075688772935 --> it is actually correct to round to ...773. This was wrong before !282
    assert "1.7320508075688773f" in code
    assert 1.7 < arr_single[0] < 1.8
def test_vec_any(instruction_set, dtype):
    """Run a kernel whose conditional is guarded by ``vec_any`` and check the written region."""
    width_unknown = instruction_set in ['sve', 'rvv']
    if width_unknown:
        width = 4  # we don't know the actual value
    else:
        width = get_vector_instruction_set(dtype, instruction_set)['width']

    np_dtype = np.float64 if dtype == 'double' else np.float32
    data_arr = np.zeros((4 * width, 4 * width), dtype=np_dtype)
    data_arr[3:9, 1:3 * width - 1] = 1.0
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)

    store_flag = ps.Assignment(sp.Symbol("t1"), vec_any(data.center() > 0.0))
    overwrite_if_any = Conditional(vec_any(data.center() > 0.0),
                                   Block([ps.Assignment(data.center(), 2.0)]))
    c = [store_flag, overwrite_if_any]

    ast = ps.create_kernel(c, target=ps.Target.CPU,
                           cpu_vectorize_info={'instruction_set': instruction_set})
    kernel = ast.compile()
    kernel(data=data_arr)

    if width_unknown:
        # we only know that the first value has changed
        np.testing.assert_equal(data_arr[3:9, :3 * width - 1], 2.0)
    else:
        np.testing.assert_equal(data_arr[3:9, :3 * width], 2.0)
def test_philox_double():
    """Compare ``PhiloxTwoDoubles`` output with values derived from ``philox_reference``."""
    for target in ('cpu', 'gpu'):
        dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
        f = dh.add_array("f", values_per_cell=2)
        dh.fill('f', 42.0)

        philox_node = PhiloxTwoDoubles(dh.dim)
        first, second = philox_node.result_symbols[0], philox_node.result_symbols[1]
        assignments = [philox_node, ps.Assignment(f(0), first), ps.Assignment(f(1), second)]
        kernel = ps.create_kernel(assignments, target=dh.default_target).compile()

        dh.all_to_gpu()
        dh.run_kernel(kernel, time_step=124)
        dh.all_to_cpu()

        arr = dh.gather_array('f')
        in_unit_interval = np.logical_and(arr <= 1.0, arr >= 0)
        assert in_unit_interval.all()

        # Combine pairs of reference words into one value per double.
        x = philox_reference[:, :, 0::2]
        y = philox_reference[:, :, 1::2]
        z = x ^ y << (53 - 32)
        double_reference = z * 2.**-53 + 2.**-54
        tolerance = np.finfo(np.float64).eps
        assert (np.allclose(arr, double_reference, rtol=0, atol=tolerance))
def apply_wieners(complex_field: Field, wieners: Field, output_weight_field: Field):
    """Create assignments that scale ``complex_field`` by the coefficients in ``wieners``.

    Both components (index 0 and 1 of the last index dimension) of every
    ``(stack, patch)`` entry are multiplied in place by the matching
    coefficient. For each stack index, ``output_weight_field`` receives the
    reciprocal of the sum of squared coefficients collected so far.

    Returns:
        An ``AssignmentCollection`` holding all generated assignments.
    """
    assert complex_field.index_dimensions == 3
    assert wieners.index_dimensions == 2
    assert output_weight_field.index_dimensions == 1

    num_stacks = complex_field.index_shape[0]
    num_patches = complex_field.index_shape[1]

    assignments = []
    # NOTE(review): this list is not reset per stack, so the weight of a later
    # stack also contains the coefficients of earlier stacks - confirm intended.
    wiener_sum = []

    for stack_index in range(num_stacks):
        for patch_index in range(num_patches):
            wien = wieners(stack_index, patch_index)
            wiener_sum.append(wien**2)

            for component in (0, 1):
                assignments.append(pystencils.Assignment(
                    complex_field.center(stack_index, patch_index, component),
                    complex_field.center(stack_index, patch_index, component) * wien))

        weight = 1 / sympy.Add(*wiener_sum)
        assignments.append(pystencils.Assignment(output_weight_field.center(stack_index), weight))

    return AssignmentCollection(assignments)
def wiener_filtering(complex_field: Field, output_weight_field: Field, sigma):
    """Create assignments that scale ``complex_field`` by coefficients computed from its own values.

    For every ``(stack, patch)`` entry the coefficient is
    ``val / (val + sigma**2)``, where ``val`` is the squared magnitude of the
    two components divided by ``index_shape[0] * index_shape[1]``. For each
    stack index, ``output_weight_field`` receives the reciprocal of the sum of
    squared coefficients collected so far.

    Returns:
        An ``AssignmentCollection`` holding all generated assignments.
    """
    assert complex_field.index_dimensions == 3
    assert output_weight_field.index_dimensions == 1

    num_stacks = complex_field.index_shape[0]
    num_patches = complex_field.index_shape[1]
    norm_factor = num_stacks * num_patches

    assignments = []
    # NOTE(review): this list is not reset per stack, so the weight of a later
    # stack also contains the coefficients of earlier stacks - confirm intended.
    wiener_sum = []

    for stack_index in range(num_stacks):
        for patch_index in range(num_patches):
            magnitude = sum([complex_field.center(stack_index, patch_index, component) ** 2
                             for component in (0, 1)])
            val = magnitude / norm_factor  # implementation differ whether to apply norm_factor on val on wien
            wien = val / (val + sigma * sigma)
            wiener_sum.append(wien**2)

            for component in (0, 1):
                assignments.append(pystencils.Assignment(
                    complex_field.center(stack_index, patch_index, component),
                    complex_field.center(stack_index, patch_index, component) * wien))

        weight = 1 / sympy.Add(*wiener_sum)
        assignments.append(pystencils.Assignment(output_weight_field.center(stack_index), weight))

    return AssignmentCollection(assignments)
def hard_thresholding(complex_field: Field, output_weight_field, threshold):
    """Create assignments that zero entries of ``complex_field`` below a magnitude threshold.

    An entry is kept when the sum of its two squared components exceeds
    ``threshold ** 2``; otherwise both components are set to 0. For each stack
    index, ``output_weight_field`` receives the number of kept entries of that
    stack, expressed as a sum of 0/1 piecewise terms.

    Returns:
        An ``AssignmentCollection`` holding all generated assignments.
    """
    assert complex_field.index_dimensions == 3
    assert output_weight_field.index_dimensions == 1

    num_stacks = complex_field.index_shape[0]
    num_patches = complex_field.index_shape[1]
    assignments = []

    for stack_index in range(num_stacks):
        num_nonzeros = []
        for patch_index in range(num_patches):
            magnitude = sum([complex_field.center(stack_index, patch_index, component) ** 2
                             for component in (0, 1)])

            for component in (0, 1):
                kept_or_zero = sympy.Piecewise(
                    (complex_field.center(stack_index, patch_index, component), magnitude > threshold ** 2),
                    (0, True))
                assignments.append(pystencils.Assignment(
                    complex_field.center(stack_index, patch_index, component), kept_or_zero))

            num_nonzeros.append(sympy.Piecewise((1, magnitude > threshold ** 2), (0, True)))

        assignments.append(pystencils.Assignment(
            output_weight_field.center(stack_index), sympy.Add(*num_nonzeros)))

    return AssignmentCollection(assignments)
def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), keys=(0, 0), offset_values=None):
    """Compare a vectorized RNG kernel with the non-vectorized kernel for the same RNG."""
    aes_unsupported = target in ['neon', 'vsx', 'rvv'] or target.startswith('sve')
    if aes_unsupported and rng == 'aesni':
        pytest.xfail('AES not yet implemented for this architecture')

    cpu_vectorize_info = {
        'assume_inner_stride_one': True,
        'assume_aligned': True,
        'instruction_set': target,
    }

    dh = ps.create_data_handling((131, 131), default_ghost_layers=0, default_target=Target.CPU)
    f = dh.add_array("f",
                     values_per_cell=4 if precision == 'float' else 2,
                     dtype=np.float32 if dtype == 'float' else np.float64,
                     alignment=True)
    dh.fill(f.name, 42.0)
    ref = dh.add_array("ref", values_per_cell=4 if precision == 'float' else 2)

    kwargs = {'time_step': t}

    def rng_assignments(destination):
        # A fresh RNG node plus one assignment per result symbol.
        node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
        return [node] + [ps.Assignment(destination(i), s) for i, s in enumerate(node.result_symbols)]

    # Reference: plain kernel writing into "ref".
    kernel = ps.create_kernel(rng_assignments(ref), target=dh.default_target).compile()
    if offset_values is not None:
        kwargs.update({k.name: v for k, v in zip(offsets, offset_values)})
    dh.run_kernel(kernel, **kwargs)

    # Vectorized kernel writing into "f".
    kernel = ps.create_kernel(rng_assignments(f), target=dh.default_target,
                              cpu_vectorize_info=cpu_vectorize_info).compile()
    dh.run_kernel(kernel, **kwargs)

    ref_data = dh.gather_array(ref.name)
    data = dh.gather_array(f.name)
    assert np.allclose(ref_data, data)
def test_copy():
    """A copied ``AssignmentCollection`` must compare equal to its source.

    Relies on ``a``, ``b``, ``c`` and ``f`` being defined outside this function.
    """
    main_assignments = [
        ps.Assignment(f[0, 0](0), a * b),
        ps.Assignment(f[0, 0](1), b * c),
    ]
    original = ps.AssignmentCollection(main_assignments, subexpressions=[])
    duplicate = original.copy()
    assert duplicate == original
def test_staggered_subexpressions():
    """A staggered kernel whose symbol is assigned after its use must still compile."""
    dh = ps.create_data_handling((10, 10), periodicity=True, default_target=Target.CPU)
    j = dh.add_array('j', values_per_cell=2, field_type=ps.FieldType.STAGGERED)
    c = sp.symbols("c")

    # "c" is used by the first assignment and only defined by the second one.
    use_c = ps.Assignment(j.staggered_access("W"), c)
    define_c = ps.Assignment(c, 1)

    ps.create_staggered_kernel([use_c, define_c], target=dh.default_target).compile()
def aggregate(block_scores: Field, patch_input_field: Field, destination_field: Field, block_stencil, matching_stencil, threshold, max_selected, compilation_target, patch_weights: Field = None, accumulated_weights: Field = None, **compilation_kwargs):
    """Compile a kernel that atomically adds selected patches into ``patch_input_field``.

    For every entry ``i`` of ``block_stencil`` an ``atomicAdd`` is generated
    that adds ``weight * destination_field.center(nth_hit, i)`` to
    ``patch_input_field`` at the stencil offset shifted by the symbolic offset
    ``_o``. The kernel body is then wrapped in a ``Select`` node that takes
    the offset from ``matching_stencil`` with the predicate
    ``block_scores.center(n) < threshold``.

    Args:
        block_scores: field compared against ``threshold`` in the predicate.
        patch_input_field: target of the atomic additions; also provides the
            kernel's ``data_type``.
        destination_field: field with two index dimensions whose last index
            shape must equal ``len(block_stencil)``.
        block_stencil: offsets making up one block.
        matching_stencil: offsets handed to ``Select`` as ``from_iterable``.
        threshold: upper bound for the block score of a selected offset.
        max_selected: forwarded to ``Select``.
        compilation_target: target for ``create_kernel`` and ``Select``.
        patch_weights: optional field; when given, each contribution is
            multiplied by ``patch_weights.center(nth_hit)``, otherwise by 1.
        accumulated_weights: optional field; when given, the weight is
            additionally added atomically to it at the shifted position.
        **compilation_kwargs: forwarded to ``pystencils.create_kernel``.

    Returns:
        The result of ``compile()`` on the generated kernel AST.
    """
    # NOTE(review): only the largest positive stencil component is used for the
    # ghost layers - confirm that the stencils are symmetric around zero.
    max_offset = max(max(o) for o in matching_stencil)
    max_offset += max(max(o) for o in block_stencil)

    offset = pystencils_reco.typed_symbols('_o:%i' % patch_input_field.spatial_dimensions, 'int32')
    copies = []

    assert destination_field.index_dimensions == 2
    assert destination_field.index_shape[-1] == len(block_stencil)

    n, nth_hit = pystencils_reco.typed_symbols('_n, nth_hit', 'int32')
    atomic_add = sympy.Function('atomicAdd')

    for i, stencil_offset in enumerate(block_stencil):
        shifted = tuple(symbol + component for symbol, component in zip(offset, stencil_offset))

        # Compare against None explicitly: the presence of the optional field
        # must not depend on how a Field object evaluates in boolean context.
        weight = patch_weights.center(nth_hit) if patch_weights is not None else 1

        copies.append(pystencils.Assignment(
            _get_dummy_symbol(),
            atomic_add(address_of(patch_input_field[shifted]),
                       weight * destination_field.center(nth_hit, i))))

        if accumulated_weights is not None:
            copies.append(pystencils.Assignment(
                _get_dummy_symbol(),
                atomic_add(address_of(accumulated_weights[shifted]), weight)))

    assignments = AssignmentCollection(copies)
    ast = pystencils.create_kernel(assignments,
                                   target=compilation_target,
                                   data_type=patch_input_field.dtype,
                                   ghost_layers=max_offset,
                                   **compilation_kwargs)

    # Wrap the generated body so it runs for the offsets chosen by the predicate.
    ast._body = Select(ast.body,
                       what=offset,
                       from_iterable=matching_stencil,
                       predicate=block_scores.center(n) < threshold,
                       counter_symbol=n,
                       hit_counter_symbol=nth_hit,
                       compilation_target=compilation_target,
                       max_selected=max_selected)
    return ast.compile()
def test_tfmad_gradient_check_torch_native(with_offsets, with_cuda):
    """Run ``torch.autograd.gradcheck`` on an op created with the ``torch_native`` backend.

    Args:
        with_offsets: if true, the forward assignment uses a discretized
            differential expression; otherwise only center accesses.
        with_cuda: if true, the tensors are moved to the GPU and the op is
            created with ``use_cuda=True``.
    """
    # importorskip already returns the imported module, so no further import is needed.
    torch = pytest.importorskip('torch')

    a, b, out = ps.fields("a, b, out: float64[5,7]")

    if with_offsets:
        cont = 2 * ps.fd.Diff(a, 0) - 1.5 * ps.fd.Diff(a, 1) - ps.fd.Diff(b, 0) + 3 * ps.fd.Diff(b, 1)
        discretize = ps.fd.Discretization2ndOrder(dx=1)
        discretization = discretize(cont)
        assignment = ps.Assignment(out.center(), discretization + 1.2 * a.center())
    else:
        assignment = ps.Assignment(out.center(), 1.2 * a.center + 0.1 * b.center)

    assignment_collection = ps.AssignmentCollection([assignment], [])
    print('Forward')
    print(assignment_collection)

    print('Backward')
    auto_diff = pystencils_autodiff.AutoDiffOp(assignment_collection,
                                               boundary_handling='zeros',
                                               diff_mode='transposed-forward')
    backward = auto_diff.backward_assignments
    print(backward)
    print('Forward output fields (to check order)')
    print(auto_diff.forward_input_fields)

    a_tensor = torch.zeros(*a.shape, dtype=torch.float64, requires_grad=True).contiguous()
    b_tensor = torch.zeros(*b.shape, dtype=torch.float64, requires_grad=True).contiguous()

    if with_cuda:
        a_tensor = a_tensor.cuda()
        b_tensor = b_tensor.cuda()

    function = auto_diff.create_tensorflow_op(use_cuda=with_cuda, backend='torch_native')

    # Named so that the builtin ``dict`` is not shadowed.
    tensors_by_field = {a: a_tensor, b: b_tensor}
    # Pass the inputs in the order the op reports for its forward inputs.
    inputs = tuple(tensors_by_field[field] for field in auto_diff.forward_input_fields)
    torch.autograd.gradcheck(function.apply, inputs, atol=1e-4, raise_exception=True)
def test_vectorization_fixed_size():
    """Vectorize a five-point stencil on several fixed-size arrays and check the interior."""
    inner_shapes = [
        (20, 24),  # Fixed size - multiple of four
        (21, 25),  # Fixed size - no multiple of four
        (23, 17),  # Fixed size - different remainder
    ]

    configurations = []
    for rows, cols in inner_shapes:
        arr = np.ones((rows + 2, cols + 2)) * 5.0
        f, g = ps.fields(f=arr, g=arr)
        configurations.append((arr, f, g))

    for arr, f, g in configurations:
        stencil_sum = f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0
        update_rule = [ps.Assignment(g[0, 0], stencil_sum)]

        ast = ps.create_kernel(update_rule)
        vectorize(ast)
        func = ast.compile()

        dst = np.zeros_like(arr)
        func(g=dst, f=arr)
        # Five neighbours with value 5.0 each, plus the constant.
        np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
def test_tfmad_gradient_check_torch():
    """Run ``torch.autograd.gradcheck`` on an op created with the ``torch`` backend."""
    torch = pytest.importorskip('torch')

    a, b, out = ps.fields("a, b, out: float[5,7]")

    cont = 2 * ps.fd.Diff(a, 0) - 1.5 * ps.fd.Diff(a, 1) - ps.fd.Diff(b, 0) + 3 * ps.fd.Diff(b, 1)
    discretize = ps.fd.Discretization2ndOrder(dx=1)
    discretization = discretize(cont) + 1.2 * a.center

    forward = ps.AssignmentCollection([ps.Assignment(out.center(), discretization)], [])
    print('Forward')
    print(forward)

    print('Backward')
    auto_diff = pystencils_autodiff.AutoDiffOp(forward, diff_mode='transposed-forward')
    backward = auto_diff.backward_assignments
    print(backward)
    print('Forward output fields (to check order)')
    print(auto_diff.forward_input_fields)

    a_tensor = torch.zeros(*a.shape, dtype=torch.float64, requires_grad=True)
    b_tensor = torch.zeros(*b.shape, dtype=torch.float64, requires_grad=True)

    field_to_tensor = {a: a_tensor, b: b_tensor}
    function = auto_diff.create_tensorflow_op(field_to_tensor, backend='torch')

    torch.autograd.gradcheck(function.apply, [a_tensor, b_tensor])
def backward_projection(input_projection, output_volume, projection_matrix, normalization):
    """Create the assignments that resample ``input_projection`` into ``output_volume``.

    Args:
        input_projection: source passed to ``generic_spatial_matrix_transform``.
        output_volume: destination passed to ``generic_spatial_matrix_transform``.
        projection_matrix: wrapped in ``pystencils_reco.ProjectiveMatrix`` and
            used as ``inverse_matrix`` of the transform.
        normalization: divisor applied to the right-hand side of every main
            assignment.

    Returns:
        The assignment collection produced by the transform, with normalized
        main assignments.
    """
    projection_matrix = pystencils_reco.ProjectiveMatrix(projection_matrix)
    assignments = pystencils_reco.resampling.generic_spatial_matrix_transform(
        input_projection, output_volume, None, inverse_matrix=projection_matrix)

    # Rebinding a loop variable would leave the collection untouched, so the
    # scaled assignments are written back into it explicitly. Only the main
    # assignments are divided: scaling subexpressions as well would apply the
    # normalization more than once to results that depend on them.
    assignments.main_assignments[:] = [
        pystencils.Assignment(assignment.lhs, assignment.rhs / normalization)
        for assignment in assignments.main_assignments
    ]

    return assignments
def test_vec_all(instruction_set, dtype):
    """Run a kernel whose conditional is guarded by ``vec_all`` and check which cells were written."""
    width_unknown = instruction_set in ['sve', 'rvv']
    if width_unknown:
        width = 1000  # we don't know the actual value, need something guaranteed larger than vector
    else:
        width = get_vector_instruction_set(dtype, instruction_set)['width']

    np_dtype = np.float64 if dtype == 'double' else np.float32
    data_arr = np.zeros((4 * width, 4 * width), dtype=np_dtype)
    data_arr[3:9, 1:3 * width - 1] = 1.0
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)

    overwrite_if_all = Conditional(vec_all(data.center() > 0.0),
                                   Block([ps.Assignment(data.center(), 2.0)]))
    c = [overwrite_if_all]

    ast = ps.create_kernel(c, target=Target.CPU,
                           cpu_vectorize_info={'instruction_set': instruction_set})
    kernel = ast.compile()
    kernel(data=data_arr)

    rows = data_arr[3:9]
    if width_unknown:
        # we only know that some values in the middle have been replaced
        assert np.all(rows[:, :2] <= 1.0)
        assert np.any(rows[:, 2:] == 2.0)
    else:
        np.testing.assert_equal(rows[:, :1], 0.0)
        np.testing.assert_equal(rows[:, 1:width], 1.0)
        np.testing.assert_equal(rows[:, width:2 * width], 2.0)
        np.testing.assert_equal(rows[:, 2 * width:3 * width - 1], 1.0)
        np.testing.assert_equal(rows[:, 3 * width - 1:], 0.0)
def test_Basic_data_type():
    """Exercise ``typed_symbols``, ``type_all_numbers``, ``matrix_symbols`` and ``TypedSymbol``."""
    # Tuple and comma-separated string spellings give the same symbols.
    assert typed_symbols(("s", "f"), np.uint) == typed_symbols("s, f", np.uint)

    t_symbols = typed_symbols(("s", "f"), np.uint)
    s = t_symbols[0]
    assert t_symbols[0] == TypedSymbol("s", np.uint)
    assert s.dtype.is_uint()
    assert s.dtype.is_complex() == 0

    for other_type in (str, bool, np.void):
        assert typed_symbols("s", other_type).dtype.is_other()

    assert typed_symbols("s", np.float64).dtype.base_name == 'double'
    # removed for old sympy version
    # assert typed_symbols(("s"), np.float64).dtype.sympy_dtype == typed_symbols(("s"), float).dtype.sympy_dtype

    f, g = ps.fields("f, g : double[2D]")
    expr = ps.Assignment(f.center(), 2 * g.center() + 5)
    new_expr = type_all_numbers(expr, np.float64)
    typed_text = str(new_expr)
    assert "cast_func(2, double)" in typed_text
    assert "cast_func(5, double)" in typed_text

    m = matrix_symbols("a, b", np.uint, 3, 3)
    assert len(m) == 2
    first_matrix = m[0]
    for position, elem in enumerate(first_matrix):
        assert elem == TypedSymbol(f"a{position}", np.uint)
        assert elem.dtype.is_uint()

    reference = TypedSymbol("s", np.uint)
    assert TypedSymbol("s", np.uint).canonical == reference
    assert TypedSymbol("s", np.uint).reversed == reference
def test_philox_float():
    """Compare ``PhiloxFourFloats`` output with values derived from ``philox_reference``."""
    for target in ('cpu', 'gpu'):
        dh = ps.create_data_handling((2, 2), default_ghost_layers=0, default_target=target)
        f = dh.add_array("f", values_per_cell=4)
        dh.fill('f', 42.0)

        philox_node = PhiloxFourFloats(dh.dim)
        assignments = [philox_node]
        for i in range(4):
            assignments.append(ps.Assignment(f(i), philox_node.result_symbols[i]))
        kernel = ps.create_kernel(assignments, target=dh.default_target).compile()

        dh.all_to_gpu()
        dh.run_kernel(kernel, time_step=124)
        dh.all_to_cpu()

        arr = dh.gather_array('f')
        in_unit_interval = np.logical_and(arr <= 1.0, arr >= 0)
        assert in_unit_interval.all()

        float_reference = philox_reference * 2.**-32 + 2.**-33
        tolerance = np.finfo(np.float32).eps
        assert (np.allclose(arr, float_reference, rtol=0, atol=tolerance))
def jacobi(dst, src):
    """Return an assignment setting ``dst.center`` to the mean of the direct neighbours of ``src``.

    Both fields must have the same number of spatial dimensions and no index
    dimensions. Along each spatial dimension the neighbours at offset +1 and
    -1 are used.
    """
    assert dst.spatial_dimensions == src.spatial_dimensions
    assert src.index_dimensions == 0 and dst.index_dimensions == 0

    neighbors = [
        src.neighbor(axis, step)
        for axis in range(src.spatial_dimensions)
        for step in (1, -1)
    ]
    average = sp.Add(*neighbors) / len(neighbors)
    return ps.Assignment(dst.center, average)
def add_fixed_constant_boundary_handling(assignments, with_cse=True):
    """Wrap relative field accesses on right-hand sides in ``ConditionalFieldAccess``.

    If every field access found in ``assignments`` has all-zero offsets, the
    input is returned unchanged. Otherwise each non-absolute access on a
    right-hand side is replaced by a ``ConditionalFieldAccess`` whose
    condition states that the accessed position lies outside the spatial
    shape of one of the accessed fields.

    Args:
        assignments: collection to process; iterated directly and through
            ``all_assignments``.
        with_cse: if true, the result is passed through ``sympy_cse``.

    Returns:
        ``assignments`` itself when there is nothing to guard, otherwise a
        new assignment collection.
    """
    field_accesses = set()
    for assignment in assignments:
        field_accesses.update(assignment.atoms(Field.Access))

    def has_only_zero_offsets(access):
        return all(o == 0 for o in access.offsets)

    if all(has_only_zero_offsets(access) for access in field_accesses):
        return assignments

    # The shape of an arbitrary accessed field is used for all accesses.
    common_shape = next(iter(field_accesses)).field.spatial_shape
    ndim = len(common_shape)

    def is_out_of_bound(access, shape):
        return sp.Or(*[sp.Or(a < 0, a >= s) for a, s in zip(access, shape)])

    def guard_rhs(rhs):
        # Map every relative access to its guarded counterpart.
        replacements = {}
        for access in rhs.atoms(Field.Access):
            if access.is_absolute_access:
                continue
            position = sp.Matrix(access.offsets) + x_vector(ndim)
            replacements[access] = ConditionalFieldAccess(access, is_out_of_bound(position, common_shape))
        return rhs.subs(replacements)

    safe_assignments = [
        pystencils.Assignment(assignment.lhs, guard_rhs(assignment.rhs))
        for assignment in assignments.all_assignments
    ]

    collection = pystencils.AssignmentCollection(safe_assignments)
    if not with_cse:
        return collection
    return sympy_cse(collection)
def test_piecewise1():
    """Vectorize a kernel whose piecewise condition is stored in a temporary symbol."""
    a, b, c, _unused_d, _unused_e = sp.symbols("a b c d e")

    arr = np.ones((2 ** 3 + 2, 2 ** 4 + 2)) * 5.0
    f, g = ps.fields(f=arr, g=arr)

    selected = sp.Piecewise((b + 3 + f[0, 1], c), (0.0, True))
    update_rule = [
        ps.Assignment(a, f[1, 0]),
        ps.Assignment(b, a),
        ps.Assignment(c, f[0, 0] > 0.0),
        ps.Assignment(g[0, 0], selected),
    ]

    ast = ps.create_kernel(update_rule)
    vectorize(ast)
    func = ast.compile()

    dst = np.zeros_like(arr)
    func(g=dst, f=arr)
    np.testing.assert_equal(dst[1:-1, 1:-1], 5 + 3 + 5.0)
def discrete_continuity(self, flux_field: ps.field.Field):
    """Return a list of assignments for the continuity equation, which includes the source term

    Args:
        flux_field: a staggered field from which the fluxes are taken
    """
    assert ps.FieldType.is_staggered(flux_field)

    # Directions of the staggered stencil followed by their inverse directions.
    inverse_directions = [ps.stencil.inverse_direction_string(d) for d in flux_field.staggered_stencil]
    neighbors = flux_field.staggered_stencil + inverse_directions

    # Accumulate the flux accesses over all directions.
    directions = iter(neighbors)
    divergence = flux_field.staggered_vector_access(next(directions))
    for direction in directions:
        divergence += flux_field.staggered_vector_access(direction)

    # Source terms, looked up by the left-hand side they belong to.
    source = {s.lhs: s.rhs for s in self.discrete_source()}

    result = []
    for lhs, rhs in zip(self.c.center_vector, divergence):
        if lhs in source:
            updated = lhs - rhs + source[lhs]
        else:
            updated = lhs - rhs
        result.append(ps.Assignment(lhs, updated))
    return result
def test_strided(instruction_set, dtype):
    """Compare a kernel built with ``cpu_vectorize_info`` to the default kernel on a 25x19 array.

    When the instruction set has no ``storeS`` entry and is neither avx512,
    rvv nor an sve variant, kernel creation must warn that the loop could not
    be vectorized. In all other cases it must not emit any warning.
    """
    import warnings

    f, g = ps.fields(f"f, g : float{64 if dtype == 'double' else 32}[2D]")
    update_rule = [
        ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)
    ]

    expect_warning = ('storeS' not in get_vector_instruction_set(dtype, instruction_set)
                      and instruction_set not in ['avx512', 'rvv']
                      and not instruction_set.startswith('sve'))

    if expect_warning:
        with pytest.warns(UserWarning) as warn:
            config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
            ast = ps.create_kernel(update_rule, config=config)
        assert 'Could not vectorize loop' in warn[0].message.args[0]
    else:
        # pytest.warns(None) is deprecated in pytest 7 and rejected by
        # pytest 8, so all warnings are recorded with the standard library.
        with warnings.catch_warnings(record=True) as warn:
            warnings.simplefilter("always")
            config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': instruction_set})
            ast = ps.create_kernel(update_rule, config=config)
        assert len(warn) == 0, [str(w.message) for w in warn]

    func = ast.compile()
    ref_func = ps.create_kernel(update_rule).compile()

    np_dtype = np.float64 if dtype == 'double' else np.float32
    arr = np.random.random((23 + 2, 17 + 2)).astype(np_dtype)
    dst = np.zeros_like(arr, dtype=np_dtype)
    ref = np.zeros_like(arr, dtype=np_dtype)

    func(g=dst, f=arr)
    ref_func(g=ref, f=arr)
    np.testing.assert_almost_equal(dst, ref, 13 if dtype == 'double' else 5)
def make_2d_update_rule_6(src, dst, coef):
    """Return a one-element update rule for a 2D star stencil of radius 6.

    ``dst[0, 0]`` is assigned ``coef`` times the sum of ``src`` at the centre
    and at offsets 1 to 6 in both directions along each of the two axes
    (25 accesses in total).
    """
    radius = 6
    toward_negative = range(-radius, 0)    # -6, -5, ..., -1
    toward_positive = range(radius, 0, -1)  # 6, 5, ..., 1

    # Offsets are listed in the order in which the terms are summed.
    offsets = [(0, 0)]
    offsets += [(0, k) for k in toward_negative]
    offsets += [(0, k) for k in toward_positive]
    offsets += [(k, 0) for k in toward_negative]
    offsets += [(k, 0) for k in toward_positive]

    terms = [src[i, j] for i, j in offsets]
    total = terms[0]
    for term in terms[1:]:
        total = total + term

    return [ps.Assignment(lhs=dst[0, 0], rhs=total * coef)]
def test_print_infinity(type, negative, target):
    """Create, compile and print a kernel that assigns plus or minus infinity to a field."""
    x = pystencils.fields(f'x: {type}[1d]')

    value = -oo if negative else oo
    assignment = pystencils.Assignment(x.center, value)
    ast = pystencils.create_kernel(assignment, data_type=type, target=target)

    if target == pystencils.Target.GPU:
        pytest.importorskip('pycuda')

    ast.compile()
    compiled = ast.compile()
    print(compiled.code)
def test_simple_2d_check_assignment_collection():
    """Check the Jacobian and the backward assignments of ``z = x * log(x * y)``."""
    # use simply example
    z, y, x = ps.fields("z, y, x: [2d]")

    forward_rhs = x[0, 0] * sp.log(x[0, 0] * y[0, 0])
    forward_assignments = ps.AssignmentCollection([ps.Assignment(z[0, 0], forward_rhs)], [])

    jac = pystencils_autodiff.get_jacobian_of_assignments(forward_assignments, [x[0, 0], y[0, 0]])

    expected_shape = (len(forward_assignments.bound_symbols), len(forward_assignments.free_symbols))
    assert jac.shape == expected_shape
    print(repr(jac))
    assert repr(jac) == 'Matrix([[log(x_C*y_C) + 1, x_C/y_C]])'

    # Every diff mode must work on the forward pass and on an already-differentiated collection.
    for diff_mode in DiffModes:
        pystencils_autodiff.create_backward_assignments(forward_assignments, diff_mode=diff_mode)
        once_differentiated = pystencils_autodiff.create_backward_assignments(forward_assignments)
        pystencils_autodiff.create_backward_assignments(once_differentiated, diff_mode=diff_mode)

    result1 = pystencils_autodiff.create_backward_assignments(
        forward_assignments, diff_mode=DiffModes.TRANSPOSED)
    result2 = pystencils_autodiff.create_backward_assignments(
        forward_assignments, diff_mode=DiffModes.TF_MAD)
    assert result1 == result2
def single_block_matching(
    input_field: Field,
    comparision_field: Field,
    output_block_scores: Field,
    block_stencil,
    matching_offset,
    match_index,
    matching_function=pystencils_reco.functions.squared_difference,
):
    """Create the assignment that scores one candidate block.

    For every offset in ``block_stencil``, ``matching_function`` compares
    ``input_field`` at that offset with ``comparision_field`` at the offset
    shifted by ``matching_offset``. The sum of all comparisons is assigned to
    ``output_block_scores(match_index)``.

    Returns:
        An ``AssignmentCollection`` with that single assignment, created with
        ``perform_cse=False``.
    """
    differences = []
    for stencil_offset in block_stencil:
        shifted = tuple(p + q for p, q in zip(stencil_offset, matching_offset))
        differences.append(matching_function(input_field[stencil_offset], comparision_field[shifted]))

    score = pystencils.Assignment(output_block_scores(match_index), sympy.Add(*differences))
    return AssignmentCollection([score], perform_cse=False)