Example #1
File: lbstep.py Project: mabau/lbmpy
    def _compile_macroscopic_setter_and_getter(self):
        lb_method = self.method
        cqc = lb_method.conserved_quantity_computation
        pdf_field = self._data_handling.fields[self._pdf_arr_name]
        rho_field = self._data_handling.fields[self.density_data_name]
        rho_field = rho_field.center if self.density_data_index is None else rho_field(
            self.density_data_index)
        vel_field = self._data_handling.fields[self.velocity_data_name]

        getter_eqs = cqc.output_equations_from_pdfs(pdf_field.center_vector, {
            'density': rho_field,
            'velocity': vel_field
        })
        getter_kernel = create_kernel(
            getter_eqs, target=Target.CPU,
            cpu_openmp=self._config.cpu_openmp).compile()

        setter_eqs = pdf_initialization_assignments(lb_method, rho_field,
                                                    vel_field.center_vector,
                                                    pdf_field.center_vector)
        setter_eqs = create_simplification_strategy(lb_method)(setter_eqs)
        setter_kernel = create_kernel(
            setter_eqs, target=Target.CPU,
            cpu_openmp=self._config.cpu_openmp).compile()
        return getter_kernel, setter_kernel
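Every example in this collection follows the same basic pystencils workflow: declare fields, express the update as assignments, call create_kernel, compile, and run the compiled kernel on numpy arrays. The following minimal sketch shows that workflow for orientation; the field names and the Jacobi-style stencil are illustrative only and not taken from any of the examples.

import numpy as np
import pystencils as ps

# Two 2D scalar fields backed by numpy arrays of identical shape
src_arr = np.zeros((32, 32))
dst_arr = np.zeros_like(src_arr)
src, dst = ps.fields(src=src_arr, dst=dst_arr)

# A simple Jacobi-style update written as a single assignment
update_rule = [ps.Assignment(dst[0, 0],
                             (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4)]

kernel = ps.create_kernel(update_rule).compile()  # generate and compile C code
kernel(src=src_arr, dst=dst_arr)                  # execute on the numpy arrays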
Example #2
def test_rng_vectorized(target, rng, precision, dtype, t=130, offsets=(1, 3), keys=(0, 0), offset_values=None):
    if (target in ['neon', 'vsx', 'rvv'] or target.startswith('sve')) and rng == 'aesni':
        pytest.xfail('AES not yet implemented for this architecture')
    cpu_vectorize_info = {'assume_inner_stride_one': True, 'assume_aligned': True, 'instruction_set': target}

    dh = ps.create_data_handling((131, 131), default_ghost_layers=0, default_target=Target.CPU)
    f = dh.add_array("f", values_per_cell=4 if precision == 'float' else 2,
                     dtype=np.float32 if dtype == 'float' else np.float64, alignment=True)
    dh.fill(f.name, 42.0)
    ref = dh.add_array("ref", values_per_cell=4 if precision == 'float' else 2)

    rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
    assignments = [rng_node] + [ps.Assignment(ref(i), s) for i, s in enumerate(rng_node.result_symbols)]
    kernel = ps.create_kernel(assignments, target=dh.default_target).compile()

    kwargs = {'time_step': t}
    if offset_values is not None:
        kwargs.update({k.name: v for k, v in zip(offsets, offset_values)})
    dh.run_kernel(kernel, **kwargs)

    rng_node = RNGs[(rng, precision)](dh.dim, offsets=offsets)
    assignments = [rng_node] + [ps.Assignment(f(i), s) for i, s in enumerate(rng_node.result_symbols)]
    kernel = ps.create_kernel(assignments, target=dh.default_target, cpu_vectorize_info=cpu_vectorize_info).compile()

    dh.run_kernel(kernel, **kwargs)

    ref_data = dh.gather_array(ref.name)
    data = dh.gather_array(f.name)

    assert np.allclose(ref_data, data)
Example #3
def test_tensorflow_jit_cpu():

    pytest.importorskip('tensorflow')

    module_name = "Ololol"

    target = 'cpu'

    z, y, x = pystencils.fields("z, y, x: [20,40]")
    a = sympy.Symbol('a')

    forward_assignments = pystencils.AssignmentCollection(
        {z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0])})

    backward_assignments = create_backward_assignments(forward_assignments)

    forward_ast = pystencils.create_kernel(forward_assignments, target)
    forward_ast.function_name = 'forward_jit'
    backward_ast = pystencils.create_kernel(backward_assignments, target)
    backward_ast.function_name = 'backward_jit'
    module = TensorflowModule(module_name, [forward_ast, backward_ast])

    lib = pystencils_autodiff.tensorflow_jit.compile_sources_and_load(
        [str(module)])
    assert 'call_forward_jit' in dir(lib)
    assert 'call_backward_jit' in dir(lib)

    lib = module.compile()
    assert 'call_forward_jit' in dir(lib)
    assert 'call_backward_jit' in dir(lib)
Example #4
def test_full_scalar_field():
    """Tests fully (un)packing a scalar field (from)to a buffer."""
    fields = _generate_fields()
    for (src_arr, dst_arr, buffer_arr) in fields:
        src_field = Field.create_from_numpy_array("src_field", src_arr)
        dst_field = Field.create_from_numpy_array("dst_field", dst_arr)
        buffer = Field.create_generic("buffer",
                                      spatial_dimensions=1,
                                      field_type=FieldType.BUFFER,
                                      dtype=src_arr.dtype)

        pack_eqs = [Assignment(buffer.center(), src_field.center())]
        pack_code = create_kernel(pack_eqs,
                                  data_type={
                                      'src_field': src_arr.dtype,
                                      'buffer': buffer.dtype
                                  })

        pack_kernel = pack_code.compile()
        pack_kernel(buffer=buffer_arr, src_field=src_arr)

        unpack_eqs = [Assignment(dst_field.center(), buffer.center())]
        unpack_code = create_kernel(unpack_eqs,
                                    data_type={
                                        'dst_field': dst_arr.dtype,
                                        'buffer': buffer.dtype
                                    })

        unpack_kernel = unpack_code.compile()
        unpack_kernel(dst_field=dst_arr, buffer=buffer_arr)

        np.testing.assert_equal(src_arr, dst_arr)
Example #5
def test_strided(instruction_set, dtype):
    f, g = ps.fields(f"f, g : float{64 if dtype == 'double' else 32}[2D]")
    update_rule = [
        ps.Assignment(g[0, 0],
                      f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)
    ]
    if 'storeS' not in get_vector_instruction_set(
            dtype, instruction_set) and not instruction_set in [
                'avx512', 'rvv'
            ] and not instruction_set.startswith('sve'):
        with pytest.warns(UserWarning) as warn:
            config = ps.CreateKernelConfig(
                cpu_vectorize_info={'instruction_set': instruction_set})
            ast = ps.create_kernel(update_rule, config=config)
            assert 'Could not vectorize loop' in warn[0].message.args[0]
    else:
        with pytest.warns(None) as warn:
            config = ps.CreateKernelConfig(
                cpu_vectorize_info={'instruction_set': instruction_set})
            ast = ps.create_kernel(update_rule, config=config)
            assert len(warn) == 0
    func = ast.compile()
    ref_func = ps.create_kernel(update_rule).compile()

    arr = np.random.random(
        (23 + 2,
         17 + 2)).astype(np.float64 if dtype == 'double' else np.float32)
    dst = np.zeros_like(arr,
                        dtype=np.float64 if dtype == 'double' else np.float32)
    ref = np.zeros_like(arr,
                        dtype=np.float64 if dtype == 'double' else np.float32)

    func(g=dst, f=arr)
    ref_func(g=ref, f=arr)
    np.testing.assert_almost_equal(dst, ref, 13 if dtype == 'double' else 5)
Example #6
def test_dynamic_matrix_location_dependent():
    try:
        from pystencils.data_types import TypedMatrixSymbol
    except ImportError:
        import pytest
        pytest.skip()

    x, y = pystencils.fields('x, y:  float32[3d]')

    A = TypedMatrixSymbol('A', 3, 1, create_type('double'),
                          CustomCppType('Vector3<double>'))

    my_fun_call = DynamicFunction(
        TypedSymbol('my_fun', 'std::function<Vector3<double>(int, int, int)>'),
        A.dtype, *pystencils.x_vector(3))

    assignments = pystencils.AssignmentCollection({
        A: my_fun_call,
        y.center: A[0] + A[1] + A[2]
    })

    ast = pystencils.create_kernel(assignments)
    pystencils.show_code(ast, custom_backend=FrameworkIntegrationPrinter())

    my_fun_call = DynamicFunction(
        TypedSymbol('my_fun', TemplateType('Functor_T')), A.dtype,
        *pystencils.x_vector(3))

    assignments = pystencils.AssignmentCollection({
        A: my_fun_call,
        y.center: A[0] + A[1] + A[2]
    })

    ast = pystencils.create_kernel(assignments)
    pystencils.show_code(ast, custom_backend=FrameworkIntegrationPrinter())
Example #7
def test_logical_operators(instruction_set=instruction_set):
    arr = np.zeros((22, 22))

    @ps.kernel
    def kernel_and(s):
        f, g = ps.fields(f=arr, g=arr)
        s.c @= sp.And(f[0, 1] < 0.0, f[1, 0] < 0.0)
        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])

    ast = ps.create_kernel(kernel_and)
    vectorize(ast, instruction_set=instruction_set)
    ast.compile()

    @ps.kernel
    def kernel_or(s):
        f, g = ps.fields(f=arr, g=arr)
        s.c @= sp.Or(f[0, 1] < 0.0, f[1, 0] < 0.0)
        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])

    ast = ps.create_kernel(kernel_or)
    vectorize(ast, instruction_set=instruction_set)
    ast.compile()

    @ps.kernel
    def kernel_equal(s):
        f, g = ps.fields(f=arr, g=arr)
        s.c @= sp.Eq(f[0, 1], 2.0)
        g[0, 0] @= sp.Piecewise([1.0 / f[1, 0], s.c], [1.0, True])

    ast = ps.create_kernel(kernel_equal)
    vectorize(ast, instruction_set=instruction_set)
    ast.compile()
Example #8
def test_native_tensorflow_compilation_cpu():
    tf = pytest.importorskip('tensorflow')

    module_name = "Ololol"

    target = 'cpu'

    z, y, x = pystencils.fields("z, y, x: [20,40]")
    a = sympy.Symbol('a')

    forward_assignments = pystencils.AssignmentCollection({
        z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0])
    })

    backward_assignments = create_backward_assignments(forward_assignments)

    forward_ast = pystencils.create_kernel(forward_assignments, target)
    forward_ast.function_name = 'forward'
    backward_ast = pystencils.create_kernel(backward_assignments, target)
    backward_ast.function_name = 'backward'
    module = TensorflowModule(module_name, [forward_ast, backward_ast])
    print(module)

    # temp_file = write_cached_content(str(module), '.cpp')

    # command = ['c++', '-fPIC', temp_file, '-O2', '-shared',
    # '-o', 'foo.so'] + compile_flags + link_flags + extra_flags
    # print(command)
    # subprocess.check_call(command, env=_compile_env)

    lib = module.compile()
    assert 'call_forward' in dir(lib)
    assert 'call_backward' in dir(lib)
Example #9
def test_pybind11_compilation_cpu(with_python_bindings):

    pytest.importorskip('pybind11')
    pytest.importorskip('cppimport')

    module_name = "Olololsada"

    target = 'cpu'

    z, y, x = pystencils.fields("z, y, x: [20,40]")
    a = sympy.Symbol('a')

    forward_assignments = pystencils.AssignmentCollection(
        {z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0])})

    backward_assignments = create_backward_assignments(forward_assignments)

    forward_ast = pystencils.create_kernel(forward_assignments, target)
    forward_ast.function_name = 'forward'
    backward_ast = pystencils.create_kernel(backward_assignments, target)
    backward_ast.function_name = 'backward'
    module = PybindModule(module_name, [forward_ast, backward_ast],
                          with_python_bindings=with_python_bindings)
    print(module)

    if with_python_bindings:
        pybind_extension = module.compile()
        assert pybind_extension is not None
        assert 'call_forward' in dir(pybind_extension)
        assert 'call_backward' in dir(pybind_extension)
Example #10
def test_advection(dim):
    L = (8, ) * dim
    dh = ps.create_data_handling(L,
                                 periodicity=True,
                                 default_target=ps.Target.CPU)
    c = dh.add_array('c', values_per_cell=1)
    j = dh.add_array('j',
                     values_per_cell=3**dh.dim // 2,
                     field_type=ps.FieldType.STAGGERED_FLUX)
    u = dh.add_array('u', values_per_cell=dh.dim)

    dh.cpu_arrays[c.name][:] = (np.random.random([l + 2 for l in L]))
    dh.cpu_arrays[u.name][:] = (np.random.random([l + 2 for l in L] + [dim]) -
                                0.5) / 5

    vof1 = ps.create_kernel(ps.fd.VOF(j, u, c)).compile()
    dh.fill(j.name, np.nan, ghost_layers=True)
    dh.run_kernel(vof1)
    j1 = dh.gather_array(j.name).copy()

    vof2 = ps.create_kernel(VOF2(j, u, c, simplify=False)).compile()
    dh.fill(j.name, np.nan, ghost_layers=True)
    dh.run_kernel(vof2)
    j2 = dh.gather_array(j.name)

    assert np.allclose(j1, j2)
Example #11
def test_sqrt_of_integer():
    """Regression test for bug where sqrt(3) was classified as integer"""
    f = ps.fields("f: [1D]")
    tmp = sp.symbols("tmp")

    assignments = [ps.Assignment(tmp, sp.sqrt(3)),
                   ps.Assignment(f[0], tmp)]
    arr_double = np.array([1], dtype=np.float64)
    kernel = ps.create_kernel(assignments).compile()
    kernel(f=arr_double)
    assert 1.7 < arr_double[0] < 1.8

    f = ps.fields("f: float32[1D]")
    tmp = sp.symbols("tmp")

    assignments = [ps.Assignment(tmp, sp.sqrt(3)),
                   ps.Assignment(f[0], tmp)]
    arr_single = np.array([1], dtype=np.float32)
    config = ps.CreateKernelConfig(data_type="float32")
    kernel = ps.create_kernel(assignments, config=config).compile()
    kernel(f=arr_single)

    code = ps.get_code_str(kernel.ast)
    # ps.show_code(kernel.ast)
    # 1.7320508075688772935  --> it is actually correct to round to ...773. This was wrong before !282
    assert "1.7320508075688773f" in code
    assert 1.7 < arr_single[0] < 1.8
Example #12
def test_torch_native_compilation_cpu():
    from torch.utils.cpp_extension import load

    module_name = "Ololol"

    target = 'cpu'

    z, y, x = pystencils.fields("z, y, x: [20,40]")
    a = sympy.Symbol('a')

    forward_assignments = pystencils.AssignmentCollection(
        {z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0])})

    backward_assignments = create_backward_assignments(forward_assignments)

    forward_ast = pystencils.create_kernel(forward_assignments, target)
    forward_ast.function_name = 'forward'
    backward_ast = pystencils.create_kernel(backward_assignments, target)
    backward_ast.function_name = 'backward'
    module = TorchModule(module_name, [forward_ast, backward_ast])
    print(module)

    temp_file = write_cached_content(str(module), '.cpp')
    torch_extension = load(module_name, [temp_file])
    assert torch_extension is not None
    assert 'call_forward' in dir(torch_extension)
    assert 'call_backward' in dir(torch_extension)

    torch_extension = module.compile()
    assert torch_extension is not None
    assert 'call_forward' in dir(torch_extension)
    assert 'call_backward' in dir(torch_extension)
Example #13
def test_reproducability():
    from sympy.core.cache import clear_cache

    output_0 = None
    for i in range(10):
        module_name = "Ololol"

        target = 'cpu'

        z, y, x = pystencils.fields("z, y, x: [20,40]")
        a = sympy.Symbol('a')

        forward_assignments = pystencils.AssignmentCollection(
            {z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0])})

        backward_assignments = create_backward_assignments(forward_assignments)

        forward_ast = pystencils.create_kernel(forward_assignments, target)
        forward_ast.function_name = 'forward'
        backward_ast = pystencils.create_kernel(backward_assignments, target)
        backward_ast.function_name = 'backward'
        new_output = str(TorchModule(module_name, [forward_ast, backward_ast]))
        TorchModule(module_name, [forward_ast, backward_ast]).compile()

        clear_cache()

        if not output_0:
            output_0 = new_output

        assert output_0 == new_output
Example #14
def test_fixed_constant_bh(num_ghost_layers):
    ndim = 2

    offsets = list(itertools.product(range(num_ghost_layers + 1), repeat=ndim))

    x, y = pystencils.fields(f'x, y:  float64[{ndim}d]')

    assignments = pystencils.AssignmentCollection({
        y.center:
        sp.Add(*[x[o] for o in offsets]) / len(offsets)
    })

    kernel = pystencils.create_kernel(assignments).compile()
    print(kernel.code)

    bh_assignments = add_fixed_constant_boundary_handling(
        assignments, num_ghost_layers)

    bh_kernel = pystencils.create_kernel(bh_assignments,
                                         ghost_layers=0).compile()
    print(bh_kernel.code)

    noise = np.random.rand(*[20, 30, 40][:ndim])
    out1 = np.zeros_like(noise)
    out2 = np.zeros_like(noise)

    kernel(x=noise, y=out1)
    bh_kernel(x=noise, y=out2)
Example #15
def test_module_printing_parameter():
    module_name = "Ololol"

    for target in ('cpu', 'gpu'):

        z, y, x = pystencils.fields("z, y, x: [20,40]")
        a = sympy.Symbol('a')

        forward_assignments = pystencils.AssignmentCollection(
            {z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0])})

        backward_assignments = create_backward_assignments(forward_assignments)

        forward_ast = pystencils.create_kernel(forward_assignments, target)
        forward_ast.function_name = 'forward'
        backward_ast = pystencils.create_kernel(backward_assignments, target)
        backward_ast.function_name = 'backward'
        module = TorchModule(module_name, [forward_ast, backward_ast])
        print(module)

        module = TensorflowModule(module_name, {forward_ast: backward_ast})
        print(module)

        if target == 'cpu':
            module = PybindModule(module_name, [forward_ast, backward_ast])
            print(module)
            module = PybindModule(module_name, forward_ast)
            print(module)
Example #16
def test_kernel_decorator_config():
    config = ps.CreateKernelConfig()
    a, b, c = ps.fields(a=np.ones(100), b=np.ones(100), c=np.ones(100))

    @ps.kernel_config(config)
    def test():
        a[0] @= b[0] + c[0]

    ps.create_kernel(**test)
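The example above drives create_kernel through the @ps.kernel_config decorator, which bundles a CreateKernelConfig with the assignments. For comparison, here is a minimal sketch of the plain @ps.kernel decorator without a config object, following the pattern of the logical-operator example further up; the field and symbol names are illustrative assumptions.

import numpy as np
import pystencils as ps

arr = np.zeros((22, 22))

@ps.kernel
def smooth(s):
    f, g = ps.fields(f=arr, g=arr)
    s.avg @= (f[0, 1] + f[0, -1] + f[1, 0] + f[-1, 0]) / 4
    g[0, 0] @= s.avg

# the decorated name holds the collected assignments
ast = ps.create_kernel(smooth)
kernel = ast.compile()
kernel(f=arr, g=np.zeros_like(arr))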
Example #17
def test_native_tensorflow_compilation_gpu():
    tf = pytest.importorskip('tensorflow')

    module_name = "Ololol"

    target = 'gpu'

    z, y, x = pystencils.fields("z, y, x: [20,40]")
    a = sympy.Symbol('a')

    forward_assignments = pystencils.AssignmentCollection({
        z[0, 0]: x[0, 0] * sympy.log(a * x[0, 0] * y[0, 0])
    })

    backward_assignments = create_backward_assignments(forward_assignments)

    forward_ast = pystencils.create_kernel(forward_assignments, target)
    forward_ast.function_name = 'forward2'
    backward_ast = pystencils.create_kernel(backward_assignments, target)
    backward_ast.function_name = 'backward2'
    module = TensorflowModule(module_name, [forward_ast, backward_ast])
    print(str(module))

    # temp_file = write_cached_content(str(module), '.cu')
    # if 'tensorflow_host_compiler' not in get_compiler_config():
    # get_compiler_config()['tensorflow_host_compiler'] = get_compiler_config()['command']

    # # on my machine g++-6 and clang-7 are working
    # # '-ccbin',
    # # 'g++-6',
    # command = ['nvcc',
    # temp_file.name,
    # '--expt-relaxed-constexpr',
    # '-ccbin',
    # get_compiler_config()['tensorflow_host_compiler'],
    # '-std=c++14',
    # '-x',
    # 'cu',
    # '-Xcompiler',
    # '-fPIC',
    # '-c',
    # '-o',
    # 'foo_gpu.o'] + compile_flags + extra_flags

    # subprocess.check_call(command)

    # command = ['c++', '-fPIC', 'foo_gpu.o',
    # '-shared', '-o', 'foo_gpu.so'] + link_flags

    # subprocess.check_call(command)
    lib = module.compile()

    assert 'call_forward2' in dir(lib)
    #
    assert 'call_backward2' in dir(lib)
Example #18
def generate_lattice_model(generation_context,
                           class_name,
                           collision_rule,
                           refinement_scaling=None,
                           **create_kernel_params):

    # Usually a numpy layout (xyzf) is chosen by default, which is bad for waLBerla,
    # where at least the spatial coordinates should be ordered in reverse direction, i.e. zyx
    is_float = not generation_context.double_accuracy
    dtype = np.float32 if is_float else np.float64
    lb_method = collision_rule.method

    q = len(lb_method.stencil)
    dim = lb_method.dim

    create_kernel_params = default_create_kernel_parameters(
        generation_context, create_kernel_params)
    if create_kernel_params['target'] == 'gpu':
        raise ValueError(
            "Lattice Models can only be generated for CPUs. To generate LBM on GPUs use sweeps directly"
        )

    src_field = ps.Field.create_generic('pdfs',
                                        dim,
                                        dtype,
                                        index_dimensions=1,
                                        layout='fzyx',
                                        index_shape=(q, ))
    dst_field = ps.Field.create_generic('pdfs_tmp',
                                        dim,
                                        dtype,
                                        index_dimensions=1,
                                        layout='fzyx',
                                        index_shape=(q, ))

    stream_collide_update_rule = create_lbm_kernel(
        collision_rule, src_field, dst_field, StreamPullTwoFieldsAccessor())
    stream_collide_ast = create_kernel(stream_collide_update_rule,
                                       **create_kernel_params)
    stream_collide_ast.function_name = 'kernel_streamCollide'

    collide_update_rule = create_lbm_kernel(collision_rule, src_field,
                                            dst_field,
                                            CollideOnlyInplaceAccessor())
    collide_ast = create_kernel(collide_update_rule, **create_kernel_params)
    collide_ast.function_name = 'kernel_collide'

    stream_update_rule = create_stream_pull_only_kernel(
        lb_method.stencil, None, 'pdfs', 'pdfs_tmp', 'fzyx', dtype)
    stream_ast = create_kernel(stream_update_rule, **create_kernel_params)
    stream_ast.function_name = 'kernel_stream'
    __lattice_model(generation_context, class_name, lb_method,
                    stream_collide_ast, collide_ast, stream_ast,
                    refinement_scaling)
Example #19
def test_fixed_size_mismatch_check():
    """Create kernel with two differently sized but constant fields """
    src = np.zeros((20, 21, 9))
    dst = np.zeros((21, 21, 9))

    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
    sym_dst = Field.create_from_numpy_array("dst", dst, index_dimensions=1)
    update_rule = Assignment(sym_dst(0), sym_src[-1, 1](1) + sym_src[1, -1](2))

    with pytest.raises(ValueError) as e:
        create_kernel([update_rule])
    assert 'Differently sized field accesses' in str(e.value)
Example #20
def test_timeloop():
    dh = create_data_handling(domain_size=(2, 2), periodicity=True)

    pre = dh.add_array('pre_run_field', values_per_cell=1)
    dh.fill("pre_run_field", 0.0, ghost_layers=True)
    f = dh.add_array('field', values_per_cell=1)
    dh.fill("field", 0.0, ghost_layers=True)
    post = dh.add_array('post_run_field', values_per_cell=1)
    dh.fill("post_run_field", 0.0, ghost_layers=True)
    single_step = dh.add_array('single_step_field', values_per_cell=1)
    dh.fill("single_step_field", 0.0, ghost_layers=True)

    pre_assignments = Assignment(pre.center, pre.center + 1)
    pre_kernel = create_kernel(pre_assignments).compile()
    assignments = Assignment(f.center, f.center + 1)
    kernel = create_kernel(assignments).compile()
    post_assignments = Assignment(post.center, post.center + 1)
    post_kernel = create_kernel(post_assignments).compile()
    single_step_assignments = Assignment(single_step.center, single_step.center + 1)
    single_step_kernel = create_kernel(single_step_assignments).compile()

    fixed_steps = 2
    timeloop = TimeLoop(steps=fixed_steps)
    assert timeloop.fixed_steps == fixed_steps

    def pre_run():
        dh.run_kernel(pre_kernel)

    def post_run():
        dh.run_kernel(post_kernel)

    def single_step_run():
        dh.run_kernel(single_step_kernel)

    timeloop.add_pre_run_function(pre_run)
    timeloop.add_post_run_function(post_run)
    timeloop.add_single_step_function(single_step_run)
    timeloop.add_call(kernel, {'field': dh.cpu_arrays["field"]})

    # The timeloop was initialised with fixed_steps=2, i.e. one main iteration covers two time steps.
    # Running 5 time steps therefore results in 2 main iterations plus one single-step iteration.
    timeloop.run(time_steps=5)
    assert np.all(dh.cpu_arrays["pre_run_field"] == 1.0)
    assert np.all(dh.cpu_arrays["field"] == 2.0)
    assert np.all(dh.cpu_arrays["single_step_field"] == 1.0)
    assert np.all(dh.cpu_arrays["post_run_field"] == 1.0)

    seconds = 2
    start = time.perf_counter()
    timeloop.run_time_span(seconds=seconds)
    end = time.perf_counter()

    np.testing.assert_almost_equal(seconds, end - start, decimal=2)
Example #21
def test_fixed_and_variable_field_check():
    """Create kernel with two variable sized fields - calling them with different sizes"""
    src = np.zeros((20, 21, 9))

    sym_src = Field.create_from_numpy_array("src", src, index_dimensions=1)
    sym_dst = Field.create_generic("dst",
                                   spatial_dimensions=2,
                                   index_dimensions=1)

    update_rule = Assignment(sym_dst(0), sym_src[-1, 1](1) + sym_src[1, -1](2))

    with pytest.raises(ValueError) as e:
        create_kernel(update_rule)
    assert 'Mixing fixed-shaped and variable-shape fields' in str(e.value)
Example #22
def test_loop_independence_checks():
    f, g = fields("f, g : double[2D]")
    v = fields("v(2) : double[2D]")

    with pytest.raises(ValueError) as e:
        create_kernel(
            [Assignment(g[0, 1], f[0, 1]),
             Assignment(g[0, 0], f[1, 0])])
    assert 'Field g is written at two different locations' in str(e.value)

    # This is allowed - because only one element of g is accessed
    create_kernel(
        [Assignment(g[0, 2], f[0, 1]),
         Assignment(g[0, 2], 2 * g[0, 2])])

    create_kernel([
        Assignment(v[0, 2](1), f[0, 1]),
        Assignment(v[0, 1](0), 4),
        Assignment(v[0, 2](1), 2 * v[0, 2](1))
    ])

    with pytest.raises(ValueError) as e:
        create_kernel(
            [Assignment(g[0, 1], 3),
             Assignment(f[0, 1], 2 * g[0, 2])])
    assert 'Field g is read at (0, 2) and written at (0, 1)' in str(e.value)
Example #23
def test_subset_cell_values():
    """Tests (un)packing a subset of cell values of the a field (from)to a buffer."""
    num_cell_values = 19
    # Cell indices of the field to be (un)packed (from)to the buffer
    cell_indices = [1, 5, 7, 8, 10, 12, 13]
    fields = _generate_fields(num_directions=num_cell_values)
    for (src_arr, dst_arr, bufferArr) in fields:
        src_field = Field.create_from_numpy_array("src_field",
                                                  src_arr,
                                                  index_dimensions=1)
        dst_field = Field.create_from_numpy_array("dst_field",
                                                  dst_arr,
                                                  index_dimensions=1)
        buffer = Field.create_generic("buffer",
                                      spatial_dimensions=1,
                                      index_dimensions=1,
                                      field_type=FieldType.BUFFER,
                                      dtype=src_arr.dtype)

        pack_eqs = []
        # Only a subset of the cell values is packed, so consecutive
        # buffer indices map to the selected cell indices of the field
        for buffer_idx, cell_idx in enumerate(cell_indices):
            eq = Assignment(buffer(buffer_idx), src_field(cell_idx))
            pack_eqs.append(eq)

        pack_code = create_kernel(pack_eqs,
                                  data_type={
                                      'src_field': src_arr.dtype,
                                      'buffer': buffer.dtype
                                  })
        pack_kernel = pack_code.compile()
        pack_kernel(buffer=bufferArr, src_field=src_arr)

        unpack_eqs = []

        for buffer_idx, cell_idx in enumerate(cell_indices):
            eq = Assignment(dst_field(cell_idx), buffer(buffer_idx))
            unpack_eqs.append(eq)

        unpack_code = create_kernel(unpack_eqs,
                                    data_type={
                                        'dst_field': dst_arr.dtype,
                                        'buffer': buffer.dtype
                                    })
        unpack_kernel = unpack_code.compile()
        unpack_kernel(buffer=bufferArr, dst_field=dst_arr)

        mask_arr = np.ma.masked_where((src_arr - dst_arr) != 0, src_arr)
        np.testing.assert_equal(dst_arr, mask_arr.filled(int(0)))
Example #24
def test_address_of_with_cse():
    x, y = pystencils.fields('x,y: int64[2d]')
    s = pystencils.TypedSymbol('s', PointerType(create_type('int64')))

    assignments = pystencils.AssignmentCollection({
        y[0, 0]: cast_func(address_of(x[0, 0]), create_type('int64')) + s,
        x[0, 0]: cast_func(address_of(x[0, 0]), create_type('int64')) + 1
    }, {})

    ast = pystencils.create_kernel(assignments)
    pystencils.show_code(ast)
    assignments_cse = sympy_cse(assignments)

    ast = pystencils.create_kernel(assignments_cse)
    pystencils.show_code(ast)
Example #25
def pystencils_2d_cpu_impl(x, y, coef, N, I=1):
    if x.dtype == np.dtype('f4'):
        src, dst = ps.fields(
            'src, dst: float32[2D]',
            src=x, dst=y
        )
    elif x.dtype == np.dtype('f8'):
        src, dst = ps.fields(
            'src, dst: double[2D]',
            src=x, dst=y
        )
    else:
        raise TypeError

    if N == 1:
        update_rule = make_2d_update_rule_1(src, dst, coef)
    elif N == 6:
        update_rule = make_2d_update_rule_6(src, dst, coef)
    else:
        raise ValueError
    kernel = ps.create_kernel(update_rule, cpu_openmp=True).compile()

    s = time.time()
    for i in range(I):
        if i % 2 == 0:
            kernel(src=x, dst=y)
        else:
            kernel(src=y, dst=x)
    e = time.time()

    if (I - 1) % 2 == 0:
        res = y
    else:
        res = x
    return e - s, res
Example #26
def test_vectorization_fixed_size():
    configurations = []
    # Fixed size - multiple of four
    arr = np.ones((20 + 2, 24 + 2)) * 5.0
    f, g = ps.fields(f=arr, g=arr)
    configurations.append((arr, f, g))
    # Fixed size - no multiple of four
    arr = np.ones((21 + 2, 25 + 2)) * 5.0
    f, g = ps.fields(f=arr, g=arr)
    configurations.append((arr, f, g))
    # Fixed size - different remainder
    arr = np.ones((23 + 2, 17 + 2)) * 5.0
    f, g = ps.fields(f=arr, g=arr)
    configurations.append((arr, f, g))

    for arr, f, g in configurations:
        update_rule = [ps.Assignment(g[0, 0], f[0, 0] + f[-1, 0] + f[1, 0] + f[0, 1] + f[0, -1] + 42.0)]

        ast = ps.create_kernel(update_rule)
        vectorize(ast)

        func = ast.compile()
        dst = np.zeros_like(arr)
        func(g=dst, f=arr)
        np.testing.assert_equal(dst[1:-1, 1:-1], 5 * 5.0 + 42.0)
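The example above calls vectorize(ast) on an existing AST. Vectorization can also be requested up front through the cpu_vectorize_info option of CreateKernelConfig, as several of the other examples do. A minimal sketch, assuming the host CPU supports the named instruction set ('avx' here is only an assumption):

import numpy as np
import pystencils as ps

arr = np.ones((22, 26)) * 5.0
f, g = ps.fields(f=arr, g=arr)
update_rule = [ps.Assignment(g[0, 0],
                             f[0, 0] + f[1, 0] + f[-1, 0] + f[0, 1] + f[0, -1] + 42.0)]

# 'avx' is an assumption about the host CPU; any supported instruction set name works here
config = ps.CreateKernelConfig(cpu_vectorize_info={'instruction_set': 'avx'})
kernel = ps.create_kernel(update_rule, config=config).compile()

dst = np.zeros_like(arr)
kernel(f=arr, g=dst)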
Example #27
def test_sum_use_float():

    sum = sympy.Sum(k, (k, 1, 100))
    expanded_sum = sum.doit()

    print(sum)
    print(expanded_sum)

    x = pystencils.fields('x: float32[1d]')

    assignments = pystencils.AssignmentCollection({x.center(): sum})

    ast = pystencils.create_kernel(assignments,
                                   data_type=create_type('float32'))
    code = str(pystencils.show_code(ast))
    kernel = ast.compile()

    print(code)
    print(pystencils.show_code(ast))
    assert 'float sum' in code

    array = np.zeros((10, ), np.float32)

    kernel(x=array)

    assert np.allclose(array, int(expanded_sum) * np.ones_like(array))
Example #28
def test_vec_all(instruction_set, dtype):
    if instruction_set in ['sve', 'rvv']:
        width = 1000  # we don't know the actual value; need something guaranteed larger than the vector length
    else:
        width = get_vector_instruction_set(dtype, instruction_set)['width']
    data_arr = np.zeros((4 * width, 4 * width),
                        dtype=np.float64 if dtype == 'double' else np.float32)

    data_arr[3:9, 1:3 * width - 1] = 1.0
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)

    c = [
        Conditional(vec_all(data.center() > 0.0),
                    Block([ps.Assignment(data.center(), 2.0)]))
    ]
    ast = ps.create_kernel(
        c,
        target=Target.CPU,
        cpu_vectorize_info={'instruction_set': instruction_set})
    kernel = ast.compile()
    kernel(data=data_arr)
    if instruction_set in ['sve', 'rvv']:
        # we only know that some values in the middle have been replaced
        assert np.all(data_arr[3:9, :2] <= 1.0)
        assert np.any(data_arr[3:9, 2:] == 2.0)
    else:
        np.testing.assert_equal(data_arr[3:9, :1], 0.0)
        np.testing.assert_equal(data_arr[3:9, 1:width], 1.0)
        np.testing.assert_equal(data_arr[3:9, width:2 * width], 2.0)
        np.testing.assert_equal(data_arr[3:9, 2 * width:3 * width - 1], 1.0)
        np.testing.assert_equal(data_arr[3:9, 3 * width - 1:], 0.0)
Example #29
def test_vec_any(instruction_set, dtype):
    if instruction_set in ['sve', 'rvv']:
        width = 4  # we don't know the actual value
    else:
        width = get_vector_instruction_set(dtype, instruction_set)['width']
    data_arr = np.zeros((4 * width, 4 * width),
                        dtype=np.float64 if dtype == 'double' else np.float32)

    data_arr[3:9, 1:3 * width - 1] = 1.0
    data = ps.fields(f"data: {dtype}[2D]", data=data_arr)

    c = [
        ps.Assignment(sp.Symbol("t1"), vec_any(data.center() > 0.0)),
        Conditional(vec_any(data.center() > 0.0),
                    Block([ps.Assignment(data.center(), 2.0)]))
    ]
    ast = ps.create_kernel(
        c,
        target=ps.Target.CPU,
        cpu_vectorize_info={'instruction_set': instruction_set})
    kernel = ast.compile()
    kernel(data=data_arr)
    if instruction_set in ['sve', 'rvv']:
        # we only know that the first value has changed
        np.testing.assert_equal(data_arr[3:9, :3 * width - 1], 2.0)
    else:
        np.testing.assert_equal(data_arr[3:9, :3 * width], 2.0)
Example #30
def test_prod_var_limit():

    k = pystencils.TypedSymbol('k', create_type('int64'))
    limit = pystencils.TypedSymbol('limit', create_type('int64'))

    sum = sympy.Sum(k, (k, 1, limit))
    expanded_sum = sum.replace(limit, 100).doit()

    print(sum)
    print(expanded_sum)

    x = pystencils.fields('x: int64[1d]')

    assignments = pystencils.AssignmentCollection({x.center(): sum})

    ast = pystencils.create_kernel(assignments)
    code = str(pystencils.show_code(ast))
    kernel = ast.compile()

    print(code)

    array = np.zeros((10, ), np.int64)

    kernel(x=array, limit=100)

    assert np.allclose(array, int(expanded_sum) * np.ones_like(array))