Example #1
def create_kernel(assignments, function_name="kernel", type_info=None, split_groups=(),
                  iteration_slice=None, ghost_layers=None):
    """
    Creates an abstract syntax tree for a kernel function from a list of update rules.

    Loops are created according to the field accesses in the equations.

    Args:
        assignments: list of sympy equations containing accesses to :class:`pystencils.field.Field`,
                     defining the update rules of the kernel
        function_name: name of the generated function - only important if the generated code is written out
        type_info: a map from symbol name to a C type specifier. If not specified, all symbols are assumed to
                   be of type 'double', except symbols that occur on the left-hand side of equations whose
                   right-hand side is a sympy Boolean; these are assumed to be 'bool'.
        split_groups: specification of how to split up the inner loop into multiple loops. For details see the
                      transformation :func:`pystencils.transformation.split_inner_loop`
        iteration_slice: if not None, iteration is done only over this slice of the field
        ghost_layers: a sequence of pairs for each coordinate with the lower and upper number of ghost layers.
                      If None, the number of ghost layers is determined automatically and assumed to be equal
                      for all dimensions.

    Returns:
        :class:`pystencils.ast.KernelFunction` node
    """
    from pystencils.cpu import create_kernel
    code = create_kernel(assignments, function_name, type_info, split_groups, iteration_slice, ghost_layers)
    code.body = insert_casts(code.body)
    code._compile_function = make_python_function
    code._backend = 'llvm'
    return code
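
This LLVM variant only wraps the CPU kernel creation, inserts casts, and attaches the LLVM compile hook. A minimal usage sketch (the fields, arrays and the Jacobi-style update below are illustrative assumptions, not part of the original example):

# Hedged usage sketch for the LLVM create_kernel wrapper above; field names,
# array sizes and the Jacobi-style update are illustrative assumptions.
import numpy as np
from pystencils import Assignment, Field

src_arr = np.ones((5, 5))
dst_arr = np.zeros_like(src_arr)

src = Field.create_from_numpy_array('src', src_arr)
dst = Field.create_from_numpy_array('dst', dst_arr)

update = Assignment(dst[0, 0],
                    (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]) / 4)

ast = create_kernel([update])   # the wrapper defined in this example
kernel = ast.compile()          # dispatches to the attached make_python_function
kernel(src=src_arr, dst=dst_arr)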
Example #2
def test_staggered_iteration_manual():
    dim = 2
    f_arr = np.arange(5**dim).reshape([5] * dim)
    s_arr = np.ones([5] * dim + [dim]) * 1234
    s_arr_ref = s_arr.copy()

    f = Field.create_from_numpy_array('f', f_arr)
    s = Field.create_from_numpy_array('s', s_arr, index_dimensions=1)

    eqs = []

    counters = [
        LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)
    ]
    conditions = [counters[i] < f.shape[i] - 1 for i in range(dim)]

    for d in range(dim):
        eq = SympyAssignment(
            s(d),
            sum(f[o] for o in offsets_in_plane(d, 0, dim)) -
            sum(f[o] for o in offsets_in_plane(d, -1, dim)))
        cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
        eqs.append(Conditional(cond, eq))

    kernel_ast = create_kernel(eqs, ghost_layers=[(1, 0), (1, 0), (1, 0)])

    func = make_python_function(kernel_ast)
    func(f=f_arr, s=s_arr_ref)

    inner_loop = [
        n for n in kernel_ast.atoms(ast.LoopOverCoordinate)
        if n.is_innermost_loop
    ][0]
    cut_loop(inner_loop, [4])
    outer_loop = [
        n for n in kernel_ast.atoms(ast.LoopOverCoordinate)
        if n.is_outermost_loop
    ][0]
    cut_loop(outer_loop, [4])

    simplify_conditionals(kernel_ast.body, loop_counter_simplification=True)
    cleanup_blocks(kernel_ast.body)
    move_constants_before_loop(kernel_ast.body)
    cleanup_blocks(kernel_ast.body)

    assert not kernel_ast.atoms(
        Conditional), "Loop cutting optimization did not work"

    func_optimized = make_python_function(kernel_ast)
    func_optimized(f=f_arr, s=s_arr)
    np.testing.assert_almost_equal(s_arr_ref, s_arr)
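
Both staggered-iteration tests use an offsets_in_plane helper that is not shown here. One plausible implementation, assuming it returns the cell shifted by offset_int along the plane-normal coordinate together with its direct neighbours within that plane (an assumption for illustration, not necessarily the original helper):

# Hedged sketch of the offsets_in_plane helper used (but not defined) above.
# Assumption: it yields the cell shifted by offset_int along coordinate
# normal_plane plus that cell's direct neighbours inside the orthogonal plane.
def offsets_in_plane(normal_plane, offset_int, dimension):
    offset = [0] * dimension
    offset[normal_plane] = offset_int
    result = [tuple(offset)]
    for i in range(dimension):
        if i == normal_plane:
            continue
        lower, upper = list(offset), list(offset)
        lower[i] -= 1
        upper[i] += 1
        result.append(tuple(lower))
        result.append(tuple(upper))
    return result

# e.g. offsets_in_plane(0, -1, 2) -> [(-1, 0), (-1, -1), (-1, 1)]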
Example #3
def test_staggered_iteration():
    dim = 2
    f_arr = np.arange(5**dim).reshape([5] * dim).astype(np.float64)
    s_arr = np.ones([5] * dim + [dim]) * 1234
    s_arr_ref = s_arr.copy()

    fields_fixed = (Field.create_from_numpy_array('f', f_arr),
                    Field.create_from_numpy_array('s',
                                                  s_arr,
                                                  index_dimensions=1))
    fields_var = (Field.create_generic('f', 2),
                  Field.create_generic('s', 2, index_dimensions=1))

    for f, s in [fields_var, fields_fixed]:
        # --- Manual
        eqs = []
        counters = [
            LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)
        ]
        conditions = [counters[i] < f.shape[i] - 1 for i in range(dim)]
        for d in range(dim):
            eq = SympyAssignment(
                s(d),
                sum(f[o] for o in offsets_in_plane(d, 0, dim)) -
                sum(f[o] for o in offsets_in_plane(d, -1, dim)))
            cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
            eqs.append(Conditional(cond, eq))
        func = create_kernel(eqs, ghost_layers=[(1, 0), (1, 0),
                                                (1, 0)]).compile()

        # --- Built-in optimized
        expressions = []
        for d in range(dim):
            expressions.append(
                sum(f[o] for o in offsets_in_plane(d, 0, dim)) -
                sum(f[o] for o in offsets_in_plane(d, -1, dim)))
        func_optimized = create_staggered_kernel(s, expressions).compile()
        assert not func_optimized.ast.atoms(
            Conditional), "Loop cutting optimization did not work"

        func(f=f_arr, s=s_arr_ref)
        func_optimized(f=f_arr, s=s_arr)
        np.testing.assert_almost_equal(s_arr_ref, s_arr)
def test_2d_5pt():
    size = [30, 50, 3]
    kernel_file_path = os.path.join(INPUT_FOLDER, "2d-5pt.c")
    with open(kernel_file_path) as kernel_file:
        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=None, filename=kernel_file_path)
    reference = analysis(reference_kernel)

    arr = np.zeros(size)
    a = Field.create_from_numpy_array('a', arr, index_dimensions=1)
    b = Field.create_from_numpy_array('b', arr, index_dimensions=1)
    s = sp.Symbol("s")
    rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0]
    update_rule = Assignment(b[0, 0], s * rhs)
    ast = create_kernel([update_rule])
    k = PyStencilsKerncraftKernel(ast)
    result = analysis(k)

    for e1, e2 in zip(reference.results['cycles'], result.results['cycles']):
        assert e1 == e2
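
The kerncraft tests above also rely on an INPUT_FOLDER constant and an analysis helper defined elsewhere in the test module. A rough sketch of what such a helper might look like, assuming kerncraft's ECMData/ECM performance models and the KerncraftParameters shim from pystencils.kerncraft_coupling (all names, paths and signatures here are assumptions and may differ between versions):

# Hedged sketch of the analysis() helper used by the kerncraft tests above.
# Assumptions: kerncraft models are constructed as Model(kernel, machine, args),
# expose analyze() and a results dict; KerncraftParameters is the argument shim
# from pystencils.kerncraft_coupling. Exact APIs vary between versions.
import os
from kerncraft.machinemodel import MachineModel
from kerncraft.models import ECM, ECMData
from pystencils.kerncraft_coupling import KerncraftParameters

INPUT_FOLDER = os.path.join(os.path.dirname(__file__), "kerncraft_inputs")  # assumed location


def analysis(kernel, model='ecmdata'):
    machine = MachineModel(path_to_yaml=os.path.join(INPUT_FOLDER, "default_machine_file.yaml"))
    model_class = ECM if model == 'ecm' else ECMData
    performance_model = model_class(kernel, machine, KerncraftParameters())
    performance_model.analyze()
    return performance_model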
def test_compilation():
    machine_file_path = os.path.join(INPUT_FOLDER, "default_machine_file.yaml")
    machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path)

    kernel_file_path = os.path.join(INPUT_FOLDER, "2d-5pt.c")
    with open(kernel_file_path) as kernel_file:
        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
        reference_kernel.as_code('likwid')

    size = [30, 50, 3]
    arr = np.zeros(size)
    a = Field.create_from_numpy_array('a', arr, index_dimensions=1)
    b = Field.create_from_numpy_array('b', arr, index_dimensions=1)
    s = sp.Symbol("s")
    rhs = a[0, -1](0) + a[0, 1] + a[-1, 0] + a[1, 0]
    update_rule = Assignment(b[0, 0], s * rhs)
    ast = create_kernel([update_rule])
    mine = generate_benchmark(ast, likwid=False)
    print(mine)
def test_3d_7pt_iaca():
    # Make sure you use the intel compiler
    size = [20, 200, 200]
    kernel_file_path = os.path.join(INPUT_FOLDER, "3d-7pt.c")
    machine_file_path = os.path.join(INPUT_FOLDER, "default_machine_file.yaml")
    machine = kerncraft.machinemodel.MachineModel(path_to_yaml=machine_file_path)
    with open(kernel_file_path) as kernel_file:
        reference_kernel = kerncraft.kernel.KernelCode(kernel_file.read(), machine=machine, filename=kernel_file_path)
    reference_kernel.set_constant('M', size[0])
    reference_kernel.set_constant('N', size[1])
    assert size[1] == size[2]
    analysis(reference_kernel, model='ecm')

    arr = np.zeros(size)
    a = Field.create_from_numpy_array('a', arr, index_dimensions=0)
    b = Field.create_from_numpy_array('b', arr, index_dimensions=0)
    s = sp.Symbol("s")
    rhs = a[0, -1, 0] + a[0, 1, 0] + a[-1, 0, 0] + a[1, 0, 0] + a[0, 0, -1] + a[0, 0, 1]

    update_rule = Assignment(b[0, 0, 0], s * rhs)
    ast = create_kernel([update_rule])
    k = PyStencilsKerncraftKernel(ast, machine)
    analysis(k, model='ecm')
    assert reference_kernel._flops == k._flops
Example #7
def create_staggered_kernel(staggered_field,
                            expressions,
                            subexpressions=(),
                            target='cpu',
                            gpu_exclusive_conditions=False,
                            **kwargs):
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    Args:
        staggered_field: field where the first index coordinate defines the location of the staggered value.
                The field can have one or two index coordinates; with two index coordinates, a vector is stored at
                every staggered location and the ``expressions`` parameter has to be a sequence of sequences.
                For example, ``f[0,0](0)`` is interpreted as the value at the left cell boundary, ``f[1,0](0)`` as
                the right cell boundary and ``f[0,0](1)`` as the southern cell boundary.
        expressions: sequence of expressions of length dim, defining how the west, southern (and bottom) cell
                     boundaries should be updated.
        subexpressions: optional sequence of Assignments that define subexpressions used in the main expressions
        target: 'cpu' or 'gpu'
        gpu_exclusive_conditions: generate an if/else construct so that there is exactly one code block for each
                                  of the 2**dim code paths
        kwargs: passed directly to create_kernel; the iteration_slice and ghost_layers parameters are not allowed

    Returns:
        AST, see `create_kernel`
    """
    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
    assert staggered_field.index_dimensions in (
        1, 2), 'Staggered field must have one or two index dimensions'
    dim = staggered_field.spatial_dimensions

    counters = [
        LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)
    ]
    conditions = [
        counters[i] < staggered_field.shape[i] - 1 for i in range(dim)
    ]
    assert len(expressions) == dim
    if staggered_field.index_dimensions == 2:
        assert all(len(sublist) == len(expressions[0]) for sublist in expressions), \
            "If staggered field has two index dimensions expressions has to be a sequence of sequences of all the " \
            "same length."

    final_assignments = []
    last_conditional = None

    def add(condition, dimensions, as_else_block=False):
        nonlocal last_conditional
        if staggered_field.index_dimensions == 1:
            assignments = [
                Assignment(staggered_field(d), expressions[d])
                for d in dimensions
            ]
            a_coll = AssignmentCollection(assignments, list(subexpressions))
            a_coll = a_coll.new_filtered(
                [staggered_field(d) for d in dimensions])
        elif staggered_field.index_dimensions == 2:
            assert staggered_field.has_fixed_index_shape
            assignments = [
                Assignment(staggered_field(d, i), expr) for d in dimensions
                for i, expr in enumerate(expressions[d])
            ]
            a_coll = AssignmentCollection(assignments, list(subexpressions))
            a_coll = a_coll.new_filtered([
                staggered_field(d, i)
                for i in range(staggered_field.index_shape[1])
                for d in dimensions
            ])
        sp_assignments = [
            SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments
        ]
        if as_else_block and last_conditional:
            new_cond = Conditional(condition, Block(sp_assignments))
            last_conditional.false_block = Block([new_cond])
            last_conditional = new_cond
        else:
            last_conditional = Conditional(condition, Block(sp_assignments))
            final_assignments.append(last_conditional)

    if target == 'cpu' or not gpu_exclusive_conditions:
        for d in range(dim):
            cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
            add(cond, [d])
    elif target == 'gpu':
        full_conditions = [
            sp.And(*[conditions[i] for i in range(dim) if d != i])
            for d in range(dim)
        ]
        for include in itertools.product(*[[1, 0]] * dim):
            case_conditions = sp.And(*[
                c if value else sp.Not(c)
                for c, value in zip(full_conditions, include)
            ])
            dimensions_to_include = [i for i in range(dim) if include[i]]
            if dimensions_to_include:
                add(case_conditions, dimensions_to_include, True)

    ghost_layers = [(1, 0)] * dim

    blocking = kwargs.get('cpu_blocking', None)
    if blocking:
        del kwargs['cpu_blocking']

    cpu_vectorize_info = kwargs.get('cpu_vectorize_info', None)
    if cpu_vectorize_info:
        del kwargs['cpu_vectorize_info']
    openmp = kwargs.get('cpu_openmp', None)
    if openmp:
        del kwargs['cpu_openmp']

    ast = create_kernel(final_assignments,
                        ghost_layers=ghost_layers,
                        target=target,
                        **kwargs)

    if target == 'cpu':
        remove_conditionals_in_staggered_kernel(ast)
        move_constants_before_loop(ast)
        omp_collapse = None
        if blocking:
            omp_collapse = loop_blocking(ast, blocking)
        if openmp:
            from pystencils.cpu import add_openmp
            add_openmp(ast,
                       num_threads=openmp,
                       collapse=omp_collapse,
                       assume_single_outer_loop=False)
        if cpu_vectorize_info is True:
            vectorize(ast)
        elif isinstance(cpu_vectorize_info, dict):
            vectorize(ast, **cpu_vectorize_info)
    return ast
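
A minimal usage sketch for this version of create_staggered_kernel, assuming a 2D scalar field c and a staggered flux field j with one index dimension (field names and the simple difference fluxes are illustrative):

# Hedged usage sketch for the create_staggered_kernel variant above (older API:
# staggered field plus one expression per dimension). Names and fluxes are
# illustrative assumptions.
import numpy as np
from pystencils import Field

c_arr = np.random.rand(32, 32)
j_arr = np.zeros((32, 32, 2))   # one value per lower cell boundary and dimension

c = Field.create_from_numpy_array('c', c_arr)
j = Field.create_from_numpy_array('j', j_arr, index_dimensions=1)

# expressions[d] defines the value stored at the lower boundary in direction d
expressions = [c[0, 0] - c[-1, 0],   # west face
               c[0, 0] - c[0, -1]]   # south face

ast = create_staggered_kernel(j, expressions, target='cpu')
kernel = ast.compile()
kernel(c=c_arr, j=j_arr)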
Example #8
def create_kernel(assignments,
                  target='cpu',
                  data_type="double",
                  iteration_slice=None,
                  ghost_layers=None,
                  skip_independence_check=False,
                  cpu_openmp=False,
                  cpu_vectorize_info=None,
                  cpu_blocking=None,
                  gpu_indexing='block',
                  gpu_indexing_params=MappingProxyType({})):
    """
    Creates an abstract syntax tree (AST) of a kernel, using a list of update equations.

    Args:
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
        target: 'cpu', 'llvm' or 'gpu'
        data_type: data type used for all untyped symbols (i.e. non-fields), can also be a dict from symbol name
                  to type
        iteration_slice: rectangular subset to iterate over, if not specified the complete non-ghost layer \
                         part of the field is iterated over
        ghost_layers: if left at the default, the number of necessary ghost layers is determined automatically.
                     A single integer specifies the ghost layer count at all borders; it can also be a sequence of
                     pairs ``[(x_lower_gl, x_upper_gl), .... ]``
        skip_independence_check: don't check that loop iterations are independent. This is needed e.g. for
                                 periodicity kernels, which access the field outside the iteration bounds. Use with care!
        cpu_openmp: True or number of threads for OpenMP parallelization, False for no OpenMP
        cpu_vectorize_info: a dictionary with the keys 'instruction_set', 'assume_aligned' and 'nontemporal';
                            for documentation of these parameters see the vectorize function. Example:
                            ``{'instruction_set': 'avx512', 'assume_aligned': True, 'nontemporal': True}``
        cpu_blocking: a tuple of block sizes or None if no blocking should be applied
        gpu_indexing: either 'block' or 'line', or a custom indexing class, see `AbstractIndexing`
        gpu_indexing_params: dict with indexing parameters (constructor parameters of the indexing class),
                             e.g. for 'block' one can specify ``{'block_size': (20, 20, 10)}``

    Returns:
        abstract syntax tree (AST) object that can either be printed as source code with `show_code` or
        compiled through its ``compile()`` member

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> ast = ps.create_kernel(assignment, target='cpu', cpu_openmp=True)
        >>> kernel = ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
    """
    # ----  Normalizing parameters
    split_groups = ()
    if isinstance(assignments, AssignmentCollection):
        if 'split_groups' in assignments.simplification_hints:
            split_groups = assignments.simplification_hints['split_groups']
        assignments = assignments.all_assignments
    if isinstance(assignments, Assignment):
        assignments = [assignments]

    # ----  Creating ast
    if target == 'cpu':
        from pystencils.cpu import create_kernel
        from pystencils.cpu import add_openmp
        ast = create_kernel(assignments,
                            type_info=data_type,
                            split_groups=split_groups,
                            iteration_slice=iteration_slice,
                            ghost_layers=ghost_layers,
                            skip_independence_check=skip_independence_check)
        omp_collapse = None
        if cpu_blocking:
            omp_collapse = loop_blocking(ast, cpu_blocking)
        if cpu_openmp:
            add_openmp(ast, num_threads=cpu_openmp, collapse=omp_collapse)
        if cpu_vectorize_info:
            if cpu_vectorize_info is True:
                vectorize(ast)
            elif isinstance(cpu_vectorize_info, dict):
                vectorize(ast, **cpu_vectorize_info)
            else:
                raise ValueError("Invalid value for cpu_vectorize_info")
        return ast
    elif target == 'llvm':
        from pystencils.llvm import create_kernel
        ast = create_kernel(assignments,
                            type_info=data_type,
                            split_groups=split_groups,
                            iteration_slice=iteration_slice,
                            ghost_layers=ghost_layers)
        return ast
    elif target == 'gpu':
        from pystencils.gpucuda import create_cuda_kernel
        ast = create_cuda_kernel(
            assignments,
            type_info=data_type,
            indexing_creator=indexing_creator_from_params(
                gpu_indexing, gpu_indexing_params),
            iteration_slice=iteration_slice,
            ghost_layers=ghost_layers,
            skip_independence_check=skip_independence_check)
        return ast
    else:
        raise ValueError(
            "Unknown target %s. Has to be one of 'cpu', 'gpu' or 'llvm' " %
            (target, ))
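
The CPU-specific options can be combined; a short sketch follows (the instruction set, thread count and block sizes are illustrative and depend on the machine and the pystencils version):

# Hedged sketch showing how the CPU-specific options of create_kernel above can
# be combined. The instruction set, block sizes and thread count are
# illustrative and depend on the target machine and pystencils version.
import numpy as np
import sympy as sp
from pystencils import Assignment, Field

arr = np.zeros((64, 64))
src = Field.create_from_numpy_array('src', arr)
dst = Field.create_from_numpy_array('dst', arr)
w = sp.Symbol('w')

update = Assignment(dst[0, 0], w * (src[1, 0] + src[-1, 0] + src[0, 1] + src[0, -1]))

ast = create_kernel(update,
                    target='cpu',
                    cpu_openmp=4,                 # OpenMP with 4 threads
                    cpu_blocking=(16, 16),        # block the spatial loops
                    cpu_vectorize_info={'instruction_set': 'avx',
                                        'assume_aligned': False})
kernel = ast.compile()
kernel(src=arr, dst=np.zeros_like(arr), w=0.25)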
Example #9
def create_staggered_kernel(assignments,
                            target: Target = Target.CPU,
                            gpu_exclusive_conditions=False,
                            **kwargs):
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    For a staggered field, the first index coordinate defines the location of the staggered value.
    Further index coordinates can be used to store vectors/tensors at each point.

    Args:
        assignments: a sequence of assignments or an AssignmentCollection.
                     Assignments to staggered fields are processed specially, while subexpressions and assignments to
                     regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
                     used, but they all need to use the same stencil (i.e. the same number of staggered points) and
                     shape.
        target: `Target.CPU` or `Target.GPU`
        gpu_exclusive_conditions: disable the use of multiple conditionals inside the loop. The outer layers are then
                                  handled in an else branch.
        kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed

    Returns:
        AST, see `create_kernel`
    """
    if 'ghost_layers' in kwargs:
        assert kwargs['ghost_layers'] is None
        del kwargs['ghost_layers']
    if 'iteration_slice' in kwargs:
        assert kwargs['iteration_slice'] is None
        del kwargs['iteration_slice']
    if 'omp_single_loop' in kwargs:
        assert kwargs['omp_single_loop'] is False
        del kwargs['omp_single_loop']

    if isinstance(assignments, AssignmentCollection):
        subexpressions = assignments.subexpressions + [
            a for a in assignments.main_assignments
            if not hasattr(a, 'lhs') or type(a.lhs) is not Field.Access
            or not FieldType.is_staggered(a.lhs.field)
        ]
        assignments = [
            a for a in assignments.main_assignments
            if hasattr(a, 'lhs') and type(a.lhs) is Field.Access
            and FieldType.is_staggered(a.lhs.field)
        ]
    else:
        subexpressions = [
            a for a in assignments
            if not hasattr(a, 'lhs') or type(a.lhs) is not Field.Access
            or not FieldType.is_staggered(a.lhs.field)
        ]
        assignments = [
            a for a in assignments
            if hasattr(a, 'lhs') and type(a.lhs) is Field.Access
            and FieldType.is_staggered(a.lhs.field)
        ]
    if len(set([tuple(a.lhs.field.staggered_stencil)
                for a in assignments])) != 1:
        raise ValueError(
            "All assignments need to be made to staggered fields with the same stencil"
        )
    if len(set([a.lhs.field.shape for a in assignments])) != 1:
        raise ValueError(
            "All assignments need to be made to staggered fields with the same shape"
        )

    staggered_field = assignments[0].lhs.field
    stencil = staggered_field.staggered_stencil
    dim = staggered_field.spatial_dimensions
    shape = staggered_field.shape

    counters = [
        LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)
    ]

    final_assignments = []

    # find out whether any of the ghost layers is not needed
    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
    for direction in stencil:
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        common_exclusions.intersection_update(exclusions)
    ghost_layers = [[0, 0] for d in range(dim)]
    for direction in common_exclusions:
        direction = direction_string_to_offset(direction)
        for d, s in enumerate(direction):
            if s == 1:
                ghost_layers[d][1] = 1
            elif s == -1:
                ghost_layers[d][0] = 1

    def condition(direction):
        """exclude those staggered points that correspond to fluxes between ghost cells"""
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])

        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        conditions = []
        for e in exclusions:
            if e in common_exclusions:
                continue
            offset = direction_string_to_offset(e)
            for i, o in enumerate(offset):
                if o == 1:
                    conditions.append(counters[i] < shape[i] - 1)
                elif o == -1:
                    conditions.append(counters[i] > 0)
        return sp.And(*conditions)

    if gpu_exclusive_conditions:
        outer_assignment = None
        conditions = {direction: condition(direction) for direction in stencil}
        for num_conditions in range(len(stencil)):
            for combination in itertools.combinations(conditions.values(),
                                                      num_conditions):
                for assignment in assignments:
                    direction = stencil[assignment.lhs.index[0]]
                    if conditions[direction] in combination:
                        assignment = SympyAssignment(assignment.lhs,
                                                     assignment.rhs)
                        outer_assignment = Conditional(sp.And(*combination),
                                                       Block([assignment]),
                                                       outer_assignment)

        inner_assignment = []
        for assignment in assignments:
            inner_assignment.append(
                SympyAssignment(assignment.lhs, assignment.rhs))
        last_conditional = Conditional(
            sp.And(*[condition(d) for d in stencil]), Block(inner_assignment),
            outer_assignment)
        final_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                            [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                            [last_conditional]

        if target == Target.CPU:
            from pystencils.cpu import create_kernel as create_kernel_cpu
            ast = create_kernel_cpu(final_assignments,
                                    ghost_layers=ghost_layers,
                                    omp_single_loop=False,
                                    **kwargs)
        else:
            ast = create_kernel(final_assignments,
                                ghost_layers=ghost_layers,
                                target=target,
                                **kwargs)
        return ast

    for assignment in assignments:
        direction = stencil[assignment.lhs.index[0]]
        sp_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                         [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                         [SympyAssignment(assignment.lhs, assignment.rhs)]
        last_conditional = Conditional(condition(direction),
                                       Block(sp_assignments))
        final_assignments.append(last_conditional)

    remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
    prepend_optimizations = [
        lambda ast: remove_conditionals_in_staggered_kernel(
            ast, remove_start_conditional), move_constants_before_loop
    ]
    if 'cpu_prepend_optimizations' in kwargs:
        prepend_optimizations += kwargs['cpu_prepend_optimizations']
        del kwargs['cpu_prepend_optimizations']
    ast = create_kernel(final_assignments,
                        ghost_layers=ghost_layers,
                        target=target,
                        omp_single_loop=False,
                        cpu_prepend_optimizations=prepend_optimizations,
                        **kwargs)
    return ast
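
A usage sketch for this newer interface, assuming pystencils exposes FieldType.STAGGERED fields and Field.staggered_access (field names and the difference fluxes are illustrative):

# Hedged usage sketch for the newer staggered-kernel interface above, which
# derives the staggered field, stencil and conditions from the assignments.
# Assumes pystencils' FieldType.STAGGERED and Field.staggered_access; names
# and expressions are illustrative.
import pystencils as ps

c = ps.fields('c: double[2D]')
j = ps.fields('j(2): double[2D]', field_type=ps.FieldType.STAGGERED)

# one assignment per staggered point: values on the west and south cell faces
assignments = [ps.Assignment(j.staggered_access('W'), c[-1, 0] - c[0, 0]),
               ps.Assignment(j.staggered_access('S'), c[0, -1] - c[0, 0])]

kernel_ast = create_staggered_kernel(assignments)   # the function defined in this example
kernel = kernel_ast.compile()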
Example #10
def create_domain_kernel(assignments: List[Assignment], *,
                         config: CreateKernelConfig):
    """
    Creates an abstract syntax tree (AST) of a kernel, using a list of update equations.

    Args:
        assignments: can be a single assignment, sequence of assignments or an `AssignmentCollection`
        config: CreateKernelConfig which includes the needed configuration

    Returns:
        abstract syntax tree (AST) object that can either be printed as source code with `show_code` or
        compiled through its ``compile()`` member

    Example:
        >>> import pystencils as ps
        >>> import numpy as np
        >>> s, d = ps.fields('s, d: [2D]')
        >>> assignment = ps.Assignment(d[0,0], s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0])
        >>> kernel_config = ps.CreateKernelConfig(cpu_openmp=True)
        >>> kernel_ast = ps.kernelcreation.create_domain_kernel([assignment], config=kernel_config)
        >>> kernel = kernel_ast.compile()
        >>> d_arr = np.zeros([5, 5])
        >>> kernel(d=d_arr, s=np.ones([5, 5]))
        >>> d_arr
        array([[0., 0., 0., 0., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 4., 4., 4., 0.],
               [0., 0., 0., 0., 0.]])
    """
    # --- applying first default simplifications
    try:
        if config.default_assignment_simplifications and isinstance(
                assignments, AssignmentCollection):
            simplification = create_simplification_strategy()
            assignments = simplification(assignments)
    except Exception as e:
        warnings.warn(
            f"It was not possible to apply the default pystencils optimisations to the "
            f"AssignmentCollection due to the following problem :{e}")

    # ----  Normalizing parameters
    split_groups = ()
    if isinstance(assignments, AssignmentCollection):
        if 'split_groups' in assignments.simplification_hints:
            split_groups = assignments.simplification_hints['split_groups']
        assignments = assignments.all_assignments

    try:
        if config.default_assignment_simplifications:
            assignments = apply_sympy_optimisations(assignments)
    except Exception as e:
        warnings.warn(
            f"It was not possible to apply the default SymPy optimisations to the "
            f"Assignments due to the following problem :{e}")

    # ----  Creating ast
    ast = None
    if config.target == Target.CPU:
        if config.backend == Backend.C:
            from pystencils.cpu import add_openmp, create_kernel
            ast = create_kernel(
                assignments,
                function_name=config.function_name,
                type_info=config.data_type,
                split_groups=split_groups,
                iteration_slice=config.iteration_slice,
                ghost_layers=config.ghost_layers,
                skip_independence_check=config.skip_independence_check)
            for optimization in config.cpu_prepend_optimizations:
                optimization(ast)
            omp_collapse = None
            if config.cpu_blocking:
                omp_collapse = loop_blocking(ast, config.cpu_blocking)
            if config.cpu_openmp:
                add_openmp(ast,
                           num_threads=config.cpu_openmp,
                           collapse=omp_collapse,
                           assume_single_outer_loop=config.omp_single_loop)
            if config.cpu_vectorize_info:
                if config.cpu_vectorize_info is True:
                    vectorize(ast)
                elif isinstance(config.cpu_vectorize_info, dict):
                    vectorize(ast, **config.cpu_vectorize_info)
                    if config.cpu_openmp and config.cpu_blocking and 'nontemporal' in config.cpu_vectorize_info and \
                            config.cpu_vectorize_info['nontemporal'] and 'cachelineZero' in ast.instruction_set:
                        # This condition is stricter than it needs to be: if blocks along the fastest axis start on a
                        # cache line boundary, it's okay. But we cannot determine that here.
                        # We don't need to disallow OpenMP collapsing because it is never applied to the inner loop.
                        raise ValueError(
                            "Blocking cannot be combined with cacheline-zeroing"
                        )
                else:
                    raise ValueError("Invalid value for cpu_vectorize_info")
    elif config.target == Target.GPU:
        if config.backend == Backend.CUDA:
            from pystencils.gpucuda import create_cuda_kernel
            ast = create_cuda_kernel(
                assignments,
                function_name=config.function_name,
                type_info=config.data_type,
                indexing_creator=indexing_creator_from_params(
                    config.gpu_indexing, config.gpu_indexing_params),
                iteration_slice=config.iteration_slice,
                ghost_layers=config.ghost_layers,
                skip_independence_check=config.skip_independence_check)

    if not ast:
        raise NotImplementedError(
            f'{config.target} together with {config.backend} is not supported by `create_domain_kernel`'
        )

    if config.use_auto_for_assignments:
        for a in ast.atoms(SympyAssignment):
            a.use_auto = True

    return ast
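
All backend-specific options travel through the CreateKernelConfig object. A short sketch of a CPU configuration that exercises the OpenMP, blocking and vectorization branches above (the chosen values are illustrative, not recommendations):

# Hedged sketch: a CreateKernelConfig exercising the CPU branches of
# create_domain_kernel above. Thread count, block sizes and vectorization
# settings are illustrative values only.
import pystencils as ps

config = ps.CreateKernelConfig(target=ps.Target.CPU,
                               cpu_openmp=2,
                               cpu_blocking=(32, 16),
                               cpu_vectorize_info={'assume_aligned': False,
                                                   'nontemporal': False})

s, d = ps.fields('s, d: [2D]')
assignment = ps.Assignment(d[0, 0], 0.25 * (s[0, 1] + s[0, -1] + s[1, 0] + s[-1, 0]))
kernel_ast = ps.kernelcreation.create_domain_kernel([assignment], config=config)
kernel = kernel_ast.compile()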
Example #11
def compile_macroscopic_values_getter(lb_method,
                                      output_quantities,
                                      pdf_arr=None,
                                      ghost_layers=1,
                                      iteration_slice=None,
                                      field_layout='numpy',
                                      target=Target.CPU,
                                      streaming_pattern='pull',
                                      previous_timestep=Timestep.BOTH):
    """
    Create kernel to compute macroscopic value(s) from a pdf field (e.g. density or velocity)

    Args:
        lb_method: instance of :class:`lbmpy.methods.AbstractLbMethod`
        output_quantities: sequence of quantities to compute e.g. ['density', 'velocity']
        pdf_arr: optional numpy array for pdf field - used to get optimal loop structure for kernel
        ghost_layers: a sequence of pairs for each coordinate with the lower and upper number of ghost layers
                      that should be excluded from the iteration. If None, the number of ghost layers
                      is determined automatically and assumed to be equal for all dimensions.
        iteration_slice: if not None, iteration is done only over this slice of the field
        field_layout: layout for output field, also used for pdf field if pdf_arr is not given
        target: `Target.CPU` or `Target.GPU`
        streaming_pattern: name of the streaming pattern used by the LB step that wrote the pdf field
        previous_timestep: `Timestep` of the previous LB step; together with ``streaming_pattern`` this selects
                           the accessor used to read the pdf field

    Returns:
        a function to compute macroscopic values, taking:
        - the pdf array as first positional argument
        - one keyword argument per conserved quantity (as in output_quantities) with the output numpy field
    """
    if not (isinstance(output_quantities, list)
            or isinstance(output_quantities, tuple)):
        output_quantities = [output_quantities]

    cqc = lb_method.conserved_quantity_computation
    unknown_quantities = [
        oq for oq in output_quantities if oq not in cqc.conserved_quantities
    ]
    if unknown_quantities:
        raise ValueError(
            "No such conserved quantity: %s, conserved quantities are %s" %
            (str(unknown_quantities), str(cqc.conserved_quantities.keys())))

    if pdf_arr is None:
        pdf_field = Field.create_generic('pdfs',
                                         lb_method.dim,
                                         index_dimensions=1,
                                         layout=field_layout)
    else:
        pdf_field = Field.create_from_numpy_array('pdfs',
                                                  pdf_arr,
                                                  index_dimensions=1)

    output_mapping = {}
    for output_quantity in output_quantities:
        number_of_elements = cqc.conserved_quantities[output_quantity]
        assert number_of_elements >= 1

        ind_dims = 0 if number_of_elements <= 1 else 1
        if pdf_arr is None:
            output_field = Field.create_generic(output_quantity,
                                                lb_method.dim,
                                                layout=field_layout,
                                                index_dimensions=ind_dims)
        else:
            output_field_shape = pdf_arr.shape[:-1]
            if ind_dims > 0:
                output_field_shape += (number_of_elements, )
                field_layout = get_layout_of_array(pdf_arr)
            else:
                field_layout = get_layout_of_array(
                    pdf_arr, index_dimension_ids=[len(pdf_field.shape) - 1])
            output_field = Field.create_fixed_size(output_quantity,
                                                   output_field_shape,
                                                   ind_dims, pdf_arr.dtype,
                                                   field_layout)

        output_mapping[output_quantity] = [
            output_field(i) for i in range(number_of_elements)
        ]
        if len(output_mapping[output_quantity]) == 1:
            output_mapping[output_quantity] = output_mapping[output_quantity][
                0]

    stencil = lb_method.stencil
    previous_step_accessor = get_accessor(streaming_pattern, previous_timestep)
    pdf_symbols = previous_step_accessor.write(pdf_field, stencil)

    eqs = cqc.output_equations_from_pdfs(pdf_symbols,
                                         output_mapping).all_assignments

    if target == Target.CPU:
        import pystencils.cpu as cpu
        kernel = cpu.make_python_function(
            cpu.create_kernel(eqs,
                              ghost_layers=ghost_layers,
                              iteration_slice=iteration_slice))
    elif target == Target.GPU:
        import pystencils.gpucuda as gpu
        kernel = gpu.make_python_function(
            gpu.create_cuda_kernel(eqs,
                                   ghost_layers=ghost_layers,
                                   iteration_slice=iteration_slice))
    else:
        raise ValueError(
            "Unknown target '%s'. Possible targets are `Target.CPU` and `Target.GPU`"
            % (target, ))

    def getter(pdfs, **kwargs):
        if pdf_arr is not None:
            assert pdfs.shape == pdf_arr.shape and pdfs.strides == pdf_arr.strides, \
                "Pdf array not matching blueprint which was used to compile" + str(pdfs.shape) + str(pdf_arr.shape)
        if not set(output_quantities).issubset(kwargs.keys()):
            raise ValueError(
                "You have to specify the output field for each of the following quantities: %s"
                % (str(output_quantities), ))
        kernel(pdfs=pdfs, **kwargs)

    return getter
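
A rough usage sketch, assuming lbmpy's LBMConfig/create_lb_method API for building the method (method settings, array sizes and quantity names are illustrative):

# Hedged usage sketch for compile_macroscopic_values_getter above. Assumes
# lbmpy's LBMConfig/create_lb_method API; method settings, array sizes and
# quantity names are illustrative.
import numpy as np
from lbmpy import LBMConfig, LBStencil, Method, Stencil, create_lb_method

method = create_lb_method(LBMConfig(stencil=LBStencil(Stencil.D2Q9),
                                    method=Method.SRT, relaxation_rate=1.8))

pdfs = np.zeros((32, 32, 9))
getter = compile_macroscopic_values_getter(method, ['density', 'velocity'], pdf_arr=pdfs)

rho = np.zeros((32, 32))
vel = np.zeros((32, 32, 2))
getter(pdfs, density=rho, velocity=vel)   # fills rho and vel from the pdf field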
Example #12
def compile_macroscopic_values_setter(lb_method,
                                      quantities_to_set,
                                      pdf_arr=None,
                                      ghost_layers=1,
                                      iteration_slice=None,
                                      field_layout='numpy',
                                      target=Target.CPU,
                                      streaming_pattern='pull',
                                      previous_timestep=Timestep.BOTH):
    """
    Creates a function that sets a pdf field to the specified macroscopic quantities.
    The returned function can be called with the pdf field to set as its single argument.

    Args:
        lb_method: instance of :class:`lbmpy.methods.AbstractLbMethod`
        quantities_to_set: map from conserved quantity name to fixed value or numpy array
        pdf_arr: optional numpy array for pdf field - used to get optimal loop structure for kernel
        ghost_layers: a sequence of pairs for each coordinate with the lower and upper number of ghost layers
                      that should be excluded from the iteration. If None, the number of ghost layers
                      is determined automatically and assumed to be equal for all dimensions.
        iteration_slice: if not None, iteration is done only over this slice of the field
        field_layout: layout of the pdf field if pdf_arr was not given
        target: `Target.CPU` or `Target.GPU`
        streaming_pattern: name of the streaming pattern used by the LB step
        previous_timestep: `Timestep` of the LB step; together with ``streaming_pattern`` this selects
                           the accessor used to write the initialised pdf values

    Returns:
        function taking pdf array as single argument and which sets the field to the given values
    """
    if pdf_arr is not None:
        pdf_field = Field.create_from_numpy_array('pdfs',
                                                  pdf_arr,
                                                  index_dimensions=1)
    else:
        pdf_field = Field.create_generic('pdfs',
                                         lb_method.dim,
                                         index_dimensions=1,
                                         layout=field_layout)

    fixed_kernel_parameters = {}
    cqc = lb_method.conserved_quantity_computation

    value_map = {}
    at_least_one_field_input = False
    for quantity_name, value in quantities_to_set.items():
        if hasattr(value, 'shape'):
            fixed_kernel_parameters[quantity_name] = value
            at_least_one_field_input = True
            num_components = cqc.conserved_quantities[quantity_name]
            field = Field.create_from_numpy_array(
                quantity_name,
                value,
                index_dimensions=0 if num_components <= 1 else 1)
            if num_components == 1:
                value = field(0)
            else:
                value = [field(i) for i in range(num_components)]

        value_map[quantity_name] = value

    cq_equations = cqc.equilibrium_input_equations_from_init_values(
        **value_map, force_substitution=False)

    eq = lb_method.get_equilibrium(conserved_quantity_equations=cq_equations)
    if at_least_one_field_input:
        simplification = create_simplification_strategy(lb_method)
        eq = simplification(eq)
    else:
        eq = eq.new_without_subexpressions()

    previous_step_accessor = get_accessor(streaming_pattern, previous_timestep)
    write_accesses = previous_step_accessor.write(pdf_field, lb_method.stencil)

    substitutions = {
        sym: write_accesses[i]
        for i, sym in enumerate(lb_method.post_collision_pdf_symbols)
    }
    eq = eq.new_with_substitutions(substitutions).all_assignments

    if target == Target.CPU:
        import pystencils.cpu as cpu
        kernel = cpu.make_python_function(cpu.create_kernel(eq))
        kernel = functools.partial(kernel, **fixed_kernel_parameters)
    elif target == Target.GPU:
        import pystencils.gpucuda as gpu
        kernel = gpu.make_python_function(gpu.create_cuda_kernel(eq))
        kernel = functools.partial(kernel, **fixed_kernel_parameters)
    else:
        raise ValueError(
            "Unknown target '%s'. Possible targets are `Target.CPU` and `Target.GPU`"
            % (target, ))

    def setter(pdfs, **kwargs):
        if pdf_arr is not None:
            assert pdfs.shape == pdf_arr.shape and pdfs.strides == pdf_arr.strides, \
                "Pdf array not matching blueprint which was used to compile" + str(pdfs.shape) + str(pdf_arr.shape)
        kernel(pdfs=pdfs, **kwargs)

    return setter
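
A matching sketch for the setter, assuming the same lbmpy method-creation API as in the previous sketch (all values are illustrative):

# Hedged usage sketch for compile_macroscopic_values_setter above: initialise a
# pdf field to the equilibrium for constant density and velocity. Assumes the
# same lbmpy method-creation API as in the previous sketch; values are illustrative.
import numpy as np
from lbmpy import LBMConfig, LBStencil, Method, Stencil, create_lb_method

method = create_lb_method(LBMConfig(stencil=LBStencil(Stencil.D2Q9),
                                    method=Method.SRT, relaxation_rate=1.8))

pdfs = np.zeros((32, 32, 9))
setter = compile_macroscopic_values_setter(method,
                                           {'density': 1.0, 'velocity': (0.05, 0.0)},
                                           pdf_arr=pdfs)
setter(pdfs)   # pdfs now holds the equilibrium for rho = 1, u = (0.05, 0)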