Exemplo n.º 1
0
def test_symbol_renaming():
    """Check symbol renaming when clashing definitions are hoisted out of two loops.

    Both loops assign the same symbol ``c`` but with a different right-hand
    side. After ``move_constants_before_loop`` every loop body is expected to
    hold a single node and the parent block four nodes, the first two of which
    must have differently named left-hand sides.
    """
    f, g = ps.fields("f, g : double[2D]")
    a, b, c = (TypedSymbol(name, np.float64) for name in ('a', 'b', 'c'))

    def make_loop(rhs_of_c):
        # Loop over coordinate 0 from 0 to 10 that first defines c, then uses it.
        body = Block([
            SympyAssignment(c, rhs_of_c),
            SympyAssignment(g[0, 0], f[0, 0] + c),
        ])
        return LoopOverCoordinate(body, 0, 0, 10)

    block = Block([make_loop(a + b), make_loop(a**2 + b**2)])

    move_constants_before_loop(block)

    loops = block.atoms(LoopOverCoordinate)
    assert len(loops) == 2
    for loop in loops:
        assert len(loop.body.args) == 1
        siblings = loop.parent.args
        assert len(siblings) == 4  # 2 loops + 2 subexpressions
        assert siblings[0].lhs.name != siblings[1].lhs.name
Exemplo n.º 2
0
    def read(self, field, stencil):
        """Return one field access per stencil direction, pulled from the inverse direction.

        On axes flagged in ``self._periodicity`` a unit offset is replaced by a
        ``sp.Piecewise`` that jumps to the opposite end of the non-ghost range
        once the loop counter reaches the corresponding limit.

        Args:
            field: field to read from; indexed with the offset tuple and then
                called with the stencil index.
            stencil: iterable of directions.

        Returns:
            list with one access per stencil entry, in stencil order.

        Raises:
            NotImplementedError: if a periodic axis has an offset other than
                -1, 0 or 1.
        """
        accesses = []
        for index, direction in enumerate(stencil):
            offsets = []
            for axis, component in enumerate(inverse_direction(direction)):
                if not self._periodicity[axis]:
                    # Non-periodic axis: the plain pull offset is used unchanged.
                    offsets.append(component)
                    continue

                first = self._ghostLayers
                last = field.spatial_shape[axis] - 1 - self._ghostLayers
                span = last - first
                counter = LoopOverCoordinate.get_loop_counter_symbol(axis)

                if component == 0:
                    offsets.append(0)
                    continue
                if component == 1:
                    wrapped = sp.Piecewise((component, counter < last),
                                           (-span, True))
                elif component == -1:
                    wrapped = sp.Piecewise((component, counter > first),
                                           (span, True))
                else:
                    raise NotImplementedError(
                        "This accessor supports only nearest neighbor stencils"
                    )
                offsets.append(wrapped)
            accesses.append(field[tuple(offsets)](index))
        return accesses
Exemplo n.º 3
0
def border_conditions(direction, field, ghost_layers=1):
    """Build the symbolic condition selecting the border cells in ``direction``.

    Args:
        direction: sequence with exactly one entry of absolute value 1 and
            zeros elsewhere (checked with an assertion).
        field: object whose ``shape`` gives the extent along each coordinate.
        ghost_layers: number of ghost layers; used as the border index for a
            negative direction and subtracted from the extent otherwise.

    Returns:
        The condition passed through ``type_all_numbers`` with the dtype of
        the loop counter of the border axis. For ``ghost_layers == 0`` this is
        only the equality on the border axis; otherwise it is additionally
        AND-ed with range restrictions on all other loop counters.
    """
    abs_direction = tuple(-e if e < 0 else e for e in direction)
    assert sum(abs_direction) == 1
    axis = abs_direction.index(1)
    sign = direction[axis]

    counters = [
        LoopOverCoordinate.get_loop_counter_symbol(d)
        for d, _ in enumerate(direction)
    ]
    border_counter = counters[axis]

    if sign < 0:
        border_index = ghost_layers
    else:
        border_index = field.shape[axis] - ghost_layers - 1
    on_border = sp.Eq(border_counter, border_index)

    if ghost_layers == 0:
        return type_all_numbers(on_border, border_counter.dtype)

    # Keep the remaining coordinates inside [ghost_layers, shape - ghost_layers).
    lower_bounds = [
        sp.Ge(counter, ghost_layers) for counter in counters
        if counter != border_counter
    ]
    upper_bounds = [
        sp.Lt(counter, field.shape[d] - ghost_layers)
        for d, counter in enumerate(counters) if counter != border_counter
    ]
    combined = sp.And(on_border, *lower_bounds, *upper_bounds)
    return type_all_numbers(combined, border_counter.dtype)
Exemplo n.º 4
0
    def __init__(self,
                 dim,
                 time_step=TypedSymbol("time_step", np.uint32),
                 offsets=None,
                 keys=None):
        """Set up the arguments and result symbols of the random number node.

        Args:
            dim: number of spatial coordinates whose loop counters are used.
            time_step: first argument of the generator.
            offsets: per-coordinate offsets added to the loop counters;
                defaults to ``dim`` zeros.
            keys: generator keys; defaults to ``self._num_keys`` zeros.

        Raises:
            ValueError: if the number of keys differs from ``self._num_keys``
                or the number of offsets differs from ``dim``.
        """
        keys = (0, ) * self._num_keys if keys is None else keys
        offsets = (0, ) * dim if offsets is None else offsets

        if len(keys) != self._num_keys:
            raise ValueError(
                f"Provided {len(keys)} keys but need {self._num_keys}")
        if len(offsets) != dim:
            raise ValueError(f"Provided {len(offsets)} offsets but need {dim}")

        coordinates = []
        for axis in range(dim):
            counter = LoopOverCoordinate.get_loop_counter_symbol(axis)
            coordinates.append(counter + offsets[axis])
        # NOTE(review): only a single zero is appended, so dim == 1 yields two
        # coordinates rather than three - confirm this is intended.
        if dim < 3:
            coordinates.append(0)
        self._args = sp.sympify([time_step, *coordinates, *keys])

        generated = []
        for i in range(self._num_vars):
            generated.append(
                TypedSymbol(f'random_{self.id}_{i}', self._data_type))
        self.result_symbols = tuple(generated)

        symbols_read = set.union(*[s.atoms(sp.Symbol) for s in self.args])
        super().__init__("",
                         symbols_read=symbols_read,
                         symbols_defined=self.result_symbols)

        header_prefix = self._name.split("_")[0]
        self.headers = [f'"{header_prefix}_rand.h"']

        # The class-wide counter feeds the result symbol names above.
        RNGBase.id += 1
Exemplo n.º 5
0
def test_loop_over_coordinate():
    """Exercise body replacement and start/stop/step replacement of a loop node."""
    first_assignment = Assignment(dst[0, 0](0), s[0])
    second_assignment = Assignment(x, dst[0, 0](2))

    body = Block([first_assignment, second_assignment])
    loop = LoopOverCoordinate(body,
                              coordinate_to_loop_over=0,
                              start=0,
                              stop=10,
                              step=1)
    assert loop.body == body

    # Swapping the body must leave the loop bounds untouched.
    new_body = Block([first_assignment])
    loop = loop.new_loop_with_different_body(new_body)
    assert loop.body == new_body
    for attribute, expected in (("start", 0), ("stop", 10), ("step", 1)):
        assert getattr(loop, attribute) == expected

    loop.replace(loop.start, 2)
    loop.replace(loop.stop, 20)
    loop.replace(loop.step, 2)

    for attribute, expected in (("start", 2), ("stop", 20), ("step", 2)):
        assert getattr(loop, attribute) == expected
Exemplo n.º 6
0
 def undefined_symbols(self):
     """Return the symbols this node reads.

     These are the time step, offsets and keys that are ``sp.Symbol``
     instances, plus the loop counter symbols of the first ``self._dim``
     coordinates.
     """
     symbols = set()
     for candidate in (self._time_step, *self._offsets, *self.keys):
         if isinstance(candidate, sp.Symbol):
             symbols.add(candidate)
     for axis in range(self._dim):
         symbols.add(LoopOverCoordinate.get_loop_counter_symbol(axis))
     return symbols
Exemplo n.º 7
0
def test_staggered_iteration_manual():
    """Compare a conditional staggered kernel with its manually loop-cut version.

    A kernel with one conditional per dimension is run once as a reference.
    Its innermost and outermost loops are then cut, the conditionals are
    simplified and constants are moved out of the loops. The resulting AST
    must contain no ``Conditional`` and must reproduce the reference result.
    """
    dim = 2
    side = [5] * dim
    f_arr = np.arange(5**dim).reshape(side)
    s_arr = np.ones(side + [dim]) * 1234
    s_arr_ref = s_arr.copy()

    f = Field.create_from_numpy_array('f', f_arr)
    s = Field.create_from_numpy_array('s', s_arr, index_dimensions=1)

    counters = [
        LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)
    ]
    conditions = [
        counter < f.shape[i] - 1 for i, counter in enumerate(counters)
    ]

    eqs = []
    for d in range(dim):
        difference = (sum(f[o] for o in offsets_in_plane(d, 0, dim)) -
                      sum(f[o] for o in offsets_in_plane(d, -1, dim)))
        guard = sp.And(*(conditions[i] for i in range(dim) if i != d))
        eqs.append(Conditional(guard, SympyAssignment(s(d), difference)))

    kernel_ast = create_kernel(eqs, ghost_layers=[(1, 0), (1, 0), (1, 0)])

    # Reference function is created and run before the transformations below.
    func = make_python_function(kernel_ast)
    func(f=f_arr, s=s_arr_ref)

    def first_loop(flag_name):
        # First loop node of the current AST whose given flag is truthy.
        return [
            n for n in kernel_ast.atoms(ast.LoopOverCoordinate)
            if getattr(n, flag_name)
        ][0]

    # The outermost loop is looked up only after the innermost one was cut.
    cut_loop(first_loop('is_innermost_loop'), [4])
    cut_loop(first_loop('is_outermost_loop'), [4])

    simplify_conditionals(kernel_ast.body, loop_counter_simplification=True)
    cleanup_blocks(kernel_ast.body)
    move_constants_before_loop(kernel_ast.body)
    cleanup_blocks(kernel_ast.body)

    remaining_conditionals = kernel_ast.atoms(Conditional)
    assert not remaining_conditionals, "Loop cutting optimization did not work"

    func_optimized = make_python_function(kernel_ast)
    func_optimized(f=f_arr, s=s_arr)
    np.testing.assert_almost_equal(s_arr_ref, s_arr)
Exemplo n.º 8
0
def _get_rng_code(template, dialect, vector_instruction_set, time_step,
                  offsets, keys, dim, result_symbols):
    """Fill the random number generator code template.

    The parameter list is the time step, the offset loop counters of the first
    ``dim`` coordinates, zeros padding the coordinates up to three, and then
    the keys.

    Args:
        template: format string with ``{parameters}`` and ``{result_symbols}``
            placeholders.
        dialect: only 'cuda', or 'c' without a vector instruction set, is
            supported.
        vector_instruction_set: must be None for the 'c' dialect.
        time_step: first generator parameter.
        offsets: per-coordinate offsets added to the loop counters.
        keys: iterable of generator keys appended last.
        dim: number of spatial coordinates.
        result_symbols: value substituted for ``{result_symbols}``.

    Raises:
        NotImplementedError: for any other dialect / instruction set pairing.
    """
    parameters = [time_step]
    parameters.extend(
        LoopOverCoordinate.get_loop_counter_symbol(i) + offsets[i]
        for i in range(dim))
    parameters.extend([0] * (3 - dim))
    parameters.extend(keys)

    scalar_c = dialect == 'c' and vector_instruction_set is None
    if dialect != 'cuda' and not scalar_c:
        raise NotImplementedError("Not yet implemented for this backend")

    joined_parameters = ', '.join(map(str, parameters))
    return template.format(parameters=joined_parameters,
                           result_symbols=result_symbols)
Exemplo n.º 9
0
def test_staggered_combined():
    """Check the staggered discretisation of a nested derivative with loop-counter coefficients.

    The expression differentiated along coordinate 0 mixes derivatives in both
    directions, each multiplied by a loop counter symbol. The result is
    compared with a hand-written reference.
    """
    from pystencils.fd import diff
    f = ps.fields("f : double[2D]")
    x, y = [LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(2)]
    dx = sp.symbols("dx")
    half = sp.Rational(1, 2)

    expr = diff(x * diff(f, 0) + y * diff(f, 1), 0)

    # Hand-written values at the right and left staggered positions.
    cross_right = f[1, 1] - f[1, -1] + f[0, 1] - f[0, -1]
    cross_left = f[-1, 1] - f[-1, -1] + f[0, 1] - f[0, -1]
    right = (x + half) * (f[1, 0] - f[0, 0]) + y * cross_right / 4
    left = (x - half) * (f[0, 0] - f[-1, 0]) + y * cross_left / 4
    reference = (right - left) / (dx**2)

    to_test = ps.fd.discretize_spatial_staggered(expr, dx)
    assert sp.expand(reference - to_test) == 0
Exemplo n.º 10
0
def test_staggered_iteration():
    """Compare a hand-built conditional kernel with ``create_staggered_kernel``.

    The comparison runs once for generic fields and once for fields created
    from the numpy arrays. The staggered kernel must contain no
    ``Conditional`` and must produce the same values as the manual kernel.
    """
    dim = 2
    side = [5] * dim
    f_arr = np.arange(5**dim).reshape(side).astype(np.float64)
    s_arr = np.ones(side + [dim]) * 1234
    s_arr_ref = s_arr.copy()

    fields_fixed = (Field.create_from_numpy_array('f', f_arr),
                    Field.create_from_numpy_array('s',
                                                  s_arr,
                                                  index_dimensions=1))
    fields_var = (Field.create_generic('f', 2),
                  Field.create_generic('s', 2, index_dimensions=1))

    def plane_difference(field, d):
        # Sum over the plane at offset 0 minus the sum over the plane at -1.
        return (sum(field[o] for o in offsets_in_plane(d, 0, dim)) -
                sum(field[o] for o in offsets_in_plane(d, -1, dim)))

    for f, s in (fields_var, fields_fixed):
        # --- Manual
        counters = [
            LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)
        ]
        conditions = [
            counter < f.shape[i] - 1 for i, counter in enumerate(counters)
        ]
        eqs = []
        for d in range(dim):
            guard = sp.And(*(conditions[i] for i in range(dim) if i != d))
            update = SympyAssignment(s(d), plane_difference(f, d))
            eqs.append(Conditional(guard, update))
        manual_ast = create_kernel(eqs,
                                   ghost_layers=[(1, 0), (1, 0), (1, 0)])
        func = manual_ast.compile()

        # --- Built-in optimized
        expressions = [plane_difference(f, d) for d in range(dim)]
        func_optimized = create_staggered_kernel(s, expressions).compile()
        leftover = func_optimized.ast.atoms(Conditional)
        assert not leftover, "Loop cutting optimization did not work"

        func(f=f_arr, s=s_arr_ref)
        func_optimized(f=f_arr, s=s_arr)
        np.testing.assert_almost_equal(s_arr_ref, s_arr)
Exemplo n.º 11
0
 def staggered_visitor(e, coordinate, sign):
     """Rewrite ``e`` for the staggered position shifted by ``sign`` along ``coordinate``.

     Derivatives along ``coordinate`` become a one-sided difference of
     neighbouring accesses; derivatives along other coordinates and plain
     field accesses are averaged with the neighbour; the loop counter of
     ``coordinate`` is shifted by ``sign / 2``. Any other expression is
     rebuilt from its recursively visited arguments.

     Raises:
         ValueError: if a ``Diff`` has more than one index or its argument is
             not a field access.
     """
     if isinstance(e, Diff):
         arg, *indices = diff_args(e)
         if len(indices) != 1:
             raise ValueError("Function supports only up to second derivatives")
         if not isinstance(arg, Field.Access):
             raise ValueError("Argument of inner derivative has to be field access")
         shifted = arg.neighbor(coordinate, sign)
         if indices[0] == coordinate:
             assert sign in (-1, 1)
             return (shifted - arg) / dx * sign
         return (stencil(indices, dx, shifted)
                 + stencil(indices, dx, arg)) / 2

     # Field.Access is tested before sp.Symbol, exactly as in the if/elif order.
     if isinstance(e, Field.Access):
         return (e.neighbor(coordinate, sign) + e) / 2

     if isinstance(e, sp.Symbol):
         if LoopOverCoordinate.is_loop_counter_symbol(e) == coordinate:
             return e + sign / 2
         return e

     visited = [staggered_visitor(child, coordinate, sign) for child in e.args]
     if visited:
         return e.func(*visited)
     return e
Exemplo n.º 12
0
    def _print_SympyAssignment(self, node):
        """Print an assignment node as a statement string.

        Three cases are handled:

        * declarations are printed as ``<type> <lhs> = <rhs>;`` where the type
          is ``auto`` or the printed lhs dtype with an optional ``const`` prefix;
        * assignments whose lhs has a ``VectorType`` and is a ``cast_func`` are
          printed as store instructions taken from
          ``self._vector_instruction_set`` (plain, masked, strided or
          nontemporal variants);
        * everything else is printed as ``<lhs> = <rhs>;``.

        Note that a missing masked-store entry is added to
        ``self._vector_instruction_set`` as a side effect.
        """
        if node.is_declaration:
            if node.use_auto:
                data_type = 'auto '
            else:
                if node.is_const:
                    prefix = 'const '
                else:
                    prefix = ''
                # Strip any ' const' from the printed dtype; constness is expressed by the prefix only.
                data_type = prefix + self._print(node.lhs.dtype).replace(' const', '') + " "

            return "%s%s = %s;" % (data_type,
                                   self.sympy_printer.doprint(node.lhs),
                                   self.sympy_printer.doprint(node.rhs))
        else:
            lhs_type = get_type_of_expression(node.lhs)
            printed_mask = ""
            if type(lhs_type) is VectorType and isinstance(node.lhs, cast_func):
                arg, data_type, aligned, nontemporal, mask, stride = node.lhs.args
                # Default to an unaligned store; refined below for aligned / nontemporal / masked stores.
                instr = 'storeU'
                if aligned:
                    instr = 'stream' if nontemporal and 'stream' in self._vector_instruction_set else 'storeA'
                # NOTE(review): '!=' rather than 'is not' is deliberate (see NOQA); presumably mask can be a
                # sympy boolean - confirm.
                if mask != True:  # NOQA
                    instr = 'maskStoreA' if aligned else 'maskStoreU'
                    if instr not in self._vector_instruction_set:
                        # No masked store available: build one as store(blendv(load, ...)) and cache it
                        # in the instruction set under the masked-store name.
                        self._vector_instruction_set[instr] = self._vector_instruction_set['store' + instr[-1]].format(
                            '{0}', self._vector_instruction_set['blendv'].format(
                                self._vector_instruction_set['load' + instr[-1]].format('{0}', **self._kwargs),
                                '{1}', '{2}', **self._kwargs), **self._kwargs)
                    printed_mask = self.sympy_printer.doprint(mask)
                    # For the __m256/__m128 register types, wrap the mask in a cast to the integer vector type.
                    if data_type.base_type.base_name == 'double':
                        if self._vector_instruction_set['double'] == '__m256d':
                            printed_mask = f"_mm256_castpd_si256({printed_mask})"
                        elif self._vector_instruction_set['double'] == '__m128d':
                            printed_mask = f"_mm_castpd_si128({printed_mask})"
                    elif data_type.base_type.base_name == 'float':
                        if self._vector_instruction_set['float'] == '__m256':
                            printed_mask = f"_mm256_castps_si256({printed_mask})"
                        elif self._vector_instruction_set['float'] == '__m128':
                            printed_mask = f"_mm_castps_si128({printed_mask})"

                # A non-vector right-hand side is wrapped in a cast to the matching vector type.
                rhs_type = get_type_of_expression(node.rhs)
                if type(rhs_type) is not VectorType:
                    rhs = cast_func(node.rhs, VectorType(rhs_type))
                else:
                    rhs = node.rhs

                ptr = "&" + self.sympy_printer.doprint(node.lhs.args[0])

                # Strided stores use dedicated instructions and return immediately.
                if stride != 1:
                    instr = 'maskStoreS' if mask != True else 'storeS'  # NOQA
                    return self._vector_instruction_set[instr].format(ptr, self.sympy_printer.doprint(rhs),
                                                                      stride, printed_mask, **self._kwargs) + ';'

                pre_code = ''
                if nontemporal and 'cachelineZero' in self._vector_instruction_set:
                    # Emit a guarded cacheline-zero instruction before the store: only when the pointer is
                    # cacheline-aligned and the linear offset plus one cacheline stays below the field size.
                    first_cond = f"((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) == 0"
                    offset = sp.Add(*[sp.Symbol(LoopOverCoordinate.get_loop_counter_name(i))
                                      * node.lhs.args[0].field.spatial_strides[i] for i in
                                      range(len(node.lhs.args[0].field.spatial_strides))])
                    # Always true here, because strided stores have already returned above.
                    if stride == 1:
                        offset = offset.subs({node.lhs.args[0].field.spatial_strides[0]: 1})
                    size = sp.Mul(*node.lhs.args[0].field.spatial_shape)
                    element_size = 8 if data_type.base_type.base_name == 'double' else 4
                    size_cond = f"({offset} + {CachelineSize.symbol/element_size}) < {size}"
                    pre_code = f"if ({first_cond} && {size_cond}) " + "{\n\t" + \
                        self._vector_instruction_set['cachelineZero'].format(ptr, **self._kwargs) + ';\n}\n'

                code = self._vector_instruction_set[instr].format(ptr, self.sympy_printer.doprint(rhs),
                                                                  printed_mask, **self._kwargs) + ';'
                # Condition that the pointer addresses the last vector of a cacheline.
                flushcond = f"((uintptr_t) {ptr} & {CachelineSize.mask_symbol}) == {CachelineSize.last_symbol}"
                if nontemporal and 'flushCacheline' in self._vector_instruction_set:
                    # Separate flush instruction: store first, then flush conditionally.
                    code2 = self._vector_instruction_set['flushCacheline'].format(
                        ptr, self.sympy_printer.doprint(rhs), **self._kwargs) + ';'
                    code = f"{code}\nif ({flushcond}) {{\n\t{code2}\n}}"
                elif nontemporal and 'storeAAndFlushCacheline' in self._vector_instruction_set:
                    # Combined store-and-flush instruction: evaluate the rhs once into a temporary (named
                    # after a hash of the printed rhs) and pick the store variant at run time.
                    tmpvar = '_tmp_' + hashlib.sha1(self.sympy_printer.doprint(rhs).encode('ascii')).hexdigest()[:8]
                    code = 'const ' + self._print(node.lhs.dtype).replace(' const', '') + ' ' + tmpvar + ' = ' \
                        + self.sympy_printer.doprint(rhs) + ';'
                    code1 = self._vector_instruction_set[instr].format(ptr, tmpvar, printed_mask, **self._kwargs) + ';'
                    code2 = self._vector_instruction_set['storeAAndFlushCacheline'].format(ptr, tmpvar, printed_mask,
                                                                                           **self._kwargs) + ';'
                    code += f"\nif ({flushcond}) {{\n\t{code2}\n}} else {{\n\t{code1}\n}}"
                return pre_code + code
            else:
                # Scalar (non-vector) assignment.
                return f"{self.sympy_printer.doprint(node.lhs)} = {self.sympy_printer.doprint(node.rhs)};"
Exemplo n.º 13
0
def create_staggered_kernel(staggered_field,
                            expressions,
                            subexpressions=(),
                            target='cpu',
                            gpu_exclusive_conditions=False,
                            **kwargs):
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    Args:
        staggered_field: field where the first index coordinate defines the location of the staggered value
                can have 1 or 2 index coordinates, in case of two index coordinates at every staggered location
                a vector is stored, expressions parameter has to be a sequence of sequences then
                where e.g. ``f[0,0](0)`` is interpreted as value at the left cell boundary, ``f[1,0](0)`` the right cell
                boundary and ``f[0,0](1)`` the southern cell boundary etc.
        expressions: sequence of expressions of length dim, defining how the west, southern, (bottom) cell boundary
                     should be updated.
        subexpressions: optional sequence of Assignments, that define subexpressions used in the main expressions
        target: 'cpu' or 'gpu'
        gpu_exclusive_conditions: if/else construct to have only one code block for each of 2**dim code paths
        kwargs: passed directly to create_kernel, iteration slice and ghost_layers parameters are not allowed.
                The keys 'cpu_blocking', 'cpu_vectorize_info' and 'cpu_openmp' are removed from kwargs when truthy
                and applied in this function after the staggered-specific transformations (CPU target only).

    Returns:
        AST, see `create_kernel`
    """
    assert 'iteration_slice' not in kwargs and 'ghost_layers' not in kwargs
    assert staggered_field.index_dimensions in (
        1, 2), 'Staggered field must have one or two index dimensions'
    dim = staggered_field.spatial_dimensions

    counters = [
        LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)
    ]
    # conditions[i] holds while the loop counter of coordinate i is below shape[i] - 1
    conditions = [
        counters[i] < staggered_field.shape[i] - 1 for i in range(dim)
    ]
    assert len(expressions) == dim
    if staggered_field.index_dimensions == 2:
        assert all(len(sublist) == len(expressions[0]) for sublist in expressions), \
            "If staggered field has two index dimensions expressions has to be a sequence of sequences of all the " \
            "same length."

    final_assignments = []
    # Most recently created Conditional; used by `add` to chain else-blocks.
    last_conditional = None

    def add(condition, dimensions, as_else_block=False):
        """Add a Conditional that updates the staggered values of `dimensions` when `condition` holds.

        With `as_else_block` and an existing previous Conditional, the new one becomes the false block of the
        previous one; otherwise it is appended to `final_assignments`.
        """
        nonlocal last_conditional
        if staggered_field.index_dimensions == 1:
            assignments = [
                Assignment(staggered_field(d), expressions[d])
                for d in dimensions
            ]
            a_coll = AssignmentCollection(assignments, list(subexpressions))
            # new_filtered is given the staggered accesses that are written in this branch
            a_coll = a_coll.new_filtered(
                [staggered_field(d) for d in dimensions])
        elif staggered_field.index_dimensions == 2:
            assert staggered_field.has_fixed_index_shape
            assignments = [
                Assignment(staggered_field(d, i), expr) for d in dimensions
                for i, expr in enumerate(expressions[d])
            ]
            a_coll = AssignmentCollection(assignments, list(subexpressions))
            a_coll = a_coll.new_filtered([
                staggered_field(d, i)
                for i in range(staggered_field.index_shape[1])
                for d in dimensions
            ])
        # Convert the collection's assignments into SympyAssignment AST nodes
        sp_assignments = [
            SympyAssignment(a.lhs, a.rhs) for a in a_coll.all_assignments
        ]
        if as_else_block and last_conditional:
            new_cond = Conditional(condition, Block(sp_assignments))
            last_conditional.false_block = Block([new_cond])
            last_conditional = new_cond
        else:
            last_conditional = Conditional(condition, Block(sp_assignments))
            final_assignments.append(last_conditional)

    if target == 'cpu' or not gpu_exclusive_conditions:
        # One independent conditional per dimension; its condition combines the conditions of all other dimensions
        for d in range(dim):
            cond = sp.And(*[conditions[i] for i in range(dim) if d != i])
            add(cond, [d])
    elif target == 'gpu':
        # Exclusive if/else chain over all combinations of per-dimension conditions being true or false
        full_conditions = [
            sp.And(*[conditions[i] for i in range(dim) if d != i])
            for d in range(dim)
        ]
        for include in itertools.product(*[[1, 0]] * dim):
            case_conditions = sp.And(*[
                c if value else sp.Not(c)
                for c, value in zip(full_conditions, include)
            ])
            dimensions_to_include = [i for i in range(dim) if include[i]]
            # The combination where nothing is included needs no code block
            if dimensions_to_include:
                add(case_conditions, dimensions_to_include, True)

    ghost_layers = [(1, 0)] * dim

    # The following options are taken out of kwargs (when truthy) so that create_kernel does not receive them;
    # they are applied manually below for the CPU target.
    blocking = kwargs.get('cpu_blocking', None)
    if blocking:
        del kwargs['cpu_blocking']

    cpu_vectorize_info = kwargs.get('cpu_vectorize_info', None)
    if cpu_vectorize_info:
        del kwargs['cpu_vectorize_info']
    openmp = kwargs.get('cpu_openmp', None)
    if openmp:
        del kwargs['cpu_openmp']

    ast = create_kernel(final_assignments,
                        ghost_layers=ghost_layers,
                        target=target,
                        **kwargs)

    if target == 'cpu':
        # Staggered-specific transformations run first, then blocking, OpenMP and vectorization
        remove_conditionals_in_staggered_kernel(ast)
        move_constants_before_loop(ast)
        omp_collapse = None
        if blocking:
            omp_collapse = loop_blocking(ast, blocking)
        if openmp:
            from pystencils.cpu import add_openmp
            add_openmp(ast,
                       num_threads=openmp,
                       collapse=omp_collapse,
                       assume_single_outer_loop=False)
        # cpu_vectorize_info may be True (default settings) or a dict of keyword arguments for vectorize
        if cpu_vectorize_info is True:
            vectorize(ast)
        elif isinstance(cpu_vectorize_info, dict):
            vectorize(ast, **cpu_vectorize_info)
    return ast
Exemplo n.º 14
0
def create_indexed_kernel(
    assignments: AssignmentOrAstNodeList,
    index_fields,
    function_name="kernel",
    type_info=None,
    coordinate_names=('x', 'y', 'z')) -> KernelFunction:
    """
    Variant of :func:`create_kernel` that updates only the cells listed in an index field instead of all cells
    of a field. This traversal method can e.g. be used for boundary handling.

    The cell coordinates come from a separate index field: a one dimensional array with struct data type. The struct
    has to provide elements named 'x', 'y' and, for 3D fields, 'z'; these names can be changed through the
    'coordinate_names' parameter. Further struct elements (for example boundary parameters) may be read and written
    in the kernel.

    Args:
        assignments: list of assignments
        index_fields: list of index fields, i.e. 1D fields with struct data type
        function_name: see documentation of :func:`create_kernel`
        type_info: see documentation of :func:`create_kernel`
        coordinate_names: name of the coordinate fields in the struct data type

    Raises:
        ValueError: if a required coordinate name is not an element of any index field
    """
    fields_read, fields_written, assignments = add_types(
        assignments, type_info, check_independence_condition=False)
    all_fields = fields_read.union(fields_written)

    # Mark every index field as indexed and verify it is one dimensional.
    for index_field in index_fields:
        index_field.field_type = FieldType.INDEXED
        assert FieldType.is_indexed(index_field)
        assert index_field.spatial_dimensions == 1, "Index fields have to be 1D"

    non_index_fields = [f for f in all_fields if f not in index_fields]
    dimensionalities = {f.spatial_dimensions for f in non_index_fields}
    assert len(dimensionalities) == 1, \
        "Non-index fields do not have the same number of spatial coordinates"
    num_coordinates = list(dimensionalities)[0]

    def get_coordinate_symbol_assignment(name):
        # Read coordinate `name` from the first index field whose struct provides it.
        for idx_field in index_fields:
            struct_type = idx_field.dtype
            assert isinstance(
                struct_type,
                StructType), "Index fields have to have a struct data type"
            if not struct_type.has_element(name):
                continue
            rhs = idx_field[0](name)
            lhs = TypedSymbol(name,
                              BasicType(struct_type.get_element_type(name)))
            return SympyAssignment(lhs, rhs)
        raise ValueError(
            "Index %s not found in any of the passed index fields" % (name, ))

    coordinate_symbol_assignments = [
        get_coordinate_symbol_assignment(coordinate_name)
        for coordinate_name in coordinate_names[:num_coordinates]
    ]
    coordinate_typed_symbols = [
        assignment.lhs for assignment in coordinate_symbol_assignments
    ]
    assignments = coordinate_symbol_assignments + assignments

    # make 1D loop over index fields
    loop_body = Block([])
    loop_node = LoopOverCoordinate(loop_body,
                                   coordinate_to_loop_over=0,
                                   start=0,
                                   stop=index_fields[0].shape[0])
    for assignment in assignments:
        loop_body.append(assignment)

    ast_node = KernelFunction(Block([loop_node]),
                              "cpu",
                              "c",
                              make_python_function,
                              ghost_layers=None,
                              function_name=function_name)

    # Every non-index field is accessed at the coordinates read from the index field.
    fixed_coordinate_mapping = {
        field.name: coordinate_typed_symbols
        for field in non_index_fields
    }
    read_only_fields = {
        field.name for field in fields_read - fields_written
    }
    resolve_field_accesses(ast_node,
                           read_only_fields,
                           field_to_fixed_coordinates=fixed_coordinate_mapping)
    move_constants_before_loop(ast_node)
    return ast_node
Exemplo n.º 15
0
def create_staggered_kernel(assignments,
                            target: Target = Target.CPU,
                            gpu_exclusive_conditions=False,
                            **kwargs):
    """Kernel that updates a staggered field.

    .. image:: /img/staggered_grid.svg

    For a staggered field, the first index coordinate defines the location of the staggered value.
    Further index coordinates can be used to store vectors/tensors at each point.

    Args:
        assignments: a sequence of assignments or an AssignmentCollection.
                     Assignments to staggered field are processed specially, while subexpressions and assignments to
                     regular fields are passed through to `create_kernel`. Multiple different staggered fields can be
                     used, but they all need to use the same stencil (i.e. the same number of staggered points) and
                     shape.
        target: 'CPU' or 'GPU'
        gpu_exclusive_conditions: disable the use of multiple conditionals inside the loop. The outer layers are then
                                  handled in an else branch.
        kwargs: passed directly to create_kernel, iteration_slice and ghost_layers parameters are not allowed

    Returns:
        AST, see `create_kernel`

    Raises:
        ValueError: if the staggered assignments do not all share one stencil or one shape
    """
    # These options are only accepted with their neutral values and are dropped from kwargs, since
    # ghost_layers and omp_single_loop are passed explicitly to create_kernel below.
    if 'ghost_layers' in kwargs:
        assert kwargs['ghost_layers'] is None
        del kwargs['ghost_layers']
    if 'iteration_slice' in kwargs:
        assert kwargs['iteration_slice'] is None
        del kwargs['iteration_slice']
    if 'omp_single_loop' in kwargs:
        assert kwargs['omp_single_loop'] is False
        del kwargs['omp_single_loop']

    # Split the input: `assignments` keeps only writes to staggered fields; everything else
    # (including nodes without an lhs) goes to `subexpressions`.
    if isinstance(assignments, AssignmentCollection):
        subexpressions = assignments.subexpressions + [
            a for a in assignments.main_assignments
            if not hasattr(a, 'lhs') or type(a.lhs) is not Field.Access
            or not FieldType.is_staggered(a.lhs.field)
        ]
        assignments = [
            a for a in assignments.main_assignments
            if hasattr(a, 'lhs') and type(a.lhs) is Field.Access
            and FieldType.is_staggered(a.lhs.field)
        ]
    else:
        subexpressions = [
            a for a in assignments
            if not hasattr(a, 'lhs') or type(a.lhs) is not Field.Access
            or not FieldType.is_staggered(a.lhs.field)
        ]
        assignments = [
            a for a in assignments
            if hasattr(a, 'lhs') and type(a.lhs) is Field.Access
            and FieldType.is_staggered(a.lhs.field)
        ]
    # NOTE(review): with no staggered assignments at all, these checks also raise (the set is empty).
    if len(set([tuple(a.lhs.field.staggered_stencil)
                for a in assignments])) != 1:
        raise ValueError(
            "All assignments need to be made to staggered fields with the same stencil"
        )
    if len(set([a.lhs.field.shape for a in assignments])) != 1:
        raise ValueError(
            "All assignments need to be made to staggered fields with the same shape"
        )

    # Stencil, dimension and shape are taken from the first staggered field (all were checked to agree).
    staggered_field = assignments[0].lhs.field
    stencil = staggered_field.staggered_stencil
    dim = staggered_field.spatial_dimensions
    shape = staggered_field.shape

    counters = [
        LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(dim)
    ]

    final_assignments = []

    # find out whether any of the ghost layers is not needed
    # common_exclusions ends up as the elementary directions that remain excluded for every stencil direction
    common_exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
    for direction in stencil:
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])
        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        common_exclusions.intersection_update(exclusions)
    # ghost_layers[d] = [lower, upper]; a layer is set to 1 for each commonly excluded direction
    ghost_layers = [[0, 0] for d in range(dim)]
    for direction in common_exclusions:
        direction = direction_string_to_offset(direction)
        for d, s in enumerate(direction):
            if s == 1:
                ghost_layers[d][1] = 1
            elif s == -1:
                ghost_layers[d][0] = 1

    def condition(direction):
        """exclude those staggered points that correspond to fluxes between ghost cells"""
        exclusions = set(["E", "W", "N", "S", "T", "B"][:2 * dim])

        for elementary_direction in direction:
            exclusions.remove(inverse_direction_string(elementary_direction))
        conditions = []
        for e in exclusions:
            # Directions excluded for all stencil entries are already handled via ghost_layers
            if e in common_exclusions:
                continue
            offset = direction_string_to_offset(e)
            for i, o in enumerate(offset):
                if o == 1:
                    conditions.append(counters[i] < shape[i] - 1)
                elif o == -1:
                    conditions.append(counters[i] > 0)
        return sp.And(*conditions)

    if gpu_exclusive_conditions:
        # Build a chain of nested conditionals: each new Conditional receives the previously built one as
        # its third argument, and the innermost-condition case (all conditions hold) is added last.
        outer_assignment = None
        conditions = {direction: condition(direction) for direction in stencil}
        for num_conditions in range(len(stencil)):
            for combination in itertools.combinations(conditions.values(),
                                                      num_conditions):
                for assignment in assignments:
                    direction = stencil[assignment.lhs.index[0]]
                    if conditions[direction] in combination:
                        assignment = SympyAssignment(assignment.lhs,
                                                     assignment.rhs)
                        outer_assignment = Conditional(sp.And(*combination),
                                                       Block([assignment]),
                                                       outer_assignment)

        inner_assignment = []
        for assignment in assignments:
            inner_assignment.append(
                SympyAssignment(assignment.lhs, assignment.rhs))
        last_conditional = Conditional(
            sp.And(*[condition(d) for d in stencil]), Block(inner_assignment),
            outer_assignment)
        # Subexpressions without an lhs are passed through unchanged; the others become SympyAssignments
        final_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                            [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                            [last_conditional]

        if target == Target.CPU:
            from pystencils.cpu import create_kernel as create_kernel_cpu
            ast = create_kernel_cpu(final_assignments,
                                    ghost_layers=ghost_layers,
                                    omp_single_loop=False,
                                    **kwargs)
        else:
            ast = create_kernel(final_assignments,
                                ghost_layers=ghost_layers,
                                target=target,
                                **kwargs)
        return ast

    # Default path: one Conditional per staggered assignment, each carrying its own copy of the subexpressions
    for assignment in assignments:
        direction = stencil[assignment.lhs.index[0]]
        sp_assignments = [s for s in subexpressions if not hasattr(s, 'lhs')] + \
                         [SympyAssignment(s.lhs, s.rhs) for s in subexpressions if hasattr(s, 'lhs')] + \
                         [SympyAssignment(assignment.lhs, assignment.rhs)]
        last_conditional = Conditional(condition(direction),
                                       Block(sp_assignments))
        final_assignments.append(last_conditional)

    # True when at least one coordinate has no lower ghost layer
    remove_start_conditional = any([gl[0] == 0 for gl in ghost_layers])
    # Staggered-specific transformations come first; user-supplied 'cpu_prepend_optimizations' are appended
    prepend_optimizations = [
        lambda ast: remove_conditionals_in_staggered_kernel(
            ast, remove_start_conditional), move_constants_before_loop
    ]
    if 'cpu_prepend_optimizations' in kwargs:
        prepend_optimizations += kwargs['cpu_prepend_optimizations']
        del kwargs['cpu_prepend_optimizations']
    ast = create_kernel(final_assignments,
                        ghost_layers=ghost_layers,
                        target=target,
                        omp_single_loop=False,
                        cpu_prepend_optimizations=prepend_optimizations,
                        **kwargs)
    return ast
Exemplo n.º 16
0
def test_lees_edwards():
    """Check a D2Q9 lattice Boltzmann simulation with Lees-Edwards boundaries.

    The test has two parts:

    1. Starting from rest, the boundary offset is advanced by ``shear_velocity``
       every time step. After 500 steps the velocity profile
       ``u[0, :, 0]`` is compared against ``get_solution_navier_stokes``.
    2. A point force is applied and the simulation is run for 20 steps, once
       with a fixed boundary offset of 0 and once with a fixed offset of 10.
       The velocity in the ``[:, -3:-1, :]`` slice of the shifted run, rolled
       back by the offset along axis 0, must match the unshifted run.
    """

    domain_size = (64, 64)
    omega = 1.0  # relaxation rate of first component
    shear_velocity = 0.1  # shear velocity
    shear_dir = 0  # direction of shear flow
    shear_dir_normal = 1  # direction normal to shear plane, for interpolation

    stencil = LBStencil(Stencil.D2Q9)

    dh = ps.create_data_handling(domain_size,
                                 periodicity=True,
                                 default_target=ps.Target.CPU)

    src = dh.add_array('src', values_per_cell=stencil.Q)
    dh.fill('src', 1.0, ghost_layers=True)

    dst = dh.add_array_like('dst', 'src')
    dh.fill('dst', 0.0, ghost_layers=True)

    force = dh.add_array('force', values_per_cell=stencil.D)
    dh.fill('force', 0.0, ghost_layers=True)

    rho = dh.add_array('rho', values_per_cell=1)
    dh.fill('rho', 1.0, ghost_layers=True)
    u = dh.add_array('u', values_per_cell=stencil.D)
    dh.fill('u', 0.0, ghost_layers=True)

    counters = [
        LoopOverCoordinate.get_loop_counter_symbol(i) for i in range(stencil.D)
    ]
    # Placeholder symbols; they are replaced per stencil direction by
    # True/False further below.
    points_up = sp.Symbol('points_up')
    points_down = sp.Symbol('points_down')

    # Velocity correction: +/- shear_velocity where the loop counter along
    # axis 1 is at the lower/upper end and the placeholder is True, else 0.
    u_p = sp.Piecewise(
        (1,
         sp.And(ps.data_types.type_all_numbers(counters[1] <= 1, 'int'),
                points_down)),
        (-1,
         sp.And(
             ps.data_types.type_all_numbers(counters[1] >= src.shape[1] - 2,
                                            'int'), points_up)),
        (0, True)) * shear_velocity

    lbm_config = LBMConfig(stencil=stencil,
                           relaxation_rate=omega,
                           compressible=True,
                           velocity_input=u.center_vector +
                           sp.Matrix([u_p, 0]),
                           density_input=rho,
                           force_model=ForceModel.LUO,
                           force=force.center_vector,
                           kernel_type='collide_only')
    lbm_opt = LBMOptimisation(symbolic_field=src)
    collision = create_lb_update_rule(lbm_config=lbm_config,
                                      lbm_optimisation=lbm_opt)

    # Inline every subexpression that depends on the first-order equilibrium
    # moment in shear direction, so that the placeholders end up in the main
    # assignments where they are replaced below.
    to_insert = [
        s.lhs for s in collision.subexpressions
        if collision.method.first_order_equilibrium_moment_symbols[shear_dir]
        in s.free_symbols
    ]
    for s in to_insert:
        collision = collision.new_with_inserted_subexpression(s)

    # Resolve the placeholders per direction: (points_down, points_up)
    ma = []
    for a, c in zip(collision.main_assignments, collision.method.stencil):
        if c[shear_dir_normal] == -1:
            b = (True, False)
        elif c[shear_dir_normal] == 1:
            b = (False, True)
        else:
            b = (False, False)
        a = ps.Assignment(a.lhs, a.rhs.replace(points_down, b[0]))
        a = ps.Assignment(a.lhs, a.rhs.replace(points_up, b[1]))
        ma.append(a)
    collision.main_assignments = ma

    stream = create_stream_pull_with_output_kernel(collision.method, src, dst,
                                                   {
                                                       'density': rho,
                                                       'velocity': u
                                                   })

    config = ps.CreateKernelConfig(target=dh.default_target)
    stream_kernel = ps.create_kernel(stream, config=config).compile()
    collision_kernel = ps.create_kernel(collision, config=config).compile()

    init = macroscopic_values_setter(collision.method,
                                     velocity=(0, 0),
                                     pdfs=src.center_vector,
                                     density=rho.center)
    init_kernel = ps.create_kernel(init, ghost_layers=0).compile()

    # One-element list so the boundary functor sees updates made in place.
    offset = [0.0]

    sync_pdfs = dh.synchronization_function([src.name],
                                            functor=partial(
                                                get_le_boundary_functor,
                                                shear_offset=offset))

    def run_time_steps(steps, advance_offset):
        """Run collide / sync / stream / swap for ``steps`` iterations.

        ``offset[0]`` is only touched when ``advance_offset`` is true, so an
        integer offset set by the caller stays an integer.
        """
        dh.all_to_gpu()
        for _ in range(steps):
            dh.run_kernel(collision_kernel)

            sync_pdfs()
            dh.run_kernel(stream_kernel)

            dh.swap(src.name, dst.name)
            if advance_offset:
                offset[0] += shear_velocity
        dh.all_to_cpu()

    def reset_with_point_force():
        """Re-initialise the fields and apply a single-cell force."""
        dh.fill(rho.name, 1.0, ghost_layers=True)
        dh.run_kernel(init_kernel)
        dh.fill(u.name, 0.0, ghost_layers=True)
        dh.fill('force', 0.0, ghost_layers=True)
        dh.cpu_arrays[force.name][domain_size[0] // 3, 1, :] = [1e-2, -1e-1]

    def simulate_point_force(shear_offset, steps=20):
        """Run the point-force setup with a fixed boundary offset.

        Returns a copy of the velocity in the ``[:, -3:-1, :]`` slice.
        """
        reset_with_point_force()
        offset[0] = shear_offset
        run_time_steps(steps, advance_offset=False)
        return np.array(dh.gather_array(u.name)[:, -3:-1, :])

    # --- Part 1: shear flow against the analytical solution ---------------
    dh.run_kernel(init_kernel)

    shear_steps = 500
    run_time_steps(shear_steps, advance_offset=True)

    nu = lattice_viscosity_from_relaxation_rate(omega)
    h = domain_size[0]
    k_max = 100

    analytical_solution = get_solution_navier_stokes(
        np.linspace(0.5, h - 0.5, h), shear_steps, nu, shear_velocity, h,
        k_max)
    np.testing.assert_array_almost_equal(analytical_solution,
                                         dh.gather_array(u.name)[0, :, 0],
                                         decimal=5)

    # --- Part 2: a fixed offset must only shift the result ----------------
    shift = 10  # kept as int: it is used as the np.roll shift below

    vel_unshifted = simulate_point_force(0)
    vel_shifted = simulate_point_force(shift)

    vel_rolled = np.roll(vel_shifted, -shift, axis=0)

    np.testing.assert_array_almost_equal(vel_unshifted, vel_rolled)
Exemplo n.º 17
0
    def __init__(self, ast: KernelFunction, machine: Optional[MachineModel] = None,
                 assumed_layout: str = 'SoA', debug_print: bool = False, filename=None):
        """Create a kerncraft kernel using a pystencils AST

        Args:
            ast: pystencils ast
            machine: kerncraft machine model - specify this if kernel needs to be compiled
            assumed_layout: either 'SoA' or 'AoS' - if fields have symbolic sizes the layout of the index
                    coordinates is not known. In this case either a structures of array (SoA) or
                    array of structures (AoS) layout is assumed. Any value other than 'SoA' is
                    treated as 'AoS'.
            debug_print: if True, the loop stack, sources, destinations and flop counts are printed
                    at the end of construction. The flag is also stored as ``_keep_intermediates``.
            filename: stored as ``_filename``; not used otherwise inside this constructor

        Raises:
            ValueError: if no innermost loop is found in the AST
        """
        kerncraft.kernel.Kernel.__init__(self, machine)

        # Initialize state
        self.asm_block = None
        self._filename = filename

        self.kernel_ast = ast
        self.temporary_dir = TemporaryDirectory()
        self._keep_intermediates = debug_print

        # Loops
        inner_loops = [loop for loop in filtered_tree_iteration(ast, LoopOverCoordinate, stop_type=SympyAssignment)
                       if loop.is_innermost_loop]
        if not inner_loops:
            raise ValueError("No loop found in pystencils AST")
        if len(inner_loops) > 1:
            warnings.warn("pystencils AST contains multiple inner loops. "
                          "Only one can be analyzed - choosing first one")
        inner_loop = inner_loops[0]

        # Walk from the innermost loop up to the root and record every loop on the way;
        # the list is reversed afterwards so that the outermost loop comes first.
        self._loop_stack = []
        cur_node = inner_loop
        while cur_node is not None:
            if isinstance(cur_node, LoopOverCoordinate):
                loop_counter_sym = cur_node.loop_counter_symbol
                loop_info = (loop_counter_sym.name, cur_node.start, cur_node.stop, 1)
                # If the correct step were to be provided, all access within that step length will
                # also need to be passed to kerncraft: cur_node.step)
                self._loop_stack.append(loop_info)
            cur_node = cur_node.parent
        self._loop_stack = list(reversed(self._loop_stack))

        # Data sources & destinations
        self.sources = defaultdict(list)
        self.destinations = defaultdict(list)

        def get_layout_tuple(f):
            """Return the coordinate permutation (spatial + index coordinates) used for field ``f``."""
            if f.has_fixed_shape:
                return get_layout_from_strides(f.strides)
            layout_list = list(f.layout)
            for _ in range(f.index_dimensions):
                index_coord = max(layout_list) + 1
                if assumed_layout == 'SoA':
                    # structure of arrays: index coordinate goes to the front
                    layout_list.insert(0, index_coord)
                else:
                    # array of structures: index coordinate goes to the very end.
                    # Note that list.insert(-1, x) would put x *before* the last entry,
                    # i.e. in between the spatial coordinates.
                    layout_list.append(index_coord)
            return layout_list

        reads, writes = search_resolved_field_accesses_in_ast(inner_loop)
        for accesses, target_dict in [(reads, self.sources), (writes, self.destinations)]:
            for fa in accesses:
                # loop counter + offset for each spatial coordinate, followed by the index coordinate values
                coord = [sp.Symbol(LoopOverCoordinate.get_loop_counter_name(i), positive=True, integer=True) + off
                         for i, off in enumerate(fa.offsets)]
                coord += list(fa.idx_coordinate_values)
                layout = get_layout_tuple(fa.field)
                permuted_coord = [sp.sympify(coord[i]) for i in layout]
                target_dict[fa.field.name].append(permuted_coord)

        # Variables (arrays)
        fields_accessed = ast.fields_accessed
        for field in fields_accessed:
            layout = get_layout_tuple(field)
            permuted_shape = list(field.shape[i] for i in layout)
            self.set_variable(field.name, str(field.dtype), tuple(permuted_shape))

        # Scalars may be safely ignored
        # for param in ast.get_parameters():
        #     if not param.is_field_parameter:
        #         # self.set_variable(param.symbol.name, str(param.symbol.dtype), None)
        #         self.sources[param.symbol.name] = [None]

        # data type: taken from the first registered variable
        self.datatype = list(self.variables.values())[0][0]

        # flops: only operations that actually occur in the innermost loop are kept
        operation_count = count_operations_in_ast(inner_loop)
        all_flops = {
            '+': operation_count['adds'],
            '*': operation_count['muls'],
            '/': operation_count['divs'],
        }
        self._flops = {op: count for op, count in all_flops.items() if count != 0}
        self.check()

        if debug_print:
            from pprint import pprint
            print("-----------------------------  Loop Stack --------------------------")
            pprint(self._loop_stack)
            print("-----------------------------  Sources -----------------------------")
            pprint(self.sources)
            print("-----------------------------  Destinations ------------------------")
            pprint(self.destinations)
            print("-----------------------------  FLOPS -------------------------------")
            pprint(self._flops)
Exemplo n.º 18
0
def create_cuda_kernel(assignments,
                       function_name="kernel",
                       type_info=None,
                       indexing_creator=BlockIndexing,
                       iteration_slice=None,
                       ghost_layers=None,
                       skip_independence_check=False):
    """Build a ``KernelFunction`` AST for the GPU target / CUDA backend.

    Args:
        assignments: non-empty list of assignments; they are passed through ``add_types``
        function_name: name handed to the created ``KernelFunction``
        type_info: forwarded to ``add_types``
        indexing_creator: callable invoked as ``indexing_creator(field=..., iteration_slice=...)``;
                          the returned object has to provide ``coordinates``, ``guard`` and
                          ``index_variables``
        iteration_slice: slices describing the iteration region. If None it is derived from
                         ``ghost_layers``
        ghost_layers: only evaluated when ``iteration_slice`` is None. None means that the number
                      of ghost layers is determined from the (non-absolute) field accesses, an int
                      is used for both sides of every coordinate, otherwise a sequence of
                      ``(lower, upper)`` pairs - one per coordinate - is expected
        skip_independence_check: if True, ``add_types`` is called with its check flag disabled

    Returns:
        the ``KernelFunction``; the indexing object is attached to it as ``indexing``
    """
    assert assignments, "Assignments must not be empty!"
    fields_read, fields_written, assignments = add_types(assignments, type_info, not skip_independence_check)
    all_fields = fields_read.union(fields_written)
    read_only_fields = {f.name for f in fields_read - fields_written}

    buffers = {f for f in all_fields if FieldType.is_buffer(f) or FieldType.is_custom(f)}
    fields_without_buffers = all_fields - buffers

    # Collect all relative field accesses; absolute accesses are filtered once at the end
    field_accesses = set()
    for eq in assignments:
        field_accesses.update(eq.atoms(Field.Access))
    field_accesses = {fa for fa in field_accesses if not fa.is_absolute_access}

    common_shape = get_common_shape(fields_without_buffers)
    dim = len(common_shape)

    if iteration_slice is None:
        # determine iteration slice from ghost layers
        if ghost_layers is None:
            # determine required number of ghost layers from field access;
            # without any relative access no ghost layer is required
            required_ghost_layers = max((fa.required_ghost_layers for fa in field_accesses), default=0)
            ghost_layers = [(required_ghost_layers, required_ghost_layers)] * dim
        elif isinstance(ghost_layers, int):
            ghost_layers = [(ghost_layers, ghost_layers)] * dim
        # an upper ghost layer count of 0 must map to an open slice end, since -0 == 0
        iteration_slice = [slice(ghost_layers[i][0],
                                 -ghost_layers[i][1] if ghost_layers[i][1] > 0 else None)
                           for i in range(dim)]

    # Any non-buffer field serves as reference for the indexing and the loop strides
    reference_field = list(fields_without_buffers)[0]
    indexing = indexing_creator(field=reference_field, iteration_slice=iteration_slice)
    coordinates = indexing.coordinates

    # The loop counter symbols are defined through the coordinates given by the indexing
    cell_idx_symbols = [LoopOverCoordinate.get_loop_counter_symbol(i) for i, _ in enumerate(coordinates)]
    cell_idx_assignments = [SympyAssignment(symbol, value)
                            for symbol, value in zip(cell_idx_symbols, coordinates)]
    assignments = cell_idx_assignments + assignments

    block = Block(assignments)

    block = indexing.guard(block, common_shape)
    unify_shape_symbols(block, common_shape=common_shape, fields=fields_without_buffers)

    ast = KernelFunction(block,
                         Target.GPU,
                         Backend.CUDA,
                         make_python_function,
                         ghost_layers,
                         function_name,
                         assignments=assignments)
    ast.global_variables.update(indexing.index_variables)

    base_pointer_spec = [['spatialInner0']]
    base_pointer_info = {f.name: parse_base_pointer_info(base_pointer_spec, [2, 1, 0],
                                                         f.spatial_dimensions, f.index_dimensions)
                         for f in all_fields}

    field_to_fixed_coordinates = {f.name: cell_idx_symbols for f in all_fields}

    loop_strides = reference_field.shape

    if any(FieldType.is_buffer(f) for f in all_fields):
        resolve_buffer_accesses(ast, get_base_buffer_index(ast, indexing.coordinates, loop_strides), read_only_fields)

    resolve_field_accesses(ast, read_only_fields, field_to_base_pointer_info=base_pointer_info,
                           field_to_fixed_coordinates=field_to_fixed_coordinates)

    # If loop counter symbols have been explicitly used in the update equations (e.g. for built in periodicity),
    # they are defined here
    undefined_loop_counters = {}
    for symbol in ast.body.undefined_symbols:
        counter_index = LoopOverCoordinate.is_loop_counter_symbol(symbol)
        if counter_index is not None:
            undefined_loop_counters[counter_index] = symbol
    for i, loop_counter in undefined_loop_counters.items():
        ast.body.insert_front(SympyAssignment(loop_counter, indexing.coordinates[i]))

    # add the function which determines #blocks and #threads as additional member to KernelFunction node
    # this is used by the jit
    ast.indexing = indexing
    return ast