Пример #1
0
 def retag(self, tag_value=None):
     """
     Create a new Iteration object which is identical to ``self``, except
     for the tag.

     :param tag_value: (Optional) value handed to ``tagger`` to build the new
                       tag. Any value other than None is honoured, including
                       falsy ones such as ``0``. If None, an internally
                       generated value (``ntags() + 1``) is used instead.
     :returns: The object produced by ``self._rebuild``. If ``self`` carries
               no tag (``self.tag is None``), it is rebuilt with no changes.
     """
     if self.tag is None:
         return self._rebuild()
     # Compare against None explicitly: ``tag_value or ...`` would discard a
     # legitimate tag value of 0 and silently fall back to a generated one.
     properties = [
         tagger(ntags() + 1 if tag_value is None else tag_value)
         if i.name == 'tag' else i
         for i in self.properties
     ]
     return self._rebuild(properties=properties)
Пример #2
0
def test_create_elemental_functions_simple(simple_function):
    """
    Tag the last Iteration of every tree in ``simple_function`` as ELEMENTAL,
    apply the 'split' transformation, and compare the generated C code
    (main function followed by the elemental functions) with a reference.
    """
    expected = (
        """void foo(float *restrict a_vec, float *restrict b_vec,"""
        """ float *restrict c_vec, float *restrict d_vec)
{
  float (*restrict a) __attribute__((aligned(64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__((aligned(64))) = (float (*)) b_vec;
  float (*restrict c)[j_size] __attribute__((aligned(64))) = (float (*)[j_size]) c_vec;
  float (*restrict d)[j_size][k_size] __attribute__((aligned(64))) ="""
        """ (float (*)[j_size][k_size]) d_vec;
  for (int i = 0; i < 3; i += 1)
  {
    for (int j = 0; j < 5; j += 1)
    {
      f_0(0,7,(float*)a,(float*)b,(float*)c,(float*)d,i,i_size,j,j_size,k_size);
    }
  }
}
void f_0(const int k_start, const int k_finish,"""
        """ float *restrict a_vec, float *restrict b_vec,"""
        """ float *restrict c_vec, float *restrict d_vec,"""
        """ const int i, const int i_size, const int j, const int j_size, const int k_size)
{
  float (*restrict a) __attribute__((aligned(64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__((aligned(64))) = (float (*)) b_vec;
  float (*restrict c)[j_size] __attribute__((aligned(64))) = (float (*)[j_size]) c_vec;
  float (*restrict d)[j_size][k_size] __attribute__((aligned(64))) ="""
        """ (float (*)[j_size][k_size]) d_vec;
  for (int k = k_start; k < k_finish; k += 1)
  {
    a[i] = a[i] + b[i] + 5.0F;
    a[i] = -a[i]*c[i][j] + b[i]*d[i][j][k];
  }
}""")

    # Last Iteration of each tree: first give it a tag, then mark it ELEMENTAL
    targets = [tree[-1] for tree in retrieve_iteration_tree(simple_function)]
    tagged = [target._rebuild(properties=tagger(0)) for target in targets]
    mapper = {}
    for target, candidate in zip(targets, tagged):
        elemental_properties = candidate.properties + (ELEMENTAL, )
        mapper[target] = candidate._rebuild(properties=elemental_properties)

    function = Transformer(mapper).visit(simple_function)
    handle = transform(function, mode='split')
    block = List(body=[handle.nodes] + handle.elemental_functions)

    # Make output compiler independent: drop pragma and comment lines
    skipped = ('#pragma', '/*')
    lines = [line for line in str(block.ccode).split('\n')
             if not any(token in line for token in skipped)]
    assert '\n'.join(lines) == expected
Пример #3
0
    def _loop_blocking(self, nodes, state):
        """
        Apply loop blocking to :class:`Iteration` trees.

        Blocking is applied to parallel iteration trees. Heuristically, innermost
        dimensions are not blocked to maximize the trip count of the SIMD loops.

        Different heuristics may be specified by passing the keywords ``blockshape``
        and ``blockinner`` to the DLE. The former, a dictionary, is used to indicate
        a specific block size for each blocked dimension. For example, for the
        :class:`Iteration` tree: ::

            for i
              for j
                for k
                  ...

        one may provide ``blockshape = {i: 4, j: 7}``, in which case the
        two outer loops will be blocked, and the resulting 2-dimensional block
        will have size 4x7. The latter may be set to True to also block innermost
        parallel :class:`Iteration` objects.

        A third keyword, ``blockalways``, may be set to True to block a tree
        even if its outermost Iteration is not sequential.

        NOTE(review): the code below pairs the blocked loops *positionally* with
        the entries of ``blockshape`` (through ``zip``), or, if ``blockshape``
        has no ``len()``, assigns it as a single value to the first blocked
        loop. Confirm whether a dictionary is really the expected input.

        :param nodes: The Iteration/Expression tree to be transformed.
        :param state: Not read by this method.
        :returns: A 2-tuple ``(processed, info)``. ``processed`` is the
                  transformed tree. ``info`` is an empty dict if no loop was
                  blocked; otherwise it is a dict with the keys ``'arguments'``
                  (a list of ``BlockingArg``, one per blocked loop) and
                  ``'flags'`` (the string ``'blocking'``).
        """
        exclude_innermost = not self.params.get('blockinner', False)
        ignore_heuristic = self.params.get('blockalways', False)

        # Make sure loop blocking will span as many Iterations as possible
        fold = fold_blockable_tree(nodes, exclude_innermost)

        # mapper: root Iteration of a blockable nest -> its blocked replacement
        # blocked: original Iteration -> Dimension used to iterate over blocks
        mapper = {}
        blocked = OrderedDict()
        for tree in retrieve_iteration_tree(fold):
            # Is the Iteration tree blockable ?
            iterations = [i for i in tree if i.is_Parallel]
            if exclude_innermost:
                iterations = [i for i in iterations if not i.is_Vectorizable]
            if len(iterations) <= 1:
                # At least two candidate Iterations are required
                continue
            root = iterations[0]
            if not IsPerfectIteration().visit(root):
                # Illegal/unsupported
                continue
            if not tree[0].is_Sequential and not ignore_heuristic:
                # Heuristic: avoid polluting the generated code with blocked
                # nests (thus increasing JIT compilation time and affecting
                # readability) if the blockable tree isn't embedded in a
                # sequential loop (e.g., a timestepping loop)
                continue

            # Decorate intra-block iterations with an IterationProperty
            TAG = tagger(len(mapper))

            # Build all necessary Iteration objects, individually. These will
            # subsequently be composed to implement loop blocking.
            inter_blocks = []
            intra_blocks = []
            remainders = []
            for i in iterations:
                # The number of nests blocked so far makes the name unique
                name = "%s%d_block" % (i.dim.name, len(mapper))

                # Build Iteration over blocks
                dim = blocked.setdefault(i, Dimension(name=name))
                bsize = dim.symbolic_size
                bstart = i.limits[0]
                binnersize = i.dim.symbolic_extent + (i.offsets[1] -
                                                      i.offsets[0])
                # Stop before the leftover region that cannot fill a whole block
                bfinish = i.dim.symbolic_end - (binnersize % bsize) - 1
                inter_block = Iteration([],
                                        dim, [bstart, bfinish, bsize],
                                        offsets=i.offsets,
                                        properties=PARALLEL)
                inter_blocks.append(inter_block)

                # Build Iteration within a block
                limits = (dim, dim + bsize - 1, 1)
                intra_block = i._rebuild([],
                                         limits=limits,
                                         offsets=(0, 0),
                                         properties=i.properties +
                                         (TAG, ELEMENTAL))
                intra_blocks.append(intra_block)

                # Build unitary-increment Iteration over the 'leftover' region.
                # This will be used for remainder loops, executed when any
                # dimension size is not a multiple of the block size.
                remainder = i._rebuild(
                    [],
                    limits=[bfinish + 1, i.dim.symbolic_end, 1],
                    offsets=(i.offsets[1], i.offsets[1]))
                remainders.append(remainder)

            # Build blocked Iteration nest
            blocked_tree = compose_nodes(inter_blocks + intra_blocks +
                                         [iterations[-1].nodes])

            # Build remainder Iterations: one nest for each non-empty subset
            # ``c`` of the blocked dimensions that runs over its leftover region
            remainder_trees = []
            for n in range(len(iterations)):
                for c in combinations([i.dim for i in iterations], n + 1):
                    # First all inter-block Iterations (only for the
                    # dimensions that are not in their leftover region)
                    nodes = [
                        b._rebuild(properties=b.properties + (REMAINDER, ))
                        for b, r in zip(inter_blocks, remainders)
                        if r.dim not in c
                    ]
                    # Then intra-block or remainder, for each dim (in order)
                    properties = (REMAINDER, TAG, ELEMENTAL)
                    for b, r in zip(intra_blocks, remainders):
                        handle = r if b.dim in c else b
                        nodes.append(handle._rebuild(properties=properties))
                    nodes.extend([iterations[-1].nodes])
                    remainder_trees.append(compose_nodes(nodes))

            # Will replace with blocked loop tree
            mapper[root] = List(body=[blocked_tree] + remainder_trees)

        rebuilt = Transformer(mapper).visit(fold)

        # Finish unrolling any previously folded Iterations
        processed = unfold_blocked_tree(rebuilt)

        # Nothing was blocked: no additional arguments are required
        if not blocked:
            return processed, {}

        # Determine the block shape
        blockshape = self.params.get('blockshape')
        if not blockshape:
            # Use trivial heuristic for a suitable blockshape
            def heuristic(dim_size):
                ths = 8  # FIXME: This really needs to be improved
                return ths if dim_size > ths else 1

            # The callable itself (not its result) is stored for each loop
            blockshape = {k: heuristic for k in blocked.keys()}
        else:
            try:
                # Pair blocked loops with the provided entries, positionally;
                # ``zip`` silently truncates to the shorter of the two
                nitems, nrequired = len(blockshape), len(blocked)
                blockshape = {k: v for k, v in zip(blocked, blockshape)}
                if nitems > nrequired:
                    dle_warning("Provided 'blockshape' has more entries than "
                                "blocked loops; dropping entries ...")
                if nitems < nrequired:
                    dle_warning("Provided 'blockshape' has fewer entries than "
                                "blocked loops; dropping dimensions ...")
            except TypeError:
                # ``blockshape`` has no len(): treat it as a single value and
                # assign it to the first blocked loop
                blockshape = {list(blocked)[0]: blockshape}
            # Blocked loops left without an entry are mapped to None
            blockshape.update(
                {k: None
                 for k in blocked.keys() if k not in blockshape})

        # Track any additional arguments required to execute /state.nodes/
        arguments = [
            BlockingArg(v, k, blockshape[k]) for k, v in blocked.items()
        ]

        return processed, {'arguments': arguments, 'flags': 'blocking'}
Пример #4
0
    def _loop_blocking(self, nodes, state):
        """
        Apply loop blocking to PARALLEL Iteration trees.

        The behaviour is driven by two entries of ``self.params``:
        ``blockinner`` (default False), which also allows vectorizable
        Iterations to be blocked, and ``blockalways`` (default False), which
        blocks a tree even if its root Iteration is not sequential.

        :param nodes: The Iteration/Expression tree to be transformed.
        :param state: Not read by this method.
        :returns: A 2-tuple ``(processed, info)``, where ``processed`` is the
                  transformed tree and ``info`` is a dict whose ``'dimensions'``
                  entry lists the BlockDimension objects that were created.
        """
        skip_innermost = not self.params.get('blockinner', False)
        force_blocking = self.params.get('blockalways', False)

        # Fold first, so that blocking spans as many Iterations as possible
        folded = fold_blockable_tree(nodes, skip_innermost)

        mapper = {}
        blocked = OrderedDict()
        for tree in retrieve_iteration_tree(folded):
            # Select the candidate Iterations and check the tree is blockable
            iterations = [i for i in tree if i.is_Parallel]
            if skip_innermost:
                iterations = [i for i in iterations if not i.is_Vectorizable]
            if len(iterations) < 2:
                continue
            root = iterations[0]
            if not IsPerfectIteration().visit(root):
                # Illegal/unsupported
                continue
            if not (tree.root.is_Sequential or force_blocking):
                # Heuristic: only block nests embedded in a sequential loop
                # (e.g., a timestepping loop), to keep the generated code
                # readable and the JIT compilation time low
                continue

            # The number of nests blocked so far identifies this nest
            nblocked = len(mapper)

            # IterationProperty decorating the intra-block Iterations
            tag = tagger(nblocked)

            # Create the building blocks, one triple for each Iteration
            inter_blocks, intra_blocks, remainders = [], [], []
            for it in iterations:
                # Iteration over blocks
                name = "%s%d_block" % (it.dim.name, nblocked)
                bdim = blocked.setdefault(it, BlockDimension(it.dim, name=name))
                inner_size = it.symbolic_size + (it.offsets[1] - it.offsets[0])
                last_full = it.dim.symbolic_max - (inner_size % bdim.step)
                inter_blocks.append(Iteration([], bdim, last_full,
                                              offsets=it.offsets,
                                              properties=PARALLEL))

                # Iteration within a block
                intra_blocks.append(
                    it._rebuild([], limits=(bdim, bdim + bdim.step - 1, 1),
                                offsets=(0, 0),
                                properties=it.properties + (tag, ELEMENTAL))
                )

                # Unitary-increment Iteration over the 'leftover' region,
                # needed when a dimension size is not a multiple of the
                # block size
                remainders.append(
                    it._rebuild([],
                                limits=[last_full + 1, it.dim.symbolic_max, 1],
                                offsets=(it.offsets[1], it.offsets[1]))
                )

            # The blocked Iteration nest
            blocked_tree = compose_nodes(inter_blocks + intra_blocks +
                                         [iterations[-1].nodes])

            # The remainder nests: one for each non-empty subset of the
            # blocked dimensions
            remainder_properties = (REMAINDER, tag, ELEMENTAL)
            remainder_trees = []
            for size in range(1, len(iterations) + 1):
                for chosen in combinations([i.dim for i in iterations], size):
                    # Inter-block Iterations come first ...
                    chain = [
                        inter._rebuild(properties=inter.properties + (REMAINDER,))
                        for inter, rem in zip(inter_blocks, remainders)
                        if rem.dim not in chosen
                    ]
                    # ... then intra-block or remainder, for each dim (in order)
                    chain.extend(
                        (rem if intra.dim in chosen else intra)._rebuild(
                            properties=remainder_properties)
                        for intra, rem in zip(intra_blocks, remainders)
                    )
                    chain.append(iterations[-1].nodes)
                    remainder_trees.append(compose_nodes(chain))

            # The root will be replaced by the blocked nest plus remainders
            mapper[root] = List(body=[blocked_tree] + remainder_trees)

        rebuilt = Transformer(mapper).visit(folded)

        # Finish unrolling any previously folded Iterations
        processed = unfold_blocked_tree(rebuilt)

        return processed, {'dimensions': list(blocked.values())}
Пример #5
0
def test_create_efuncs_complex(complex_function):
    """
    Tag the last Iteration of every tree in ``complex_function`` as ELEMENTAL
    (each with its own tag), apply the 'split' transformation, and compare the
    generated C code (main function followed by the efuncs) with a reference.
    """
    expected = (
        """void foo(float *restrict a_vec, float *restrict b_vec,"""
        """ float *restrict c_vec, float *restrict d_vec)
{
  for (int i = 0; i <= 3; i += 1)
  {
    f_0((float *)a,(float *)b,i_size,i,4,0);
    for (int j = 0; j <= 5; j += 1)
    {
      f_1((float *)a,(float *)b,(float *)c,(float *)d,i_size,j_size,k_size,i,j,7,0);
    }
    f_2((float *)a,(float *)b,i_size,i,4,0);
  }
}
void f_0(float *restrict a_vec, float *restrict b_vec,"""
        """ const int i_size, const int i, const int sf_M, const int sf_m)
{
  float (*restrict a) __attribute__ ((aligned (64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__ ((aligned (64))) = (float (*)) b_vec;
  for (int s = sf_m; s <= sf_M; s += 1)
  {
    b[i] = a[i] + pow(b[i], 2) + 3;
  }
}
void f_1(float *restrict a_vec, float *restrict b_vec,"""
        """ float *restrict c_vec, float *restrict d_vec,"""
        """ const int i_size, const int j_size, const int k_size,"""
        """ const int i, const int j, const int kf_M, const int kf_m)
{
  float (*restrict a) __attribute__ ((aligned (64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__ ((aligned (64))) = (float (*)) b_vec;
  float (*restrict c)[j_size] __attribute__ ((aligned (64))) = (float (*)[j_size]) c_vec;
  float (*restrict d)[j_size][k_size] __attribute__ ((aligned (64))) ="""
        """ (float (*)[j_size][k_size]) d_vec;
  for (int k = kf_m; k <= kf_M; k += 1)
  {
    a[i] = a[i]*b[i]*c[i][j]*d[i][j][k];
    a[i] = 4*(a[i] + c[i][j])*(b[i] + d[i][j][k]);
  }
}
void f_2(float *restrict a_vec, float *restrict b_vec,"""
        """ const int i_size, const int i, const int qf_M, const int qf_m)
{
  float (*restrict a) __attribute__ ((aligned (64))) = (float (*)) a_vec;
  float (*restrict b) __attribute__ ((aligned (64))) = (float (*)) b_vec;
  for (int q = qf_m; q <= qf_M; q += 1)
  {
    a[i] = 8.0F*a[i] + 6.0F/b[i];
  }
}""")

    # Last Iteration of each tree: first give each a distinct tag, then mark
    # it ELEMENTAL
    targets = [tree[-1] for tree in retrieve_iteration_tree(complex_function)]
    tagged = [target._rebuild(properties=tagger(position))
              for position, target in enumerate(targets)]
    mapper = {}
    for target, candidate in zip(targets, tagged):
        elemental_properties = candidate.properties + (ELEMENTAL, )
        mapper[target] = candidate._rebuild(properties=elemental_properties)

    function = Transformer(mapper).visit(complex_function)
    handle = transform(function, mode='split')
    block = List(body=[handle.nodes] + handle.efuncs)

    # Make output compiler independent: drop pragma and comment lines
    skipped = ('#pragma', '/*')
    lines = [line for line in str(block.ccode).split('\n')
             if not any(token in line for token in skipped)]
    assert '\n'.join(lines) == expected