Example #1
def process(func, state):
    """
    Apply ``func`` to the IETs in ``state._efuncs``, and update ``state`` accordingly.
    """
    # Create a Call graph. `func` will be applied to each node in the Call graph.
    # `func` might change an `efunc` signature; the Call graph will be used to
    # propagate such change through the `efunc` callers
    dag = DAG(nodes=['root'])
    queue = ['root']
    while queue:
        caller = queue.pop(0)
        callees = FindNodes(Call).visit(state._efuncs[caller])
        for callee in filter_ordered([i.name for i in callees]):
            if callee in state._efuncs:  # Exclude foreign Calls, e.g., MPI calls
                try:
                    dag.add_node(callee)
                    queue.append(callee)
                except KeyError:
                    # `callee` already in `dag`
                    pass
                dag.add_edge(callee, caller)
    assert dag.size == len(state._efuncs)

    # Apply `func`
    for i in dag.topological_sort():
        state._efuncs[i], metadata = func(state._efuncs[i])

        # Track any new Dimensions introduced by `func`
        state._dimensions.extend(list(metadata.get('dimensions', [])))

        # Track any new #include required by `func`
        state._includes.extend(list(metadata.get('includes', [])))
        state._includes = filter_ordered(state._includes)

        # Track any new ElementalFunctions
        state._efuncs.update(OrderedDict([(i.name, i)
                                          for i in metadata.get('efuncs', [])]))

        # If there's a change to the `args` and the `iet` is an efunc, then
        # we must update the call sites as well, since the number of arguments
        # passed down to the efunc has just increased
        args = as_tuple(metadata.get('args'))
        if args:
            # `extif` avoids redundant updates to the parameters list, due
            # to multiple children wanting to add the same input argument
            extif = lambda v: list(v) + [e for e in args if e not in v]
            stack = [i] + dag.all_downstreams(i)
            for n in stack:
                efunc = state._efuncs[n]
                calls = [c for c in FindNodes(Call).visit(efunc) if c.name in stack]
                mapper = {c: c._rebuild(arguments=extif(c.arguments)) for c in calls}
                efunc = Transformer(mapper).visit(efunc)
                if efunc.is_Callable:
                    efunc = efunc._rebuild(parameters=extif(efunc.parameters))
                state._efuncs[n] = efunc
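
The `extif` helper above only appends arguments that are not already present, so repeated updates from multiple callees stay idempotent. A standalone sketch of that idiom (the argument names are illustrative, not taken from Devito):

# Standalone sketch of the `extif` idiom used in Example #1 (names are illustrative)
args = ('nthreads', 'bufsize')
extif = lambda v: list(v) + [e for e in args if e not in v]

print(extif(['x_size', 'nthreads']))   # ['x_size', 'nthreads', 'bufsize']
print(extif(extif(['x_size'])))        # applying it twice adds nothing new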
Example #2
File: space.py  Project: speglich/devito
 def reorder(cls, items, relations):
     if not all(isinstance(i, AbstractInterval) for i in items):
         raise ValueError(
             "Cannot create an IntervalGroup from objects of type [%s]" %
             ', '.join(str(type(i)) for i in items))
     # The relations are between dimensions, not intervals. So we take
     # care of that here
     ordering = filter_ordered(toposort(relations) + [i.dim for i in items])
     return sorted(items, key=lambda i: ordering.index(i.dim))
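
The idiom in Example #2 derives a dimension ordering from the relations (topological sort, with any dimension not covered by a relation appended afterwards and duplicates dropped), then sorts the intervals by their dimension's position in that ordering. A plain-Python sketch, with strings standing in for Dimensions and the toposort result hard-coded:

# Plain-Python sketch of the reordering idiom (strings stand in for Dimensions)
relations = [('x', 'y'), ('y', 'z')]
toposorted = ['x', 'y', 'z']                  # stand-in for toposort(relations)
item_dims = ['z', 'w', 'x', 'y']              # dimensions of the intervals

seen = set()
ordering = [d for d in toposorted + item_dims if not (d in seen or seen.add(d))]
print(sorted(item_dims, key=ordering.index))  # ['x', 'y', 'z', 'w']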
Example #3
    def __init__(self, expr, dtype=None):
        assert isinstance(expr, Eq)
        assert isinstance(expr.lhs, (Symbol, Indexed))
        self.expr = expr
        self.dtype = dtype

        # Traverse /expression/ to determine meta information
        # Note: at this point, expressions have already been indexified
        self.reads = [
            i for i in retrieve_terminals(self.expr.rhs)
            if isinstance(i, (types.Indexed, types.Symbol))
        ]
        self.reads = filter_ordered(self.reads)
        self.functions = [self.write] + [i.base.function for i in self.reads]
        self.functions = filter_ordered(self.functions)
        # Filter collected dimensions and functions
        self.dimensions = flatten(i.indices for i in self.functions)
        self.dimensions = filter_ordered(self.dimensions)
Example #4
def default_rules(obj, functions):
    def generate_subs(deriv_order, function, index):
        dim = retrieve_dimensions(index)[0]

        if dim.is_Time:
            fd_order = function.time_order
        elif dim.is_Space:
            fd_order = function.space_order
        else:
            # Shouldn't arrive here
            raise TypeError("Dimension type not recognised")

        subs = {}

        mapper = {dim: index}

        indices, x0 = generate_indices(function,
                                       dim,
                                       fd_order,
                                       side=None,
                                       x0=mapper)

        coeffs = sympy.finite_diff_weights(deriv_order, indices, x0)[-1][-1]

        for j in range(len(coeffs)):
            subs.update({
                function._coeff_symbol(indices[j], deriv_order, function, index):
                coeffs[j]
            })

        return subs

    # Determine which 'rules' are missing
    sym = get_sym(functions)
    terms = obj.find(sym)
    args_present = filter_ordered(term.args[1:] for term in terms)

    subs = obj.substitutions
    if subs:
        args_provided = [(i.deriv_order, i.function, i.index)
                         for i in subs.coefficients]
    else:
        args_provided = []

    # NOTE: Do we want to throw a warning if the same arg has
    # been provided twice?
    args_provided = list(set(args_provided))
    not_provided = [
        i for i in args_present if i not in frozenset(args_provided)
    ]

    rules = {}
    for i in not_provided:
        rules = {**rules, **generate_subs(*i)}

    return rules
Example #5
def generate_nthreads(nthreads, args, level):
    ret = [((i.name, args[i.name]), ) for i in nthreads]

    # On the KNL, also try running with a different number of hyperthreads
    if level == 'aggressive' and configuration['platform'] == 'knl':
        ret.extend([((i.name, psutil.cpu_count()), ) for i in nthreads])
        ret.extend([((i.name, psutil.cpu_count() // 2), ) for i in nthreads])
        ret.extend([((i.name, psutil.cpu_count() // 4), ) for i in nthreads])

    return filter_ordered(ret)
Example #6
 def _dist_datamap(self):
     """
     Mapper ``M : MPI rank -> required sparse data``.
     """
     ret = {}
     for i, s in enumerate(self._support):
         # Sparse point `i` is "required" by the following ranks
         for r in self.grid.distributor.glb_to_rank(s):
             ret.setdefault(r, []).append(i)
     return {k: filter_ordered(v) for k, v in ret.items()}
Example #7
File: sparse.py  Project: opesci/devito
 def _dist_datamap(self):
     """
     Mapper ``M : MPI rank -> required sparse data``.
     """
     ret = {}
     for i, s in enumerate(self._support):
         # Sparse point `i` is "required" by the following ranks
         for r in self.grid.distributor.glb_to_rank(s):
             ret.setdefault(r, []).append(i)
     return {k: filter_ordered(v) for k, v in ret.items()}
Example #8
def init_configuration(configuration=configuration, env_vars_mapper=env_vars_mapper,
                       env_vars_deprecated=env_vars_deprecated):
    # Populate `configuration` with user-provided options
    if environ.get('DEVITO_CONFIG') is None:
        # It is important to configure `platform`, `compiler` and `backend` in this order
        process_order = filter_ordered(['platform', 'compiler', 'backend'] +
                                       list(env_vars_mapper.values()))
        queue = sorted(env_vars_mapper.items(), key=lambda i: process_order.index(i[1]))
        unprocessed = OrderedDict([(v, environ.get(k, configuration._defaults[v]))
                                   for k, v in queue])

        # Handle deprecated env vars
        mapper = dict(queue)
        for k, (v, msg) in env_vars_deprecated.items():
            if environ.get(k):
                warning("`%s` is deprecated. %s" % (k, msg))
                if environ.get(v):
                    warning("Both `%s` and `%s` set. Ignoring `%s`" % (k, v, k))
                else:
                    warning("Setting `%s=%s`" % (v, environ[k]))
                    unprocessed[mapper[v]] = environ[k]
    else:
        # Attempt reading from the specified configuration file
        raise NotImplementedError("Devito doesn't support configuration via file yet.")

    # Parameters validation
    for k, v in unprocessed.items():
        try:
            items = v.split(';')
            # Env variable format: 'var=k1:v1;k2:v2;k3:v3;...'
            keys, values = zip(*[i.split(':') for i in items])
            # Casting
            values = [eval(i) for i in values]
        except AttributeError:
            # Env variable format: 'var=v', 'v' is not a string
            keys = [v]
            values = []
        except ValueError:
            # Env variable format: 'var=k1;k2:v2...' or even just 'var=v'
            keys = [i.split(':')[0] for i in items]
            values = []
            # Cast to integer
            for i, j in enumerate(list(keys)):
                try:
                    keys[i] = int(j)
                except (TypeError, ValueError):
                    keys[i] = j
        if len(keys) == len(values):
            configuration.update(k, dict(zip(keys, values)))
        elif len(keys) == 1:
            configuration.update(k, keys[0])
        else:
            configuration.update(k, keys)

    configuration.initialize()
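
The `try/except` block above dispatches on the three env-variable formats described in the comments. A self-contained sketch of that dispatch, with made-up values (the integer-casting of keys in the ValueError branch is omitted for brevity):

# Self-contained sketch of the env-variable parsing above (illustrative values only)
def parse(v):
    try:
        items = v.split(';')
        keys, values = zip(*[i.split(':') for i in items])  # 'k1:v1;k2:v2;...'
        values = [eval(i) for i in values]
    except AttributeError:
        keys, values = [v], []                               # 'v' is not a string
    except ValueError:
        keys, values = [i.split(':')[0] for i in items], []  # 'k1;k2;...' or plain 'v'
    return keys, values

print(parse('openmp:True;blockinner:False'))  # (('openmp', 'blockinner'), [True, False])
print(parse('advanced'))                      # (['advanced'], [])
print(parse(4))                               # ([4], [])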
Example #9
def mark_parallel(analysis):
    """Update the ``analysis`` detecting the ``SEQUENTIAL`` and ``PARALLEL``
    Iterations within ``analysis.iet``."""
    properties = OrderedDict()
    for tree in analysis.trees:
        for depth, i in enumerate(tree):
            if properties.get(i) is SEQUENTIAL:
                # Speed-up analysis
                continue

            if i.uindices:
                # Only ++/-- increments of iteration variables are supported
                properties.setdefault(i, []).append(SEQUENTIAL)
                continue

            # Get all dimensions up to and including Iteration /i/, grouped by Iteration
            dims = [filter_ordered(j.dimensions) for j in tree[:depth + 1]]
            # Get all dimensions up to and including Iteration /i-1/
            prev = flatten(dims[:-1])
            # Get all dimensions up to and including Iteration /i/
            dims = flatten(dims)

            # The i-th Iteration is PARALLEL if for all dependences (d_1, ..., d_n):
            # test0 := (d_1, ..., d_{i-1}) > 0, OR
            # test1 := (d_1, ..., d_i) = 0
            is_parallel = True

            # The i-th Iteration is PARALLEL_IF_ATOMIC if for all dependences:
            # test0 OR test1 OR the write is an associative and commutative increment
            is_atomic_parallel = True

            for dep in analysis.scopes[i].d_all:
                test0 = len(prev) > 0 and any(dep.is_carried(d) for d in prev)
                test1 = all(dep.is_indep(d) for d in dims)
                test2 = all(dep.is_reduce_atmost(d) for d in prev) and dep.is_indep(i.dim)
                if not (test0 or test1 or test2):
                    is_parallel = False
                    if not dep.is_increment:
                        is_atomic_parallel = False
                        break

            if is_parallel:
                properties.setdefault(i, []).append(PARALLEL)
            elif is_atomic_parallel:
                properties.setdefault(i, []).append(PARALLEL_IF_ATOMIC)
            else:
                properties.setdefault(i, []).append(SEQUENTIAL)

    # Reduction (e.g., SEQUENTIAL takes priority over PARALLEL)
    priorities = {PARALLEL: 0, PARALLEL_IF_ATOMIC: 1, SEQUENTIAL: 2}
    properties = OrderedDict([(k, max(v, key=lambda i: priorities[i]))
                              for k, v in properties.items()])

    analysis.update(properties)
Example #10
File: openmp.py  Project: speglich/devito
    def _make_clauses(cls, **kwargs):
        kwargs['chunk_size'] = False
        clauses = super()._make_clauses(**kwargs)

        indexeds = FindSymbols('indexeds').visit(kwargs['nodes'])
        deviceptrs = filter_ordered(i.name for i in indexeds
                                    if i.function._mem_local)
        if deviceptrs:
            clauses.append("is_device_ptr(%s)" % ",".join(deviceptrs))

        return clauses
Example #11
 def _dist_gather_mask(self):
     """
     A mask to index into the ``data`` received upon returning from
     ``self._dist_alltoall``. This mask creates a new data array in which
     duplicate sparse data values have been discarded. The resulting data
     array can thus be used to populate ``self.data``.
     """
     ret = list(self._dist_scatter_mask)
     mask = ret[self._sparse_position]
     ret[self._sparse_position] = [mask.tolist().index(i)
                                   for i in filter_ordered(mask)]
     return tuple(ret)
Example #12
File: sparse.py  Project: opesci/devito
 def _dist_gather_mask(self):
     """
     A mask to index into the ``data`` received upon returning from
     ``self._dist_alltoall``. This mask creates a new data array in which
     duplicate sparse data values have been discarded. The resulting data
     array can thus be used to populate ``self.data``.
     """
     ret = list(self._dist_scatter_mask)
     mask = ret[self._sparse_position]
     ret[self._sparse_position] = [mask.tolist().index(i)
                                   for i in filter_ordered(mask)]
     return tuple(ret)
Example #13
File: graph.py  Project: nw0/devito
    def __init__(self, exprs, **kwargs):
        # Check input legality
        mapper = OrderedDict([(i.lhs, i) for i in exprs])
        if len(set(mapper)) != len(mapper):
            raise DSEException(
                "Found redundant node, cannot build TemporariesGraph.")

        # Construct Temporaries, tracking reads and readby
        tensor_map = DefaultOrderedDict(list)
        for i in mapper:
            tensor_map[as_symbol(i)].append(i)
        reads = DefaultOrderedDict(set)
        readby = DefaultOrderedDict(set)
        for k, v in mapper.items():
            handle = retrieve_terminals(v.rhs)
            for i in list(handle):
                if i.is_Indexed:
                    for idx in i.indices:
                        handle |= retrieve_terminals(idx)
            reads[k].update(
                set(flatten([tensor_map.get(as_symbol(i), [])
                             for i in handle])))
            for i in reads[k]:
                readby[i].add(k)

        # Make sure read-after-writes are honored for scalar temporaries
        processed = [i for i in mapper if i.is_Indexed]
        queue = [i for i in mapper if i not in processed]
        while queue:
            k = queue.pop(0)
            if not readby[k]:
                processed.insert(0, k)
            elif all(i in processed for i in readby[k]):
                index = min(processed.index(i) for i in readby[k])
                processed.insert(index, k)
            else:
                queue.append(k)

        # Build up the TemporariesGraph
        temporaries = [(i,
                        Temporary(*mapper[i].args,
                                  inc=q_inc(mapper[i]),
                                  reads=reads[i],
                                  readby=readby[i])) for i in processed]
        super(TemporariesGraph, self).__init__(temporaries, **kwargs)

        # Determine indices along the space and time dimensions
        terms = [
            v for k, v in self.items() if v.is_tensor and not q_indirect(k)
        ]
        indices = filter_ordered(flatten([i.function.indices for i in terms]))
        self.space_indices = tuple(i for i in indices if i.is_Space)
        self.time_indices = tuple(i for i in indices if i.is_Time)
Example #14
File: graph.py  Project: woxin5295/devito
    def __init__(self, exprs, **kwargs):
        # Always convert to SSA
        exprs = convert_to_SSA(exprs)
        mapper = OrderedDict([(i.lhs, i) for i in exprs])
        assert len(set(mapper)) == len(exprs), "not SSA Cluster?"

        # Construct the Nodes, tracking reads and readby
        tensor_map = DefaultOrderedDict(list)
        for i in mapper:
            tensor_map[as_symbol(i)].append(i)
        reads = DefaultOrderedDict(set)
        readby = DefaultOrderedDict(set)
        for k, v in mapper.items():
            handle = retrieve_terminals(v.rhs)
            for i in list(handle):
                if i.is_Indexed:
                    for idx in i.indices:
                        handle |= retrieve_terminals(idx)
            reads[k].update(
                set(flatten([tensor_map.get(as_symbol(i), [])
                             for i in handle])))
            for i in reads[k]:
                readby[i].add(k)

        # Make sure read-after-writes are honored for scalar temporaries
        processed = [i for i in mapper if i.is_Indexed]
        queue = [i for i in mapper if i not in processed]
        while queue:
            k = queue.pop(0)
            if not readby[k]:
                processed.insert(0, k)
            elif all(i in processed for i in readby[k]):
                index = min(processed.index(i) for i in readby[k])
                processed.insert(index, k)
            else:
                queue.append(k)

        # Build up the FlowGraph
        temporaries = [(i,
                        Node(*mapper[i].args,
                             inc=q_inc(mapper[i]),
                             reads=reads[i],
                             readby=readby[i])) for i in processed]
        super(FlowGraph, self).__init__(temporaries, **kwargs)

        # Determine indices along the space and time dimensions
        terms = [
            v for k, v in self.items() if v.is_tensor and not q_indirect(k)
        ]
        indices = filter_ordered(flatten([i.function.indices for i in terms]))
        self.space_indices = tuple(i for i in indices if i.is_Space)
        self.time_indices = tuple(i for i in indices if i.is_Time)
Example #15
    def __init__(self, intervals, sub_iterators=None, directions=None):
        super(IterationSpace, self).__init__(intervals)

        # Normalize sub-iterators
        sub_iterators = sub_iterators or {}
        self._sub_iterators = frozendict([(k, tuple(filter_ordered(as_tuple(v))))
                                          for k, v in sub_iterators.items()])

        # Normalize directions
        if directions is None:
            self._directions = frozendict([(i.dim, Any) for i in self.intervals])
        else:
            self._directions = frozendict(directions)
Example #16
    def _make_waitprefetch(self, iet, sync_ops, pieces, *args):
        ff = SharedData._field_flag

        waits = []
        objs = filter_ordered(pieces.objs.get(s) for s in sync_ops)
        for sdata, threads in objs:
            wait = BusyWait(
                CondNe(FieldFromComposite(ff, sdata[threads.index]), 1))
            waits.append(wait)

        iet = List(header=c.Comment("Wait for the arrival of prefetched data"),
                   body=waits + [BlankLine, iet])

        return iet
Example #17
def mark_parallel(analysis):
    """Update the ``analysis`` detecting the ``SEQUENTIAL`` and ``PARALLEL``
    Iterations within ``analysis.iet``."""
    properties = OrderedDict()
    for tree in analysis.trees:
        for depth, i in enumerate(tree):
            if i in properties:
                continue

            if i.uindices:
                # Only ++/-- increments of iteration variables are supported
                properties[i] = SEQUENTIAL
                continue

            # Get all dimensions up to and including Iteration /i/, grouped by Iteration
            dims = [filter_ordered(j.dimensions) for j in tree[:depth + 1]]
            # Get all dimensions up to and including Iteration /i-1/
            prev = flatten(dims[:-1])
            # Get all dimensions up to and including Iteration /i/
            dims = flatten(dims)

            # The i-th Iteration is PARALLEL if for all dependences (d_1, ..., d_n):
            # test0 := (d_1, ..., d_{i-1}) > 0, OR
            # test1 := (d_1, ..., d_i) = 0
            is_parallel = True

            # The i-th Iteration is PARALLEL_IF_ATOMIC if for all dependences:
            # test0 OR test1 OR the write is an associative and commutative increment
            is_atomic_parallel = True

            for dep in analysis.scopes[i].d_all:
                test0 = len(prev) > 0 and any(dep.is_carried(d) for d in prev)
                test1 = all(dep.is_indep(d) for d in dims)
                test2 = all(dep.is_reduce_atmost(d)
                            for d in prev) and dep.is_indep(i.dim)
                if not (test0 or test1 or test2):
                    is_parallel = False
                    if not dep.is_increment:
                        is_atomic_parallel = False
                        break

            if is_parallel:
                properties[i] = PARALLEL
            elif is_atomic_parallel:
                properties[i] = PARALLEL_IF_ATOMIC
            else:
                properties[i] = SEQUENTIAL

    analysis.update(properties)
Example #18
def init_configuration(configuration=configuration,
                       env_vars_mapper=env_vars_mapper):
    # Populate /configuration/ with user-provided options
    if environ.get('DEVITO_CONFIG') is None:
        # At init time, it is important to first configure the compiler, then
        # the backend (which is impacted by the compiler), and finally everything
        # else, in any order
        process_order = filter_ordered(['compiler', 'backend'] +
                                       list(env_vars_mapper.values()))
        queue = sorted(env_vars_mapper.items(),
                       key=lambda i: process_order.index(i[1]))
        unprocessed = OrderedDict([
            (v, environ.get(k, configuration._defaults[v])) for k, v in queue
        ])
    else:
        # Attempt reading from the specified configuration file
        raise NotImplementedError(
            "Devito doesn't support configuration via file yet.")

    # Parameters validation
    for k, v in unprocessed.items():
        try:
            items = v.split(';')
            # Env variable format: 'var=k1:v1;k2:v2;k3:v3;...'
            keys, values = zip(*[i.split(':') for i in items])
            # Casting
            values = [eval(i) for i in values]
        except AttributeError:
            # Env variable format: 'var=v', 'v' is not a string
            keys = [v]
            values = []
        except ValueError:
            # Env variable format: 'var=k1;k2:v2...' or even just 'var=v'
            keys = [i.split(':')[0] for i in items]
            values = []
            # Cast to integer
            for i, j in enumerate(list(keys)):
                try:
                    keys[i] = int(j)
                except (TypeError, ValueError):
                    keys[i] = j
        if len(keys) == len(values):
            configuration.update(k, dict(zip(keys, values)))
        elif len(keys) == 1:
            configuration.update(k, keys[0])
        else:
            configuration.update(k, keys)

    configuration.initialize()
Example #19
 def visit_Iteration(self, o, subs={}, offsets=defaultdict(set)):
     nodes = self.visit(o.children, subs=subs, offsets=offsets)
     if o.dim.is_Buffered:
         # For buffered dimensions insert the explicit
         # definition of buffered variables, e.g. t+1 => t1
         init = []
         for i, off in enumerate(filter_ordered(offsets[o.dim])):
             vname = Symbol("%s%d" % (o.dim.name, i))
             value = (o.dim.parent + off) % o.dim.modulo
             init.append(UnboundedIndex(vname, value, value))
             subs[o.dim + off] = LoweredDimension(vname.name, o.dim, off)
         # Always lower to symbol
         subs[o.dim.parent] = Symbol(o.dim.parent.name)
         return o._rebuild(index=o.dim.parent.name, uindices=init)
     else:
         return o._rebuild(*nodes)
Example #20
File: sparse.py  Project: opesci/devito
    def _index_matrix(self, offset):
        # Note about the use of *memoization*
        # Since this method is called by `_interpolation_indices`, using
        # memoization avoids a proliferation of symbolically identical
        # ConditionalDimensions for a given set of indirection indices

        # List of indirection indices for all adjacent grid points
        index_matrix = [tuple(idx + ii + offset for ii, idx
                              in zip(inc, self._coordinate_indices))
                        for inc in self._point_increments]

        # A unique symbol for each indirection index
        indices = filter_ordered(flatten(index_matrix))
        points = OrderedDict([(p, Symbol(name='ii_%s_%d' % (self.name, i)))
                              for i, p in enumerate(indices)])

        return index_matrix, points
Example #21
    def _index_matrix(self, offset):
        # Note about the use of *memoization*
        # Since this method is called by `_interpolation_indices`, using
        # memoization avoids a proliferation of symbolically identical
        # ConditionalDimensions for a given set of indirection indices

        # List of indirection indices for all adjacent grid points
        index_matrix = [tuple(idx + ii + offset for ii, idx
                              in zip(inc, self._coordinate_indices))
                        for inc in self._point_increments]

        # A unique symbol for each indirection index
        indices = filter_ordered(flatten(index_matrix))
        points = OrderedDict([(p, Symbol(name='ii_%s_%d' % (self.name, i)))
                              for i, p in enumerate(indices)])

        return index_matrix, points
Example #22
File: graph.py  Project: opesci/devito
    def __init__(self, exprs, **kwargs):
        # Always convert to SSA
        exprs = makeit_ssa(exprs)
        mapper = OrderedDict([(i.lhs, i) for i in exprs])
        assert len(set(mapper)) == len(exprs), "not SSA Cluster?"

        # Construct the Nodes, tracking reads and readby
        tensor_map = DefaultOrderedDict(list)
        for i in mapper:
            tensor_map[as_symbol(i)].append(i)
        reads = DefaultOrderedDict(set)
        readby = DefaultOrderedDict(set)
        for k, v in mapper.items():
            handle = retrieve_terminals(v.rhs)
            for i in list(handle):
                if i.is_Indexed:
                    for idx in i.indices:
                        handle |= retrieve_terminals(idx)
            reads[k].update(set(flatten([tensor_map.get(as_symbol(i), [])
                                         for i in handle])))
            for i in reads[k]:
                readby[i].add(k)

        # Make sure read-after-writes are honored for scalar nodes
        processed = [i for i in mapper if i.is_Indexed]
        queue = [i for i in mapper if i not in processed]
        while queue:
            k = queue.pop(0)
            if not readby[k] or k in readby[k]:
                processed.insert(0, k)
            elif all(i in processed for i in readby[k]):
                index = min(processed.index(i) for i in readby[k])
                processed.insert(index, k)
            else:
                queue.append(k)

        # Build up the FlowGraph
        nodes = [(i, Node(mapper[i], reads=reads[i], readby=readby[i]))
                 for i in processed]
        super(FlowGraph, self).__init__(nodes, **kwargs)

        # Determine indices along the space and time dimensions
        terms = [v for k, v in self.items() if v.is_Tensor and not q_indirect(k)]
        indices = filter_ordered(flatten([i.function.indices for i in terms]))
        self.space_indices = tuple(i for i in indices if i.is_Space)
        self.time_indices = tuple(i for i in indices if i.is_Time)
Example #23
    def union(self, others):
        """
        Create a new HaloScheme representing the union of ``self`` with other HaloSchemes.
        """
        fmapper = dict(self.fmapper)
        for i in as_tuple(others):
            for k, v in i.fmapper.items():
                hse = fmapper.setdefault(k, v)
                # At this point, the `loc_indices` must match
                if hse.loc_indices != v.loc_indices:
                    raise ValueError(
                        "Cannot compute the union of one or more HaloScheme "
                        "when the `loc_indices` differ")
                halos = tuple(filter_ordered(hse.halos + v.halos))
                fmapper[k] = HaloSchemeEntry(hse.loc_indices, halos)

        return HaloScheme(fmapper=fmapper)
Example #24
def generate_block_shapes(blockable, args, level):
    if not blockable:
        raise ValueError

    # Max attemptable block shape
    max_bs = tuple((d.step.name, d.max_step.subs(args)) for d in blockable)

    # Attempted block shapes:
    # 1) Defaults (basic mode)
    ret = [
        tuple((d.step.name, v) for d in blockable)
        for v in options['blocksize']
    ]
    # 2) Always try the entire iteration space (degenerate block)
    ret.append(max_bs)
    # 3) More attempts if auto-tuning in aggressive mode
    if level in ['aggressive', 'max']:
        # Ramp up to larger block shapes
        handle = tuple((i, options['blocksize'][-1]) for i, _ in ret[0])
        for i in range(3):
            new_bs = tuple((b, v * 2) for b, v in handle)
            ret.insert(ret.index(handle) + 1, new_bs)
            handle = new_bs

        handle = []
        # Extended shuffling for the smaller block shapes
        for bs in ret[:4]:
            for i in ret:
                handle.append(bs[:-1] + (i[-1], ))
        # Some more shuffling for all block shapes
        for bs in list(ret):
            ncombs = len(bs)
            for i in range(ncombs):
                for j in combinations(dict(bs), i + 1):
                    handle.append(
                        tuple((b, v * 2 if b in j else v) for b, v in bs))
        ret.extend(handle)

    # Drop unnecessary attempts:
    # 1) Block shapes exceeding the iteration space extent
    ret = [i for i in ret if all(dict(i)[k] <= v for k, v in max_bs)]
    # 2) Redundant block shapes
    ret = filter_ordered(ret)

    return ret
Example #25
def iet_make(stree):
    """
    Create an Iteration/Expression tree (IET) from a :class:`ScheduleTree`.
    """
    nsections = 0
    queues = OrderedDict()
    for i in stree.visit():
        if i == stree:
            # We hit this handle at the very end of the visit
            return List(body=queues.pop(i))

        elif i.is_Exprs:
            exprs = [Expression(e) for e in i.exprs]
            body = [ExpressionBundle(i.shape, i.ops, i.traffic, body=exprs)]

        elif i.is_Conditional:
            body = [Conditional(i.guard, queues.pop(i))]

        elif i.is_Iteration:
            # Generate `uindices`
            uindices = []
            for d, offs in i.sub_iterators:
                modulo = len(offs)
                for n, o in enumerate(filter_ordered(offs)):
                    value = (i.dim + o) % modulo
                    symbol = Scalar(name="%s%d" % (d.name, n), dtype=np.int32)
                    uindices.append(
                        UnboundedIndex(symbol, value, value, d, d + o))
            # Generate Iteration
            body = [
                Iteration(queues.pop(i),
                          i.dim,
                          i.dim.limits,
                          offsets=i.limits,
                          direction=i.direction,
                          uindices=uindices)
            ]

        elif i.is_Section:
            body = [Section('section%d' % nsections, body=queues.pop(i))]
            nsections += 1

        queues.setdefault(i.parent, []).extend(body)

    assert False
Example #26
    # Populate /configuration/ with user-provided options
    if environ.get('DEVITO_CONFIG') is None:
        # At init time, it is important to configure `platform`, `compiler` and `backend`
        # in this order
        process_order = filter_ordered(['platform', 'compiler', 'backend'] +
                                       list(env_vars_mapper.values()))
        queue = sorted(env_vars_mapper.items(), key=lambda i: process_order.index(i[1]))
        unprocessed = OrderedDict([(v, environ.get(k, configuration._defaults[v]))
                                   for k, v in queue])
    else:
        # Attempt reading from the specified configuration file
        raise NotImplementedError("Devito doesn't support configuration via file yet.")

    # Parameters validation
    for k, v in unprocessed.items():
        try:
            items = v.split(';')
            # Env variable format: 'var=k1:v1;k2:v2;k3:v3;...'
            keys, values = zip(*[i.split(':') for i in items])
            # Casting
            values = [eval(i) for i in values]
        except AttributeError:
            # Env variable format: 'var=v', 'v' is not a string
            keys = [v]
            values = []
        except ValueError:
            # Env variable format: 'var=k1;k2:v2...' or even just 'var=v'
            keys = [i.split(':')[0] for i in items]
            values = []
            # Cast to integer
            for i, j in enumerate(list(keys)):
                try:
                    keys[i] = int(j)
                except (TypeError, ValueError):
                    keys[i] = j
        if len(keys) == len(values):
            configuration.update(k, dict(zip(keys, values)))
        elif len(keys) == 1:
            configuration.update(k, keys[0])
        else:
            configuration.update(k, keys)

    configuration.initialize()
Example #27
def eliminate_arrays(clusters, template):
    """
    Eliminate redundant expressions stored in Arrays.
    """
    mapper = {}
    processed = []
    for c in clusters:
        if not c.is_dense:
            processed.append(c)
            continue

        # Search for any redundant RHSs
        seen = {}
        for e in c.exprs:
            f = e.lhs.function
            if not f.is_Array:
                continue
            v = seen.get(e.rhs)
            if v is not None:
                # Found a redundant RHS
                mapper[f] = v
            else:
                seen[e.rhs] = f

        if not mapper:
            # Do not waste time
            processed.append(c)
            continue

        # Replace redundancies
        subs = {}
        for f, v in mapper.items():
            for i in filter_ordered(i.indexed for i in c.scope[f]):
                subs[i] = v[f.indices]
        exprs = []
        for e in c.exprs:
            if e.lhs.function in mapper:
                # Drop the write
                continue
            exprs.append(e.xreplace(subs))

        processed.append(c.rebuild(exprs))

    return processed
Example #28
File: misc.py  Project: rhodrin/devito
def scalarize(clusters, template):
    """
    Turn local "isolated" Arrays, that is Arrays appearing only in one Cluster,
    into Scalars.
    """
    processed = []
    for c in clusters:
        # Get any Arrays appearing only in `c`
        impacted = set(clusters) - {c}
        arrays = {i for i in c.scope.writes if i.is_Array}
        arrays -= set().union(*[i.scope.reads for i in impacted])

        # Turn them into scalars
        #
        # r[x,y,z] = g(b[x,y,z])                 t0 = g(b[x,y,z])
        # ... = r[x,y,z] + r[x,y,z+1]   ---->    t1 = g(b[x,y,z+1])
        #                                        ... = t0 + t1
        mapper = {}
        exprs = []
        for n, e in enumerate(c.exprs):
            f = e.lhs.function
            if f in arrays:
                indexeds = [i.indexed for i in c.scope[f] if i.timestamp > n]
                for i in filter_ordered(indexeds):
                    mapper[i] = Scalar(name=template(), dtype=f.dtype)

                    assert len(f.indices) == len(e.lhs.indices) == len(
                        i.indices)
                    shifting = {
                        idx: idx + (o2 - o1)
                        for idx, o1, o2 in zip(f.indices, e.lhs.indices,
                                               i.indices)
                    }

                    handle = e.func(mapper[i], e.rhs.xreplace(mapper))
                    handle = xreplace_indices(handle, shifting)
                    exprs.append(handle)
            else:
                exprs.append(e.func(e.lhs, e.rhs.xreplace(mapper)))

        processed.append(c.rebuild(exprs))

    return processed
Example #29
def generate_nthreads(nthreads, args, level):
    if nthreads == 1:
        return [((None, 1),)]

    ret = [((nthreads.name, args[nthreads.name]),)]

    if level == 'max':
        # Be sure to try with:
        # 1) num_threads == num_physical_cores
        # 2) num_threads == num_hyperthreads
        if configuration['platform'] is KNL:
            ret.extend([((nthreads.name, psutil.cpu_count() // 4),),
                        ((nthreads.name, psutil.cpu_count() // 2),),
                        ((nthreads.name, psutil.cpu_count()),)])
        else:
            ret.extend([((nthreads.name, psutil.cpu_count() // 2),),
                        ((nthreads.name, psutil.cpu_count()),)])

    return filter_ordered(ret)
Example #30
def generate_nthreads(nthreads, args, level):
    if nthreads == 1:
        return [((None, 1),)]

    ret = [((nthreads.name, args[nthreads.name]),)]

    if level == 'max':
        # Be sure to try with:
        # 1) num_threads == num_physical_cores
        # 2) num_threads == num_hyperthreads
        if configuration['platform'] is KNL:
            ret.extend([((nthreads.name, psutil.cpu_count() // 4),),
                        ((nthreads.name, psutil.cpu_count() // 2),),
                        ((nthreads.name, psutil.cpu_count()),)])
        else:
            ret.extend([((nthreads.name, psutil.cpu_count() // 2),),
                        ((nthreads.name, psutil.cpu_count()),)])

    return filter_ordered(ret)
Example #31
File: nodes.py  Project: speglich/devito
 def expr_symbols(self):
     retval = []
     for i in self.arguments:
         if isinstance(i, AbstractFunction):
             continue
         elif isinstance(i, (Indexed, IndexedBase, LocalObject, Symbol)):
             retval.append(i)
         elif isinstance(i, Call):
             retval.extend(i.expr_symbols)
         else:
             try:
                 retval.extend(i.free_symbols)
             except AttributeError:
                 pass
     if self.base is not None:
         retval.append(self.base)
     if self.retobj is not None:
         retval.extend(self.retobj.free_symbols)
     return tuple(filter_ordered(retval))
Example #32
File: nodes.py  Project: kenhester/devito
 def functions(self):
     retval = []
     for i in self.arguments:
         if isinstance(i, numbers.Number):
             continue
         elif isinstance(i, (AbstractFunction, Indexed, LocalObject)):
             retval.append(i.function)
         else:
             for s in i.free_symbols:
                 try:
                     f = s.function
                 except AttributeError:
                     continue
                 if isinstance(f, AbstractFunction):
                     retval.append(f)
     if self.base is not None:
         retval.append(self.base.function)
     if self.retobj is not None:
         retval.append(self.retobj.function)
     return tuple(filter_ordered(retval))
Example #33
    def _create_call_graph(self):
        dag = DAG(nodes=['root'])
        queue = ['root']
        while queue:
            caller = queue.pop(0)
            callees = FindNodes(Call).visit(self.efuncs[caller])
            for callee in filter_ordered([i.name for i in callees]):
                if callee in self.efuncs:  # Exclude foreign Calls, e.g., MPI calls
                    try:
                        dag.add_node(callee)
                        queue.append(callee)
                    except KeyError:
                        # `callee` already in `dag`
                        pass
                    dag.add_edge(callee, caller)

        # Sanity check
        assert dag.size == len(self.efuncs)

        return dag
Example #34
def generate_block_shapes(blockable, args, level):
    if not blockable:
        raise ValueError

    # Max attemptable block shape
    max_bs = tuple((d.step.name, d.max_step.subs(args)) for d in blockable)

    # Attempted block shapes:
    # 1) Defaults (basic mode)
    ret = [tuple((d.step.name, v) for d in blockable) for v in options['blocksize']]
    # 2) Always try the entire iteration space (degenerate block)
    ret.append(max_bs)
    # 3) More attempts if auto-tuning in aggressive mode
    if level in ['aggressive', 'max']:
        # Ramp up to larger block shapes
        handle = tuple((i, options['blocksize'][-1]) for i, _ in ret[0])
        for i in range(3):
            new_bs = tuple((b, v*2) for b, v in handle)
            ret.insert(ret.index(handle) + 1, new_bs)
            handle = new_bs

        handle = []
        # Extended shuffling for the smaller block shapes
        for bs in ret[:4]:
            for i in ret:
                handle.append(bs[:-1] + (i[-1],))
        # Some more shuffling for all block shapes
        for bs in list(ret):
            ncombs = len(bs)
            for i in range(ncombs):
                for j in combinations(dict(bs), i+1):
                    handle.append(tuple((b, v*2 if b in j else v) for b, v in bs))
        ret.extend(handle)

    # Drop unnecessary attempts:
    # 1) Block shapes exceeding the iteration space extent
    ret = [i for i in ret if all(dict(i)[k] <= v for k, v in max_bs)]
    # 2) Redundant block shapes
    ret = filter_ordered(ret)

    return ret
Example #35
def normalize_syncs(*args):
    if not args:
        return
    if len(args) == 1:
        return args[0]

    syncs = defaultdict(list)
    for _dict in args:
        for k, v in _dict.items():
            syncs[k].extend(v)

    syncs = {k: filter_ordered(v) for k, v in syncs.items()}

    for v in syncs.values():
        waitlocks = [i for i in v if i.is_WaitLock]
        withlocks = [i for i in v if i.is_WithLock]

        if waitlocks and withlocks:
            # We do not allow mixing up WaitLock and WithLock ops
            raise ValueError("Incompatible SyncOps")

    return syncs
Example #36
 def visit_Iteration(self, o, subs={}, offsets=defaultdict(set)):
     nodes = self.visit(o.children, subs=subs, offsets=offsets)
     if o.dim.is_Buffered:
         # For buffered dimensions insert the explicit
         # definition of buffered variables, e.g. t+1 => t1
         init = []
         for i, off in enumerate(filter_ordered(offsets[o.dim])):
             vname = "%s%d" % (o.dim.name, i)
             value = o.dim.parent + off
             modulo = o.dim.modulo
             init += [
                 c.Initializer(c.Value('int', vname),
                               "(%s) %% %d" % (value, modulo))
             ]
             subs[o.dim + off] = LoweredDimension(vname, o.dim, off)
         # Always lower to symbol
         subs[o.dim.parent] = Symbol(o.dim.parent.name)
         # Insert block with modulo initialisations
         newnodes = (List(header=init, body=nodes[0]), )
         return o._rebuild(newnodes, index=o.dim.parent.name)
     else:
         return o._rebuild(*nodes)
Example #37
    def omapper(self):
        """
        Mapper describing the OWNED ('o'-mapper) region offset from the DOMAIN
        extremes, along each Dimension and DataSide.

        Examples
        --------
        Consider a HaloScheme comprising two one-dimensional Functions, ``u``
        and ``v``.  ``u``'s halo, on the LEFT and RIGHT DataSides respectively,
        is (2, 2), while ``v``'s is (4, 4). The situation is depicted below.

        .. code-block:: python

              xx**----------------**xx     u
            xxxx****------------****xxxx   v

        Where 'x' represents a HALO point, '*' an OWNED point, and '-' a CORE point.
        Together, '*' and '-' constitute the DOMAIN.

        In this example, the "cumulative" OWNED size is (4, 4), that is the max
        on each DataSide across all Functions, namely ``u`` and ``v``. Then, the
        ``omapper``, which provides *relative offsets*, not sizes, will be
        ``{d0: (4, -4)}``.

        Note that, for each Function, the 'x' and '*' are exactly the same on
        *all MPI ranks*, so the output of this method is guaranteed to be
        consistent across *all MPI ranks*.
        """
        mapper = {}
        for f, v in self.halos.items():
            dimensions = filter_ordered(flatten(i.dim for i in v))
            for d, s in zip(f.dimensions, f._size_owned):
                if d in dimensions:
                    mapper.setdefault(d, []).append(s)
        for k, v in list(mapper.items()):
            left, right = zip(*v)
            mapper[k] = (max(left), -max(right))
        return mapper
Example #38
 def functions(self):
     retval = []
     for i in self.arguments:
         if isinstance(i, (AbstractFunction, Indexed, LocalObject)):
             retval.append(i.function)
         elif isinstance(i, Call):
             retval.extend(i.functions)
         else:
             try:
                 v = i.free_symbols
             except AttributeError:
                 continue
             for s in v:
                 try:
                     # `try-except` necessary for e.g. Macro
                     if isinstance(s.function, AbstractFunction):
                         retval.append(s.function)
                 except AttributeError:
                     continue
     if self.base is not None:
         retval.append(self.base.function)
     if self.retobj is not None:
         retval.append(self.retobj.function)
     return tuple(filter_ordered(retval))
Example #39
File: space.py  Project: opesci/devito
 def reorder(cls, items, relations):
     # The relations are between dimensions, not intervals. So we take
     # care of that here
     ordering = filter_ordered(toposort(relations) + [i.dim for i in items])
     return sorted(items, key=lambda i: ordering.index(i.dim))
Example #40
File: operator.py  Project: opesci/devito
 def input(self):
     ret = [i for i in self._input + list(self.parameters) if i.is_Input]
     return tuple(filter_ordered(ret))
Example #41
File: operator.py  Project: opesci/devito
    def _specialize_iet(self, iet, **kwargs):
        """
        Transform the Iteration/Expression tree to offload the computation of
        one or more loop nests onto YASK. This involves calling the YASK compiler
        to generate YASK code. Such YASK code is then called from within the
        transformed Iteration/Expression tree.
        """
        mapper = {}
        self.yk_solns = OrderedDict()
        for n, (section, trees) in enumerate(find_affine_trees(iet).items()):
            dimensions = tuple(filter_ordered(i.dim.root for i in flatten(trees)))
            context = contexts.fetch(dimensions, self._dtype)

            # A unique name for the 'real' compiler and kernel solutions
            name = namespace['jit-soln'](Signer._digest(configuration,
                                                        *[i.root for i in trees]))

            # Create a YASK compiler solution for this Operator
            yc_soln = context.make_yc_solution(name)

            try:
                # Generate YASK grids and populate `yc_soln` with equations
                local_grids = yaskit(trees, yc_soln)

                # Build the new IET nodes
                yk_soln_obj = YaskSolnObject(namespace['code-soln-name'](n))
                funcall = make_sharedptr_funcall(namespace['code-soln-run'],
                                                 ['time'], yk_soln_obj)
                funcall = Offloaded(funcall, self._dtype)
                mapper[trees[0].root] = funcall
                mapper.update({i.root: mapper.get(i.root) for i in trees})  # Drop trees

                # Mark `funcall` as an external function call
                self._func_table[namespace['code-soln-run']] = MetaCall(None, False)

                # JIT-compile the newly-created YASK kernel
                yk_soln = context.make_yk_solution(name, yc_soln, local_grids)
                self.yk_solns[(dimensions, yk_soln_obj)] = yk_soln

                # Print some useful information about the newly constructed solution
                log("Solution '%s' contains %d grid(s) and %d equation(s)." %
                    (yc_soln.get_name(), yc_soln.get_num_grids(),
                     yc_soln.get_num_equations()))
            except NotImplementedError as e:
                log("Unable to offload a candidate tree. Reason: [%s]" % str(e))
        iet = Transformer(mapper).visit(iet)

        if not self.yk_solns:
            log("No offloadable trees found")

        # Some Iteration/Expression trees are not offloaded to YASK and may
        # require further processing to be executed in YASK, due to the differences
        # in storage layout employed by Devito and YASK
        yk_grid_objs = {i.name: YaskGridObject(i.name) for i in self._input
                        if i.from_YASK}
        yk_grid_objs.update({i: YaskGridObject(i) for i in self._local_grids})
        iet = make_grid_accesses(iet, yk_grid_objs)

        # Finally optimize all non-yaskized loops
        iet = super(OperatorYASK, self)._specialize_iet(iet, **kwargs)

        return iet
Example #42
File: compiler.py  Project: opesci/devito
 def add_library_dirs(self, dirs):
     self.library_dirs = filter_ordered(self.library_dirs + list(as_tuple(dirs)))
Example #43
File: compiler.py  Project: opesci/devito
 def add_ldflags(self, flags):
     self.ldflags = filter_ordered(self.ldflags + list(as_tuple(flags)))
Example #44
File: analysis.py  Project: opesci/devito
def mark_iteration_parallel(analysis):
    """
    Update the ``analysis`` detecting the SEQUENTIAL and PARALLEL Iterations
    within ``analysis.iet``.
    """
    properties = OrderedDict()
    for tree in analysis.trees:
        for depth, i in enumerate(tree):
            if properties.get(i) is SEQUENTIAL:
                # Speed-up analysis
                continue

            if i.uindices:
                # Only ++/-- increments of iteration variables are supported
                properties.setdefault(i, []).append(SEQUENTIAL)
                continue

            # Get all dimensions up to and including Iteration /i/, grouped by Iteration
            dims = [filter_ordered(j.dimensions) for j in tree[:depth + 1]]
            # Get all dimensions up to and including Iteration /i-1/
            prev = flatten(dims[:-1])
            # Get all dimensions up to and including Iteration /i/
            dims = flatten(dims)

            # The i-th Iteration is PARALLEL if for all dependences (d_1, ..., d_n):
            # test0 := (d_1, ..., d_{i-1}) > 0, OR
            # test1 := (d_1, ..., d_i) = 0
            is_parallel = True

            # The i-th Iteration is PARALLEL_IF_ATOMIC if for all dependences:
            # test0 OR test1 OR the write is an associative and commutative increment
            is_atomic_parallel = True

            for dep in analysis.scopes[i].d_all:
                test1 = all(dep.is_indep(d) for d in dims)
                if test1:
                    continue

                test0 = len(prev) > 0 and any(dep.is_carried(d) for d in prev)
                if test0:
                    continue

                test2 = all(dep.is_reduce_atmost(d) for d in prev) and dep.is_indep(i.dim)
                if test2:
                    continue

                is_parallel = False
                if not dep.is_increment:
                    is_atomic_parallel = False
                    break

            if is_parallel:
                properties.setdefault(i, []).append(PARALLEL)
            elif is_atomic_parallel:
                properties.setdefault(i, []).append(PARALLEL_IF_ATOMIC)
            else:
                properties.setdefault(i, []).append(SEQUENTIAL)

    # Reduction (e.g., SEQUENTIAL takes priority over PARALLEL)
    priorities = {PARALLEL: 0, PARALLEL_IF_ATOMIC: 1, SEQUENTIAL: 2}
    properties = OrderedDict([(k, max(v, key=lambda i: priorities[i]))
                              for k, v in properties.items()])

    analysis.update(properties)
Example #45
def autotune(operator, args, level, mode):
    """
    Operator autotuning.

    Parameters
    ----------
    operator : Operator
        Input Operator.
    args : dict_like
        The runtime arguments with which `operator` is run.
    level : str
        The autotuning aggressiveness (basic, aggressive, max). A more
        aggressive autotuning might eventually result in higher runtime
        performance, but the autotuning phase will take longer.
    mode : str
        The autotuning mode (preemptive, runtime). In preemptive mode, the
        output runtime values supplied by the user to `operator.apply` are
        replaced with shadow copies.
    """
    key = [level, mode]
    accepted = configuration._accepted['autotuning']
    if key not in accepted:
        raise ValueError("The accepted `(level, mode)` combinations are `%s`; "
                         "provided `%s` instead" % (accepted, key))

    # We get passed all the arguments, but the cfunction only requires a subset
    at_args = OrderedDict([(p.name, args[p.name]) for p in operator.parameters])

    # User-provided output data won't be altered in `preemptive` mode
    if mode == 'preemptive':
        output = {i.name: i for i in operator.output}
        copies = {k: output[k]._C_as_ndarray(v).copy()
                  for k, v in args.items() if k in output}
        # WARNING: `copies` keeps references to numpy arrays, which is required
        # to prevent garbage collection from kicking in during autotuning and
        # prematurely freeing the shadow copies handed over to C-land
        at_args.update({k: output[k]._C_make_dataobj(v) for k, v in copies.items()})

    # Disable halo exchanges through MPI_PROC_NULL
    if mode in ['preemptive', 'destructive']:
        for p in operator.parameters:
            if isinstance(p, MPINeighborhood):
                at_args.update(MPINeighborhood(p.fields)._arg_values())
                for i in p.fields:
                    setattr(at_args[p.name]._obj, i, MPI.PROC_NULL)
            elif isinstance(p, MPIMsgEnriched):
                at_args.update(MPIMsgEnriched(p.name, p.function, p.halos)._arg_values())
                for i in at_args[p.name]:
                    i.fromrank = MPI.PROC_NULL
                    i.torank = MPI.PROC_NULL

    roots = [operator.body] + [i.root for i in operator._func_table.values()]
    trees = filter_ordered(retrieve_iteration_tree(roots), key=lambda i: i.root)

    # Detect the time-stepping Iteration; shrink its iteration range so that
    # each autotuning run only takes a few iterations
    steppers = {i for i in flatten(trees) if i.dim.is_Time}
    if len(steppers) == 0:
        stepper = None
        timesteps = 1
    elif len(steppers) == 1:
        stepper = steppers.pop()
        timesteps = init_time_bounds(stepper, at_args)
        if timesteps is None:
            return args, {}
    else:
        warning("cannot perform autotuning unless there is one time loop; skipping")
        return args, {}

    # Perform autotuning
    timings = {}
    for n, tree in enumerate(trees):
        blockable = [i.dim for i in tree if isinstance(i.dim, BlockDimension)]

        # Tunable arguments
        try:
            tunable = []
            tunable.append(generate_block_shapes(blockable, args, level))
            tunable.append(generate_nthreads(operator.nthreads, args, level))
            tunable = list(product(*tunable))
        except ValueError:
            # Some arguments are compulsory, otherwise autotuning is skipped
            continue

        # Symbolic number of loop-blocking blocks per thread
        nblocks_per_thread = calculate_nblocks(tree, blockable) / operator.nthreads

        for bs, nt in tunable:
            # Can we safely autotune over the given time range?
            if not check_time_bounds(stepper, at_args, args, mode):
                break

            # Update `at_args` to use the new tunable arguments
            run = [(k, v) for k, v in bs + nt if k in at_args]
            at_args.update(dict(run))

            # Drop run if not at least one block per thread
            if not configuration['develop-mode'] and nblocks_per_thread.subs(at_args) < 1:
                continue

            # Make sure we remain within stack bounds, otherwise skip run
            try:
                stack_footprint = operator._mem_summary['stack']
                if int(evaluate(stack_footprint, **at_args)) > options['stack_limit']:
                    continue
            except TypeError:
                warning("couldn't determine stack size; skipping run %s" % str(i))
                continue
            except AttributeError:
                assert stack_footprint == 0

            # Run the Operator
            operator.cfunction(*list(at_args.values()))
            elapsed = operator._profiler.timer.total

            timings.setdefault(nt, OrderedDict()).setdefault(n, {})[bs] = elapsed
            log("run <%s> took %f (s) in %d timesteps" %
                (','.join('%s=%s' % i for i in run), elapsed, timesteps))

            # Prepare for the next autotuning run
            update_time_bounds(stepper, at_args, timesteps, mode)

            # Reset profiling timers
            operator._profiler.timer.reset()

    # The best variant is the one that, for a given number of threads, had the
    # minimum turnaround time
    try:
        runs = 0
        mapper = {}
        for k, v in timings.items():
            for i in v.values():
                runs += len(i)
                record = mapper.setdefault(k, Record())
                record.add(min(i, key=i.get), min(i.values()))
        best = min(mapper, key=mapper.get)
        best = OrderedDict(best + tuple(mapper[best].args))
        best.pop(None, None)
        log("selected <%s>" % (','.join('%s=%s' % i for i in best.items())))
    except ValueError:
        warning("couldn't perform any runs")
        return args, {}

    # Update the argument list with the tuned arguments
    args.update(best)

    # In `runtime` mode, some timesteps have been executed already, so we must
    # adjust the time range
    finalize_time_bounds(stepper, at_args, args, mode)

    # Autotuning summary
    summary = {}
    summary['runs'] = runs
    summary['tpr'] = timesteps  # tpr -> timesteps per run
    summary['tuned'] = dict(best)

    return args, summary
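
A hypothetical invocation of `autotune`, assuming `op` is a compiled Operator and `args` the runtime argument map produced for it (both names are placeholders; in practice this routine is typically invoked internally when an Operator is run with autotuning enabled):

# Hypothetical usage (op and args are placeholders, not defined here)
# args, summary = autotune(op, args, level='aggressive', mode='preemptive')
# print(summary['tuned'], summary['runs'], summary['tpr'])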
Example #46
File: space.py  Project: opesci/devito
 def dimensions(self):
     return filter_ordered([i.dim for i in self])
Example #47
File: space.py  Project: opesci/devito
 def dimensions(self):
     return filter_ordered(self.intervals.dimensions)
Example #48
 def indices(self):
     return tuple(filter_ordered(flatten(getattr(i, 'indices', ())
                                         for i in self._args_diff)))
Example #49
File: space.py  Project: opesci/devito
 def dimensions(self):
     sub_dims = [i.parent for v in self.sub_iterators.values() for i in v]
     return filter_ordered(self.intervals.dimensions + sub_dims)
Example #50
File: compiler.py  Project: opesci/devito
 def add_include_dirs(self, dirs):
     self.include_dirs = filter_ordered(self.include_dirs + list(as_tuple(dirs)))
Example #51
File: compiler.py  Project: opesci/devito
 def add_libraries(self, libs):
     self.libraries = filter_ordered(self.libraries + list(as_tuple(libs)))
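
All of the snippets above use `filter_ordered` from `devito.tools` to drop duplicates while preserving the order in which elements are first seen (several examples also pass a `key`, e.g. `key=lambda i: i.root` in Example #45). As a reference point, here is a minimal sketch of that behaviour; the real implementation may differ in details:

# Minimal sketch of ordered deduplication, as relied upon by the examples above
def filter_ordered_sketch(elements, key=None):
    key = key or (lambda x: x)
    seen, out = set(), []
    for e in elements:
        k = key(e)
        if k not in seen:
            seen.add(k)
            out.append(e)
    return out

print(filter_ordered_sketch([3, 1, 3, 2, 1]))             # [3, 1, 2]
print(filter_ordered_sketch(['bb', 'a', 'cc'], key=len))  # ['bb', 'a']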