def index_is_basic(idx):
    """
    Return True if ``idx`` is an integer, or an iterable whose entries are all
    integers or the ``NONLOCAL`` sentinel. Slices and NumPy arrays always
    yield False.
    """
    if is_integer(idx):
        return True
    if isinstance(idx, (slice, np.ndarray)):
        return False
    # Any other index is treated as an iterable of entries
    for entry in idx:
        if not (is_integer(entry) or entry is NONLOCAL):
            return False
    return True
def __init__(self, dim, lower, upper, stamp=0):
    """
    Initialize the Interval over ``dim`` with offsets ``lower`` and ``upper``,
    each of which must be an integer or an ``Expr``.
    """
    for bound in (lower, upper):
        assert is_integer(bound) or isinstance(bound, Expr)

    super(Interval, self).__init__(dim, stamp)

    self.lower = lower
    self.upper = upper

    # Extent between the extremes of `dim`, stretched by the two offsets
    extent = dim.extreme_max - dim.extreme_min + 1
    self.size = extent + (upper - lower)
def index_dist_to_repl(idx, decomposition):
    """
    Convert a distributed array index into a replicated array index.

    Without a decomposition, an integer maps to ``PROJECTED`` and anything
    else to ``slice(None)``. Otherwise ``idx`` is converted through
    ``decomposition.convert_index`` and then shifted by the original slice
    start (or by ``idx`` itself if it is not a slice).
    """
    if decomposition is None:
        if is_integer(idx):
            return PROJECTED
        return slice(None)

    # Derive shift value
    if isinstance(idx, slice):
        shift = idx.start
    else:
        shift = idx
    if shift is None:
        shift = 0
    elif not is_integer(shift):
        raise ValueError("Cannot derive shift value from type `%s`" % type(shift))

    # Convert into absolute local index
    converted = decomposition.convert_index(idx, rel=False)

    if is_integer(converted):
        return PROJECTED
    if converted is None:
        return NONLOCAL
    if isinstance(converted, (tuple, list)):
        return [entry - shift for entry in converted]
    if isinstance(converted, np.ndarray):
        return converted - shift
    if isinstance(converted, slice):
        return slice(converted.start - shift, converted.stop - shift, converted.step)
    raise ValueError("Cannot apply shift to type `%s`" % type(converted))
def compute_local_indices(f, dims, ispace, scope):
    """
    Map the Dimensions in ``dims`` to the local indices necessary to perform
    a halo exchange, as described in HaloScheme.__doc__.

    For each Dimension, the non-integer indices used to read ``f`` are keyed
    by their distance from the Dimension; the one with the largest (forward
    iteration) or smallest (otherwise) distance is selected.

    Examples
    --------
    1) u[t+1, x] = f(u[t, x])   => shift == 1
    2) u[t-1, x] = f(u[t, x])   => shift == 1
    3) u[t+1, x] = f(u[t+1, x]) => shift == 0

    In the first and second cases, the x-halo should be inserted at `t`,
    while in the last case it should be inserted at `t+1`.
    """
    loc_indices = {}
    for d in dims:
        try:
            forward = ispace.is_forward(d.root)
        except KeyError:
            raise HaloSchemeException("Don't know how to build a HaloScheme as `%s` "
                                      "doesn't appear in `%s`" % (d, ispace))
        select = Max if forward else Min

        # Stepping Dimensions are compared through the `origin` of the index
        stepping = d.is_Stepping

        candidates = {}
        for access in scope.getreads(f):
            index = access[d]
            if is_integer(index):
                continue
            base = index.origin if stepping else index
            candidates[base - d] = index

        loc_indices[d] = candidates[select(*candidates.keys())]

    return loc_indices
def index_dist_to_repl(idx, decomposition):
    """
    Convert a distributed array index into a replicated array index.

    Without a decomposition, an integer maps to ``PROJECTED`` and anything
    else to ``slice(None)``. Otherwise ``idx`` is converted through
    ``decomposition.convert_index`` and then shifted by the original slice
    start (or by ``idx`` itself if it is not a slice).
    """
    if decomposition is None:
        if is_integer(idx):
            return PROJECTED
        return slice(None)

    # Derive shift value
    if isinstance(idx, slice):
        shift = idx.start
    else:
        shift = idx
    if shift is None:
        shift = 0
    elif not is_integer(shift):
        raise ValueError("Cannot derive shift value from type `%s`" % type(shift))

    # Convert into absolute local index
    converted = decomposition.convert_index(idx, rel=False)

    if is_integer(converted):
        return PROJECTED
    if converted is None:
        return NONLOCAL
    if isinstance(converted, (tuple, list)):
        return [entry - shift for entry in converted]
    if isinstance(converted, np.ndarray):
        return converted - shift
    if isinstance(converted, slice):
        return slice(converted.start - shift, converted.stop - shift, converted.step)
    raise ValueError("Cannot apply shift to type `%s`" % type(converted))
def index_is_basic(idx):
    """
    Return True if ``idx`` is an integer, or an iterable whose entries are all
    integers or the ``NONLOCAL`` sentinel. Slices and NumPy arrays always
    yield False.
    """
    if is_integer(idx):
        return True
    if isinstance(idx, (slice, np.ndarray)):
        return False
    # Any other index is treated as an iterable of entries
    for entry in idx:
        if not (is_integer(entry) or entry is NONLOCAL):
            return False
    return True
def __init__(self, dim, lower, upper):
    """
    Initialize the Interval over ``dim`` with integer offsets ``lower`` and
    ``upper``, and derive its ``min_size`` and ``size``.
    """
    for bound in (lower, upper):
        assert is_integer(bound)

    super(Interval, self).__init__(dim)

    self.lower = lower
    self.upper = upper

    # Number of points contributed by the offsets alone
    span = abs(upper - lower)
    self.min_size = span

    # Symbolic extent of `dim`, stretched by the offsets
    extent = dim.symbolic_max - dim.symbolic_min + 1
    self.size = extent + span
def __init__(self, dim, lower, upper):
    """
    Initialize the Interval over ``dim`` with integer offsets ``lower`` and
    ``upper``, and derive its ``min_size`` and ``size``.
    """
    for bound in (lower, upper):
        assert is_integer(bound)

    super(Interval, self).__init__(dim)

    self.lower = lower
    self.upper = upper

    # Number of points contributed by the offsets alone
    span = abs(upper - lower)
    self.min_size = span

    # Symbolic extent of `dim`, stretched by the offsets
    extent = dim.symbolic_max - dim.symbolic_min + 1
    self.size = extent + span
def _set_global_idx(self, val, idx, val_idx):
    """
    Compute the global indices to which val (the locally stored data)
    correspond.

    Returns a tuple with one entry per index in ``idx``: the integer itself
    for integer indices, a slice for the portion of data held locally, or
    None where the local block is empty.
    """
    local_idx = as_tuple(val._index_glb_to_loc(val_idx))

    glb_blocks = []
    # Convert integers to slices so that shape dims are preserved
    if is_integer(as_tuple(idx)[0]):
        glb_blocks.append(slice(0, 1, 1))
    for loc, dec in zip(local_idx, val._decomposition):
        glb_blocks.append(None if dec.loc_empty else dec.index_loc_to_glb(loc))

    # Add any integer indices that were not present in `val_idx`.
    if len(as_list(idx)) > len(glb_blocks):
        for pos, entry in enumerate(idx):
            if is_integer(entry) and pos > 0:
                glb_blocks.insert(pos, entry)

    # Based on `glb_blocks` the indices to which the locally stored data
    # block correspond can now be computed:
    mapped = []
    for blk, req, dec in zip(glb_blocks, as_tuple(idx), self._decomposition):
        if is_integer(req):
            mapped.append(req)
            continue

        # Normalization offset derived from the requested index
        if isinstance(req, slice):
            if req.start is None:
                norm = 0
            elif req.start >= 0:
                norm = req.start
            else:
                # Negative start: wrap around using the global maximum
                norm = req.start + dec.glb_max + 1
        else:
            norm = req

        if blk is None:
            mapped.append(None)
            continue

        if isinstance(req, slice) and req.step is not None:
            stop = req.step * blk.stop + norm
            mapped.append(slice(req.step * blk.start + norm, stop, req.step))
        else:
            stop = blk.stop + norm
            mapped.append(slice(blk.start + norm, stop, blk.step))

    return as_tuple(mapped)
def size(self):
    """
    The Interval size, defined as the number of points iterated over through
    ``self.dim``, namely

        (dim.symbolic_max + upper - dim.symbolic_min - lower + 1) / dim.symbolic_incr

    Notes
    -----
    The Interval size is typically a function of several symbols (e.g.,
    `self.dim.symbolic_max`), and all such symbols must be mappable to actual
    numbers at `op.apply` time (i.e., the runtime values). When `self.dim` is
    an "unstructured Dimension", such as ModuloDimension, the symbolic
    min/max/incr can be any expression, which makes it hard to numerically
    compute the size. However, the compiler only uses such unstructured
    Dimensions in well defined circumstances, which are explicitly handled
    here, so that the returned `size` is made up of known symbols.
    """
    dim = self.dim

    if not dim.is_Custom:
        # Typically we end up here (Dimension, SubDimension, IncrDimension)
        assert not dim.is_Modulo
        assert not dim.is_Conditional

        upper_extreme = dim.symbolic_max + self.upper
        lower_extreme = dim.symbolic_min + self.lower
        return (upper_extreme - lower_extreme + 1) / dim.symbolic_incr

    # Special case 1)
    # May be caused by the performance option `cire-rotate=True`
    d = dim.symbolic_min
    assert d.is_Modulo
    n = d.parent.symbolic_size

    # Iteration 0:
    assert is_integer(d.symbolic_min)
    assert is_integer(d.symbolic_incr)
    assert is_integer(dim.symbolic_max)
    assert self.lower == self.upper == 0
    first = dim.symbolic_max - d.symbolic_min + 1

    # Iterations [1, ..., n-1]: one point each
    assert d.symbolic_incr == dim.symbolic_max
    npoints = (first + 1 * (n - 1)) / n

    return npoints / dim.symbolic_incr
def glb_to_rank(self, index):
    """
    The MPI rank owning a given global index.

    Parameters
    ----------
    index : tuple or list
        Either a single point, given as a sequence of integers (one per
        Dimension), or a sequence of such points.

    Returns
    -------
    None if ``index`` is empty; a single rank if exactly one point is
    supplied; otherwise a tuple of ranks, one per point.

    Raises
    ------
    TypeError
        If ``index`` is neither a tuple nor a list.
    """
    if not isinstance(index, (tuple, list)):
        # Without this guard `indices` would never be bound, and the loop
        # below would fail with an obscure UnboundLocalError
        raise TypeError("Expected tuple or list, got `%s` instead" % type(index))

    if len(index) == 0:
        return None
    elif is_integer(index[0]):
        # `index` is a single point
        indices = [index]
    else:
        indices = index

    ret = []
    for i in indices:
        assert len(i) == self.ndim
        found = False
        # The owner is the first rank whose ranges contain all coordinates
        for r, j in enumerate(self.all_ranges):
            if all(v in j[d] for v, d in zip(i, self.dimensions)):
                ret.append(r)
                found = True
                break
        assert found
    return tuple(ret) if len(indices) > 1 else ret[0]
def _arg_check(self, args, size, interval):
    """
    Validate the ``self``-related runtime arguments in ``args``.

    Raises
    ------
    InvalidArgument
        If a min/max runtime value is missing, if any of them would cause an
        out-of-bounds access, or if the max is smaller than ``min - 1``.
    """
    min_name = self.min_name
    max_name = self.max_name

    if min_name not in args:
        raise InvalidArgument("No runtime value for %s" % min_name)
    lower_value = args[min_name]
    if interval.is_Defined and lower_value + interval.lower < 0:
        raise InvalidArgument("OOB detected due to %s=%d" % (min_name, lower_value))

    if max_name not in args:
        raise InvalidArgument("No runtime value for %s" % max_name)
    upper_value = args[max_name]
    if interval.is_Defined:
        upper = interval.upper
        if not is_integer(upper):
            # Autopadding causes non-integer upper limit
            upper = upper.subs(args)
        if upper_value + upper >= size:
            raise InvalidArgument("OOB detected due to %s=%d" % (max_name, upper_value))

    # Allow the specific case of max=min-1, which disables the loop
    if upper_value < lower_value - 1:
        raise InvalidArgument("Illegal %s=%d < %s=%d"
                              % (max_name, upper_value, min_name, lower_value))
    if upper_value == lower_value - 1:
        debug("%s=%d and %s=%d might cause no iterations along Dimension %s",
              min_name, lower_value, max_name, upper_value, self.name)
def actions_from_init(cluster, prefix, actions):
    """
    Append a FetchUpdate SyncOp for ``cluster`` to ``actions``.

    The SyncOp is built from the first expression in ``cluster`` and is
    registered under the Dimension of the second-to-last entry of ``prefix``
    (None if ``prefix`` has a single entry).
    """
    d = prefix[-1].dim

    # Dimension under which the SyncOp gets registered
    try:
        pd = prefix[-2].dim
    except IndexError:
        pd = None

    # Prepare the data to instantiate a FetchUpdate SyncOp
    expr = cluster.exprs[0]
    size = d.symbolic_size
    function = expr.rhs.function
    fetch = expr.rhs.indices[d]
    ifetch = fetch.subs(d, d.symbolic_min)
    target = expr.lhs.function

    # These are not used by this kind of SyncOp
    fcond = pfetch = pcond = None
    tstore = 0

    # Sanity checks
    assert is_integer(size)

    syncs = actions[cluster].syncs[pd]
    syncs.append(FetchUpdate(d, size, function, fetch, ifetch, fcond,
                             pfetch, pcond, target, tstore))
def is_on_device(maybe_symbol, gpu_fit, only_writes=False):
    """
    True if all given Functions are allocated in the device memory, False
    otherwise.

    Parameters
    ----------
    maybe_symbol : Indexed or Function or Node
        The inspected object. May be a single Indexed or Function, or even an
        entire piece of IET.
    gpu_fit : list of Function
        The Function's which are known to definitely fit in the device memory.
        This information is given directly by the user through the compiler
        option `gpu-fit` and is propagated down here through the various
        stages of lowering.
    only_writes : bool, optional
        Only makes sense if `maybe_symbol` is an IET. If True, ignore all
        Function's that do not appear on the LHS of at least one Expression.
        Defaults to False.
    """
    try:
        functions = (maybe_symbol.function,)
    except AttributeError:
        # No `.function` attribute, so it must be a piece of IET
        assert maybe_symbol.is_Node
        iet = maybe_symbol
        functions = set(FindSymbols().visit(iet))
        if only_writes:
            written = {expr.write for expr in FindNodes(Expression).visit(iet)}
            functions.intersection_update(written)

    # Only TimeFunctions with an integer `save` need to be checked
    fsave = []
    for func in functions:
        if func.is_TimeFunction and is_integer(func.save):
            fsave.append(func)

    if 'all-fallback' in gpu_fit and fsave:
        warning("TimeFunction %s assumed to fit the GPU memory" % fsave)
        return True

    for func in fsave:
        if func not in gpu_fit:
            return False
    return True
def __new__(cls, expr, dimensions, **kwargs):
    """
    Build the object from ``expr`` and ``dimensions``.

    ``expr`` itself is returned if no Dimensions are supplied. A ValueError
    is raised if a Dimension has no numeric size, if the Dimensions do not
    appear in ``expr`` as free variables, or if one of them is already used
    by a nested IndexSum.
    """
    dimensions = as_tuple(dimensions)
    if not dimensions:
        return expr

    for d in dimensions:
        try:
            valid = d.is_Dimension and is_integer(d.symbolic_size)
        except AttributeError:
            # Not a Dimension-like object
            valid = False
        if not valid:
            raise ValueError("Expected Dimension with numeric size, "
                             "got `%s` instead" % d)

    if not expr.has_free(*dimensions):
        raise ValueError("All Dimensions `%s` must appear in `expr` "
                         "as free variables" % str(dimensions))

    for nested in expr.find(IndexSum):
        used = nested.dimensions
        for d in dimensions:
            if d in used:
                raise ValueError("Dimension `%s` already appears in a "
                                 "nested tensor contraction" % d)

    obj = sympy.Expr.__new__(cls, expr, *dimensions)
    obj._expr = expr
    obj._dimensions = dimensions

    return obj
def loc_data_idx(loc_idx):
    """
    Return tuple of slices containing the unflipped idx corresponding to
    loc_idx. By 'unflipped' we mean that if a slice has a negative step, we
    wish to retrieve the corresponding indices but not in reverse order.
    Integers are turned into unit-length slices; any other entry is passed
    through unchanged.

    Examples
    --------
    >>> loc_data_idx(slice(11, None, -3))
    (slice(2, 12, 3),)
    >>> loc_data_idx(slice(11, 0, -3))
    (slice(2, 12, 3),)
    """
    retval = []
    for i in as_tuple(loc_idx):
        if isinstance(i, slice) and i.step is not None and i.step < 0:
            step = -i.step
            if i.stop is not None:
                # The lowest index visited by `i` is the smallest value
                # `i.start - k*step` that is still greater than `i.stop`.
                # Simply using `i.stop + 1` is only right when it happens to
                # be aligned with the stride (always true if `step == 1`)
                nsteps = (i.start - i.stop - 1) // step
                lower = i.start - nsteps * step
            elif step == 1:
                lower = 0
            else:
                # Walk down from `i.start` until falling below zero; the
                # last visited value is the lowest index
                lmin = i.start
                while lmin >= 0:
                    lmin -= step
                lower = lmin + step
            retval.append(slice(lower, i.start + 1, step))
        elif is_integer(i):
            retval.append(slice(i, i + 1, 1))
        else:
            retval.append(i)
    return as_tuple(retval)
def offset_from_centre(d, indices):
    """
    Return the pair ``(p, offset)``, where ``p`` is the reference index among
    (or reconstructed from) ``indices`` and ``offset`` its distance from the
    smallest index.

    Raises NotImplementedError if the reference index cannot be
    reconstructed from the first entry of ``indices``.
    """
    if d in indices:
        offset = d - min(indices)
        assert is_integer(offset)
        return d, offset

    if len(indices) == 1:
        return indices[0], 0

    # E.g., `time/factor-1` and `time/factor+1` present among the
    # indices in `index_mapper`, but not `time/factor`. We reconstruct
    # `time/factor` -- the starting pointing at time_m or time_M
    assert len(indices) > 0
    v = indices[0]
    try:
        p = sum(v.args[1:])
        delta = p - v
        if not delta.is_Integer and not delta.is_Symbol:
            raise ValueError
    except (IndexError, ValueError):
        raise NotImplementedError("Cannot apply buffering with nonlinear "
                                  "index functions (found `%s`)" % v)

    try:
        # Start assuming e.g. `list(m) = [time - 1, time + 2]`
        offset = p - min(indices)
    except TypeError:
        # Actually, e.g. `list(m) = [time/factor - 1, time/factor + 2]`
        offset = p - vmin(*[Vector(i) for i in indices])[0]

    return p, offset
def distance(self, other):
    """
    Compute the distance from ``self`` to ``other``.

    The distance is a reflexive, transitive, and anti-symmetric relation,
    which establishes a total ordering amongst Vectors.

    The distance is a function [Vector x Vector --> D]. D is a tuple of
    length equal to the Vector ``rank``. The i-th entry of D, D_i, indicates
    whether the i-th component of ``self``, self_i, precedes (< 0), equals
    (== 0), or succeeds (> 0) the i-th component of ``other``, other_i.

    In particular, the *absolute value* of D_i represents the number of
    integer points that exist between self_i and other_i.

    Examples
    --------
                 | 3 |           | 1 |               |  2  |
    source =     | 2 | , sink =  | 4 | , distance => | -2  |
                 | 1 |           | 5 |               | -4  |

    There are 2, 2, and 4 points between [3-2], [2-4], and [1-5],
    respectively.
    """
    try:
        # Handle quickly the special (yet relevant) cases `other == 0`
        scalar_zero = is_integer(other) and other == 0
        if scalar_zero or (all(c == 0 for c in other) and self.rank == other.rank):
            return self
    except TypeError:
        # `other` is neither an integer nor an iterable of comparable items
        pass
    return self - other
def glb_to_rank(self, index):
    """
    Return the rank owning a given global index.

    :param index: A single domain index (a tuple or list of integers, one
                  per dimension), or a tuple or list of domain indices.
                  None is returned if empty; with more than one domain
                  index, a tuple of corresponding ranks is returned.
    :raises TypeError: If ``index`` is neither a tuple nor a list.
    """
    if not isinstance(index, (tuple, list)):
        # Without this guard `indices` would never be bound, and the loop
        # below would fail with an obscure UnboundLocalError
        raise TypeError("Expected tuple or list, got `%s` instead" % type(index))

    if len(index) == 0:
        return None
    elif is_integer(index[0]):
        # `index` is a single point
        indices = [index]
    else:
        indices = index

    ret = []
    for i in indices:
        assert len(i) == self.ndim
        found = False
        # The owner is the first rank whose ranges contain all coordinates
        for r, j in enumerate(self.all_ranges):
            if all(v in j[d] for v, d in zip(i, self.dimensions)):
                ret.append(r)
                found = True
                break
        assert found
    return tuple(ret) if len(indices) > 1 else ret[0]
def glb_to_rank(self, index):
    """
    The MPI rank owning a given global index.

    Parameters
    ----------
    index : tuple or list
        Either a single point, given as a sequence of integers (one per
        Dimension), or a sequence of such points.

    Returns
    -------
    None if ``index`` is empty; a single rank if exactly one point is
    supplied; otherwise a tuple of ranks, one per point.

    Raises
    ------
    TypeError
        If ``index`` is neither a tuple nor a list.
    """
    if not isinstance(index, (tuple, list)):
        # Without this guard `indices` would never be bound, and the loop
        # below would fail with an obscure UnboundLocalError
        raise TypeError("Expected tuple or list, got `%s` instead" % type(index))

    if len(index) == 0:
        return None
    elif is_integer(index[0]):
        # `index` is a single point
        indices = [index]
    else:
        indices = index

    ret = []
    for i in indices:
        assert len(i) == self.ndim
        found = False
        # The owner is the first rank whose ranges contain all coordinates
        for r, j in enumerate(self.all_ranges):
            if all(v in j[d] for v, d in zip(i, self.dimensions)):
                ret.append(r)
                found = True
                break
        assert found
    return tuple(ret) if len(indices) > 1 else ret[0]
def __new__(cls, items, default=None):
    """
    Build the object from ``items``, normalized to a tuple of ParTileArgs.

    ``items`` may be falsy (None is returned), a bool (``default`` is then
    used), or a tuple in one of the forms exemplified below.
    """
    if not items:
        return None

    if isinstance(items, bool):
        if not default:
            raise ValueError("Expected `default` value, got None")
        items = (ParTileArg(as_tuple(default)), )

    elif not isinstance(items, tuple):
        raise ValueError("Expected bool or tuple, got %s instead" % type(items))

    else:
        if not items:
            raise ValueError("Expected at least one value")

        # Normalize to tuple of ParTileArgs
        head = items[0]

        if is_integer(head):
            # E.g., (32, 4, 8)
            items = (ParTileArg(items), )

        elif not isinstance(head, Iterable):
            raise ValueError("Expected int or tuple, got %s instead" % type(head))

        else:
            if not head:
                raise ValueError("Expected at least one value")

            try:
                second = items[1]
                if is_integer(second):
                    # E.g., ((32, 4, 8), 1)
                    # E.g., ((32, 4, 8), 1, 'tag')
                    items = (ParTileArg(*items), )
                else:
                    try:
                        # E.g., (((32, 4, 8), 1), ((32, 4, 4), 2))
                        # E.g., (((32, 4, 8), 1, 'tag0'), ((32, 4, 4), 2, 'tag1'))
                        items = tuple(ParTileArg(*entry) for entry in items)
                    except TypeError:
                        # E.g., ((32, 4, 8), (32, 4, 4))
                        items = tuple(ParTileArg(entry) for entry in items)
            except IndexError:
                # E.g., ((32, 4, 8),)
                items = (ParTileArg(head), )

    return super().__new__(cls, items)
def __array_finalize__(self, obj):
    """
    Set up the Data-specific attributes of a newly created array.

    `self` is the newly created object, while `obj` is the object from which
    `self` was created (None if `self` was created through __new__()).
    """
    if obj is None:
        # `self` was created through __new__()
        return

    self._distributor = None
    self._index_stash = None

    # Views or references created via operations on `obj` do not get an
    # explicit reference to the underlying data (`_memfree_args`). This makes
    # sure that only one object (the "root" Data) will free the C-allocated
    # memory
    self._memfree_args = None

    if type(obj) != Data:
        # Definitely from view casting
        self._is_distributed = False
        self._modulo = (False,) * self.ndim
        self._decomposition = (None,) * self.ndim
        return

    self._is_distributed = obj._is_distributed
    self._distributor = obj._distributor

    if obj._index_stash is not None:
        # From `__getitem__`; integer indices drop the corresponding entry
        glb_idx = obj._normalize_index(obj._index_stash)
        self._modulo = tuple(m for i, m in zip(glb_idx, obj._modulo)
                             if not is_integer(i))
        self._decomposition = tuple(
            None if dec is None else dec.reshape(i)
            for i, dec in zip(glb_idx, obj._decomposition)
            if not is_integer(i)
        )
    elif self.ndim == obj.ndim:
        # E.g., from a ufunc, such as `np.add`
        self._modulo = obj._modulo
        self._decomposition = obj._decomposition
    else:
        # E.g., from a reduction operation such as `np.mean` or `np.all`
        self._modulo = (False,) * self.ndim
        self._decomposition = (None,) * self.ndim
def is_regular(self):
    """True if all entries of ``self.distance`` are integers."""
    # Note: what we do below is stronger than something along the lines of
    # `self.source.is_regular and self.sink.is_regular`
    # `source` and `sink` may be regular in isolation, but the dependence
    # itself could be irregular, as the two TimedAccesses may stem from
    # different iteration spaces. Instead if the distance is an integer
    # vector, it is guaranteed that the iteration space is the same
    for entry in self.distance:
        if not is_integer(entry):
            return False
    return True
def callback(self, clusters, prefix):
    """
    Collect the actions to be performed on ``clusters`` within the iteration
    ``prefix``, then apply them, returning the processed list of Clusters.
    ``clusters`` is returned untouched if ``prefix`` is empty.
    """
    if not prefix:
        return clusters

    it = prefix[-1]
    d = it.dim

    actions = defaultdict(Actions)

    if d.is_Custom and is_integer(it.size):
        # Case 1
        for c in clusters:
            if not self.key(c):
                continue
            if not is_memcpy(c):
                # Case 1B (actually, we expect to never end up here)
                raise NotImplementedError
            # Case 1A (special case, leading to more efficient streaming)
            actions_from_init(c, prefix, actions, memcpy=True)

    elif all(SEQUENTIAL in c.properties[d] for c in clusters):
        # Case 2
        mapper = OrderedDict()
        for c in clusters:
            if self.key(c):
                mapper[c] = actions_from_update_memcpy if is_memcpy(c) else None

        if all(i is actions_from_update_memcpy for i in mapper.values()):
            # Case 2A (special case, leading to more efficient streaming)
            for c in mapper:
                actions_from_update_memcpy(c, clusters, prefix, actions)
        elif mapper:
            # Case 2B
            actions_from_unstructured(clusters, self.key, prefix, actions)

    # Perform the necessary actions; this will ultimately attach SyncOps to
    # Clusters
    processed = []
    for c in clusters:
        v = actions[c]

        if v.drop:
            assert not v.syncs
            continue

        if v.syncs:
            c = c.rebuild(syncs=normalize_syncs(c.syncs, v.syncs))
        processed.append(c)

        if v.insert:
            processed.extend(v.insert)

    return processed
def __new__(cls, indexed, mode, timestamp, directions):
    """
    Build a TimedAccess, attaching the integer ``timestamp`` and one
    direction per entry in ``findices`` (``Any`` if not in ``directions``).
    """
    assert is_integer(timestamp)

    obj = super(TimedAccess, cls).__new__(cls, indexed, mode)
    obj.timestamp = timestamp

    # We use `.root` as if a DerivedDimension is in `directions`, then so is
    # its parent, and the parent (root) direction cannot differ from that
    # of its child
    resolved = []
    for fi in obj.findices:
        resolved.append(directions.get(fi.root, Any))
    obj.directions = resolved

    return obj
def __init_finalize__(self, name, _min, _max, spacing=None):
    """
    Store the integer bounds ``_min`` and ``_max``, the derived size, and
    the integer spacing (1 if ``spacing`` is not supplied).

    A ValueError is raised if any of them is not an integer, or if the
    resulting size is smaller than 1.
    """
    self._spacing = sympy.sympify(spacing) or sympy.S.One

    checks = (
        (_min, "Expected integer `min` (got %s)"),
        (_max, "Expected integer `max` (got %s)"),
        (self._spacing, "Expected integer `spacing` (got %s)"),
    )
    for value, template in checks:
        if not is_integer(value):
            raise ValueError(template % value)

    self._min = _min
    self._max = _max

    # Both extremes are included
    extent = _max - _min + 1
    self._size = extent
    if extent < 1:
        raise ValueError("Expected size greater than 0 (got %s)" % extent)
def __new__(cls, indexed, mode, timestamp, directions):
    """
    Build a TimedAccess, attaching the integer ``timestamp`` and one
    direction per entry in ``findices`` (``Any`` if not in ``directions``).
    """
    assert is_integer(timestamp)

    obj = super(TimedAccess, cls).__new__(cls, indexed, mode)
    obj.timestamp = timestamp

    # We use `.root` as if a DerivedDimension is in `directions`, then so is
    # its parent, and the parent (root) direction cannot differ from that
    # of its child
    resolved = []
    for fi in obj.findices:
        resolved.append(directions.get(fi.root, Any))
    obj.directions = resolved

    return obj
def __getitem__(self, key):
    """
    Fetch by position if ``key`` is a slice or an integer; otherwise return
    the Interval whose ``dim`` is ``key``, or a NullInterval if there is
    none. A ValueError is raised in the latter case if the IntervalGroup is
    not well defined.
    """
    positional = isinstance(key, slice) or is_integer(key)
    if positional:
        return super(IntervalGroup, self).__getitem__(key)

    if not self.is_well_defined:
        raise ValueError("Cannot fetch Interval from ill defined Space")

    # Lookup is by identity, not equality
    match = next((interval for interval in self if interval.dim is key), None)
    if match is None:
        return NullInterval(key)
    return match
def __getitem__(self, key):
    """
    Fetch by position if ``key`` is a slice or an integer; otherwise return
    the Interval whose ``dim`` is ``key``, or a NullInterval if there is
    none. A ValueError is raised in the latter case if the IntervalGroup is
    not well defined.
    """
    positional = isinstance(key, slice) or is_integer(key)
    if positional:
        return super(IntervalGroup, self).__getitem__(key)

    if not self.is_well_defined:
        raise ValueError("Cannot fetch Interval from ill defined Space")

    # Lookup is by identity, not equality
    match = next((interval for interval in self if interval.dim is key), None)
    if match is None:
        return NullInterval(key)
    return match
def __array_finalize__(self, obj):
    """
    Set up the Data-specific attributes of a newly created array.

    `self` is the newly created object, while `obj` is the object from which
    `self` was created (None if `self` was created through __new__()).
    """
    if obj is None:
        # `self` was created through __new__()
        return

    self._index_stash = None

    # Views or references created via operations on `obj` do not get an
    # explicit reference to the underlying data (`_memfree_args`). This makes
    # sure that only one object (the "root" Data) will free the C-allocated
    # memory
    self._memfree_args = None

    if type(obj) != Data:
        # Definitely from view casting
        self._is_distributed = False
        self._modulo = (False,) * self.ndim
        self._decomposition = (None,) * self.ndim
        return

    self._is_distributed = obj._is_distributed

    if obj._index_stash is not None:
        # From `__getitem__`; integer indices drop the corresponding entry
        glb_idx = obj._normalize_index(obj._index_stash)
        self._modulo = tuple(m for i, m in zip(glb_idx, obj._modulo)
                             if not is_integer(i))
        self._decomposition = tuple(
            None if dec is None else dec.reshape(i)
            for i, dec in zip(glb_idx, obj._decomposition)
            if not is_integer(i)
        )
    elif self.ndim == obj.ndim:
        # E.g., from a ufunc, such as `np.add`
        self._modulo = obj._modulo
        self._decomposition = obj._decomposition
    else:
        # E.g., from a reduction operation such as `np.mean` or `np.all`
        self._modulo = (False,) * self.ndim
        self._decomposition = (None,) * self.ndim
def __new__(cls, items, local):
    """
    Build a Decomposition out of ``items``, each converted into a NumPy
    array, with ``local`` being the index of the local subdomain.

    Raises
    ------
    ValueError
        If ``items`` is empty, or if ``local`` is not an integer index into
        ``items``.
    TypeError
        If any entry of ``items`` is not an Iterable.
    """
    if len(items) == 0:
        raise ValueError("The decomposition must contain at least one subdomain")
    if not all(isinstance(i, Iterable) for i in items):
        raise TypeError("Illegal Decomposition element type")
    # Note the parentheses: `not` must apply to the whole condition, or
    # out-of-range integers would be silently accepted
    if not (is_integer(local) and 0 <= local < len(items)):
        raise ValueError("`local` must be an index in ``items``.")
    obj = super(Decomposition, cls).__new__(cls, [np.array(i) for i in items])
    obj._local = local
    return obj
def convert_index(idx, decomposition, mode='glb_to_loc'):
    """
    Convert a global index into a local index or vice versa according to mode.

    Integers and slices are passed straight to ``decomposition``; tuples,
    lists and NumPy arrays are converted entry by entry (tuples and lists
    both produce a list). Any other type raises a ValueError.
    """
    if is_integer(idx) or isinstance(idx, slice):
        return decomposition(idx, mode=mode)
    if isinstance(idx, (tuple, list)):
        return [decomposition(entry, mode=mode) for entry in idx]
    if isinstance(idx, np.ndarray):
        elementwise = np.vectorize(lambda i: decomposition(i, mode=mode))
        return elementwise(idx)
    raise ValueError("Cannot convert index of type `%s` " % type(idx))
def index_mode(self):
    """
    Return a tuple with one entry per index: 'regular' if the index is an
    integer or affine in the corresponding entry of ``findices``,
    'irregular' otherwise.
    """
    return tuple(
        'regular' if (is_integer(index) or q_affine(index, fi)) else 'irregular'
        for index, fi in zip(self, self.findices)
    )
def touched_halo(self, findex):
    """
    Return a boolean 2-tuple, one entry for each ``findex`` DataSide. True
    means that the halo is touched along that DataSide.
    """
    # If an irregularly (non-affine) accessed Dimension, conservatively
    # assume the halo will be touched
    if self.irregular(findex):
        return (True, True)

    d = self.aindices[findex]

    # If the iterator is *not* a distributed Dimension, then surely the halo
    # isn't touched. Objects lacking the attribute are not ruled out
    if not getattr(d, '_maybe_distributed', True):
        return (False, False)

    # If a constant (integer, symbolic expr) is used to index into `findex`,
    # there is actually nothing we can do -- the most likely scenario is that
    # it's accessing into a *local* SubDomain/SubDimension
    # TODO: make sure this is indeed the case
    access = self[findex]
    if is_integer(access) or d not in access.free_symbols:
        return (False, False)

    # Given `d`'s iteration Interval `d[m, M]`, we know that `d` iterates
    # between `d_m + m` and `d_M + M`
    m, M = self.intervals[d].offsets

    # Both sides are compared against the *left* non-domain size
    # NOTE(review): presumably because indices are shifted by the left
    # non-domain region -- confirm
    size_nodomain_left = self.function._size_nodomain[findex].left
    shift = access - d

    # If `m + (self[d] - d) < size_nodomain_left`, then `self` will
    # definitely touch the left-halo, at least when `d=0`
    try:
        touch_halo_left = bool(m + shift < size_nodomain_left)
    except TypeError:
        # Two reasons we might end up here:
        # * `d` is a constant integer
        # * `m` is a symbol (e.g., a SubDimension-induced offset)
        #   TODO: we could exploit the properties attached to `m` (if any),
        #         such as `nonnegative` etc, to do something smarter than
        #         just assuming, conservatively, `touch_halo_left = True`
        touch_halo_left = True

    # If `M + (self[d] - d) > size_nodomain_left`, then `self` will
    # definitely touch the right-halo, at least when `d=d_M`
    try:
        touch_halo_right = bool(M + shift > size_nodomain_left)
    except TypeError:
        # Same reasons as for the left side
        touch_halo_right = True

    return (touch_halo_left, touch_halo_right)
def process(self, iet):
    """
    Process the SyncSpots found in ``iet`` by applying one callback per type
    of SyncOp, and wrap the body with the collected initialization and
    finalization code.

    Returns the (possibly rebuilt) ``iet`` along with a dict of metadata,
    which is empty if there are no SyncSpots.
    """
    sync_spots = FindNodes(SyncSpot).visit(iet)
    if not sync_spots:
        return iet, {}

    # The SyncOps are to be processed in the following order
    order = [WaitLock, WithLock, Delete, FetchUpdate, FetchPrefetch,
             PrefetchUpdate, WaitPrefetch]

    def key(s):
        return order.index(s)

    callbacks = {
        WaitLock: self._make_waitlock,
        WithLock: self._make_withlock,
        Delete: self._make_delete,
        FetchUpdate: self._make_fetchupdate,
        FetchPrefetch: self._make_fetchprefetch,
        PrefetchUpdate: self._make_prefetchupdate
    }
    postponed_callbacks = {
        WaitPrefetch: self._make_waitprefetch
    }

    Pieces = namedtuple('Pieces', 'init finalize funcs objs')
    pieces = Pieces([], [], [], Objs())

    # The processing is a two-step procedure; first, we apply the `callbacks`;
    # then, the `postponed_callbacks`, as these depend on objects produced by
    # the `callbacks`
    subs = {}
    for stage in (callbacks, postponed_callbacks):
        for n in sync_spots:
            mapper = as_mapper(n.sync_ops, lambda i: type(i))
            for _type in sorted(mapper, key=key):
                try:
                    subs[n] = stage[_type](subs.get(n, n), mapper[_type], pieces, iet)
                except KeyError:
                    # No callback for this type at this stage
                    pass

    iet = Transformer(subs).visit(iet)

    # Add initialization and finalization code
    init = List(body=pieces.init, footer=c.Line())
    finalize = List(header=c.Line(), body=pieces.finalize)
    body = iet.body._rebuild(body=(init,) + iet.body.body + (finalize,))
    iet = iet._rebuild(body=body)

    # Only non-integer sizes are reported among the arguments
    args = [thread.size for thread in pieces.objs.threads
            if not is_integer(thread.size)]

    return iet, {'efuncs': pieces.funcs, 'includes': ['pthread.h'], 'args': args}
def index_glb_to_loc(idx, decomposition):
    """
    Convert a global index into a local index.

    Integers and slices are passed straight to ``decomposition``; tuples,
    lists and NumPy arrays are converted entry by entry (tuples and lists
    both produce a list). Any other type raises a ValueError.
    """
    if is_integer(idx) or isinstance(idx, slice):
        return decomposition(idx)
    if isinstance(idx, (tuple, list)):
        return [decomposition(entry) for entry in idx]
    if isinstance(idx, np.ndarray):
        elementwise = np.vectorize(lambda i: decomposition(i))
        return elementwise(idx)
    raise ValueError("Cannot convert global index of type `%s` into a local index"
                     % type(idx))
def aindices(self):
    """
    Return a tuple with one entry per index: None for integer indices, the
    corresponding entry of ``findices`` for affine indices, and, for all
    other indices, the only Dimension among the free symbols (None if there
    is not exactly one).
    """
    def pick(index, fi):
        if is_integer(index):
            return None
        if q_affine(index, fi):
            return fi
        dims = {s for s in index.free_symbols if isinstance(s, Dimension)}
        if len(dims) == 1:
            return dims.pop()
        return None

    return tuple(pick(index, fi) for index, fi in zip(self, self.findices))
def index_glb_to_loc(idx, decomposition):
    """
    Convert a global index into a local index.

    Integers and slices are passed straight to ``decomposition``; tuples,
    lists and NumPy arrays are converted entry by entry (tuples and lists
    both produce a list). Any other type raises a ValueError.
    """
    if is_integer(idx) or isinstance(idx, slice):
        return decomposition(idx)
    if isinstance(idx, (tuple, list)):
        return [decomposition(entry) for entry in idx]
    if isinstance(idx, np.ndarray):
        elementwise = np.vectorize(lambda i: decomposition(i))
        return elementwise(idx)
    raise ValueError("Cannot convert global index of type `%s` into a local index"
                     % type(idx))
def q_constant(expr):
    """
    Return True if ``expr`` is a constant, possibly symbolic, value, False
    otherwise. Examples of non-constants are expressions containing
    Dimensions.
    """
    if is_integer(expr):
        return True
    # A free symbol without the `is_const` attribute is deemed non-constant
    return all(getattr(s, 'is_const', False) for s in expr.free_symbols)
def index_apply_modulo(idx, modulo):
    """
    Apply ``modulo`` to the index ``idx``.

    Integers and the entries of tuples/lists are reduced modulo ``modulo``
    (a list is returned for both tuples and lists). For slices, the start is
    reduced modulo ``modulo`` and the stop modulo ``modulo + 1``, keeping
    the sign of the original bound. NumPy arrays are returned as they are.
    Any other type raises a ValueError.
    """
    def wrap(bound, period):
        # None is left as is; the sign of the bound is retained
        if bound is None:
            return bound
        if bound >= 0:
            return bound % period
        return -(bound % period)

    if is_integer(idx):
        return idx % modulo
    if isinstance(idx, slice):
        start = wrap(idx.start, modulo)
        stop = wrap(idx.stop, modulo + 1)
        return slice(start, stop, idx.step)
    if isinstance(idx, (tuple, list)):
        return [entry % modulo for entry in idx]
    if isinstance(idx, np.ndarray):
        return idx
    raise ValueError("Cannot apply modulo to index of type `%s`" % type(idx))
def __new__(cls, *items):
    """
    Build a Vector out of ``items``, each of which must be an integer or a
    ``Basic`` object; a TypeError is raised otherwise.
    """
    for item in items:
        if not (is_integer(item) or isinstance(item, Basic)):
            raise TypeError("Illegal Vector element type")
    return super(Vector, cls).__new__(cls, items)
def _time_buffering(self):
    """Return True if ``self.save`` is not an integer, False otherwise."""
    if is_integer(self.save):
        return False
    return True