def repeat(a, repeats, axis=None):
    """Repeat each element of ``a`` ``repeats`` times along ``axis``.

    Analogue of :func:`numpy.repeat` restricted to a scalar integer
    ``repeats``.  Raises ``NotImplementedError`` for non-integer repeats
    or a missing axis on a multi-dimensional input, and ``ValueError``
    for an out-of-range axis.
    """
    # A 1-d array has only one sensible axis; anything else must be explicit.
    if axis is None:
        if a.ndim != 1:
            raise NotImplementedError("Must supply an integer axis value")
        axis = 0

    if not isinstance(repeats, Integral):
        raise NotImplementedError("Only integer valued repeats supported")

    # Normalize a negative axis into [0, ndim), rejecting anything outside.
    if -a.ndim <= axis < 0:
        axis += a.ndim
    elif not 0 <= axis <= a.ndim - 1:
        raise ValueError("axis(=%d) out of bounds" % axis)

    # Trivial cases: zero repeats -> empty along the axis; one -> unchanged.
    if repeats == 0:
        empty_selector = tuple(
            slice(0) if d == axis else slice(None) for d in range(a.ndim)
        )
        return a[empty_selector]
    elif repeats == 1:
        return a

    # Cut each existing chunk into ``repeats - 1`` roughly equal sub-slices
    # (via rounded linspace bounds), dropping any empty ones.
    cchunks = cached_cumsum(a.chunks[axis], initial_zero=True)
    axis_slices = []
    for c_start, c_stop in sliding_window(2, cchunks):
        bounds = np.linspace(c_start, c_stop, repeats).round(0)
        axis_slices.extend(
            slice(lo, hi) for lo, hi in sliding_window(2, bounds) if lo != hi
        )

    whole = slice(None, None, None)
    out = []
    for ax_slice in axis_slices:
        # Select a single-chunk slab along ``axis`` and repeat it blockwise.
        selector = (whole,) * axis + (ax_slice,) + (whole,) * (a.ndim - axis - 1)
        slab = a[selector]
        chunks = list(slab.chunks)
        assert len(chunks[axis]) == 1
        chunks[axis] = (chunks[axis][0] * repeats,)
        out.append(
            slab.map_blocks(
                np.repeat, repeats, axis=axis, chunks=tuple(chunks), dtype=slab.dtype
            )
        )

    return concatenate(out, axis=axis)
async def test_stress_steal(c, s, *workers):
    """Stress the scheduler's work stealing with forced random steals.

    Builds a 5-level reduction tree of slow tasks, then repeatedly issues
    ``work_steal`` calls between random worker pairs while the computation
    runs.  Fix: removed the dead local ``dinc = delayed(slowinc)``, which
    was never used.
    """
    # State validation is too expensive under this much churn.
    s.validate = False
    for w in workers:
        w.validate = False

    # A wide base of slow tasks reduced in windows of 5, five levels deep.
    L = [delayed(slowinc)(i, delay=0.005) for i in range(100)]
    for _ in range(5):
        L = [
            delayed(slowsum)(part, delay=0.005) for part in sliding_window(5, L)
        ]

    total = delayed(sum)(L)
    future = c.compute(total)

    while future.status != "finished":
        await asyncio.sleep(0.1)
        # Force a few steals between random (distinct) worker pairs.
        for _ in range(3):
            a = random.choice(workers)
            b = random.choice(workers)
            if a is not b:
                s.work_steal(a.address, b.address, 0.5)
        if not s.processing:
            break
async def test_clean_nbytes(c, s, a, b):
    """After a tree reduction completes, only the final result's bytes
    should remain resident across the two workers."""
    # Pairwise-add reduction tree over ten increments, five levels deep.
    layer = [delayed(inc)(i) for i in range(10)]
    for _ in range(5):
        layer = [delayed(add)(x, y) for x, y in sliding_window(2, layer)]
    total = delayed(sum)(layer)

    result = c.compute(total)
    await wait(result)

    # Give the cluster a moment to release intermediate results.
    await asyncio.sleep(1)
    assert len(a.nbytes) + len(b.nbytes) == 1
def split_at_breaks(array, breaks, axis=0):
    """
    Split an array into a list of arrays (using slices) at the given breaks

    >>> split_at_breaks(np.arange(6), [3, 5])
    [array([0, 1, 2]), array([3, 4]), array([5])]
    """
    # Pad the break points with None so the first piece starts at the
    # beginning and the last runs to the end of the axis.
    bounds = [None, *breaks, None]
    # A prefix of full slices routes the cut to the requested axis.
    lead = (slice(None),) * axis
    return [
        array[lead + (slice(lo, hi),)] for lo, hi in zip(bounds, bounds[1:])
    ]
async def test_stress_scatter_death(c, s, *workers):
    """Kill workers one by one while a chain of tasks over scattered,
    replicated data is running; the computation may be cancelled but the
    scheduler must stay consistent.

    Fix: the except clause used ``except Exception as c:``, shadowing the
    client fixture ``c``.  Python deletes the ``as`` name when the handler
    exits, so any validation failure left ``c`` unbound and the final
    ``await c.gather(futures)`` raised NameError instead of the real error.
    Renamed to ``exc``.
    """
    import random

    s.allowed_failures = 1000
    np = pytest.importorskip("numpy")

    # Scatter one array per worker and replicate so data survives deaths.
    L = await c.scatter([np.random.random(10000) for i in range(len(workers))])
    await c.replicate(L, n=2)

    # First layer: adds over random pairs of the scattered data.
    adds = [
        delayed(slowadd, pure=True)(
            random.choice(L),
            random.choice(L),
            delay=0.05,
            dask_key_name="slowadd-1-%d" % i,
        )
        for i in range(50)
    ]

    # Second layer: adds over sliding pairs of the first layer.
    adds = [
        delayed(slowadd, pure=True)(
            a, b, delay=0.02, dask_key_name="slowadd-2-%d" % i
        )
        for i, (a, b) in enumerate(sliding_window(2, adds))
    ]

    futures = c.compute(adds)
    # Drop our references so only the futures keep the data alive.
    L = adds = None

    alive = list(workers)

    from distributed.scheduler import logger

    for i in range(7):
        await asyncio.sleep(0.1)
        try:
            s.validate_state()
        except Exception as exc:
            logger.exception(exc)
            if config.get("log-on-err"):
                import pdb

                pdb.set_trace()
            else:
                raise
        # Kill a random surviving worker each iteration.
        w = random.choice(alive)
        await w.close()
        alive.remove(w)

    # Gathering may be cancelled outright once enough workers have died.
    with suppress(CancelledError):
        await c.gather(futures)

    futures = None
async def test_steal_related_tasks(e, s, a, b, c):
    """Stolen tasks should keep neighbouring tasks together: most adjacent
    results should end up on the same worker."""
    futures = e.map(
        slowinc, range(20), delay=0.05, workers=a.address, allow_other_workers=True
    )
    await wait(futures)

    # Count adjacent result pairs that landed on the same worker set.
    colocated = sum(
        1
        for left, right in sliding_window(2, futures)
        if s.who_has[left.key] == s.who_has[right.key]
    )
    assert colocated > 10
async def read(self, deserializers=None):
    """Read one framed message from the underlying stream and return it
    deserialized.

    Raises ``CommClosedError`` if the stream is already gone or the data
    is truncated; aborts the comm on unexpected low-level failures.
    """
    stream = self.stream
    if stream is None:
        raise CommClosedError()

    # Wire format: an unsigned 64-bit little/native prefix ("Q") giving the
    # total byte length of the frame payload that follows.
    fmt = "Q"
    fmt_size = struct.calcsize(fmt)

    try:
        frames_nbytes = await stream.read_bytes(fmt_size)
        (frames_nbytes, ) = struct.unpack(fmt, frames_nbytes)

        frames = host_array(frames_nbytes)
        # Fill the buffer in windows of at most OPENSSL_MAX_CHUNKSIZE —
        # presumably to stay under OpenSSL's per-read limit (TODO confirm).
        # The range extends one chunk past frames_nbytes so the final
        # (partial) window is included; the slice clamps it to the buffer.
        for i, j in sliding_window(
            2,
            range(0, frames_nbytes + OPENSSL_MAX_CHUNKSIZE, OPENSSL_MAX_CHUNKSIZE),
        ):
            chunk = frames[i:j]
            chunk_nbytes = len(chunk)
            n = await stream.read_into(chunk)
            # read_into should fill the whole view; anything less is a bug.
            assert n == chunk_nbytes, (n, chunk_nbytes)
    except StreamClosedError as e:
        # Peer closed the connection: mark this comm closed and translate
        # the Tornado error into our own exception type (unless the
        # interpreter is shutting down).
        self.stream = None
        self._closed = True
        if not sys.is_finalizing():
            convert_stream_closed_error(self, e)
    except Exception:
        # Some OSError or another "low-level" exception. We do not really know what
        # was already read from the underlying socket, so it is not even safe to retry
        # here using the same stream. The only safe thing to do is to abort.
        # (See also GitHub #4133).
        self.abort()
        raise
    else:
        try:
            frames = unpack_frames(frames)
            msg = await from_frames(
                frames,
                deserialize=self.deserialize,
                deserializers=deserializers,
                allow_offload=self.allow_offload,
            )
        except EOFError:
            # Frames possibly garbled or truncated by communication error
            self.abort()
            raise CommClosedError("aborted stream on truncated data")
        return msg