Пример #1
0
def repeat(a, repeats, axis=None):
    if axis is None:
        if a.ndim == 1:
            axis = 0
        else:
            raise NotImplementedError("Must supply an integer axis value")

    if not isinstance(repeats, Integral):
        raise NotImplementedError("Only integer valued repeats supported")

    if -a.ndim <= axis < 0:
        axis += a.ndim
    elif not 0 <= axis <= a.ndim - 1:
        raise ValueError("axis(=%d) out of bounds" % axis)

    if repeats == 0:
        return a[tuple(
            slice(None) if d != axis else slice(0) for d in range(a.ndim))]
    elif repeats == 1:
        return a

    cchunks = cached_cumsum(a.chunks[axis], initial_zero=True)
    slices = []
    for c_start, c_stop in sliding_window(2, cchunks):
        ls = np.linspace(c_start, c_stop, repeats).round(0)
        for ls_start, ls_stop in sliding_window(2, ls):
            if ls_start != ls_stop:
                slices.append(slice(ls_start, ls_stop))

    all_slice = slice(None, None, None)
    slices = [
        (all_slice, ) * axis + (s, ) + (all_slice, ) * (a.ndim - axis - 1)
        for s in slices
    ]

    slabs = [a[slc] for slc in slices]

    out = []
    for slab in slabs:
        chunks = list(slab.chunks)
        assert len(chunks[axis]) == 1
        chunks[axis] = (chunks[axis][0] * repeats, )
        chunks = tuple(chunks)
        result = slab.map_blocks(np.repeat,
                                 repeats,
                                 axis=axis,
                                 chunks=chunks,
                                 dtype=slab.dtype)
        out.append(result)

    return concatenate(out, axis=axis)
Пример #2
0
async def test_stress_steal(c, s, *workers):
    s.validate = False
    for w in workers:
        w.validate = False

    dinc = delayed(slowinc)
    L = [delayed(slowinc)(i, delay=0.005) for i in range(100)]
    for i in range(5):
        L = [
            delayed(slowsum)(part, delay=0.005)
            for part in sliding_window(5, L)
        ]

    total = delayed(sum)(L)
    future = c.compute(total)

    while future.status != "finished":
        await asyncio.sleep(0.1)
        for i in range(3):
            a = random.choice(workers)
            b = random.choice(workers)
            if a is not b:
                s.work_steal(a.address, b.address, 0.5)
        if not s.processing:
            break
Пример #3
0
async def test_clean_nbytes(c, s, a, b):
    L = [delayed(inc)(i) for i in range(10)]
    for i in range(5):
        L = [delayed(add)(x, y) for x, y in sliding_window(2, L)]
    total = delayed(sum)(L)

    future = c.compute(total)
    await wait(future)

    await asyncio.sleep(1)
    assert len(a.nbytes) + len(b.nbytes) == 1
Пример #4
0
def split_at_breaks(array, breaks, axis=0):
    """ Split an array into a list of arrays (using slices) at the given breaks

    >>> split_at_breaks(np.arange(6), [3, 5])
    [array([0, 1, 2]), array([3, 4]), array([5])]
    """
    padded_breaks = concat([[None], breaks, [None]])
    slices = [slice(i, j) for i, j in sliding_window(2, padded_breaks)]
    preslice = (slice(None), ) * axis
    split_array = [array[preslice + (s, )] for s in slices]
    return split_array
Пример #5
0
async def test_stress_scatter_death(c, s, *workers):
    import random

    s.allowed_failures = 1000
    np = pytest.importorskip("numpy")
    L = await c.scatter([np.random.random(10000) for i in range(len(workers))])
    await c.replicate(L, n=2)

    adds = [
        delayed(slowadd, pure=True)(
            random.choice(L),
            random.choice(L),
            delay=0.05,
            dask_key_name="slowadd-1-%d" % i,
        ) for i in range(50)
    ]

    adds = [
        delayed(slowadd, pure=True)(a,
                                    b,
                                    delay=0.02,
                                    dask_key_name="slowadd-2-%d" % i)
        for i, (a, b) in enumerate(sliding_window(2, adds))
    ]

    futures = c.compute(adds)
    L = adds = None

    alive = list(workers)

    from distributed.scheduler import logger

    for i in range(7):
        await asyncio.sleep(0.1)
        try:
            s.validate_state()
        except Exception as c:
            logger.exception(c)
            if config.get("log-on-err"):
                import pdb

                pdb.set_trace()
            else:
                raise
        w = random.choice(alive)
        await w.close()
        alive.remove(w)

    with suppress(CancelledError):
        await c.gather(futures)

    futures = None
Пример #6
0
async def test_steal_related_tasks(e, s, a, b, c):
    futures = e.map(
        slowinc, range(20), delay=0.05, workers=a.address, allow_other_workers=True
    )

    await wait(futures)

    nearby = 0
    for f1, f2 in sliding_window(2, futures):
        if s.who_has[f1.key] == s.who_has[f2.key]:
            nearby += 1

    assert nearby > 10
Пример #7
0
    async def read(self, deserializers=None):
        stream = self.stream
        if stream is None:
            raise CommClosedError()

        fmt = "Q"
        fmt_size = struct.calcsize(fmt)

        try:
            frames_nbytes = await stream.read_bytes(fmt_size)
            (frames_nbytes, ) = struct.unpack(fmt, frames_nbytes)

            frames = host_array(frames_nbytes)
            for i, j in sliding_window(
                    2,
                    range(0, frames_nbytes + OPENSSL_MAX_CHUNKSIZE,
                          OPENSSL_MAX_CHUNKSIZE),
            ):
                chunk = frames[i:j]
                chunk_nbytes = len(chunk)
                n = await stream.read_into(chunk)
                assert n == chunk_nbytes, (n, chunk_nbytes)
        except StreamClosedError as e:
            self.stream = None
            self._closed = True
            if not sys.is_finalizing():
                convert_stream_closed_error(self, e)
        except Exception:
            # Some OSError or a another "low-level" exception. We do not really know what
            # was already read from the underlying socket, so it is not even safe to retry
            # here using the same stream. The only safe thing to do is to abort.
            # (See also GitHub #4133).
            self.abort()
            raise
        else:
            try:
                frames = unpack_frames(frames)

                msg = await from_frames(
                    frames,
                    deserialize=self.deserialize,
                    deserializers=deserializers,
                    allow_offload=self.allow_offload,
                )
            except EOFError:
                # Frames possibly garbled or truncated by communication error
                self.abort()
                raise CommClosedError("aborted stream on truncated data")
            return msg