Example #1
async def test_resources_str(c, s, a, b):
    pd = pytest.importorskip("pandas")
    dd = pytest.importorskip("dask.dataframe")

    await a.set_resources(MyRes=1)

    x = dd.from_pandas(pd.DataFrame({"A": [1, 2], "B": [3, 4]}), npartitions=1)
    y = x.apply(lambda row: row.sum(), axis=1, meta=(None, "int64"))
    yy = y.persist(resources={"MyRes": 1})
    await wait(yy)

    ts_first = s.tasks[tokey(y.__dask_keys__()[0])]
    assert ts_first.resource_restrictions == {"MyRes": 1}
    ts_last = s.tasks[tokey(y.__dask_keys__()[-1])]
    assert ts_last.resource_restrictions == {"MyRes": 1}
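All of these snippets index scheduler and worker state through tokey from distributed.utils, which turns a dask key (a string, or a tuple such as ('x', 1)) into its string form. A minimal sketch of that behavior, based on the str()-style stringification these tests rely on:

from distributed.utils import tokey

# Plain string keys pass through unchanged; tuple keys are stringified.
# This is why the tests above look up s.tasks[tokey(...)] rather than s.tasks[...].
assert tokey("x") == "x"
assert tokey(("x", 1)) == "('x', 1)"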
Example #2
def put(self, stream=None, keys=None, data=None, name=None, client=None):
    with log_errors():
        if name in self.datasets:
            raise KeyError("Dataset %s already exists" % name)
        self.scheduler.client_desires_keys(keys,
                                           "published-%s" % tokey(name))
        self.datasets[name] = {"data": data, "keys": keys}
        return {"status": "OK", "name": name}
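For context, put() is the scheduler-side handler behind dataset publishing; a hedged sketch of the client-side calls that reach it (the dataset name "answer" is illustrative):

from distributed import Client

client = Client()                       # connect to a running cluster
future = client.submit(sum, [1, 2, 3])
client.publish_dataset(answer=future)   # stores the keys under "answer" via put()
client.unpublish_dataset("answer")      # releases them via the delete() handler shown later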
Example #3
def test_compute(c, s, a, b):
    da = pytest.importorskip("dask.array")
    x = da.random.random((10, 10), chunks=(5, 5))
    y = da.random.random((10, 10), chunks=(5, 5))

    low = c.compute(x, priority=-1)
    futures = c.map(slowinc, range(10), delay=0.1)  # saturate workers with default-priority tasks
    high = c.compute(y, priority=1)
    yield wait(high)
    assert all(s.processing.values())
    assert s.tasks[tokey(low.key)].state in ("processing", "waiting")
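Outside of the test harness, the same priority= keyword is available on Client.compute; a minimal sketch (the cluster and collections are illustrative):

from distributed import Client
import dask.array as da

client = Client()
x = da.random.random((10, 10), chunks=(5, 5))
low = client.compute(x, priority=-1)      # deferred behind default-priority work
high = client.compute(x + 1, priority=1)  # preferred by the scheduler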
Example #4
def test_persist(c, s, a, b):
    da = pytest.importorskip("dask.array")
    x = da.random.random((10, 10), chunks=(5, 5))
    y = da.random.random((10, 10), chunks=(5, 5))

    low = x.persist(priority=-1)
    futures = c.map(slowinc, range(10), delay=0.1)  # saturate workers with default-priority tasks
    high = y.persist(priority=1)
    yield wait(high)
    assert all(s.processing.values())
    assert all(s.tasks[tokey(k)].state in ("processing", "waiting")
               for k in flatten(low.__dask_keys__()))
Example #5
def test_persist_collections(c, s, a, b):
    da = pytest.importorskip('dask.array')
    x = da.arange(10, chunks=(5,))
    y = x.map_blocks(lambda x: x + 1)
    z = y.map_blocks(lambda x: 2 * x)
    w = z.sum()

    ww, yy = c.persist([w, y], resources={tuple(y.__dask_keys__()): {'A': 1}})

    yield wait([ww, yy])

    assert all(tokey(key) in a.data for key in y.__dask_keys__())
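Note that a resource restriction like {'A': 1} is only satisfiable if some worker advertises that resource. In the test harness this comes from the cluster fixture; on a real deployment the equivalent is starting the worker with a resources declaration, e.g.:

dask-worker scheduler-address:8786 --resources "A=1"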
Example #6
def test_compute_multidim(c, s, a, b):
    da = pytest.importorskip('dask.array')
    np = pytest.importorskip('numpy')
    x = delayed(np.random.randint)(0, 10, (5, 5))
    y = da.from_delayed(x, (5, 5), int)

    xx = c.compute(x, resources={x: {'A': 1}})
    yy = c.compute(tuple(y.__dask_keys__()), resources={x: {'A': 1}})

    yield wait([xx, yy])

    assert all(tokey(key) in a.data for key in x.__dask_keys__())
    assert all(tokey(key) in a.data for key in y.__dask_keys__())
Example #7
def key_stringify(task):
    """Convert all keys in `task` to strings.

    This is a fast version of distributed.utils.str_graph()
    that only handles keys of the form: `("a string", ...)`
    """
    from distributed.utils import tokey

    typ = type(task)
    if typ is tuple and task and callable(task[0]):
        return (task[0],) + tuple(key_stringify(x) for x in task[1:])
    if typ is list:
        return [key_stringify(v) for v in task]
    if typ is dict:
        return {k: key_stringify(v) for k, v in task.items()}
    if typ is tuple and task and type(task[0]) is str:
        return tokey(task)
    elif typ is tuple:  # If the tuple itself isn't a key, check its elements
        return tuple(key_stringify(v) for v in task)
    return task
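A quick illustration of how key_stringify walks a task tuple: the callable head is preserved, while embedded tuple keys are converted with tokey (the inputs here are illustrative):

task = (sum, [("x", 0), ("x", 1)])
print(key_stringify(task))
# (<built-in function sum>, ["('x', 0)", "('x', 1)"])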
Example #8
    @classmethod  # cls as the first argument: this is a classmethod on the layer
    def __dask_distributed_unpack__(cls, state, dsk, dependencies):
        import toolz
        from dask.core import keys_in_tasks
        from distributed.utils import tokey
        from distributed.worker import dumps_task

        # msgpack will convert lists into tuples, here
        # we convert them back to lists
        if isinstance(state["column"], tuple):
            state["column"] = list(state["column"])
        if "inputs" in state:
            state["inputs"] = list(state["inputs"])

        # Materialize the layer
        raw = dict(cls(**state))

        # Convert all keys to strings and dump tasks
        raw = {tokey(k): key_stringify(v) for k, v in raw.items()}
        dsk.update(toolz.valmap(dumps_task, raw))

        # TODO: use shuffle-knowledge to calculate dependencies more efficiently
        dependencies.update(
            {k: keys_in_tasks(dsk, [v], as_list=True)
             for k, v in raw.items()})
Example #9
def delete(self, stream=None, name=None):
    with log_errors():
        out = self.datasets.pop(name, {"keys": []})
        self.scheduler.client_releases_keys(out["keys"],
                                            "published-%s" % tokey(name))
Example #10
def test_pack_data_with_key_mapping():
    data = {tokey(('x', 1)): 1}
    assert pack_data((('x', 1), 'y'), data) == (1, 'y')
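pack_data substitutes keys found in a nested structure with their values from a data mapping, consulting tokey so that tuple keys match their stringified entries. A self-contained version of the test above, assuming pack_data lives in distributed.utils_comm as in the versions these tests target:

from distributed.utils_comm import pack_data
from distributed.utils import tokey

data = {tokey(('x', 1)): 1}
# ('x', 1) resolves through its stringified form; 'y' is not a key and passes through
assert pack_data((('x', 1), 'y'), data) == (1, 'y')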