def test_wait_on_collections():
    """Check wait_on() applied to every collection from the shared fixture.

    A delayed function is built on top of the first waited-on collection only
    and computed; the node counter is then expected to read 16. Afterwards
    each waited-on collection is compared with its original counterpart.
    """
    colls, cnt = collections_with_node_counters()

    # Create a delayed that depends on a single one among all collections
    @delayed
    def f(x):
        pass

    colls2 = wait_on(*colls)
    f(colls2[0]).compute()
    assert cnt.n == 16

    # dask.delayed
    assert colls2[0].compute() == colls[0].compute()

    # dask.array
    da.utils.assert_eq(colls2[1], colls[1])
    da.utils.assert_eq(colls2[2], colls[2])

    # dask.bag
    db.utils.assert_eq(colls2[3], colls[3])
    db.utils.assert_eq(colls2[4], colls[4])
    db.utils.assert_eq(colls2[5], colls[5])

    # dask.dataframe
    dd.utils.assert_eq(colls2[6], colls[6])
    dd.utils.assert_eq(colls2[7], colls[7])
    dd.utils.assert_eq(colls2[8], colls[8])
    dd.utils.assert_eq(colls2[9], colls[9])
def test_wait_on_many():
    """Check wait_on() on a Tuple plus a dict/list nesting a second Tuple.

    The computed output must keep the nested structure and the node counter
    must read 3 afterwards (one per ``cnt.f`` task in the two graphs).

    NOTE(review): this file also contains ``test_wait_on_many(layers)``; if
    both live in the same module the later definition replaces this one.
    """
    counter = NodeCounter()

    graph_a = {("a", h1): (counter.f, 1), ("a", h2): (counter.f, 2)}
    graph_b = {"b": (counter.f, 3)}
    first = Tuple(graph_a, list(graph_a))
    second = Tuple(graph_b, list(graph_b))

    waited = wait_on(first, {"x": [second]})

    computed = dask.compute(*waited, scheduler="sync")
    assert computed == ((1, 2), {"x": [(3,)]})
    assert counter.n == 3
def test_split_every(split_every, nkeys):
    """Check graph sizes and results of checkpoint/wait_on/bind for ``split_every``.

    Parameters
    ----------
    split_every
        Value forwarded unchanged to ``checkpoint``, ``wait_on`` and ``bind``.
    nkeys
        Number of graph keys expected from ``checkpoint`` on a 100-key Tuple.
    """
    source_graph = {("a", idx): idx for idx in range(100)}
    source = Tuple(source_graph, list(source_graph))

    # checkpoint: only the reduction keys, and it computes to None
    chk = checkpoint(source, split_every=split_every)
    chk_size = len(chk.__dask_graph__())
    assert chk_size == nkeys
    assert chk.compute(scheduler="sync") is None

    # wait_on: the reduction keys plus the 100 keys of the source collection
    waited = wait_on(source, split_every=split_every)
    waited_size = len(waited.__dask_graph__())
    assert waited_size == nkeys + 100
    assert waited.compute(scheduler="sync") == tuple(range(100))

    # bind: the reduction keys plus the two keys of the bound collection
    child_graph = {"b": 1, "c": 2}
    child = Tuple(child_graph, list(child_graph))
    bound = bind(child, source, split_every=split_every, assume_layers=False)
    bound_size = len(bound.__dask_graph__())
    assert bound_size == nkeys + 2
    assert bound.compute(scheduler="sync") == (1, 2)
def test_wait_on_many(layers):
    """Check wait_on() on one demo tuple plus a dict/list nesting the other.

    ``layers`` is passed straight to ``demo_tuples``. The computed output must
    keep the nested structure and the counter must read 5 afterwards.

    NOTE(review): this file also contains a zero-argument
    ``test_wait_on_many``; if both live in the same module only the later
    definition is kept.
    """
    first, second, counter = demo_tuples(layers)

    waited = wait_on(first, {"x": [second]})

    computed = dask.compute(*waited, scheduler="sync")
    assert computed == ((1, 2, 3), {"x": [(4, 5)]})
    assert counter.n == 5
def test_wait_on_one(layers):
    """Check wait_on() on a single demo tuple.

    ``layers`` is passed straight to ``demo_tuples``. The waited-on collection
    must compute to ``(1, 2, 3)`` and the counter must read 3 afterwards.

    NOTE(review): this file also contains a zero-argument
    ``test_wait_on_one``; if both live in the same module the later
    definition replaces this one.
    """
    first, _unused, counter = demo_tuples(layers)

    waited = wait_on(first)

    computed = waited.compute(scheduler="sync")
    assert computed == (1, 2, 3)
    assert counter.n == 3
def test_wait_on_one():
    """Check wait_on() on a single two-key Tuple.

    The waited-on collection must compute to ``(1, 2)`` and the node counter
    must read 2 afterwards (one per ``cnt.f`` task in the graph).

    NOTE(review): this file also contains ``test_wait_on_one(layers)``; if
    both live in the same module only the later definition is kept.
    """
    counter = NodeCounter()
    graph = {("a", h1): (counter.f, 1), ("a", h2): (counter.f, 2)}
    source = Tuple(graph, list(graph))

    waited = wait_on(source)

    computed = waited.compute(scheduler="sync")
    assert computed == (1, 2)
    assert counter.n == 2