Пример #1
0
def test_same_input(rsds_env):
    """Feed one upstream future into both arguments of a second task.

    ``comp_fn2`` receives the ``comp_fn1(10)`` future twice and the gathered
    result is expected to be ``0``.
    """
    address = rsds_env.start([1])
    dask_client = Client(address)

    shared = dask_client.submit(comp_fn1, 10)
    combined = dask_client.submit(comp_fn2, shared, shared)
    outcome = dask_client.gather(combined)
    assert outcome == 0
Пример #2
0
def test_recompute_existing(rsds_env):
    """Submit the same computation twice and check both gathered results."""
    client = Client(rsds_env.start([1]))

    # Disabled variant of the same check through the delayed API:
    # assert delayed_fn1(10).compute() == 100
    # assert delayed_fn1(10).compute() == 100

    submitted = [client.submit(comp_fn1, 10) for _ in range(2)]
    first, second = client.gather(submitted)
    assert first == 100
    assert second == 100
Пример #3
0
 def _sample_sync(
     self, label: str | None, client: Client, measure: str, interval: float
 ):
     """Sample memory while the caller's code runs at the ``yield``.

     A sampler is started through ``client.scheduler.memory_sampler_start``
     before yielding. Once the generator is resumed (or an exception is
     thrown into it) the sampler is stopped and whatever
     ``memory_sampler_stop`` returns is stored in ``self.samples`` under
     ``label``, or under the sampler key when ``label`` is falsy.

     NOTE(review): presumably wrapped by a context-manager decorator that is
     not visible in this excerpt -- confirm.
     """
     start_sampler = client.scheduler.memory_sampler_start
     sampler_key = client.sync(
         start_sampler, client=client.id, measure=measure, interval=interval
     )
     try:
         yield
     finally:
         # Always stop the sampler, even if the sampled code raised.
         collected = client.sync(
             client.scheduler.memory_sampler_stop, key=sampler_key
         )
         self.samples[label or sampler_key] = collected
Пример #4
0
    def _fetch_resources(self):
        """Poll workers for memory and CPU readings in an endless loop.

        A ``Client`` is connected to ``self._scheduler_address`` once. Each
        cycle runs ``_process_memory`` and then ``_process_cpu`` through
        ``client.run``, records every ``(worker, value)`` pair with
        ``self._add_memory`` / ``self._add_cpu`` while holding ``self._lock``,
        and finally sleeps for ``self.update_freq``. The loop never exits.
        """
        dask_client = Client(self._scheduler_address, timeout=30)
        while True:
            # Memory first, CPU second; each batch is recorded under the lock.
            for probe, record in (
                (_process_memory, self._add_memory),
                (_process_cpu, self._add_cpu),
            ):
                readings = dask_client.run(probe)
                with self._lock:
                    for worker, value in readings.items():
                        record(worker, value)

            sleep(self.update_freq)
Пример #5
0
 def __init__(self, name=None, client=None, maxsize=0):
     """Bind this object to a client and give it a name.

     Args:
         name: Name to use; when falsy, ``"variable-"`` plus a random
             UUID4 hex string is generated.
         client: Client to use; when falsy, ``Client.current()`` is tried
             and, if that raises ``ValueError``, the client of the worker
             returned by ``get_worker()`` is used instead.
         maxsize: Accepted but not used by this method.
     """
     try:
         resolved = client if client else Client.current()
     except ValueError:
         # No current client available: fall back to the worker's client.
         resolved = get_worker().client
     self.client = resolved
     self.name = name if name else "variable-" + uuid.uuid4().hex
Пример #6
0
def test_unsupported_arguments(loop):
    """``get_executor`` must reject unknown keywords with ``TypeError``."""
    expected = "unsupported arguments to ClientExecutor: ['bar', 'foo']"
    with cluster() as (s, [a, b]), Client(s['address'], loop=loop) as c:
        with pytest.raises(TypeError) as excinfo:
            c.get_executor(workers=[b['address']], foo=1, bar=2)
        message = str(excinfo.value)
        assert expected in message
Пример #7
0
 async def try_connect(port):
     """Return ``port`` if a client can be opened on it, otherwise ``None``.

     An asynchronous ``Client`` is opened against ``tcp://localhost:<port>``
     with a 0.5 second timeout. Any ``OSError`` raised while entering or
     leaving the client context is swallowed and ``None`` is returned.
     """
     address = f"tcp://localhost:{port}"
     try:
         async with Client(address, asynchronous=True, timeout=0.5):
             return port
     except OSError:
         return None
Пример #8
0
def test_progressbar_done(loop):
    """Check ``ProgressWidget`` state for finished and for failed futures."""
    with cluster() as (s, [a, b]), Client(s['address'], loop=loop) as c:
        # All futures already finished before the widget starts listening.
        completed = [c.submit(inc, i) for i in range(5)]
        wait(completed)
        widget = ProgressWidget(completed)
        sync(loop, widget.listen)
        assert widget.status == 'finished'
        assert widget.bar.value == 1.0
        assert widget.bar.bar_style == 'success'
        assert 'Finished' in widget.elapsed_time.value

        # A future that was submitted with ``throws``.
        failing = c.submit(throws, completed)
        wait([failing])

        widget = ProgressWidget([failing])
        sync(loop, widget.listen)
        assert widget.status == 'error'
        assert widget.bar.value == 0.0
        assert widget.bar.bar_style == 'danger'
        assert 'Exception' in widget.elapsed_time.value

        # The widget text must contain the repr of what ``throws`` raises.
        try:
            throws(1)
        except Exception as exc:
            assert repr(exc) in widget.elapsed_time.value
Пример #9
0
def test_cancellation(loop):
    """Exercise cancelling executor futures directly and via the wait helpers."""
    with cluster() as (s, [a, b]), Client(s['address'], loop=loop) as c:
        # Cancel a single task while it is processing.
        with c.get_executor(pure=False) as executor:
            sleeper = executor.submit(time.sleep, 2.0)
            assert number_of_processing_tasks(c) > 0
            assert not sleeper.done()
            sleeper.cancel()
            assert sleeper.cancelled()
            assert number_of_processing_tasks(c) == 0
            with pytest.raises(CancelledError):
                sleeper.result()

        # With wait()
        with c.get_executor(pure=False) as executor:
            n_tasks = 10
            tasks = [
                executor.submit(slowinc, i, delay=0.02)
                for i in range(n_tasks)
            ]
            tasks[3].cancel()
            outcome = wait(tasks, return_when=FIRST_COMPLETED)
            assert len(outcome.not_done) > 0
            assert len(outcome.done) >= 1

            # The cancelled future is reported among the done ones.
            assert tasks[3] in outcome.done
            assert tasks[3].cancelled()

        # With as_completed()
        with c.get_executor(pure=False) as executor:
            n_tasks = 10
            tasks = [
                executor.submit(slowinc, i, delay=0.02)
                for i in range(n_tasks)
            ]
            tasks[3].cancel()
            tasks[8].cancel()

            cancelled_count = 0
            for finished in as_completed(tasks):
                cancelled_count += finished.cancelled()
            assert cancelled_count == 2
Пример #10
0
def test_scheduler_equals_client(loop):
    """Compute a delayed value with the client passed as ``scheduler=``.

    Afterwards the scheduler's ``story`` for the delayed key must be truthy.
    """
    with cluster() as (s, [a, b]), Client(s["address"], loop=loop) as client:
        x = delayed(lambda: 1)()
        computed = x.compute(scheduler=client)
        assert computed == 1

        # The parameter name ``dask_scheduler`` is kept exactly as is.
        story = client.run_on_scheduler(
            lambda dask_scheduler: dask_scheduler.story(x.key))
        assert story
Пример #11
0
def test_shutdown(loop):
    """Check executor ``shutdown`` with and without waiting for tasks."""
    with cluster(active_rpc_timeout=10) as (s, [a, b]), \
            Client(s['address'], loop=loop) as c:
        # shutdown(wait=True) waits for pending tasks to finish
        executor = c.get_executor()
        pending = executor.submit(time.sleep, 1.0)
        started = time.time()
        executor.shutdown()
        elapsed = time.time() - started
        assert 0.5 <= elapsed <= 2.0
        time.sleep(0.1)  # wait for future outcome to propagate
        assert pending.done()
        pending.result()  # doesn't raise

        # A shut-down executor refuses new work.
        with pytest.raises(RuntimeError):
            executor.submit(time.sleep, 1.0)

        # shutdown(wait=False) cancels pending tasks
        executor = c.get_executor()
        pending = executor.submit(time.sleep, 2.0)
        started = time.time()
        executor.shutdown(wait=False)
        elapsed = time.time() - started
        assert elapsed < 0.5
        time.sleep(0.1)  # wait for future outcome to propagate
        assert pending.cancelled()

        with pytest.raises(RuntimeError):
            executor.submit(time.sleep, 1.0)
Пример #12
0
def test_retries(loop):
    """Check how the executor's ``retries`` setting affects a flaky task.

    The task is built by ``varying`` from two ``ZeroDivisionError`` instances
    followed by ``42``. With 3 or 2 retries the result is ``42``; with 1
    retry the error matching "two" surfaces; with 0 retries the one matching
    "one" does.
    """
    outcomes = [ZeroDivisionError("one"), ZeroDivisionError("two"), 42]

    with cluster() as (s, [a, b]), Client(s['address'], loop=loop) as c:
        with c.get_executor(retries=3, pure=False) as executor:
            attempt = executor.submit(varying(outcomes))
            assert attempt.result() == 42

        with c.get_executor(retries=2) as executor:
            attempt = executor.submit(varying(outcomes))
            value = attempt.result()
            assert value == 42

        with c.get_executor(retries=1) as executor:
            attempt = executor.submit(varying(outcomes))
            with pytest.raises(ZeroDivisionError) as exc_info:
                attempt.result()
            exc_info.match("two")

        with c.get_executor(retries=0) as executor:
            attempt = executor.submit(varying(outcomes))
            with pytest.raises(ZeroDivisionError) as exc_info:
                attempt.result()
            exc_info.match("one")
Пример #13
0
def test_map(loop):
    """Exercise ``executor.map``: full run, timeout, and early abandonment."""
    with cluster() as (s, [a, b]), Client(s['address'], loop=loop) as c:
        # Every mapped result is produced exactly once.
        with c.get_executor() as executor:
            n_items = 10
            results_iter = executor.map(inc, range(n_items))
            remaining = set(range(1, n_items + 1))
            for value in results_iter:
                remaining.remove(value)
            assert not remaining

        # Iteration stops with TimeoutError once the timeout has elapsed.
        with c.get_executor(pure=False) as executor:
            n_items = 10
            results_iter = executor.map(
                slowinc, range(n_items), [0.1] * n_items, timeout=0.4)
            collected = []
            with pytest.raises(TimeoutError):
                for value in results_iter:
                    collected.append(value)
            assert 2 <= len(collected) < 7

        with c.get_executor(pure=False) as executor:
            n_items = 10
            # Not consuming the iterator will cancel remaining tasks
            results_iter = executor.map(
                slowinc, range(n_items), [0.1] * n_items)
            for value in take(2, results_iter):
                pass
            # Some tasks still processing
            assert number_of_processing_tasks(c) > 0
            # Garbage collect the iterator => remaining tasks are cancelled
            del results_iter
            assert number_of_processing_tasks(c) == 0
def main(n_to_pull=100):
    """Process up to ``n_to_pull`` artifacts that have no JSON output yet.

    Steps:

    1. Read the version stored in ``audit/_inspection_version.txt`` (empty
       string when the file is missing). If it differs from
       ``complete_version``, delete the whole ``audit`` directory.
    2. Make sure ``audit`` exists and write ``complete_version`` to the
       version file.
    3. Take every artifact reported by
       ``web_interface.get_all_extracted_artifacts()`` that has no
       ``*.json`` file under ``audit`` yet, shuffle them, and map
       ``inner_loop_and_write`` over the first ``n_to_pull`` of them inside
       a ``Client(threads_per_worker=100)`` context.

    Args:
        n_to_pull: Maximum number of artifacts handled in this run.
    """
    path = "audit"
    # Built once instead of at every use.
    version_file = os.path.join(path, "_inspection_version.txt")

    if os.path.exists(version_file):
        with open(version_file) as f:
            db_version = f.read()
    else:
        db_version = ""
    if db_version != complete_version and os.path.exists(path):
        # Output from a different version is stale: start from scratch.
        shutil.rmtree(path)

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(path, exist_ok=True)
    with open(version_file, "w") as f:
        f.write(complete_version)

    all_extracted_artifacts = web_interface.get_all_extracted_artifacts()
    existing_artifacts = glob.glob(f"{path}/**/*.json", recursive=True)
    # Drop everything up to the first "/" and the ".json" text.
    existing_artifact_names = {
        k.partition("/")[2].replace(".json", "")
        for k in existing_artifacts
    }

    # Sort first so the order fed to shuffle() does not depend on set order.
    artifacts = sorted(set(all_extracted_artifacts) - existing_artifact_names)

    # Don't have the artifacts in alphabetical order
    shuffle(artifacts)

    with Client(threads_per_worker=100):
        db.from_sequence(
            artifacts[:n_to_pull]).map(inner_loop_and_write).compute()
Пример #15
0
def user_scheduler(
    scheduler: Client,
    scenario: Scenario,
    backend: IUserManagerBackend,
    context: dict,
):
    """Start a user runner for every new user reported by the backend.

    Runs forever. Each cycle submits ``user_runner`` once per id returned by
    ``backend.get_new_users()``, marks the resulting future as
    fire-and-forget, then calls ``backend.send_user_results()`` and sleeps
    for one second.

    Args:
        scheduler (Client): Dask client used to submit the user runners
        scenario (Scenario): User scenario passed to every runner
        backend (IUserManagerBackend): Backend implementation for user manager to use
        context (dict): Test context passed to every runner
    """

    while True:
        new_user_ids = backend.get_new_users()
        for new_id in new_user_ids:
            user_backend = backend.get_user_backend(new_id)
            runner_future = scheduler.submit(
                user_runner,
                scenario=scenario,
                user_id=new_id,
                backend=user_backend,
                context=context,
                pure=False,
            )

            # NOTE: may be better waiting for all futures to finish
            fire_and_forget(runner_future)

        backend.send_user_results()
        time.sleep(1)
Пример #16
0
def _get_client(client):
    """Turn ``client`` into a ``Client`` instance.

    A ``Client`` is returned unchanged, ``None`` yields the result of
    ``_get_global_client()``, and anything else (e.g. a connection string)
    is passed to the ``Client`` constructor.
    """
    if isinstance(client, Client):
        return client
    if client is None:
        return _get_global_client()
    # e.g., connection string
    return Client(client)
Пример #17
0
def test_async_run(capsys):
    """Run one graph key asynchronously and check the first result value."""
    requested = ('cleaned_data', 'pool1')
    with Client():
        graph = setup_graph()
        # Original author's note: persist fails with an error here because
        # the given collection is not of type dask.base.Base.
        pending = graph.run(keys=requested)
        # NOTE(review): ``.values()[0]`` needs a subscriptable ``values()``;
        # a plain dict view on Python 3 is not -- confirm what ``results``
        # returns.
        first_value = graph.results(pending).values()[0]
        assert first_value == 'cleaned_data'
Пример #18
0
def test_cluster(capsys):
    """Fetch two graph keys under a default ``Client``; stdout must stay empty."""
    wanted = [('analyzed_data', 'pool1'), ('analyzed_data', 'pool2')]
    with Client():
        graph = setup_graph()
        data = graph.get(wanted)
        assert isinstance(data, list)
        assert data == ['analyzed_cleaned_data'] * 2
        captured_out, _captured_err = capsys.readouterr()
        assert not captured_out
Пример #19
0
 def __setstate__(self, state):
     """Rebuild the object from its pickled ``(name, address)`` state.

     The client returned by ``get_client(address)`` is reused only when its
     scheduler address equals ``address``. Otherwise, or when the lookup
     raises ``AttributeError`` / ``AssertionError``, a new
     ``Client(address, set_as_default=False)`` is created.

     The address comparison is an explicit check rather than an ``assert``:
     asserts are removed under ``python -O``, which would let a client
     connected to a different scheduler be reused silently.
     """
     name, address = state
     try:
         client = get_client(address)
         reusable = client.scheduler.address == address
     except (AttributeError, AssertionError):
         # AssertionError is still caught in case the lookup raises it.
         reusable = False
     if not reusable:
         client = Client(address, set_as_default=False)
     self.__init__(name=name, client=client)
Пример #20
0
def test_futures_to_delayed_bag(loop):
    """Build a bag from scattered futures and check its flattened content.

    Skipped when ``dask.bag`` cannot be imported.
    """
    db = pytest.importorskip("dask.bag")
    data = [1, 2, 3]
    with cluster() as (s, [a, b]), Client(s["address"], loop=loop) as c:
        scattered = c.scatter([data, data])
        bag = db.from_delayed(scattered)
        assert list(bag) == data + data
Пример #21
0
    def run_user(
        self,
        scenario_name: str,
        user_manager_id: str,
        backend_address: str,
        encoded_context: str,
    ):
        """Set up a user manager backend and hand over to ``user_scheduler``.

        Creates a default ``Client``, submits ``UserBufferActor`` as an actor,
        wraps the resulting actor in a ``UserManagerBackend``, registers an
        exit hook that calls ``backend.send_user_results()``, and finally
        calls ``user_scheduler`` with the client, scenario, backend and
        decoded context.

        Args:
            scenario_name (str): Key into ``self.scenarios`` of the scenario to run
            user_manager_id (str): ID passed to the buffer actor and the backend
            backend_address (str): Address passed to the buffer actor and the backend
            encoded_context (str): Context string decoded with ``decode_context``
        """
        scenario = self.scenarios[scenario_name]
        context = decode_context(encoded_context)
        client = Client()

        # Create buffer actor
        actor_future = client.submit(
            UserBufferActor,
            user_manager_id=user_manager_id,
            backend_address=backend_address,
            actor=True,
        )
        buffer = actor_future.result()
        fire_and_forget(actor_future)

        backend = UserManagerBackend(
            user_manager_id=user_manager_id,
            buffer=buffer,
            address=backend_address,
        )

        def _send_results_at_exit():
            # Attribute is looked up at call time, as with the original lambda.
            return backend.send_user_results()

        atexit.register(_send_results_at_exit)

        user_scheduler(client, scenario, backend, context)
Пример #22
0
 def __init__(self, name=None, client=None):
     """Bind this object to a client and set up its identity.

     Args:
         name: Name to use; when falsy, ``"lock-"`` plus a random UUID4
             hex string is generated.
         client: Client to use; when falsy, ``Client.current()`` is tried
             and, if that raises ``ValueError``, the client of the worker
             returned by ``get_worker()`` is used instead.

     ``self.id`` is always a fresh UUID4 hex string and ``self._locked``
     starts as ``False``.
     """
     try:
         resolved = client if client else Client.current()
     except ValueError:
         # No current client available: fall back to the worker's client.
         resolved = get_worker().client
     self.client = resolved
     self.name = name if name else "lock-" + uuid.uuid4().hex
     self.id = uuid.uuid4().hex
     self._locked = False
Пример #23
0
def test_fast(loop):
    """The multi progress bar must report all three task groups."""
    with cluster() as (s, [a, b]), Client(s['address'], loop=loop) as c:
        incremented = c.map(inc, range(100))
        decremented = c.map(dec, incremented)
        summed = c.map(add, incremented, decremented)
        bar = progress(summed, multi=True, complete=True, notebook=True)
        sync(loop, bar.listen)
        reported = set(bar._last_response['all'])
        assert reported == {'inc', 'dec', 'add'}
Пример #24
0
 async def try_connect(port):
     """Return ``port`` if a client can be opened on it, otherwise ``None``.

     An asynchronous ``Client`` is opened against ``tcp://localhost:<port>``.
     ``OSError`` and ``asyncio.TimeoutError`` raised while entering or
     leaving the client context are swallowed and ``None`` is returned.
     """
     address = f"tcp://localhost:{port}"
     try:
         # Minimum of 1 for Windows
         async with Client(address, asynchronous=True, timeout=1):
             return port
     except (OSError, asyncio.TimeoutError):
         return None
Пример #25
0
def test_more_clients(rsds_env):
    """Two clients on the same server each get their own result back.

    The second client's result is gathered before the first one's.
    """
    address = rsds_env.start([1])
    first_client, second_client = Client(address), Client(address)

    first_future = first_client.submit(comp_fn1, 10)
    second_future = second_client.submit(comp_fn1, 20)
    second_result = second_client.gather(second_future)
    first_result = first_client.gather(first_future)
    assert first_result == 100
    assert second_result == 200
Пример #26
0
def test_scatter(rsds_env):
    """Scatter ``range(10)`` to one worker and run ``comp_fn3`` on the futures."""
    client = Client(rsds_env.start([1]))
    client.wait_for_workers(1)

    # Use the first worker listed in the scheduler info as scatter target.
    info = client.scheduler_info()
    target_worker = list(info["workers"])[0]
    scattered = client.scatter(range(10), workers=[target_worker])
    result_future = client.submit(comp_fn3, scattered)
    expected = list(range(1, 11))
    assert client.gather(result_future) == expected
Пример #27
0
 def test_dask_cv_single(self):
     """Cross-validate decision trees on iris through a local dask cluster.

     One default classifier is scored first and its mean score must be
     positive. After ``scale_up(4)``, five more classifiers with
     ``min_samples_leaf`` from 1 to 5 are submitted, and every gathered
     score array must have a non-negative mean.
     """
     test_cluster = LocalCluster(1)
     test_client = Client(test_cluster)
     iris = load_iris()
     reg = tree.DecisionTreeClassifier()
     cv_score = test_client.submit(cross_val_score, reg, iris.data,
                                   iris.target)
     self.assertGreater(cv_score.result().mean(), 0)
     test_cluster.scale_up(4)
     # Format the key per model. The bare 'reg_%i' literal mapped every
     # future to one key, so only the last model was ever gathered.
     # Leaf sizes start at 1: scikit-learn documents an integer
     # min_samples_leaf as a minimum sample count, so 0 is not valid.
     _cv_results = {
         'reg_%i' % leaf_size:
         test_client.submit(
             cross_val_score,
             tree.DecisionTreeClassifier(min_samples_leaf=leaf_size),
             iris.data, iris.target)
         for leaf_size in range(1, 6)
     }
     cv_results = test_client.gather(list(_cv_results.values()))
     # Guard against the futures collapsing into a single entry again.
     self.assertEqual(len(cv_results), 5)
     for cv_result in cv_results:
         self.assertGreaterEqual(cv_result.mean(), 0)
Пример #28
0
def test_long_chain(rsds_env):
    """Compute a chain of eleven nested ``delayed_fn1`` calls starting at 1.

    The expected value is ``10 ** 11``, presumably because each call
    multiplies its input by ten -- ``delayed_fn1`` is defined elsewhere.
    """
    address = rsds_env.start([1])
    # The client object is only created, never used directly below.
    client = Client(address)

    chain = delayed_fn1(1)
    for _step in range(10):
        chain = delayed_fn1(chain)

    assert chain.compute() == 10 ** 11
Пример #29
0
def test_workers(loop):
    """An executor restricted to worker ``b`` leaves nothing on worker ``a``."""
    with cluster() as (s, [a, b]), Client(s['address'], loop=loop) as c:
        n_tasks = 10
        with c.get_executor(workers=[b['address']]) as executor:
            submitted = [executor.submit(slowinc, i) for i in range(n_tasks)]
            wait(submitted)
            placement = c.has_what()
            # Worker ``a`` is either missing from the mapping or holds nothing.
            assert not placement.get(a['address'])
            assert len(placement[b['address']]) == n_tasks
Пример #30
0
def test_progressbar_cancel(loop):
    """After cancelling the last future the widget reports an error.

    The bar value must still be ``0`` at that point.
    """
    with cluster() as (s, [a, b]), Client(s['address'], loop=loop) as c:
        import time
        submitted = [
            c.submit(lambda: time.sleep(0.3), i) for i in range(5)
        ]
        widget = ProgressWidget(submitted)
        sync(loop, widget.listen)
        submitted[-1].cancel()
        wait(submitted[:-1])
        assert widget.status == 'error'
        # no tasks finish before cancel is called
        assert widget.bar.value == 0