def test_sync_error(loop_in_thread):
    loop = loop_in_thread
    try:
        result = sync(loop, throws, 1)
    except Exception as exc:
        f = exc
        assert 'hello' in str(exc)
        tb = get_traceback()
        L = traceback.format_tb(tb)
        assert any('throws' in line for line in L)

    def function1(x):
        return function2(x)

    def function2(x):
        return throws(x)

    try:
        result = sync(loop, function1, 1)
    except Exception as exc:
        assert 'hello' in str(exc)
        tb = get_traceback()
        L = traceback.format_tb(tb)
        assert any('function1' in line for line in L)
        assert any('function2' in line for line in L)

def test_nanny_worker_ports(loop):
    try:
        worker = Popen(['dworker', '127.0.0.1:8989', '--host', '127.0.0.1',
                        '--worker-port', '8788', '--nanny-port', '8789'],
                       stdout=PIPE, stderr=PIPE)
        sched = Popen(['dscheduler', '--port', '8989'],
                      stdout=PIPE, stderr=PIPE)
        with Executor('127.0.0.1:8989', loop=loop) as e:
            start = time()
            while True:
                d = sync(e.loop, e.scheduler.identity)
                if d['workers']:
                    break
                else:
                    assert time() - start < 5
                    sleep(0.1)

            assert d['workers']['127.0.0.1:8788']['services']['nanny'] == 8789
    finally:
        with ignoring(Exception):
            w = rpc('127.0.0.1:8789')
            sync(loop, w.terminate)
        with ignoring(Exception):
            os.kill(sched.pid, signal.SIGINT)
        with ignoring(Exception):
            worker.kill()

def test_sync_closed_loop():
    loop = IOLoop.current()
    loop.close()
    IOLoop.clear_current()
    IOLoop.clear_instance()

    with pytest.raises(RuntimeError) as exc_info:
        sync(loop, inc, 1)
    exc_info.match("IOLoop is clos(ed|ing)")

def test_fast(loop):
    with cluster() as (s, [a, b]):
        with Executor(('127.0.0.1', s['port']), loop=loop) as e:
            L = e.map(inc, range(100))
            L2 = e.map(dec, L)
            L3 = e.map(add, L, L2)
            p = progress(L3, multi=True, complete=True, notebook=True)
            sync(loop, p.listen)
            assert set(p._last_response['all']) == {'inc', 'dec', 'add'}

def test_progressbar_cancel(loop):
    with cluster() as (s, [a, b]):
        with Client(('127.0.0.1', s['port']), loop=loop) as c:
            import time
            # The lambda takes an (ignored) argument so that each of the
            # five submitted tasks gets a distinct key
            L = [c.submit(lambda x: time.sleep(0.3), i) for i in range(5)]
            p = ProgressWidget(L)
            sync(loop, p.listen)
            L[-1].cancel()
            wait(L[:-1])
            assert p.status == 'error'
            assert p.bar.value == 0  # no tasks finish before cancel is called

@contextmanager
def worker_client(timeout=3):
    # This function yields inside a ``with`` block, so it needs the
    # contextlib.contextmanager decorator added above to be usable
    from distributed.utils import sync
    from distributed.worker import thread_state
    from distributed.worker_client import WorkerClient

    address = thread_state.execution_state['scheduler']
    worker = thread_state.execution_state['worker']
    with WorkerClient(address, loop=worker.loop) as wc:
        # Make sure connection errors are bubbled to the caller
        sync(wc.loop, wc._start, timeout=timeout)
        assert wc.status == 'running'
        yield wc

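# Hedged usage sketch for worker_client() above (not part of the original
# snippet): inside a task running on a worker, it opens a client connected
# to the same scheduler, so tasks can submit further tasks.  This follows
# the pattern documented for distributed's worker_client.
def fib(n):
    if n < 2:
        return n
    with worker_client() as client:
        a = client.submit(fib, n - 1)
        b = client.submit(fib, n - 2)
        return a.result() + b.result()
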
def test_local_tls(loop):
    from distributed.utils_test import tls_only_security
    security = tls_only_security()
    with LocalCluster(scheduler_port=8786, silence_logs=False,
                      security=security, diagnostics_port=False,
                      ip='tls://0.0.0.0', loop=loop) as c:
        sync(loop, assert_can_connect_from_everywhere_4, c.scheduler.port,
             connection_args=security.get_connection_args('client'),
             protocol='tls', timeout=3)

        # If we connect to a TLS LocalCluster without SSL information we
        # should fail
        sync(loop,
             assert_cannot_connect,
             addr='tcp://127.0.0.1:%d' % c.scheduler.port,
             connection_args=security.get_connection_args('client'),
             exception_class=RuntimeError,
             )

def test_directed_scatter_sync(loop):
    with cluster() as (s, [a, b]):
        with Executor(('127.0.0.1', s['port']), loop=loop) as e:
            futures = e.scatter([1, 2, 3], workers=[('127.0.0.1', b['port'])])
            has_what = sync(loop, e.scheduler.has_what)
            assert len(has_what[('127.0.0.1', b['port'])]) == 3
            assert len(has_what[('127.0.0.1', a['port'])]) == 0

def test_nanny_worker_ports(loop): with popen(["dask-scheduler", "--port", "9359", "--no-dashboard"]) as sched: with popen([ "dask-worker", "127.0.0.1:9359", "--host", "127.0.0.1", "--worker-port", "9684", "--nanny-port", "5273", "--no-dashboard", ]) as worker: with Client("127.0.0.1:9359", loop=loop) as c: start = time() while True: d = sync(c.loop, c.scheduler.identity) if d["workers"]: break else: assert time() - start < 5 sleep(0.1) assert (d["workers"]["tcp://127.0.0.1:9684"]["nanny"] == "tcp://127.0.0.1:5273")
def test_directed_scatter_sync(loop):
    with cluster() as (c, [a, b]):
        with Executor(('127.0.0.1', c['port']), loop=loop) as e:
            e.scatter([1, 2, 3], workers=[('127.0.0.1', b['port'])])
            has_what = sync(e.loop, e.center.has_what)
            assert len(has_what[('127.0.0.1', b['port'])]) == 3
            assert len(has_what[('127.0.0.1', a['port'])]) == 0

def test_sync_inactive_loop(loop):
    @gen.coroutine
    def f(x):
        raise gen.Return(x + 1)

    y = sync(loop, f, 1)
    assert y == 2

def train(client, params, data, labels, **kwargs):
    """ Train an XGBoost model on a Dask Cluster

    This starts XGBoost on all Dask workers, moves input data to those
    workers, and then calls ``xgboost.train`` on the inputs.

    Parameters
    ----------
    client : dask.distributed.Client
    params : dict
        Parameters to give to XGBoost (see ``xgb.Booster.train``)
    data : dask.array or dask.dataframe
    labels : dask.array or dask.dataframe
    **kwargs :
        Keywords to give to XGBoost

    Examples
    --------
    >>> client = Client('scheduler-address:8786')  # doctest: +SKIP
    >>> data = dd.read_csv('s3://...')  # doctest: +SKIP
    >>> labels = data['outcome']  # doctest: +SKIP
    >>> del data['outcome']  # doctest: +SKIP
    >>> train(client, params, data, labels, **normal_kwargs)  # doctest: +SKIP
    <xgboost.core.Booster object at ...>
    """
    return sync(client.loop, _train, client, params, data, labels, **kwargs)

def test_restart_sync(loop): with cluster(nanny=True) as (s, [a, b]): with Client(s["address"], loop=loop) as c: x = c.submit(div, 1, 2) x.result() assert sync(loop, c.scheduler.who_has) c.restart() assert not sync(loop, c.scheduler.who_has) assert x.cancelled() assert len(c.nthreads()) == 2 with pytest.raises(CancelledError): x.result() y = c.submit(div, 1, 3) assert y.result() == 1 / 3
def test_restart_sync(loop): with cluster(nanny=True) as (s, [a, b]): with Client(('127.0.0.1', s['port']), loop=loop) as c: x = c.submit(div, 1, 2) x.result() assert sync(loop, c.scheduler.who_has) c.restart() assert not sync(loop, c.scheduler.who_has) assert x.cancelled() assert len(c.ncores()) == 2 with pytest.raises(CancelledError): x.result() y = c.submit(div, 1, 3) assert y.result() == 1 / 3
def test_values(loop):
    with cluster() as (s, [a, b]):
        with Executor(('127.0.0.1', s['port']), loop=loop) as e:
            L = [e.submit(inc, i) for i in range(5)]
            wait(L)
            p = MultiProgressWidget(L)
            sync(loop, p.listen)
            assert set(p.bars) == {'inc'}
            assert p.status == 'finished'
            assert p.stream.closed()
            assert '5 / 5' in p.bar_texts['inc'].value
            assert p.bars['inc'].value == 1.0

            x = e.submit(throws, 1)
            p = MultiProgressWidget([x])
            sync(loop, p.listen)
            assert p.status == 'error'

def test_restart_sync(loop):
    with cluster(nanny=True) as (s, [a, b]):
        with Executor(('127.0.0.1', s['port']), loop=loop) as e:
            x = e.submit(div, 1, 2)
            x.result()

            assert sync(loop, e.scheduler.who_has)
            e.restart()
            assert not sync(loop, e.scheduler.who_has)
            assert x.cancelled()
            assert len(e.ncores()) == 2

            with pytest.raises(CancelledError):
                x.result()

            y = e.submit(div, 1, 3)
            assert y.result() == 1 / 3

def test_values(loop):
    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            L = [c.submit(inc, i) for i in range(5)]
            wait(L)
            p = MultiProgressWidget(L)
            sync(loop, p.listen)
            assert set(p.bars) == {'inc'}
            assert p.status == 'finished'
            assert p.comm.closed()
            assert '5 / 5' in p.bar_texts['inc'].value
            assert p.bars['inc'].value == 1.0

            x = c.submit(throws, 1)
            p = MultiProgressWidget([x])
            sync(loop, p.listen)
            assert p.status == 'error'

def test_sync_timeout(loop):
    e = Event()

    @gen.coroutine
    def wait_until_event():
        yield e.wait()

    thread = Thread(target=loop.run_sync, args=(wait_until_event,))
    thread.daemon = True
    thread.start()
    while not loop._running:
        sleep(0.01)

    with pytest.raises(gen.TimeoutError):
        sync(loop, gen.sleep, 0.5, callback_timeout=0.05)

    loop.add_callback(e.set)
    thread.join()

def test_client_sync_with_async_def(loop):
    async def ff():
        await gen.sleep(0.01)
        return 1

    with cluster() as (s, [a, b]):
        with Client(s['address'], loop=loop) as c:
            assert sync(loop, ff) == 1
            assert c.sync(ff) == 1

def test_progressbar_done(loop):
    with cluster() as (s, [a, b]):
        with Executor(('127.0.0.1', s['port']), loop=loop) as e:
            L = [e.submit(inc, i) for i in range(5)]
            wait(L)
            p = ProgressWidget(L)
            sync(loop, p.listen)
            assert p.status == 'finished'
            assert p.bar.value == 1.0
            assert p.bar.bar_style == 'success'

            f = e.submit(throws, L)
            wait([f])
            p = ProgressWidget([f])
            sync(loop, p.listen)
            assert p.status == 'error'
            assert p.bar.value == 0.0
            assert p.bar.bar_style == 'danger'

def test_progressbar_done(loop): with cluster() as (s, [a, b]): with Client(('127.0.0.1', s['port']), loop=loop) as c: L = [c.submit(inc, i) for i in range(5)] wait(L) p = ProgressWidget(L) sync(loop, p.listen) assert p.status == 'finished' assert p.bar.value == 1.0 assert p.bar.bar_style == 'success' f = c.submit(throws, L) wait([f]) p = ProgressWidget([f]) sync(loop, p.listen) assert p.status == 'error' assert p.bar.value == 0.0 assert p.bar.bar_style == 'danger'
def shutdown(self, wait=True):
    """Clean up the resources associated with the Executor.

    It is safe to call this method several times. Otherwise, no other
    methods can be called after this one.

    Parameters
    ----------
    wait :
        If True then shutdown will not return until all running
        futures have finished executing.  If False then all running
        futures are cancelled immediately.
    """
    if not self._shutdown:
        self._shutdown = True
        fs = list(self._futures)
        if wait:
            sync(self._client.loop, _wait_on_futures, fs)
        else:
            self._client.cancel(fs)

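# Hedged sketch of the shutdown() semantics above, assuming the method
# belongs to the concurrent.futures-style executor returned by
# Client.get_executor(): wait=True blocks until submitted futures finish,
# while wait=False cancels whatever is still pending.
def executor_shutdown_example():
    from distributed import Client

    with Client(processes=False) as client:
        executor = client.get_executor()
        future = executor.submit(sum, [1, 2, 3])
        executor.shutdown(wait=True)  # returns only after `future` has run
        assert future.result() == 6
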
def sync(self, func, *args, **kwargs):
    if kwargs.pop("asynchronous", None) or self.asynchronous:
        callback_timeout = kwargs.pop("callback_timeout", None)
        future = func(*args, **kwargs)
        if callback_timeout is not None:
            future = gen.with_timeout(timedelta(seconds=callback_timeout), future)
        return future
    else:
        return sync(self.loop, func, *args, **kwargs)

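# Hedged usage sketch for the sync() dispatch above (example names are mine,
# not from the snippet): with a synchronous client the call blocks on the
# event loop and returns a value; with asynchronous=True it instead returns
# an awaitable for the caller to await.
def client_sync_example():
    from distributed import Client

    async def get_one():
        return 1

    with Client(processes=False) as client:
        assert client.sync(get_one) == 1  # blocks and returns the result
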
def test_progressbar_done(loop): with cluster() as (s, [a, b]): with Client(s['address'], loop=loop) as c: L = [c.submit(inc, i) for i in range(5)] wait(L) p = ProgressWidget(L) sync(loop, p.listen) assert p.status == 'finished' assert p.bar.value == 1.0 assert p.bar.bar_style == 'success' assert 'Finished' in p.elapsed_time.value f = c.submit(throws, L) wait([f]) p = ProgressWidget([f]) sync(loop, p.listen) assert p.status == 'error' assert p.bar.value == 0.0 assert p.bar.bar_style == 'danger' assert 'Exception' in p.elapsed_time.value
def test_sync_error(loop):
    e = Event()

    @gen.coroutine
    def wait_until_event():
        yield e.wait()

    thread = Thread(target=loop.run_sync, args=(wait_until_event,))
    thread.daemon = True
    thread.start()
    while not loop._running:
        sleep(0.01)

    try:
        result = sync(loop, throws, 1)
    except Exception as exc:
        f = exc
        assert 'hello' in str(exc)
        tb = get_traceback()
        L = traceback.format_tb(tb)
        assert any('throws' in line for line in L)

    def function1(x):
        return function2(x)

    def function2(x):
        return throws(x)

    try:
        result = sync(loop, function1, 1)
    except Exception as exc:
        assert 'hello' in str(exc)
        tb = get_traceback()
        L = traceback.format_tb(tb)
        assert any('function1' in line for line in L)
        assert any('function2' in line for line in L)

    loop.add_callback(e.set)
    thread.join()

def test_nanny_worker_ports(loop): with popen(['dask-scheduler', '--port', '8989']) as sched: with popen(['dask-worker', '127.0.0.1:8989', '--host', '127.0.0.1', '--worker-port', '8788', '--nanny-port', '8789']) as worker: with Client('127.0.0.1:8989', loop=loop) as c: start = time() while True: d = sync(c.loop, c.scheduler.identity) if d['workers']: break else: assert time() - start < 5 sleep(0.1) assert d['workers']['127.0.0.1:8788']['services']['nanny'] == 8789
def test_nanny_worker_ports(loop): with popen(['dask-scheduler', '--port', '9359', '--no-bokeh']) as sched: with popen(['dask-worker', '127.0.0.1:9359', '--host', '127.0.0.1', '--worker-port', '9684', '--nanny-port', '5273', '--no-bokeh']) as worker: with Client('127.0.0.1:9359', loop=loop) as c: start = time() while True: d = sync(c.loop, c.scheduler.identity) if d['workers']: break else: assert time() - start < 5 sleep(0.1) assert d['workers']['tcp://127.0.0.1:9684']['services']['nanny'] == 5273
def stop_workers(self):
    """ Stop running workers. """
    # retire_workers sometimes throws a variety of exceptions that change
    # from release to release, so we suppress everything here.  We kill all
    # the workers via Slurm later anyway; this is just an attempt to shut
    # them down politely first.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        with suppress(Exception):
            sync(loop=self._local_cluster.loop,
                 func=self.scheduler.retire_workers,
                 remove=True)

    if self._jobid:
        try:
            subprocess.check_call(("scancel", str(self._jobid)))
        except subprocess.CalledProcessError as ex:
            m = ("scancel returned non-zero exit status {code} while "
                 "stopping Slurm job number {jobid} for workers. "
                 "You should check manually whether they are "
                 "terminated successfully.".format(code=ex.returncode,
                                                   jobid=self._jobid))
            logger.error(m)
        finally:
            self._jobid = None

def test_sync_error(loop):
    e = Event()

    @gen.coroutine
    def wait_until_event():
        yield e.wait()

    thread = Thread(target=loop.run_sync, args=(wait_until_event,))
    thread.daemon = True
    thread.start()

    with pytest.raises(Exception):
        result = sync(loop, throws, 1)

    loop.add_callback(e.set)
    thread.join()

def recreate_error_locally(self, future):
    """
    For a failed calculation, perform the blamed task locally for debugging.

    This operation should be performed after a future (result of ``gather``,
    ``compute``, etc) comes back with a status of "error", if the stack
    trace is not informative enough to diagnose the problem. The specific
    task (part of the graph pointing to the future) responsible for the
    error will be fetched from the scheduler, together with the values of
    its inputs. The function will then be executed, so that ``pdb`` can
    be used for debugging.

    Examples
    --------
    >>> future = c.submit(div, 1, 0)         # doctest: +SKIP
    >>> future.status                        # doctest: +SKIP
    'error'
    >>> c.recreate_error_locally(future)     # doctest: +SKIP
    ZeroDivisionError: division by zero

    If you're in IPython you might take this opportunity to use pdb

    >>> %pdb                                 # doctest: +SKIP
    Automatic pdb calling has been turned ON

    >>> c.recreate_error_locally(future)     # doctest: +SKIP
    ZeroDivisionError: division by zero
          1 def div(x, y):
    ----> 2     return x / y
    ipdb>

    Parameters
    ----------
    future : future or collection that failed
        The same thing as was given to ``gather``, but came back with
        an exception/stack-trace. Can also be a (persisted) dask collection
        containing any errored futures.

    Returns
    -------
    Nothing; the function runs and should raise an exception, allowing
    the debugger to run.
    """
    errored_future_key = sync(self.client.loop, self._get_errored_future,
                              future)
    return self.recreate_task_locally(errored_future_key)

def test_sync(loop):
    e = Event()
    e2 = threading.Event()

    @gen.coroutine
    def wait_until_event():
        e2.set()
        yield e.wait()

    thread = Thread(target=loop.run_sync, args=(wait_until_event,))
    thread.daemon = True
    thread.start()
    e2.wait()

    result = sync(loop, inc, 1)
    assert result == 2

    loop.add_callback(e.set)
    thread.join()

def get(self, timeout=None):
    """ Get a single message

    Parameters
    ----------
    timeout : number or string or timedelta, optional
        Time in seconds to wait before timing out.
        Instead of number of seconds, it is also possible to specify
        a timedelta in string format, e.g. "200ms".
    """
    timeout = parse_timedelta(timeout)
    if self.client:
        return self.client.sync(self._get, timeout=timeout)
    elif self.worker.thread_id == threading.get_ident():
        return self._get()
    else:
        if self.buffer:  # fastpath
            return self.buffer.popleft()
        return sync(self.loop, self._get, timeout=timeout)

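# Hedged usage sketch for get() above: per the docstring, the timeout may be
# a number of seconds or a string such as "200ms" (parsed by parse_timedelta).
# The queue name and value here are illustrative only.
def queue_get_example():
    from distributed import Client, Queue

    with Client(processes=False) as client:
        q = Queue('example-queue')
        q.put(42)
        assert q.get(timeout='200ms') == 42  # string timeouts are parsed
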
def dask_to_spark(client, **kwargs):
    """ Launch a Spark cluster on top of a Dask cluster

    Parameters
    ----------
    client : dask.distributed.Client
    **kwargs : Keywords
        These get sent to the SparkContext call

    Examples
    --------
    >>> from dask.distributed import Client  # doctest: +SKIP
    >>> client = Client('scheduler-address:8786')  # doctest: +SKIP
    >>> sc = dask_to_spark(client)  # doctest: +SKIP

    See Also
    --------
    spark_to_dask
    """
    return sync(client.loop, _dask_to_spark, client, **kwargs)

def start_tensorflow(client, **kwargs):
    """ Start TensorFlow on a Dask cluster

    This launches TensorFlow servers alongside Dask workers.

    Examples
    --------
    >>> client = Client('dask-scheduler-address:8786')
    >>> tf_spec, dask_spec = start_tensorflow(client)
    >>> tf_spec.as_dict()
    {'worker': ['192.168.1.100:2222', '192.168.1.101:2222']}

    Specify the desired number of jobs of each type as keyword arguments

    >>> tf_spec, dask_spec = start_tensorflow(client, ps=2, worker=4)
    >>> tf_spec.as_dict()
    {'worker': ['192.168.1.100:2222',
                '192.168.1.101:2222',
                '192.168.1.102:2222',
                '192.168.1.103:2222'],
     'ps': ['192.168.1.104:2222',
            '192.168.1.105:2222']}
    """
    return sync(client.loop, _start_tensorflow, client, **kwargs)

def test_only_local_access(loop):
    with LocalCluster(scheduler_port=0, silence_logs=False,
                      diagnostics_port=None, loop=loop) as c:
        sync(loop, assert_can_connect_locally_4, c.scheduler.port)

def test_sync_timeout(loop_in_thread):
    loop = loop_in_thread
    with pytest.raises(TimeoutError):
        sync(loop_in_thread, asyncio.sleep, 0.5, callback_timeout=0.05)

def test_only_local_access(loop):
    with LocalCluster(
        0, scheduler_port=0, silence_logs=False, dashboard_address=None, loop=loop
    ) as c:
        sync(loop, assert_can_connect_locally_4, c.scheduler.port)

def test_remote_access(loop):
    with LocalCluster(scheduler_port=0, silence_logs=False,
                      diagnostics_port=None, ip='', loop=loop) as c:
        sync(loop, assert_can_connect_from_everywhere_4_6, c.scheduler.port)

def test_sync_timeout(loop_in_thread):
    loop = loop_in_thread
    with pytest.raises(gen.TimeoutError):
        sync(loop_in_thread, gen.sleep, 0.5, callback_timeout=0.05)

def test_remote_access(loop):
    with LocalCluster(
        scheduler_port=0, silence_logs=False, dashboard_address=None, ip="", loop=loop
    ) as c:
        sync(loop, assert_can_connect_from_everywhere_4_6, c.scheduler.port)