def test_threads_per_worker(default_raw, dask_executor):
    """Dask workers should each see exactly one thread, while the inline
    executor should see all physical cores.

    NOTE(review): this test name is duplicated later in the file; the later
    definition shadows this one, so pytest never collects this test until
    the duplicate is renamed.
    """
    dask_ctx = Context(executor=dask_executor)
    inline_ctx = Context(executor=InlineJobExecutor())
    threads_dask = dask_ctx.run_udf(
        dataset=default_raw, udf=ThreadsPerWorkerUDF()
    )['num_threads']
    threads_inline = inline_ctx.run_udf(
        dataset=default_raw, udf=ThreadsPerWorkerUDF()
    )['num_threads']
    # Dask workers are single-threaded per worker process
    assert np.allclose(threads_dask, 1)
    # The inline executor runs in-process and may use all physical cores
    assert np.allclose(threads_inline, psutil.cpu_count(logical=False))
def test_multiple_clients(local_cluster_url, default_raw):
    """Running a UDF must work even when a second dask Client exists.

    Creating a second Client is enough to trigger the problem: even with
    `set_as_default=False`, that Client ends up being used by functions such
    as `dd.as_completed`, because `set_as_default` only switches the dask
    scheduler config to "dask.distributed" — it does NOT control which
    _client_ is the global default `Client`. Therefore `as_completed` must
    always be called with the correct `loop`, otherwise strange hangs and
    crashes can occur.
    """
    executor = DaskJobExecutor.connect(local_cluster_url)
    # Second client, deliberately left unused — see docstring for why its
    # mere existence matters.
    DaskJobExecutor.connect(local_cluster_url)
    ctx = Context(executor=executor)
    ctx.run_udf(dataset=default_raw, udf=SumUDF())
def test_threads_per_worker_vanilla(default_raw, monkeypatch):
    """A default Context created without NUMBA_NUM_THREADS set (triggers
    #1053) should still pin workers to one thread each, while the inline
    executor sees all physical cores."""
    saved_numba_threads = os.environ.get('NUMBA_NUM_THREADS')
    # Triggers #1053
    monkeypatch.delenv('NUMBA_NUM_THREADS', raising=False)
    ctx = Context()
    assert 'NUMBA_NUM_THREADS' not in os.environ
    # Restore the variable right away: it is set in pytest.ini and Numba
    # will complain if it changes after Numba has been initialized.
    if saved_numba_threads:
        os.environ['NUMBA_NUM_THREADS'] = saved_numba_threads
    inline_ctx = Context(executor=InlineJobExecutor())
    res = ctx.run_udf(dataset=default_raw, udf=ThreadsPerWorkerUDF())
    res_inline = inline_ctx.run_udf(dataset=default_raw, udf=ThreadsPerWorkerUDF())
    print(res['num_threads'].data)
    assert np.all(res['num_threads'].data == 1)
    print(res_inline['num_threads'].data)
    assert np.all(
        res_inline['num_threads'].data == psutil.cpu_count(logical=False)
    )
def test_threads_per_worker_dask(dask_executor, default_raw):
    """Each Dask worker should report exactly one available thread.

    Renamed from ``test_threads_per_worker``: that name already exists
    earlier in this module, and a duplicate ``def`` rebinds the module
    attribute, so pytest silently collected only this later definition and
    never ran the earlier, more thorough test.
    """
    ctx = Context(executor=dask_executor)
    res = ctx.run_udf(dataset=default_raw, udf=ThreadsPerWorkerUDF())['num_threads']
    assert np.allclose(res, 1)