Example #1
import numpy as np
import psutil

from libertem.api import Context
from libertem.executor.inline import InlineJobExecutor
from utils import ThreadsPerWorkerUDF  # test helper; import path assumed


def test_threads_per_worker(default_raw, dask_executor):
    ctx = Context(executor=dask_executor)
    inline_ctx = Context(executor=InlineJobExecutor())
    res = ctx.run_udf(dataset=default_raw,
                      udf=ThreadsPerWorkerUDF())['num_threads']
    res_inline = inline_ctx.run_udf(dataset=default_raw,
                                    udf=ThreadsPerWorkerUDF())['num_threads']
    # Dask workers run single-threaded; the inline executor gets all physical cores.
    assert np.allclose(res, 1)
    assert np.allclose(res_inline, psutil.cpu_count(logical=False))
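ThreadsPerWorkerUDF used above is a helper from the LiberTEM test suite, not part of the public API. Below is a minimal sketch of what such a UDF could look like, assuming the standard LiberTEM UDF interface (get_result_buffers / process_frame) and a threads_per_worker field on the UDF metadata; the details are illustrative only.

from libertem.udf import UDF


class ThreadsPerWorkerUDF(UDF):
    def get_result_buffers(self):
        # One integer per navigation position, recording how many threads
        # the executing worker is configured to use.
        return {
            'num_threads': self.buffer(kind='nav', dtype=int),
        }

    def process_frame(self, frame):
        # threads_per_worker is provided by the executor via the UDF
        # metadata; fall back to 1 if it is not set.
        self.results.num_threads[:] = self.meta.threads_per_worker or 1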
Example #2
from libertem.api import Context
from libertem.executor.dask import DaskJobExecutor
from libertem.udf.sum import SumUDF


def test_multiple_clients(local_cluster_url, default_raw):
    ex1 = DaskJobExecutor.connect(local_cluster_url)

    # This creates a second Client. Even though we are setting `set_as_default=False`,
    # this Client is then used by functions like `dd.as_completed`, because
    # `set_as_default` only sets the dask scheduler config to "dask.distributed"; it does
    # not make the _client_ the global default `Client`!
    # So any time `as_completed` is called, the `loop` needs to be set correctly,
    # otherwise this may result in strange hangs and crashes.
    DaskJobExecutor.connect(local_cluster_url)

    udf = SumUDF()

    cx1 = Context(executor=ex1)
    cx1.run_udf(dataset=default_raw, udf=udf)
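The pitfall described in the comment can be reproduced with plain dask.distributed: a Client created with set_as_default=False still ends up being used by helpers such as as_completed. A small sketch of the defensive pattern, passing the owning client's event loop explicitly; the scheduler address here is a placeholder.

import distributed as dd


def double(x):
    return x * 2


# Two clients against the same scheduler; the address is a placeholder.
client_a = dd.Client('tcp://localhost:8786')
client_b = dd.Client('tcp://localhost:8786', set_as_default=False)

futures = client_a.map(double, range(4))

# Pass the loop of the client that owns the futures instead of relying
# on whichever client happens to be the current one:
for fut in dd.as_completed(futures, loop=client_a.loop):
    print(fut.result())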
Example #3
import os

import numpy as np
import psutil

from libertem.api import Context
from libertem.executor.inline import InlineJobExecutor
from utils import ThreadsPerWorkerUDF  # test helper; import path assumed


def test_threads_per_worker_vanilla(default_raw, monkeypatch):
    old_threads = os.environ.get('NUMBA_NUM_THREADS')
    # Triggers #1053
    monkeypatch.delenv('NUMBA_NUM_THREADS', raising=False)
    ctx = Context()
    assert 'NUMBA_NUM_THREADS' not in os.environ
    # We have to restore it properly since it is set in pytest.ini
    # and Numba will complain if it is changed after the fact.
    if old_threads:
        os.environ['NUMBA_NUM_THREADS'] = old_threads
    inline_ctx = Context(executor=InlineJobExecutor())
    res = ctx.run_udf(dataset=default_raw, udf=ThreadsPerWorkerUDF())
    res_inline = inline_ctx.run_udf(dataset=default_raw,
                                    udf=ThreadsPerWorkerUDF())
    print(res['num_threads'].data)
    assert np.all(res['num_threads'].data == 1)
    print(res_inline['num_threads'].data)
    assert np.all(res_inline['num_threads'].data == psutil.cpu_count(
        logical=False))
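For context, NUMBA_NUM_THREADS determines how many threads Numba's threading layer picks up, and, as the comment notes, Numba complains if the value changes after it has been read, which is why the test restores the variable before anything touches Numba again. A quick way to compare the requested and the effective value, assuming a recent Numba version that provides numba.get_num_threads:

import os

import numba


# What the environment requests (may be None if unset) ...
print('NUMBA_NUM_THREADS env:', os.environ.get('NUMBA_NUM_THREADS'))
# ... versus the thread count Numba actually uses.
print('numba.get_num_threads():', numba.get_num_threads())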
Example #4
import numpy as np
from libertem.api import Context
from utils import ThreadsPerWorkerUDF  # test helper; import path assumed


def test_threads_per_worker(dask_executor, default_raw):
    ctx = Context(executor=dask_executor)
    res = ctx.run_udf(dataset=default_raw,
                      udf=ThreadsPerWorkerUDF())['num_threads']
    assert np.allclose(res, 1)