Example #1
async def test_worker_start_exception(cleanup):
    # make sure this raises the right Exception:
    with pytest.raises(StartException):
        async with Nanny("tcp://localhost:1", worker_class=BrokenWorker) as n:
            await n.start()
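Note: StartException and BrokenWorker are not part of the public distributed API; they are helpers defined in the surrounding test module. A minimal sketch consistent with this test (an assumption, not the original source) might be:

import pytest
from distributed import Nanny, Worker

class StartException(Exception):
    pass

class BrokenWorker(Worker):
    # Fail during startup so the Nanny surfaces StartException.
    async def start(self):
        raise StartException("broken")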
Example #2
def test_nanny_no_port():
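    # 8786 is the scheduler's address/port; the Nanny itself is given
    # no port of its own, hence the test name.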
    _ = str(Nanny('127.0.0.1', 8786))
Example #3
async def test_config(cleanup):
    async with Scheduler() as s:
        async with Nanny(s.address, config={"foo": "bar"}) as n:
            async with Client(s.address, asynchronous=True) as client:
                config = await client.run(dask.config.get, "foo")
                assert config[n.worker_address] == "bar"
Example #4
def test_data_types(c, s):
    w = yield Nanny(s.address, data=dict)
    r = yield c.run(lambda dask_worker: type(dask_worker.data))
    assert r[w.worker_address] == dict
    yield w.close()
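Examples #4 and #5 are old-style coroutine tests: c and s are a Client and Scheduler injected by distributed's gen_cluster test decorator, and yield plays the role that await plays in the async examples. A sketch of the wiring these snippets assume (nthreads=[] starts no regular workers, so only the Nanny's worker exists):

from distributed.utils_test import gen_cluster

@gen_cluster(client=True, nthreads=[])
def test_data_types(c, s):
    ...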
Example #5
def test_many_kills(s):
    n = yield Nanny(s.address, nthreads=2, loop=s.loop)
    assert n.is_alive()
    yield [n.kill() for i in range(5)]
    yield [n.kill() for i in range(5)]
    yield n.close()
Example #6
async def test_worker_start_exception(s):
    # make sure this raises the right Exception:
    with pytest.raises(StartException):
        async with Nanny(s.address, worker_class=BrokenWorker) as n:
            pass
Example #7
def test_nanny_death_timeout(s):
    yield s.close()
    w = yield Nanny(s.address, death_timeout=1)

    yield gen.sleep(3)
    assert w.status == "closed"
Example #8
def test_nanny_death_timeout():
    w = Nanny('127.0.0.1', 38848, death_timeout=1)
    yield w._start()

    yield gen.sleep(3)
    assert w.status == 'closed'
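Both variants exercise the same behavior: death_timeout=1 tells the nanny to shut itself down if it cannot reach a scheduler within one second, which is why its status reads "closed" after the sleep. A modern asyncio equivalent (a sketch under that assumption, not the original test):

import asyncio
from distributed import Nanny

async def check_death_timeout():
    # Nothing is listening on this address, so the nanny should
    # give up after ~1 second and mark itself closed.
    n = Nanny("tcp://127.0.0.1:38848", death_timeout=1)
    try:
        await n.start()
    except Exception:
        pass  # startup may raise once the timeout fires
    await asyncio.sleep(3)
    assert n.status == "closed"  # newer versions use a Status enum here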
Example #9
async def test_config(c, s):
    async with Nanny(s.address, config={"foo": "bar"}) as n:
        config = await c.run(dask.config.get, "foo")
        assert config[n.worker_address] == "bar"
Example #10
async def create_and_destroy_worker(delay):
    start = time()
    while time() < start + 5:
        async with Nanny(s.address, nthreads=2):
            await asyncio.sleep(delay)
        print("Killed nanny")
Example #11
async def test_web_preload_worker(cleanup, worker_preload):
    async with Scheduler(port=8786, host="localhost") as s:
        async with Nanny(
                preload_nanny=["http://127.0.0.1:12346/preload"]) as nanny:
            assert nanny.scheduler_addr == s.address
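Note that the Nanny here is given no scheduler address: the code fetched from http://127.0.0.1:12346/preload (served by the worker_preload fixture) is expected to supply it. A preload module of that shape (a sketch; dask_setup is the hook name distributed's preload machinery looks for) could be:

def dask_setup(dask_server):
    # Point the nanny at the scheduler; the address is illustrative.
    dask_server.scheduler_addr = "tcp://localhost:8786"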
Example #12
async def f():
    async with Scheduler(protocol=protocol,
                         interface='ib0',
                         dashboard_address=':8789') as s:
        async with Nanny(
                s.address,
                protocol=protocol,
                nthreads=1,
                memory_limit='32GB',
                env={'CUDA_VISIBLE_DEVICES': '2'},
        ) as w:
            async with Nanny(s.address,
                             protocol=protocol,
                             memory_limit='32gb',
                             env={'CUDA_VISIBLE_DEVICES': '3'},
                             nthreads=1) as w2:
                async with Client(s.address, asynchronous=True) as c:
                    with log_errors(pdb=True):
                        # Create a simple random array
                        n_rows = 50000000
                        n_keys = 5000000

                        # Smaller sizes used while debugging:
                        # n_rows = 5000000; n_keys = 500000  (worked; ~1.2 MB)
                        # n_rows = 5000000; n_keys = 2500000
                        # n_rows = 5000; n_keys = 2500

                        chunks = n_rows // 1000
                        left = dd.concat(
                            [
                                da.random.random(n_rows, chunks=chunks)
                                .to_dask_dataframe(columns='x'),
                                da.random.randint(0, n_keys, size=n_rows,
                                                  chunks=chunks)
                                .to_dask_dataframe(columns='id'),
                            ],
                            axis=1,
                        ).persist()

                        right = dd.concat(
                            [
                                da.random.random(n_rows, chunks=chunks)
                                .to_dask_dataframe(columns='y'),
                                da.random.randint(0, n_keys, size=n_rows,
                                                  chunks=chunks)
                                .to_dask_dataframe(columns='id'),
                            ],
                            axis=1,
                        ).persist()

                        print('Building CUDF DataFrames...')
                        gright = right.map_partitions(cudf.from_pandas)
                        gleft = left.map_partitions(cudf.from_pandas)

                        print(gleft.npartitions)
                        print(gright.npartitions)
                        print('Repartition Left 10...')
                        res = gleft.repartition(npartitions=10)
                        res = await res.persist()
                        out = await c.compute(res.head(compute=False))
                        print(out)
                        print("FINISHED!")
Example #13
    def __init__(
        self,
        scheduler=None,
        host=None,
        nthreads=1,
        name=None,
        memory_limit="auto",
        device_memory_limit="auto",
        rmm_pool_size=None,
        rmm_managed_memory=False,
        pid_file=None,
        resources=None,
        dashboard=True,
        dashboard_address=":0",
        local_directory=None,
        scheduler_file=None,
        interface=None,
        death_timeout=None,
        preload=[],
        dashboard_prefix=None,
        security=None,
        enable_tcp_over_ucx=False,
        enable_infiniband=False,
        enable_nvlink=False,
        enable_rdmacm=False,
        net_devices=None,
        jit_unspill=None,
        **kwargs,
    ):
        # Required by RAPIDS libraries (e.g., cuDF) to ensure no context
        # initialization happens before we can set CUDA_VISIBLE_DEVICES
        os.environ["RAPIDS_NO_INITIALIZE"] = "True"

        enable_proctitle_on_current()
        enable_proctitle_on_children()

        try:
            nprocs = len(os.environ["CUDA_VISIBLE_DEVICES"].split(","))
        except KeyError:
            nprocs = get_n_gpus()

        if nthreads < 1:
            raise ValueError("nthreads must be higher than 0.")

        memory_limit = parse_memory_limit(memory_limit,
                                          nthreads,
                                          total_cores=nprocs)

        if pid_file:
            with open(pid_file, "w") as f:
                f.write(str(os.getpid()))

            def del_pid_file():
                if os.path.exists(pid_file):
                    os.remove(pid_file)

            atexit.register(del_pid_file)

        if resources:
            resources = resources.replace(",", " ").split()
            resources = dict(pair.split("=") for pair in resources)
            resources = valmap(float, resources)
        else:
            resources = None

        loop = IOLoop.current()

        preload_argv = kwargs.get("preload_argv", [])
        kwargs = {"worker_port": None, "listen_address": None}

        if (not scheduler and not scheduler_file
                and dask.config.get("scheduler-address", None) is None):
            raise ValueError("Need to provide scheduler address like\n"
                             "dask-worker SCHEDULER_ADDRESS:8786")

        if isinstance(scheduler, Cluster):
            scheduler = scheduler.scheduler_address

        if interface and host:
            raise ValueError("Can not specify both interface and host")

        if rmm_pool_size is not None or rmm_managed_memory:
            try:
                import rmm  # noqa F401
            except ImportError:
                raise ValueError(
                    "RMM pool requested but module 'rmm' is not available. "
                    "For installation instructions, please see "
                    "https://github.com/rapidsai/rmm")  # pragma: no cover
            if rmm_pool_size is not None:
                rmm_pool_size = parse_bytes(rmm_pool_size)
        else:
            if enable_nvlink:
                warnings.warn(
                    "When using NVLink we recommend setting a "
                    "`rmm_pool_size`.  Please see: "
                    "https://dask-cuda.readthedocs.io/en/latest/ucx.html"
                    "#important-notes for more details")

        if enable_nvlink and rmm_managed_memory:
            raise ValueError(
                "RMM managed memory and NVLink are currently incompatible.")

        # Ensure this parent dask-cuda-worker process uses the same UCX
        # configuration as child worker processes created by it.
        initialize(
            create_cuda_context=False,
            enable_tcp_over_ucx=enable_tcp_over_ucx,
            enable_infiniband=enable_infiniband,
            enable_nvlink=enable_nvlink,
            enable_rdmacm=enable_rdmacm,
            net_devices=net_devices,
            cuda_device_index=0,
        )

        if jit_unspill is None:
            self.jit_unspill = dask.config.get("jit-unspill", default=False)
        else:
            self.jit_unspill = jit_unspill

        if self.jit_unspill:
            data = lambda i: (
                ProxifyHostFile,
                {
                    "device_memory_limit":
                    parse_device_memory_limit(device_memory_limit,
                                              device_index=i),
                },
            )
        else:
            data = lambda i: (
                DeviceHostFile,
                {
                    "device_memory_limit":
                    parse_device_memory_limit(device_memory_limit,
                                              device_index=i),
                    "memory_limit":
                    memory_limit,
                    "local_directory":
                    local_directory,
                },
            )

        self.nannies = [
            Nanny(
                scheduler,
                scheduler_file=scheduler_file,
                nthreads=nthreads,
                dashboard=dashboard,
                dashboard_address=dashboard_address,
                http_prefix=dashboard_prefix,
                loop=loop,
                resources=resources,
                memory_limit=memory_limit,
                interface=_get_interface(interface, host, i, net_devices),
                host=host,
                preload=(list(preload) or []) + ["dask_cuda.initialize"],
                preload_argv=(list(preload_argv) or []) +
                ["--create-cuda-context"],
                security=security,
                env={"CUDA_VISIBLE_DEVICES": cuda_visible_devices(i)},
                plugins={
                    CPUAffinity(get_cpu_affinity(i)),
                    RMMSetup(rmm_pool_size, rmm_managed_memory),
                },
                name=name if nprocs == 1 or not name else name + "-" + str(i),
                local_directory=local_directory,
                config={
                    "ucx":
                    get_ucx_config(
                        enable_tcp_over_ucx=enable_tcp_over_ucx,
                        enable_infiniband=enable_infiniband,
                        enable_nvlink=enable_nvlink,
                        enable_rdmacm=enable_rdmacm,
                        net_devices=net_devices,
                        cuda_device_index=i,
                    )
                },
                data=data(i),
                **kwargs,
            ) for i in range(nprocs)
        ]
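For orientation (inferred from the code, not stated in the source): this __init__ is dask_cuda's CUDAWorker constructor, which creates one Nanny per GPU listed in CUDA_VISIBLE_DEVICES (or per detected GPU when unset). A hypothetical construction using only parameters visible in the signature above:

# Hypothetical usage sketch; the address and pool size are illustrative.
from dask_cuda import CUDAWorker

worker = CUDAWorker("tcp://127.0.0.1:8786",
                    rmm_pool_size="24GB",
                    enable_tcp_over_ucx=True)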