def main(scheduler_file, interface, nthreads, local_directory, memory_limit,
         scheduler, bokeh_port, bokeh_prefix, nanny, bokeh_worker_port):
    """MPI entry point: rank 0 runs the scheduler (when ``scheduler`` is
    truthy); every other rank runs a worker (or nanny) until it closes.

    Relies on module-level ``rank`` (MPI rank) and ``loop`` (Tornado IOLoop).
    """
    # Resolve the bind host from a NIC name, if one was given.
    if interface:
        host = get_ip_interface(interface)
    else:
        host = None

    if rank == 0 and scheduler:
        # Scheduler role: attach the bokeh dashboard only if bokeh imports.
        try:
            from distributed.bokeh.scheduler import BokehScheduler
        except ImportError:
            services = {}
        else:
            services = {('bokeh', bokeh_port): partial(BokehScheduler,
                                                       prefix=bokeh_prefix)}
        scheduler = Scheduler(scheduler_file=scheduler_file,
                              loop=loop,
                              services=services)
        addr = uri_from_host_port(host, None, 8786)  # default port 8786
        scheduler.start(addr)
        try:
            loop.start()
            loop.close()
        finally:
            scheduler.stop()
    else:
        # Worker role: optionally supervised by a Nanny process.
        W = Nanny if nanny else Worker
        worker = W(scheduler_file=scheduler_file,
                   loop=loop,
                   # Name by MPI rank only when rank 0 hosts the scheduler.
                   name=rank if scheduler else None,
                   ncores=nthreads,
                   local_dir=local_directory,
                   services={('bokeh', bokeh_worker_port): BokehWorker},
                   memory_limit=memory_limit)
        addr = uri_from_host_port(host, None, 0)  # port 0: OS picks

        @gen.coroutine
        def run():
            # Start the worker, then poll until it reports itself closed.
            yield worker._start(addr)
            while worker.status != 'closed':
                yield gen.sleep(0.2)

        try:
            loop.run_sync(run)
            loop.close()
        finally:
            pass

        @gen.coroutine
        def close():
            yield worker._close(timeout=2)

        # NOTE(review): this runs after loop.close() above; run_sync on a
        # closed loop looks suspect -- confirm intended ordering (compare
        # create_and_run_worker, which closes inside the finally block).
        loop.run_sync(close)
def main(scheduler_file, interface, nthreads, local_directory, memory_limit,
         scheduler, bokeh_port, bokeh_prefix, nanny, bokeh_worker_port):
    """MPI launcher: the scheduler runs on rank 0 (when ``scheduler`` is
    truthy); all other ranks run a worker/nanny until shutdown.

    Uses module-level ``rank`` and ``loop``.
    """
    # Optional NIC name -> bind host.
    if interface:
        host = get_ip_interface(interface)
    else:
        host = None

    if rank == 0 and scheduler:
        # Dashboard service only when bokeh is importable.
        try:
            from distributed.bokeh.scheduler import BokehScheduler
        except ImportError:
            services = {}
        else:
            services = {('bokeh', bokeh_port): partial(BokehScheduler,
                                                       prefix=bokeh_prefix)}
        scheduler = Scheduler(scheduler_file=scheduler_file,
                              loop=loop,
                              services=services)
        addr = uri_from_host_port(host, None, 8786)  # default port 8786
        scheduler.start(addr)
        try:
            loop.start()
            loop.close()
        finally:
            scheduler.stop()
    else:
        # Worker side; Nanny wraps the worker in a supervisor process.
        W = Nanny if nanny else Worker
        worker = W(scheduler_file=scheduler_file,
                   loop=loop,
                   # Rank-based name only makes sense alongside a scheduler.
                   name=rank if scheduler else None,
                   ncores=nthreads,
                   local_dir=local_directory,
                   services={('bokeh', bokeh_worker_port): BokehWorker},
                   memory_limit=memory_limit)
        addr = uri_from_host_port(host, None, 0)  # OS-assigned port

        @gen.coroutine
        def run():
            # Run until the worker marks itself closed.
            yield worker._start(addr)
            while worker.status != 'closed':
                yield gen.sleep(0.2)

        try:
            loop.run_sync(run)
            loop.close()
        finally:
            pass

        @gen.coroutine
        def close():
            yield worker._close(timeout=2)

        # NOTE(review): run_sync after loop.close() above looks suspect --
        # confirm intended ordering.
        loop.run_sync(close)
def create_scheduler(loop, scheduler_file=None, host=None, bokeh=True,
                     bokeh_port=8787, bokeh_prefix=None, scheduler_port=None):
    """Build, start, and return a ``Scheduler`` on the given loop.

    The Bokeh dashboard service is attached only when ``bokeh`` is true and
    the ``distributed.bokeh`` module can be imported.
    """
    try:
        from distributed.bokeh.scheduler import BokehScheduler
    except ImportError:
        BokehScheduler = None

    services = {}
    if bokeh and BokehScheduler:
        services[('bokeh', bokeh_port)] = partial(BokehScheduler,
                                                  prefix=bokeh_prefix)

    scheduler = Scheduler(loop=loop, services=services,
                          scheduler_file=scheduler_file)
    # Default to port 8786 when no explicit scheduler_port is given.
    scheduler.start(uri_from_host_port(host, scheduler_port, 8786))
    return scheduler
def main(host, port, http_port, bokeh_port, bokeh_internal_port, show, _bokeh,
         bokeh_whitelist, prefix, use_xheaders, pid_file):
    """Start a distributed scheduler (plus optional Bokeh web UI) and run
    its IOLoop until interrupted.

    Relies on module-level names imported at file scope (``logger``,
    ``Scheduler``, ``HTTPScheduler``, ``uri_from_host_port``, ...).
    """
    # Write and register cleanup of a PID file, if requested.
    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    if sys.platform.startswith('linux'):
        import resource   # module fails importing on Windows
        # Raise the file-descriptor soft limit toward the hard limit.
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_internal_port)] = BokehScheduler
    scheduler = Scheduler(loop=loop, services=services)
    scheduler.start(addr)

    bokeh_proc = None
    if _bokeh:
        try:
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(http_port=http_port,
                                           tcp_port=scheduler.port,
                                           bokeh_port=bokeh_port,
                                           bokeh_whitelist=bokeh_whitelist,
                                           show=show, prefix=prefix,
                                           use_xheaders=use_xheaders,
                                           quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception:
            # FIX: logger.warn() is a deprecated alias; use warning().
            # The exception detail comes from exc_info, so no binding needed.
            logger.warning("Could not start Bokeh web UI", exc_info=True)

    logger.info('-' * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()

        logger.info("End scheduler at %r", addr)
def main(scheduler_file, interface, nthreads, local_directory, memory_limit,
         scheduler):
    """Minimal MPI entry point: scheduler on rank 0 (when ``scheduler`` is
    truthy), a plain Worker on every other rank.

    Uses module-level ``rank`` and ``loop``.
    """
    # Optional NIC name -> bind host.
    if interface:
        host = get_ip_interface(interface)
    else:
        host = None

    if rank == 0 and scheduler:
        scheduler = Scheduler(scheduler_file=scheduler_file,
                              loop=loop,
                              services={('bokeh', 8787): BokehScheduler})
        addr = uri_from_host_port(host, None, 8786)  # default port 8786
        scheduler.start(addr)
        try:
            loop.start()
            loop.close()
        finally:
            scheduler.stop()
    else:
        worker = Worker(scheduler_file=scheduler_file,
                        loop=loop,
                        name=rank if scheduler else None,
                        ncores=nthreads,
                        local_dir=local_directory,
                        # NOTE: keyed by name only here (no port tuple),
                        # unlike the other variants in this file.
                        services={'bokeh': BokehWorker},
                        memory_limit=memory_limit)
        addr = uri_from_host_port(host, None, 0)  # OS-assigned port

        @gen.coroutine
        def run():
            # Run until the worker marks itself closed.
            yield worker._start(addr)
            while worker.status != 'closed':
                yield gen.sleep(0.2)

        try:
            loop.run_sync(run)
            loop.close()
        finally:
            pass

        @gen.coroutine
        def close():
            yield worker._close(timeout=2)

        # NOTE(review): run_sync after loop.close() above looks suspect --
        # confirm intended ordering.
        loop.run_sync(close)
def create_and_run_worker(loop, host=None, rank=0, scheduler_file=None,
                          nanny=False, local_directory='', nthreads=0,
                          memory_limit='auto', bokeh=True, bokeh_port=8789,
                          bokeh_prefix=None):
    """Create a Worker (or Nanny) named after the MPI rank and run it on
    its own IOLoop until it closes, then shut it down cleanly.
    """
    # Dashboard service only when the bokeh worker module is importable.
    try:
        from distributed.bokeh.worker import BokehWorker
    except ImportError:
        BokehWorker = None
    if bokeh and BokehWorker:
        services = {('bokeh', bokeh_port): partial(BokehWorker,
                                                   prefix=bokeh_prefix)}
    else:
        services = {}

    W = Nanny if nanny else Worker
    worker = W(scheduler_file=scheduler_file,
               loop=loop,
               name='mpi-rank-%d' % rank,
               ncores=nthreads,
               local_dir=local_directory,
               services=services,
               memory_limit=memory_limit)
    addr = uri_from_host_port(host, None, 0)  # OS-assigned port

    @gen.coroutine
    def run_until_closed():
        # Start the worker, then poll until it reports itself closed.
        yield worker._start(addr)
        while worker.status != 'closed':
            yield gen.sleep(0.2)

    # Rebinds the `loop` parameter: from here on we drive the worker's
    # own loop rather than the one passed in.
    loop = worker.loop
    try:
        loop.run_sync(run_until_closed)
    finally:
        @gen.coroutine
        def close():
            yield worker._close(timeout=2)

        loop.run_sync(close)
        loop.close()
def scheduler():  # pragma: nocover
    """Run a dask scheduler inside a Skein/YARN application, publishing its
    address (and dashboard address, when available) to the application's
    key-value store.
    """
    app_client = skein.ApplicationClient.from_current()

    enable_proctitle_on_current()
    enable_proctitle_on_children()

    if sys.platform.startswith('linux'):
        import resource   # module fails importing on Windows
        # Raise the file-descriptor soft limit toward the hard limit.
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    # Bind to any host / any free port; YARN decides placement.
    addr = uri_from_host_port('', None, 0)

    loop = IOLoop.current()

    services = {}
    bokeh = False
    with ignoring(ImportError):
        from distributed.bokeh.scheduler import BokehScheduler
        services[('bokeh', 0)] = (BokehScheduler, {})  # port 0: OS picks
        bokeh = True

    scheduler = Scheduler(loop=loop, services=services)
    scheduler.start(addr)

    install_signal_handlers(loop)

    # Set dask.dashboard before dask.scheduler since the YarnCluster object
    # waits on dask.scheduler only
    if bokeh:
        bokeh_port = scheduler.services['bokeh'].port
        bokeh_host = urlparse(scheduler.address).hostname
        bokeh_address = 'http://%s:%d' % (bokeh_host, bokeh_port)
        app_client.kv['dask.dashboard'] = bokeh_address.encode()
    app_client.kv['dask.scheduler'] = scheduler.address.encode()

    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
def main(host, port, bokeh_port, show, _bokeh, bokeh_whitelist, bokeh_prefix,
         use_xheaders, pid_file, scheduler_file, interface, local_directory,
         preload, preload_argv, tls_ca_file, tls_cert, tls_key):
    """Run a dask scheduler from the command line until interrupted."""
    enable_proctitle_on_current()
    enable_proctitle_on_children()

    sec = Security(tls_ca_file=tls_ca_file,
                   tls_scheduler_cert=tls_cert,
                   tls_scheduler_key=tls_key,
                   )

    # Any TLS option implies listening with the tls:// protocol.
    if not host and (tls_ca_file or tls_cert or tls_key):
        host = 'tls://'

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    # Ensure a local working directory exists and is importable; remember
    # whether we created it so it can be removed on exit.
    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix='scheduler-')
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith('linux'):
        import resource   # module fails importing on Windows
        # Raise the file-descriptor soft limit toward the hard limit.
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    # Dashboard service is best-effort: silently absent without bokeh.
    services = {}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_port)] = (BokehScheduler,
                                               {'prefix': bokeh_prefix})
    scheduler = Scheduler(loop=loop, services=services,
                          scheduler_file=scheduler_file,
                          security=sec)
    scheduler.start(addr)

    # Fall back to configured preload modules when none given on the CLI.
    if not preload:
        preload = dask.config.get('distributed.scheduler.preload')
    if not preload_argv:
        preload_argv = dask.config.get('distributed.scheduler.preload-argv')
    preload_modules(preload, parameter=scheduler, file_dir=local_directory,
                    argv=preload_argv)

    logger.info('Local Directory: %26s', local_directory)
    logger.info('-' * 47)

    install_signal_handlers(loop)

    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if local_directory_created:
            shutil.rmtree(local_directory)

        logger.info("End scheduler at %r", addr)
def main(host, port, bokeh_port, show, _bokeh, bokeh_whitelist, bokeh_prefix,
         use_xheaders, pid_file, scheduler_file, interface, local_directory,
         preload, preload_argv, tls_ca_file, tls_cert, tls_key):
    """Run a dask scheduler from the command line until interrupted.

    Unlike the ``ignoring(ImportError)`` variant, this one logs why the
    web dashboard could not be loaded.
    """
    enable_proctitle_on_current()
    enable_proctitle_on_children()

    sec = Security(tls_ca_file=tls_ca_file,
                   tls_scheduler_cert=tls_cert,
                   tls_scheduler_key=tls_key,
                   )

    # Any TLS option implies listening with the tls:// protocol.
    if not host and (tls_ca_file or tls_cert or tls_key):
        host = 'tls://'

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    # Ensure a local working directory exists and is importable; remember
    # whether we created it so it can be removed on exit.
    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix='scheduler-')
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith('linux'):
        import resource   # module fails importing on Windows
        # Raise the file-descriptor soft limit toward the hard limit.
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {}
    if _bokeh:
        try:
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_port)] = (BokehScheduler,
                                               {'prefix': bokeh_prefix})
        except ImportError as error:
            # Distinguish "bokeh not installed" from other import failures.
            if str(error).startswith('No module named'):
                logger.info('Web dashboard not loaded. '
                            'Unable to import bokeh')
            else:
                logger.info('Unable to import bokeh: %s' % str(error))

    scheduler = Scheduler(loop=loop, services=services,
                          scheduler_file=scheduler_file,
                          security=sec)
    scheduler.start(addr)

    # Fall back to configured preload modules when none given on the CLI.
    if not preload:
        preload = dask.config.get('distributed.scheduler.preload')
    if not preload_argv:
        preload_argv = dask.config.get('distributed.scheduler.preload-argv')
    preload_modules(preload, parameter=scheduler, file_dir=local_directory,
                    argv=preload_argv)

    logger.info('Local Directory: %26s', local_directory)
    logger.info('-' * 47)

    install_signal_handlers(loop)

    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if local_directory_created:
            shutil.rmtree(local_directory)

        logger.info("End scheduler at %r", addr)
def main(scheduler, host, worker_port, listen_address, contact_address,
         nanny_port, nthreads, nprocs, nanny, name, memory_limit, pid_file,
         reconnect, resources, bokeh, bokeh_port, local_directory,
         scheduler_file, interface, death_timeout, preload, preload_argv,
         bokeh_prefix, tls_ca_file, tls_cert, tls_key, dashboard_address):
    """Run one or more dask workers (optionally under Nanny supervision)
    from the command line until they all close or a signal arrives.
    """
    # Relax GC thresholds to reduce collection overhead; see linked issue.
    g0, g1, g2 = gc.get_threshold()  # https://github.com/dask/distributed/issues/1653
    gc.set_threshold(g0 * 3, g1 * 3, g2 * 3)

    enable_proctitle_on_current()
    enable_proctitle_on_children()

    # Backwards compatibility for the renamed flag.
    if bokeh_port is not None:
        warnings.warn(
            "The --bokeh-port flag has been renamed to --dashboard-address. "
            "Consider adding ``--dashboard-address :%d`` " % bokeh_port)
        dashboard_address = bokeh_port

    sec = Security(tls_ca_file=tls_ca_file,
                   tls_worker_cert=tls_cert,
                   tls_worker_key=tls_key)

    # ---- Validate mutually exclusive CLI option combinations up front ----
    if nprocs > 1 and worker_port != 0:
        logger.error(
            "Failed to launch worker. You cannot use the --port argument when nprocs > 1."
        )
        exit(1)

    if nprocs > 1 and not nanny:
        logger.error(
            "Failed to launch worker. You cannot use the --no-nanny argument when nprocs > 1."
        )
        exit(1)

    if contact_address and not listen_address:
        logger.error(
            "Failed to launch worker. "
            "Must specify --listen-address when --contact-address is given")
        exit(1)

    if nprocs > 1 and listen_address:
        logger.error("Failed to launch worker. "
                     "You cannot specify --listen-address when nprocs > 1.")
        exit(1)

    if (worker_port or host) and listen_address:
        logger.error(
            "Failed to launch worker. "
            "You cannot specify --listen-address when --worker-port or --host is given."
        )
        exit(1)

    try:
        if listen_address:
            (host, worker_port) = get_address_host_port(listen_address,
                                                        strict=True)

        if contact_address:
            # we only need this to verify it is getting parsed
            (_, _) = get_address_host_port(contact_address, strict=True)
        else:
            # if contact address is not present we use the listen_address for contact
            contact_address = listen_address
    except ValueError as e:
        logger.error("Failed to launch worker. " + str(e))
        exit(1)

    # The externally-bound port: the nanny's when supervised.
    if nanny:
        port = nanny_port
    else:
        port = worker_port

    if not nthreads:
        nthreads = _ncores // nprocs

    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    # Dashboard service is best-effort: silently absent without bokeh.
    services = {}
    if bokeh:
        try:
            from distributed.bokeh.worker import BokehWorker
        except ImportError:
            pass
        else:
            if bokeh_prefix:
                result = (BokehWorker, {"prefix": bokeh_prefix})
            else:
                result = BokehWorker
            services[("bokeh", dashboard_address)] = result

    # Parse "a=1,b=2"-style resource declarations into {name: float}.
    if resources:
        resources = resources.replace(",", " ").split()
        resources = dict(pair.split("=") for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    if nanny:
        kwargs = {"worker_port": worker_port, "listen_address": listen_address}
        t = Nanny
    else:
        kwargs = {}
        if nanny_port:
            kwargs["service_ports"] = {"nanny": nanny_port}
        t = Worker

    if (not scheduler and not scheduler_file and
            dask.config.get("scheduler-address", None) is None):
        raise ValueError("Need to provide scheduler address like\n"
                         "dask-worker SCHEDULER_ADDRESS:8786")

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    if host or port:
        addr = uri_from_host_port(host, port, 0)
    else:
        # Choose appropriate address for scheduler
        addr = None

    if death_timeout is not None:
        death_timeout = parse_timedelta(death_timeout, "s")

    nannies = [t(scheduler,
                 scheduler_file=scheduler_file,
                 ncores=nthreads,
                 services=services,
                 loop=loop,
                 resources=resources,
                 memory_limit=memory_limit,
                 reconnect=reconnect,
                 local_dir=local_directory,
                 death_timeout=death_timeout,
                 preload=preload,
                 preload_argv=preload_argv,
                 security=sec,
                 contact_address=contact_address,
                 # Suffix the name with the process index when nprocs > 1.
                 name=name if nprocs == 1 or not name else name + "-" + str(i),
                 **kwargs)
               for i in range(nprocs)]

    @gen.coroutine
    def close_all():
        # Unregister all workers from scheduler
        if nanny:
            yield [n.close(timeout=2) for n in nannies]

    def on_signal(signum):
        logger.info("Exiting on signal %d", signum)
        close_all()

    @gen.coroutine
    def run():
        # Start all workers, then poll until every one has closed.
        yield [n._start(addr) for n in nannies]
        while all(n.status != "closed" for n in nannies):
            yield gen.sleep(0.2)

    install_signal_handlers(loop, cleanup=on_signal)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")
def main(host, port, http_port, bokeh_port, bokeh_internal_port, show, _bokeh,
         bokeh_whitelist, bokeh_prefix, use_xheaders, pid_file,
         scheduler_file, interface, local_directory, preload, prefix,
         tls_ca_file, tls_cert, tls_key):
    """Run a dask scheduler (HTTP + optional Bokeh services) until
    interrupted; rejects flags that have been removed or renamed.
    """
    # Hard-fail on removed/renamed command line options.
    if bokeh_internal_port:
        print("The --bokeh-internal-port keyword has been removed.\n"
              "The internal bokeh server is now the default bokeh server.\n"
              "Use --bokeh-port %d instead" % bokeh_internal_port)
        sys.exit(1)
    if prefix:
        print("The --prefix keyword has moved to --bokeh-prefix")
        sys.exit(1)

    sec = Security(tls_ca_file=tls_ca_file,
                   tls_scheduler_cert=tls_cert,
                   tls_scheduler_key=tls_key,
                   )

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    # Ensure a local working directory exists and is importable; remember
    # whether we created it so it can be removed on exit.
    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix='scheduler-')
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith('linux'):
        import resource   # module fails importing on Windows
        # Raise the file-descriptor soft limit toward the hard limit.
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_port)] = partial(BokehScheduler,
                                                      prefix=bokeh_prefix)
    scheduler = Scheduler(loop=loop, services=services,
                          scheduler_file=scheduler_file,
                          security=sec)
    scheduler.start(addr)

    preload_modules(preload, parameter=scheduler, file_dir=local_directory)

    logger.info('Local Directory: %26s', local_directory)
    logger.info('-' * 47)

    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if local_directory_created:
            shutil.rmtree(local_directory)

        logger.info("End scheduler at %r", addr)
def main(scheduler, host, nthreads, name, memory_limit, pid_file, reconnect,
         resources, bokeh, bokeh_port, local_directory, scheduler_file,
         interface, death_timeout, preload, preload_argv, bokeh_prefix,
         tls_ca_file, tls_cert, tls_key):
    """Run one Nanny-supervised dask worker per visible CUDA GPU, pinning
    each process to its device via CUDA_VISIBLE_DEVICES.
    """
    enable_proctitle_on_current()
    enable_proctitle_on_children()

    sec = Security(tls_ca_file=tls_ca_file,
                   tls_worker_cert=tls_cert,
                   tls_worker_key=tls_key)

    # One worker process per visible GPU.
    try:
        nprocs = len(os.environ["CUDA_VISIBLE_DEVICES"].split(","))
    except KeyError:
        nprocs = get_n_gpus()

    if not nthreads:
        # NOTE(review): min(1, ...) caps threads at 1 and yields 0 whenever
        # _ncores < nprocs; max(1, ...) may have been intended -- confirm.
        nthreads = min(1, _ncores // nprocs)

    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    # Dashboard service is best-effort: silently absent without bokeh.
    services = {}
    if bokeh:
        try:
            from distributed.bokeh.worker import BokehWorker
        except ImportError:
            pass
        else:
            if bokeh_prefix:
                result = (BokehWorker, {"prefix": bokeh_prefix})
            else:
                result = BokehWorker
            services[("bokeh", bokeh_port)] = result

    # Parse "a=1,b=2"-style resource declarations into {name: float}.
    if resources:
        resources = resources.replace(",", " ").split()
        resources = dict(pair.split("=") for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    kwargs = {"worker_port": None, "listen_address": None}
    t = Nanny

    if not scheduler and not scheduler_file and "scheduler-address" not in config:
        raise ValueError(
            "Need to provide scheduler address like\n"
            "dask-worker SCHEDULER_ADDRESS:8786"
        )

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    if host:
        addr = uri_from_host_port(host, 0, 0)
    else:
        # Choose appropriate address for scheduler
        addr = None

    if death_timeout is not None:
        death_timeout = parse_timedelta(death_timeout, "s")

    nannies = [t(scheduler,
                 scheduler_file=scheduler_file,
                 ncores=nthreads,
                 services=services,
                 loop=loop,
                 resources=resources,
                 memory_limit=memory_limit,
                 reconnect=reconnect,
                 local_dir=local_directory,
                 death_timeout=death_timeout,
                 preload=preload,
                 preload_argv=preload_argv,
                 security=sec,
                 contact_address=None,
                 # Pin each worker process to a single GPU.
                 env={"CUDA_VISIBLE_DEVICES": cuda_visible_devices(i)},
                 name=name if nprocs == 1 or not name else name + "-" + str(i),
                 **kwargs)
               for i in range(nprocs)]

    @gen.coroutine
    def close_all():
        # Unregister all workers from scheduler
        yield [n._close(timeout=2) for n in nannies]

    def on_signal(signum):
        logger.info("Exiting on signal %d", signum)
        close_all()

    @gen.coroutine
    def run():
        # Start all workers, then poll until every one has closed.
        yield [n._start(addr) for n in nannies]
        while all(n.status != "closed" for n in nannies):
            yield gen.sleep(0.2)

    install_signal_handlers(loop, cleanup=on_signal)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")
def main(host, port, bokeh_port, show, _bokeh, bokeh_whitelist, bokeh_prefix,
         use_xheaders, pid_file, scheduler_file, interface, local_directory,
         preload, preload_argv, tls_ca_file, tls_cert, tls_key):
    """Run an EDAS-flavored dask scheduler: a standard scheduler plus
    optional metrics plugins and an EDAS ``Comm`` channel.
    """
    logger = SchedulerLogger.getLogger()
    enable_proctitle_on_current()
    enable_proctitle_on_children()

    # Optional scheduler plugin for metrics logging.
    log_metrics = EdasEnv.getBool("log.metrics", False)
    logger.info(f"Log Metrics: {log_metrics}")
    plugins = [EDASSchedulerPlugin(logger)] if log_metrics else []

    sec = Security(tls_ca_file=tls_ca_file,
                   tls_scheduler_cert=tls_cert,
                   tls_scheduler_key=tls_key,
                   )

    # Any TLS option implies listening with the tls:// protocol.
    if not host and (tls_ca_file or tls_cert or tls_key):
        host = 'tls://'

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    # Ensure a local working directory exists and is importable; remember
    # whether we created it so it can be removed on exit.
    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix='scheduler-')
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith('linux'):
        import resource   # module fails importing on Windows
        # Raise the file-descriptor soft limit toward the hard limit.
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {}
    if _bokeh:
        try:
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_port)] = (BokehScheduler,
                                               {'prefix': bokeh_prefix})
        except ImportError as error:
            # Distinguish "bokeh not installed" from other import failures.
            if str(error).startswith('No module named'):
                logger.info('Web dashboard not loaded. '
                            'Unable to import bokeh')
            else:
                logger.info('Unable to import bokeh: %s' % str(error))

    scheduler = Scheduler(loop=loop, services=services,
                          scheduler_file=scheduler_file,
                          security=sec)
    for plugin in plugins:
        logger.info(f"@SP: Adding scheduler plugin: {plugin}")
        scheduler.add_plugin(plugin)
    scheduler.start(addr)

    # EDAS communication channel alongside the scheduler.
    comm = Comm(scheduler)
    comm.start()

    # Fall back to configured preload modules when none given on the CLI.
    if not preload:
        preload = dask.config.get('distributed.scheduler.preload', {})
    if not preload_argv:
        preload_argv = dask.config.get('distributed.scheduler.preload-argv', {})
    preload_modules(preload, parameter=scheduler, file_dir=local_directory,
                    argv=preload_argv)

    logger.info('Local Directory: %26s', local_directory)
    logger.info('-' * 47)

    install_signal_handlers(loop)

    def shutdown_scheduler():
        # Tear down the comm channel, scheduler, and temp directory.
        comm.terminate()
        scheduler.stop()
        if local_directory_created:
            shutil.rmtree(local_directory)
        logger.info("End scheduler at %r", addr)

    def close_loop():
        loop.stop()
        loop.close()
        shutdown_scheduler()

    # NOTE(review): shutdown_scheduler can run twice (finally below and the
    # atexit hook) -- confirm its operations are idempotent.
    atexit.register(close_loop)

    try:
        loop.start()
        loop.close()
    finally:
        shutdown_scheduler()
def main(host, port, http_port, bokeh_port, bokeh_external_port,
         bokeh_internal_port, show, _bokeh, bokeh_whitelist, prefix,
         use_xheaders, pid_file, scheduler_file, interface, local_directory,
         preload):
    """Run a dask scheduler with HTTP + internal Bokeh services and an
    optional external BokehWebInterface process.
    """
    # Hard-fail on the removed command line option.
    if bokeh_internal_port:
        print("The --bokeh-internal-port keyword has been removed.\n"
              "The internal bokeh server is now the default bokeh server.\n"
              "Use --bokeh-port %d instead" % bokeh_internal_port)
        sys.exit(1)

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    # Ensure a local working directory exists and is importable; remember
    # whether we created it so it can be removed on exit.
    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix='scheduler-')
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith('linux'):
        import resource   # module fails importing on Windows
        # Raise the file-descriptor soft limit toward the hard limit.
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_port)] = BokehScheduler
    scheduler = Scheduler(loop=loop, services=services,
                          scheduler_file=scheduler_file)
    scheduler.start(addr)

    preload_modules(preload, parameter=scheduler, file_dir=local_directory)

    bokeh_proc = None
    if _bokeh and bokeh_external_port is not None:
        if bokeh_external_port == 0:
            # This is a hack and not robust: the port may be taken by the
            # OS before we successfully pass it to Bokeh.
            # FIX: assign bokeh_external_port (previously this assigned
            # bokeh_port, so the freshly opened port was never used and
            # Bokeh still received 0).
            bokeh_external_port = open_port()
        try:
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(http_port=http_port,
                                           scheduler_address=scheduler.address,
                                           bokeh_port=bokeh_external_port,
                                           bokeh_whitelist=bokeh_whitelist,
                                           show=show, prefix=prefix,
                                           use_xheaders=use_xheaders,
                                           quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception:
            # FIX: logger.warn() is a deprecated alias; use warning().
            logger.warning("Could not start Bokeh web UI", exc_info=True)

    logger.info('Local Directory: %26s', local_directory)
    logger.info('-' * 47)
    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()
        if local_directory_created:
            shutil.rmtree(local_directory)

        logger.info("End scheduler at %r", addr)
def main(host, port, bokeh_port, show, _bokeh, bokeh_whitelist, bokeh_prefix,
         use_xheaders, pid_file, scheduler_file, interface, local_directory,
         preload, preload_argv, tls_ca_file, tls_cert, tls_key,
         dashboard_address):
    """Run a dask scheduler from the command line until interrupted
    (``--dashboard-address`` era; ``--bokeh-port`` kept for compatibility).
    """
    enable_proctitle_on_current()
    enable_proctitle_on_children()

    # Backwards compatibility for the renamed flag.
    if bokeh_port is not None:
        warnings.warn(
            "The --bokeh-port flag has been renamed to --dashboard-address. "
            "Consider adding ``--dashboard-address :%d`` " % bokeh_port)
        dashboard_address = bokeh_port

    sec = Security(tls_ca_file=tls_ca_file,
                   tls_scheduler_cert=tls_cert,
                   tls_scheduler_key=tls_key)

    # Any TLS option implies listening with the tls:// protocol.
    if not host and (tls_ca_file or tls_cert or tls_key):
        host = "tls://"

    if pid_file:
        with open(pid_file, "w") as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    # Ensure a local working directory exists and is importable; remember
    # whether we created it so it can be removed on exit.
    local_directory_created = False
    if local_directory:
        if not os.path.exists(local_directory):
            os.mkdir(local_directory)
            local_directory_created = True
    else:
        local_directory = tempfile.mkdtemp(prefix="scheduler-")
        local_directory_created = True
    if local_directory not in sys.path:
        sys.path.insert(0, local_directory)

    if sys.platform.startswith("linux"):
        import resource   # module fails importing on Windows
        # Raise the file-descriptor soft limit toward the hard limit.
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info("-" * 47)

    services = {}
    if _bokeh:
        try:
            from distributed.bokeh.scheduler import BokehScheduler
            services[("bokeh", dashboard_address)] = (BokehScheduler,
                                                      {"prefix": bokeh_prefix})
        except ImportError as error:
            # Distinguish "bokeh not installed" from other import failures.
            if str(error).startswith("No module named"):
                logger.info("Web dashboard not loaded. "
                            "Unable to import bokeh")
            else:
                logger.info("Unable to import bokeh: %s" % str(error))

    scheduler = Scheduler(loop=loop, services=services,
                          scheduler_file=scheduler_file,
                          security=sec)
    scheduler.start(addr)

    # Fall back to configured preload modules when none given on the CLI.
    if not preload:
        preload = dask.config.get("distributed.scheduler.preload")
    if not preload_argv:
        preload_argv = dask.config.get("distributed.scheduler.preload-argv")
    preload_modules(preload, parameter=scheduler, file_dir=local_directory,
                    argv=preload_argv)

    logger.info("Local Directory: %26s", local_directory)
    logger.info("-" * 47)

    install_signal_handlers(loop)

    try:
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if local_directory_created:
            shutil.rmtree(local_directory)

        logger.info("End scheduler at %r", addr)
def main(scheduler, host, worker_port, http_port, nanny_port, nthreads,
         nprocs, nanny, name, memory_limit, pid_file, reconnect, resources,
         bokeh, bokeh_port, local_directory, scheduler_file, interface,
         death_timeout, preload, bokeh_prefix, tls_ca_file, tls_cert,
         tls_key):
    """Run one or more dask workers (older variant with an HTTP service,
    scheduler-file polling, and hand-rolled signal handling).
    """
    sec = Security(tls_ca_file=tls_ca_file,
                   tls_worker_cert=tls_cert,
                   tls_worker_key=tls_key,
                   )

    # The externally-bound port: the nanny's when supervised.
    if nanny:
        port = nanny_port
    else:
        port = worker_port

    # ---- Validate mutually exclusive CLI option combinations up front ----
    if nprocs > 1 and worker_port != 0:
        logger.error(
            "Failed to launch worker. You cannot use the --port argument when nprocs > 1."
        )
        exit(1)

    if nprocs > 1 and name:
        logger.error(
            "Failed to launch worker. You cannot use the --name argument when nprocs > 1."
        )
        exit(1)

    if nprocs > 1 and not nanny:
        logger.error(
            "Failed to launch worker. You cannot use the --no-nanny argument when nprocs > 1."
        )
        exit(1)

    if not nthreads:
        nthreads = _ncores // nprocs

    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)

        atexit.register(del_pid_file)

    services = {('http', http_port): HTTPWorker}

    # Dashboard service is best-effort: silently absent without bokeh.
    if bokeh:
        try:
            from distributed.bokeh.worker import BokehWorker
        except ImportError:
            pass
        else:
            if bokeh_prefix:
                result = (BokehWorker, {'prefix': bokeh_prefix})
            else:
                result = BokehWorker
            services[('bokeh', bokeh_port)] = result

    # Parse "a=1,b=2"-style resource declarations into {name: float}.
    if resources:
        resources = resources.replace(',', ' ').split()
        resources = dict(pair.split('=') for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    if nanny:
        kwargs = {'worker_port': worker_port}
        t = Nanny
    else:
        kwargs = {}
        if nanny_port:
            kwargs['service_ports'] = {'nanny': nanny_port}
        t = Worker

    # Wait for the scheduler to write its connection file, then read the
    # address from it (retrying around partial writes).
    if scheduler_file:
        while not os.path.exists(scheduler_file):
            sleep(0.01)
        for i in range(10):
            try:
                with open(scheduler_file) as f:
                    cfg = json.load(f)
                scheduler = cfg['address']
                break
            except (ValueError, KeyError):  # race with scheduler on file
                sleep(0.01)

    if not scheduler:
        raise ValueError("Need to provide scheduler address like\n"
                         "dask-worker SCHEDULER_ADDRESS:8786")

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    if host or port:
        addr = uri_from_host_port(host, port, 0)
    else:
        # Choose appropriate address for scheduler
        addr = None

    nannies = [t(scheduler,
                 ncores=nthreads,
                 services=services,
                 name=name,
                 loop=loop,
                 resources=resources,
                 memory_limit=memory_limit,
                 reconnect=reconnect,
                 local_dir=local_directory,
                 death_timeout=death_timeout,
                 preload=preload,
                 security=sec,
                 **kwargs)
               for i in range(nprocs)]

    @gen.coroutine
    def close_all():
        try:
            if nanny:
                yield [n._close(timeout=2) for n in nannies]
        finally:
            loop.stop()

    def handle_signal(signum, frame):
        logger.info("Exiting on signal %d", signum)
        if loop._running:
            loop.add_callback_from_signal(loop.stop)
        else:
            exit(0)

    # NOTE: We can't use the generic install_signal_handlers() function from
    # distributed.cli.utils because we're handling the signal differently.
    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    for n in nannies:
        n.start(addr)

    @gen.coroutine
    def run():
        # Poll until every worker has closed.
        while all(n.status != 'closed' for n in nannies):
            yield gen.sleep(0.2)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")

    # Clean exit: unregister all workers from scheduler
    loop.run_sync(close_all)
def main(host, port, http_port, bokeh_port, bokeh_internal_port, show, _bokeh,
         bokeh_whitelist, prefix, use_xheaders, pid_file, scheduler_file,
         interface):
    """Start a dask scheduler and (optionally) a Bokeh web UI process.

    CLI entry point (argument parsing presumably handled by decorators outside
    this view — TODO confirm). Runs the Tornado IOLoop until interrupted, then
    stops the scheduler and the Bokeh process.
    """
    # Record our pid and clean the file up again at interpreter exit.
    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)
        atexit.register(del_pid_file)

    # Raise the open-file-descriptor soft limit: a scheduler holds one socket
    # per worker/client connection.
    if sys.platform.startswith('linux'):
        import resource   # module fails importing on Windows
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        limit = max(soft, hard // 2)
        resource.setrlimit(resource.RLIMIT_NOFILE, (limit, hard))

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    addr = uri_from_host_port(host, port, 8786)

    loop = IOLoop.current()
    logger.info('-' * 47)

    services = {('http', http_port): HTTPScheduler}
    # Bokeh diagnostics service is optional; skip silently if not installed.
    if _bokeh:
        with ignoring(ImportError):
            from distributed.bokeh.scheduler import BokehScheduler
            services[('bokeh', bokeh_internal_port)] = BokehScheduler
    scheduler = Scheduler(loop=loop, services=services,
                          scheduler_file=scheduler_file)
    scheduler.start(addr)

    bokeh_proc = None
    if _bokeh:
        if bokeh_port == 0:             # This is a hack and not robust
            bokeh_port = open_port()    # This port may be taken by the OS
        try:                            # before we successfully pass it to Bokeh
            from distributed.bokeh.application import BokehWebInterface
            bokeh_proc = BokehWebInterface(
                http_port=http_port,
                scheduler_address=scheduler.address,
                bokeh_port=bokeh_port,
                bokeh_whitelist=bokeh_whitelist, show=show, prefix=prefix,
                use_xheaders=use_xheaders, quiet=False)
        except ImportError:
            logger.info("Please install Bokeh to get Web UI")
        except Exception:
            # The web UI is non-essential: log the failure and keep the
            # scheduler running.  logger.warning replaces the deprecated
            # logger.warn alias.
            logger.warning("Could not start Bokeh web UI", exc_info=True)

    logger.info('-' * 47)
    try:
        # Runs until the process is interrupted.
        loop.start()
        loop.close()
    finally:
        scheduler.stop()
        if bokeh_proc:
            bokeh_proc.close()

        logger.info("End scheduler at %r", addr)
def main(scheduler, host, worker_port, listen_address, contact_address,
         nanny_port, nthreads, nprocs, nanny, name, memory_limit, pid_file,
         reconnect, resources, bokeh, bokeh_port, local_directory,
         scheduler_file, interface, death_timeout, preload, preload_argv,
         bokeh_prefix, tls_ca_file, tls_cert, tls_key):
    """Start ``nprocs`` dask workers (optionally under nannies) and block
    until they all close.

    CLI entry point (argument parsing presumably handled by decorators outside
    this view — TODO confirm). Supports separate listen and contact addresses
    for workers behind NAT/port forwarding.
    """
    # Set a descriptive process title for this process and its children.
    enable_proctitle_on_current()
    enable_proctitle_on_children()

    # TLS credentials used for worker <-> scheduler communication.
    sec = Security(tls_ca_file=tls_ca_file,
                   tls_worker_cert=tls_cert,
                   tls_worker_key=tls_key,
                   )

    # Option combinations that cannot work together.
    if nprocs > 1 and worker_port != 0:
        logger.error("Failed to launch worker. You cannot use the --port argument when nprocs > 1.")
        exit(1)

    if nprocs > 1 and not nanny:
        logger.error("Failed to launch worker. You cannot use the --no-nanny argument when nprocs > 1.")
        exit(1)

    if contact_address and not listen_address:
        logger.error("Failed to launch worker. "
                     "Must specify --listen-address when --contact-address is given")
        exit(1)

    if nprocs > 1 and listen_address:
        logger.error("Failed to launch worker. "
                     "You cannot specify --listen-address when nprocs > 1.")
        exit(1)

    if (worker_port or host) and listen_address:
        logger.error("Failed to launch worker. "
                     "You cannot specify --listen-address when --worker-port or --host is given.")
        exit(1)

    try:
        if listen_address:
            (host, worker_port) = get_address_host_port(listen_address, strict=True)

        if contact_address:
            # we only need this to verify it is getting parsed
            (_, _) = get_address_host_port(contact_address, strict=True)
        else:
            # if contact address is not present we use the listen_address for contact
            contact_address = listen_address
    except ValueError as e:
        logger.error("Failed to launch worker. " + str(e))
        exit(1)

    # With a nanny, --nanny-port is the port this process listens on; the
    # worker subprocess gets --worker-port via kwargs below.
    if nanny:
        port = nanny_port
    else:
        port = worker_port

    # Default: split the machine's cores evenly across the processes.
    if not nthreads:
        nthreads = _ncores // nprocs

    # Record our pid and clean the file up again at interpreter exit.
    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)
        atexit.register(del_pid_file)

    services = {}

    # Bokeh dashboard is strictly optional; silently skip if not installed.
    if bokeh:
        try:
            from distributed.bokeh.worker import BokehWorker
        except ImportError:
            pass
        else:
            if bokeh_prefix:
                result = (BokehWorker, {'prefix': bokeh_prefix})
            else:
                result = BokehWorker
            services[('bokeh', bokeh_port)] = result

    # Parse "KEY=VAL,KEY2=VAL2" (comma- or space-separated) into {str: float}.
    if resources:
        resources = resources.replace(',', ' ').split()
        resources = dict(pair.split('=') for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    # Choose the class to instantiate: Nanny supervises a worker subprocess,
    # Worker runs in-process.
    if nanny:
        kwargs = {'worker_port': worker_port, 'listen_address': listen_address}
        t = Nanny
    else:
        kwargs = {}
        if nanny_port:
            kwargs['service_ports'] = {'nanny': nanny_port}
        t = Worker

    # The scheduler address may also come from the global dask config.
    if not scheduler and not scheduler_file and 'scheduler-address' not in config:
        raise ValueError("Need to provide scheduler address like\n"
                         "dask-worker SCHEDULER_ADDRESS:8786")

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    if host or port:
        addr = uri_from_host_port(host, port, 0)
    else:
        # Choose appropriate address for scheduler
        addr = None

    # Normalize e.g. "5s" to a number of seconds.
    if death_timeout is not None:
        death_timeout = parse_timedelta(death_timeout, 's')

    # With nprocs > 1 each worker gets a "-<i>" suffix on its name.
    nannies = [t(scheduler, scheduler_file=scheduler_file,
                 ncores=nthreads,
                 services=services,
                 loop=loop,
                 resources=resources,
                 memory_limit=memory_limit,
                 reconnect=reconnect,
                 local_dir=local_directory,
                 death_timeout=death_timeout,
                 preload=preload,
                 preload_argv=preload_argv,
                 security=sec,
                 contact_address=contact_address,
                 name=name if nprocs == 1 or not name else name + '-' + str(i),
                 **kwargs)
               for i in range(nprocs)]

    @gen.coroutine
    def close_all():
        # Unregister all workers from scheduler
        if nanny:
            yield [n._close(timeout=2) for n in nannies]

    def on_signal(signum):
        logger.info("Exiting on signal %d", signum)
        close_all()

    @gen.coroutine
    def run():
        # Start everything, then park the loop until every worker has closed.
        yield [n._start(addr) for n in nannies]
        while all(n.status != 'closed' for n in nannies):
            yield gen.sleep(0.2)

    install_signal_handlers(loop, cleanup=on_signal)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")
def main(scheduler, host, worker_port, listen_address, contact_address,
         nanny_port, nthreads, nprocs, nanny, name, memory_limit, pid_file,
         reconnect, resources, bokeh, bokeh_port, local_directory,
         scheduler_file, interface, death_timeout, preload, bokeh_prefix,
         tls_ca_file, tls_cert, tls_key):
    """Start ``nprocs`` dask workers (optionally under nannies) and block
    until they all close.

    CLI entry point (argument parsing presumably handled by decorators outside
    this view — TODO confirm). Supports separate listen and contact addresses
    for workers behind NAT/port forwarding.
    """
    # TLS credentials used for worker <-> scheduler communication.
    sec = Security(
        tls_ca_file=tls_ca_file,
        tls_worker_cert=tls_cert,
        tls_worker_key=tls_key,
    )

    # Option combinations that cannot work together.
    if nprocs > 1 and worker_port != 0:
        logger.error(
            "Failed to launch worker. You cannot use the --port argument when nprocs > 1."
        )
        exit(1)

    if nprocs > 1 and name:
        logger.error(
            "Failed to launch worker. You cannot use the --name argument when nprocs > 1."
        )
        exit(1)

    if nprocs > 1 and not nanny:
        logger.error(
            "Failed to launch worker. You cannot use the --no-nanny argument when nprocs > 1."
        )
        exit(1)

    if contact_address and not listen_address:
        logger.error(
            "Failed to launch worker. "
            "Must specify --listen-address when --contact-address is given")
        exit(1)

    if nprocs > 1 and listen_address:
        logger.error("Failed to launch worker. "
                     "You cannot specify --listen-address when nprocs > 1.")
        exit(1)

    if (worker_port or host) and listen_address:
        logger.error(
            "Failed to launch worker. "
            "You cannot specify --listen-address when --worker-port or --host is given."
        )
        exit(1)

    try:
        if listen_address:
            (host, worker_port) = get_address_host_port(listen_address, strict=True)

        if contact_address:
            # we only need this to verify it is getting parsed
            (_, _) = get_address_host_port(contact_address, strict=True)
        else:
            # if contact address is not present we use the listen_address for contact
            contact_address = listen_address
    except ValueError as e:
        logger.error("Failed to launch worker. " + str(e))
        exit(1)

    # With a nanny, --nanny-port is the port this process listens on; the
    # worker subprocess gets --worker-port via kwargs below.
    if nanny:
        port = nanny_port
    else:
        port = worker_port

    # Default: split the machine's cores evenly across the processes.
    if not nthreads:
        nthreads = _ncores // nprocs

    # Record our pid and clean the file up again at interpreter exit.
    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)
        atexit.register(del_pid_file)

    services = {}

    # Bokeh dashboard is strictly optional; silently skip if not installed.
    if bokeh:
        try:
            from distributed.bokeh.worker import BokehWorker
        except ImportError:
            pass
        else:
            if bokeh_prefix:
                result = (BokehWorker, {'prefix': bokeh_prefix})
            else:
                result = BokehWorker
            services[('bokeh', bokeh_port)] = result

    # Parse "KEY=VAL,KEY2=VAL2" (comma- or space-separated) into {str: float}.
    if resources:
        resources = resources.replace(',', ' ').split()
        resources = dict(pair.split('=') for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    # Choose the class to instantiate: Nanny supervises a worker subprocess,
    # Worker runs in-process.
    if nanny:
        kwargs = {'worker_port': worker_port, 'listen_address': listen_address}
        t = Nanny
    else:
        kwargs = {}
        if nanny_port:
            kwargs['service_ports'] = {'nanny': nanny_port}
        t = Worker

    if not scheduler and not scheduler_file:
        raise ValueError("Need to provide scheduler address like\n"
                         "dask-worker SCHEDULER_ADDRESS:8786")

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    if host or port:
        addr = uri_from_host_port(host, port, 0)
    else:
        # Choose appropriate address for scheduler
        addr = None

    nannies = [
        t(scheduler, scheduler_file=scheduler_file, ncores=nthreads,
          services=services, name=name, loop=loop, resources=resources,
          memory_limit=memory_limit, reconnect=reconnect,
          local_dir=local_directory, death_timeout=death_timeout,
          preload=preload, security=sec, contact_address=contact_address,
          **kwargs)
        for i in range(nprocs)
    ]

    @gen.coroutine
    def close_all():
        # Unregister all workers from scheduler
        if nanny:
            yield [n._close(timeout=2) for n in nannies]

    def on_signal(signum):
        logger.info("Exiting on signal %d", signum)
        close_all()

    @gen.coroutine
    def run():
        # Start everything, then park the loop until every worker has closed.
        yield [n.start(addr) for n in nannies]
        while all(n.status != 'closed' for n in nannies):
            yield gen.sleep(0.2)

    install_signal_handlers(loop, cleanup=on_signal)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")
def main(scheduler, host, worker_port, http_port, nanny_port, nthreads, nprocs,
         nanny, name, memory_limit, pid_file, reconnect, resources, bokeh,
         bokeh_port, local_directory, scheduler_file, interface, death_timeout,
         preload, bokeh_prefix, tls_ca_file, tls_cert, tls_key):
    """Start ``nprocs`` dask workers (optionally under nannies), block until
    they close, then explicitly unregister them and kill nanny subprocesses.

    CLI entry point (argument parsing presumably handled by decorators outside
    this view — TODO confirm). Teardown runs on a second IOLoop because the
    primary one has already been closed by then.
    """
    # TLS credentials used for worker <-> scheduler communication.
    sec = Security(
        tls_ca_file=tls_ca_file,
        tls_worker_cert=tls_cert,
        tls_worker_key=tls_key,
    )

    # With a nanny, --nanny-port is the port we listen on; the worker itself
    # gets --worker-port via kwargs below.
    if nanny:
        port = nanny_port
    else:
        port = worker_port

    # A single fixed port/name cannot be shared by nprocs > 1 workers.
    if nprocs > 1 and worker_port != 0:
        logger.error(
            "Failed to launch worker. You cannot use the --port argument when nprocs > 1."
        )
        exit(1)

    if nprocs > 1 and name:
        logger.error(
            "Failed to launch worker. You cannot use the --name argument when nprocs > 1."
        )
        exit(1)

    # Default: split the machine's cores evenly across the processes.
    if not nthreads:
        nthreads = _ncores // nprocs

    # Record our pid and clean the file up again at interpreter exit.
    if pid_file:
        with open(pid_file, 'w') as f:
            f.write(str(os.getpid()))

        def del_pid_file():
            if os.path.exists(pid_file):
                os.remove(pid_file)
        atexit.register(del_pid_file)

    services = {('http', http_port): HTTPWorker}

    # Bokeh dashboard is strictly optional; silently skip if not installed.
    if bokeh:
        try:
            from distributed.bokeh.worker import BokehWorker
        except ImportError:
            pass
        else:
            if bokeh_prefix:
                result = (BokehWorker, {'prefix': bokeh_prefix})
            else:
                result = BokehWorker
            services[('bokeh', bokeh_port)] = result

    # Parse "KEY=VAL,KEY2=VAL2" (comma- or space-separated) into {str: float}.
    if resources:
        resources = resources.replace(',', ' ').split()
        resources = dict(pair.split('=') for pair in resources)
        resources = valmap(float, resources)
    else:
        resources = None

    loop = IOLoop.current()

    # Choose the class to instantiate: Nanny supervises a worker subprocess,
    # Worker runs in-process.
    if nanny:
        kwargs = {'worker_port': worker_port}
        t = Nanny
    else:
        kwargs = {}
        if nanny_port:
            kwargs['service_ports'] = {'nanny': nanny_port}
        t = Worker

    # Poll the scheduler file until the scheduler has written its address.
    # The bounded retry loop guards against reading a partially written file.
    if scheduler_file:
        while not os.path.exists(scheduler_file):
            sleep(0.01)
        for i in range(10):
            try:
                with open(scheduler_file) as f:
                    cfg = json.load(f)
                scheduler = cfg['address']
                break
            except (ValueError, KeyError):  # race with scheduler on file
                sleep(0.01)

    if not scheduler:
        raise ValueError("Need to provide scheduler address like\n"
                         "dask-worker SCHEDULER_ADDRESS:8786")

    nannies = [
        t(scheduler, ncores=nthreads, services=services, name=name,
          loop=loop,
          resources=resources, memory_limit=memory_limit,
          reconnect=reconnect, local_dir=local_directory,
          death_timeout=death_timeout, preload=preload, security=sec,
          **kwargs)
        for i in range(nprocs)
    ]

    if interface:
        if host:
            raise ValueError("Can not specify both interface and host")
        else:
            host = get_ip_interface(interface)

    if host or port:
        addr = uri_from_host_port(host, port, 0)
    else:
        # Choose appropriate address for scheduler
        addr = None

    # global_nannies is a module-level list — presumably consumed by a
    # signal/atexit handler elsewhere in this file; confirm against callers.
    for n in nannies:
        n.start(addr)
        if t is Nanny:
            global_nannies.append(n)

    @gen.coroutine
    def run():
        # Park the loop until every worker/nanny has closed.
        while all(n.status != 'closed' for n in nannies):
            yield gen.sleep(0.2)

    try:
        loop.run_sync(run)
    except (KeyboardInterrupt, TimeoutError):
        pass
    finally:
        logger.info("End worker")
        loop.close()

    # Clean exit: unregister all workers from scheduler
    # A fresh IOLoop is needed since the primary one was closed above.
    loop2 = IOLoop()

    @gen.coroutine
    def f():
        if nanny:
            w = nannies[0]
            # Ask the scheduler (2s budget) to drop every worker we started.
            with w.rpc(w.scheduler.address) as scheduler:
                yield gen.with_timeout(
                    timeout=timedelta(seconds=2),
                    future=All([scheduler.unregister(address=n.worker_address,
                                                     close=True)
                                for n in nannies
                                if n.process and n.worker_address]),
                    io_loop=loop2)

    loop2.run_sync(f)
    loop2.close()

    # Forcefully terminate any nanny subprocess still alive...
    if nanny:
        for n in nannies:
            if isalive(n.process):
                n.process.terminate()

    # ...and give them up to one second to actually die.
    if nanny:
        start = time()
        while (any(isalive(n.process) for n in nannies)
               and time() < start + 1):
            sleep(0.1)

    # NOTE: the loop variable shadows the `nanny` flag parameter, which is
    # not read again after this point.
    for nanny in nannies:
        nanny.stop()