def ctx(request, dask_executor):
    if request.param == 'inline':
        yield Context.make_with('inline')
    elif request.param == "dask_executor":
        yield Context(executor=dask_executor)
    elif request.param == "delayed_default":
        yield Context(executor=DelayedJobExecutor())
    elif request.param == "delayed_dist":
        with distributed.Client(
                n_workers=2,
                threads_per_worker=4,
                processes=True) as _:
            yield Context(executor=DelayedJobExecutor())
    elif request.param == "dask_make_default":
        try:
            ctx = Context.make_with('dask-make-default')
            yield ctx
        finally:
            # cleanup: Close cluster and client
            # This is also tested below, here just to make
            # sure things behave as expected.
            assert isinstance(ctx.executor, DaskJobExecutor)
            ctx.executor.is_local = True
            ctx.close()
    elif request.param == "dask_integration":
        with distributed.Client(
                n_workers=2,
                threads_per_worker=4,
                processes=False) as _:
            yield Context.make_with("dask-integration")
    elif request.param == "concurrent":
        yield Context.make_with("threads")
    elif request.param == "delayed":
        yield Context(executor=DelayedJobExecutor())
def setup_executor(address=None, n_workers=None, threads_per_worker=1, **kwds):
    """
    Setup a Dask distributed cluster scheduler client

    Parameters
    ----------
    address : str, optional
        This can be the address of a ``Scheduler`` server, like a string
        ``'127.0.0.1:8786'``. If ``None``, sets up a ``LocalCluster``
    n_workers : int, optional
        Number of workers. Only used if setting up a ``LocalCluster``
    threads_per_worker : int, optional
        Number of threads per worker
    kwds
        Additional options passed to :py:func:`distributed.Client`

    Returns
    -------
    distributed.Client
        Distributed compute client
    """
    import distributed
    try:
        client = distributed.Client(address=address,
                                    n_workers=n_workers,
                                    threads_per_worker=threads_per_worker,
                                    **kwds)
    except Exception:
        logger.exception('Could not start `distributed` cluster')
        raise
    else:
        return client
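# A minimal usage sketch for setup_executor above (not from the original
# source; the worker counts and the submitted task are illustrative
# assumptions):
def example_setup_executor_usage():
    # with no address, setup_executor starts a LocalCluster-backed client
    client = setup_executor(n_workers=2, threads_per_worker=1)
    try:
        future = client.submit(sum, [1, 2, 3])
        assert future.result() == 6
    finally:
        client.close()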
def ee_dask_deploy(config, pb_id, image, n_workers=1, buffers=[], secrets=[]):
    """Deploy Dask execution engine.

    :param config: configuration DB handle
    :param pb_id: processing block ID
    :param image: Docker image to deploy
    :param n_workers: number of Dask workers
    :param buffers: list of buffers to mount on Dask workers
    :param secrets: list of secrets to mount on Dask workers
    :return: deployment ID and Dask client handle
    """
    # Make deployment
    deploy_id = "proc-{}-dask".format(pb_id)
    values = {"image": image, "worker.replicas": n_workers}
    for i, b in enumerate(buffers):
        values["buffers[{}]".format(i)] = b
    for i, s in enumerate(secrets):
        values["secrets[{}]".format(i)] = s
    deploy = ska_sdp_config.Deployment(
        deploy_id, "helm", {"chart": "dask", "values": values}
    )
    for txn in config.txn():
        txn.create_deployment(deploy)

    # Wait for scheduler to become available
    scheduler = deploy_id + "-scheduler." + os.environ["SDP_HELM_NAMESPACE"] + ":8786"
    client = None
    while client is None:
        try:
            client = distributed.Client(scheduler, timeout=1)
        except Exception:
            # Scheduler not reachable yet; keep retrying.
            pass

    return deploy_id, client
def test_retrieve() -> None:
    cluster = distributed.LocalCluster(
        ip='localhost:8786',
        # I want a bokeh interface to check progress
        dashboard_address='localhost:8787',
        # single process, single thread allows ctrl+C backtrace to
        # show where the code is getting stuck. Otherwise, it will say,
        # "I'm stuck waiting for other processes." It also makes
        # time_code more meaningful
        processes=False,
        threads_per_worker=1,
    )
    # TODO: put this in a reusable module
    with distributed.Client(cluster):
        # disable the cache, because I don't want to persist these results
        # in the cloud
        for cached_func in [
                retrieve.get_rfs, retrieve.get_paragraphs,
                retrieve.get_raw_forms, retrieve.get_indexes
        ]:
            assert isinstance(cached_func, Cache)
            cast(Cache, cached_func).disabled = True
        rfs = dask.bag.zip(  # pylint: disable=unused-variable
            retrieve.get_indexes('10-K', 1995, 1),
            retrieve.get_rfs(1995, 1)).take(10, npartitions=1)
def fullReconstruction(self):
    from xicam.Tomography.widgets.volumeviewer import VolumeViewer

    volumeviewer = VolumeViewer()
    self.recontabs.addTab(volumeviewer, '????')

    currentitem = self.headermodel.item(self.rawtabview.currentIndex())
    if not currentitem:
        msg.showMessage('Error: You must open files before reconstructing.')
        return
    try:
        msg.showBusy()
        msg.showMessage('Running slice reconstruction...', level=msg.INFO)
        currentheader = self.headermodel.item(self.rawtabview.currentIndex()).header
        readprocess = self.workflow.processes[0]  # hopefully! TODO: require a readprocess first
        readprocess.path.value = currentheader.startdoc['path']

        numofsinograms = currentheader.meta_array('primary').shape[1]

        executor = DaskExecutor()
        client = distributed.Client()

        def chunkiterator(workflow):
            for i in range(0, int(numofsinograms), int(readprocess.chunksize.value)):
                readprocess.sinoindex.value = i
                yield executor.execute(workflow)

        _reconthread = QThreadFutureIterator(
            chunkiterator,
            self.workflow,
            callback_slot=partial(self.showReconstruction, mode=self.fullrecon),
            except_slot=self.exceptionCallback)
        _reconthread.start()
    except Exception as ex:
        msg.logError(ex)
        msg.showReady()
        msg.clearMessage()
def get_dask(
    self,
    ca_path="dask_ca.crt",
    client_cert_path="dask_client_cert.pem",
    hostname=None,
    port=8786,
):
    with open(ca_path, "w") as fout:
        fout.write(self.config["tls_ca"])

    userproxy_path = os.environ.get("X509_USER_PROXY",
                                    "/tmp/x509up_u%d" % os.getuid())
    with open(userproxy_path, "rb") as fin:
        userproxy = fin.read()

    result = self.api.post("/clientkey", data={"proxycert": userproxy})
    if result.status_code == 401:
        raise RuntimeError(
            "Authorization denied while retrieving dask certificate")
    elif result.status_code != 200:
        raise RuntimeError("Error while retrieving dask certificate")
    with open(client_cert_path, "w") as fout:
        fout.write(result.text)

    sec = distributed.security.Security(
        tls_ca_file=ca_path,
        tls_client_cert=client_cert_path,
        require_encryption=True,
    )
    if hostname is None:
        hostname = self.hostname
    url = f"tls://{hostname}:{port}"
    return distributed.Client(url, security=sec)
def setup(self):
    try:
        import distributed
    except ImportError:
        raise NotImplementedError
    self.client = distributed.Client()
    self.write = create_delayed_write()
def get_client() -> distributed.Client:
    address = os.environ.get('SCHEDULER_PORT', None)
    for _ in range(10):
        try:
            client = distributed.Client(address=address)
        except OSError:
            continue
        else:
            break
    else:
        # try > else > break was never hit
        raise ValueError('Could not connect to scheduler')

    n_workers_ = os.environ.get('N_WORKERS', None)
    if n_workers_ is not None:
        n_workers = int(n_workers_)
        logging.debug('Waiting for: %d workers', n_workers)
        client.wait_for_workers(n_workers)

    egg_url = os.environ.get('DEPLOY_EGG', None)
    if egg_url is not None:
        logging.debug('Downloading and uploading egg: %s', egg_url)
        with tempfile.TemporaryDirectory() as tempdir:
            egg_path = Path(tempdir) / 'code.egg'
            src = urllib.request.urlopen(egg_url)
            with egg_path.open('wb') as dst:
                shutil.copyfileobj(src, dst)
            client.upload_file(str(egg_path))

    return client
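# Hedged usage sketch for get_client() above (the environment variable values
# are illustrative assumptions, not part of the original source):
def example_get_client_usage():
    os.environ['SCHEDULER_PORT'] = 'tcp://scheduler:8786'  # assumed address
    os.environ['N_WORKERS'] = '4'                          # assumed worker count
    client = get_client()
    print(client.scheduler_info())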
def executor(kind: str, max_workers: int, daemon=True) -> typing.Iterator[Executor]:
    """General purpose utility to get an executor with its as_completed handler

    This allows us to easily use other executors as needed.
    """
    if kind == "thread":
        with ThreadPoolExecutor(max_workers=max_workers) as pool_t:
            yield pool_t
    elif kind == "process":
        with ProcessPoolExecutor(max_workers=max_workers) as pool_p:
            yield pool_p
    elif kind in ["dask", "dask-process", "dask-thread"]:
        import dask
        import distributed
        from distributed.cfexecutor import ClientExecutor

        processes = kind == "dask" or kind == "dask-process"
        with dask.config.set({"distributed.worker.daemon": daemon}):
            with distributed.LocalCluster(
                n_workers=max_workers,
                processes=processes,
            ) as cluster:
                with distributed.Client(cluster) as client:
                    yield ClientExecutor(client)
    else:
        raise NotImplementedError("That kind is not implemented")
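# Hedged usage sketch for executor() above, assuming it is wrapped with
# contextlib.contextmanager in the original source; the task and worker
# count here are illustrative:
from concurrent.futures import as_completed

def example_executor_usage():
    with executor("thread", max_workers=4) as pool:
        futures = [pool.submit(pow, 2, n) for n in range(8)]
        results = sorted(f.result() for f in as_completed(futures))
    return results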
def test_prctl_on_docker(module_scoped_container_getter, tmp_path):
    network_info = module_scoped_container_getter.get("scheduler").network_info[0]
    client = distributed.Client(
        f"tcp://{network_info.hostname}:{network_info.host_port}", set_as_default=False
    )
    core_test(client, tmp_path)
def dask_client():
    cluster = distributed.LocalCluster(n_workers=3, threads_per_worker=1)
    client = distributed.Client(cluster)
    yield client
    client.close()
    cluster.close()
def test_use_distributed():
    # This Client is pretty cheap to start
    # since it only uses threads
    with distributed.Client(n_workers=1, threads_per_worker=1, processes=False) as c:
        ctx = Context.make_with("dask-integration")
        assert isinstance(ctx.executor, DaskJobExecutor)
        assert ctx.executor.client is c
def client(self, attempts=10, **kwargs):
    '''Wait for scheduler to be initialized and return Client(self)'''
    block(self.instances[0][2])
    for i in reversed(range(attempts)):
        try:
            return distributed.Client(self, **kwargs)
        except (TimeoutError, ConnectionRefusedError, OSError) as e:
            if i == 0:
                raise e
def external_client():
    # Explicitly specify we want only 4 workers so that when running on
    # continuous integration we don't request too many.
    cluster = distributed.LocalCluster(n_workers=4)
    client = distributed.Client(cluster)
    yield client
    client.close()
    cluster.close()
def get_context(self):
    """Returns Dask Client for Scheduler"""
    details = self.get_config_data()
    if details is not None:
        print("Connect to Dask: %s" % details["master_url"])
        client = distributed.Client(details["master_url"])
        return client
    return None
def dispatch_computations(job_args, tmp_dir, timestamp):
    client = distributed.Client('localhost:8786')
    webbrowser.open('http://localhost:8787')
    jobs = []
    for job_arg in job_args:
        job = client.submit(compute, *job_arg)
        jobs.append(job)
    check_jobs(jobs, job_args, tmp_dir, timestamp)
def pipeline_scan(st, segments=None, cl=None, host=None, cfile=None,
                  vys_timeout=vys_timeout_default, mem_read=0., mem_search=0.,
                  throttle=False, mockseg=None):
    """ Given rfpipe state and dask distributed client, run search pipeline.
    """
    if cl is None:
        if host is None:
            cl = distributed.Client(n_workers=1,
                                    threads_per_worker=16,
                                    resources={"READER": 1,
                                               "MEMORY": 16e9},
                                    local_dir="/lustre/evla/test/realfast/scratch")
        else:
            cl = distributed.Client('{0}:{1}'.format(host, '8786'))

    if not isinstance(segments, list):
        segments = list(range(st.nsegment))

    futures = []
    sleeptime = throttle * 0.8 * st.nints * st.inttime / st.nsegment  # bit shorter than scan
    for segment in segments:
        futures.append(pipeline_seg(st, segment, cl=cl, cfile=cfile,
                                    vys_timeout=vys_timeout, mem_read=mem_read,
                                    mem_search=mem_search, mockseg=mockseg))
        if throttle:
            sleep(sleeptime)

    return futures  # list of tuples of futures (seg, data, cc, acc)
def check_dask(self):
    try:
        import distributed
        client = distributed.Client(self.nodes[0].strip() + ":8786")
        workers = client.scheduler_info().get("workers", {})
        print("Found %d workers: %s" % (len(workers), str(workers)))
        return client.scheduler_info()
    except Exception:
        pass
    return None
def test_executor_disables_watch_worker_events_with_false(self):
    with distributed.Client(n_workers=1, processes=False, set_as_default=False) as client:
        executor = DaskExecutor(address=client.scheduler.address, watch_worker_status=False)
        with executor.start():
            assert executor.watch_worker_status is False
            assert executor._watch_dask_events_task is None
def test_bias_batch_recommend_dask(ml_folds: MLFolds):
    algo = Bias(damping=5)
    algo = TopN(algo)

    with closing(distributed.Client()), joblib.parallel_backend('dask'):
        recs = ml_folds.eval_all(algo, dask=True)
        assert isinstance(recs, ddf.DataFrame)

    ml_folds.check_positive_ndcg(recs)
def dask_client(mock_service_envs: None) -> Iterable[distributed.Client]:
    print(pformat(dask.config.get("distributed")))
    with distributed.LocalCluster(
        worker_class=distributed.Worker,
        **{
            "resources": {"CPU": 10, "GPU": 10, "MPI": 1},
            "preload": "simcore_service_dask_sidecar.tasks",
        },
    ) as cluster:
        with distributed.Client(cluster) as client:
            yield client
def test_connect_to_running_cluster(self):
    with distributed.Client(processes=False, set_as_default=False) as client:
        executor = DaskExecutor(address=client.scheduler.address)
        assert executor.address == client.scheduler.address
        assert executor.cluster_class is None
        assert executor.cluster_kwargs is None
        assert executor.client_kwargs == {"set_as_default": False}

        with executor.start():
            res = executor.wait(executor.submit(lambda x: x + 1, 1))
            assert res == 2
def setup(self):
    try:
        import distributed
    except ImportError:
        raise NotImplementedError()

    # TODO: Lazily skipped in CI as it is very demanding and slow.
    # Improve times and remove errors.
    _skip_slow()

    self.client = distributed.Client()
    self.write = create_delayed_write()
def test_executor_enables_watch_worker_events_with_true(self):
    with distributed.Client(n_workers=1, processes=False, set_as_default=False) as client:
        executor = DaskExecutor(
            address=client.scheduler.address,
            watch_worker_status=True,
            adapt_kwargs={"maximum": 4},
        )
        with executor.start():
            assert executor.watch_worker_status is True
            assert executor._watch_dask_events_task is not None
def init_client(processes, max_memory):
    memory_limit = int(max_memory / processes)
    memory_limit = '{0:d}MB'.format(memory_limit)
    logger.info(
        'Initialising client with {0:d} workers and {1:s} per worker'.format(
            processes, memory_limit))
    cluster = distributed.LocalCluster(n_workers=processes,
                                       threads_per_worker=1,
                                       memory_limit=memory_limit,
                                       local_directory='/scratch/u/u300636')
    client = distributed.Client(cluster)
    logger.info('Initialised client: {0}'.format(client))
    return client
def execute(self, wf, client=None):
    # global client, graph
    services = {"machines": [{"name": "freyja",
                              "address": "freyja.nsls2.bnl.gov",
                              "port": 22,
                              "username": "******",
                              "password": "******",
                              "environment": {"PYTHONUSERBASE": "/tmp"},
                              "config_dir": "/tmp/camera"}],
                "graph": {"configure": [{"machine": "freyja",
                                         "apps": [{"name": "dask/dask-scheduler"}],
                                         "tasks": ["dask-cluster"]}]}}
    services = json.dumps(services)
    print(services)

    graph = task_graph.Graph()
    graph.parse_stream(services)
    graph.start_tasks()
    graph.connect()
    graph.execute()

    meta_data = graph.machines[0].tasks[0].request_meta_data()
    local_port = graph.machines[0].node.get_free_local_port()
    remote_port = meta_data[0][0]
    print(local_port, remote_port)
    graph.machines[0].node.forward_tunnel(local_port, "localhost", remote_port)

    client = distributed.Client("tcp://localhost:" + str(local_port))

    return super(CamLinkExecutor, self).execute(wf, client)[0]
def start(self):
    if (self.tls_ca) or (self.tls_key) or (self.tls_cert):
        from distributed.security import Security

        security = Security(
            tls_client_key=self.tls_key,
            tls_client_cert=self.tls_cert,
            tls_ca_file=self.tls_ca,
        )
    else:
        security = None

    self.client = distributed.Client(self.cluster_address, security=security)
    self.futures = {}
def test_setup_executor_distributed(n_workers=1, threads_per_worker=2):
    cluster = distributed.LocalCluster(n_workers=n_workers,
                                       threads_per_worker=threads_per_worker)
    client = distributed.Client(cluster)
    address = cluster.scheduler.address
    test = executor.setup_executor(address)
    assert test.scheduler.address == cluster.scheduler.address
    assert client.scheduler_info() == test.scheduler_info()
    test.close()
    cluster.close()
    client.close()
def start_jupyterlab():
    scheduler_address = get_scheduler_address()
    client = distributed.Client(f'{scheduler_address}')
    host = client.run_on_scheduler(socket.gethostname)
    userid = getpass.getuser()

    with open(LOG_DIRECTORY + 'jupyterlab.log', 'wb') as fp:
        subprocess.run('jupyter-lab --port=7787 --no-browser &',
                       shell=SHELL,
                       check=True,
                       stdout=fp,
                       stderr=subprocess.STDOUT)

    ssh_jpylab = (f"ssh -t -L 7787:localhost:7787 -L 8787:localhost:8787 "
                  f"{userid}@theta.alcf.anl.gov "
                  f"ssh -t -L 7787:localhost:7787 -L 8787:localhost:8787 {MOMNODE} "
                  f"ssh -t -L 7787:localhost:7787 -L 8787:localhost:8787 {host}")
    print(
        f"\nTo connect to JupyterLab and Dask dashboard, execute the following command in a shell on your local machine:\n {ssh_jpylab}\n"
    )
    logging.info(
        f"To connect to JupyterLab and Dask dashboard, execute the following command in a shell on your local machine:\n {ssh_jpylab}\n"
    )

    jpylab_url = ''
    counter = 0
    while jpylab_url == '':
        time.sleep(1)
        # f"sed -n '/^[^[].*localhost:7787/p' {LOG_DIRECTORY}jupyterlab.log",
        w = subprocess.run(
            "jupyter server list | sed -n '/localhost:7787/ s/ .*//p'",
            shell=SHELL,
            capture_output=True,
            text=True)
        jpylab_url = w.stdout.strip()
        counter += 1
        if counter > 20:
            jpylab_url = 'http://localhost:7787/'
            break

    print(
        f"To open JupyterLab, go to (see log file {LOG_DIRECTORY}jupyterlab.log):\n {jpylab_url}\n"
    )
    logging.info(
        f"To open JupyterLab, go to (see log file {LOG_DIRECTORY}jupyterlab.log):\n {jpylab_url}\n"
    )
    print(
        "To open the Dask dashboard, go to:\n http://localhost:8787/status\n"
    )
    logging.info(
        "To open the Dask dashboard, go to:\n http://localhost:8787/status\n"
    )
def start(self):
    if self.tls_ca or self.tls_key or self.tls_cert:
        from distributed.security import Security

        security = Security(
            tls_client_key=self.tls_key,
            tls_client_cert=self.tls_cert,
            tls_ca_file=self.tls_ca,
            require_encryption=True,
        )
    else:
        security = None

    self.client = distributed.Client(self.cluster_address, security=security)
    self.futures = {}