from typing import List

from dask.distributed import Client, SSHCluster


def initialize_client_for_ssh_cluster(scheduler_host: str, worker_hosts: List[str]) -> Client:
    # SSHCluster treats the first host as the scheduler and the rest as workers.
    ssh_hosts = [scheduler_host, *worker_hosts]
    try:
        cluster = SSHCluster(
            hosts=ssh_hosts,
            connect_options={"known_hosts": None},
            worker_options={"nthreads": 1},
            # scheduler_options={"port": 0, "dashboard_address": ":8787"}
        )
        client = Client(cluster)
    except (KeyError, OSError):
        # Fall back to connecting to an already-running scheduler on the default port.
        scheduler_address = f"{scheduler_host}:8786"
        client = Client(address=scheduler_address)
    return client
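# A minimal usage sketch for the function above, assuming passwordless SSH access
# to all hosts; the hostnames are hypothetical.
if __name__ == "__main__":
    client = initialize_client_for_ssh_cluster(
        scheduler_host="scheduler.example.com",
        worker_hosts=["worker1.example.com", "worker2.example.com"],
    )
    # Smoke-test the cluster with a trivial round trip.
    print(client.submit(sum, [1, 2, 3]).result())
    client.close()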
# Assumes: from dask.distributed import Client, LocalCluster, SSHCluster
def setup_cluster(self, mode='local', cluster=None, **kwargs):
    if self.client is not None:
        raise ValueError('Client is already set - call shutdown first!')
    if cluster is None:
        if mode == 'local':
            cluster = LocalCluster(**kwargs)
        elif mode == 'ssh':
            cluster = SSHCluster(**kwargs)
        elif mode == 'slurm':
            raise NotImplementedError('Slurm cluster is not implemented'
                                      ' in this version!')
        else:
            raise RuntimeError('Unknown cluster setup mode {!r}!'.format(mode))
    self.client = Client(cluster)
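# A hedged usage sketch for the method above: its enclosing class is not shown,
# so `ClusterManager` below is a hypothetical stand-in that just carries the
# method and a `client` attribute.
from dask.distributed import Client, LocalCluster, SSHCluster


class ClusterManager:
    def __init__(self):
        self.client = None

    setup_cluster = setup_cluster  # reuse the function defined above as a method


manager = ClusterManager()
# Extra keyword arguments are forwarded to the chosen cluster constructor
# (here LocalCluster's n_workers / threads_per_worker).
manager.setup_cluster(mode='local', n_workers=2, threads_per_worker=1)
print(manager.client)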
# `ot` below is OpenTURNS; the wrapped function returns an ot.Sample.
import openturns as ot


def _exec_sample_dask(func, dask_args, verbosity):
    from dask.distributed import Client, progress, SSHCluster

    python_list = None
    if 'remote_python' in dask_args:
        # Start with the scheduler's python interpreter.
        python_list = [dask_args['remote_python'][dask_args['scheduler']]]
    worker_list = []
    for worker, n_cpus in dask_args['workers'].items():
        worker_list.extend([worker] * n_cpus)
        if python_list is not None:
            # Add the python interpreter path of each worker.
            python_list.extend([dask_args['remote_python'][worker]] * n_cpus)
    cluster = SSHCluster(
        [dask_args['scheduler']] + worker_list,
        connect_options={"known_hosts": None},
        worker_options={"nthreads": 1, "n_workers": 1},
        scheduler_options={"port": 0, "dashboard_address": ":8787"},
        remote_python=python_list,
    )
    client = Client(cluster)

    def _exec_sample(X):
        map_eval = client.map(func, X)
        if verbosity:
            progress(map_eval)
        result = client.submit(list, map_eval)
        return ot.Sample(result.result())

    return _exec_sample, cluster, client
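# A usage sketch for the helper above. The dask_args layout is inferred from the
# code: 'scheduler' is a hostname, 'workers' maps hostname -> CPU count, and the
# optional 'remote_python' maps hostname -> python executable path. The hostnames
# are hypothetical and `func` is a stand-in model.
def func(x):
    return [sum(x)]


dask_args = {
    'scheduler': 'scheduler.example.com',
    'workers': {'worker1.example.com': 4},
    'remote_python': {
        'scheduler.example.com': '/usr/bin/python3',
        'worker1.example.com': '/usr/bin/python3',
    },
}
_exec_sample, cluster, client = _exec_sample_dask(func, dask_args, verbosity=True)
sample = _exec_sample([[1.0, 2.0], [3.0, 4.0]])  # returns an ot.Sample
client.close()
cluster.close()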
# Dagster executor method; names such as `check`, `DaskExecutor`, `query_on_dask_worker`,
# and `iterate_with_context` come from the surrounding module's imports.
def execute(self, pipeline_context, execution_plan):
    check.inst_param(pipeline_context, "pipeline_context", SystemPipelineExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.param_invariant(
        isinstance(pipeline_context.executor, DaskExecutor),
        "pipeline_context",
        "Expected executor to be DaskExecutor got {}".format(pipeline_context.executor),
    )
    check.invariant(
        pipeline_context.instance.is_persistent,
        "Dask execution requires a persistent DagsterInstance",
    )

    step_levels = execution_plan.execution_step_levels()

    pipeline_name = pipeline_context.pipeline_def.name

    instance = pipeline_context.instance

    cluster_type = self.cluster_type
    if cluster_type == "local":
        from dask.distributed import LocalCluster

        cluster = LocalCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "yarn":
        from dask_yarn import YarnCluster

        cluster = YarnCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "ssh":
        from dask.distributed import SSHCluster

        cluster = SSHCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "pbs":
        from dask_jobqueue import PBSCluster

        cluster = PBSCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "moab":
        from dask_jobqueue import MoabCluster

        cluster = MoabCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "sge":
        from dask_jobqueue import SGECluster

        cluster = SGECluster(**self.build_dict(pipeline_name))
    elif cluster_type == "lsf":
        from dask_jobqueue import LSFCluster

        cluster = LSFCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "slurm":
        from dask_jobqueue import SLURMCluster

        cluster = SLURMCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "oar":
        from dask_jobqueue import OARCluster

        cluster = OARCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "kube":
        from dask_kubernetes import KubeCluster

        cluster = KubeCluster(**self.build_dict(pipeline_name))
    else:
        raise ValueError(
            f"Must provide one of the following ('local', 'yarn', 'ssh', 'pbs', 'moab', "
            f"'sge', 'lsf', 'slurm', 'oar', 'kube'), not {cluster_type}"
        )

    with dask.distributed.Client(cluster) as client:
        execution_futures = []
        execution_futures_dict = {}

        for step_level in step_levels:
            for step in step_level:
                # We ensure correctness in sequencing by letting Dask schedule futures and
                # awaiting dependencies within each step.
                dependencies = []
                for step_input in step.step_inputs:
                    for key in step_input.dependency_keys:
                        dependencies.append(execution_futures_dict[key])

                run_config = dict(pipeline_context.run_config, execution={"in_process": {}})
                recon_repo = pipeline_context.pipeline.get_reconstructable_repository()

                dask_task_name = "%s.%s" % (pipeline_name, step.key)

                recon_pipeline = recon_repo.get_reconstructable_pipeline(pipeline_name)

                future = client.submit(
                    query_on_dask_worker,
                    dependencies,
                    recon_pipeline,
                    pipeline_context.pipeline_run,
                    run_config,
                    [step.key],
                    pipeline_context.mode_def.name,
                    instance.get_ref(),
                    key=dask_task_name,
                    resources=get_dask_resource_requirements(step.tags),
                )

                execution_futures.append(future)
                execution_futures_dict[step.key] = future

        # This tells Dask to await the step executions and retrieve their results
        # to the master.
        futures = dask.distributed.as_completed(execution_futures, with_results=True)

        # Allow interrupts while waiting for the results from Dask.
        for future, result in iterate_with_context(raise_interrupts_immediately, futures):
            for step_event in result:
                check.inst(step_event, DagsterEvent)
                yield step_event
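# The if/elif chain above can be collapsed into a table-driven factory. This is an
# alternative sketch, not Dagster's API: it references only the cluster classes
# named above and preserves the lazy, per-backend imports of the original.
import importlib

_CLUSTER_REGISTRY = {
    "local": ("dask.distributed", "LocalCluster"),
    "yarn": ("dask_yarn", "YarnCluster"),
    "ssh": ("dask.distributed", "SSHCluster"),
    "pbs": ("dask_jobqueue", "PBSCluster"),
    "moab": ("dask_jobqueue", "MoabCluster"),
    "sge": ("dask_jobqueue", "SGECluster"),
    "lsf": ("dask_jobqueue", "LSFCluster"),
    "slurm": ("dask_jobqueue", "SLURMCluster"),
    "oar": ("dask_jobqueue", "OARCluster"),
    "kube": ("dask_kubernetes", "KubeCluster"),
}


def make_cluster(cluster_type, **kwargs):
    try:
        module_name, class_name = _CLUSTER_REGISTRY[cluster_type]
    except KeyError:
        raise ValueError(
            f"Must provide one of {sorted(_CLUSTER_REGISTRY)}, not {cluster_type!r}"
        )
    # Import the backend module only when its cluster type is requested.
    cluster_cls = getattr(importlib.import_module(module_name), class_name)
    return cluster_cls(**kwargs)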
# An earlier, function-based variant of the Dagster executor; names such as
# `check`, `DaskConfig`, and `query_on_dask_worker` come from the surrounding
# module's imports.
def execute(pipeline_context, execution_plan):
    check.inst_param(pipeline_context, 'pipeline_context', SystemPipelineExecutionContext)
    check.inst_param(execution_plan, 'execution_plan', ExecutionPlan)

    dask_config = pipeline_context.executor_config

    check.param_invariant(
        isinstance(pipeline_context.executor_config, DaskConfig),
        'pipeline_context',
        'Expected executor_config to be DaskConfig got {}'.format(
            pipeline_context.executor_config),
    )

    # Checks to ensure storage is compatible with Dask configuration
    storage = pipeline_context.environment_dict.get('storage')
    check.invariant(storage.keys(), 'Must specify storage to use Dask execution')

    check.invariant(
        pipeline_context.instance.is_persistent,
        'Dask execution requires a persistent DagsterInstance',
    )

    # https://github.com/dagster-io/dagster/issues/2440
    check.invariant(
        pipeline_context.system_storage_def.is_persistent,
        'Cannot use in-memory storage with Dask, use filesystem, S3, or GCS',
    )

    step_levels = execution_plan.execution_step_levels()

    pipeline_name = pipeline_context.pipeline_def.name

    instance = pipeline_context.instance

    cluster_type = dask_config.cluster_type
    if cluster_type == 'local':
        from dask.distributed import LocalCluster

        cluster = LocalCluster(**dask_config.build_dict(pipeline_name))
    elif cluster_type == 'yarn':
        from dask_yarn import YarnCluster

        cluster = YarnCluster(**dask_config.build_dict(pipeline_name))
    elif cluster_type == 'ssh':
        from dask.distributed import SSHCluster

        cluster = SSHCluster(**dask_config.build_dict(pipeline_name))
    elif cluster_type == 'pbs':
        from dask_jobqueue import PBSCluster

        cluster = PBSCluster(**dask_config.build_dict(pipeline_name))
    elif cluster_type == 'kube':
        from dask_kubernetes import KubeCluster

        cluster = KubeCluster(**dask_config.build_dict(pipeline_name))
    else:
        raise ValueError(
            f"Must provide one of the following ('local', 'yarn', 'ssh', 'pbs', 'kube'), "
            f"not {cluster_type}"
        )

    with dask.distributed.Client(cluster) as client:
        execution_futures = []
        execution_futures_dict = {}

        for step_level in step_levels:
            for step in step_level:
                # We ensure correctness in sequencing by letting Dask schedule futures and
                # awaiting dependencies within each step.
                dependencies = []
                for step_input in step.step_inputs:
                    for key in step_input.dependency_keys:
                        dependencies.append(execution_futures_dict[key])

                environment_dict = dict(pipeline_context.environment_dict,
                                        execution={'in_process': {}})
                variables = {
                    'executionParams': {
                        'selector': {'name': pipeline_name},
                        'runConfigData': environment_dict,
                        'mode': pipeline_context.mode_def.name,
                        'executionMetadata': {'runId': pipeline_context.pipeline_run.run_id},
                        'stepKeys': [step.key],
                    }
                }

                dask_task_name = '%s.%s' % (pipeline_name, step.key)

                future = client.submit(
                    query_on_dask_worker,
                    pipeline_context.pipeline.get_reconstructable_repository(),
                    variables,
                    dependencies,
                    instance.get_ref(),
                    key=dask_task_name,
                    resources=get_dask_resource_requirements(step.tags),
                )

                execution_futures.append(future)
                execution_futures_dict[step.key] = future

        # This tells Dask to await the step executions and retrieve their results
        # to the master.
        for future in dask.distributed.as_completed(execution_futures):
            for step_event in future.result():
                check.inst(step_event, DagsterEvent)
                yield step_event
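# A distilled sketch of the fan-out pattern used by the executors above: each step
# is submitted as a future whose upstream futures are passed as arguments, so Dask
# resolves them to results before the step runs and thereby enforces the ordering.
# The step names and `run_step` body are illustrative, not Dagster's API.
from dask.distributed import Client, LocalCluster, as_completed


def run_step(step_key, dependencies):
    # `dependencies` arrives as the already-computed upstream results.
    return f"{step_key} done after {len(dependencies)} upstream step(s)"


plan = {"extract": [], "transform": ["extract"], "load": ["transform"]}

with Client(LocalCluster(n_workers=2)) as client:
    futures = {}
    for step_key, upstream in plan.items():
        deps = [futures[k] for k in upstream]
        futures[step_key] = client.submit(run_step, step_key, deps, key=step_key)
    for future in as_completed(futures.values()):
        print(future.result())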
# A later Dagster executor variant that submits an ephemeral workspace plus
# GraphQL-style variables; names such as `check`, `DaskExecutor`, and
# `create_in_process_ephemeral_workspace` come from the surrounding module's imports.
def execute(self, pipeline_context, execution_plan):
    check.inst_param(pipeline_context, "pipeline_context", SystemPipelineExecutionContext)
    check.inst_param(execution_plan, "execution_plan", ExecutionPlan)
    check.param_invariant(
        isinstance(pipeline_context.executor, DaskExecutor),
        "pipeline_context",
        "Expected executor to be DaskExecutor got {}".format(pipeline_context.executor),
    )

    # Checks to ensure storage is compatible with Dask configuration
    storage = pipeline_context.run_config.get("storage")
    check.invariant(storage.keys(), "Must specify storage to use Dask execution")

    check.invariant(
        pipeline_context.instance.is_persistent,
        "Dask execution requires a persistent DagsterInstance",
    )

    # https://github.com/dagster-io/dagster/issues/2440
    check.invariant(
        pipeline_context.system_storage_def.is_persistent,
        "Cannot use in-memory storage with Dask, use filesystem, S3, or GCS",
    )

    step_levels = execution_plan.execution_step_levels()

    pipeline_name = pipeline_context.pipeline_def.name

    instance = pipeline_context.instance

    cluster_type = self.cluster_type
    if cluster_type == "local":
        from dask.distributed import LocalCluster

        cluster = LocalCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "yarn":
        from dask_yarn import YarnCluster

        cluster = YarnCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "ssh":
        from dask.distributed import SSHCluster

        cluster = SSHCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "pbs":
        from dask_jobqueue import PBSCluster

        cluster = PBSCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "moab":
        from dask_jobqueue import MoabCluster

        cluster = MoabCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "sge":
        from dask_jobqueue import SGECluster

        cluster = SGECluster(**self.build_dict(pipeline_name))
    elif cluster_type == "lsf":
        from dask_jobqueue import LSFCluster

        cluster = LSFCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "slurm":
        from dask_jobqueue import SLURMCluster

        cluster = SLURMCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "oar":
        from dask_jobqueue import OARCluster

        cluster = OARCluster(**self.build_dict(pipeline_name))
    elif cluster_type == "kube":
        from dask_kubernetes import KubeCluster

        cluster = KubeCluster(**self.build_dict(pipeline_name))
    else:
        raise ValueError(
            f"Must provide one of the following ('local', 'yarn', 'ssh', 'pbs', 'moab', "
            f"'sge', 'lsf', 'slurm', 'oar', 'kube'), not {cluster_type}"
        )

    with dask.distributed.Client(cluster) as client:
        execution_futures = []
        execution_futures_dict = {}

        for step_level in step_levels:
            for step in step_level:
                # We ensure correctness in sequencing by letting Dask schedule futures and
                # awaiting dependencies within each step.
                dependencies = []
                for step_input in step.step_inputs:
                    for key in step_input.dependency_keys:
                        dependencies.append(execution_futures_dict[key])

                run_config = dict(pipeline_context.run_config, execution={"in_process": {}})
                recon_repo = pipeline_context.pipeline.get_reconstructable_repository()
                variables = {
                    "executionParams": {
                        "selector": {
                            "pipelineName": pipeline_name,
                            "repositoryName": recon_repo.get_definition().name,
                            "repositoryLocationName": "<<in_process>>",
                        },
                        "runConfigData": run_config,
                        "mode": pipeline_context.mode_def.name,
                        "executionMetadata": {"runId": pipeline_context.pipeline_run.run_id},
                        "stepKeys": [step.key],
                    }
                }

                dask_task_name = "%s.%s" % (pipeline_name, step.key)

                workspace = create_in_process_ephemeral_workspace(
                    pointer=pipeline_context.pipeline.get_reconstructable_repository().pointer
                )

                future = client.submit(
                    query_on_dask_worker,
                    workspace,
                    variables,
                    dependencies,
                    instance.get_ref(),
                    key=dask_task_name,
                    resources=get_dask_resource_requirements(step.tags),
                )

                execution_futures.append(future)
                execution_futures_dict[step.key] = future

        # This tells Dask to await the step executions and retrieve their results
        # to the master.
        for future in dask.distributed.as_completed(execution_futures):
            for step_event in future.result():
                check.inst(step_event, DagsterEvent)
                yield step_event
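# A note on the `resources=` keyword in the client.submit calls above: it only
# constrains scheduling when workers advertise matching resources, e.g. a worker
# started with `dask-worker scheduler:8786 --resources "GPU=1"`. A minimal sketch;
# the address and the "GPU" tag are illustrative assumptions.
from dask.distributed import Client


def train(batch):
    return len(batch)


client = Client("scheduler.example.com:8786")  # hypothetical scheduler address
# This future will only be scheduled on a worker offering at least one "GPU".
future = client.submit(train, list(range(10)), resources={"GPU": 1})
print(future.result())
client.close()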