async def create_cluster(self, workers=1) -> None:
    print('~~ creating dask cluster...')
    print(f'~~ workers = {workers}')

    # create dask scheduler
    self.scheduler = self.spawn(
        DaskScheduler,
        routes={
            # '/': 8787,
        },
    )
    self.scheduler.ready = Future()
    self.scheduler_uri = f'tcp://{self.scheduler.ip}:8786'
    await sleep(1)

    # create workers
    self.workers = []
    await self.add_workers(workers)

    print('~~ waiting for cluster nodes')
    await self.wait_for_nodes()
    print('~~ dask cluster ready')

    self.dask = DaskClient(address=self.scheduler_uri)
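# Hedged usage sketch for create_cluster above. `ClusterService` is a hypothetical
# stand-in for the class that defines spawn/add_workers/wait_for_nodes; it is not
# part of the snippet.
#
#   cluster = ClusterService()
#   await cluster.create_cluster(workers=4)      # spawn scheduler + 4 workers
#   # once ready, cluster.dask is a regular dask.distributed.Client
#   print(cluster.dask.scheduler_info())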
def generate_captchas():
    # start a Dask distributed client with 6 worker processes / 1 thread per process
    client = DaskClient(n_workers=6, threads_per_worker=1)

    # submit future functions to the cluster
    futures = []
    for i in range(10000):
        futures.append(client.submit(synth_captcha, pure=False))

    # execute and compute results (synchronous / blocking!)
    results = client.gather(futures)
    print(len(results))

    # stop & release client
    client.close()
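# The same submit/gather pattern as a self-contained sketch, since synth_captcha
# is defined elsewhere in the original project. `fake_task` is a hypothetical
# placeholder, not part of the source.
from dask.distributed import Client as DaskClient
import random

def fake_task():
    # stand-in for synth_captcha: returns a random number instead of an image
    return random.random()

if __name__ == "__main__":
    client = DaskClient(n_workers=2, threads_per_worker=1)
    # pure=False forces Dask to run every call instead of deduplicating by result hash
    futures = [client.submit(fake_task, pure=False) for _ in range(100)]
    results = client.gather(futures)   # blocks until all futures finish
    print(len(results))
    client.close()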
async def get_client(self, asynchronous=True) -> Any:
    """
    Creates and returns a DaskClient object for the cluster
    """
    if not self.alive:
        raise SystemError('cluster is dead')

    from dask.distributed import Client as DaskClient
    from distributed.security import Security

    security = Security(
        tls_ca_file=f'{self._prefix:s}_ca.crt',
        tls_client_cert=f'{self._prefix:s}_node.crt',
        tls_client_key=f'{self._prefix:s}_node.key',
        require_encryption=True,
    )

    return DaskClient(
        f'tls://{self._scheduler.public_ip4:s}:{self._dask_ipc:d}',
        security=security,
        asynchronous=asynchronous,
    )
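# Minimal standalone sketch of the TLS connection pattern used above. The
# certificate paths and scheduler address are placeholders for whatever the
# cluster object stores in self._prefix and self._scheduler.
from dask.distributed import Client as DaskClient
from distributed.security import Security

security = Security(
    tls_ca_file="cluster_ca.crt",        # placeholder paths
    tls_client_cert="cluster_node.crt",
    tls_client_key="cluster_node.key",
    require_encryption=True,
)
# client = DaskClient("tls://203.0.113.10:8786", security=security)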
async def init(ident: str, background_tasks: BackgroundTasks) -> bool:
    """
    Start running an algorithm.

    Parameters
    ----------
    ident : str
        The identifier paired with this algorithm.

    Returns
    -------
    success : bool

    Notes
    -----
    This function has side effects: it launches a background job with the
    algorithm class. This class runs the ``run`` function, which posts queries
    to Redis and processes answers posted to Redis.

    If the algorithm class has a ``get_query`` method, the class will respond
    to the API request ``/get_query``. The method ``run`` should be modified
    to handle this.

    params : Dict[str, Any]
        Pulled from the experiment config and Redis. Here's an example YAML
        configuration:

        .. code:: yaml

           targets:
             - 1
             - 2
             - 3
             - 4
           samplers:
             - Random
             - random2
             - class: Random
             - foo: bar

    """
    # TODO: Better handling of exceptions if params keys don't match
    logger.info("backend: initializing %s", ident)
    config = rj.jsonget("exp_config")

    try:
        if f"state-{ident}" in rj.keys():
            logger.warning(f"Initializing alg from key 'state-{ident}'")
            # See https://github.com/andymccurdy/redis-py/issues/1006
            rj2 = Client(host="redis", port=6379, decode_responses=False)
            state = rj2.get(f"state-{ident}")
            alg = cloudpickle.loads(state)
        else:
            logger.warning("Initializing alg from config")
            params = config["samplers"][ident]
            _class = params.pop("class", ident)
            Sampler = getattr(samplers, _class)
            params = {k: _fmt_params(k, v) for k, v in params.items()}
            logger.warning("Sampler for %s = %s", ident, Sampler)
            common = config["sampling"]["common"]
            p = deepcopy(common)
            p.update(params)
            kwargs = dict(ident=ident, n=config["n"], **p)
            logger.warning("class=%s kwargs= %s", _class, kwargs)
            alg = Sampler(ident=ident, n=config["n"], **p)
    except Exception as e:
        msg = exception_to_string(e)
        logger.error(f"Error on alg={ident} init: {msg}")
        flush_logger(logger)
        raise ExpParsingError(status_code=500, detail=msg)

    SAMPLERS[ident] = alg

    dask_client = DaskClient("127.0.0.2:8786")
    logger.info("Before adding init task")
    background_tasks.add_task(alg.run, dask_client)
    logger.info("Returning")
    return True
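# Sketch of the warm-start path above: an algorithm object is serialized with
# cloudpickle under a "state-{ident}" key and restored on the next init. Redis is
# replaced with a plain dict here, and Sampler is a hypothetical minimal class.
import cloudpickle

class Sampler:                       # hypothetical stand-in sampler
    def __init__(self, ident, n):
        self.ident, self.n = ident, n

store = {}                           # stands in for the Redis connection
alg = Sampler(ident="Random", n=10)
store["state-Random"] = cloudpickle.dumps(alg)       # what a previous run would save
restored = cloudpickle.loads(store["state-Random"])  # what init loads on restart
assert restored.ident == "Random"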
def set_scheduler(self, scheduler, scheduler_host, scheduler_port=None):
    """
    Set a scheduler by scheduler type and scheduler_host (and optionally scheduler_port)

    :param scheduler: the scheduler type, should be either dask or spark
    :type scheduler: :class:`str`
    :param scheduler_host: the host address of the scheduler
    :type scheduler_host: :class:`str`
    :param scheduler_port: the port of the scheduler
    :type scheduler_port: :class:`str`
    """
    if scheduler != "dask" and scheduler != "spark":
        raise MsPASSError(
            "scheduler should be either dask or spark but "
            + str(scheduler)
            + " is found.",
            "Fatal",
        )

    prev_scheduler = self._scheduler
    self._scheduler = scheduler

    if scheduler == "spark":
        scheduler_host_has_port = False
        self._spark_master_url = scheduler_host
        # add spark:// prefix if not present
        if "spark://" not in scheduler_host:
            self._spark_master_url = "spark://" + self._spark_master_url
        # check if spark host address contains a port number already
        if self._spark_master_url.count(":") == 2:
            scheduler_host_has_port = True
        # add port
        if not scheduler_host_has_port and scheduler_port:
            self._spark_master_url += ":" + scheduler_port

        # sanity check
        prev_spark_context = None
        prev_spark_conf = None
        if hasattr(self, "_spark_context"):
            prev_spark_context = self._spark_context
            prev_spark_conf = self._spark_context.getConf()
        try:
            if hasattr(self, "_spark_context") and isinstance(
                self._spark_context, SparkContext
            ):
                # update the configuration
                spark_conf = self._spark_context._conf.setMaster(
                    self._spark_master_url
                )
            else:
                spark_conf = (
                    SparkConf().setAppName("mspass").setMaster(self._spark_master_url)
                )
            # stop the previous spark context
            # FIXME if the new context does not start, we shouldn't stop the previous here.
            # if prev_spark_context:
            #     prev_spark_context.stop()
            # create a new spark context -> might raise and execute the exception code
            spark = SparkSession.builder.config(conf=spark_conf).getOrCreate()
            self._spark_context = spark.sparkContext
        except Exception as err:
            # restore the spark context from the previous spark configuration
            if prev_spark_conf:
                self._spark_context = SparkContext.getOrCreate(conf=prev_spark_conf)
            # restore the scheduler type
            if self._scheduler == "spark" and prev_scheduler == "dask":
                self._scheduler = prev_scheduler
            raise MsPASSError(
                "Runtime error: cannot create a spark configuration with: "
                + self._spark_master_url,
                "Fatal",
            )

        # close the previous dask client on success
        if hasattr(self, "_dask_client"):
            del self._dask_client

    elif scheduler == "dask":
        scheduler_host_has_port = False
        self._dask_client_address = scheduler_host
        # check if scheduler_host contains a port number already
        if ":" in scheduler_host:
            scheduler_host_has_port = True
        # add port
        if not scheduler_host_has_port:
            if scheduler_port:
                self._dask_client_address += ":" + scheduler_port
            else:
                # use port 8786 by default if not specified
                self._dask_client_address += ":8786"

        # sanity check
        prev_dask_client = None
        if hasattr(self, "_dask_client"):
            prev_dask_client = self._dask_client
        try:
            # create a new dask client
            self._dask_client = DaskClient(self._dask_client_address)
        except Exception as err:
            # restore the dask client if it exists
            if prev_dask_client:
                self._dask_client = prev_dask_client
            # restore the scheduler type
            if self._scheduler == "dask" and prev_scheduler == "spark":
                self._scheduler = prev_scheduler
            raise MsPASSError(
                "Runtime error: cannot create a dask client with: "
                + self._dask_client_address,
                "Fatal",
            )

        # remove the previous spark context if the new dask client was set successfully
        if hasattr(self, "_spark_context"):
            del self._spark_context
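# Hedged usage sketch for set_scheduler above. `processing_client` stands for an
# instance of the class that defines the method (a MsPASS Client-like object);
# the host names and ports are placeholders.
#
#   processing_client.set_scheduler("dask", "localhost", scheduler_port="8786")
#   #   -> connects self._dask_client to localhost:8786
#
#   processing_client.set_scheduler("spark", "spark-master", scheduler_port="7077")
#   #   -> builds a SparkSession against spark://spark-master:7077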
def __init__(
    self,
    database_host=None,
    scheduler=None,
    scheduler_host=None,
    job_name="mspass",
    database_name="mspass",
    schema=None,
    collection=None,
):
    # database_host should be a string
    if database_host is not None and not type(database_host) is str:
        raise MsPASSError(
            "database_host should be a string but "
            + str(type(database_host))
            + " is found.",
            "Fatal",
        )
    # scheduler should be either dask or spark
    if scheduler is not None and scheduler != "dask" and scheduler != "spark":
        raise MsPASSError(
            "scheduler should be either dask or spark but "
            + str(scheduler)
            + " is found.",
            "Fatal",
        )
    # scheduler_host should be a string
    if scheduler_host is not None and not type(scheduler_host) is str:
        raise MsPASSError(
            "scheduler_host should be a string but "
            + str(type(scheduler_host))
            + " is found.",
            "Fatal",
        )
    # job_name should be a string
    if job_name is not None and not type(job_name) is str:
        raise MsPASSError(
            "job_name should be a string but "
            + str(type(job_name))
            + " is found.",
            "Fatal",
        )
    # database_name should be a string
    if database_name is not None and not type(database_name) is str:
        raise MsPASSError(
            "database_name should be a string but "
            + str(type(database_name))
            + " is found.",
            "Fatal",
        )
    # collection should be a string
    if collection is not None and type(collection) is not str:
        raise MsPASSError(
            "collection should be a string but "
            + str(type(collection))
            + " is found.",
            "Fatal",
        )

    # check env variables
    MSPASS_DB_ADDRESS = os.environ.get("MSPASS_DB_ADDRESS")
    MONGODB_PORT = os.environ.get("MONGODB_PORT")
    MSPASS_SCHEDULER = os.environ.get("MSPASS_SCHEDULER")
    MSPASS_SCHEDULER_ADDRESS = os.environ.get("MSPASS_SCHEDULER_ADDRESS")
    DASK_SCHEDULER_PORT = os.environ.get("DASK_SCHEDULER_PORT")
    SPARK_MASTER_PORT = os.environ.get("SPARK_MASTER_PORT")

    # create a database client
    # priority: parameter -> env -> default
    database_host_has_port = False
    if database_host:
        database_address = database_host
        # check if database_host contains a port number already
        if ":" in database_address:
            database_host_has_port = True
    elif MSPASS_DB_ADDRESS:
        database_address = MSPASS_DB_ADDRESS
    else:
        database_address = "localhost"
    # add port
    if not database_host_has_port and MONGODB_PORT:
        database_address += ":" + MONGODB_PORT

    try:
        self._db_client = DBClient(database_address)
        self._db_client.server_info()
    except Exception as err:
        raise MsPASSError(
            "Runtime error: cannot create a database client with: "
            + database_address,
            "Fatal",
        )

    # set default database name
    self._default_database_name = database_name
    self._default_schema = schema
    self._default_collection = collection

    # create a Global History Manager
    if schema:
        global_history_manager_db = Database(
            self._db_client, database_name, db_schema=schema
        )
    else:
        global_history_manager_db = Database(self._db_client, database_name)
    self._global_history_manager = GlobalHistoryManager(
        global_history_manager_db, job_name, collection=collection
    )

    # set scheduler
    if scheduler:
        self._scheduler = scheduler
    elif MSPASS_SCHEDULER:
        self._scheduler = MSPASS_SCHEDULER
    else:
        self._scheduler = "dask"

    # scheduler configuration
    if self._scheduler == "spark":
        scheduler_host_has_port = False
        if scheduler_host:
            self._spark_master_url = scheduler_host
            # add spark:// prefix if not present
            if "spark://" not in scheduler_host:
                self._spark_master_url = "spark://" + self._spark_master_url
            # check if spark host address contains a port number already
            if self._spark_master_url.count(":") == 2:
                scheduler_host_has_port = True
        elif MSPASS_SCHEDULER_ADDRESS:
            self._spark_master_url = MSPASS_SCHEDULER_ADDRESS
            # add spark:// prefix if not present
            if "spark://" not in MSPASS_SCHEDULER_ADDRESS:
                self._spark_master_url = "spark://" + self._spark_master_url
        else:
            self._spark_master_url = "local"

        # add port number if
        # 1. not the default 'local'
        # 2. scheduler_host does not contain a port number
        # 3. SPARK_MASTER_PORT exists
        if (
            (scheduler_host or MSPASS_SCHEDULER_ADDRESS)
            and not scheduler_host_has_port
            and SPARK_MASTER_PORT
        ):
            self._spark_master_url += ":" + SPARK_MASTER_PORT

        # sanity check
        try:
            spark = (
                SparkSession.builder.appName("mspass")
                .master(self._spark_master_url)
                .getOrCreate()
            )
            self._spark_context = spark.sparkContext
        except Exception as err:
            raise MsPASSError(
                "Runtime error: cannot create a spark configuration with: "
                + self._spark_master_url,
                "Fatal",
            )

    elif self._scheduler == "dask":
        # if scheduler_host is not defined and MSPASS_SCHEDULER_ADDRESS is not set,
        # use a local cluster to create a client
        if not scheduler_host and not MSPASS_SCHEDULER_ADDRESS:
            self._dask_client = DaskClient()
        else:
            scheduler_host_has_port = False
            # set host
            if scheduler_host:
                self._dask_client_address = scheduler_host
                # check if scheduler_host contains a port number already
                if ":" in scheduler_host:
                    scheduler_host_has_port = True
            else:
                self._dask_client_address = MSPASS_SCHEDULER_ADDRESS

            # add port
            if not scheduler_host_has_port and DASK_SCHEDULER_PORT:
                self._dask_client_address += ":" + DASK_SCHEDULER_PORT
            else:
                # use port 8786 by default if not specified
                self._dask_client_address += ":8786"

            # sanity check
            try:
                self._dask_client = DaskClient(self._dask_client_address)
            except Exception as err:
                raise MsPASSError(
                    "Runtime error: cannot create a dask client with: "
                    + self._dask_client_address,
                    "Fatal",
                )
def __init__(self):
    """Initialize a dask client."""
    self.dask_client = DaskClient()
    self.tasks = {}
    self._id_counter = itertools.count()
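# Note: DaskClient() with no arguments starts an in-process LocalCluster, so the
# wrapper above needs no separately running scheduler. Hedged usage sketch, with
# `TaskRunner` as a hypothetical name for the wrapping class:
#
#   runner = TaskRunner()
#   fut = runner.dask_client.submit(sum, [1, 2, 3])
#   assert fut.result() == 6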
def connect(
    args: argparse.Namespace
) -> typing.Tuple[dask.distributed.Client, typing.Optional[dask.distributed.LocalCluster]]:
    """
    Connect to the dask cluster specified by the arguments in `args`

    Specifically, this function uses args.cluster_location to determine whether
    to start a dask.distributed.LocalCluster (in case args.cluster_location is
    "LOCAL") or to (attempt to) connect to an existing cluster (any other value).

    If a local cluster is started, it will use a number of worker processes
    equal to args.num_procs. Each process will use args.num_threads_per_proc
    threads. The scheduler for the local cluster will listen to a random port.

    Parameters
    ----------
    args: argparse.Namespace
        A namespace containing the following fields:

        * cluster_location
        * client_restart
        * num_procs
        * num_threads_per_proc

    Returns
    -------
    client: dask.distributed.Client
        The client for the dask connection

    cluster: dask.distributed.LocalCluster or None
        If a local cluster is started, the reference to the local cluster
        object is returned. Otherwise, None is returned.
    """
    from dask.distributed import Client as DaskClient
    from dask.distributed import LocalCluster as DaskCluster

    client = None
    cluster = None

    if args.cluster_location == "LOCAL":
        msg = "[dask_utils]: starting local dask cluster"
        logger.info(msg)

        cluster = DaskCluster(
            n_workers=args.num_procs,
            processes=True,
            threads_per_worker=args.num_threads_per_proc,
        )

        client = DaskClient(cluster)
    else:
        msg = "[dask_utils]: attempting to connect to dask cluster: {}"
        msg = msg.format(args.cluster_location)
        logger.info(msg)

        client = DaskClient(address=args.cluster_location)

    if args.client_restart:
        msg = "[dask_utils]: restarting client"
        logger.info(msg)
        client.restart()

    return client, cluster
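# Hedged usage sketch for connect() above, building the expected
# argparse.Namespace by hand instead of parsing a real command line.
import argparse

args = argparse.Namespace(
    cluster_location="LOCAL",      # "LOCAL" starts a LocalCluster; any other value is treated as an address
    client_restart=False,
    num_procs=2,
    num_threads_per_proc=1,
)
# client, cluster = connect(args)
# ... use client.submit / client.map ...
# client.close()
# if cluster is not None:
#     cluster.close()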