Example #1
    async def create_cluster(self, workers=1) -> None:
        print('~~ creating dask cluster...')
        print(f'~~   workers = {workers}')

        # create dask scheduler
        self.scheduler = self.spawn(
            DaskScheduler,
            routes={
                # '/': 8787,
            },
        )
        self.scheduler.ready = Future()
        self.scheduler_uri = f'tcp://{self.scheduler.ip}:8786'

        # give the scheduler a moment to start listening
        await sleep(1)

        # create workers
        self.workers = []
        await self.add_workers(workers)

        print('~~ waiting for cluster nodes')
        await self.wait_for_nodes()

        print('~~ dask cluster ready')
        self.dask = DaskClient(address=self.scheduler_uri)
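The cluster startup above relies on a wait_for_nodes method that is not shown. A minimal sketch of such a wait, using dask.distributed's Client.wait_for_workers (the helper name and approach are assumptions, not the original wait_for_nodes):

from dask.distributed import Client as DaskClient

async def wait_for_workers(scheduler_uri, n, timeout=60):
    # hypothetical helper: connect asynchronously, block until n workers register
    client = await DaskClient(scheduler_uri, asynchronous=True)
    await client.wait_for_workers(n_workers=n, timeout=timeout)
    await client.close()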
Example #2
def generate_captchas():
    # start a Dask distributed client with 6 worker processes / 1 thread per worker
    client = DaskClient(n_workers=6, threads_per_worker=1)
    # submit tasks to the cluster; each call returns a future
    futures = []
    for i in range(10000):
        futures.append(client.submit(synth_captcha, pure=False))
    # gather the results (synchronous / blocking!)
    results = client.gather(futures)
    print(len(results))
    # stop & release client
    client.close()
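synth_captcha is defined elsewhere in the original project; a minimal stand-in (hypothetical, only to make the snippet runnable end to end) could be:

import random
import string

def synth_captcha():
    # hypothetical stand-in: return a random 6-character captcha string
    return "".join(random.choices(string.ascii_uppercase + string.digits, k=6))

pure=False is passed above because the function is impure: it forces dask to run every call rather than deduplicating identical pure calls into a single result.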
Example #3
    async def get_client(self, asynchronous = True) -> Any:
        """
        Creates and returns a DaskClient object for the cluster
        """

        if not self.alive:
            raise SystemError('cluster is dead')

        from dask.distributed import Client as DaskClient
        from distributed.security import Security

        security = Security(
            tls_ca_file = f'{self._prefix:s}_ca.crt',
            tls_client_cert = f'{self._prefix:s}_node.crt',
            tls_client_key = f'{self._prefix:s}_node.key',
            require_encryption = True,
        )

        return DaskClient(
            f'tls://{self._scheduler.public_ip4:s}:{self._dask_ipc:d}',
            security = security,
            asynchronous = asynchronous,
        )
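Because the client is created with asynchronous=True, a caller has to drive it from a coroutine. An illustrative sketch (the cluster object and the submitted task are assumptions):

async def run_job(cluster):
    client = await cluster.get_client()
    await client                            # finish the asynchronous connection handshake
    future = client.submit(sum, [1, 2, 3])
    result = await future                   # futures are awaitable on an async client
    await client.close()
    return result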
Example #4
async def init(ident: str, background_tasks: BackgroundTasks) -> bool:
    """
    Start running an algorithm.

    Parameters
    ----------
    ident : str
        The identifier paired with this algorithm.
    background_tasks : BackgroundTasks
        Background task runner used to launch the algorithm's ``run`` method.

    Returns
    -------
    success : bool

    Notes
    -----
    This function has side effects: it launches a background job with the
    algorithm class. This class runs the ``run`` method, which posts
    queries to Redis and processes answers posted to Redis.

    If the algorithm class has a ``get_query`` method, the class will
    respond to the API request ``/get_query``. The method ``run`` should
    be modified to handle this.

    params : Dict[str, Any]
        Pulled from the experiment config and Redis.
        Here's an example YAML configuration:

    .. code:: yaml

       targets:
         - 1
         - 2
         - 3
         - 4
       samplers:
         Random: {}
         random2:
           class: Random
           foo: bar

    """
    # TODO: Better handling of exceptions if params keys don't match
    logger.info("backend: initializing %s", ident)
    config = rj.jsonget("exp_config")

    try:
        if f"state-{ident}" in rj.keys():
            logger.warning(f"Initializing alg from key 'state-{ident}'")
            # See https://github.com/andymccurdy/redis-py/issues/1006
            rj2 = Client(host="redis", port=6379, decode_responses=False)
            state = rj2.get(f"state-{ident}")
            alg = cloudpickle.loads(state)
        else:
            logger.warning(f"Initializing alg from config")
            params = config["samplers"][ident]
            _class = params.pop("class", ident)
            Sampler = getattr(samplers, _class)
            params = {k: _fmt_params(k, v) for k, v in params.items()}
            logger.warning("Sampler for %s = %s", ident, Sampler)
            common = config["sampling"]["common"]
            p = deepcopy(common)
            p.update(params)
            kwargs = dict(ident=ident, n=config["n"], **p)
            logger.warning("class=%s kwargs= %s", _class, kwargs)
            alg = Sampler(**kwargs)
    except Exception as e:
        msg = exception_to_string(e)
        logger.error(f"Error on alg={ident} init: {msg}")
        flush_logger(logger)
        raise ExpParsingError(status_code=500, detail=msg)

    SAMPLERS[ident] = alg

    dask_client = DaskClient("127.0.0.2:8786")
    logger.info("Before adding init task")
    background_tasks.add_task(alg.run, dask_client)
    logger.info("Returning")
    return True
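The restore branch above implies the sampler was serialized with cloudpickle under the same state-{ident} key at some earlier point. A hypothetical counterpart save (the helper and its call site are assumptions):

import cloudpickle

def save_state(rj2, ident, alg):
    # hypothetical counterpart: store the pickled sampler as raw bytes
    # (the redis client must use decode_responses=False, as above)
    rj2.set(f"state-{ident}", cloudpickle.dumps(alg))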
Example #5
    def set_scheduler(self, scheduler, scheduler_host, scheduler_port=None):
        """
        Set a scheduler by scheduler type and scheduler_host (and optionally scheduler_port).

        :param scheduler: the scheduler type, should be either dask or spark
        :type scheduler: :class:`str`
        :param scheduler_host: the host address of scheduler
        :type scheduler_host: :class:`str`
        :param scheduler_port: the port of scheduler
        :type scheduler_port: :class:`str`
        """
        if scheduler != "dask" and scheduler != "spark":
            raise MsPASSError(
                "scheduler should be either dask or spark but " +
                str(scheduler) + " is found.",
                "Fatal",
            )

        prev_scheduler = self._scheduler
        self._scheduler = scheduler
        if scheduler == "spark":
            scheduler_host_has_port = False

            self._spark_master_url = scheduler_host
            # add spark:// prefix if not present
            if "spark://" not in scheduler_host:
                self._spark_master_url = "spark://" + self._spark_master_url
            # check if spark host address contains port number already
            if self._spark_master_url.count(":") == 2:
                scheduler_host_has_port = True

            # add port
            if not scheduler_host_has_port and scheduler_port:
                self._spark_master_url += ":" + scheduler_port

            # sanity check
            prev_spark_context = None
            prev_spark_conf = None
            if hasattr(self, "_spark_context"):
                prev_spark_context = self._spark_context
                prev_spark_conf = self._spark_context.getConf()
            try:
                if hasattr(self, "_spark_context") and isinstance(
                        self._spark_context, SparkContext):
                    # update the configuration
                    spark_conf = self._spark_context._conf.setMaster(
                        self._spark_master_url)
                else:
                    spark_conf = (SparkConf().setAppName("mspass").setMaster(
                        self._spark_master_url))
                # stop the previous spark context
                # FIXME if the new context does not start, we shouldn't stop the previous here.
                # if prev_spark_context:
                #    prev_spark_context.stop()
                # create a new spark context -> might cause error so that execute exception code
                spark = SparkSession.builder.config(
                    conf=spark_conf).getOrCreate()
                self._spark_context = spark.sparkContext
            except Exception as err:
                # restore the spark context by the previous spark configuration
                if prev_spark_conf:
                    self._spark_context = SparkContext.getOrCreate(
                        conf=prev_spark_conf)
                # restore the scheduler type
                if self._scheduler == "spark" and prev_scheduler == "dask":
                    self._scheduler = prev_scheduler
                raise MsPASSError(
                    "Runntime error: cannot create a spark configuration with: "
                    + self._spark_master_url,
                    "Fatal",
                )
            # close previous dask client if success
            if hasattr(self, "_dask_client"):
                del self._dask_client

        elif scheduler == "dask":
            scheduler_host_has_port = False
            self._dask_client_address = scheduler_host
            # check if scheduler_host contains port number already
            if ":" in scheduler_host:
                scheduler_host_has_port = True

            # add port
            if not scheduler_host_has_port:
                if scheduler_port:
                    self._dask_client_address += ":" + scheduler_port
                else:
                    # use port 8786 by default if not specified
                    self._dask_client_address += ":8786"

            # sanity check
            prev_dask_client = None
            if hasattr(self, "_dask_client"):
                prev_dask_client = self._dask_client
            try:
                # create a new dask client
                self._dask_client = DaskClient(self._dask_client_address)
            except Exception as err:
                # restore the dask client if exists
                if prev_dask_client:
                    self._dask_client = prev_dask_client
                # restore the scheduler type
                if self._scheduler == "dask" and prev_scheduler == "spark":
                    self._scheduler = prev_scheduler
                raise MsPASSError(
                    "Runntime error: cannot create a dask client with: " +
                    self._dask_client_address,
                    "Fatal",
                )
            # remove previous spark context if success setting new dask client
            if hasattr(self, "_spark_context"):
                del self._spark_context
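Illustrative usage, assuming mgr is an instance of the class defining set_scheduler (note the port is typed as a string in the docstring because it is concatenated into the address):

# point the manager at a remote dask scheduler
mgr.set_scheduler("dask", "scheduler.example.com", scheduler_port="8786")

# or switch to a standalone spark master; prefix and port may be given inline
mgr.set_scheduler("spark", "spark://master.example.com:7077")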
Example #6
    def __init__(
        self,
        database_host=None,
        scheduler=None,
        scheduler_host=None,
        job_name="mspass",
        database_name="mspass",
        schema=None,
        collection=None,
    ):
        # database_host should be a string
        if database_host is not None and not type(database_host) is str:
            raise MsPASSError(
                "database_host should be a string but " +
                str(type(database_host)) + " is found.",
                "Fatal",
            )
        if scheduler is not None and scheduler not in ("dask", "spark"):
            raise MsPASSError(
                "scheduler should be either dask or spark but " +
                str(scheduler) + " is found.",
                "Fatal",
            )
        if scheduler_host is not None and not type(scheduler_host) is str:
            raise MsPASSError(
                "scheduler_host should be a string but " +
                str(type(scheduler_host)) + " is found.",
                "Fatal",
            )
        if job_name is not None and not type(job_name) is str:
            raise MsPASSError(
                "job_name should be a string but " + str(type(job_name)) +
                " is found.",
                "Fatal",
            )
        if database_name is not None and not type(database_name) is str:
            raise MsPASSError(
                "database_name should be a string but " +
                str(type(database_name)) + " is found.",
                "Fatal",
            )
        # collection should be a string
        if collection is not None and type(collection) is not str:
            raise MsPASSError(
                "collection should be a string but " + str(type(collection)) +
                " is found.",
                "Fatal",
            )

        # check env variables
        MSPASS_DB_ADDRESS = os.environ.get("MSPASS_DB_ADDRESS")
        MONGODB_PORT = os.environ.get("MONGODB_PORT")
        MSPASS_SCHEDULER = os.environ.get("MSPASS_SCHEDULER")
        MSPASS_SCHEDULER_ADDRESS = os.environ.get("MSPASS_SCHEDULER_ADDRESS")
        DASK_SCHEDULER_PORT = os.environ.get("DASK_SCHEDULER_PORT")
        SPARK_MASTER_PORT = os.environ.get("SPARK_MASTER_PORT")

        # create a database client
        # priority: parameter -> env -> default
        database_host_has_port = False
        if database_host:
            database_address = database_host
            # check if database_host contains port number already
            if ":" in database_address:
                database_host_has_port = True

        elif MSPASS_DB_ADDRESS:
            database_address = MSPASS_DB_ADDRESS
        else:
            database_address = "localhost"
        # add port
        if not database_host_has_port and MONGODB_PORT:
            database_address += ":" + MONGODB_PORT

        try:
            self._db_client = DBClient(database_address)
            self._db_client.server_info()
        except Exception as err:
            raise MsPASSError(
                "Runntime error: cannot create a database client with: " +
                database_address,
                "Fatal",
            )

        # set default database name
        self._default_database_name = database_name
        self._default_schema = schema
        self._default_collection = collection

        # create a Global History Manager
        if schema:
            global_history_manager_db = Database(self._db_client,
                                                 database_name,
                                                 db_schema=schema)
        else:
            global_history_manager_db = Database(self._db_client,
                                                 database_name)
        self._global_history_manager = GlobalHistoryManager(
            global_history_manager_db, job_name, collection=collection)

        # set scheduler
        if scheduler:
            self._scheduler = scheduler
        elif MSPASS_SCHEDULER:
            self._scheduler = MSPASS_SCHEDULER
        else:
            self._scheduler = "dask"

        # scheduler configuration
        if self._scheduler == "spark":
            scheduler_host_has_port = False
            if scheduler_host:
                self._spark_master_url = scheduler_host
                # add spark:// prefix if not present
                if "spark://" not in scheduler_host:
                    self._spark_master_url = "spark://" + self._spark_master_url
                # check if spark host address contains port number already
                if self._spark_master_url.count(":") == 2:
                    scheduler_host_has_port = True

            elif MSPASS_SCHEDULER_ADDRESS:
                self._spark_master_url = MSPASS_SCHEDULER_ADDRESS
                # add spark:// prefix if not present
                if "spark://" not in MSPASS_SCHEDULER_ADDRESS:
                    self._spark_master_url = "spark://" + self._spark_master_url
            else:
                self._spark_master_url = "local"

            # add the port number when:
            # 1. the url is not the default 'local'
            # 2. the given host does not already contain a port number
            # 3. SPARK_MASTER_PORT exists
            if ((scheduler_host or MSPASS_SCHEDULER_ADDRESS)
                    and not scheduler_host_has_port and SPARK_MASTER_PORT):
                self._spark_master_url += ":" + SPARK_MASTER_PORT

            # sanity check
            try:
                spark = (SparkSession.builder.appName("mspass").master(
                    self._spark_master_url).getOrCreate())
                self._spark_context = spark.sparkContext
            except Exception as err:
                raise MsPASSError(
                    "Runntime error: cannot create a spark configuration with: "
                    + self._spark_master_url,
                    "Fatal",
                )

        elif self._scheduler == "dask":
            # if neither scheduler_host nor MSPASS_SCHEDULER_ADDRESS is defined,
            # use a local cluster to create a client
            if not scheduler_host and not MSPASS_SCHEDULER_ADDRESS:
                self._dask_client = DaskClient()
            else:
                scheduler_host_has_port = False
                # set host
                if scheduler_host:
                    self._dask_client_address = scheduler_host
                    # check if scheduler_host contains port number already
                    if ":" in scheduler_host:
                        scheduler_host_has_port = True
                else:
                    self._dask_client_address = MSPASS_SCHEDULER_ADDRESS

                # add port
                if not scheduler_host_has_port:
                    if DASK_SCHEDULER_PORT:
                        self._dask_client_address += ":" + DASK_SCHEDULER_PORT
                    else:
                        # use port 8786 by default if not specified
                        self._dask_client_address += ":8786"
                # sanity check
                try:
                    self._dask_client = DaskClient(self._dask_client_address)
                except Exception as err:
                    raise MsPASSError(
                        "Runntime error: cannot create a dask client with: " +
                        self._dask_client_address,
                        "Fatal",
                    )
Example #7
    def __init__(self):
        """Initialize a dask client."""
        self.dask_client = DaskClient()
        self.tasks = {}
        self._id_counter = itertools.count()
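The tasks dict and _id_counter suggest a submit-and-track pattern; a hypothetical companion method (not part of the original) might look like:

    def submit(self, fn, *args, **kwargs):
        # hypothetical companion method: track each future under a unique local id
        task_id = next(self._id_counter)
        self.tasks[task_id] = self.dask_client.submit(fn, *args, **kwargs)
        return task_id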
Example #8
def connect(
    args: argparse.Namespace
) -> typing.Tuple[dask.distributed.Client,
                  typing.Optional[dask.distributed.LocalCluster]]:
    """ Connect to the dask cluster specifed by the arguments in `args`

    Specifically, this function uses args.cluster_location to determine whether
    to start a dask.distributed.LocalCluster (in case args.cluster_location is
    "LOCAL") or to (attempt to) connect to an existing cluster (any other
    value).

    If a local cluster is started, it will use a number of worker processes
    equal to args.num_procs. Each process will use args.num_threads_per_proc
    threads. The scheduler for the local cluster will listen to a random port.

    Parameters
    ----------
    args: argparse.Namespace
        A namespace containing the following fields:
        
        * cluster_location
        * client_restart
        * num_procs
        * num_threads_per_proc

    Returns
    -------
    client: dask.distributed.Client
        The client for the dask connection

    cluster: dask.distributed.LocalCluster or None
        If a local cluster is started, the reference to the local cluster
        object is returned. Otherwise, None is returned.
    """

    from dask.distributed import Client as DaskClient
    from dask.distributed import LocalCluster as DaskCluster

    client = None
    cluster = None

    if args.cluster_location == "LOCAL":

        msg = "[dask_utils]: starting local dask cluster"
        logger.info(msg)

        cluster = DaskCluster(n_workers=args.num_procs,
                              processes=True,
                              threads_per_worker=args.num_threads_per_proc)

        client = DaskClient(cluster)

    else:
        msg = "[dask_utils]: attempting to connect to dask cluster: {}"
        msg = msg.format(args.cluster_location)
        logger.info(msg)

        client = DaskClient(address=args.cluster_location)

        if args.client_restart:
            msg = "[dask_utils]: restarting client"
            logger.info(msg)
            client.restart()

    return client, cluster
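A usage sketch; the Namespace fields mirror those documented above and the values are illustrative:

import argparse

args = argparse.Namespace(
    cluster_location="LOCAL",   # start a LocalCluster instead of connecting
    client_restart=False,
    num_procs=4,
    num_threads_per_proc=1,
)

client, cluster = connect(args)
# ... submit work through client ...
client.close()
if cluster is not None:
    cluster.close()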