def ee_dask_deploy(config, pb_id, image, n_workers=1, buffers=[], secrets=[]):
    """Deploy Dask execution engine.

    :param config: configuration DB handle
    :param pb_id: processing block ID
    :param image: Docker image to deploy
    :param n_workers: number of Dask workers
    :param buffers: list of buffers to mount on Dask workers
    :param secrets: list of secrets to mount on Dask workers
    :return: deployment ID and Dask client handle

    """
    # Make deployment
    deploy_id = "proc-{}-dask".format(pb_id)
    values = {"image": image, "worker.replicas": n_workers}
    for i, b in enumerate(buffers):
        values["buffers[{}]".format(i)] = b
    for i, s in enumerate(secrets):
        values["secrets[{}]".format(i)] = s
    deploy = ska_sdp_config.Deployment(
        deploy_id, "helm", {"chart": "dask", "values": values}
    )
    for txn in config.txn():
        txn.create_deployment(deploy)

    # Wait for scheduler to become available
    scheduler = deploy_id + "-scheduler." + os.environ["SDP_HELM_NAMESPACE"] + ":8786"
    client = None
    while client is None:
        try:
            client = distributed.Client(scheduler, timeout=1)
        except Exception:
            # Scheduler not up yet; keep retrying until it becomes available
            pass

    return deploy_id, client
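
A minimal usage sketch (not part of the original example; the processing block ID and image name are placeholders, and cleanup reuses the txn.get_deployment/txn.delete_deployment pattern shown in later examples):

def example_usage():
    config = ska_sdp_config.Config()
    deploy_id, client = ee_dask_deploy(
        config, "pb-test-00000", "example-workflow-image:latest", n_workers=2
    )
    try:
        # Run a trivial computation on the deployed Dask cluster
        def inc(x):
            return x + 1

        futures = client.map(inc, range(100))
        print(client.gather(futures))
    finally:
        client.close()
        # Remove the Dask deployment again
        for txn in config.txn():
            txn.delete_deployment(txn.get_deployment(deploy_id))
        config.close()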
Example #2
def create_deployment(config, pb):
    logger.info("Deploying DALiuGE...")
    deploy_id = pb.pb_id + "-daliuge"
    deployment = ska_sdp_config.Deployment(deploy_id, "helm", {
        'chart': 'daliuge',
    })
    for txn in config.txn():
        txn.create_deployment(deployment)
    return deployment
def create_deployment(config, pb):
    logger.info("Deploying DALiuGE...")
    deploy_id = "proc-{}-daliuge".format(pb.id)
    deployment = ska_sdp_config.Deployment(
        deploy_id,
        "helm",
        {
            "chart": "daliuge",
        },
    )
    for txn in config.txn():
        txn.create_deployment(deployment)
    return deployment
    def _start_workflow(txn, pb_id):
        """
        Start the workflow for a processing block.

        :param txn: config DB transaction
        :param pb_id: processing block ID

        """
        LOG.info("Making deployment for processing block %s", pb_id)

        # Read the processing block
        pb = txn.get_processing_block(pb_id)

        # Get workflow type, id and version
        wf_type = pb.workflow["type"]
        wf_id = pb.workflow["id"]
        wf_version = pb.workflow["version"]
        wf_description = "{} workflow {}, version {}".format(wf_type, wf_id, wf_version)

        # Get the container image for the workflow
        workflow = txn.get_workflow(wf_type, wf_id, wf_version)
        if workflow is None:
            wf_image = None
        else:
            wf_image = workflow.get("image")

        if wf_image is not None:
            # Make the deployment
            LOG.info("Deploying %s", wf_description)
            deploy_id = "proc-{}-workflow".format(pb_id)
            values = {}
            values["env"] = {}
            for v in ["SDP_CONFIG_HOST", "SDP_HELM_NAMESPACE"]:
                values["env"][v] = os.environ[v]
            values["wf_image"] = wf_image
            values["pb_id"] = pb_id
            chart = {"chart": "workflow", "values": values}
            deploy = ska_sdp_config.Deployment(deploy_id, "helm", chart)
            txn.create_deployment(deploy)
            # Set status to STARTING, and resources_available to False
            state = {"status": "STARTING", "resources_available": False}
        else:
            # Invalid workflow, so set status to FAILED
            state = {"status": "FAILED", "reason": "No image for " + wf_description}

        # Create the processing block state.
        txn.create_processing_block_state(pb_id, state)
Example #5
def buffer_create(config, name, size=None):
    """Create buffer reservation.

    :param config: configuration DB handle
    :param name: name
    :param size: size, uses default in chart if None

    """
    deploy_id = name
    values = {}
    if size is not None:
        values["size"] = size
    deploy = ska_sdp_config.Deployment(deploy_id, "helm", {
        "chart": "buffer",
        "values": values
    })
    for txn in config.txn():
        txn.create_deployment(deploy)
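
A minimal usage sketch (not part of the original example; the buffer name and size are placeholders, and releasing the reservation follows the same deployment deletion pattern used elsewhere in these examples):

config = ska_sdp_config.Config()
buffer_create(config, "buf-test-00000", size="10Gi")
# ... use the buffer from other deployments ...
for txn in config.txn():
    txn.delete_deployment(txn.get_deployment("buf-test-00000"))
config.close()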
Example #6
    def _deploy(self, deploy_name, values=None):
        """
        Deploy the Helm chart.

        :param deploy_name: deployment name
        :param values: optional dict of values

        """
        LOG.info("Deploying Helm chart: %s", deploy_name)
        self._deploy_id = "proc-{}-{}".format(self._pb_id, deploy_name)
        self.update_deploy_status("RUNNING")

        chart = {
            "chart": deploy_name,  # Helm chart deploy from the repo
        }

        if values is not None:
            chart["values"] = values

        deploy = ska_sdp_config.Deployment(self._deploy_id, "helm", chart)
        for txn in self._config.txn():
            txn.create_deployment(deploy)
def main(argv):
    pb_id = argv[0]
    for txn in config.txn():
        txn.take_processing_block(pb_id, config.client_lease)
        pb = txn.get_processing_block(pb_id)

    # Show
    log.info("Claimed processing block %s", pb)

    # Deploy PSS Receive with 1 worker.
    log.info("Deploying PSS Receive...")
    deploy_id = pb.pb_id + "-pss-receive"
    deploy = ska_sdp_config.Deployment(
        deploy_id,
        "helm",
        {
            'chart': 'pss-receive',  # Helm chart deploy/charts/pss-receive
        })
    for txn in config.txn():
        txn.create_deployment(deploy)
    try:

        # Just idle until the processing block disappears or we lose ownership
        log.info("Done, now idling...")
        for txn in config.txn():
            if not txn.is_processing_block_owner(pb.pb_id):
                break
            txn.loop(True)

    finally:

        # Clean up pss receive deployment.
        for txn in config.txn():
            txn.delete_deployment(deploy)

        config.close()
Example #8
def main():
    """Main loop."""

    # Get environment variables to pass to workflow containers.
    values_env = get_environment_variables(
        ['SDP_CONFIG_HOST', 'SDP_HELM_NAMESPACE'])

    # Fetch workflow definitions.
    workflows_version, workflows_realtime, workflows_batch = \
        update_workflow_definition(WORKFLOWS_URL, WORKFLOWS_SCHEMA)
    next_workflows_refresh = time.time() + WORKFLOWS_REFRESH

    # Connect to configuration database.
    client = ska_sdp_config.Config()

    LOG.debug("Starting main loop...")
    for txn in client.txn():

        # Update workflow definitions if it is time to do so.

        if time.time() >= next_workflows_refresh:
            LOG.debug('Updating workflow definitions')
            workflows_version, workflows_realtime, workflows_batch = \
                update_workflow_definition(WORKFLOWS_URL, WORKFLOWS_SCHEMA)
            next_workflows_refresh = time.time() + WORKFLOWS_REFRESH

        # Get lists of processing blocks and deployments.

        current_pbs = txn.list_processing_blocks()
        current_deployments = txn.list_deployments()

        # Make list of current PBs with deployments, inferred from the deployment IDs.

        current_pbs_with_deployment = list(
            set(map(get_pb_id_from_deploy_id, current_deployments)))

        LOG.debug("Current PBs: {}".format(current_pbs))
        LOG.debug("Current deployments: {}".format(current_deployments))
        LOG.debug("Current PBs with deployment: {}".format(
            current_pbs_with_deployment))

        # Delete deployments not associated with processing blocks.

        for deploy_id in current_deployments:
            # Get ID of associated processing block by taking prefix of deployment ID.
            pb_id = get_pb_id_from_deploy_id(deploy_id)
            if pb_id not in current_pbs:
                LOG.info("Deleting deployment {}".format(deploy_id))
                deploy = txn.get_deployment(deploy_id)
                txn.delete_deployment(deploy)

        # Deploy workflow for processing blocks without deployments.

        for pb_id in current_pbs:
            if pb_id in current_pbs_with_deployment:
                continue
            pb = txn.get_processing_block(pb_id)
            wf_type = pb.workflow['type']
            wf_id = pb.workflow['id']
            wf_version = pb.workflow['version']
            LOG.info(
                "PB {} has no deployment (workflow type = {}, ID = {}, version = {})"
                "".format(pb_id, wf_type, wf_id, wf_version))
            if wf_type == "realtime":
                if (wf_id, wf_version) in workflows_realtime:
                    LOG.info(
                        "Deploying realtime workflow ID = {}, version = {}"
                        "".format(wf_id, wf_version))
                    wf_image = workflows_realtime[(wf_id, wf_version)]
                    deploy_id = "{}-workflow".format(pb_id)
                    # Values to pass to workflow Helm chart.
                    # Copy environment variable values and add argument values.
                    values = dict(values_env)
                    values['wf_image'] = wf_image
                    values['pb_id'] = pb_id
                    deploy = ska_sdp_config.Deployment(deploy_id, 'helm', {
                        'chart': 'workflow',
                        'values': values
                    })
                    LOG.info("Creating deployment {}".format(deploy_id))
                    txn.create_deployment(deploy)
                else:
                    # Unknown realtime workflow ID and version.
                    LOG.error("Workflow ID = {} version = {} is not supported".
                              format(wf_id, wf_version))
            elif wf_type == "batch":
                LOG.warning("Batch workflows are not supported at present")
            else:
                LOG.error("Unknown workflow type: {}".format(wf_type))

        LOG.debug("Waiting...")
        txn.loop(wait=True, timeout=next_workflows_refresh - time.time())
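
The helpers get_pb_id_from_deploy_id and get_environment_variables are referenced above but not shown. Minimal sketches, assuming deployment IDs follow the "<pb_id>-<suffix>" convention used in this example and that the workflow chart takes the environment variables as flat values (the exact key layout depends on the chart):

def get_pb_id_from_deploy_id(deploy_id):
    """Return the processing block ID prefix of a deployment ID."""
    return deploy_id.rsplit('-', 1)[0]


def get_environment_variables(var_names):
    """Return a dict mapping the given environment variable names to their values."""
    return {v: os.environ[v] for v in var_names}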
Example #9
#
# This is done by adding the request to the configuration database,
# where it will be picked up and executed by appropriate
# controllers. In the full system this will involve external checks
# for whether the workflow actually has been assigned enough resources
# to do this - and for obtaining such assignments the workflow would
# need to communicate with a scheduler process. But we are ignoring
# all of that at the moment.
log.info("Deploying Dask...")
deploy_id = pb.pb_id + "-dask"
deploy = ska_sdp_config.Deployment(
    deploy_id,
    "helm",
    {
        'chart': 'stable/dask',
        'values': {
            'jupyter.enabled': 'false',
            'worker.replicas': 2,
            # We want to access Dask in-cluster using a DNS name
            'scheduler.serviceType': 'ClusterIP'
        }
    })
for txn in config.txn():
    txn.create_deployment(deploy)
try:

    # Wait for Dask to become available. At some point there will be a
    # way to learn about availability from the configuration database
    # (clearly populated by controllers querying Helm/Kubernetes).  So
    # for the moment we'll simply query the DNS name where we know
    # that Dask must become available eventually
    log.info("Waiting for Dask...")
Example #10
    def _deploy(self, deploy_name, n_workers, func, f_args):
        """
        Make the deployment and execute the function.

        This is called from the thread.

        :param deploy_name: deployment name
        :param func: function to process
        :param f_args: function arguments
        :param n_workers: number of dask workers

        """

        LOG.info("Deploying Dask...")
        self._deploy_id = "proc-{}-{}".format(self._pb_id, deploy_name)
        LOG.info(self._deploy_id)

        # Set Deployment to RUNNING status in the config_db
        self.update_deploy_status("RUNNING")

        # Hack for mismatch between formats of dask/distributed package version
        # Getting image from config db through the pb type, id and version
        wf_image = None
        for txn in self._config.txn():
            pb = txn.get_processing_block(self._pb_id)
            wf_image = txn.get_workflow(pb.workflow["type"], pb.workflow["id"],
                                        pb.workflow["version"])

        values = {"worker.replicas": n_workers}
        if wf_image is not None:
            values.update(wf_image)

        deploy = ska_sdp_config.Deployment(
            self._deploy_id,
            "helm",
            {
                "chart": "dask",
                "values": values
            },
        )

        for txn in self._config.txn():
            txn.create_deployment(deploy)

        LOG.info("Waiting for Dask...")
        client = None

        for _ in range(200):
            try:
                client = distributed.Client(self._deploy_id + "-scheduler." +
                                            os.environ["SDP_HELM_NAMESPACE"] +
                                            ":8786")
                # Stop retrying once the connection succeeds
                break
            except Exception as ex:
                LOG.error(ex)
        if client is None:
            LOG.error("Could not connect to Dask!")
            sys.exit(1)
        LOG.info("Connected to Dask")

        # Computing result
        result = func(*f_args)
        compute_result = result.compute()
        LOG.info("Computed Result %s", compute_result)

        # Update Deployment Status
        self.update_deploy_status("FINISHED")
Example #11
def make_deployment(dpl_name, dpl_args, pb_id):
    """Make a deployment given PB parameters."""
    return ska_sdp_config.Deployment(pb_id + "-" + dpl_name, **dpl_args)
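
A hypothetical usage sketch (the chart name and values are placeholders, and it assumes the Deployment constructor accepts keyword arguments for the deployment type and its arguments, mirroring the positional form used elsewhere in these examples):

dpl_args = {
    "type": "helm",
    "args": {"chart": "dask", "values": {"worker.replicas": 2}},
}
deploy = make_deployment("dask", dpl_args, "pb-test-00000")
for txn in config.txn():
    txn.create_deployment(deploy)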
Example #12
def main(argv):
    pb_id = argv[0]

    # Note that this process "claims" the workflow with a lease. This
    # means that once a processing block has been claimed, this script
    # must check in with the configuration database every ~10 seconds
    # or it will be declared dead (and presumably restarted). This
    # obviously means that no serious work should actually happen here.
    for txn in config.txn():
        txn.take_processing_block(pb_id, config.client_lease)
        pb = txn.get_processing_block(pb_id)

    # Show
    log.info("Claimed processing block %s", pb)

    # Deploy Dask with 2 workers.
    # This is done by adding the request to the configuration database,
    # where it will be picked up and executed by appropriate
    # controllers. In the full system this will involve external checks
    # for whether the workflow actually has been assigned enough resources
    # to do this - and for obtaining such assignments the workflow would
    # need to communicate with a scheduler process. But we are ignoring
    # all of that at the moment.
    log.info("Deploying Dask...")
    deploy_id = pb.pb_id + "-dask"
    deploy = ska_sdp_config.Deployment(
        deploy_id,
        "helm",
        {
            'chart': 'stable/dask',
            'values': {
                'jupyter.enabled': 'false',
                'worker.replicas': 2,
                # We want to access Dask in-cluster using a DNS name
                'scheduler.serviceType': 'ClusterIP'
            }
        })
    for txn in config.txn():
        txn.create_deployment(deploy)
    try:

        # Wait for Dask to become available. At some point there will be a
        # way to learn about availability from the configuration database
        # (clearly populated by controllers querying Helm/Kubernetes).  So
        # for the moment we'll simply query the DNS name where we know
        # that Dask must become available eventually
        log.info("Waiting for Dask...")
        client = None
        for _ in range(200):
            try:
                client = distributed.Client(deploy_id + '-scheduler.' +
                                            os.environ['SDP_HELM_NAMESPACE'] +
                                            ':8786')
                # Stop retrying once the connection succeeds
                break
            except Exception as e:
                print(e)
        if client is None:
            log.error("Could not connect to Dask!")
            exit(1)
        log.info("Connected to Dask")

        # Now we can use Dask to do some calculations. Let's use a silly
        # example from the documentation.
        def inc(x):
            return x + 1

        L = client.map(inc, range(1000))
        log.info("Dask results: {}".format(client.gather(L)))

        # Just idle until the processing block disappears or we lose ownership
        log.info("Done, now idling...")
        for txn in config.txn():
            if not txn.is_processing_block_owner(pb.pb_id):
                break
            txn.loop(True)

    finally:

        # Clean up Dask deployment. This should also become semi-optional
        # eventually, as clearly the processing controller should learn to
        # free all deployments associated with a workflow if it terminates
        # for whatever reason.
        for txn in config.txn():
            txn.delete_deployment(deploy)

        config.close()
Example #13
log.info("Waiting for processing block...")
for txn in config.txn():
    pb = txn.take_processing_block_by_workflow(
        workflow, config.client_lease)
    if pb is not None:
        continue
    txn.loop(wait=True)

# Show
log.info("Claimed processing block %s", pb)

# Deploy Vis Receive with 1 worker.
log.info("Deploying Vis Receive...")
deploy_id = pb.pb_id + "-vis-receive"
deploy = ska_sdp_config.Deployment(
    deploy_id, "helm", {
        'chart': 'vis-receive', # Helm chart deploy/charts/vis-receive
    })
for txn in config.txn():
    txn.create_deployment(deploy)
try:

    # Just idle until the processing block disappears or we lose ownership
    log.info("Done, now idling...")
    for txn in config.txn():
        if not txn.is_processing_block_owner(pb.pb_id):
            break
        txn.loop(True)

finally:

    # Clean up vis receive deployment.