Example #1
import logging
import os
from datetime import datetime

import kfp

# read_pipeline_params is a project-local helper (not shown here) that loads
# the pipeline parameters file into a dict.


def run_pipeline_func(client: kfp.Client,
                      pipeline_name: str,
                      pipeline_id: str,
                      pipeline_parameters_path: str,
                      recurring_flag: bool = False,
                      cron_exp: str = ''):
    pipeline_params = read_pipeline_params(
        pipeline_parameters_path=pipeline_parameters_path)
    pipeline_params = pipeline_params if pipeline_params is not None else {}

    experiment_id = None
    experiment_name = "{}-{}".format(pipeline_name,
                                     os.environ["INPUT_EXPERIMENT_NAME"])
    try:
        experiment_id = client.get_experiment(
            experiment_name=experiment_name).to_dict()["id"]
    except ValueError:
        experiment_id = client.create_experiment(
            name=experiment_name).to_dict()["id"]

    # os.getenv returns None when the variable is unset, and str.isspace(None)
    # would raise, so guard for both the unset and the blank case.
    namespace = os.getenv("INPUT_PIPELINE_NAMESPACE")
    namespace = namespace if namespace and not namespace.isspace() else None

    job_name = 'Run {} on {}'.format(
        pipeline_name,
        datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

    logging.info(f"experiment_id: {experiment_id}, \
                 job_name: {job_name}, \
                 pipeline_params: {pipeline_params}, \
                 pipeline_id: {pipeline_id}, \
                 namespace: {namespace}, \
                 cron_exp: {cron_exp}")

    # recurring_flag is a bool, so test it directly rather than comparing it
    # against the string "true" (which would never match).
    if recurring_flag:
        client.create_recurring_run(experiment_id=experiment_id,
                                    job_name=job_name,
                                    params=pipeline_params,
                                    pipeline_id=pipeline_id,
                                    cron_expression=cron_exp)
        logging.info(
            "Successfully started the recurring pipeline, head over to Kubeflow to check it out"
        )
    else:
        client.run_pipeline(experiment_id=experiment_id,
                            job_name=job_name,
                            params=pipeline_params,
                            pipeline_id=pipeline_id)
        logging.info(
            "Successfully started the pipeline, head over to Kubeflow to check it out"
        )
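
The function above leans on a read_pipeline_params helper that the snippet does not include. A minimal sketch, assuming the parameters file is YAML (the file format and error handling are assumptions, not confirmed by the snippet):

import logging

import yaml


def read_pipeline_params(pipeline_parameters_path: str) -> dict:
    # Hypothetical helper: load the parameters file and return its contents
    # as a dict, or None when the file is missing or malformed so the caller
    # can fall back to an empty dict.
    try:
        with open(pipeline_parameters_path, "r") as f:
            return yaml.safe_load(f)
    except (FileNotFoundError, yaml.YAMLError) as err:
        logging.error("Could not read pipeline parameters: %s", err)
        return None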
Example #2
import json
import logging
import uuid
from tempfile import NamedTemporaryFile

from kfp import Client
from kfp.compiler import Compiler
from tabulate import tabulate

# AuthHandler, PipelineGenerator, clean_name and WAIT_TIMEOUT are
# project-local names referenced below (not shown here).


class KubeflowClient(object):

    log = logging.getLogger(__name__)

    def __init__(self, config, project_name, context):
        token = AuthHandler().obtain_id_token()
        self.host = config.host
        self.client = Client(self.host, existing_token=token)
        self.project_name = project_name
        self.pipeline_description = config.run_config.description
        self.generator = PipelineGenerator(config, project_name, context)

    def list_pipelines(self):
        pipelines = self.client.list_pipelines(page_size=30).pipelines
        return tabulate(map(lambda x: [x.name, x.id], pipelines),
                        headers=["Name", "ID"])

    def run_once(
        self,
        pipeline,
        image,
        experiment_name,
        run_name,
        wait,
        image_pull_policy="IfNotPresent",
    ) -> None:
        run = self.client.create_run_from_pipeline_func(
            self.generator.generate_pipeline(pipeline, image,
                                             image_pull_policy),
            arguments={},
            experiment_name=experiment_name,
            run_name=run_name,
        )

        if wait:
            run.wait_for_run_completion(timeout=WAIT_TIMEOUT)

    def compile(self,
                pipeline,
                image,
                output,
                image_pull_policy="IfNotPresent"):
        Compiler().compile(
            self.generator.generate_pipeline(pipeline, image,
                                             image_pull_policy),
            output,
        )
        self.log.info("Generated pipeline definition was saved to %s" % output)

    def upload(self, pipeline, image, image_pull_policy="IfNotPresent"):
        pipeline = self.generator.generate_pipeline(pipeline, image,
                                                    image_pull_policy)

        if self._pipeline_exists(self.project_name):
            pipeline_id = self._get_pipeline_id(self.project_name)
            version_id = self._upload_pipeline_version(pipeline, pipeline_id)
            self.log.info("New version of pipeline created: %s", version_id)
        else:
            (pipeline_id, version_id) = self._upload_pipeline(pipeline)
            self.log.info("Pipeline created")

        self.log.info(
            "Pipeline link: %s/#/pipelines/details/%s/version/%s",
            self.host,
            pipeline_id,
            version_id,
        )

    def _pipeline_exists(self, pipeline_name):
        return self._get_pipeline_id(pipeline_name) is not None

    def _get_pipeline_id(self, pipeline_name):
        # In the KFP filter API, op=1 means EQUALS, so this matches pipelines
        # whose name equals pipeline_name exactly.
        pipelines = self.client.pipelines.list_pipelines(filter=json.dumps({
            "predicates": [{
                "key": "name",
                "op": 1,
                "string_value": pipeline_name,
            }]
        })).pipelines

        if pipelines:
            return pipelines[0].id

    def _upload_pipeline_version(self, pipeline_func, pipeline_id):
        version_name = f"{clean_name(self.project_name)}-{uuid.uuid4()}"[:100]
        with NamedTemporaryFile(suffix=".yaml") as f:
            Compiler().compile(pipeline_func, f.name)
            return self.client.pipeline_uploads.upload_pipeline_version(
                f.name,
                name=version_name,
                pipelineid=pipeline_id,
                _request_timeout=10000,
            ).id

    def _upload_pipeline(self, pipeline_func):
        with NamedTemporaryFile(suffix=".yaml") as f:
            Compiler().compile(pipeline_func, f.name)
            pipeline = self.client.pipeline_uploads.upload_pipeline(
                f.name,
                name=self.project_name,
                description=self.pipeline_description,
                _request_timeout=10000,
            )
            return (pipeline.id, pipeline.default_version.id)

    def _ensure_experiment_exists(self, experiment_name):
        try:
            experiment = self.client.get_experiment(
                experiment_name=experiment_name)
            self.log.info(f"Existing experiment found: {experiment.id}")
        except ValueError as e:
            if not str(e).startswith("No experiment is found"):
                raise

            experiment = self.client.create_experiment(experiment_name)
            self.log.info(f"New experiment created: {experiment.id}")

        return experiment.id

    def schedule(self, experiment_name, cron_expression):
        experiment_id = self._ensure_experiment_exists(experiment_name)
        pipeline_id = self._get_pipeline_id(self.project_name)
        self._disable_runs(experiment_id, pipeline_id)
        self.client.create_recurring_run(
            experiment_id,
            f"{self.project_name} on {cron_expression}",
            cron_expression=cron_expression,
            pipeline_id=pipeline_id,
        )
        self.log.info("Pipeline scheduled to %s", cron_expression)

    def _disable_runs(self, experiment_id, pipeline_id):
        runs = self.client.list_recurring_runs(experiment_id=experiment_id)
        if runs.jobs is not None:
            my_runs = [
                job for job in runs.jobs
                if job.pipeline_spec.pipeline_id == pipeline_id
            ]
            for job in my_runs:
                self.client.jobs.delete_job(job.id)
                self.log.info(f"Previous schedule deleted {job.id}")
Example #3
import json
import logging
import os
import uuid
from tempfile import NamedTemporaryFile
from typing import Dict, Set

from kedro.pipeline.node import Node
from kfp import Client, dsl
from kfp.compiler import Compiler
from kubernetes.client import V1EnvVar
from tabulate import tabulate

# IAP_CLIENT_ID, WAIT_TIMEOUT, is_mlflow_enabled and _clean_name are
# project-local names referenced below (not shown here).


class KubeflowClient(object):

    log = logging.getLogger(__name__)

    def __init__(self, config, project_name, context):
        token = self.obtain_id_token()
        self.host = config.host
        self.client = Client(self.host, existing_token=token)
        self.project_name = project_name
        self.context = context
        # Private kfp flag: silence the reusable-component warning emitted
        # for raw ContainerOp usage.
        dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
        self.volume_meta = config.run_config.volume

    def list_pipelines(self):
        pipelines = self.client.list_pipelines(page_size=30).pipelines
        return tabulate(map(lambda x: [x.name, x.id], pipelines),
                        headers=["Name", "ID"])

    def run_once(
        self,
        pipeline,
        image,
        experiment_name,
        run_name,
        wait,
        image_pull_policy="IfNotPresent",
    ) -> None:
        run = self.client.create_run_from_pipeline_func(
            self.generate_pipeline(pipeline, image, image_pull_policy),
            arguments={},
            experiment_name=experiment_name,
            run_name=run_name,
        )

        if wait:
            run.wait_for_run_completion(timeout=WAIT_TIMEOUT)

    def obtain_id_token(self):
        from google.auth.transport.requests import Request
        from google.oauth2 import id_token
        from google.auth.exceptions import DefaultCredentialsError

        client_id = os.environ.get(IAP_CLIENT_ID, None)

        jwt_token = None

        if not client_id:
            self.log.info(
                "No IAP_CLIENT_ID provided, skipping custom IAP authentication"
            )
            return jwt_token

        try:
            self.log.debug("Obtaining JWT token for %s.", client_id)
            jwt_token = id_token.fetch_id_token(Request(), client_id)
            self.log.info("Obtained JWT token for MLflow connectivity.")
        except DefaultCredentialsError as ex:
            self.log.warning(
                str(ex) +
                (" Note that this authentication method does not work with default"
                 " credentials obtained via 'gcloud auth application-default login'"
                 " command. Refer to documentation on how to configure service account"
                 " locally"
                 " (https://cloud.google.com/docs/authentication/production#manually)"
                 ))
        except Exception as e:
            self.log.error("Failed to obtain IAP access token. %s", e)

        # Return outside of the handlers: a return inside a finally block
        # would silently swallow any exception raised while handling errors.
        return jwt_token

    def generate_pipeline(self, pipeline, image, image_pull_policy):
        @dsl.pipeline(
            name=self.project_name,
            description="Kubeflow pipeline for Kedro project",
        )
        def convert_kedro_pipeline_to_kfp() -> None:
            """Convert from a Kedro pipeline into a kfp container graph."""

            node_volumes = (_setup_volumes()
                            if self.volume_meta is not None else {})
            node_dependencies = self.context.pipelines.get(
                pipeline).node_dependencies
            kfp_ops = _build_kfp_ops(node_dependencies, node_volumes)
            for node, dependencies in node_dependencies.items():
                for dependency in dependencies:
                    kfp_ops[node.name].after(kfp_ops[dependency.name])

        def _setup_volumes():
            vop = dsl.VolumeOp(
                name="data-volume-create",
                resource_name="data-volume",
                size=self.volume_meta.size,
                modes=self.volume_meta.access_modes,
                storage_class=self.volume_meta.storageclass,
            )
            if self.volume_meta.skip_init:
                return {"/home/kedro/data": vop.volume}
            else:
                volume_init = dsl.ContainerOp(
                    name="data-volume-init",
                    image=image,
                    command=["sh", "-c"],
                    arguments=[
                        " ".join([
                            "cp",
                            "--verbose",
                            "-r",
                            "/home/kedro/data/*",
                            "/home/kedro/datavolume",
                        ])
                    ],
                    pvolumes={"/home/kedro/datavolume": vop.volume},
                )
                volume_init.container.set_image_pull_policy(image_pull_policy)
                return {"/home/kedro/data": volume_init.pvolume}

        def _build_kfp_ops(node_dependencies: Dict[Node, Set[Node]],
                           node_volumes: Dict) -> Dict[str, dsl.ContainerOp]:
            """Build kfp container graph from Kedro node dependencies. """
            kfp_ops = {}

            env = [
                V1EnvVar(name=IAP_CLIENT_ID,
                         value=os.environ.get(IAP_CLIENT_ID, ""))
            ]

            if is_mlflow_enabled():
                kfp_ops["mlflow-start-run"] = dsl.ContainerOp(
                    name="mlflow-start-run",
                    image=image,
                    command=["kedro"],
                    arguments=[
                        "kubeflow",
                        "mlflow-start",
                        dsl.RUN_ID_PLACEHOLDER,
                    ],
                    file_outputs={"mlflow_run_id": "/tmp/mlflow_run_id"},
                )
                kfp_ops["mlflow-start-run"].container.set_image_pull_policy(
                    image_pull_policy)
                env.append(
                    V1EnvVar(
                        name="MLFLOW_RUN_ID",
                        value=kfp_ops["mlflow-start-run"].output,
                    ))

            for node in node_dependencies:
                name = _clean_name(node.name)
                kfp_ops[node.name] = dsl.ContainerOp(
                    name=name,
                    image=image,
                    command=["kedro"],
                    arguments=["run", "--node", node.name],
                    pvolumes=node_volumes,
                    container_kwargs={"env": env},
                )
                kfp_ops[node.name].container.set_image_pull_policy(
                    image_pull_policy)

            return kfp_ops

        return convert_kedro_pipeline_to_kfp

    def compile(self,
                pipeline,
                image,
                output,
                image_pull_policy="IfNotPresent"):
        Compiler().compile(
            self.generate_pipeline(pipeline, image, image_pull_policy), output)
        self.log.info("Generated pipeline definition was saved to %s" % output)

    def upload(self, pipeline, image, image_pull_policy="IfNotPresent"):
        pipeline = self.generate_pipeline(pipeline, image, image_pull_policy)

        if self._pipeline_exists(self.project_name):
            pipeline_id = self._get_pipeline_id(self.project_name)
            version_id = self._upload_pipeline_version(pipeline, pipeline_id,
                                                       self.project_name)
            self.log.info("New version of pipeline created: %s", version_id)
        else:
            (pipeline_id,
             version_id) = self._upload_pipeline(pipeline, self.project_name)
            self.log.info("Pipeline created")

        self.log.info(
            "Pipeline link: %s/#/pipelines/details/%s/version/%s",
            self.host,
            pipeline_id,
            version_id,
        )

    def _pipeline_exists(self, pipeline_name):
        return self._get_pipeline_id(pipeline_name) is not None

    def _get_pipeline_id(self, pipeline_name):
        # In the KFP filter API, op=1 means EQUALS, so this matches pipelines
        # whose name equals pipeline_name exactly.
        pipelines = self.client.pipelines.list_pipelines(filter=json.dumps({
            "predicates": [{
                "key": "name",
                "op": 1,
                "string_value": pipeline_name,
            }]
        })).pipelines

        if pipelines:
            return pipelines[0].id

    def _upload_pipeline_version(self, pipeline_func, pipeline_id,
                                 pipeline_name):
        version_name = f"{_clean_name(pipeline_name)}-{uuid.uuid4()}"[:100]
        with NamedTemporaryFile(suffix=".yaml") as f:
            Compiler().compile(pipeline_func, f.name)
            return self.client.pipeline_uploads.upload_pipeline_version(
                f.name, name=version_name, pipelineid=pipeline_id).id

    def _upload_pipeline(self, pipeline_func, pipeline_name):
        with NamedTemporaryFile(suffix=".yaml") as f:
            Compiler().compile(pipeline_func, f.name)
            pipeline = self.client.pipeline_uploads.upload_pipeline(
                f.name, name=pipeline_name)
            return (pipeline.id, pipeline.default_version.id)

    def _ensure_experiment_exists(self, experiment_name):
        try:
            experiment = self.client.get_experiment(
                experiment_name=experiment_name)
            self.log.info(f"Existing experiment found: {experiment.id}")
        except ValueError as e:
            if not str(e).startswith("No experiment is found"):
                raise

            experiment = self.client.create_experiment(experiment_name)
            self.log.info(f"New experiment created: {experiment.id}")

        return experiment.id

    def schedule(self, experiment_name, cron_expression):
        experiment_id = self._ensure_experiment_exists(experiment_name)
        pipeline_id = self._get_pipeline_id(self.project_name)
        self._disable_runs(experiment_id, pipeline_id)
        self.client.create_recurring_run(
            experiment_id,
            f"{self.project_name} on {cron_expression}",
            cron_expression=cron_expression,
            pipeline_id=pipeline_id,
        )
        self.log.info("Pipeline scheduled to %s", cron_expression)

    def _disable_runs(self, experiment_id, pipeline_id):
        runs = self.client.list_recurring_runs(experiment_id=experiment_id)
        if runs.jobs is not None:
            my_runs = [
                job for job in runs.jobs
                if job.pipeline_spec.pipeline_id == pipeline_id
            ]
            for job in my_runs:
                self.client.jobs.delete_job(job.id)
                self.log.info(f"Previous schedule deleted {job.id}")