def find_pipeline_id(
    pipeline_name: str,
    client: "kfp.Client",
    page_size: int = 100,
    page_token: str = "",
) -> "Optional[str]":
    """Find the id of the pipeline called ``pipeline_name``.

    Pages through ``client.list_pipelines`` until a pipeline with a matching
    name is found or the response carries no further page token.

    Arguments:
        pipeline_name {str} -- The name of the pipeline of interest
        client {kfp.Client} -- The kfp client
        page_size {int} -- The number of pipelines to collect at each API
            request (default: {100})

    Keyword Arguments:
        page_token {str} -- The page token to use for the first API request
            (default: {""})

    Returns:
        The id of the first pipeline whose name matches, or None if no
        pipeline matches.
    """
    while True:
        response = client.list_pipelines(page_size=page_size, page_token=page_token)

        # `pipelines` may be None instead of a list (presumably when the page
        # is empty), so fall back to an empty list rather than iterating None.
        for pipeline in response.pipelines or []:
            if pipeline.name == pipeline_name:
                logging.info("The pipeline id is: %s", pipeline.id)
                return pipeline.id

        # The next iteration has to continue from where this page ended.
        page_token = response.next_page_token

        # No next token means every page has been inspected.
        if not page_token:
            logging.info(
                "Could not find the pipeline, is the name: %s correct?",
                pipeline_name,
            )
            return None
def pipeline_id(client: "kfp.Client", name: str):
    """Get the ID of the kubeflow pipeline with the name ``name``.

    Pipelines are requested 100 at a time until a match is found or there is
    no further page.

    Args:
        client: the kfp client used to list pipelines
        name: name of the pipeline

    Returns:
        id of the pipeline, or an empty string when no pipeline has that name
    """
    page_token = ""
    while True:
        response = client.list_pipelines(page_token=page_token, page_size=100)
        if response.pipelines is None:
            return ""

        # Use a dedicated loop variable: reusing the response's name here
        # would make the `next_page_token` lookup below hit a pipeline object.
        for pipeline in response.pipelines:
            if pipeline.name == name:
                return pipeline.id

        page_token = response.next_page_token
        # Stop on None *and* on an empty token; requesting "" again would
        # restart the listing from the first page.
        if not page_token:
            return ""
class KubeflowClient(object):
    """Project-scoped wrapper around the Kubeflow Pipelines ``Client``.

    Pipeline functions are produced by a ``PipelineGenerator``; uploads and
    schedules are looked up by ``project_name``.
    """

    log = logging.getLogger(__name__)

    def __init__(self, config, project_name, context):
        """Obtain an id token and connect to the Kubeflow Pipelines host.

        Args:
            config: configuration object; ``host`` and
                ``run_config.description`` are read from it and the whole
                object is handed to ``PipelineGenerator``.
            project_name: name under which the pipeline is uploaded and
                looked up.
            context: passed through to ``PipelineGenerator``.
        """
        token = AuthHandler().obtain_id_token()
        self.host = config.host
        self.client = Client(self.host, existing_token=token)
        self.project_name = project_name
        self.pipeline_description = config.run_config.description
        self.generator = PipelineGenerator(config, project_name, context)

    def list_pipelines(self):
        """Return a table (name, id) of the first 30 pipelines on the host."""
        # `pipelines` can be None instead of a list; render an empty table
        # in that case rather than failing while iterating None.
        pipelines = self.client.list_pipelines(page_size=30).pipelines or []
        rows = [[pipeline.name, pipeline.id] for pipeline in pipelines]
        return tabulate(rows, headers=["Name", "ID"])

    def run_once(
        self,
        pipeline,
        image,
        experiment_name,
        run_name,
        wait,
        image_pull_policy="IfNotPresent",
    ) -> None:
        """Generate the pipeline and start a single run of it.

        Args:
            pipeline: pipeline identifier passed to the generator.
            image: container image passed to the generator.
            experiment_name: experiment the run is created in.
            run_name: name of the run.
            wait: when truthy, block until the run completes (bounded by
                ``WAIT_TIMEOUT``).
            image_pull_policy: image pull policy passed to the generator.
        """
        run = self.client.create_run_from_pipeline_func(
            self.generator.generate_pipeline(pipeline, image, image_pull_policy),
            arguments={},
            experiment_name=experiment_name,
            run_name=run_name,
        )
        if wait:
            run.wait_for_run_completion(timeout=WAIT_TIMEOUT)

    def compile(self, pipeline, image, output, image_pull_policy="IfNotPresent"):
        """Compile the generated pipeline and write the definition to ``output``."""
        Compiler().compile(
            self.generator.generate_pipeline(pipeline, image, image_pull_policy),
            output,
        )
        self.log.info("Generated pipeline definition was saved to %s", output)

    def upload(self, pipeline, image, image_pull_policy="IfNotPresent"):
        """Upload the generated pipeline under ``project_name``.

        If a pipeline with that name already exists a new version of it is
        uploaded; otherwise a new pipeline is created.
        """
        pipeline_func = self.generator.generate_pipeline(
            pipeline, image, image_pull_policy
        )

        # A single lookup answers both "does it exist" and "what is its id",
        # avoiding a second API round trip between check and use.
        pipeline_id = self._get_pipeline_id(self.project_name)
        if pipeline_id is not None:
            version_id = self._upload_pipeline_version(pipeline_func, pipeline_id)
            self.log.info("New version of pipeline created: %s", version_id)
        else:
            pipeline_id, version_id = self._upload_pipeline(pipeline_func)
            self.log.info("Pipeline created")

        # The host is passed as an argument so a '%' in it cannot be
        # misread as a format directive.
        self.log.info(
            "Pipeline link: %s/#/pipelines/details/%s/version/%s",
            self.host,
            pipeline_id,
            version_id,
        )

    def _pipeline_exists(self, pipeline_name):
        """Return True when a pipeline called ``pipeline_name`` is found."""
        return self._get_pipeline_id(pipeline_name) is not None

    def _get_pipeline_id(self, pipeline_name):
        """Return the id of the first pipeline matching the name, else None."""
        name_filter = json.dumps(
            {
                "predicates": [
                    {
                        "key": "name",
                        # NOTE(review): op 1 is presumably the EQUALS operator
                        # of the KFP filter API - confirm against the API docs.
                        "op": 1,
                        "string_value": pipeline_name,
                    }
                ]
            }
        )
        pipelines = self.client.pipelines.list_pipelines(filter=name_filter).pipelines
        if pipelines:
            return pipelines[0].id
        return None

    def _upload_pipeline_version(self, pipeline_func, pipeline_id):
        """Compile ``pipeline_func`` and upload it as a new pipeline version.

        Returns:
            The id of the created version.
        """
        # A random suffix gives each version its own name; the name is cut
        # at 100 characters.
        version_name = f"{clean_name(self.project_name)}-{uuid.uuid4()}"[:100]
        with NamedTemporaryFile(suffix=".yaml") as f:
            Compiler().compile(pipeline_func, f.name)
            return self.client.pipeline_uploads.upload_pipeline_version(
                f.name,
                name=version_name,
                pipelineid=pipeline_id,
                _request_timeout=10000,
            ).id

    def _upload_pipeline(self, pipeline_func):
        """Compile ``pipeline_func`` and upload it as a new pipeline.

        Returns:
            Tuple ``(pipeline id, default version id)``.
        """
        with NamedTemporaryFile(suffix=".yaml") as f:
            Compiler().compile(pipeline_func, f.name)
            pipeline = self.client.pipeline_uploads.upload_pipeline(
                f.name,
                name=self.project_name,
                description=self.pipeline_description,
                _request_timeout=10000,
            )
            return (pipeline.id, pipeline.default_version.id)

    def _ensure_experiment_exists(self, experiment_name):
        """Return the id of the named experiment, creating it when missing."""
        try:
            experiment = self.client.get_experiment(experiment_name=experiment_name)
            self.log.info("Existing experiment found: %s", experiment.id)
        except ValueError as e:
            # Only the "not found" ValueError means the experiment should be
            # created; any other ValueError is propagated unchanged.
            if not str(e).startswith("No experiment is found"):
                raise
            experiment = self.client.create_experiment(experiment_name)
            self.log.info("New experiment created: %s", experiment.id)
        return experiment.id

    def schedule(self, experiment_name, cron_expression):
        """Replace the project's recurring run in the experiment.

        Existing recurring runs of this pipeline in the experiment are
        deleted before the new one is created with ``cron_expression``.
        """
        experiment_id = self._ensure_experiment_exists(experiment_name)
        pipeline_id = self._get_pipeline_id(self.project_name)
        self._disable_runs(experiment_id, pipeline_id)
        self.client.create_recurring_run(
            experiment_id,
            f"{self.project_name} on {cron_expression}",
            cron_expression=cron_expression,
            pipeline_id=pipeline_id,
        )
        self.log.info("Pipeline scheduled to %s", cron_expression)

    def _disable_runs(self, experiment_id, pipeline_id):
        """Delete the experiment's recurring runs that use ``pipeline_id``.

        Only the jobs returned by a single ``list_recurring_runs`` call are
        considered.
        """
        runs = self.client.list_recurring_runs(experiment_id=experiment_id)
        if runs.jobs is None:
            return
        for job in runs.jobs:
            if job.pipeline_spec.pipeline_id != pipeline_id:
                continue
            self.client.jobs.delete_job(job.id)
            self.log.info("Previous schedule deleted %s", job.id)
class KubeflowClient(object):
    """Project-scoped wrapper around the Kubeflow Pipelines ``Client``.

    Builds a kfp pipeline from a Kedro pipeline (one container op per Kedro
    node) and runs, compiles, uploads or schedules it.
    """

    log = logging.getLogger(__name__)

    def __init__(self, config, project_name, context):
        """Obtain an id token and connect to the Kubeflow Pipelines host.

        Args:
            config: configuration object; ``host`` and ``run_config.volume``
                are read from it.
            project_name: name under which the pipeline is uploaded and
                looked up.
            context: object whose ``pipelines`` mapping provides the Kedro
                pipelines to convert.
        """
        token = self.obtain_id_token()
        self.host = config.host
        self.client = Client(self.host, existing_token=token)
        self.project_name = project_name
        self.context = context
        dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
        self.volume_meta = config.run_config.volume

    def list_pipelines(self):
        """Return a table (name, id) of the first 30 pipelines on the host."""
        # `pipelines` can be None instead of a list; render an empty table
        # in that case rather than failing while iterating None.
        pipelines = self.client.list_pipelines(page_size=30).pipelines or []
        rows = [[pipeline.name, pipeline.id] for pipeline in pipelines]
        return tabulate(rows, headers=["Name", "ID"])

    def run_once(
        self,
        pipeline,
        image,
        experiment_name,
        run_name,
        wait,
        image_pull_policy="IfNotPresent",
    ) -> None:
        """Generate the pipeline and start a single run of it.

        Args:
            pipeline: name of the Kedro pipeline to convert.
            image: container image every op runs in.
            experiment_name: experiment the run is created in.
            run_name: name of the run.
            wait: when truthy, block until the run completes (bounded by
                ``WAIT_TIMEOUT``).
            image_pull_policy: image pull policy set on every op.
        """
        run = self.client.create_run_from_pipeline_func(
            self.generate_pipeline(pipeline, image, image_pull_policy),
            arguments={},
            experiment_name=experiment_name,
            run_name=run_name,
        )
        if wait:
            run.wait_for_run_completion(timeout=WAIT_TIMEOUT)

    def obtain_id_token(self):
        """Fetch an id token for the client id named by ``IAP_CLIENT_ID``.

        Returns:
            The token, or None when the environment variable is unset/empty
            or the token could not be obtained (failures are logged, not
            raised).
        """
        client_id = os.environ.get(IAP_CLIENT_ID, None)
        if not client_id:
            self.log.info(
                "No IAP_CLIENT_ID provided, skipping custom IAP authentication"
            )
            return None

        # Imported only once IAP is actually configured, so google-auth is
        # not needed for hosts that do not use it.
        from google.auth.exceptions import DefaultCredentialsError
        from google.auth.transport.requests import Request
        from google.oauth2 import id_token

        jwt_token = None
        try:
            self.log.debug("Obtaining JWT token for %s.", client_id)
            jwt_token = id_token.fetch_id_token(Request(), client_id)
            self.log.info("Obtained JWT token for MLFLOW connectivity.")
        except DefaultCredentialsError as ex:
            self.log.warning(
                str(ex)
                + (
                    " Note that this authentication method does not work with default"
                    " credentials obtained via 'gcloud auth application-default login'"
                    " command. Refer to documentation on how to configure service account"
                    " locally"
                    " (https://cloud.google.com/docs/authentication/production#manually)"
                )
            )
        except Exception as e:
            # Best effort: the client is still created without a token.
            self.log.error("Failed to obtain IAP access token. %s", e)
        # Returned after the try block (not from `finally`) so that
        # KeyboardInterrupt/SystemExit are not silently swallowed.
        return jwt_token

    def generate_pipeline(self, pipeline, image, image_pull_policy):
        """Build the kfp pipeline function for a Kedro pipeline.

        Args:
            pipeline: name of the Kedro pipeline in ``context.pipelines``.
            image: container image every op runs in.
            image_pull_policy: image pull policy set on every op.

        Returns:
            A ``@dsl.pipeline``-decorated function that creates one container
            op per Kedro node and wires up the node dependencies.
        """

        @dsl.pipeline(
            name=self.project_name,
            description="Kubeflow pipeline for Kedro project",
        )
        def convert_kedro_pipeline_to_kfp() -> None:
            """Convert from a Kedro pipeline into a kfp container graph."""
            node_volumes = _setup_volumes() if self.volume_meta is not None else {}
            node_dependencies = self.context.pipelines.get(pipeline).node_dependencies
            kfp_ops = _build_kfp_ops(node_dependencies, node_volumes)
            for node, dependencies in node_dependencies.items():
                for dependency in dependencies:
                    kfp_ops[node.name].after(kfp_ops[dependency.name])

        def _setup_volumes():
            """Create the data volume and return the mount map for node ops."""
            vop = dsl.VolumeOp(
                name="data-volume-create",
                resource_name="data-volume",
                size=self.volume_meta.size,
                modes=self.volume_meta.access_modes,
                storage_class=self.volume_meta.storageclass,
            )
            if self.volume_meta.skip_init:
                return {"/home/kedro/data": vop.volume}

            # Copy the data directory of the image into the new volume
            # (mounted at a side path) before any node mounts it.
            volume_init = dsl.ContainerOp(
                name="data-volume-init",
                image=image,
                command=["sh", "-c"],
                arguments=[
                    " ".join(
                        [
                            "cp",
                            "--verbose",
                            "-r",
                            "/home/kedro/data/*",
                            "/home/kedro/datavolume",
                        ]
                    )
                ],
                pvolumes={"/home/kedro/datavolume": vop.volume},
            )
            volume_init.container.set_image_pull_policy(image_pull_policy)
            return {"/home/kedro/data": volume_init.pvolume}

        def _build_kfp_ops(
            node_dependencies: Dict[Node, Set[Node]], node_volumes: Dict
        ) -> Dict[str, dsl.ContainerOp]:
            """Build kfp container graph from Kedro node dependencies."""
            kfp_ops = {}

            env = [
                V1EnvVar(name=IAP_CLIENT_ID, value=os.environ.get(IAP_CLIENT_ID, ""))
            ]

            if is_mlflow_enabled():
                kfp_ops["mlflow-start-run"] = dsl.ContainerOp(
                    name="mlflow-start-run",
                    image=image,
                    command=["kedro"],
                    arguments=[
                        "kubeflow",
                        "mlflow-start",
                        dsl.RUN_ID_PLACEHOLDER,
                    ],
                    file_outputs={"mlflow_run_id": "/tmp/mlflow_run_id"},
                )
                kfp_ops["mlflow-start-run"].container.set_image_pull_policy(
                    image_pull_policy
                )
                # Every node op receives the output of the start op as an
                # environment variable.
                env.append(
                    V1EnvVar(
                        name="MLFLOW_RUN_ID",
                        value=kfp_ops["mlflow-start-run"].output,
                    )
                )

            for node in node_dependencies:
                name = _clean_name(node.name)
                # Ops are keyed by the original node name (the cleaned name
                # is only used as the op name) so dependencies can be looked
                # up by `node.name`.
                kfp_ops[node.name] = dsl.ContainerOp(
                    name=name,
                    image=image,
                    command=["kedro"],
                    arguments=["run", "--node", node.name],
                    pvolumes=node_volumes,
                    container_kwargs={"env": env},
                )
                kfp_ops[node.name].container.set_image_pull_policy(image_pull_policy)

            return kfp_ops

        return convert_kedro_pipeline_to_kfp

    def compile(self, pipeline, image, output, image_pull_policy="IfNotPresent"):
        """Compile the generated pipeline and write the definition to ``output``."""
        Compiler().compile(
            self.generate_pipeline(pipeline, image, image_pull_policy), output
        )
        self.log.info("Generated pipeline definition was saved to %s", output)

    def upload(self, pipeline, image, image_pull_policy="IfNotPresent"):
        """Upload the generated pipeline under ``project_name``.

        If a pipeline with that name already exists a new version of it is
        uploaded; otherwise a new pipeline is created.
        """
        pipeline_func = self.generate_pipeline(pipeline, image, image_pull_policy)

        # A single lookup answers both "does it exist" and "what is its id",
        # avoiding a second API round trip between check and use.
        pipeline_id = self._get_pipeline_id(self.project_name)
        if pipeline_id is not None:
            version_id = self._upload_pipeline_version(
                pipeline_func, pipeline_id, self.project_name
            )
            self.log.info("New version of pipeline created: %s", version_id)
        else:
            pipeline_id, version_id = self._upload_pipeline(
                pipeline_func, self.project_name
            )
            self.log.info("Pipeline created")

        # The host is passed as an argument so a '%' in it cannot be
        # misread as a format directive.
        self.log.info(
            "Pipeline link: %s/#/pipelines/details/%s/version/%s",
            self.host,
            pipeline_id,
            version_id,
        )

    def _pipeline_exists(self, pipeline_name):
        """Return True when a pipeline called ``pipeline_name`` is found."""
        return self._get_pipeline_id(pipeline_name) is not None

    def _get_pipeline_id(self, pipeline_name):
        """Return the id of the first pipeline matching the name, else None."""
        name_filter = json.dumps(
            {
                "predicates": [
                    {
                        "key": "name",
                        # NOTE(review): op 1 is presumably the EQUALS operator
                        # of the KFP filter API - confirm against the API docs.
                        "op": 1,
                        "string_value": pipeline_name,
                    }
                ]
            }
        )
        pipelines = self.client.pipelines.list_pipelines(filter=name_filter).pipelines
        if pipelines:
            return pipelines[0].id
        return None

    def _upload_pipeline_version(self, pipeline_func, pipeline_id, pipeline_name):
        """Compile ``pipeline_func`` and upload it as a new pipeline version.

        Returns:
            The id of the created version.
        """
        # A random suffix gives each version its own name; the name is cut
        # at 100 characters.
        version_name = f"{_clean_name(pipeline_name)}-{uuid.uuid4()}"[:100]
        with NamedTemporaryFile(suffix=".yaml") as f:
            Compiler().compile(pipeline_func, f.name)
            return self.client.pipeline_uploads.upload_pipeline_version(
                f.name, name=version_name, pipelineid=pipeline_id
            ).id

    def _upload_pipeline(self, pipeline_func, pipeline_name):
        """Compile ``pipeline_func`` and upload it as a new pipeline.

        Returns:
            Tuple ``(pipeline id, default version id)``.
        """
        with NamedTemporaryFile(suffix=".yaml") as f:
            Compiler().compile(pipeline_func, f.name)
            pipeline = self.client.pipeline_uploads.upload_pipeline(
                f.name, name=pipeline_name
            )
            return (pipeline.id, pipeline.default_version.id)

    def _ensure_experiment_exists(self, experiment_name):
        """Return the id of the named experiment, creating it when missing."""
        try:
            experiment = self.client.get_experiment(experiment_name=experiment_name)
            self.log.info("Existing experiment found: %s", experiment.id)
        except ValueError as e:
            # Only the "not found" ValueError means the experiment should be
            # created; any other ValueError is propagated unchanged.
            if not str(e).startswith("No experiment is found"):
                raise
            experiment = self.client.create_experiment(experiment_name)
            self.log.info("New experiment created: %s", experiment.id)
        return experiment.id

    def schedule(self, experiment_name, cron_expression):
        """Replace the project's recurring run in the experiment.

        Existing recurring runs of this pipeline in the experiment are
        deleted before the new one is created with ``cron_expression``.
        """
        experiment_id = self._ensure_experiment_exists(experiment_name)
        pipeline_id = self._get_pipeline_id(self.project_name)
        self._disable_runs(experiment_id, pipeline_id)
        self.client.create_recurring_run(
            experiment_id,
            f"{self.project_name} on {cron_expression}",
            cron_expression=cron_expression,
            pipeline_id=pipeline_id,
        )
        self.log.info("Pipeline scheduled to %s", cron_expression)

    def _disable_runs(self, experiment_id, pipeline_id):
        """Delete the experiment's recurring runs that use ``pipeline_id``.

        Only the jobs returned by a single ``list_recurring_runs`` call are
        considered.
        """
        runs = self.client.list_recurring_runs(experiment_id=experiment_id)
        if runs.jobs is None:
            return
        for job in runs.jobs:
            if job.pipeline_spec.pipeline_id != pipeline_id:
                continue
            self.client.jobs.delete_job(job.id)
            self.log.info("Previous schedule deleted %s", job.id)
class KubeflowClient:
    """
    A wrapper of the existing Kubeflow Pipelines Client which enriches it to
    be able to access more of the Kubeflow Pipelines API.
    """

    def __init__(
        self,
        host: Optional[str] = None,
        client_id: Optional[str] = None,
        namespace: Optional[str] = "kubeflow",
    ):
        """
        Instantiate a new KubeflowClient

        Args:
            host (str): The host we can find the Kubeflow API at
                (e.g. https://{APP_NAME}.endpoints.{PROJECT_ID}.cloud.goog/pipeline)
            client_id (str): The IAP client id we can use for authorisation
                (e.g. "XXXXXX-XXXXXXXXX.apps.googleusercontent.com")
            namespace (str): The Kubernetes / Kubeflow namespace to deploy to
                (e.g. kubeflow)
        """
        self.host = host
        self.client_id = client_id
        self.namespace = namespace
        logging.info("KubeflowClient: host: %s, client_id: %s", host, client_id)
        self.kfp_client = Client(host, client_id, namespace)
        # The kfp client's configuration is reused for the low-level API
        # clients created below.
        self.config = self.kfp_client._load_config(
            self.host, self.client_id, self.namespace, None, None
        )
        self.kfp_pipelines = self._connect_pipelines_api()
        self.kfp_runs = self._connect_runs_api()
        self.kfp_jobs = self._connect_jobs_api()

    @staticmethod
    def _collect_pages(fetch_page, items_attr, page_size=100):
        """Collect the items of every page returned by ``fetch_page``.

        Args:
            fetch_page: callable accepting ``page_size`` and ``page_token``
                keyword arguments and returning a response object.
            items_attr (str): name of the response attribute holding the
                page's items.
            page_size (int): number of items requested per call.

        Returns:
            A list with the items of all pages, in order.
        """
        collected = []
        page_token = ""
        while True:
            response = fetch_page(page_size=page_size, page_token=page_token)
            items = getattr(response, items_attr)
            if items is None:
                break
            collected.extend(items)
            page_token = response.next_page_token
            # Stop on None *and* on an empty token; requesting "" again
            # would restart the listing from the first page.
            if not page_token:
                break
        return collected

    def create_pipeline(self, pipeline_func, pipeline_name):
        """
        Create a new Kubeflow Pipeline using the provided pipeline function

        Args:
            pipeline_func: The method decorated by @dsl.pipeline which defines
                the pipeline
            pipeline_name (str): The name to upload the pipeline under

        Returns:
            The Kubeflow Pipeline object created
        """
        # Reserve a path for the package. The handle is closed straight away
        # (the compiler writes to the path itself) and the file is left on
        # disk after the upload.
        with tempfile.NamedTemporaryFile(suffix=".zip", delete=False) as package:
            pipeline_package_path = package.name
        compiler.Compiler().compile(pipeline_func, pipeline_package_path)
        logging.info(f"Compiled piopeline to: {pipeline_package_path}")
        return self.kfp_client.upload_pipeline(pipeline_package_path, pipeline_name)

    def create_experiment(self, experiment_name):
        """
        Create a new Kubeflow Pipelines Experiment (grouping of pipelines /
        runs)

        Args:
            experiment_name (str): The name of the experiment

        Returns:
            The Kubeflow experiment object created
        """
        return self.kfp_client.create_experiment(name=experiment_name)

    def list_experiments(self):
        """
        List the Experiments in the current namespace

        Returns:
            A list of all the Experiments
        """
        return self._collect_pages(self.kfp_client.list_experiments, "experiments")

    def find_job(self, job_name):
        """
        Look up a job by its name (in the current namespace). Returns None if
        the job cannot be found

        Args:
            job_name (str): The name of the job to find

        Returns:
            A reference to the job if found, and None if not.
        """
        for job in self.list_jobs():
            if job.name == job_name:
                return job
        return None

    def list_jobs(self):
        """
        List the Jobs in the current namespace

        Returns:
            A list of all the Jobs
        """
        return self._collect_pages(self.kfp_jobs.list_jobs, "jobs")

    def delete_job(self, job):
        """
        Delete a `Job` using its job.id

        Args:
            job (KubeflowJob): A `Job` object to delete

        Returns:
            True if the `Job` was deleted successfully
        """
        self.kfp_jobs.delete_job(id=job.id)
        return True

    def create_job(
        self,
        name: str,
        pipeline,
        experiment,
        description=None,
        enabled=True,
        max_concurrency=1,
        cron=None,
    ):
        """
        Create a new Kubeflow Pipelines Job

        Args:
            name (str): The name of the `Job`
            pipeline (Pipeline): The `Pipeline` object to execute when the
                `Job` is called
            experiment (Experiment): The `Experiment` object to create the
                `Job` in.
            description (str): A description of what the `Job` is all about
            enabled (bool): Should the `Job` be enabled?
            max_concurrency (int): How many concurrent executions of the
                `Job` are allowed?
            cron (str): The CRON expression to use to execute the job
                periodically

        Returns:
            The Kubeflow API response object.
        """
        key = kfp_server_api.models.ApiResourceKey(
            id=experiment.id,
            type=kfp_server_api.models.ApiResourceType.EXPERIMENT,
        )
        reference = kfp_server_api.models.ApiResourceReference(
            key, kfp_server_api.models.ApiRelationship.OWNER
        )
        spec = kfp_server_api.models.ApiPipelineSpec(pipeline_id=pipeline.id)

        trigger = None
        if cron is not None:
            cron_schedule = kfp_server_api.models.api_cron_schedule.ApiCronSchedule(
                cron=cron
            )
            trigger = kfp_server_api.models.api_trigger.ApiTrigger(
                cron_schedule=cron_schedule
            )

        run_body = kfp_server_api.models.ApiJob(
            name=name,
            description=description,
            pipeline_spec=spec,
            resource_references=[reference],
            # Honour the caller's choice instead of always enabling the job.
            enabled=enabled,
            trigger=trigger,
            max_concurrency=str(max_concurrency),
        )
        return self.kfp_jobs.create_job(body=run_body)

    def list_runs(self, experiment_name):
        """
        List the `Runs` in the specified `Experiment`

        Args:
            experiment_name (str): The name of the `Experiment`

        Returns:
            A list of all the `Runs` in the named `Experiment`
        """
        # The experiment is resolved through the wrapped kfp client (this
        # class defines no `get_experiment` of its own) and its id restricts
        # the listing to that experiment.
        experiment = self.kfp_client.get_experiment(experiment_name=experiment_name)
        return self._collect_pages(
            lambda **page: self.kfp_client.list_runs(
                experiment_id=experiment.id, **page
            ),
            "runs",
        )

    def list_pipelines(self):
        """
        List the `Pipelines` in the current namespace

        Returns:
            A list of all the `Pipelines`
        """
        return self._collect_pages(self.kfp_client.list_pipelines, "pipelines")

    def find_experiment(self, id=None, name=None):
        """
        Look up an `Experiment` by its name or id. Returns None if the
        `Experiment` cannot be found.

        Both `id` and `name` are optional, but at least one must be provided.
        Where both are provided, the function will return the first
        `Experiment` matching either id or name.

        Args:
            id (str): The `id` of the `Experiment` to find
            name (string): The `name` of the `Experiment` to find

        Returns:
            A reference to the `Experiment` if found, and None if not.
        """
        for experiment in self.list_experiments():
            # A criterion that was not supplied never matches.
            if name is not None and experiment.name == name:
                return experiment
            if id is not None and experiment.id == id:
                return experiment
        return None

    def find_pipeline(self, name):
        """
        Look up a `Pipeline` by its name (in the current namespace). Returns
        None if the `Pipeline` cannot be found

        Args:
            name (str): The name of the `Pipeline` to find

        Returns:
            A reference to the `Pipeline` if found, and `None` if not.
        """
        for pipeline in self.list_pipelines():
            if pipeline.name == name:
                return pipeline
        return None

    def delete_pipeline(self, pipeline):
        """
        Delete the specified `Pipeline`

        Args:
            pipeline: The pipeline object to delete

        Returns:
            True if successful
        """
        self.kfp_pipelines.delete_pipeline(pipeline.id)
        return True

    def _connect_pipelines_api(self):
        """
        Create a new PipelineServiceApi client
        """
        api_client = kfp_server_api.api_client.ApiClient(self.config)
        return kfp_server_api.api.pipeline_service_api.PipelineServiceApi(api_client)

    def _connect_runs_api(self):
        """
        Create a new RunServiceApi client
        """
        api_client = kfp_server_api.api_client.ApiClient(self.config)
        return kfp_server_api.api.run_service_api.RunServiceApi(api_client)

    def _connect_jobs_api(self):
        """
        Create a new JobServiceApi client
        """
        api_client = kfp_server_api.api_client.ApiClient(self.config)
        return kfp_server_api.api.job_service_api.JobServiceApi(api_client)