def start_non_interactive_pipeline_run(
    self,
    job_uuid,
    project_uuid,
    pipeline_definition: PipelineDefinition,
    run_config: Dict[str, Union[str, Dict[str, str]]],
) -> str:
    """Starts a non-interactive pipeline run.

    It is a pipeline run that is part of a job.

    Args:
        job_uuid: UUID of the job.
        project_uuid: UUID of the project.
        pipeline_definition: A json description of the pipeline.
        run_config: Configuration of the run for the compute backend.
            Example: {
                'host_user_dir': '/home/../userdir',
                'project_dir': '/home/../pipelines/uuid',
                'env_uuid_docker_id_mappings': {
                    'b6527b0b-bfcc-4aff-91d1-37f9dfd5d8e8':
                    'sha256:61f82126945bb25dd85d6a5b122a1815df1c0c5f91621089cde0938be4f698d4'
                }
            }

    Returns:
        Status of the pipeline run. "FAILURE" or "SUCCESS".

    """
    # Local import so this block does not rely on the (not visible)
    # top-of-file import section.
    import shutil

    pipeline_uuid = pipeline_definition["uuid"]

    job_dir = os.path.join(
        "/userdir", "jobs", project_uuid, pipeline_uuid, job_uuid
    )
    snapshot_dir = os.path.join(job_dir, "snapshot")
    run_dir = os.path.join(job_dir, self.request.id)

    # TODO: It should not copy all directories, e.g. not "data".
    # Copy the contents of `snapshot_dir` to the new (not yet existing)
    # folder `run_dir` (created by `copytree`). The previous
    # implementation shelled out via `os.system('cp -R ...')`, which
    # silently ignores copy failures and lets shell metacharacters in
    # the paths be interpreted. `shutil.copytree` raises on error
    # instead; `symlinks=True` matches `cp -R`'s behavior of copying
    # symlinks as symlinks rather than following them.
    shutil.copytree(snapshot_dir, run_dir, symlinks=True)

    # Update the `run_config` for the non-interactive pipeline run. The
    # pipeline run should execute on the `run_dir` as its
    # `project_dir`. Note that the `project_dir` inside the
    # `run_config` has to be the abs path w.r.t. the host because it is
    # used by the `docker.sock` when mounting the dir to the container
    # of a step.
    host_base_user_dir = os.path.split(run_config["host_user_dir"])[0]

    # To join the paths, the `run_dir` cannot start with `/userdir/...`
    # but should start as `userdir/...`.
    run_config["project_dir"] = os.path.join(host_base_user_dir, run_dir[1:])
    run_config["run_endpoint"] = f"jobs/{job_uuid}"
    run_config["pipeline_uuid"] = pipeline_uuid
    run_config["project_uuid"] = project_uuid

    # Overwrite the `pipeline.json`, that was copied from the snapshot,
    # with the new `pipeline.json` that contains the new parameters for
    # every step.
    pipeline_json = os.path.join(run_dir, run_config["pipeline_path"])
    with open(pipeline_json, "w") as f:
        json.dump(pipeline_definition, f, indent=4, sort_keys=True)

    # Run the pipeline inside a session that is torn down automatically
    # when the run finishes.
    with launch_noninteractive_session(
        docker_client,
        self.request.id,
        project_uuid,
        run_config["pipeline_path"],
        run_config["project_dir"],
    ):
        status = run_pipeline(
            pipeline_definition,
            project_uuid,
            run_config,
            task_id=self.request.id,
        )

    return status
def start_non_interactive_pipeline_run(
    self,
    job_uuid,
    project_uuid,
    pipeline_definition: PipelineDefinition,
    run_config: Dict[str, Union[str, Dict[str, str]]],
) -> str:
    """Starts a non-interactive pipeline run.

    A non-interactive pipeline run is a pipeline run belonging to a
    job.

    Args:
        job_uuid: UUID of the job.
        project_uuid: UUID of the project.
        pipeline_definition: A json description of the pipeline.
        run_config: Configuration of the run for the compute backend.
            Example: {
                'userdir_pvc': 'userdir-pvc',
                'project_dir': 'pipelines/uuid',
                'env_uuid_to_image': {
                    'b6527b0b-bfcc-4aff-91d1-37f9dfd5d8e8':
                    'sha256:61f82126945bb25dd85d6a5b122a1815df1c0c5f91621089cde0938be4f698d4'
                }
            }

    Returns:
        Status of the pipeline run. "FAILURE" or "SUCCESS".

    """
    pipeline_uuid = pipeline_definition["uuid"]

    # Job directory layout: one shared snapshot plus one directory per
    # pipeline run, named after the task id.
    base_dir = os.path.join(
        "/userdir", "jobs", project_uuid, pipeline_uuid, job_uuid
    )
    snapshot_path = os.path.join(base_dir, "snapshot")
    run_path = os.path.join(base_dir, self.request.id)

    # Populate the run directory from the snapshot. No need to
    # use_gitignore since the snapshot was copied with
    # use_gitignore=True.
    copytree(snapshot_path, run_path, use_gitignore=False)

    # `project_dir` has to be relative to `userdir_pvc`, because k8s
    # mounts it as a subpath of that volume.
    userdir_pvc = run_config["userdir_pvc"]

    # For non interactive runs the session uuid is equal to the task
    # uuid, which is actually the pipeline run uuid.
    session_uuid = self.request.id
    run_config.update(
        {
            "session_uuid": session_uuid,
            "session_type": "noninteractive",
            "pipeline_uuid": pipeline_uuid,
            "project_uuid": project_uuid,
            "project_dir": run_path,
            "run_endpoint": f"jobs/{job_uuid}",
        }
    )

    # Replace the `pipeline.json` that came with the snapshot by the
    # definition carrying this run's step parameters.
    definition_path = os.path.join(run_path, run_config["pipeline_path"])
    with open(definition_path, "w") as out_file:
        json.dump(pipeline_definition, out_file, indent=4, sort_keys=True)

    # Derive the session config from the run config; run_config also
    # carries user_env_variables, which the session needs.
    session_config = copy.deepcopy(run_config)
    session_config.pop("env_uuid_to_image")
    session_config.pop("run_endpoint")
    session_config["userdir_pvc"] = userdir_pvc
    session_config["services"] = pipeline_definition.get("services", {})
    session_config["env_uuid_to_image"] = run_config["env_uuid_to_image"]

    def _is_aborted() -> bool:
        # Polled by the session to detect user-requested cancellation.
        return AbortableAsyncResult(session_uuid).is_aborted()

    with launch_noninteractive_session(
        session_uuid, session_config, _is_aborted
    ):
        status = run_pipeline(
            pipeline_definition,
            run_config,
            session_uuid,
            task_id=self.request.id,
        )

    return status