def _transaction(
    self,
    project_uuid: str,
    run_config: Dict[str, Any],
    pipeline: Pipeline,
):
    # Specify the task_id beforehand to avoid race conditions
    # between the task and its presence in the db.
    task_id = str(uuid.uuid4())

    # NOTE: we are setting the status of the run ourselves without
    # using the option of celery to get the status of tasks. This
    # way we do not have to configure a backend (where the default
    # of "rpc://" does not give the results we would want).
    run = {
        "uuid": task_id,
        "pipeline_uuid": pipeline.properties["uuid"],
        "project_uuid": project_uuid,
        "status": "PENDING",
    }
    db.session.add(models.InteractivePipelineRun(**run))
    # Need to flush because otherwise the bulk insertion of pipeline
    # steps will lead to foreign key errors.
    # https://docs.sqlalchemy.org/en/13/orm/persistence_techniques.html#bulk-operations-caveats
    db.session.flush()

    # Set an initial value for the status of the pipeline steps that
    # will be run.
    step_uuids = [s.properties["uuid"] for s in pipeline.steps]
    pipeline_steps = []
    for step_uuid in step_uuids:
        pipeline_steps.append(
            models.PipelineRunStep(
                **{
                    "run_uuid": task_id,
                    "step_uuid": step_uuid,
                    "status": "PENDING",
                }
            )
        )
    db.session.bulk_save_objects(pipeline_steps)
    run["pipeline_steps"] = pipeline_steps

    self.collateral_kwargs["project_uuid"] = project_uuid
    self.collateral_kwargs["task_id"] = task_id
    self.collateral_kwargs["pipeline"] = pipeline
    self.collateral_kwargs["run_config"] = run_config
    self.collateral_kwargs["env_variables"] = get_proj_pip_env_variables(
        project_uuid, pipeline.properties["uuid"]
    )
    return run
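
# A minimal sketch (not from the codebase) of the collateral side of the
# two-phase function above: it shows how the kwargs stored in
# `collateral_kwargs` could be consumed to launch the Celery task with the
# pre-generated `task_id`, mirroring the `send_task` usage in the `post`
# endpoint further below. The task name "app.core.tasks.run_pipeline", the
# kwargs layout, and the function name are assumptions; the sketch relies
# on the same module-level imports (make_celery, current_app) as the code
# around it.
def _collateral_sketch(
    project_uuid: str,
    task_id: str,
    pipeline: Pipeline,
    run_config: Dict[str, Any],
    env_variables: Dict[str, Any],
):
    celery = make_celery(current_app)
    celery_job_kwargs = {
        "pipeline_definition": pipeline.to_dict(),
        "project_uuid": project_uuid,
        "run_config": run_config,
        "env_variables": env_variables,
    }
    # Passing task_id explicitly ties the Celery task to the "uuid"
    # stored in the InteractivePipelineRun row created in _transaction,
    # which is what avoids the race condition mentioned there.
    res = celery.send_task(
        "app.core.tasks.run_pipeline",  # hypothetical task name
        kwargs=celery_job_kwargs,
        task_id=task_id,
    )
    res.forget()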
def _transaction(
    self,
    job: Dict[str, Any],
    pipeline_run_spec: Dict[str, Any],
    pipeline_definitions: List[Dict[str, Any]],
    pipeline_run_ids: List[str],
):
    db.session.add(models.Job(**job))
    # So that the job can be returned with all runs.
    job["pipeline_runs"] = []

    # To be later used by the collateral effect function.
    tasks_to_launch = []

    for pipeline_definition, id_ in zip(pipeline_definitions, pipeline_run_ids):
        # Note: the pipeline definition contains the parameters of
        # the specific run.
        pipeline_run_spec["pipeline_definition"] = pipeline_definition
        pipeline = construct_pipeline(**pipeline_run_spec)

        # Specify the task_id beforehand to avoid race conditions
        # between the task and its presence in the db.
        task_id = str(uuid.uuid4())
        tasks_to_launch.append((task_id, pipeline))

        non_interactive_run = {
            "job_uuid": job["job_uuid"],
            "run_uuid": task_id,
            "pipeline_run_id": id_,
            "pipeline_uuid": pipeline.properties["uuid"],
            "project_uuid": job["project_uuid"],
            "status": "PENDING",
        }
        db.session.add(models.NonInteractivePipelineRun(**non_interactive_run))

        # Need to flush because otherwise the bulk insertion of
        # pipeline steps will lead to foreign key errors.
        # https://docs.sqlalchemy.org/en/13/orm/persistence_techniques.html#bulk-operations-caveats
        db.session.flush()

        # TODO: this code is also in `namespace_runs`. Could
        # potentially be put in a function for modularity.
        # Set an initial value for the status of the pipeline
        # steps that will be run.
        step_uuids = [s.properties["uuid"] for s in pipeline.steps]
        pipeline_steps = []
        for step_uuid in step_uuids:
            pipeline_steps.append(
                models.PipelineRunStep(
                    **{
                        "run_uuid": task_id,
                        "step_uuid": step_uuid,
                        "status": "PENDING",
                    }
                )
            )
        db.session.bulk_save_objects(pipeline_steps)
        non_interactive_run["pipeline_steps"] = pipeline_steps
        job["pipeline_runs"].append(non_interactive_run)

    self.collateral_kwargs["job"] = job
    self.collateral_kwargs["tasks_to_launch"] = tasks_to_launch
    self.collateral_kwargs["pipeline_run_spec"] = pipeline_run_spec
    return job
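
# The TODO above notes that the "set all steps to PENDING" block is
# duplicated across namespaces. A possible helper that the call sites
# could share is sketched below; it is hypothetical (name and signature
# are not part of the codebase) and only builds the model objects,
# leaving the flush and bulk_save_objects calls to the caller.
def _pending_pipeline_run_steps(task_id, pipeline):
    step_uuids = [s.properties["uuid"] for s in pipeline.steps]
    return [
        models.PipelineRunStep(
            run_uuid=task_id,
            step_uuid=step_uuid,
            status="PENDING",
        )
        for step_uuid in step_uuids
    ]

# Each call site would then collapse to:
#   pipeline_steps = _pending_pipeline_run_steps(task_id, pipeline)
#   db.session.bulk_save_objects(pipeline_steps)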
def _transaction(self, job_uuid: str):
    # with_entities is so that we do not retrieve the interactive
    # runs of the job, since we do not need those.
    job = (
        models.Job.query.with_entities(models.Job)
        # Use with_for_update so that the job entry will be locked
        # until commit, so that if, for whatever reason, the same
        # job is launched concurrently the different launches will
        # actually be serialized, i.e. one has to wait for the
        # commit of the other, so that the launched runs will
        # correctly refer to a different total_scheduled_executions
        # number.
        # https://docs.sqlalchemy.org/en/13/orm/query.html#sqlalchemy.orm.query.Query.with_for_update
        # https://www.postgresql.org/docs/9.0/sql-select.html#SQL-FOR-UPDATE-SHARE
        .with_for_update()
        .filter_by(uuid=job_uuid)
        .one()
    )

    # In case the job gets aborted while the scheduler attempts to
    # run it.
    if job.status == "ABORTED":
        self.collateral_kwargs["job"] = dict()
        self.collateral_kwargs["tasks_to_launch"] = []
        self.collateral_kwargs["run_config"] = dict()
        return

    # The status of jobs that run once is initially set to PENDING,
    # thus we need to update that.
    if job.status == "PENDING":
        job.status = "STARTED"

    # To be later used by the collateral effect function.
    tasks_to_launch = []

    # The number of pipeline runs of a job, across all job runs. We
    # could use 'count', but 'max' is safer: if for any reason a
    # pipeline run is not there, e.g. if pipeline runs 0 and 2 are
    # there, but not 1, 'count' would keep returning 2, and no runs
    # could be launched anymore because of the (job_uuid,
    # pipeline_run_index) constraint.
    pipeline_run_index = (
        db.session.query(
            func.max(models.NonInteractivePipelineRun.pipeline_run_index)
        )
        .filter_by(job_uuid=job_uuid)
        .one()
    )[0]
    if pipeline_run_index is None:
        pipeline_run_index = 0
    else:
        pipeline_run_index += 1

    # run_index is the index of the run within the runs of this job
    # scheduling/execution.
    for run_index, run_parameters in enumerate(job.parameters):
        pipeline_def = copy.deepcopy(job.pipeline_definition)

        # Set the pipeline parameters.
        pipeline_def["parameters"] = run_parameters.get(
            _config.PIPELINE_PARAMETERS_RESERVED_KEY, {}
        )
        # Set the steps parameters in the pipeline definition.
        for step_uuid, step_parameters in run_parameters.items():
            # One of the entries is not actually a step_uuid.
            if step_uuid != _config.PIPELINE_PARAMETERS_RESERVED_KEY:
                pipeline_def["steps"][step_uuid]["parameters"] = step_parameters

        # Instantiate a pipeline object given the specs, definition
        # and parameters.
        pipeline_run_spec = copy.deepcopy(job.pipeline_run_spec)
        pipeline_run_spec["pipeline_definition"] = pipeline_def
        pipeline = construct_pipeline(**pipeline_run_spec)

        # Specify the task_id beforehand to avoid race conditions
        # between the task and its presence in the db.
        task_id = str(uuid.uuid4())
        tasks_to_launch.append((task_id, pipeline))

        non_interactive_run = {
            "job_uuid": job.uuid,
            "uuid": task_id,
            "pipeline_uuid": job.pipeline_uuid,
            "project_uuid": job.project_uuid,
            "status": "PENDING",
            "parameters": run_parameters,
            "job_run_index": job.total_scheduled_executions,
            "job_run_pipeline_run_index": run_index,
            "pipeline_run_index": pipeline_run_index,
            "env_variables": job.env_variables,
        }
        pipeline_run_index += 1

        db.session.add(models.NonInteractivePipelineRun(**non_interactive_run))

        # Need to flush because otherwise the bulk insertion of
        # pipeline steps will lead to foreign key errors.
        # https://docs.sqlalchemy.org/en/13/orm/persistence_techniques.html#bulk-operations-caveats
        db.session.flush()

        # TODO: this code is also in `namespace_runs`. Could
        # potentially be put in a function for modularity.
        # Set an initial value for the status of the pipeline
        # steps that will be run.
        step_uuids = [s.properties["uuid"] for s in pipeline.steps]
        pipeline_steps = []
        for step_uuid in step_uuids:
            pipeline_steps.append(
                models.PipelineRunStep(
                    **{
                        "run_uuid": task_id,
                        "step_uuid": step_uuid,
                        "status": "PENDING",
                    }
                )
            )
        db.session.bulk_save_objects(pipeline_steps)

    job.total_scheduled_executions += 1

    self.collateral_kwargs["job"] = job.as_dict()
    self.collateral_kwargs["tasks_to_launch"] = tasks_to_launch
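
# A minimal standalone sketch (assuming the `models.Job` table and an
# active Flask-SQLAlchemy session, as in the code above; the function name
# is hypothetical) of the row-level lock that the comment in the scheduler
# transaction relies on. `with_for_update()` emits SELECT ... FOR UPDATE,
# so a second scheduler selecting the same job row blocks until the first
# transaction commits, which keeps `total_scheduled_executions` (and thus
# `job_run_index`) consistent under concurrent launches.
def _locked_job_increment_sketch(job_uuid: str) -> int:
    job = (
        models.Job.query
        .with_for_update()  # row lock held until commit/rollback
        .filter_by(uuid=job_uuid)
        .one()
    )
    job.total_scheduled_executions += 1
    db.session.commit()  # releases the lock
    return job.total_scheduled_executions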
def post(self):
    """Queues a new experiment."""
    # TODO: possibly use marshal() on the post_data. Note that we
    # have moved over to using flask_restx
    # https://flask-restx.readthedocs.io/en/stable/api.html#flask_restx.marshal
    # to make sure the default values etc. are filled in.
    post_data = request.get_json()

    # TODO: maybe we can expect a datetime (in the schema) so we
    # do not have to parse it here. Again note that we are now
    # using flask_restx
    # https://flask-restx.readthedocs.io/en/stable/api.html#flask_restx.fields.DateTime
    scheduled_start = post_data["scheduled_start"]
    scheduled_start = datetime.fromisoformat(scheduled_start)

    experiment = {
        "experiment_uuid": post_data["experiment_uuid"],
        "project_uuid": post_data["project_uuid"],
        "pipeline_uuid": post_data["pipeline_uuid"],
        "scheduled_start": scheduled_start,
        "total_number_of_pipeline_runs": len(post_data["pipeline_definitions"]),
    }
    db.session.add(models.Experiment(**experiment))
    db.session.commit()

    pipeline_runs = []
    pipeline_run_spec = post_data["pipeline_run_spec"]
    env_uuid_docker_id_mappings = None
    # This way we write the entire experiment to the db, but avoid
    # launching any run (celery task) if we detected a problem.
    experiment_creation_error_messages = []
    tasks_to_launch = []

    # TODO: This can be made more efficient, since the pipeline
    #       is the same for all pipeline runs. The only difference
    #       is the parameters. So all the jobs could be created in
    #       batch.
    for pipeline_definition, id_ in zip(
        post_data["pipeline_definitions"], post_data["pipeline_run_ids"]
    ):
        pipeline_run_spec["pipeline_definition"] = pipeline_definition
        pipeline = construct_pipeline(**post_data["pipeline_run_spec"])

        # Specify the task_id beforehand to avoid race conditions
        # between the task and its presence in the db.
        task_id = str(uuid.uuid4())

        non_interactive_run = {
            "experiment_uuid": post_data["experiment_uuid"],
            "run_uuid": task_id,
            "pipeline_run_id": id_,
            "pipeline_uuid": pipeline.properties["uuid"],
            "project_uuid": post_data["project_uuid"],
            "status": "PENDING",
        }
        db.session.add(models.NonInteractivePipelineRun(**non_interactive_run))

        # Need to flush because otherwise the bulk insertion of
        # pipeline steps will lead to foreign key errors.
        # https://docs.sqlalchemy.org/en/13/orm/persistence_techniques.html#bulk-operations-caveats
        db.session.flush()

        # TODO: this code is also in `namespace_runs`. Could
        # potentially be put in a function for modularity.
        # Set an initial value for the status of the pipeline
        # steps that will be run.
        step_uuids = [s.properties["uuid"] for s in pipeline.steps]
        pipeline_steps = []
        for step_uuid in step_uuids:
            pipeline_steps.append(
                models.PipelineRunStep(
                    **{
                        "run_uuid": task_id,
                        "step_uuid": step_uuid,
                        "status": "PENDING",
                    }
                )
            )
        db.session.bulk_save_objects(pipeline_steps)
        db.session.commit()

        non_interactive_run["pipeline_steps"] = pipeline_steps
        pipeline_runs.append(non_interactive_run)

        # Get docker ids of the images to use and make it so that the
        # images will not be deleted in case they become outdated by
        # an environment rebuild. Compute the mappings only once: this
        # way we are guaranteed that they will be the same for all
        # runs, so a new environment build terminating while the
        # different runs are being submitted won't affect the
        # experiment.
        if env_uuid_docker_id_mappings is None:
            try:
                env_uuid_docker_id_mappings = lock_environment_images_for_run(
                    task_id,
                    post_data["project_uuid"],
                    pipeline.get_environments(),
                )
            except errors.ImageNotFound as e:
                experiment_creation_error_messages.append(
                    f"Pipeline was referencing environments for "
                    f"which an image does not exist, {e}"
                )
        else:
            image_mappings = [
                models.PipelineRunImageMapping(
                    **{
                        "run_uuid": task_id,
                        "orchest_environment_uuid": env_uuid,
                        "docker_img_id": docker_id,
                    }
                )
                for env_uuid, docker_id in env_uuid_docker_id_mappings.items()
            ]
            db.session.bulk_save_objects(image_mappings)
            db.session.commit()

        if len(experiment_creation_error_messages) == 0:
            # Prepare the args for the task.
            run_config = pipeline_run_spec["run_config"]
            run_config["env_uuid_docker_id_mappings"] = env_uuid_docker_id_mappings

            celery_job_kwargs = {
                "experiment_uuid": post_data["experiment_uuid"],
                "project_uuid": post_data["project_uuid"],
                "pipeline_definition": pipeline.to_dict(),
                "run_config": run_config,
            }

            # Due to circular imports we use the task name instead
            # of importing the function directly.
            tasks_to_launch.append(
                {
                    "name": "app.core.tasks.start_non_interactive_pipeline_run",
                    "eta": scheduled_start,
                    "kwargs": celery_job_kwargs,
                    "task_id": task_id,
                }
            )

    experiment["pipeline_runs"] = pipeline_runs

    if len(experiment_creation_error_messages) == 0:
        # Create the Celery object with the Flask context.
        celery = make_celery(current_app)
        for task in tasks_to_launch:
            res = celery.send_task(**task)
            # NOTE: this is only needed if a backend is configured.
            # The task does not return anything, therefore we can
            # forget its result and make sure that the Celery backend
            # releases resources (for storing and transmitting
            # results) associated to the task.
            res.forget()

        return experiment, 201

    else:
        current_app.logger.error("\n".join(experiment_creation_error_messages))

        # Simple way to update both the in-memory objects and the db
        # while avoiding multiple update statements (one for each
        # object).
        for pipeline_run in experiment["pipeline_runs"]:
            pipeline_run["status"] = "FAILURE"
            for step in pipeline_run["pipeline_steps"]:
                step.status = "FAILURE"
            models.PipelineRunStep.query.filter_by(
                run_uuid=pipeline_run["run_uuid"]
            ).update({"status": "FAILURE"})

        models.NonInteractivePipelineRun.query.filter_by(
            experiment_uuid=post_data["experiment_uuid"]
        ).update({"status": "FAILURE"})
        db.session.commit()

        return {
            "message": (
                "Failed to create experiment because not all referenced"
                " environments are available."
            )
        }, 500
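
# For reference, a sketch of the request body the `post` endpoint above
# expects, reconstructed from the `post_data` accesses; all values are
# illustrative placeholders (not real data), and the contents of
# "pipeline_definitions" and "pipeline_run_spec" are only hinted at.
example_post_data = {
    "experiment_uuid": "00000000-0000-0000-0000-000000000001",
    "project_uuid": "00000000-0000-0000-0000-000000000002",
    "pipeline_uuid": "00000000-0000-0000-0000-000000000003",
    # Parsed with datetime.fromisoformat, so an ISO 8601 string.
    "scheduled_start": "2021-01-01T12:00:00",
    # One pipeline definition per run; each carries that run's parameters.
    "pipeline_definitions": [
        {"uuid": "00000000-0000-0000-0000-000000000003", "steps": {}},
    ],
    # One id per pipeline definition, zipped together in the loop above.
    "pipeline_run_ids": [0],
    # Must contain at least "run_config", which is extended with the
    # env_uuid_docker_id_mappings before being handed to Celery.
    "pipeline_run_spec": {"run_config": {}},
}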