Example No. 1
def get_env_uuids_to_docker_id_mappings(
    project_uuid: str, env_uuids: Set[str]
) -> Dict[str, str]:
    """Map each environment uuid to its current image docker id.

    Args:
        project_uuid: UUID of the project to which the environments
            belong.
        env_uuids: Set of environment uuids.

    Returns:
        Dict[env_uuid] = docker_id

    """
    env_uuid_docker_id_mappings = {
        env_uuid: get_environment_image_docker_id(
            _config.ENVIRONMENT_IMAGE_NAME.format(
                project_uuid=project_uuid, environment_uuid=env_uuid
            )
        )
        for env_uuid in env_uuids
    }
    missing_images = [
        f"{env_uuid} has no docker image"
        for env_uuid, docker_id in env_uuid_docker_id_mappings.items()
        if docker_id is None
    ]
    if missing_images:
        raise errors.ImageNotFound("\n".join(missing_images))
    return env_uuid_docker_id_mappings
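
A minimal usage sketch (the UUIDs are hypothetical placeholders; the call assumes the surrounding module's _config and get_environment_image_docker_id):

mappings = get_env_uuids_to_docker_id_mappings(
    project_uuid="hypothetical-project-uuid",
    env_uuids={"hypothetical-env-uuid-1", "hypothetical-env-uuid-2"},
)
# On success: {"hypothetical-env-uuid-1": "sha256:...", ...}. If any
# environment has no image, a single errors.ImageNotFound is raised
# listing every missing environment on its own line.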
Example No. 2
    def _collateral(
        self,
        project_uuid: str,
        task_id: str,
        pipeline: Pipeline,
        run_config: Dict[str, Any],
        env_variables: Dict[str, Any],
        **kwargs,
    ):
        # Get docker ids of images to use and make it so that the images
        # will not be deleted in case they become outdated by an
        # environment rebuild.
        try:
            env_uuid_docker_id_mappings = lock_environment_images_for_run(
                task_id,
                project_uuid,
                pipeline.get_environments(),
            )
        except errors.ImageNotFound as e:
            msg = (
                "Pipeline references environments that do not exist in the"
                f" project. The following environments do not exist: [{e}].\n\n"
                "Please make sure all pipeline steps are assigned an"
                " environment that exists in the project."
            )
            raise errors.ImageNotFound(msg) from e

        # Create Celery object with the Flask context and construct the
        # kwargs for the job.
        celery = make_celery(current_app)
        run_config["env_uuid_docker_id_mappings"] = env_uuid_docker_id_mappings
        run_config["user_env_variables"] = env_variables
        celery_job_kwargs = {
            "pipeline_definition": pipeline.to_dict(),
            "project_uuid": project_uuid,
            "run_config": run_config,
        }

        # Start the run as a background task on Celery. Due to circular
        # imports we send the task by name instead of importing the
        # function directly.
        res = celery.send_task(
            "app.core.tasks.run_pipeline",
            kwargs=celery_job_kwargs,
            task_id=task_id,
        )

        # NOTE: this is only relevant if a result backend is configured.
        # The task does not return anything, so we can forget its result
        # and make sure that the Celery backend releases the resources
        # (for storing and transmitting results) associated with the
        # task.
        res.forget()
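
For context, celery.send_task dispatches by registered task name, which is what lets the code above avoid the circular import; a minimal self-contained sketch of the same pattern (broker/backend URLs and the payload are hypothetical):

from celery import Celery

celery = Celery("app", broker="amqp://localhost", backend="rpc://")

# Dispatching by name means this process never imports the worker-side
# task function, avoiding the circular import mentioned above.
res = celery.send_task(
    "app.core.tasks.run_pipeline",
    kwargs={"pipeline_definition": {}, "project_uuid": "x", "run_config": {}},
    task_id="hypothetical-task-id",
)

# The task returns nothing, so drop the result to let the configured
# result backend free the resources reserved for it.
res.forget()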
Example No. 3
def test_docker_image_exists_with_image_not_found_error(mocker, mock_client):
    mock_images = mocker.Mock()
    mock_images.get = mocker.Mock()

    images_mock = mocker.PropertyMock(return_value=mock_images)

    mock_client.images = images_mock

    image_tag = "gcr.io/sigint/test-image-name"

    mock_client.images.get.side_effect = docker_errors.ImageNotFound(
        "pew", "pew")

    exists = docker_utils.docker_image_exists(image_tag, mock_client)

    assert not exists
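
The docker_utils.docker_image_exists helper under test is not shown on this page; a plausible minimal sketch, assuming it simply wraps client.images.get and treats ImageNotFound as a negative result:

import docker
from docker import errors as docker_errors


def docker_image_exists(image_tag: str, client: docker.DockerClient) -> bool:
    # images.get raises docker.errors.ImageNotFound when the daemon
    # has no image matching the given tag.
    try:
        client.images.get(image_tag)
        return True
    except docker_errors.ImageNotFound:
        return False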
Example No. 4
    def _collateral(
        self,
        job: Dict[str, Any],
        pipeline_run_spec: Dict[str, Any],
        tasks_to_launch: Tuple[str, Pipeline],
    ):
        # Safety check in case the job has no runs.
        if not tasks_to_launch:
            return

        # Get docker ids of images to use and make it so that the
        # images will not be deleted in case they become outdated by
        # an environment rebuild. Compute it only once so that the
        # mappings are guaranteed to be the same for all runs; an
        # environment build completing while the different runs are
        # being submitted won't affect the job.
        try:
            env_uuid_docker_id_mappings = lock_environment_images_for_run(
                # first (task_id, pipeline) -> task id.
                tasks_to_launch[0][0],
                job["project_uuid"],
                # first (task_id, pipeline) -> pipeline.
                tasks_to_launch[0][1].get_environments(),
            )
        except errors.ImageNotFound as e:
            raise errors.ImageNotFound(
                "Pipeline references environments for which an image"
                f" does not exist: {e}") from e

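        # The mappings for the first (task_id, pipeline) tuple were
        # already recorded by lock_environment_images_for_run above, so
        # (presumably) only the remaining tasks need explicit
        # PipelineRunImageMapping rows.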
        for task_id, _ in tasks_to_launch[1:]:
            image_mappings = [
                models.PipelineRunImageMapping(
                    run_uuid=task_id,
                    orchest_environment_uuid=env_uuid,
                    docker_img_id=docker_id,
                )
                for env_uuid, docker_id in env_uuid_docker_id_mappings.items()
            ]
            db.session.bulk_save_objects(image_mappings)
        db.session.commit()

        # Launch each task through celery.
        celery = make_celery(current_app)
        for task_id, pipeline in tasks_to_launch:
            run_config = pipeline_run_spec["run_config"]
            run_config[
                "env_uuid_docker_id_mappings"] = env_uuid_docker_id_mappings
            celery_job_kwargs = {
                "job_uuid": job["job_uuid"],
                "project_uuid": job["project_uuid"],
                "pipeline_definition": pipeline.to_dict(),
                "run_config": run_config,
            }

            # Due to circular imports we use the task name instead of
            # importing the function directly.
            task_args = {
                "name": "app.core.tasks.start_non_interactive_pipeline_run",
                "eta": job["scheduled_start"],
                "kwargs": celery_job_kwargs,
                "task_id": task_id,
            }
            res = celery.send_task(**task_args)
            # NOTE: this is only relevant if a result backend is
            # configured. The task does not return anything, so we can
            # forget its result and make sure that the Celery backend
            # releases the resources (for storing and transmitting
            # results) associated with the task.
            res.forget()
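
The eta argument is what defers each run until job["scheduled_start"]; a minimal sketch of deferred dispatch (broker URL, task payload, and start time are hypothetical):

from datetime import datetime, timedelta, timezone

from celery import Celery

celery = Celery("app", broker="amqp://localhost")

# Celery holds the message until the eta passes, so this hypothetical
# run starts roughly ten minutes from now.
res = celery.send_task(
    "app.core.tasks.start_non_interactive_pipeline_run",
    eta=datetime.now(timezone.utc) + timedelta(minutes=10),
    kwargs={},  # the real call passes the job/pipeline/run_config kwargs
    task_id="hypothetical-task-id",
)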
Example No. 5
    def _transaction(
        self,
        job_uuid: str,
        cron_schedule: str,
        parameters: Dict[str, Any],
        env_variables: Dict[str, str],
        next_scheduled_time: str,
        strategy_json: Dict[str, Any],
        confirm_draft: bool,
    ):
        job = models.Job.query.with_for_update().filter_by(uuid=job_uuid).one()

        if cron_schedule is not None:
            if job.schedule is None and job.status != "DRAFT":
                raise ValueError(
                    ("Failed update operation. Cannot set the schedule of a "
                     "job which is neither a draft nor already a cron job."))

            if not croniter.is_valid(cron_schedule):
                raise ValueError(
                    f"Failed update operation. Invalid cron schedule: {cron_schedule}"
                )

            # Compute the next time the job should be scheduled,
            # starting from now.
            job.schedule = cron_schedule

            job.next_scheduled_time = croniter(
                cron_schedule, datetime.now(timezone.utc)).get_next(datetime)

        if parameters is not None:
            if job.schedule is None and job.status != "DRAFT":
                raise ValueError((
                    "Failed update operation. Cannot update the parameters of "
                    "a job which is neither a draft nor a cron job."))
            job.parameters = parameters

        if env_variables is not None:
            if job.schedule is None and job.status != "DRAFT":
                raise ValueError((
                    "Failed update operation. Cannot update the env variables "
                    "of a job which is neither a draft nor a cron job."))
            job.env_variables = env_variables

        if next_scheduled_time is not None:
            if job.status != "DRAFT":
                raise ValueError(
                    ("Failed update operation. Cannot set the next scheduled "
                     "time of a job which is not a draft."))
            if job.schedule is not None:
                raise ValueError(
                    ("Failed update operation. Cannot set the next scheduled "
                     "time of a cron job."))
            job.next_scheduled_time = datetime.fromisoformat(
                next_scheduled_time)

        if strategy_json is not None:
            if job.schedule is None and job.status != "DRAFT":
                raise ValueError(
                    ("Failed update operation. Cannot set the strategy json "
                     "of a job which is neither a draft nor a cron job."))
            job.strategy_json = strategy_json

        if confirm_draft:
            if job.status != "DRAFT":
                raise ValueError(
                    "Failed update operation. The job is not a draft.")

            # Make sure all environments still exist, that is, the
            # pipeline is not referring to non-existent environments.
            pipeline_def = job.pipeline_definition
            environment_uuids = {
                step["environment"] for step in pipeline_def["steps"].values()
            }
            env_uuids_missing_image = get_env_uuids_missing_image(
                job.project_uuid, environment_uuids)
            if env_uuids_missing_image:
                missing = ", ".join(env_uuids_missing_image)
                msg = (
                    "Pipeline references environments that do not exist in the"
                    " project. The following environments do not exist:"
                    f" [{missing}].\n\nPlease make sure all"
                    " pipeline steps are assigned an environment that exists"
                    " in the project.")
                raise errors.ImageNotFound(msg)

            if job.schedule is None:
                job.status = "PENDING"

                # One-time job that needs to run right now. The
                # scheduler will not pick it up because it does not
                # have a next_scheduled_time.
                if job.next_scheduled_time is None:
                    job.last_scheduled_time = datetime.now(timezone.utc)
                    RunJob(self.tpe).transaction(job.uuid)
                else:
                    job.last_scheduled_time = job.next_scheduled_time

                # One-time jobs that are set to run at a given date
                # will now be picked up by the scheduler, since they
                # are no longer drafts.

            # Cron jobs are considered STARTED the moment the scheduler
            # can decide whether or not to run them.
            else:
                job.last_scheduled_time = job.next_scheduled_time
                job.status = "STARTED"
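
The cron handling above relies on croniter; a minimal sketch of the validation and next-run computation used in _transaction (the schedule is a hypothetical example):

from datetime import datetime, timezone

from croniter import croniter

cron_schedule = "*/15 * * * *"  # hypothetical: every 15 minutes

# is_valid rejects malformed expressions before they reach the job row.
assert croniter.is_valid(cron_schedule)

# get_next(datetime) returns the first fire time strictly after the
# base time, which is what gets stored in job.next_scheduled_time.
next_scheduled_time = croniter(
    cron_schedule, datetime.now(timezone.utc)
).get_next(datetime)
print(next_scheduled_time)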