Пример #1
0
def scale_by_runs(max_tasks_memory):
    """
    Scale the per-run workers: stop the ones that are no longer needed and start new ones.

    The function first calls scale_default_tasks for the default system tasks.  It then looks at the
    tasks currently running for the app and passes to kill_workers the names of those whose ExportRun
    is in a finished state or has been deleted.  Finally, for every submitted run that does not yet
    have a running task, it queues pick_up_run_task on a queue named after the run uid and starts a
    task to consume that queue.  Scaling stops as soon as RUNS_CONCURRENCY tasks are running or
    starting another run task would reach max_tasks_memory.

    @param max_tasks_memory: The amount of memory in MB to allow for all of the tasks.
    @type max_tasks_memory: int
    @raise ValueError: If the RUNS_CONCURRENCY environment variable is set to a non-integer value.
    """
    from audit_logging.utils import get_user_details

    client, app_name = get_scale_client()

    celery_task_details = get_celery_task_details(client, app_name)
    running_tasks_memory = int(celery_task_details["memory"])
    celery_tasks = get_celery_tasks_scale_by_run()

    # Check if we need to scale for default system tasks.
    scale_default_tasks(client, app_name, celery_tasks)

    # Get run in progress
    runs = ExportRun.objects.filter(status=TaskState.SUBMITTED.value,
                                    deleted=False)
    total_tasks = 0
    running_tasks = client.get_running_tasks(app_name)
    logger.info(f"Running tasks: {running_tasks}")

    if running_tasks:
        total_tasks = running_tasks["pagination"].get("total_results", 0)
        # Get a list of running task names excluding the default celery tasks.
        # A response without "resources" is treated as "no running tasks" rather than a crash.
        running_task_names = [
            resource.get("name")
            for resource in running_tasks.get("resources") or []
            if resource.get("name") != "celery"
        ]
        finished_states = [
            state.value for state in TaskState.get_finished_states()
        ]
        finished_runs = ExportRun.objects.filter(
            Q(uid__in=running_task_names)
            & (Q(status__in=finished_states) | Q(deleted=True)))

        finished_run_uids = []
        for finished_run in finished_runs:
            logger.info(
                f"Stopping {finished_run.uid} because it is in a finished state ({finished_run.status}) "
                f"or was deleted ({finished_run.deleted}).")
            finished_run_uids.append(str(finished_run.uid))
        kill_workers(task_names=finished_run_uids, client=client)

    # The concurrency limit does not change while scaling, so read it once.
    # A value of 0 disables the limit.
    max_runs = int(os.getenv("RUNS_CONCURRENCY", 3))

    for run in runs:
        # Work on a copy: the command template is formatted per run below.
        celery_run_task = copy.deepcopy(celery_tasks["run"])

        logger.info(
            f"Checking to see if submitted run {run.uid} needs a new worker.")

        if max_runs and total_tasks >= max_runs:
            logger.info(
                f"total_tasks ({total_tasks}) >= max_runs ({max_runs})")
            break
        if running_tasks_memory + celery_run_task["memory"] >= max_tasks_memory:
            logger.info("Not enough available memory to scale another run.")
            break
        task_name = run.uid

        running_tasks_by_queue = client.get_running_tasks(app_name, task_name)
        running_tasks_by_queue_count = running_tasks_by_queue[
            "pagination"].get("total_results", 0)

        logger.info(
            f"Currently {running_tasks_by_queue_count} tasks running for {task_name}."
        )
        if running_tasks_by_queue_count:
            logger.info(f"Already a consumer for {task_name}")
            continue

        # Forward the session token of the user's latest session (if any) to the run.
        user_session = UserSession.objects.filter(user=run.user).last()
        session_token = None
        if user_session:
            session = Session.objects.get(session_key=user_session.session_id)
            session_token = session.get_decoded().get("session_token")

        user_details = get_user_details(run.user)
        # Queue the run on its own queue first, then start the task that consumes that queue.
        pick_up_run_task.s(run_uid=str(run.uid),
                           session_token=session_token,
                           user_details=user_details).apply_async(
                               queue=str(task_name),
                               routing_key=str(task_name))
        celery_run_task["command"] = celery_run_task["command"].format(
            celery_group_name=task_name)
        run_task_command(client, app_name, str(task_name), celery_run_task)
        # Keep track of new resources being used.
        total_tasks += 1
        running_tasks_memory += celery_run_task["memory"]
Пример #2
0
    def build_tasks(
        self,
        primary_export_task,
        provider_task_uid=None,
        user=None,
        run=None,
        stage_dir=None,
        worker=None,
        service_type=None,
        session_token=None,
        *args,
        **kwargs,
    ):
        """
        Build the celery task chain for one data provider of a run.

        The method creates the DataProviderTaskRecord and the export task records, queues the estimates task,
        and assembles the chain of the primary export task followed by one task per export format and one
        reprojection task per additional projection.  The chain itself is returned, it is not dispatched here.

        :param primary_export_task: The task which converts the source data to the interchange format (i.e. OSM-> gpkg)
        :param provider_task_uid: A reference uid for the DataProviderTask model.
        :param user: The user executing the task, only used to look up user_details when they are not passed.
        :param run: The ExportRun which this task will belong to.
        :param stage_dir: The directory where to store the files while they are being created.
        :param worker: The celery worker assigned this task.
        :param service_type: The provider service type, forwarded to the export tasks.
        :param session_token: A session token, forwarded to the export tasks.
        :param kwargs: May contain "user_details"; when missing they are looked up from the user.
        :return: A tuple of the DataProviderTaskRecord uid and the celery task chain.
        """

        logger.debug(f"Running Job with id: {provider_task_uid}")
        # pull the provider_task from the database
        data_provider_task = (
            DataProviderTask.objects.select_related("provider")
            .prefetch_related("formats__supported_projections")
            .get(uid=provider_task_uid)
        )
        data_provider: DataProvider = data_provider_task.provider
        job = run.job

        # This is just to make it easier to trace when user_details haven't been sent
        user_details = kwargs.get("user_details")
        if user_details is None:
            from audit_logging.utils import get_user_details

            user_details = get_user_details(user)

        job_name = normalize_name(job.name)
        # get the formats to export
        formats: List[ExportFormat] = list(data_provider_task.formats.all())

        data_provider_task_record: DataProviderTaskRecord
        data_provider_task_record, _ = DataProviderTaskRecord.objects.get_or_create(
            run=run,
            name=data_provider.name,
            provider=data_provider,
            status=TaskState.PENDING.value,
            display=True,
        )

        projections = [projection.srid for projection in job.projections.all()]

        queue_group = get_celery_queue_group(run_uid=run.uid, worker=worker)

        # Record estimates for size and time
        get_estimates_task.apply_async(
            queue=queue_group,
            routing_key=queue_group,
            kwargs={
                "run_uid": run.uid,
                "data_provider_task_uid": data_provider_task.uid,
                "data_provider_task_record_uid": data_provider_task_record.uid,
            },
        )

        # The primary export task is skipped when every requested format is a proxy format of the provider.
        requested_format_names = {item.name.lower() for item in formats}
        proxy_format_names = {item.name.lower() for item in get_proxy_formats(data_provider)}
        skip_primary_export_task = requested_format_names.issubset(proxy_format_names)

        export_tasks = {}  # {export_format : (export_task_record, export_task, default_projection)}
        for export_format in formats:
            logger.info(f"Setting up task for format: {export_format.name} with {export_format.options}")
            if is_supported_proxy_format(export_format, data_provider):
                export_task = create_format_task("ogcapi-process")
            else:
                export_task = create_format_task(export_format.slug)

            default_projection = get_default_projection(export_format.get_supported_projection_list(), projections)
            task_name = export_format.name
            if default_projection:
                task_name = f"{task_name} - EPSG:{default_projection}"
            export_task_record = create_export_task_record(
                task_name=task_name,
                export_provider_task=data_provider_task_record,
                worker=worker,
                display=getattr(export_task, "display", False),
            )
            # Keep the default projection so the chain below skips exactly the projection named in the record.
            export_tasks[export_format] = (export_task_record, export_task, default_projection)

        bbox = job.extents

        # Create a celery chain which gets the data & runs export formats
        if export_tasks:
            subtasks = []
            if data_provider.preview_url:
                subtasks.append(
                    create_datapack_preview.s(
                        run_uid=run.uid,
                        stage_dir=stage_dir,
                        task_uid=data_provider_task_record.uid,
                        user_details=user_details,
                    ).set(queue=queue_group, routing_key=queue_group)
                )

            for current_format, (export_task_record, export_task, default_projection) in export_tasks.items():
                supported_projections = current_format.get_supported_projection_list()

                subtasks.append(
                    export_task.s(
                        run_uid=run.uid,
                        stage_dir=stage_dir,
                        job_name=job_name,
                        task_uid=export_task_record.uid,
                        user_details=user_details,
                        locking_task_key=export_task_record.uid,
                        config=data_provider.config,
                        service_url=data_provider.url,
                        export_format_slug=current_format.slug,
                        bbox=bbox,
                        session_token=session_token,
                        provider_slug=data_provider.slug,
                        export_provider_task_record_uid=data_provider_task_record.uid,
                        worker=worker,
                        selection=job.the_geom.geojson,
                        layer=data_provider.layer,
                        service_type=service_type,
                    ).set(queue=queue_group, routing_key=queue_group)
                )

                # Add a reprojection task for every requested projection the format supports.
                for projection in set(supported_projections) & set(projections):

                    # This task was already added as the initial format conversion.
                    if projection == default_projection:
                        continue

                    task_name = f"{export_task.name} - EPSG:{projection}"
                    projection_task = create_export_task_record(
                        task_name=task_name,
                        export_provider_task=data_provider_task_record,
                        worker=worker,
                        display=getattr(export_task, "display", True),
                    )
                    subtasks.append(
                        reprojection_task.s(
                            run_uid=run.uid,
                            stage_dir=stage_dir,
                            job_name=job_name,
                            task_uid=projection_task.uid,
                            user_details=user_details,
                            locking_task_key=data_provider_task_record.uid,
                            projection=projection,
                            config=data_provider.config,
                        ).set(queue=queue_group, routing_key=queue_group)
                    )

            format_tasks = chain(subtasks)
        else:
            format_tasks = None

        primary_export_task_record = create_export_task_record(
            task_name=primary_export_task.name,
            export_provider_task=data_provider_task_record,
            worker=worker,
            display=getattr(primary_export_task, "display", False),
        )

        # Primary tasks with "osm" in their name are routed to the ".large" queue of the group.
        if "osm" in primary_export_task.name.lower():
            queue_routing_key_name = f"{queue_group}.large"
        else:
            queue_routing_key_name = queue_group

        # Set custom zoom levels if available, otherwise use the provider defaults.
        # Compare against None so that an explicitly requested zoom level of 0 is honored instead of
        # being replaced by the provider default.
        min_zoom = data_provider_task.min_zoom if data_provider_task.min_zoom is not None else data_provider.level_from
        max_zoom = data_provider_task.max_zoom if data_provider_task.max_zoom is not None else data_provider.level_to

        primary_export_task_signature = primary_export_task.s(
            name=data_provider.slug,
            run_uid=run.uid,
            provider_slug=data_provider.slug,
            overpass_url=data_provider.url,
            stage_dir=stage_dir,
            export_provider_task_record_uid=data_provider_task_record.uid,
            worker=worker,
            job_name=job_name,
            bbox=bbox,
            selection=job.the_geom.geojson,
            user_details=user_details,
            task_uid=primary_export_task_record.uid,
            layer=data_provider.layer,
            level_from=min_zoom,
            level_to=max_zoom,
            service_type=service_type,
            service_url=data_provider.url,
            config=data_provider.config,
            session_token=session_token,
        )
        primary_export_task_signature = primary_export_task_signature.set(
            queue=queue_routing_key_name, routing_key=queue_routing_key_name
        )
        if skip_primary_export_task:
            tasks = chain(format_tasks)
            # The primary task will not run, so its record is removed again.
            primary_export_task_record.delete()
        else:
            tasks = chain(primary_export_task_signature, format_tasks)

        tasks = chain(tasks)

        return data_provider_task_record.uid, tasks
Пример #3
0
    def parse_tasks(
        self,
        worker=None,
        run_uid=None,
        user_details=None,
        run_zip_file_slug_sets=None,
        session_token=None,
        queue_group=None,
    ):
        r"""
        This handles all of the logic for taking the information about the individual celery tasks and grouping
        them under specific providers.

        Each Provider (e.g. OSM) gets a chain:  OSM_TASK -> FORMAT_TASKS = PROVIDER_SUBTASK_CHAIN
        They need to be finalized (was the task successful?) to update the database state:
            PROVIDER_SUBTASK_CHAIN -> FINALIZE_PROVIDER_TASK

        We also have an optional chain of tasks that get processed after the providers are run:
            AD_HOC_TASK1 -> AD_HOC_TASK2 -> FINALIZE_RUN_TASK = FINALIZE_RUN_TASK_COLLECTION

        If the PROVIDER_SUBTASK_CHAIN fails it needs to be cleaned up.  The clean up task also calls the
        finalize provider task. This is because when a task fails the failed task will call an on_error (link_error)
        task and never return.
            PROVIDER_SUBTASK_CHAIN -> FINALIZE_PROVIDER_TASK
                   |
                   v
                CLEAN_UP_FAILURE_TASK -> FINALIZE_PROVIDER_TASK

        Now there needs to be some way for the finalize tasks to be called.  Since we now have several possible
        forked paths, we need each path to check the state of the providers to see if they are all finished before
        moving on.
        It would be great if celery implicitly handled that, but it doesn't ever merge the forked paths.
        So we add a WAIT_FOR_PROVIDERS task to check state; once the providers are ready they call the final tasks.

        PROVIDER_SUBTASK_CHAIN -> FINALIZE_PROVIDER_TASK -> WAIT_FOR_PROVIDERS   \
                   |                                                              ==> FINALIZE_RUN_TASK_COLLECTION
                   v                                                             /
            CLEAN_UP_FAILURE_TASK -> FINALIZE_PROVIDER_TASK -> WAIT_FOR_PROVIDERS


        :param worker: A worker node (hostname) for a celery worker, this should match the node name used when starting,
         the celery worker.
        :param run_uid: A uid to reference an ExportRun.
        :param user_details: Details about the user; looked up from the job's user when not given.
        :param run_zip_file_slug_sets: Passed through to create_finalize_run_task_collection.
        :param session_token: A session token, passed through to the provider task builder.
        :param queue_group: The base queue name; ".priority" is appended for the wait and finalize tasks.
         NOTE(review): with the default of None the queue name becomes the literal "None.priority" -- callers
         presumably always pass a value, confirm.
        :return: None. The provider chains are dispatched with apply_async once all of them have been built.
        :raises ValueError: If no run_uid is given.
        """

        if not run_uid:
            raise ValueError("Cannot parse_tasks without a run uid.")

        run = ExportRun.objects.prefetch_related(
            "job__projections", "job__data_provider_tasks", "data_provider_task_records"
        ).get(uid=run_uid)
        job = run.job
        run_dir = get_run_staging_dir(run.uid)

        if user_details is None:
            from audit_logging.utils import get_user_details

            user_details = get_user_details(job.user)

        priority_queue = f"{queue_group}.priority"

        wait_for_providers_settings = {
            "queue": priority_queue,
            "routing_key": priority_queue,
            "priority": TaskPriority.FINALIZE_PROVIDER.value,
        }

        finalize_task_settings = {
            "interval": 4,
            "max_retries": 10,
            "queue": priority_queue,
            "routing_key": priority_queue,
            "priority": TaskPriority.FINALIZE_RUN.value,
        }

        finalized_provider_task_chain_list = []
        # Create a task record which can hold tasks for the run (datapack)
        run_task_record, created = DataProviderTaskRecord.objects.get_or_create(
            run=run, name="run", slug="run", defaults={"status": TaskState.PENDING.value, "display": False}
        )
        if created:
            logger.info("New data provider task record created")
            run_task_record.status = TaskState.PENDING.value
            run_task_record.save()

        run_zip_task_chain = get_zip_task_chain(
            data_provider_task_record_uid=run_task_record.uid,
            worker=worker,
            user_details=user_details,
        )

        data_provider_tasks = list(job.data_provider_tasks.all())
        # A per-provider zip is skipped if there is only one source in the data pack
        # (they would be redundant files).
        has_multiple_providers = len(data_provider_tasks) > 1

        for data_provider_task in data_provider_tasks:
            provider = data_provider_task.provider

            # Providers which already reached a finished state are not scheduled again.
            data_provider_task_record = run.data_provider_task_records.filter(provider__slug=provider.slug).first()
            if (
                data_provider_task_record
                and TaskState[data_provider_task_record.status] in TaskState.get_finished_states()
            ):
                continue

            # Each task builder has a primary task which pulls the source data, grab that task here...
            type_name = provider.export_provider_type.type_name
            primary_export_task = self.type_task_map.get(type_name)
            if not primary_export_task:
                # No task is registered for this provider type, so there is nothing to build.
                continue

            stage_dir = get_provider_staging_dir(run_dir, provider.slug)
            build_kwargs = {
                "primary_export_task": primary_export_task,
                "user": job.user,
                "provider_task_uid": data_provider_task.uid,
                "stage_dir": stage_dir,
                "run": run,
                "service_type": type_name,
                "worker": worker,
                "user_details": user_details,
                "session_token": session_token,
            }

            (
                provider_task_record_uid,
                provider_subtask_chain,
            ) = TaskChainBuilder().build_tasks(**build_kwargs)

            wait_for_providers_signature = wait_for_providers_task.s(
                run_uid=run_uid,
                locking_task_key=run_uid,
                callback_task=create_finalize_run_task_collection(
                    run_uid=run_uid,
                    run_provider_task_record_uid=run_task_record.uid,
                    run_zip_task_chain=run_zip_task_chain,
                    run_zip_file_slug_sets=run_zip_file_slug_sets,
                    apply_args=finalize_task_settings,
                ),
                apply_args=finalize_task_settings,
            ).set(**wait_for_providers_settings)

            if not provider_subtask_chain:
                continue

            selection_task = create_task(
                data_provider_task_record_uid=provider_task_record_uid,
                worker=worker,
                stage_dir=stage_dir,
                task=output_selection_geojson_task,
                selection=job.the_geom.geojson,
                user_details=user_details,
            )

            # The finalize_export_provider_task will check all of the export tasks
            # for this provider and save the export provider's status.
            # create signature to close out the provider tasks
            finalize_export_provider_signature = finalize_export_provider_task.s(
                data_provider_task_uid=provider_task_record_uid,
                status=TaskState.COMPLETED.value,
                locking_task_key=run_uid,
            ).set(**finalize_task_settings)

            # add zip if required
            if provider.zip and has_multiple_providers:
                zip_export_provider_sig = get_zip_task_chain(
                    data_provider_task_record_uid=provider_task_record_uid,
                    data_provider_task_record_uids=[provider_task_record_uid],
                    worker=worker,
                    user_details=user_details,
                )
                provider_subtask_chain = chain(provider_subtask_chain, zip_export_provider_sig)

            finalized_provider_task_chain_list.append(
                chain(
                    selection_task,
                    provider_subtask_chain,
                    finalize_export_provider_signature,
                    wait_for_providers_signature,
                )
            )

        # we kick off all of the sub-tasks at once down here rather than one at a time in the for loop above so
        # that if an error occurs earlier on in the method, all of the tasks will fail rather than an undefined
        # number of them. this simplifies error handling, because we don't have to deduce which tasks were
        # successfully kicked off and which ones failed.
        for item in finalized_provider_task_chain_list:
            item.apply_async(**finalize_task_settings)