def scale_by_runs(max_tasks_memory):
    """
    Scale celery workers for submitted ExportRuns, one worker per run queue.

    First kills workers whose runs have reached a finished state (or were deleted),
    then walks the submitted runs and starts a new worker for each run that does not
    already have a consumer, stopping when either the RUNS_CONCURRENCY cap or the
    memory budget would be exceeded.

    @param max_tasks_memory: The amount of memory in MB to allow for all of the tasks.
    @type max_tasks_memory: int
    """
    from audit_logging.utils import get_user_details

    client, app_name = get_scale_client()
    celery_task_details = get_celery_task_details(client, app_name)
    running_tasks_memory = int(celery_task_details["memory"])
    celery_tasks = get_celery_tasks_scale_by_run()

    # Check if we need to scale for default system tasks.
    scale_default_tasks(client, app_name, celery_tasks)

    # Get runs in progress.
    runs = ExportRun.objects.filter(status=TaskState.SUBMITTED.value, deleted=False)
    total_tasks = 0
    running_tasks = client.get_running_tasks(app_name)
    logger.info(f"Running tasks: {running_tasks}")

    # Get a list of running task names excluding the default celery tasks.
    # Guard against a falsy/empty response so an idle system doesn't crash here.
    running_task_names = []
    if running_tasks:
        total_tasks = running_tasks["pagination"].get("total_results", 0)
        running_task_names = [
            resource.get("name")
            for resource in running_tasks.get("resources") or []
            if resource.get("name") != "celery"
        ]

    # Workers whose run reached a terminal state (or was deleted) are no longer needed.
    finished_runs = ExportRun.objects.filter(
        Q(uid__in=running_task_names)
        & (Q(status__in=[state.value for state in TaskState.get_finished_states()]) | Q(deleted=True))
    )
    finished_run_uids = []
    for finished_run in finished_runs:
        logger.info(
            f"Stopping {finished_run.uid} because it is in a finished state ({finished_run.status}) "
            f"or was deleted ({finished_run.deleted})."
        )
        finished_run_uids.append(str(finished_run.uid))
    kill_workers(task_names=finished_run_uids, client=client)

    # The concurrency cap is loop-invariant; read it once instead of per run.
    max_runs = int(os.getenv("RUNS_CONCURRENCY", 3))
    for run in runs:
        celery_run_task = copy.deepcopy(celery_tasks["run"])
        logger.info(f"Checking to see if submitted run {run.uid} needs a new worker.")
        if max_runs and total_tasks >= max_runs:
            logger.info(f"total_tasks ({total_tasks}) >= max_runs ({max_runs})")
            break
        if running_tasks_memory + celery_run_task["memory"] >= max_tasks_memory:
            logger.info("Not enough available memory to scale another run.")
            break
        task_name = run.uid
        running_tasks_by_queue = client.get_running_tasks(app_name, task_name)
        running_tasks_by_queue_count = running_tasks_by_queue["pagination"].get("total_results", 0)
        logger.info(f"Currently {running_tasks_by_queue_count} tasks running for {task_name}.")
        if running_tasks_by_queue_count:
            logger.info(f"Already a consumer for {task_name}")
            continue

        # Recover the submitting user's session token (if any) so the picked-up run
        # can make authenticated requests on their behalf.
        user_session = UserSession.objects.filter(user=run.user).last()
        session_token = None
        if user_session:
            session = Session.objects.get(session_key=user_session.session_id)
            session_token = session.get_decoded().get("session_token")
        user_details = get_user_details(run.user)
        pick_up_run_task.s(
            run_uid=str(run.uid), session_token=session_token, user_details=user_details
        ).apply_async(queue=str(task_name), routing_key=str(task_name))
        celery_run_task["command"] = celery_run_task["command"].format(celery_group_name=task_name)
        run_task_command(client, app_name, str(task_name), celery_run_task)
        # Keep track of new resources being used.
        total_tasks += 1
        running_tasks_memory += celery_run_task["memory"]
    def build_tasks(
        self,
        primary_export_task,
        provider_task_uid=None,
        user=None,
        run=None,
        stage_dir=None,
        worker=None,
        service_type=None,
        session_token=None,
        *args,
        **kwargs,
    ):
        """
        Run OSM export tasks. Specifically create a task chain to be picked up by a celery worker later.

        :param primary_export_task: The task which converts the source data to the interchange format
            (i.e. OSM-> gpkg)
        :param provider_task_uid: A reference uid for the DataProviderTask model.
        :param user: The user executing the task.
        :param run: The ExportRun which this task will belong to.
        :param stage_dir: The directory where to store the files while they are being created.
        :param worker: The celery worker assigned this task.
        :param service_type: Passed through to the primary export task signature.
        :param session_token: Session token forwarded to export tasks that act on the user's behalf.
        :param osm_gpkg: A OSM geopackage with the planet osm schema.
        :return: An DataProviderTaskRecord uid and the Celery Task Chain or None, False.
        """
        logger.debug("Running Job with id: {0}".format(provider_task_uid))
        # pull the provider_task from the database
        data_provider_task = (
            DataProviderTask.objects.select_related("provider")
            .prefetch_related("formats__supported_projections")
            .get(uid=provider_task_uid)
        )
        data_provider: DataProvider = data_provider_task.provider
        job = run.job

        # This is just to make it easier to trace when user_details haven't been sent
        user_details = kwargs.get("user_details")
        if user_details is None:
            from audit_logging.utils import get_user_details

            user_details = get_user_details(user)

        job_name = normalize_name(job.name)

        # get the formats to export
        formats: List[ExportFormat] = list(data_provider_task.formats.all())

        data_provider_task_record: DataProviderTaskRecord
        created: bool
        data_provider_task_record, created = DataProviderTaskRecord.objects.get_or_create(
            run=run,
            name=data_provider.name,
            provider=data_provider,
            status=TaskState.PENDING.value,
            display=True,
        )

        # SRIDs the user selected for this job.
        projections = [projection.srid for projection in run.job.projections.all()]

        """
        Create a celery chain which gets the data & runs export formats
        """
        queue_group = get_celery_queue_group(run_uid=run.uid, worker=worker)

        # Record estimates for size and time
        get_estimates_task.apply_async(
            queue=queue_group,
            routing_key=queue_group,
            kwargs={
                "run_uid": run.uid,
                "data_provider_task_uid": data_provider_task.uid,
                "data_provider_task_record_uid": data_provider_task_record.uid,
            },
        )

        # If every requested format is served via a proxy format for this provider,
        # the primary (source download) task is unnecessary and is dropped below.
        skip_primary_export_task = False
        if set(item.name.lower() for item in formats).issubset(
            set(item.name.lower() for item in get_proxy_formats(data_provider))
        ):
            skip_primary_export_task = True

        export_tasks = {}  # {export_format : (etr_uid, export_task)}
        for export_format in formats:
            logger.info(f"Setting up task for format: {export_format.name} with {export_format.options}")
            if is_supported_proxy_format(export_format, data_provider):
                export_task = create_format_task("ogcapi-process")
            else:
                export_task = create_format_task(export_format.slug)
            # The default projection is produced by the format task itself; the
            # remaining selected projections are handled by reprojection_task below.
            default_projection = get_default_projection(export_format.get_supported_projection_list(), projections)
            task_name = export_format.name
            if default_projection:
                task_name = f"{task_name} - EPSG:{default_projection}"
            export_task_record = create_export_task_record(
                task_name=task_name,
                export_provider_task=data_provider_task_record,
                worker=worker,
                display=getattr(export_task, "display", False),
            )
            export_tasks[export_format] = (export_task_record, export_task)

        bbox = run.job.extents

        """
        Create a celery chain which gets the data & runs export formats
        """
        if export_tasks:
            subtasks = list()
            if data_provider.preview_url:
                subtasks.append(
                    create_datapack_preview.s(
                        run_uid=run.uid,
                        stage_dir=stage_dir,
                        task_uid=data_provider_task_record.uid,
                        user_details=user_details,
                    ).set(queue=queue_group, routing_key=queue_group)
                )
            for current_format, (export_task_record, export_task) in export_tasks.items():
                supported_projections = current_format.get_supported_projection_list()
                default_projection = get_default_projection(supported_projections, selected_projections=projections)
                subtasks.append(
                    export_task.s(
                        run_uid=run.uid,
                        stage_dir=stage_dir,
                        job_name=job_name,
                        task_uid=export_task_record.uid,
                        user_details=user_details,
                        locking_task_key=export_task_record.uid,
                        config=data_provider.config,
                        service_url=data_provider.url,
                        export_format_slug=current_format.slug,
                        bbox=bbox,
                        session_token=session_token,
                        provider_slug=data_provider.slug,
                        export_provider_task_record_uid=data_provider_task_record.uid,
                        worker=worker,
                        selection=job.the_geom.geojson,
                        layer=data_provider.layer,
                        service_type=service_type,
                    ).set(queue=queue_group, routing_key=queue_group)
                )
                # One reprojection task per additional selected projection this format supports.
                for projection in list(set(supported_projections) & set(projections)):
                    # This task was already added as the initial format conversion.
                    if projection == default_projection:
                        continue
                    task_name = f"{export_task.name} - EPSG:{projection}"
                    projection_task = create_export_task_record(
                        task_name=task_name,
                        export_provider_task=data_provider_task_record,
                        worker=worker,
                        display=getattr(export_task, "display", True),
                    )
                    subtasks.append(
                        reprojection_task.s(
                            run_uid=run.uid,
                            stage_dir=stage_dir,
                            job_name=job_name,
                            task_uid=projection_task.uid,
                            user_details=user_details,
                            locking_task_key=data_provider_task_record.uid,
                            projection=projection,
                            config=data_provider.config,
                        ).set(queue=queue_group, routing_key=queue_group)
                    )
            format_tasks = chain(subtasks)
        else:
            format_tasks = None

        primary_export_task_record = create_export_task_record(
            task_name=primary_export_task.name,
            export_provider_task=data_provider_task_record,
            worker=worker,
            display=getattr(primary_export_task, "display", False),
        )

        # OSM source pulls are routed to the dedicated ".large" queue.
        if "osm" in primary_export_task.name.lower():
            queue_routing_key_name = "{}.large".format(queue_group)
        else:
            queue_routing_key_name = queue_group

        # Set custom zoom levels if available, otherwise use the provider defaults.
        min_zoom = data_provider_task.min_zoom if data_provider_task.min_zoom else data_provider.level_from
        max_zoom = data_provider_task.max_zoom if data_provider_task.max_zoom else data_provider.level_to

        primary_export_task_signature = primary_export_task.s(
            name=data_provider.slug,
            run_uid=run.uid,
            provider_slug=data_provider.slug,
            overpass_url=data_provider.url,
            stage_dir=stage_dir,
            export_provider_task_record_uid=data_provider_task_record.uid,
            worker=worker,
            job_name=job_name,
            bbox=bbox,
            selection=job.the_geom.geojson,
            user_details=user_details,
            task_uid=primary_export_task_record.uid,
            layer=data_provider.layer,
            level_from=min_zoom,
            level_to=max_zoom,
            service_type=service_type,
            service_url=data_provider.url,
            config=data_provider.config,
            session_token=session_token,
        )
        primary_export_task_signature = primary_export_task_signature.set(
            queue=queue_routing_key_name, routing_key=queue_routing_key_name
        )

        if skip_primary_export_task:
            tasks = chain(format_tasks)
            # Its record is unused when only proxy formats are requested.
            primary_export_task_record.delete()
        else:
            tasks = chain(primary_export_task_signature, format_tasks)

        tasks = chain(tasks)

        return data_provider_task_record.uid, tasks
    def parse_tasks(
        self,
        worker=None,
        run_uid=None,
        user_details=None,
        run_zip_file_slug_sets=None,
        session_token=None,
        queue_group=None,
    ):
        """
        This handles all of the logic for taking the information about what individual celery tasks and groups them
        under specific providers.

        Each Provider (e.g. OSM) gets a chain:  OSM_TASK -> FORMAT_TASKS = PROVIDER_SUBTASK_CHAIN
        They need to be finalized (was the task successful?) to update the database state:
            PROVIDER_SUBTASK_CHAIN -> FINALIZE_PROVIDER_TASK
        We also have an optional chain of tasks that get processed after the providers are run:
            AD_HOC_TASK1 -> AD_HOC_TASK2 -> FINALIZE_RUN_TASK = FINALIZE_RUN_TASK_COLLECTION
        If the PROVIDER_SUBTASK_CHAIN fails it needs to be cleaned up.  The clean up task also calls the finalize
        provider task.  This is because when a task fails the failed task will call an on_error (link_error) task
        and never return.
            PROVIDER_SUBTASK_CHAIN -> FINALIZE_PROVIDER_TASK
                  |
                  v
            CLEAN_UP_FAILURE_TASK -> FINALIZE_PROVIDER_TASK
        Now there needs to be someway for the finalize tasks to be called.  Since we now have several a possible
        forked path, we need each path to check the state of the providers to see if they are all finished before
        moving on.  It would be great if celery would implicitly handled that, but it doesn't ever merge the forked
        paths.  So we add a WAIT_FOR_PROVIDERS task to check state once the providers are ready they call the
        final tasks.
            PROVIDER_SUBTASK_CHAIN -> FINALIZE_PROVIDER_TASK -> WAIT_FOR_PROVIDERS \\
                  |                                                                 ==> FINALIZE_RUN_TASK_COLLECTION
                  v                                                                /
            CLEAN_UP_FAILURE_TASK -> FINALIZE_PROVIDER_TASK -> WAIT_FOR_PROVIDERS

        :param worker: A worker node (hostname) for a celery worker, this should match the node name used when
            starting, the celery worker.
        :param run_uid: A uid to reference an ExportRun.
        :param user_details: Audit details for the requesting user; looked up from the job owner when omitted.
        :param run_zip_file_slug_sets: Forwarded to the finalize-run task collection for run-level zips.
        :param session_token: Session token forwarded to each provider's build_tasks call.
        :param queue_group: Base queue name; finalize/wait tasks are routed to "<queue_group>.priority".
        :return: The AsyncResult from the celery chain of all tasks for this run.
        """
        if not run_uid:
            raise Exception("Cannot parse_tasks without a run uid.")

        run = ExportRun.objects.prefetch_related(
            "job__projections", "job__data_provider_tasks", "data_provider_task_records"
        ).get(uid=run_uid)
        job = run.job
        run_dir = get_run_staging_dir(run.uid)
        if user_details is None:
            from audit_logging.utils import get_user_details

            user_details = get_user_details(job.user)

        # Routing/priority options applied to the wait-for-providers signatures.
        wait_for_providers_settings = {
            "queue": f"{queue_group}.priority",
            "routing_key": f"{queue_group}.priority",
            "priority": TaskPriority.FINALIZE_PROVIDER.value,
        }

        # Retry/routing options shared by finalize signatures and the apply_async calls below.
        finalize_task_settings = {
            "interval": 4,
            "max_retries": 10,
            "queue": f"{queue_group}.priority",
            "routing_key": f"{queue_group}.priority",
            "priority": TaskPriority.FINALIZE_RUN.value,
        }

        finalized_provider_task_chain_list = []
        # Create a task record which can hold tasks for the run (datapack)
        run_task_record, created = DataProviderTaskRecord.objects.get_or_create(
            run=run, name="run", slug="run", defaults={"status": TaskState.PENDING.value, "display": False}
        )
        if created:
            logger.info("New data provider task record created")
        run_task_record.status = TaskState.PENDING.value
        run_task_record.save()

        run_zip_task_chain = get_zip_task_chain(
            data_provider_task_record_uid=run_task_record.uid,
            worker=worker,
            user_details=user_details,
        )
        for data_provider_task in job.data_provider_tasks.all():
            data_provider_task_record = run.data_provider_task_records.filter(
                provider__slug=data_provider_task.provider.slug
            ).first()
            # Skip providers that already finished (e.g. when re-running a partially complete run).
            if (
                data_provider_task_record
                and TaskState[data_provider_task_record.status] in TaskState.get_finished_states()
            ):
                continue
            if self.type_task_map.get(data_provider_task.provider.export_provider_type.type_name):
                # Each task builder has a primary task which pulls the source data, grab that task here...
                type_name = data_provider_task.provider.export_provider_type.type_name
                primary_export_task = self.type_task_map.get(type_name)

                stage_dir = get_provider_staging_dir(run_dir, data_provider_task.provider.slug)

                args = {
                    "primary_export_task": primary_export_task,
                    "user": job.user,
                    "provider_task_uid": data_provider_task.uid,
                    "stage_dir": stage_dir,
                    "run": run,
                    "service_type": data_provider_task.provider.export_provider_type.type_name,
                    "worker": worker,
                    "user_details": user_details,
                    "session_token": session_token,
                }

                (
                    provider_task_record_uid,
                    provider_subtask_chain,
                ) = TaskChainBuilder().build_tasks(**args)

                # Gate the run-level finalize collection on all providers being done.
                wait_for_providers_signature = wait_for_providers_task.s(
                    run_uid=run_uid,
                    locking_task_key=run_uid,
                    callback_task=create_finalize_run_task_collection(
                        run_uid=run_uid,
                        run_provider_task_record_uid=run_task_record.uid,
                        run_zip_task_chain=run_zip_task_chain,
                        run_zip_file_slug_sets=run_zip_file_slug_sets,
                        apply_args=finalize_task_settings,
                    ),
                    apply_args=finalize_task_settings,
                ).set(**wait_for_providers_settings)

                if provider_subtask_chain:
                    # The finalize_export_provider_task will check all of the export tasks
                    # for this provider and save the export provider's status.

                    selection_task = create_task(
                        data_provider_task_record_uid=provider_task_record_uid,
                        worker=worker,
                        stage_dir=stage_dir,
                        task=output_selection_geojson_task,
                        selection=job.the_geom.geojson,
                        user_details=user_details,
                    )

                    # create signature to close out the provider tasks
                    finalize_export_provider_signature = finalize_export_provider_task.s(
                        data_provider_task_uid=provider_task_record_uid,
                        status=TaskState.COMPLETED.value,
                        locking_task_key=run_uid,
                    ).set(**finalize_task_settings)

                    # add zip if required
                    # skip zip if there is only one source in the data pack (they would be redundant files).
                    if data_provider_task.provider.zip and len(job.data_provider_tasks.all()) > 1:
                        zip_export_provider_sig = get_zip_task_chain(
                            data_provider_task_record_uid=provider_task_record_uid,
                            data_provider_task_record_uids=[provider_task_record_uid],
                            worker=worker,
                            user_details=user_details,
                        )
                        provider_subtask_chain = chain(provider_subtask_chain, zip_export_provider_sig)

                    finalized_provider_task_chain_list.append(
                        chain(
                            selection_task,
                            provider_subtask_chain,
                            finalize_export_provider_signature,
                            wait_for_providers_signature,
                        )
                    )
        # we kick off all of the sub-tasks at once down here rather than one at a time in the for loop above so
        # that if an error occurs earlier on in the method, all of the tasks will fail rather than an undefined
        # number of them. this simplifies error handling, because we don't have to deduce which tasks were
        # successfully kicked off and which ones failed.
        for item in finalized_provider_task_chain_list:
            item.apply_async(**finalize_task_settings)