def controller_for_region_code(
    region_code: str, allow_unlaunched: bool = False
) -> BaseDirectIngestController:
    """Returns an instance of the region's controller, if one exists."""
    if region_code not in get_supported_direct_ingest_region_codes():
        raise DirectIngestError(
            msg=f"Unsupported direct ingest region [{region_code}] in project "
            f"[{metadata.project_id()}]",
            error_type=DirectIngestErrorType.INPUT_ERROR,
        )

    try:
        region = regions.get_region(region_code, is_direct_ingest=True)
    except FileNotFoundError:
        raise DirectIngestError(
            msg=f"Region [{region_code}] has no registered manifest",
            error_type=DirectIngestErrorType.INPUT_ERROR,
        )

    if not allow_unlaunched and not region.is_ingest_launched_in_env():
        check_is_region_launched_in_env(region)

    controller = region.get_ingestor()

    if not isinstance(controller, BaseDirectIngestController):
        raise DirectIngestError(
            msg=f"Controller for direct ingest region [{region_code}] has "
            f"unexpected type [{type(controller)}]",
            error_type=DirectIngestErrorType.INPUT_ERROR,
        )

    return controller

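# Usage sketch (hedged): "us_xx" is a placeholder region code, and this assumes
# the region's manifest and controller class are registered in the current
# environment, and that the returned controller is a GCSFS-based controller
# exposing handle_new_files (defined later in this module).
def _example_controller_for_region() -> None:
    controller = controller_for_region_code("us_xx", allow_unlaunched=True)
    controller.handle_new_files(can_start_ingest=False)
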
@classmethod
def build(
    cls, *, ingest_bucket_path: GcsfsBucketPath, allow_unlaunched: bool
) -> BaseDirectIngestController:
    """Retrieves the direct ingest controller associated with a particular
    ingest bucket.

    Returns:
        An instance of the region's direct ingest controller class (e.g.,
        UsNdController) that can run ingest operations for the ingest
        instance associated with the input bucket.
    """
    region_code = get_region_code_from_direct_ingest_bucket(
        ingest_bucket_path.bucket_name
    )
    if (
        region_code is None
        or region_code not in get_supported_direct_ingest_region_codes()
    ):
        raise DirectIngestError(
            msg=f"Unsupported direct ingest region [{region_code}] in "
            f"project [{metadata.project_id()}]",
            error_type=DirectIngestErrorType.INPUT_ERROR,
        )

    region = cls._region_for_bucket(ingest_bucket_path)
    if not allow_unlaunched and not region.is_ingest_launched_in_env():
        check_is_region_launched_in_env(region)

    controller_class = cls.get_controller_class(region)
    controller = controller_class(ingest_bucket_path=ingest_bucket_path)
    if not isinstance(controller, BaseDirectIngestController):
        raise ValueError(f"Unexpected controller class type [{type(controller)}]")

    return controller

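# Usage sketch (hedged): the enclosing factory class is not named in this
# excerpt, so ControllerFactory below is a stand-in; the bucket name is a
# placeholder, and constructing GcsfsBucketPath directly from a bucket name
# is an assumption about its API.
def _example_build_from_bucket() -> None:
    bucket = GcsfsBucketPath("us-xx-ingest-bucket")  # hypothetical bucket name
    controller = ControllerFactory.build(
        ingest_bucket_path=bucket, allow_unlaunched=False
    )
    controller.handle_new_files(can_start_ingest=True)
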
def schedule_next_ingest_job_or_wait_if_necessary(self, just_finished_job: bool) -> None:
    """Creates a cloud task to run the next ingest job. Depending on the
    next job's IngestArgs, we either post a task to direct/scheduler/ if a
    wait_time is specified or direct/process_job/ if we can run the next job
    immediately."""
    check_is_region_launched_in_env(self.region)

    process_job_queue_info = self.cloud_task_manager.get_process_job_queue_info(
        self.region
    )
    if process_job_queue_info.size() and not just_finished_job:
        logging.info(
            "Already running job [%s] - will not schedule another job for "
            "region [%s]",
            process_job_queue_info.task_names[0],
            self.region.region_code,
        )
        return

    next_job_args = self._get_next_job_args()

    if not next_job_args:
        logging.info(
            "No more jobs to run for region [%s] - returning",
            self.region.region_code,
        )
        return

    if process_job_queue_info.is_task_queued(self.region, next_job_args):
        logging.info(
            "Already have task queued for next job [%s] - returning.",
            self._job_tag(next_job_args),
        )
        return

    # TODO(#3020): Add similar logic between the raw data BQ import and ingest
    #  view export tasks.
    # TODO(#3162): Delete this wait logic from here once all regions have been
    #  transitioned to a SQL preprocessing model.
    wait_time_sec = self._wait_time_sec_for_next_args(next_job_args)
    logging.info(
        "Found next ingest job to run [%s] with wait time [%s].",
        self._job_tag(next_job_args),
        wait_time_sec,
    )

    if wait_time_sec:
        scheduler_queue_info = self.cloud_task_manager.get_scheduler_queue_info(
            self.region
        )
        if scheduler_queue_info.size() <= 1:
            logging.info(
                "Creating cloud task to fire timer in [%s] seconds", wait_time_sec
            )
            self.cloud_task_manager.create_direct_ingest_scheduler_queue_task(
                region=self.region,
                just_finished_job=False,
                delay_sec=wait_time_sec,
            )
        else:
            logging.info(
                "[%s] tasks already in the scheduler queue for region "
                "[%s] - not queueing another task.",
                # Call size() so we log the count, not the bound method.
                str(scheduler_queue_info.size()),
                self.region.region_code,
            )
    else:
        logging.info(
            "Creating cloud task to run job [%s]", self._job_tag(next_job_args)
        )
        self.cloud_task_manager.create_direct_ingest_process_job_task(
            region=self.region, ingest_args=next_job_args
        )
        self._on_job_scheduled(next_job_args)

def do_raw_data_import(self, data_import_args: GcsfsRawDataBQImportArgs) -> None:
    """Process a raw incoming file by importing it to BQ, tracking it in our
    metadata tables, and moving it to storage on completion.
    """
    check_is_region_launched_in_env(self.region)
    if not self.region.are_raw_data_bq_imports_enabled_in_env():
        raise ValueError(
            f"Raw data imports not enabled for region [{self.region.region_code}]"
        )

    if not self.fs.exists(data_import_args.raw_data_file_path):
        logging.warning(
            "File path [%s] no longer exists - might have already been "
            "processed or deleted",
            data_import_args.raw_data_file_path,
        )
        self.kick_scheduler(just_finished_job=True)
        return

    file_metadata = self.file_metadata_manager.get_file_metadata(
        data_import_args.raw_data_file_path
    )

    if file_metadata.processed_time:
        logging.warning(
            "File [%s] is already marked as processed. Skipping file processing.",
            data_import_args.raw_data_file_path.file_name,
        )
        self.kick_scheduler(just_finished_job=True)
        return

    self.raw_file_import_manager.import_raw_file_to_big_query(
        data_import_args.raw_data_file_path, file_metadata
    )

    if not self.region.are_ingest_view_exports_enabled_in_env():
        # TODO(#3162): This is a stopgap measure for regions that have only
        #  partially launched. Delete once SQL pre-processing is enabled for
        #  all direct ingest regions.
        parts = filename_parts_from_path(data_import_args.raw_data_file_path)
        ingest_file_tags = self.get_file_tag_rank_list()

        if parts.file_tag in ingest_file_tags:
            self.fs.copy(
                data_import_args.raw_data_file_path,
                GcsfsFilePath.from_absolute_path(
                    to_normalized_unprocessed_file_path_from_normalized_path(
                        data_import_args.raw_data_file_path.abs_path(),
                        file_type_override=GcsfsDirectIngestFileType.INGEST_VIEW,
                    )
                ),
            )

    processed_path = self.fs.mv_path_to_processed_path(
        data_import_args.raw_data_file_path
    )
    self.file_metadata_manager.mark_file_as_processed(
        path=data_import_args.raw_data_file_path
    )

    self.fs.mv_path_to_storage(processed_path, self.storage_directory_path)
    self.kick_scheduler(just_finished_job=True)

def run_ingest_job_and_kick_scheduler_on_completion(self, args: IngestArgsType) -> None:
    """Runs the ingest job for the given args, then kicks the scheduler to
    pick up the next job if this one completed successfully."""
    check_is_region_launched_in_env(self.region)

    should_schedule = self._run_ingest_job(args)
    if should_schedule:
        self.kick_scheduler(just_finished_job=True)
        logging.info("Done running task. Returning.")

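# Usage sketch (hedged): a /process_job task handler would deserialize the job
# args from the task body and delegate here. Args construction is elided
# because the concrete IngestArgsType varies by controller; `controller` and
# `args` are assumed to already exist.
#
#   controller.run_ingest_job_and_kick_scheduler_on_completion(args)
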
def _run_ingest_job(self, args: IngestArgsType) -> bool:
    """Runs the full ingest process for this controller - reading and parsing
    raw input data, transforming it to our schema, then writing to the
    database.

    Returns:
        True if we should try to schedule the next job on completion. False,
        otherwise.
    """
    check_is_region_launched_in_env(self.region)

    start_time = datetime.datetime.now()
    logging.info("Starting ingest for ingest run [%s]", self._job_tag(args))

    contents_handle = self._get_contents_handle(args)

    if contents_handle is None:
        logging.warning(
            "Failed to get contents handle for ingest run [%s] - returning.",
            self._job_tag(args),
        )
        # If the file no longer exists, we do want to kick the scheduler
        # again to pick up the next file to run. We expect this to happen
        # occasionally as a race when the scheduler picks up a file before
        # it has been properly moved.
        return True

    if not self._can_proceed_with_ingest_for_contents(args, contents_handle):
        logging.warning(
            "Cannot proceed with contents for ingest run [%s] - returning.",
            self._job_tag(args),
        )
        # If we get here, we've failed to properly split a file picked up
        # by the scheduler. We don't want to schedule a new job after
        # returning here, otherwise we'll get ourselves in a loop where we
        # continually try to schedule this file.
        return False

    logging.info(
        "Successfully read contents for ingest run [%s]", self._job_tag(args)
    )

    if not self._are_contents_empty(args, contents_handle):
        self._parse_and_persist_contents(args, contents_handle)
    else:
        logging.warning(
            "Contents are empty for ingest run [%s] - skipping parse and "
            "persist steps.",
            self._job_tag(args),
        )

    self._do_cleanup(args)

    duration_sec = (datetime.datetime.now() - start_time).total_seconds()
    logging.info(
        "Finished ingest in [%s] sec for ingest run [%s].",
        str(duration_sec),
        self._job_tag(args),
    )

    return True

def do_ingest_view_export(
    self, ingest_view_export_args: GcsfsIngestViewExportArgs
) -> None:
    """Exports the ingest view for the given args, kicking off a
    handle_new_files task if the export was a no-op or no exports remain
    pending."""
    check_is_region_launched_in_env(self.region)
    if not self.region.are_ingest_view_exports_enabled_in_env():
        raise ValueError(
            f"Ingest view exports not enabled for region "
            f"[{self.region.region_code}]. Passed args: {ingest_view_export_args}"
        )
    did_export = self.ingest_view_export_manager.export_view_for_args(
        ingest_view_export_args
    )
    if (
        not did_export
        or not self.file_metadata_manager.get_ingest_view_metadata_pending_export()
    ):
        logging.info("Creating cloud task to schedule next job.")
        self.cloud_task_manager.create_direct_ingest_handle_new_files_task(
            region=self.region, can_start_ingest=True
        )

def do_raw_data_import(self, data_import_args: GcsfsRawDataBQImportArgs) -> None:
    """Process a raw incoming file by importing it to BQ, tracking it in our
    metadata tables, and moving it to storage on completion.
    """
    check_is_region_launched_in_env(self.region)
    if self.ingest_instance_status_manager.is_instance_paused():
        logging.info(
            "Ingest out of [%s] is currently paused.",
            self.ingest_bucket_path.uri(),
        )
        return

    if self.ingest_instance == DirectIngestInstance.SECONDARY:
        raise ValueError(
            f"Raw data import not supported from SECONDARY ingest bucket "
            f"[{self.ingest_bucket_path}]. Raw data task for "
            f"[{data_import_args.raw_data_file_path}] should never have been "
            f"scheduled."
        )

    if not self.fs.exists(data_import_args.raw_data_file_path):
        logging.warning(
            "File path [%s] no longer exists - might have already been "
            "processed or deleted",
            data_import_args.raw_data_file_path,
        )
        self.kick_scheduler(just_finished_job=True)
        return

    file_metadata = self.file_metadata_manager.get_raw_file_metadata(
        data_import_args.raw_data_file_path
    )

    if file_metadata.processed_time:
        logging.warning(
            "File [%s] is already marked as processed. Skipping file processing.",
            data_import_args.raw_data_file_path.file_name,
        )
        self.kick_scheduler(just_finished_job=True)
        return

    self.raw_file_import_manager.import_raw_file_to_big_query(
        data_import_args.raw_data_file_path, file_metadata
    )

    processed_path = self.fs.mv_path_to_processed_path(
        data_import_args.raw_data_file_path
    )
    self.file_metadata_manager.mark_raw_file_as_processed(
        path=data_import_args.raw_data_file_path
    )

    self.fs.mv_path_to_storage(processed_path, self.storage_directory_path)
    self.kick_scheduler(just_finished_job=True)

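# Hedged sketch: task schedulers can mirror the SECONDARY guard above so that
# raw data import tasks are never enqueued for a secondary instance. Only the
# instance comparison comes from the method above; the helper name is ours.
def _raw_data_import_supported_for_instance(
    ingest_instance: DirectIngestInstance,
) -> bool:
    return ingest_instance != DirectIngestInstance.SECONDARY
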
def do_ingest_view_export(
    self, ingest_view_export_args: GcsfsIngestViewExportArgs
) -> None:
    """Exports the ingest view for the given args, kicking off a
    handle_new_files task if the export was a no-op or no exports remain
    pending."""
    check_is_region_launched_in_env(self.region)
    if self.ingest_instance_status_manager.is_instance_paused():
        logging.info(
            "Ingest out of [%s] is currently paused.",
            self.ingest_bucket_path.uri(),
        )
        return

    did_export = self.ingest_view_export_manager.export_view_for_args(
        ingest_view_export_args
    )
    if (
        not did_export
        or not self.file_metadata_manager.get_ingest_view_metadata_pending_export()
    ):
        logging.info("Creating cloud task to schedule next job.")
        self.cloud_task_manager.create_direct_ingest_handle_new_files_task(
            region=self.region,
            ingest_instance=self.ingest_instance,
            ingest_bucket=self.ingest_bucket_path,
            can_start_ingest=True,
        )

def run_ingest_job_and_kick_scheduler_on_completion(
    self, args: IngestArgsType
) -> None:
    """Runs the ingest job for the given args while holding the region's
    ingest process lock, then kicks the scheduler on successful completion."""
    check_is_region_launched_in_env(self.region)

    if self.lock_manager.is_locked(
        postgres_to_bq_lock_name_for_schema(
            schema_type_for_system_level(self.system_level)
        )
    ) or self.lock_manager.is_locked(
        postgres_to_bq_lock_name_for_schema(SchemaType.OPERATIONS)
    ):
        raise GCSPseudoLockAlreadyExists(
            "Postgres to BigQuery export is running, can not run ingest"
        )

    with self.lock_manager.using_lock(self.ingest_process_lock_for_region()):
        should_schedule = self._run_ingest_job(args)

    # Kick the scheduler outside the lock so the scheduled task can acquire it.
    if should_schedule:
        self.kick_scheduler(just_finished_job=True)
        logging.info("Done running task. Returning.")

def run_ingest_job_and_kick_scheduler_on_completion(
    self, args: GcsfsIngestArgs
) -> None:
    """Runs the ingest job for the given args while holding the region's
    ingest lock, then kicks the scheduler on successful completion."""
    check_is_region_launched_in_env(self.region)
    if self.ingest_instance_status_manager.is_instance_paused():
        logging.info(
            "Ingest out of [%s] is currently paused.",
            self.ingest_bucket_path.uri(),
        )
        return

    if not self.region_lock_manager.can_proceed():
        logging.warning(
            "Postgres to BigQuery export is running, can not run ingest"
        )
        raise GCSPseudoLockAlreadyExists(
            "Postgres to BigQuery export is running, can not run ingest"
        )

    with self.region_lock_manager.using_region_lock(
        expiration_in_seconds=self.default_job_lock_timeout_in_seconds(),
    ):
        should_schedule = self._run_ingest_job(args)

    # Kick the scheduler outside the lock so the scheduled task can acquire it.
    if should_schedule:
        self.kick_scheduler(just_finished_job=True)
        logging.info("Done running task. Returning.")

def handle_new_files(self, can_start_ingest: bool) -> None:
    """Searches the ingest directory for new/unprocessed files. Normalizes
    file names and splits files as necessary, schedules the next ingest job
    if allowed.

    Should only be called from the scheduler queue.
    """
    if not can_start_ingest and self.region.is_ingest_launched_in_env():
        raise ValueError(
            "The can_start_ingest flag should only be used for regions where "
            "ingest is not yet launched in a particular environment. If we "
            "want to be able to selectively pause ingest processing for a "
            "state, we will first have to build a config that is respected by "
            "both the /ensure_all_file_paths_normalized endpoint and any "
            "cloud functions that trigger ingest."
        )

    unnormalized_paths = self.fs.get_unnormalized_file_paths(
        self.ingest_directory_path
    )

    unnormalized_path_file_type = (
        GcsfsDirectIngestFileType.RAW_DATA
        if self.region.is_raw_vs_ingest_file_name_detection_enabled()
        else GcsfsDirectIngestFileType.UNSPECIFIED
    )

    for path in unnormalized_paths:
        logging.info("File [%s] is not yet seen, normalizing.", path.abs_path())
        self.fs.mv_path_to_normalized_path(
            path, file_type=unnormalized_path_file_type
        )

    if unnormalized_paths:
        logging.info(
            "Normalized at least one path - returning, will handle "
            "normalized files separately."
        )
        # Normalizing file paths will cause the cloud function that calls
        # this function to be re-triggered.
        return

    if not can_start_ingest:
        logging.warning(
            "Ingest not configured to start post-file normalization - returning."
        )
        return

    check_is_region_launched_in_env(self.region)

    unprocessed_raw_paths = []
    ingest_file_type_filter = (
        GcsfsDirectIngestFileType.INGEST_VIEW
        if self.region.is_raw_vs_ingest_file_name_detection_enabled()
        else None
    )
    unprocessed_ingest_view_paths = self.fs.get_unprocessed_file_paths(
        self.ingest_directory_path, file_type_filter=ingest_file_type_filter
    )

    if self.region.is_raw_vs_ingest_file_name_detection_enabled():
        unprocessed_raw_paths = self.fs.get_unprocessed_file_paths(
            self.ingest_directory_path,
            file_type_filter=GcsfsDirectIngestFileType.RAW_DATA,
        )
        self._register_all_new_paths_in_metadata(unprocessed_raw_paths)

        if self.region.are_ingest_view_exports_enabled_in_env():
            self._register_all_new_paths_in_metadata(unprocessed_ingest_view_paths)

    unprocessed_paths = unprocessed_raw_paths + unprocessed_ingest_view_paths
    did_split = False
    for path in unprocessed_ingest_view_paths:
        if self._split_file_if_necessary(path):
            did_split = True

    if did_split:
        if self.region.are_ingest_view_exports_enabled_in_env():
            post_split_unprocessed_ingest_view_paths = (
                self.fs.get_unprocessed_file_paths(
                    self.ingest_directory_path,
                    file_type_filter=GcsfsDirectIngestFileType.INGEST_VIEW,
                )
            )
            self._register_all_new_paths_in_metadata(
                post_split_unprocessed_ingest_view_paths
            )
        logging.info(
            "Split at least one path - returning, will handle split "
            "files separately."
        )
        # Writing new split files to storage will cause the cloud function
        # that calls this function to be re-triggered.
        return

    if unprocessed_paths:
        self.schedule_next_ingest_job_or_wait_if_necessary(just_finished_job=False)

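# Usage sketch (hedged): a cloud function reacting to a GCS upload would
# enqueue a scheduler-queue task that ultimately calls handle_new_files.
# Passing can_start_ingest=False normalizes newly arrived file names without
# scheduling ingest jobs, which per the guard above is only valid for regions
# not yet launched in this environment. `controller` is assumed constructed.
#
#   controller.handle_new_files(can_start_ingest=False)  # normalize only
#   controller.handle_new_files(can_start_ingest=True)   # normalize + schedule
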
def handle_new_files(self, can_start_ingest: bool) -> None:
    """Searches the ingest directory for new/unprocessed files. Normalizes
    file names and splits files as necessary, schedules the next ingest job
    if allowed.

    Should only be called from the scheduler queue.
    """
    unnormalized_paths = self.fs.get_unnormalized_file_paths(
        self.ingest_directory_path
    )

    unnormalized_path_file_type = (
        GcsfsDirectIngestFileType.RAW_DATA
        if self.region.is_raw_vs_ingest_file_name_detection_enabled()
        else GcsfsDirectIngestFileType.UNSPECIFIED
    )

    for path in unnormalized_paths:
        logging.info("File [%s] is not yet seen, normalizing.", path.abs_path())
        self.fs.mv_path_to_normalized_path(
            path, file_type=unnormalized_path_file_type
        )

    if unnormalized_paths:
        logging.info(
            "Normalized at least one path - returning, will handle "
            "normalized files separately."
        )
        # Normalizing file paths will cause the cloud function that calls
        # this function to be re-triggered.
        return

    if not can_start_ingest:
        logging.warning(
            "Ingest not configured to start post-file normalization - returning."
        )
        return

    check_is_region_launched_in_env(self.region)

    unprocessed_raw_paths = []
    ingest_file_type_filter = (
        GcsfsDirectIngestFileType.INGEST_VIEW
        if self.region.is_raw_vs_ingest_file_name_detection_enabled()
        else None
    )
    unprocessed_ingest_view_paths = self.fs.get_unprocessed_file_paths(
        self.ingest_directory_path, file_type_filter=ingest_file_type_filter
    )

    if self.region.is_raw_vs_ingest_file_name_detection_enabled():
        unprocessed_raw_paths = self.fs.get_unprocessed_file_paths(
            self.ingest_directory_path,
            file_type_filter=GcsfsDirectIngestFileType.RAW_DATA,
        )
        self._register_all_new_paths_in_metadata(unprocessed_raw_paths)

        if self.region.are_ingest_view_exports_enabled_in_env():
            self._register_all_new_paths_in_metadata(unprocessed_ingest_view_paths)

    unprocessed_paths = unprocessed_raw_paths + unprocessed_ingest_view_paths
    did_split = False
    for path in unprocessed_ingest_view_paths:
        if self._split_file_if_necessary(path):
            did_split = True

    if did_split:
        if self.region.are_ingest_view_exports_enabled_in_env():
            post_split_unprocessed_ingest_view_paths = (
                self.fs.get_unprocessed_file_paths(
                    self.ingest_directory_path,
                    file_type_filter=GcsfsDirectIngestFileType.INGEST_VIEW,
                )
            )
            self._register_all_new_paths_in_metadata(
                post_split_unprocessed_ingest_view_paths
            )
        logging.info(
            "Split at least one path - returning, will handle split "
            "files separately."
        )
        # Writing new split files to storage will cause the cloud function
        # that calls this function to be re-triggered.
        return

    if unprocessed_paths:
        self.schedule_next_ingest_job_or_wait_if_necessary(just_finished_job=False)

def schedule_next_ingest_job_or_wait_if_necessary(
    self, just_finished_job: bool
) -> None:
    """Creates a cloud task to run the next ingest job. Depending on the
    next job's IngestArgs, we either post a task to direct/scheduler/ if a
    wait_time is specified or direct/process_job/ if we can run the next job
    immediately."""
    check_is_region_launched_in_env(self.region)

    if self._schedule_any_pre_ingest_tasks():
        logging.info("Found pre-ingest tasks to schedule - returning.")
        return

    if self.lock_manager.is_locked(self.ingest_process_lock_for_region()):
        logging.info(
            "Direct ingest is already locked on region [%s]", self.region
        )
        return

    process_job_queue_info = self.cloud_task_manager.get_process_job_queue_info(
        self.region
    )
    if process_job_queue_info.size() and not just_finished_job:
        logging.info(
            "Already running job [%s] - will not schedule another job for "
            "region [%s]",
            process_job_queue_info.task_names[0],
            self.region.region_code,
        )
        return

    next_job_args = self._get_next_job_args()

    if not next_job_args:
        logging.info(
            "No more jobs to run for region [%s] - returning",
            self.region.region_code,
        )
        return

    if process_job_queue_info.is_task_queued(self.region, next_job_args):
        logging.info(
            "Already have task queued for next job [%s] - returning.",
            self._job_tag(next_job_args),
        )
        return

    if self.lock_manager.is_locked(
        postgres_to_bq_lock_name_for_schema(
            schema_type_for_system_level(self.system_level)
        )
    ) or self.lock_manager.is_locked(
        postgres_to_bq_lock_name_for_schema(SchemaType.OPERATIONS)
    ):
        logging.info(
            "Postgres to BigQuery export is running, cannot run ingest - returning"
        )
        return

    # TODO(#3020): Add similar logic between the raw data BQ import and ingest
    #  view export tasks.
    # TODO(#3162): Delete this wait logic from here once all regions have been
    #  transitioned to a SQL preprocessing model.
    wait_time_sec = self._wait_time_sec_for_next_args(next_job_args)
    logging.info(
        "Found next ingest job to run [%s] with wait time [%s].",
        self._job_tag(next_job_args),
        wait_time_sec,
    )

    if wait_time_sec:
        scheduler_queue_info = self.cloud_task_manager.get_scheduler_queue_info(
            self.region
        )
        if scheduler_queue_info.size() <= 1:
            logging.info(
                "Creating cloud task to fire timer in [%s] seconds", wait_time_sec
            )
            self.cloud_task_manager.create_direct_ingest_scheduler_queue_task(
                region=self.region,
                just_finished_job=False,
                delay_sec=wait_time_sec,
            )
        else:
            logging.info(
                "[%s] tasks already in the scheduler queue for region "
                "[%s] - not queueing another task.",
                # Call size() so we log the count, not the bound method.
                str(scheduler_queue_info.size()),
                self.region.region_code,
            )
    else:
        logging.info(
            "Creating cloud task to run job [%s]", self._job_tag(next_job_args)
        )
        self.cloud_task_manager.create_direct_ingest_process_job_task(
            region=self.region, ingest_args=next_job_args
        )
        self._on_job_scheduled(next_job_args)

def handle_new_files(self, can_start_ingest: bool) -> None:
    """Searches the ingest directory for new/unprocessed files. Normalizes
    file names and splits files as necessary, schedules the next ingest job
    if allowed.

    Should only be called from the scheduler queue.
    """
    if not can_start_ingest and self.region.is_ingest_launched_in_env():
        raise ValueError(
            "The can_start_ingest flag should only be used for regions where "
            "ingest is not yet launched in a particular environment. If we "
            "want to be able to selectively pause ingest processing for a "
            "state, we will first have to build a config that is respected by "
            "both the /ensure_all_raw_file_paths_normalized endpoint and any "
            "cloud functions that trigger ingest."
        )

    if self.ingest_instance_status_manager.is_instance_paused():
        logging.info(
            "Ingest out of [%s] is currently paused.",
            self.ingest_bucket_path.uri(),
        )
        return

    unnormalized_paths = self.fs.get_unnormalized_file_paths(
        self.ingest_bucket_path
    )

    for path in unnormalized_paths:
        logging.info("File [%s] is not yet seen, normalizing.", path.abs_path())
        self.fs.mv_path_to_normalized_path(
            path, file_type=GcsfsDirectIngestFileType.RAW_DATA
        )

    if unnormalized_paths:
        logging.info(
            "Normalized at least one path - returning, will handle "
            "normalized files separately."
        )
        # Normalizing file paths will cause the cloud function that calls
        # this function to be re-triggered.
        return

    if not can_start_ingest:
        logging.warning(
            "Ingest not configured to start post-file normalization - returning."
        )
        return

    check_is_region_launched_in_env(self.region)

    unprocessed_ingest_view_paths = self.fs.get_unprocessed_file_paths(
        self.ingest_bucket_path,
        file_type_filter=GcsfsDirectIngestFileType.INGEST_VIEW,
    )
    unprocessed_raw_paths = self.fs.get_unprocessed_file_paths(
        self.ingest_bucket_path,
        file_type_filter=GcsfsDirectIngestFileType.RAW_DATA,
    )

    if unprocessed_raw_paths and self.ingest_instance == DirectIngestInstance.SECONDARY:
        raise ValueError(
            f"Raw data import not supported from SECONDARY ingest bucket "
            f"[{self.ingest_bucket_path}], but found {len(unprocessed_raw_paths)} "
            f"raw files. All raw files should be removed from this bucket and "
            f"uploaded to the primary ingest bucket, if appropriate."
        )

    self._register_all_new_paths_in_metadata(unprocessed_raw_paths)
    self._register_all_new_paths_in_metadata(unprocessed_ingest_view_paths)

    unprocessed_paths = unprocessed_raw_paths + unprocessed_ingest_view_paths
    did_split = False
    for path in unprocessed_ingest_view_paths:
        if self._split_file_if_necessary(path):
            did_split = True

    if did_split:
        post_split_unprocessed_ingest_view_paths = (
            self.fs.get_unprocessed_file_paths(
                self.ingest_bucket_path,
                file_type_filter=GcsfsDirectIngestFileType.INGEST_VIEW,
            )
        )
        self._register_all_new_paths_in_metadata(
            post_split_unprocessed_ingest_view_paths
        )
        logging.info(
            "Split at least one path - returning, will handle split "
            "files separately."
        )
        # Writing new split files to storage will cause the cloud function
        # that calls this function to be re-triggered.
        return

    if unprocessed_paths:
        self.schedule_next_ingest_job(just_finished_job=False)

def schedule_next_ingest_job(self, just_finished_job: bool) -> None:
    """Creates a cloud task to run a /process_job request for the file,
    which will process and commit the contents to Postgres."""
    check_is_region_launched_in_env(self.region)

    if self.ingest_instance_status_manager.is_instance_paused():
        logging.info(
            "Ingest out of [%s] is currently paused.",
            self.ingest_bucket_path.uri(),
        )
        return

    if self._schedule_any_pre_ingest_tasks():
        logging.info("Found pre-ingest tasks to schedule - returning.")
        return

    if self.region_lock_manager.is_locked():
        logging.info(
            "Direct ingest is already locked on region [%s]", self.region
        )
        return

    process_job_queue_info = self.cloud_task_manager.get_process_job_queue_info(
        self.region,
        self.ingest_instance,
    )
    if (
        process_job_queue_info.tasks_for_instance(
            region_code=self.region_code(), ingest_instance=self.ingest_instance
        )
        and not just_finished_job
    ):
        logging.info(
            "Already running job [%s] - will not schedule another job for "
            "region [%s]",
            process_job_queue_info.task_names[0],
            self.region.region_code,
        )
        return

    next_job_args = self._get_next_job_args()

    if not next_job_args:
        logging.info(
            "No more jobs to run for region [%s] - returning",
            self.region.region_code,
        )
        return

    if process_job_queue_info.is_task_queued(self.region, next_job_args):
        logging.info(
            "Already have task queued for next job [%s] - returning.",
            self._job_tag(next_job_args),
        )
        return

    if not self.region_lock_manager.can_proceed():
        logging.info(
            "Postgres to BigQuery export is running, cannot run ingest - returning"
        )
        return

    logging.info("Creating cloud task to run job [%s]", self._job_tag(next_job_args))
    self.cloud_task_manager.create_direct_ingest_process_job_task(
        region=self.region,
        ingest_instance=self.ingest_instance,
        ingest_args=next_job_args,
    )
    self._on_job_scheduled(next_job_args)

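# Usage sketch (hedged): handlers above re-enter this scheduling loop via
# kick_scheduler(just_finished_job=True) after finishing a task; calling the
# method directly with just_finished_job=False is the cold-start equivalent,
# e.g. from a handle_new_files pass. `controller` is assumed constructed for
# a PRIMARY ingest instance.
#
#   controller.schedule_next_ingest_job(just_finished_job=False)
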