def _execute_indexing_task(target_index, start_id, end_id, notify_url):
    """
    Launch a separate ``Process`` that indexes a range of ``image`` rows.

    The query selects every column of ``image`` plus two boolean columns,
    ``deleted`` and ``mature``, which report whether a row with the same
    ``identifier`` exists in ``api_deletedimage`` / ``api_matureimage``.
    The query, a ``TableIndexer`` and the remaining arguments are handed to
    ``_launch_reindex``, which runs in the new process. This function
    returns as soon as the process has been started; it does not wait for
    it to finish.

    :param target_index: forwarded unchanged to ``_launch_reindex``
    :param start_id: lower bound of the ``id`` range (SQL ``BETWEEN``, so
        inclusive); must be convertible to ``int``
    :param end_id: upper bound of the ``id`` range (inclusive); must be
        convertible to ``int``
    :param notify_url: forwarded unchanged to ``_launch_reindex``
    :raises ValueError: if a bound is not a valid integer representation
    :raises TypeError: if a bound is of a type ``int()`` cannot convert
    """

    # The bounds are spliced directly into the SQL text below. Forcing them
    # to integers first means nothing but digits can reach the query, and a
    # bad value fails here instead of inside the worker process.
    first_id = int(start_id)
    last_id = int(end_id)

    table = 'image'
    elasticsearch = elasticsearch_connect()
    progress = Value('d', 0.0)
    finish_time = Value('d', 0.0)

    # Template for a correlated sub-query; both substitutions below are
    # fixed literals, never caller input.
    exists_in_table = (
        'exists(SELECT 1 FROM {table} '
        'WHERE identifier = image.identifier) as "{name}"'
    )
    exists_in_deleted_table = exists_in_table.format(
        table='api_deletedimage', name='deleted'
    )
    exists_in_mature_table = exists_in_table.format(
        table='api_matureimage', name='mature'
    )

    query = SQL(
        f' SELECT *, {exists_in_deleted_table}, {exists_in_mature_table}'
        f' FROM image WHERE id BETWEEN {first_id} AND {last_id} '
    )
    log.info('Querying {}'.format(query))

    indexer = TableIndexer(elasticsearch, table, progress, finish_time)
    p = Process(
        target=_launch_reindex,
        args=(table, target_index, query, indexer, notify_url),
    )
    p.start()
    log.info('Started indexing task')
def _execute_indexing_task(
    model_name, table_name, target_index, start_id, end_id, notify_url
):
    """
    Launch a separate ``Process`` that indexes a range of rows of a table.

    The query selects every column of ``table_name`` together with the two
    expressions returned by ``get_existence_queries``, restricted to rows
    whose ``id`` lies between ``start_id`` and ``end_id``. The table name is
    inserted as an ``Identifier`` and the bounds as ``Literal`` values. This
    function returns once the process has been started; it does not wait
    for it to finish.

    :param model_name: passed to ``get_existence_queries`` and forwarded to
        ``_launch_reindex``
    :param table_name: the table to read from; also passed to
        ``get_existence_queries`` and forwarded to ``_launch_reindex``
    :param target_index: forwarded unchanged to ``_launch_reindex``
    :param start_id: lower bound of the ``id`` range (SQL ``BETWEEN``)
    :param end_id: upper bound of the ``id`` range (SQL ``BETWEEN``)
    :param notify_url: forwarded unchanged to ``_launch_reindex``
    """

    es_client = elasticsearch_connect()
    deleted_clause, mature_clause = get_existence_queries(
        model_name, table_name
    )

    template = SQL(
        "SELECT *, {deleted}, {mature} "
        "FROM {table_name} "
        "WHERE id BETWEEN {start_id} AND {end_id};"
    )
    query = template.format(
        deleted=deleted_clause,
        mature=mature_clause,
        table_name=Identifier(table_name),
        start_id=Literal(start_id),
        end_id=Literal(end_id),
    )
    log.info(f"Querying {query}")

    table_indexer = TableIndexer(es_client)
    reindex_args = (
        model_name,
        table_name,
        target_index,
        query,
        table_indexer,
        notify_url,
    )
    worker = Process(target=_launch_reindex, args=reindex_args)
    worker.start()
    log.info("Started indexing task")
def perform_task( task_id: str, model: MediaType, action: TaskTypes, callback_url: Optional[str], progress: Value, finish_time: Value, active_workers: Value, **kwargs, ): """ Perform the task defined by the API request by invoking the task function with the correct arguments. Any additional keyword arguments will be forwarded to the appropriate task functions. :param task_id: the UUID assigned to the task for tracking :param model: the media type for which the action is being performed :param action: the name of the action being performed :param callback_url: the URL to which to make a request after the task is completed :param progress: shared memory for tracking the task's progress :param finish_time: shared memory for tracking the finish time of the task :param active_workers: shared memory for counting workers assigned to the task """ elasticsearch = elasticsearch_connect() indexer = TableIndexer( elasticsearch, task_id, callback_url, progress, active_workers, ) # Task functions # ============== # These functions must have a signature of ``Callable[[], None]``. def ingest_upstream(): # includes ``reindex`` refresh_api_table(model, progress) if model == "audio": refresh_api_table("audioset", progress, approach="basic") indexer.reindex(model, f"temp_import_{model}", **kwargs) def promote(): # includes point alias promote_api_table(model, progress) if model == "audio": promote_api_table("audioset", progress) indexer.point_alias(model, **kwargs) try: locs = locals() # contains all the task functions defined above if func := locs.get(action.value): func() # Run the task function if it is defined elif func := getattr(indexer, action.value): func(model, ** kwargs) # Directly invoke indexer methods if no task function
def _execute_indexing_task(target_index, start_id, end_id, notify_url):
    """
    Launch a separate ``Process`` that indexes a range of ``image`` rows.

    The query selects every column of ``image`` for rows whose ``id`` lies
    between ``start_id`` and ``end_id``. The query, a ``TableIndexer`` and
    the remaining arguments are handed to ``_launch_reindex``, which runs in
    the new process. This function returns as soon as the process has been
    started; it does not wait for it to finish.

    :param target_index: forwarded unchanged to ``_launch_reindex``
    :param start_id: lower bound of the ``id`` range (SQL ``BETWEEN``, so
        inclusive); must be convertible to ``int``
    :param end_id: upper bound of the ``id`` range (inclusive); must be
        convertible to ``int``
    :param notify_url: forwarded unchanged to ``_launch_reindex``
    :raises ValueError: if a bound is not a valid integer representation
    :raises TypeError: if a bound is of a type ``int()`` cannot convert
    """

    # The bounds are formatted straight into the SQL text. Coercing them to
    # integers first keeps anything but digits out of the query and rejects
    # bad input before a connection or process is created.
    first_id = int(start_id)
    last_id = int(end_id)

    table = 'image'
    elasticsearch = elasticsearch_connect()
    progress = Value('d', 0.0)
    finish_time = Value('d', 0.0)

    # Adjacent literals are joined before ``.format`` runs, so the three
    # placeholders are filled in a single call.
    query = SQL(
        'SELECT * FROM {}'
        ' WHERE id BETWEEN {} AND {}'.format(table, first_id, last_id)
    )
    log.info('Querying {}'.format(query))

    indexer = TableIndexer(elasticsearch, table, progress, finish_time)
    p = Process(
        target=_launch_reindex,
        args=(table, target_index, query, indexer, notify_url),
    )
    p.start()
    log.info('Started indexing task')
def run(self):
    """
    Run the action selected by ``self.task_type``, then send the callback.

    A ``TableIndexer`` is built for ``self.model`` and the task type is
    mapped to an action:

    - ``TaskTypes.REINDEX``: ``indexer.reindex``
    - ``TaskTypes.UPDATE_INDEX``: ``indexer.update`` with
      ``self.since_date``
    - ``TaskTypes.INGEST_UPSTREAM``: ``reload_upstream`` followed by
      ``indexer.reindex``

    Any other task type performs no action. Afterwards, if
    ``self.callback_url`` is set, a POST request is sent to it. A failure
    to deliver the callback is logged and does not raise.
    """
    # Upper bound, in seconds, on waiting for the callback endpoint. Without
    # a timeout ``requests`` can block forever on an unresponsive server.
    callback_timeout = 30

    # Map task types to actions.
    elasticsearch = elasticsearch_connect()
    indexer = TableIndexer(
        elasticsearch, self.model, self.progress, self.finish_time
    )
    if self.task_type == TaskTypes.REINDEX:
        indexer.reindex(self.model)
    elif self.task_type == TaskTypes.UPDATE_INDEX:
        indexer.update(self.model, self.since_date)
    elif self.task_type == TaskTypes.INGEST_UPSTREAM:
        reload_upstream(self.model)
        indexer.reindex(self.model)
    logging.info('Task {} exited.'.format(self.task_id))

    if self.callback_url:
        try:
            # A timeout surfaces as a RequestException subclass, so it is
            # handled by the same best-effort branch below.
            requests.post(self.callback_url, timeout=callback_timeout)
        except requests.exceptions.RequestException as e:
            logging.error('Failed to send callback!')
            logging.error(e)