示例#1
0
def _execute_indexing_task(target_index, start_id, end_id, notify_url):
    """
    Launch a separate process that reindexes a slice of the ``image`` table.

    The slice is selected with ``id BETWEEN start_id AND end_id``. Every
    selected row carries two extra boolean columns, ``deleted`` and
    ``mature``, reporting whether the row's ``identifier`` is present in
    ``api_deletedimage`` and ``api_matureimage`` respectively.

    The process is started but not joined, so this function returns as soon
    as the process has been launched.

    :param target_index: forwarded unchanged to ``_launch_reindex``
    :param start_id: lower bound of the ``id`` range; must be convertible to
        ``int``
    :param end_id: upper bound of the ``id`` range; must be convertible to
        ``int``
    :param notify_url: forwarded unchanged to ``_launch_reindex``
    :raises ValueError: if ``start_id`` or ``end_id`` is not an integer value
    :raises TypeError: if ``start_id`` or ``end_id`` cannot be converted to
        ``int`` at all (e.g. ``None``)
    """
    table = 'image'

    # The id bounds are spliced directly into the SQL text below, so force
    # them to plain integers first. This rejects anything that is not a
    # number before it can reach the database and fails in the caller rather
    # than inside the child process.
    first_id = int(start_id)
    last_id = int(end_id)

    elasticsearch = elasticsearch_connect()
    # Shared ``Value('d')`` slots handed to the indexer.
    progress = Value('d', 0.0)
    finish_time = Value('d', 0.0)

    # Template for a correlated sub-query yielding a named boolean column.
    exists_in_table = \
        'exists(SELECT 1 FROM {table} ' \
        'WHERE identifier = image.identifier) as "{name}"'
    exists_in_deleted_table = exists_in_table.format(
        table='api_deletedimage', name='deleted'
    )
    exists_in_mature_table = exists_in_table.format(
        table='api_matureimage', name='mature'
    )

    query = SQL(f'''
                SELECT *,
                  {exists_in_deleted_table}, {exists_in_mature_table}
                FROM image
                WHERE id BETWEEN {first_id} AND {last_id}
                ''')
    log.info('Querying {}'.format(query))
    indexer = TableIndexer(
        elasticsearch, table, progress, finish_time
    )
    p = Process(
        target=_launch_reindex,
        args=(table, target_index, query, indexer, notify_url)
    )
    p.start()
    log.info('Started indexing task')
示例#2
0
def _execute_indexing_task(
    model_name, table_name, target_index, start_id, end_id, notify_url
):
    """
    Launch a separate process that reindexes a slice of ``table_name``.

    The slice is selected with ``id BETWEEN start_id AND end_id``; the two
    extra column expressions come from ``get_existence_queries``. The table
    name and both bounds are composed into the statement as an
    ``Identifier`` and ``Literal`` values rather than pasted in as text.

    The process is started but not joined, so this function returns as soon
    as the process has been launched.

    :param model_name: passed to ``get_existence_queries`` and
        ``_launch_reindex``
    :param table_name: table to select from
    :param target_index: forwarded unchanged to ``_launch_reindex``
    :param start_id: lower bound of the ``id`` range
    :param end_id: upper bound of the ``id`` range
    :param notify_url: forwarded unchanged to ``_launch_reindex``
    """
    es_client = elasticsearch_connect()

    deleted_clause, mature_clause = get_existence_queries(
        model_name, table_name
    )

    # Build the statement template first, then fill in each placeholder with
    # a properly quoted component.
    template = SQL(
        "SELECT *, {deleted}, {mature} FROM {table_name} "
        "WHERE id BETWEEN {start_id} AND {end_id};"
    )
    placeholders = {
        "deleted": deleted_clause,
        "mature": mature_clause,
        "table_name": Identifier(table_name),
        "start_id": Literal(start_id),
        "end_id": Literal(end_id),
    }
    query = template.format(**placeholders)
    log.info(f"Querying {query}")

    table_indexer = TableIndexer(es_client)
    reindex_args = (
        model_name,
        table_name,
        target_index,
        query,
        table_indexer,
        notify_url,
    )
    worker = Process(target=_launch_reindex, args=reindex_args)
    worker.start()
    log.info("Started indexing task")
示例#3
0
def perform_task(
    task_id: str,
    model: MediaType,
    action: TaskTypes,
    callback_url: Optional[str],
    progress: Value,
    finish_time: Value,
    active_workers: Value,
    **kwargs,
):
    """
    Perform the task defined by the API request by invoking the task function with the
    correct arguments. Any additional keyword arguments will be forwarded to the
    appropriate task functions.

    The task is resolved from ``action.value``: a locally defined task function of
    that name is preferred; otherwise the attribute of that name on the
    ``TableIndexer`` instance is called with ``model`` and the extra keyword
    arguments.

    :param task_id: the UUID assigned to the task for tracking
    :param model: the media type for which the action is being performed
    :param action: the name of the action being performed
    :param callback_url: the URL to which to make a request after the task is completed
    :param progress: shared memory for tracking the task's progress
    :param finish_time: shared memory for tracking the finish time of the task
        (not referenced in the visible portion of this function)
    :param active_workers: shared memory for counting workers assigned to the task
    :param kwargs: extra keyword arguments forwarded to ``indexer.reindex``,
        ``indexer.point_alias`` or the directly invoked indexer method
    """

    elasticsearch = elasticsearch_connect()
    indexer = TableIndexer(
        elasticsearch,
        task_id,
        callback_url,
        progress,
        active_workers,
    )

    # Task functions
    # ==============
    # These functions must have a signature of ``Callable[[], None]``.
    # They are looked up by name below, so each function name must equal the
    # ``action.value`` it is meant to handle.

    def ingest_upstream():  # includes ``reindex``
        refresh_api_table(model, progress)
        # Audio additionally refreshes the "audioset" table, using the
        # "basic" approach.
        if model == "audio":
            refresh_api_table("audioset", progress, approach="basic")
        # presumably ``temp_import_<model>`` names the freshly imported table
        # to index from -- TODO confirm against ``TableIndexer.reindex``
        indexer.reindex(model, f"temp_import_{model}", **kwargs)

    def promote():  # includes point alias
        promote_api_table(model, progress)
        # Audio additionally promotes the "audioset" table.
        if model == "audio":
            promote_api_table("audioset", progress)
        indexer.point_alias(model, **kwargs)

    # NOTE(review): no ``except``/``finally`` clause for this ``try`` is
    # visible in this excerpt; the remainder of the function appears to be
    # missing.
    try:
        # NOTE(review): ``locals()`` also holds the parameters and the other
        # local names (``indexer``, ``elasticsearch``, ...), so this lookup
        # relies on no ``action.value`` colliding with one of those names.
        locs = locals()  # contains all the task functions defined above
        if func := locs.get(action.value):
            func()  # Run the task function if it is defined
        # ``getattr`` is called without a default, so an action with neither
        # a task function nor an indexer attribute raises ``AttributeError``.
        elif func := getattr(indexer, action.value):
            func(model, **
                 kwargs)  # Directly invoke indexer methods if no task function
def _execute_indexing_task(target_index, start_id, end_id, notify_url):
    """
    Launch a separate process that reindexes a slice of the ``image`` table.

    The slice is selected with ``id BETWEEN start_id AND end_id``. The
    process is started but not joined, so this function returns as soon as
    the process has been launched.

    :param target_index: forwarded unchanged to ``_launch_reindex``
    :param start_id: lower bound of the ``id`` range; must be convertible to
        ``int``
    :param end_id: upper bound of the ``id`` range; must be convertible to
        ``int``
    :param notify_url: forwarded unchanged to ``_launch_reindex``
    :raises ValueError: if ``start_id`` or ``end_id`` is not an integer value
    :raises TypeError: if ``start_id`` or ``end_id`` cannot be converted to
        ``int`` at all (e.g. ``None``)
    """
    table = 'image'

    # The id bounds are formatted straight into the SQL text, so force them
    # to plain integers first. This rejects anything that is not a number
    # before it can reach the database and fails in the caller rather than
    # inside the child process.
    first_id = int(start_id)
    last_id = int(end_id)

    elasticsearch = elasticsearch_connect()
    # Shared ``Value('d')`` slots handed to the indexer.
    progress = Value('d', 0.0)
    finish_time = Value('d', 0.0)

    # Reuse ``table`` so the table name is defined in exactly one place.
    query = SQL('SELECT * FROM {}'
                ' WHERE id BETWEEN {} AND {}'.format(table, first_id,
                                                     last_id))
    log.info('Querying {}'.format(query))
    indexer = TableIndexer(elasticsearch, table, progress, finish_time)
    p = Process(target=_launch_reindex,
                args=(table, target_index, query, indexer, notify_url))
    p.start()
    log.info('Started indexing task')
示例#5
0
 def run(self):
     """
     Execute this task's action, then notify the callback URL if one is set.

     Actions by ``self.task_type``:

     * ``REINDEX`` -- ``indexer.reindex(model)``
     * ``UPDATE_INDEX`` -- ``indexer.update(model, since_date)``
     * ``INGEST_UPSTREAM`` -- ``reload_upstream(model)`` followed by
       ``indexer.reindex(model)``

     Any other task type performs no action. A ``RequestException`` raised
     while posting to the callback URL is logged and not re-raised.
     """
     es_client = elasticsearch_connect()
     model = self.model
     indexer = TableIndexer(es_client, model, self.progress,
                            self.finish_time)

     # Map task types to actions.
     task_type = self.task_type
     if task_type == TaskTypes.REINDEX:
         indexer.reindex(model)
     elif task_type == TaskTypes.UPDATE_INDEX:
         indexer.update(model, self.since_date)
     elif task_type == TaskTypes.INGEST_UPSTREAM:
         reload_upstream(model)
         indexer.reindex(model)

     logging.info('Task {} exited.'.format(self.task_id))

     # Nothing left to do when no callback was requested.
     callback_url = self.callback_url
     if not callback_url:
         return

     try:
         requests.post(callback_url)
     except requests.exceptions.RequestException as err:
         logging.error('Failed to send callback!')
         logging.error(err)