def enhance_with_archive_items(self, items):
    """Attach the matching archive item and its lock details to each published item."""
    if not items:
        return
    wanted = list({entry.get("item_id") for entry in items if entry.get("item_id")})
    by_id = {}
    if wanted:
        mongo_query = {"$and": [{config.ID_FIELD: {"$in": wanted}}]}
        request = ParsedRequest()
        request.max_results = len(wanted)
        # can't access published from elastic due filter on the archive resource hence going to mongo
        for archived in superdesk.get_resource_service(ARCHIVE).get_from_mongo(
                req=request, lookup=mongo_query):
            handle_existing_data(archived)
            by_id[archived[config.ID_FIELD]] = archived
    for entry in items:
        # fall back to a minimal stub carrying only the version when no archive match exists
        archived = by_id.get(entry.get("item_id"), {config.VERSION: entry.get(config.VERSION, 1)})
        entry.update({
            config.ID_FIELD: entry.get("item_id"),
            "item_id": entry.get(config.ID_FIELD),
            "lock_user": archived.get("lock_user", None),
            "lock_time": archived.get("lock_time", None),
            "lock_action": archived.get("lock_action", None),
            "lock_session": archived.get("lock_session", None),
            "archive_item": archived if archived else None,
        })
        handle_existing_data(entry)
def enhance_with_archive_items(self, items):
    """Attach the matching archive item, takes package info and lock details to each published item.

    Fix: the original re-scanned the whole ``archive_items`` list for every
    published item (O(n*m)); an id-keyed dict makes each lookup O(1) and
    matches the sibling implementations of this method.
    """
    if not items:
        return
    ids = list(set([item.get('item_id') for item in items if item.get('item_id')]))
    archive_items = []
    if ids:
        query = {'$and': [{config.ID_FIELD: {'$in': ids}}]}
        archive_req = ParsedRequest()
        archive_req.max_results = len(ids)
        # can't access published from elastic due filter on the archive resource hence going to mongo
        archive_items = list(superdesk.get_resource_service(ARCHIVE)
                             .get_from_mongo(req=archive_req, lookup=query))
    takes_service = TakesPackageService()
    archive_lookup = {}
    for item in archive_items:
        handle_existing_data(item)
        takes_service.enhance_with_package_info(item)
        archive_lookup[item[config.ID_FIELD]] = item  # index once for O(1) lookups below
    for item in items:
        # fall back to a minimal stub carrying only the version when no archive match exists
        archive_item = archive_lookup.get(item.get('item_id'),
                                          {config.VERSION: item.get(config.VERSION, 1)})
        updates = {
            config.ID_FIELD: item.get('item_id'),
            'item_id': item.get(config.ID_FIELD),
            'lock_user': archive_item.get('lock_user', None),
            'lock_time': archive_item.get('lock_time', None),
            'lock_session': archive_item.get('lock_session', None),
            'archive_item': archive_item if archive_item else None
        }
        item.update(updates)
        handle_existing_data(item)
def enhance_with_archive_items(self, items):
    """Attach the matching archive item, takes info and lock details to each published item."""
    if not items:
        return
    wanted = list({entry.get("item_id") for entry in items if entry.get("item_id")})
    fetched = []
    if wanted:
        lookup = {"$and": [{config.ID_FIELD: {"$in": wanted}}]}
        request = ParsedRequest()
        request.max_results = len(wanted)
        # can't access published from elastic due filter on the archive resource hence going to mongo
        fetched = list(
            superdesk.get_resource_service(ARCHIVE).get_from_mongo(req=request, lookup=lookup)
        )
    takes_service = TakesPackageService()
    takes_service.enhance_items_with_takes_packages(fetched)
    by_id = {}
    for archived in fetched:
        handle_existing_data(archived)
        by_id[archived[config.ID_FIELD]] = archived
    for entry in items:
        # fall back to a minimal stub carrying only the version when no archive match exists
        archived = by_id.get(entry.get("item_id"), {config.VERSION: entry.get(config.VERSION, 1)})
        entry.update({
            config.ID_FIELD: entry.get("item_id"),
            "item_id": entry.get(config.ID_FIELD),
            "lock_user": archived.get("lock_user", None),
            "lock_time": archived.get("lock_time", None),
            "lock_action": archived.get("lock_action", None),
            "lock_session": archived.get("lock_session", None),
            "archive_item": archived if archived else None,
        })
        handle_existing_data(entry)
def get_next_order_sequence(self, blog_id):
    """Return the next post order number for the given blog, bumping its stored sequence."""
    if blog_id is None:
        return 0
    # atomically read the current sequence while incrementing it for the next caller
    blog = get_resource_service('blogs').find_and_modify(
        query={'_id': blog_id},
        update={'$inc': {'posts_order_sequence': 1}},
        upsert=False)
    if not blog:
        return 0
    # NOTE(review): the and/or idiom also treats a stored 0 as missing — kept as-is
    order = blog and blog.get('posts_order_sequence') or None
    if order is not None:
        return order
    # legacy blogs stored no sequence: derive one from the highest existing post order
    req = ParsedRequest()
    req.sort = '-order'
    req.max_results = 1
    newest = next(self.get_from_mongo(req=req, lookup={'blog': blog_id}), None)
    if newest and newest.get('order') is not None:
        order = newest.get('order') + 1
        # persist the derived sequence so future calls take the fast path
        get_resource_service('blogs').update(blog_id, {'posts_order_sequence': order + 1}, blog)
    else:
        order = 0
    return order
def get_expired_items(self, now):
    """Return a mongo cursor over archive items that have expired as of ``now``."""
    lookup = self._get_query_for_expired_items(now)
    request = ParsedRequest()
    request.max_results = 100  # process expired content in small batches
    return superdesk.get_resource_service(ARCHIVE).get_from_mongo(req=request, lookup=lookup)
def on_create(self, docs):
    """Assign desk_order and expiry defaults to new stages.

    If a new stage is flagged as the working and/or incoming stage, the
    desk's previous default of that kind is removed.
    """
    for doc in docs:
        desk = doc.get('desk')
        if not desk:
            # stages without a desk always take the first slot
            doc['desk_order'] = 1
            continue
        # look up the stage currently holding the highest order on this desk
        req = ParsedRequest()
        req.sort = '-desk_order'
        req.max_results = 1
        highest = self.get(req=req, lookup={'desk': doc['desk']})
        if doc.get('content_expiry') == 0:
            doc['content_expiry'] = None
        doc['desk_order'] = 1 if highest.count() == 0 else highest[0].get('desk_order', 1) + 1
        # a new default stage supersedes any existing default of the same kind
        if doc.get('working_stage', False):
            self.remove_old_default(desk, 'working_stage')
        if doc.get('default_incoming', False):
            self.remove_old_default(desk, 'default_incoming')
def get_expired_items(self, now):
    """Return up to 100 expired published items, oldest first."""
    logger.info('Get expired content from published')
    lookup = self.get_query_for_expired_items(now)
    request = ParsedRequest()
    request.sort = '_created'
    request.max_results = 100
    return superdesk.get_resource_service('published').get_from_mongo(req=request, lookup=lookup)
def get_filters(self):
    """Retrieve all of the available filter conditions and content filters.

    They are reloaded only when not yet cached, or when a record has been
    updated since the cache was built. This avoids the filtering functions
    having to repeatedly retrieve the individual filter records.

    :return:
    """
    # Get the most recent update time to the filter conditions and content_filters
    req = ParsedRequest()
    req.sort = '-_updated'
    req.max_results = 1
    mindate = datetime.min.replace(tzinfo=pytz.UTC)
    latest_fc = next(get_resource_service('filter_conditions').get_from_mongo(req=req, lookup=None),
                     {}).get('_updated', mindate)
    latest_cf = next(get_resource_service('content_filters').get_from_mongo(req=req, lookup=None),
                     {}).get('_updated', mindate)
    # reload when nothing is cached, a collection is empty (== mindate), or something changed
    if not self.filters or \
            latest_fc > self.filters.get('latest_filter_conditions', mindate) or latest_fc == mindate or \
            latest_cf > self.filters.get('latest_content_filters', mindate) or latest_cf == mindate:
        logger.debug('Getting content filters and filter conditions')
        self.filters = dict()
        self.filters['filter_conditions'] = dict()
        self.filters['content_filters'] = dict()
        for fc in get_resource_service('filter_conditions').get(req=None, lookup={}):
            self.filters['filter_conditions'][fc.get('_id')] = {'fc': fc}
            # track the newest _updated seen so the next call can detect changes
            self.filters['latest_filter_conditions'] = max(
                fc.get('_updated'), self.filters.get('latest_filter_conditions', mindate))
        for cf in get_resource_service('content_filters').get(req=None, lookup={}):
            self.filters['content_filters'][cf.get('_id')] = {'cf': cf}
            self.filters['latest_content_filters'] = max(
                cf.get('_updated'), self.filters.get('latest_content_filters', mindate))
    else:
        logger.debug('Using cached content filters and filter conditions')
def test_compare_repos(self):
    """Seed elastic with twice the mongo content and verify CompareRepositories reports the drift.

    Fix: ``assertEquals`` is a deprecated alias removed in Python 3.12;
    use ``assertEqual``.
    """
    with self.app.app_context():
        req = ParsedRequest()
        req.args = {}
        req.max_results = 25
        data = [{
            'headline': 'test {}'.format(i),
            'slugline': 'rebuild {}'.format(i),
            'type': 'text' if (i % 2 == 0) else 'picture'
        } for i in range(1, 100)]
        resolve_document_etag(data, 'archive')
        # insert straight into elastic first, then post normally: every item
        # ends up in elastic twice but in mongo once
        superdesk.app.data._search_backend('archive').bulk_insert(
            'archive', data)
        get_resource_service('archive').post(data)
        consistency_init(self.app)
        items = get_resource_service('archive').get(req, {})
        self.assertEqual(99, items.count())
        consistency_record = CompareRepositories().run(
            'archive', self.app.config['ELASTICSEARCH_URL'],
            self.app.config['ELASTICSEARCH_INDEXES'].get('archive'))
        self.assertEqual(consistency_record['mongo'], 99)
        self.assertEqual(consistency_record['elastic'], 198)
        self.assertEqual(consistency_record['identical'], 99)
        self.assertEqual(consistency_record['mongo_only'], 0)
        self.assertEqual(consistency_record['elastic_only'], 99)
        self.assertEqual(consistency_record['inconsistent'], 0)
def test_compare_repos(self):
    """Seed elastic with twice the mongo content and verify CompareRepositories reports the drift.

    Fix: ``assertEquals`` is a deprecated alias removed in Python 3.12;
    use ``assertEqual``.
    """
    with self.app.app_context():
        req = ParsedRequest()
        req.args = {}
        req.max_results = 25
        data = [{'headline': 'test {}'.format(i),
                 'slugline': 'rebuild {}'.format(i),
                 'type': 'text' if (i % 2 == 0) else 'picture'} for i in range(1, 100)]
        resolve_document_etag(data, 'archive')
        # insert straight into elastic first, then post normally: every item
        # ends up in elastic twice but in mongo once
        superdesk.app.data._search_backend('archive').bulk_insert('archive', data)
        get_resource_service('archive').post(data)
        consistency_init(self.app)
        items = get_resource_service('archive').get(req, {})
        self.assertEqual(99, items.count())
        consistency_record = CompareRepositories().run('archive',
                                                       self.app.config['ELASTICSEARCH_URL'],
                                                       self.app.config['ELASTICSEARCH_INDEXES'].get('archive'))
        self.assertEqual(consistency_record['mongo'], 99)
        self.assertEqual(consistency_record['elastic'], 198)
        self.assertEqual(consistency_record['identical'], 99)
        self.assertEqual(consistency_record['mongo_only'], 0)
        self.assertEqual(consistency_record['elastic_only'], 99)
        self.assertEqual(consistency_record['inconsistent'], 0)
def get_expired_items(self, expiry_datetime, invalid_only=False):
    """Get the expired items.

    Where content state is not scheduled and the item matches given parameters

    :param datetime expiry_datetime: expiry datetime
    :param bool invalid_only: True only invalid items
    :return pymongo.cursor: expired non published items.
    """
    # expired, and either assigned to a desk or spiked without one
    conditions = [
        {'expiry': {'$lte': date_to_str(expiry_datetime)}},
        {'$or': [
            {'task.desk': {'$ne': None}},
            {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
        ]},
    ]
    if invalid_only:
        conditions.append({'expiry_status': 'invalid'})
    else:
        conditions.append({'expiry_status': {'$ne': 'invalid'}})
    req = ParsedRequest()
    req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
    req.sort = 'expiry,_created'
    return self.get_from_mongo(req=req, lookup={'$and': conditions})
def purge_old_entries(self):
    """Purge entries older than the expiry that are not related to archive items.

    :return:
    """
    service = superdesk.get_resource_service('audit')
    last_seen = None
    while True:
        clauses = [
            self.not_item_entry_query,
            {'_updated': {'$lte': date_to_str(self.expiry)}},
        ]
        if last_seen:
            # resume from the newest timestamp of the previous (now deleted) batch
            clauses.append({'_updated': {'$gte': last_seen}})
        req = ParsedRequest()
        req.sort = '[("_updated", 1)]'
        req.projection = '{"_id": 1, "_updated": 1}'
        req.max_results = 1000
        audits = service.get_from_mongo(req=req, lookup={'$and': clauses})
        if audits.count() == 0:
            break
        batch = [(doc['_id'], doc['_updated']) for doc in audits]
        last_seen = batch[-1][1]
        service.delete({'_id': {'$in': [pair[0] for pair in batch]}})
def purge_orphaned_item_audits(self):
    """Purge the audit items that do not have associated entries existing in archive.

    :return:
    """
    service = superdesk.get_resource_service('audit')
    last_id = None
    # walk the audit collection in _id order, one 1000-document page at a time
    while True:
        query = deepcopy(self.item_entry_query)
        query['$and'].append({'_updated': {'$lte': date_to_str(self.expiry)}})
        if last_id:
            query['$and'].append({'_id': {'$gt': last_id}})
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.projection = '{"_id": 1, "extra.guid": 1, "extra._id": 1, "extra.item_id": 1, "extra.item": 1}'
        req.max_results = 1000
        audits = service.get_from_mongo(req=req, lookup=query)
        if audits.count() == 0:
            break
        pairs = [(doc['_id'], self._extract_item_id(doc)) for doc in audits]
        last_id = pairs[-1][0]
        referenced = set([pair[1] for pair in pairs])
        existing = self._get_archive_ids(referenced)
        orphaned = referenced - existing
        doomed = [audit_id for audit_id, item_id in pairs if item_id in orphaned]
        service.delete({'_id': {'$in': doomed}})
def get_expired_items(self, expired_date_time, limit=100):
    """Fetch the expired articles from the published collection.

    An article qualifies when its ``can_be_removed`` flag is set, or when it
    has passed expiry, is not scheduled, and allows post-publish actions.

    :param expired_date_time:
    :param limit:
    :return: expired articles from published collection
    """
    logger.info("Get expired content from published")
    removable = {"can_be_removed": True}
    lapsed = {"$and": [
        {"expiry": {"$lte": expired_date_time}},
        {ITEM_STATE: {"$ne": CONTENT_STATE.SCHEDULED}},
        {"allow_post_publish_actions": True},
    ]}
    req = ParsedRequest()
    req.sort = "_created"
    req.max_results = limit
    return superdesk.get_resource_service("published").get_from_mongo(
        req=req, lookup={"$or": [removable, lapsed]})
def get_history_items(self, last_id, gte, item_id, chunk_size=0):
    """Yield pages of archive_history entries matching the given filters.

    Pages are ordered by (_id, version); iteration stops at the first empty page.
    """
    history_service = get_resource_service('archive_history')
    cursor_id = last_id
    while True:
        req = ParsedRequest()
        req.sort = '[("_id", 1), ("version", 1)]'
        conditions = []
        if gte:
            conditions.append({'_created': {'$gte': date_to_str(gte)}})
        if item_id:
            conditions.append({'item_id': str(item_id)})
        if cursor_id:
            # resume strictly after the last id already yielded
            conditions.append({'_id': {'$gt': str(cursor_id)}})
        req.where = json.dumps({'$and': conditions})
        if chunk_size > 0:
            req.max_results = int(chunk_size)
        page = list(history_service.get(req=req, lookup=None))
        if not page:
            break
        cursor_id = page[-1][config.ID_FIELD]
        yield page
def get_published_items(self):
    """Return pending queue items that are unscheduled or whose schedule has lapsed."""
    lookup = {
        QUEUE_STATE: PUBLISH_STATE.PENDING,
        "$or": [
            {ITEM_STATE: {"$ne": CONTENT_STATE.SCHEDULED}},
            {
                ITEM_STATE: CONTENT_STATE.SCHEDULED,
                # scheduled items become eligible once their UTC schedule passes
                "{}.utc_{}".format(SCHEDULE_SETTINGS, PUBLISH_SCHEDULE): {"$lte": utcnow()},
            },
        ],
    }
    request = ParsedRequest()
    request.sort = "publish_sequence_no"
    request.max_results = 200
    return list(get_resource_service(PUBLISHED).get_from_mongo(req=request, lookup=lookup))
def forwards(self, mongodb_collection, mongodb_database):
    """Backfill extra.compliantlifetime (first publish + 1 year) on published and archive items."""
    archive_service = get_resource_service('archive')
    published_service = get_resource_service(self.resource)
    req = ParsedRequest()
    req.max_results = 50
    for page in range(1, 200):  # 10k limit
        req.page = page
        items = list(published_service.get(req=req, lookup=None))
        if not items:
            break
        for item in items:
            published_date = item.get('firstpublished')
            if published_date is None:
                continue
            extra = item.get('extra', {})
            extra['compliantlifetime'] = add_years(published_date, 1)
            try:
                published_service.system_update(
                    ObjectId(item['_id']), {'extra': extra}, item)
            except SuperdeskApiError:
                # skip items that fail to update; mirror nothing to archive for them
                continue
            archive_item = archive_service.find_one(req=None, _id=item['item_id'])
            if archive_item:
                archive_service.system_update(archive_item['_id'], {'extra': extra}, archive_item)
def _get_combined_view_data(self, items, request):
    """Get list of event and planning for the combined view

    :param items:
    :param request: object representing the HTTP request
    """
    ids = set()
    for item in items:
        # don't want related planing items: collapse onto the parent event id
        ids.add(item.get('event_item') or item.get('_id'))
    page = request.page or 1
    page_size = self._get_page_size(request)
    req = ParsedRequest()
    req.args = MultiDict()
    req.args['source'] = json.dumps({
        'query': {
            'bool': {
                'must': [{'terms': {'_id': list(ids)}}],
            }
        },
        'filter': self._get_date_filters(request),
        'sort': self._get_sort(),
        'size': page_size,
        'from': (page - 1) * page_size,
    })
    req.page = page
    req.max_results = page_size
    return get_resource_service('planning_search').get(req=req, lookup=None)
def get_expired_items(expired_date_time, limit=100):
    """Fetch the expired articles from the published collection.

    An article qualifies when its ``can_be_removed`` flag is set, or when it
    has passed expiry, is not scheduled, and allows post-publish actions.

    :param expired_date_time:
    :param limit:
    :return: expired articles from published collection
    """
    logger.info('Get expired content from published')
    removable = {'can_be_removed': True}
    lapsed = {'$and': [
        {'expiry': {'$lte': expired_date_time}},
        {ITEM_STATE: {'$ne': CONTENT_STATE.SCHEDULED}},
        {'allow_post_publish_actions': True},
    ]}
    req = ParsedRequest()
    req.sort = '_created'
    req.max_results = limit
    return superdesk.get_resource_service('published').get_from_mongo(
        req=req, lookup={'$or': [removable, lapsed]})
def purge_orphaned_item_audits(self):
    """Purge the audit items that do not have associated entries existing in archive.

    :return:
    """
    service = superdesk.get_resource_service('audit')
    last_id = None
    logger.info('Starting to purge audit logs of content items not in archive at {}'.format(utcnow()))
    # walk the audit collection in _id order, one 1000-document page at a time
    while True:
        query = deepcopy(self.item_entry_query)
        query['$and'].append({'_updated': {'$lte': date_to_str(self.expiry)}})
        if last_id:
            query['$and'].append({'_id': {'$gt': last_id}})
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.projection = '{"_id": 1, "audit_id":1}'
        req.max_results = 1000
        cursor = service.get_from_mongo(req=req, lookup=query)
        pairs = [(doc['_id'], doc['audit_id']) for doc in cursor]
        if len(pairs) == 0:
            logger.info('Finished purging audit logs of content items not in archive at {}'.format(utcnow()))
            return
        logger.info('Found {} orphaned audit items at {}'.format(len(pairs), utcnow()))
        last_id = pairs[-1][0]
        referenced = set([pair[1] for pair in pairs])
        existing = self._get_archive_ids(referenced)
        orphaned = referenced - existing
        doomed = [audit_id for audit_id, item_id in pairs if item_id in orphaned]
        logger.info('Deleting {} orphaned audit items at {}'.format(len(doomed), utcnow()))
        service.delete_ids_from_mongo(doomed)
def get_next_order_sequence(self, blog_id):
    """Return the next post order number for the given blog, bumping its stored sequence."""
    if blog_id is None:
        return 0
    blogs = get_resource_service('blogs')
    # atomically fetch the current sequence while incrementing it for the next caller
    blog = blogs.find_and_modify(
        query={'_id': blog_id},
        update={'$inc': {'posts_order_sequence': 1}},
        upsert=False)
    order = 0
    if blog:
        # NOTE(review): the and/or idiom also treats a stored 0 as missing — kept as-is
        sequence = blog and blog.get('posts_order_sequence') or None
        if sequence is None:
            # legacy blogs stored no sequence: derive one from the highest existing post order
            req = ParsedRequest()
            req.sort = '-order'
            req.max_results = 1
            newest = next(self.get_from_mongo(req=req, lookup={'blog': blog_id}), None)
            if newest and newest.get('order') is not None:
                order = newest.get('order') + 1
                # persist the derived sequence so future calls take the fast path
                blogs.update(blog_id, {'posts_order_sequence': order + 1}, blog)
        else:
            order = sequence
    return order
def purge_old_entries(self):
    """Purge entries older than the expiry that are not related to archive items.

    :return:
    """
    service = superdesk.get_resource_service('audit')
    last_id = None
    logger.info('Starting to purge audit logs of none content items at {}'.format(utcnow()))
    while True:
        clauses = [
            self.not_item_entry_query,
            {'_updated': {'$lte': date_to_str(self.expiry)}},
        ]
        if last_id:
            # resume strictly after the last id already deleted
            clauses.append({'_id': {'$gt': last_id}})
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.projection = '{"_id": 1}'
        req.max_results = 1000
        audits = service.get_from_mongo(req=req, lookup={'$and': clauses})
        doomed = list(doc.get('_id') for doc in audits)
        if len(doomed) == 0:
            logger.info('Finished purging audit logs of none content items at {}'.format(utcnow()))
            return
        logger.info('Found {} audit items at {}'.format(len(doomed), utcnow()))
        last_id = doomed[-1]
        logger.info('Deleting {} old audit items'.format(len(doomed)))
        service.delete_ids_from_mongo(doomed)
def on_create(self, docs):
    """Runs on stage create.

    Sets desk_order and expiry defaults; if the new stage is flagged as the
    working and/or incoming stage, the desk's previous default of that kind
    is removed.
    """
    for doc in docs:
        desk = doc.get("desk")
        if not desk:
            # stages without a desk always take the first slot
            doc["desk_order"] = 1
            continue
        req = ParsedRequest()
        req.sort = "-desk_order"
        req.max_results = 1
        last = self.get(req=req, lookup={"desk": doc["desk"]})
        if doc.get("content_expiry") == 0:
            doc["content_expiry"] = None
        if last.count() == 0:
            doc["desk_order"] = 1
        else:
            doc["desk_order"] = last[0].get("desk_order", 1) + 1
        # a new default stage supersedes any existing default of the same kind
        if doc.get("working_stage", False):
            self.remove_old_default(desk, "working_stage")
        if doc.get("default_incoming", False):
            self.remove_old_default(desk, "default_incoming")
def get_filters(self):
    """Retrieve all of the available filter conditions and content filters.

    They are reloaded only when not yet cached, or when a record has been
    updated since the cache was built. This avoids the filtering functions
    having to repeatedly retrieve the individual filter records.

    :return:
    """
    # Get the most recent update time to the filter conditions and content_filters
    req = ParsedRequest()
    req.sort = '-_updated'
    req.max_results = 1
    mindate = datetime.min.replace(tzinfo=pytz.UTC)
    latest_fc = next(get_resource_service('filter_conditions').get_from_mongo(req=req, lookup=None),
                     {}).get('_updated', mindate)
    latest_cf = next(get_resource_service('content_filters').get_from_mongo(req=req, lookup=None),
                     {}).get('_updated', mindate)
    # reload when nothing is cached, a collection is empty (== mindate), or something changed
    if not self.filters or \
            latest_fc > self.filters.get('latest_filter_conditions', mindate) or latest_fc == mindate or \
            latest_cf > self.filters.get('latest_content_filters', mindate) or latest_cf == mindate:
        logger.debug('Getting content filters and filter conditions')
        self.filters = dict()
        self.filters['filter_conditions'] = dict()
        self.filters['content_filters'] = dict()
        for fc in get_resource_service('filter_conditions').get(req=None, lookup={}):
            self.filters['filter_conditions'][fc.get('_id')] = {'fc': fc}
            # track the newest _updated seen so the next call can detect changes
            self.filters['latest_filter_conditions'] = max(
                fc.get('_updated'), self.filters.get('latest_filter_conditions', mindate))
        for cf in get_resource_service('content_filters').get(req=None, lookup={}):
            self.filters['content_filters'][cf.get('_id')] = {'cf': cf}
            self.filters['latest_content_filters'] = max(
                cf.get('_updated'), self.filters.get('latest_content_filters', mindate))
    else:
        logger.debug('Using cached content filters and filter conditions')
def get_overdue_scheduled_items(expired_date_time, resource, limit=100):
    """Fetch the overdue scheduled articles from the given collection.

    Overdue means the item is still in the 'scheduled' state and its
    publish_schedule is at or before ``expired_date_time``.

    :param expired_date_time: DateTime that scheduled tate will be checked against
    :param resource: Name of the resource to check the data from
    :param limit: Number of return items
    :return: overdue scheduled articles from published collection
    """
    logger.info('Get overdue scheduled content from {}'.format(resource))
    lookup = {'$and': [
        {'publish_schedule': {'$lte': expired_date_time}},
        {ITEM_STATE: CONTENT_STATE.SCHEDULED},
    ]}
    req = ParsedRequest()
    req.sort = '_modified'
    req.max_results = limit
    return superdesk.get_resource_service(resource).get_from_mongo(req=req, lookup=lookup)
def get_queue_items(retries=False):
    """Return transmittable queue items, excluding pull destinations.

    With ``retries`` set, items in the retrying state whose next attempt is
    due are returned instead of pending ones.
    """
    not_pull = {'destination.delivery_type': {'$ne': 'pull'}}
    if retries:
        lookup = {'$and': [
            {'state': QueueState.RETRYING.value},
            {'next_retry_attempt_at': {'$lte': utcnow()}},
            not_pull,
        ]}
    else:
        lookup = {'$and': [
            {'state': QueueState.PENDING.value},
            not_pull,
        ]}
    request = ParsedRequest()
    request.max_results = app.config.get('MAX_TRANSMIT_QUERY_LIMIT', 500)
    # ensure we publish in the correct sequence
    request.sort = '[("_created", 1), ("subscriber_id", 1), ("published_seq_num", 1)]'
    return get_resource_service(PUBLISH_QUEUE).get(req=request, lookup=lookup)
def _get_changed_items(self, existing_items, package):
    """Returns the added and removed items from existing_items

    :param existing_items: Existing list
    :param package: published package whose last queued version is compared
    :return: list of removed items and list of added items
    """
    published_service = get_resource_service('published')
    # newest previously-queued publish of this package, if any
    source = {
        'query': {'filtered': {'filter': {'and': [
            {'terms': {QUEUE_STATE: [PUBLISH_STATE.QUEUED, PUBLISH_STATE.QUEUED_NOT_TRANSMITTED]}},
            {'term': {'item_id': package['item_id']}},
        ]}}},
        'sort': [{'publish_sequence_no': 'desc'}],
    }
    req = ParsedRequest()
    req.args = {'source': json.dumps(source)}
    req.max_results = 1
    matches = published_service.get(req=req, lookup=None)
    if not matches.count():
        return [], []
    previous = matches[0]
    if 'groups' not in previous:
        return [], []
    before = set(self.package_service.get_residrefs(previous))
    after = set(existing_items)
    return list(before - after), list(after - before)
def get_expired_items(self, expiry_datetime):
    """Get the expired items where content state is not scheduled.

    :param datetime expiry_datetime: expiry datetime
    :return pymongo.cursor: expired non published items.
    """
    # expired, and either assigned to a desk or spiked without one
    on_desk_or_spiked = {'$or': [
        {'task.desk': {'$ne': None}},
        {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
    ]}
    lookup = {'$and': [
        {'expiry': {'$lte': date_to_str(expiry_datetime)}},
        on_desk_or_spiked,
    ]}
    req = ParsedRequest()
    req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
    req.sort = 'expiry,_created'
    return self.get_from_mongo(req=req, lookup=lookup)
def enhance_with_archive_items(self, items):
    """Attach the matching archive item, takes info and lock details to each published item."""
    if not items:
        return
    wanted = list({entry.get('item_id') for entry in items if entry.get('item_id')})
    fetched = []
    if wanted:
        lookup = {'$and': [{config.ID_FIELD: {'$in': wanted}}]}
        request = ParsedRequest()
        request.max_results = len(wanted)
        # can't access published from elastic due filter on the archive resource hence going to mongo
        fetched = list(superdesk.get_resource_service(ARCHIVE)
                       .get_from_mongo(req=request, lookup=lookup))
    takes_service = TakesPackageService()
    takes_service.enhance_items_with_takes_packages(fetched)
    by_id = {}
    for archived in fetched:
        handle_existing_data(archived)
        by_id[archived[config.ID_FIELD]] = archived
    for entry in items:
        # fall back to a minimal stub carrying only the version when no archive match exists
        archived = by_id.get(entry.get('item_id'), {config.VERSION: entry.get(config.VERSION, 1)})
        entry.update({
            config.ID_FIELD: entry.get('item_id'),
            'item_id': entry.get(config.ID_FIELD),
            'lock_user': archived.get('lock_user', None),
            'lock_time': archived.get('lock_time', None),
            'lock_action': archived.get('lock_action', None),
            'lock_session': archived.get('lock_session', None),
            'archive_item': archived if archived else None
        })
        handle_existing_data(entry)
def on_create(self, docs):
    """Set desk_order and expiry defaults on new stages; demote superseded default stages."""
    for doc in docs:
        desk = doc.get('desk')
        if not desk:
            # stages without a desk always take the first slot
            doc['desk_order'] = 1
            continue
        req = ParsedRequest()
        req.sort = '-desk_order'
        req.max_results = 1
        highest = self.get(req=req, lookup={'desk': doc['desk']})
        if doc.get('content_expiry') == 0:
            doc['content_expiry'] = None
        doc['desk_order'] = 1 if highest.count() == 0 else highest[0].get('desk_order', 1) + 1
        # a new default stage supersedes any previous default of the same kind
        for flag in ('working_stage', 'default_incoming'):
            if doc.get(flag, False):
                self.remove_old_default(desk, flag)
def forwards(self, mongodb_collection, mongodb_database):
    """Drop extra.compliantlifetime from items not created from the 'article' template."""
    archive_service = get_resource_service('archive')
    published_service = get_resource_service(self.resource)
    templates_service = get_resource_service('content_templates')
    template = templates_service.find_one(req=None, template_name='article')
    if not template:
        return
    req = ParsedRequest()
    req.max_results = 50
    for page in range(1, 200):
        req.page = page
        items = list(published_service.get(req=req, lookup=None))
        if not items:
            break
        for item in items:
            # items created from the 'article' template keep their lifetime
            if ObjectId(item.get('template')) == template.get('_id'):
                continue
            extra = item.get('extra')
            if extra is None:
                continue
            extra.pop('compliantlifetime', None)
            published_service.system_update(ObjectId(item['_id']), {'extra': extra}, item)
            archive_item = archive_service.find_one(req=None, _id=item.get('item_id'))
            if archive_item:
                archive_service.system_update(archive_item.get('_id'), {'extra': extra}, archive_item)
def get_expired_items(self, expiry_datetime, invalid_only=False):
    """Get the expired items.

    Where content state is not scheduled and the item matches given parameters

    :param datetime expiry_datetime: expiry datetime
    :param bool invalid_only: True only invalid items
    :return pymongo.cursor: expired non published items.
    """
    clauses = [
        {'expiry': {'$lte': date_to_str(expiry_datetime)}},
        # expired, and either assigned to a desk or spiked without one
        {'$or': [
            {'task.desk': {'$ne': None}},
            {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
        ]},
        # restrict to (or exclude) items already flagged with an invalid expiry
        {'expiry_status': 'invalid'} if invalid_only else {'expiry_status': {'$ne': 'invalid'}},
    ]
    req = ParsedRequest()
    req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
    req.sort = 'expiry,_created'
    return self.get_from_mongo(req=req, lookup={'$and': clauses})
def _filter_items(self, items):
    """Remove events which are exist in the db.

    :param items: dict with events, ntbId used as a key
    :type items: dict
    :return: a list of events
    """
    req = ParsedRequest()
    req.projection = json.dumps({'ntb_id': 1, 'guid': 1, ITEM_STATE: 1})
    req.max_results = len(items)
    existing = superdesk.get_resource_service('events').get_from_mongo(
        req,
        {'ntb_id': {'$in': list(items.keys())}}
    )
    for event in existing:
        if event.get(ITEM_STATE) == WORKFLOW_STATE.INGESTED:
            # still ingested: keep the incoming event as an update, reusing the stored guid
            items[event['ntb_id']][GUID_FIELD] = event[GUID_FIELD]
        else:
            # already progressed past 'ingested': drop the incoming event
            del items[event['ntb_id']]
    return list(items.values())
def _get_changed_items(self, existing_items, package):
    """Returns the added and removed items from existing_items

    :param existing_items: Existing list
    :param package: published package whose last queued version is compared
    :return: list of removed items and list of added items
    """
    published_service = get_resource_service('published')
    # newest previously-queued publish of this package, if any
    source = {
        'query': {'filtered': {'filter': {'and': [
            {'terms': {QUEUE_STATE: [PUBLISH_STATE.QUEUED, PUBLISH_STATE.QUEUED_NOT_TRANSMITTED]}},
            {'term': {'item_id': package['item_id']}},
        ]}}},
        'sort': [{'publish_sequence_no': 'desc'}],
    }
    req = ParsedRequest()
    req.args = {'source': json.dumps(source)}
    req.max_results = 1
    matches = published_service.get(req=req, lookup=None)
    if not matches.count():
        return [], []
    previous = matches[0]
    if 'groups' not in previous:
        return [], []
    before = set(self.package_service.get_residrefs(previous))
    after = set(existing_items)
    removed_items = list(before - after)
    added_items = list(after - before)
    return removed_items, added_items
def _get_events_and_planning(self, request, query, search_filter):
    """Get list of event and planning based on the search criteria

    :param request: object representing the HTTP request
    """
    page = request.page or 1
    max_results = self._get_page_size(request, search_filter)
    req = ParsedRequest()
    req.args = MultiDict()
    # over-fetch (grows with the page number) so grouped results can be collapsed later
    req.args['source'] = json.dumps({
        'query': query['query'],
        'sort': query['sort'] if query.get('sort') else self._get_sort(),
        'size': int((5 * max_results) * math.ceil(page / 3)),
    })
    req.args['projections'] = json.dumps(['_id', 'type', 'event_item'])
    req.page = page
    req.max_results = max_results
    req.exec_on_fetched_resource = False  # don't call on_fetched_resource
    return get_resource_service('planning_search').get(req=req, lookup=None)
def purge_old_entries(self):
    """Purge entries older than the expiry.

    :return:
    """
    service = superdesk.get_resource_service("audit")
    logger.info("Starting to purge audit logs at {}".format(utcnow()))
    for _ in range(100):  # make sure we don't get stuck
        # ObjectIds embed their creation time, so an _id upper bound selects by age
        lookup = {"$and": [{"_id": {"$lt": ObjectId.from_datetime(self.expiry)}}]}
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.projection = '{"_id": 1}'
        req.max_results = 1000
        audits = service.get_from_mongo(req=req, lookup=lookup)
        doomed = list(doc.get("_id") for doc in audits)
        if len(doomed) == 0:
            logger.info("Finished purging audit logs at {}".format(utcnow()))
            return
        logger.info("Found {} audit items at {}".format(len(doomed), utcnow()))
        service.delete_ids_from_mongo(doomed)
    logger.warning("Audit purge didn't finish in 100 iterations.")
def get_packages(self, doc_id, not_package_id=None):
    """Retrieve the package(s) that reference the article identified by doc_id.

    :param str doc_id: identifier of the item in the package
    :param str not_package_id: package id to exclude from the result
    :return: articles of type composite
    """
    lookup = {
        '$and': [
            {ITEM_TYPE: CONTENT_TYPE.COMPOSITE},
            {'groups.refs.residRef': doc_id},
        ]
    }
    if not_package_id:
        lookup['$and'].append({config.ID_FIELD: {'$ne': not_package_id}})
    req = ParsedRequest()
    req.max_results = 100
    return get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=lookup)
def get_items(self, now):
    """Get the items from the archive collection that have expiry in future
    and state is published, corrected, killed.

    Yields batches of ``self.default_page_size`` items, paging on
    ``unique_id``.

    :param datetime now: current date time
    :return list: list of expired items
    """
    logger.info('Fetching expired items from archive collection.')
    # shift "now" forward so items expiring within the next expiry_minutes
    # are included in the selection
    now = now + timedelta(minutes=self.expiry_minutes)
    query = {
        'expiry': {
            '$gte': date_to_str(now)
        },
        ITEM_STATE: {
            '$in': [
                CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED,
                CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED
            ]
        }
    }
    req = ParsedRequest()
    req.sort = '[("unique_id", 1)]'
    req.where = json.dumps(query)
    # initial query only to compute the page count and the starting unique_id
    cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, self.default_page_size))
        unique_id = cursor[0]['unique_id']
        logger.info('Number of items to modify: {}, pages={}'.format(
            count, no_of_pages))
    else:
        logger.info('No items to modify.')
    for page in range(0, no_of_pages):
        logger.info(
            'Fetching items for page number: {} unique_id: {}'.format(
                (page + 1), unique_id))
        req = ParsedRequest()
        req.sort = '[("unique_id", 1)]'
        # first page includes the starting unique_id ($gte); later pages start
        # strictly after the last id yielded ($gt) to avoid duplicates
        if page == 0:
            query['unique_id'] = {'$gte': unique_id}
        else:
            query['unique_id'] = {'$gt': unique_id}
        req.where = json.dumps(query)
        req.max_results = self.default_page_size
        cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
        items = list(cursor)
        if len(items) > 0:
            unique_id = items[len(items) - 1]['unique_id']
            logger.info('Fetched No. of Items: {} for page: {}'.format(
                len(items), (page + 1)))
            yield items
def get_queue_items(retries=False, subscriber_id=None, priority=None):
    """Return queued publish items, optionally narrowed to one subscriber."""
    lookup = _get_queue_lookup(retries, priority)
    if subscriber_id:
        lookup['$and'].append({'subscriber_id': subscriber_id})
    req = ParsedRequest()
    # limit per subscriber now
    req.max_results = app.config.get('MAX_TRANSMIT_QUERY_LIMIT', 100)
    req.sort = '[("_created", 1), ("published_seq_num", 1)]'
    return get_resource_service(PUBLISH_QUEUE).get(req=req, lookup=lookup)
def index():
    """Render the stored RSS items as an RSS feed response."""
    service = superdesk.get_resource_service("rss_items")
    req = ParsedRequest()
    req.args = request.args
    req.max_results = 200
    feed = generate_feed(list(service.get(req, {})))
    return flask.Response(feed, mimetype="application/rss+xml")
def test_pagination(data_layer):
    """With page size 1, the second page still yields exactly one instrument."""
    req = ParsedRequest()
    req.max_results = 1
    req.page = 2
    found = data_layer.find('instruments', req, None)
    assert len(found) == 1
def get_published_items():
    """Return a list of items marked for publishing, in sequence order."""
    lookup = {QUEUE_STATE: PUBLISH_STATE.PENDING}
    req = ParsedRequest()
    req.sort = 'publish_sequence_no'
    req.max_results = 100
    return list(get_resource_service(PUBLISHED).get_from_mongo(req=req, lookup=lookup))
def get_expired_items(self, page_size):
    """Get expired published items that are not yet moved to legal archive.

    Yields batches of ``page_size`` items, paging on ``publish_sequence_no``.
    (The redundant initial ``sequence_filter`` assignment — immediately
    overwritten by both branches of the ``if`` — has been removed.)

    :param int page_size: number of items per batch
    :return: generator yielding lists of published items
    """
    query = {
        "query": {
            "filtered": {
                "filter": {
                    "and": [
                        {"range": {"expiry": {"lt": "now"}}},
                        {"term": {"moved_to_legal": False}},
                        {"not": {"term": {"state": CONTENT_STATE.SCHEDULED}}},
                    ]
                }
            }
        }
    }
    service = get_resource_service("published")
    req = ParsedRequest()
    req.args = {"source": json.dumps(query)}
    req.sort = '[("publish_sequence_no", 1)]'
    # initial query only to compute the page count and starting sequence no
    cursor = service.get(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, page_size))
        sequence_no = cursor[0]["publish_sequence_no"]
    logger.info("Number of items to move to legal archive: {}, pages={}".format(count, no_of_pages))
    for page in range(0, no_of_pages):
        logger.info(
            "Fetching published items "
            "for page number: {} sequence no: {}".format((page + 1), sequence_no)
        )
        req = ParsedRequest()
        page_query = deepcopy(query)
        # first page includes the starting sequence no (gte); later pages
        # start strictly after the last one yielded (gt) to avoid duplicates
        if page == 0:
            sequence_filter = {"range": {"publish_sequence_no": {"gte": sequence_no}}}
        else:
            sequence_filter = {"range": {"publish_sequence_no": {"gt": sequence_no}}}
        page_query["query"]["filtered"]["filter"]["and"].append(sequence_filter)
        req.args = {"source": json.dumps(page_query)}
        req.sort = '[("publish_sequence_no", 1)]'
        req.max_results = page_size
        cursor = service.get(req=req, lookup=None)
        items = list(cursor)
        if len(items):
            sequence_no = items[len(items) - 1]["publish_sequence_no"]
            logger.info(
                "Fetched No. of Items: {} for page: {} "
                "For import into legal archive.".format(len(items), (page + 1))
            )
            yield items
def get_expired_items(self, page_size):
    """Get expired published items that are not yet moved to legal archive.

    Yields batches of ``page_size`` items, paging on ``publish_sequence_no``.
    (The redundant initial ``sequence_filter`` assignment — immediately
    overwritten by both branches of the ``if`` — has been removed.)

    :param int page_size: number of items per batch
    :return: generator yielding lists of published items
    """
    query = {
        'query': {
            'filtered': {
                'filter': {
                    'and': [
                        {'range': {'expiry': {'lt': 'now'}}},
                        {'term': {'moved_to_legal': False}},
                        {'not': {'term': {'state': CONTENT_STATE.SCHEDULED}}}
                    ]
                }
            }
        }
    }
    service = get_resource_service('published')
    req = ParsedRequest()
    req.args = {'source': json.dumps(query)}
    req.sort = '[("publish_sequence_no", 1)]'
    # initial query only to compute the page count and starting sequence no
    cursor = service.get(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, page_size))
        sequence_no = cursor[0]['publish_sequence_no']
    logger.info('Number of items to move to legal archive: {}, pages={}'.format(count, no_of_pages))
    for page in range(0, no_of_pages):
        logger.info('Fetching published items '
                    'for page number: {} sequence no: {}'.format((page + 1), sequence_no))
        req = ParsedRequest()
        page_query = deepcopy(query)
        # first page includes the starting sequence no (gte); later pages
        # start strictly after the last one yielded (gt) to avoid duplicates
        if page == 0:
            sequence_filter = {'range': {'publish_sequence_no': {'gte': sequence_no}}}
        else:
            sequence_filter = {'range': {'publish_sequence_no': {'gt': sequence_no}}}
        page_query['query']['filtered']['filter']['and'].append(sequence_filter)
        req.args = {'source': json.dumps(page_query)}
        req.sort = '[("publish_sequence_no", 1)]'
        req.max_results = page_size
        cursor = service.get(req=req, lookup=None)
        items = list(cursor)
        if len(items):
            sequence_no = items[len(items) - 1]['publish_sequence_no']
            logger.info('Fetched No. of Items: {} for page: {} '
                        'For import into legal archive.'.format(len(items), (page + 1)))
            yield items
def _get_max_date_from_publish_queue(self):
    """Get the max ``_updated`` date from the legal publish queue collection.

    :return datetime: the latest ``_updated`` time, or None when the queue is empty
    """
    service = get_resource_service(LEGAL_PUBLISH_QUEUE_NAME)
    req = ParsedRequest()
    req.sort = '[("%s", -1)]' % config.LAST_UPDATED
    req.max_results = 1
    newest = list(service.get(req=req, lookup={}))
    if not newest:
        return None
    return newest[0][config.LAST_UPDATED]
def test_retrieve_items_after_index_rebuilt(self):
    """Archive and ingest items are still retrievable after an index rebuild."""
    with self.app.app_context():
        req = ParsedRequest()
        req.args = {}
        req.max_results = 25
        items = get_resource_service('archive').get(req, {})
        # assertEqual replaces the deprecated assertEquals alias
        self.assertEqual(10, items.count())
        items = get_resource_service('ingest').get(req, {})
        self.assertEqual(10, items.count())
def get_packages(self, doc_id):
    """Retrieve the packages referencing the article identified by doc_id.

    :return: articles of type composite
    """
    lookup = {'$and': [{'type': 'composite'}, {'groups.refs.guid': doc_id}]}
    req = ParsedRequest()
    req.max_results = 100
    return get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=lookup)
def find(self, endpoint_name, where, max_results=0):
    """Find items for given endpoint using a mongo query given as a dict.

    The request object is built here so services don't need to do it
    themselves.

    :param string endpoint_name
    :param dict where
    :param int max_results
    """
    request = ParsedRequest()
    request.where = MongoJSONEncoder().encode(where)
    request.max_results = max_results
    return self.get_from_mongo(endpoint_name, request, None)
def get_items(self, now):
    """Get the items from the archive collection that have expiry in future
    and state is published, corrected, killed.

    Yields batches of ``self.default_page_size`` items, paging on
    ``unique_id``.

    :param datetime now: current date time
    :return list: list of expired items
    """
    logger.info('Fetching expired items from archive collection.')
    # shift "now" forward so items expiring within the next expiry_minutes
    # are included in the selection
    now = now + timedelta(minutes=self.expiry_minutes)
    query = {
        'expiry': {'$gte': date_to_str(now)},
        ITEM_STATE: {'$in': [
            CONTENT_STATE.PUBLISHED,
            CONTENT_STATE.CORRECTED,
            CONTENT_STATE.KILLED,
            CONTENT_STATE.RECALLED
        ]}
    }
    req = ParsedRequest()
    req.sort = '[("unique_id", 1)]'
    req.where = json.dumps(query)
    # initial query only to compute the page count and the starting unique_id
    cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, self.default_page_size))
        unique_id = cursor[0]['unique_id']
        logger.info('Number of items to modify: {}, pages={}'.format(count, no_of_pages))
    else:
        logger.info('No items to modify.')
    for page in range(0, no_of_pages):
        logger.info('Fetching items for page number: {} unique_id: {}'.
                    format((page + 1), unique_id))
        req = ParsedRequest()
        req.sort = '[("unique_id", 1)]'
        # first page includes the starting unique_id ($gte); later pages start
        # strictly after the last id yielded ($gt) to avoid duplicates
        if page == 0:
            query['unique_id'] = {'$gte': unique_id}
        else:
            query['unique_id'] = {'$gt': unique_id}
        req.where = json.dumps(query)
        req.max_results = self.default_page_size
        cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
        items = list(cursor)
        if len(items) > 0:
            unique_id = items[len(items) - 1]['unique_id']
            logger.info('Fetched No. of Items: {} for page: {}'.format(len(items), (page + 1)))
            yield items
def get_packages(self, doc_id):
    """Retrieve package(s) referencing the article identified by doc_id.

    :param: doc_id identifier of the item in the package
    :return: articles of type composite
    """
    lookup = {
        '$and': [
            {ITEM_TYPE: CONTENT_TYPE.COMPOSITE},
            {'groups.refs.guid': doc_id},
        ]
    }
    req = ParsedRequest()
    req.max_results = 100
    return get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=lookup)
def _get_publish_queue_items_to_import(self, max_date):
    """Get the publish queue items updated on or after max_date.

    :param datetime max_date: lower bound on ``_updated``; None fetches all
    :return: cursor of publish queue items (at most 500)
    """
    service = get_resource_service("publish_queue")
    if max_date:
        lookup = {"$and": [{config.LAST_UPDATED: {"$gte": max_date}}]}
    else:
        lookup = {}
    req = ParsedRequest()
    req.max_results = 500
    return service.get(req=req, lookup=lookup)
def on_create(self, docs):
    """Assign desk_order and default content expiry to stages being created."""
    for doc in docs:
        if not doc.get('desk'):
            doc['desk_order'] = 1
            continue
        req = ParsedRequest()
        req.sort = '-desk_order'
        req.max_results = 1
        last_stage = self.get(req=req, lookup={'desk': doc['desk']})
        if doc.get('content_expiry', 0) == 0:
            doc['content_expiry'] = app.settings['CONTENT_EXPIRY_MINUTES']
        # append new stage after the highest existing desk_order on this desk
        if last_stage.count() == 0:
            doc['desk_order'] = 1
        else:
            doc['desk_order'] = last_stage[0].get('desk_order', 1) + 1
def get_queue_items():
    """Return pending or retry-due queue items for push destinations only."""
    lookup = {
        "$and": [
            {
                "$or": [
                    {"state": QueueState.PENDING.value},
                    {
                        "state": QueueState.RETRYING.value,
                        "next_retry_attempt_at": {"$lte": utcnow()},
                    },
                ]
            },
            # pull destinations are fetched by subscribers, not transmitted
            {"destination.delivery_type": {"$ne": "pull"}},
        ]
    }
    req = ParsedRequest()
    req.max_results = app.config.get("MAX_TRANSMIT_QUERY_LIMIT", 500)
    return get_resource_service(PUBLISH_QUEUE).get(req=req, lookup=lookup)
def get_queue_items(retries=False):
    """Return push queue items ready for transmission.

    :param bool retries: when True, fetch retrying items whose retry time has
        come instead of pending ones
    """
    if retries:
        lookup = {
            "$and": [
                {"state": QueueState.RETRYING.value},
                {"next_retry_attempt_at": {"$lte": utcnow()}},
                {"destination.delivery_type": {"$ne": "pull"}},
            ]
        }
    else:
        lookup = {
            "$and": [
                {"state": QueueState.PENDING.value},
                {"destination.delivery_type": {"$ne": "pull"}},
            ]
        }
    req = ParsedRequest()
    req.max_results = app.config.get("MAX_TRANSMIT_QUERY_LIMIT", 500)
    # ensure we publish in the correct sequence
    req.sort = '[("_created", 1), ("subscriber_id", 1), ("published_seq_num", 1)]'
    return get_resource_service(PUBLISH_QUEUE).get(req=req, lookup=lookup)
def get_expired_items(self, expiry_datetime=None, expiry_days=None, max_results=None, include_children=True):
    """Get the expired items.

    Generator yielding batches of expired items, sorted by ``_id`` and
    returning at most ``max_results`` items per iteration.

    :param datetime expiry_datetime: expiry reference time, defaults to `utcnow()`
    :param int expiry_days: number of days content expires, defaults to `CONTENT_API_EXPIRY_DAYS`
    :param int max_results: maximum results per iteration, defaults to `MAX_EXPIRY_QUERY_LIMIT`
    :param boolean include_children: include only root items if False, otherwise the entire item chain
    :return list: expired content_api items
    """
    if expiry_datetime is None:
        expiry_datetime = utcnow()
    if expiry_days is None:
        expiry_days = app.settings['CONTENT_API_EXPIRY_DAYS']
    if max_results is None:
        max_results = app.settings['MAX_EXPIRY_QUERY_LIMIT']

    expire_at = date_to_str(expiry_datetime - timedelta(days=expiry_days))
    last_id = None
    while True:
        conditions = [{'_updated': {'$lte': expire_at}}]
        if last_id is not None:
            # resume strictly after the last item of the previous batch
            conditions.append({'_id': {'$gt': last_id}})
        if not include_children:
            conditions.append({'ancestors': {'$exists': False}})
        req = ParsedRequest()
        req.sort = '_id'
        req.where = json.dumps({'$and': conditions})
        req.max_results = max_results
        batch = list(self.get_from_mongo(req=req, lookup=None))
        if not batch:
            break
        last_id = batch[-1]['_id']
        yield batch
def get_published_items():
    """Get all items with queue state "pending" that are not scheduled,
    or whose scheduled time has lapsed."""
    lookup = {
        QUEUE_STATE: PUBLISH_STATE.PENDING,
        "$or": [
            {ITEM_STATE: {"$ne": CONTENT_STATE.SCHEDULED}},
            {
                ITEM_STATE: CONTENT_STATE.SCHEDULED,
                "{}.utc_{}".format(SCHEDULE_SETTINGS, PUBLISH_SCHEDULE): {"$lte": utcnow()},
            },
        ],
    }
    req = ParsedRequest()
    req.sort = "publish_sequence_no"
    req.max_results = 200
    return list(get_resource_service(PUBLISHED).get_from_mongo(req=req, lookup=lookup))
def get_packages(self, doc_id, not_package_id=None):
    """Retrieve package(s) referencing the article identified by doc_id.

    :param str doc_id: identifier of the item in the package
    :param str not_package_id: package id to exclude from the result
    :return: articles of type composite
    """
    lookup = {
        '$and': [
            {ITEM_TYPE: CONTENT_TYPE.COMPOSITE},
            {'groups.refs.residRef': doc_id},
        ]
    }
    if not_package_id:
        lookup['$and'].append({config.ID_FIELD: {'$ne': not_package_id}})
    req = ParsedRequest()
    req.max_results = 100
    return get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=lookup)
def on_create(self, docs):
    """Assign desk_order, default expiry, and default-incoming flag handling
    to stages being created."""
    for doc in docs:
        if not doc.get('desk'):
            doc['desk_order'] = 1
            continue
        req = ParsedRequest()
        req.sort = '-desk_order'
        req.max_results = 1
        last_stage = self.get(req=req, lookup={'desk': doc['desk']})
        if doc.get('content_expiry', 0) == 0:
            doc['content_expiry'] = app.settings['CONTENT_EXPIRY_MINUTES']
        # append new stage after the highest existing desk_order on this desk
        if last_stage.count() == 0:
            doc['desk_order'] = 1
        else:
            doc['desk_order'] = last_stage[0].get('desk_order', 1) + 1
        # if this new one is default need to remove the old default
        if doc.get('default_incoming', False):
            self.remove_old_default(doc.get('desk'), 'default_incoming')
def test_compare_repos(self):
    """Consistency record reflects mongo/elastic counts after comparing repos."""
    with self.app.app_context():
        req = ParsedRequest()
        req.args = {}
        req.max_results = 25
        items = get_resource_service('archive').get(req, {})
        # assertEqual replaces the deprecated assertEquals alias throughout
        self.assertEqual(99, items.count())
        consistency_record = CompareRepositories().run('archive',
                                                       self.app.config['ELASTICSEARCH_URL'],
                                                       self.app.config['ELASTICSEARCH_INDEX'])
        self.assertEqual(consistency_record['mongo'], 99)
        self.assertEqual(consistency_record['elastic'], 198)
        self.assertEqual(consistency_record['identical'], 99)
        self.assertEqual(consistency_record['mongo_only'], 0)
        self.assertEqual(consistency_record['elastic_only'], 99)
        self.assertEqual(consistency_record['inconsistent'], 0)
def get_publish_queue_items(self, page_size, expired_items=None):
    """Get publish queue items that are not moved to legal.

    Yields batches of ``page_size`` items, paging on ``_id``.

    :param int page_size: batch size
    :param list expired_items: restrict to these item ids when provided
        (default changed from a mutable ``[]`` to ``None``; behavior is the
        same since only truthiness is checked)
    :return list: publish queue items
    """
    query = {"moved_to_legal": False}
    if expired_items:
        query["item_id"] = {"$in": expired_items}
    else:
        query["state"] = {"$in": [QueueState.SUCCESS.value, QueueState.CANCELED.value, QueueState.FAILED.value]}
    service = get_resource_service("publish_queue")
    req = ParsedRequest()
    req.sort = '[("_id", 1)]'
    req.where = json.dumps(query)
    # initial query only to compute the page count and the starting queue id
    cursor = service.get(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, page_size))
        queue_id = cursor[0][config.ID_FIELD]
    logger.info("Number of items to move to legal archive publish queue: {}, pages={}".format(count, no_of_pages))
    for page in range(0, no_of_pages):
        logger.info(
            "Fetching publish queue items "
            "for page number: {}. queue_id: {}".format((page + 1), queue_id)
        )
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        # BUGFIX: the original used "$gte" on every page, so the boundary item
        # (last of the previous batch) was fetched again at the start of the
        # next page. Match the sibling paginators: inclusive on page 0 only.
        # NOTE(review): ids are compared as strings here — assumes _id is
        # stored as a string; confirm against the publish_queue schema.
        if page == 0:
            query["_id"] = {"$gte": str(queue_id)}
        else:
            query["_id"] = {"$gt": str(queue_id)}
        req.where = json.dumps(query)
        req.max_results = page_size
        cursor = service.get(req=req, lookup=None)
        items = list(cursor)
        if len(items) > 0:
            queue_id = items[len(items) - 1][config.ID_FIELD]
            logger.info(
                "Fetched No. of Items: {} for page: {} "
                "For import in to legal archive publish_queue.".format(len(items), (page + 1))
            )
            yield items