def test_query_sort_by_name_case_insensetive(self):
    # NOTE(review): "insensetive" is a typo for "insensitive"; left unchanged
    # because renaming would alter the discoverable test name.
    """Sorting by ``name`` with an ICU collation (strength 1) must order the
    items case-insensitively, both ascending and descending."""
    service = get_resource_service("concept_items")
    # expected ascending order under a case-insensitive (strength 1) collation;
    # non-Latin entries sort after Latin ones
    names = [
        "A Message to Garcia",
        "and then there were none",
        "Bootstrap: Responsive Web Development",
        "Hobbit",
        "Lord of the rings",
        "the Elegance of the Hedgehog",
        "The Little Prince",
        "Гайдамаки",
    ]
    req = ParsedRequest()
    req.sort = "name"
    # collation is passed through as a request arg (JSON string)
    req.args = ImmutableMultiDict([("collation", '{"locale": "en", "strength":"1"}')])
    cursor = service.get_from_mongo(req=req, lookup={})
    self.assertEqual([i["name"] for i in cursor], names)
    # descending sort must yield the exact reverse order
    req = ParsedRequest()
    req.sort = "-name"
    req.args = ImmutableMultiDict([("collation", '{"locale": "en", "strength":"1"}')])
    names.reverse()
    cursor = service.get_from_mongo(req=req, lookup={})
    self.assertEqual([i["name"] for i in cursor], names)
def test_service_use_definition_text_instead_of_definition_html(self):
    """Sorting on ``definition_html`` must still return items whose
    ``definition_text`` values come back in the matching order."""
    service = get_resource_service("concept_items")
    # plain-text definitions expected when sorting ascending on definition_html
    definitions = [
        "A Message to Garcia is a widely distributed essay written by Elbert Hubbard in 1899, "
        "expressing the value of individual initiative and conscientiousness in work.",
        "and then there were none dame Agatha Mary Clarissa Christie, Lady Mallowan, DBE "
        "(née Miller; 15 September 1890 – 12 January 1976) was an English writer.",
        "Bootstrap: Responsive Web Development. Discover how easy it is to design killer "
        "interfaces and responsive websites with the Bootstrap framework. ",
        "Hobbit is a children's fantasy novel by English author J. R. R. Tolkien.",
        "Lord of the rings is a children's fantasy novel by English author J. R. R. "
        "Tolkien.",
        "the Elegance of the Hedgehog is a novel about parallels and the concealment of one’s "
        "true passions in life.",
        "The Little Prince is a novella, the most famous work of French aristocrat, writer, "
        "poet, and pioneering aviator Antoine de Saint-Exupéry.",
        "Гайдамаки — історико-героїчна поема Шевченка, перший український історичний роман у "
        "віршах.",
    ]
    req = ParsedRequest()
    req.sort = "definition_html"
    cursor = service.get(req=req, lookup={})
    self.assertEqual([i["definition_text"] for i in cursor], definitions)
    # descending sort must yield the exact reverse order
    req = ParsedRequest()
    req.sort = "-definition_html"
    definitions.reverse()
    cursor = service.get(req=req, lookup={})
    self.assertEqual([i["definition_text"] for i in cursor], definitions)
def get_items(self, now):
    """Get the items from the archive collection that have expiry in future
    (relative to ``now`` plus the configured expiry window) and state is
    published, corrected, killed or recalled.

    Yields pages of ``self.default_page_size`` items, paging on ``unique_id``.

    :param datetime now: current date time
    :return list: list of expired items
    """
    logger.info('Fetching expired items from archive collection.')
    # shift the cut-off forward by the expiry window before querying
    now = now + timedelta(minutes=self.expiry_minutes)
    query = {
        'expiry': {'$gte': date_to_str(now)},
        ITEM_STATE: {'$in': [
            CONTENT_STATE.PUBLISHED,
            CONTENT_STATE.CORRECTED,
            CONTENT_STATE.KILLED,
            CONTENT_STATE.RECALLED
        ]}
    }
    req = ParsedRequest()
    req.sort = '[("unique_id", 1)]'
    req.where = json.dumps(query)
    cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, self.default_page_size))
        # lowest unique_id is the starting cursor for page 0
        unique_id = cursor[0]['unique_id']
        logger.info('Number of items to modify: {}, pages={}'.format(count, no_of_pages))
    else:
        logger.info('No items to modify.')
    for page in range(0, no_of_pages):
        logger.info('Fetching items for page number: {} unique_id: {}'.format((page + 1), unique_id))
        req = ParsedRequest()
        req.sort = '[("unique_id", 1)]'
        # page 0 includes the starting id ($gte); later pages start strictly
        # after the last id already yielded ($gt) to avoid duplicates
        if page == 0:
            query['unique_id'] = {'$gte': unique_id}
        else:
            query['unique_id'] = {'$gt': unique_id}
        req.where = json.dumps(query)
        req.max_results = self.default_page_size
        cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
        items = list(cursor)
        if len(items) > 0:
            # advance the paging cursor to the last id of this batch
            unique_id = items[len(items) - 1]['unique_id']
            logger.info('Fetched No. of Items: {} for page: {}'.format(len(items), (page + 1)))
        yield items
def get_expired_items(self, page_size):
    """Yield pages of expired published items not yet moved to legal archive.

    Pages through the ``published`` resource ordered by
    ``publish_sequence_no``, using the last sequence number of each page as
    the cursor for the next one.

    :param int page_size: number of items fetched per page
    :return: generator yielding lists of items
    """
    query = {
        "query": {
            "filtered": {
                "filter": {
                    "and": [
                        {"range": {"expiry": {"lt": "now"}}},
                        {"term": {"moved_to_legal": False}},
                        {"not": {"term": {"state": CONTENT_STATE.SCHEDULED}}},
                    ]
                }
            }
        }
    }

    service = get_resource_service("published")
    req = ParsedRequest()
    req.args = {"source": json.dumps(query)}
    req.sort = '[("publish_sequence_no", 1)]'
    cursor = service.get(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, page_size))
        # lowest sequence number is the starting cursor for page 0
        sequence_no = cursor[0]["publish_sequence_no"]
        logger.info("Number of items to move to legal archive: {}, pages={}".format(count, no_of_pages))

    for page in range(0, no_of_pages):
        logger.info(
            "Fetching published items "
            "for page number: {} sequence no: {}".format((page + 1), sequence_no)
        )
        req = ParsedRequest()
        page_query = deepcopy(query)
        # page 0 includes the starting sequence number (gte); later pages
        # start strictly after the last one yielded (gt).  The unconditional
        # pre-assignment of sequence_filter before this branch was dead code
        # and has been removed.
        if page == 0:
            sequence_filter = {"range": {"publish_sequence_no": {"gte": sequence_no}}}
        else:
            sequence_filter = {"range": {"publish_sequence_no": {"gt": sequence_no}}}
        page_query["query"]["filtered"]["filter"]["and"].append(sequence_filter)

        req.args = {"source": json.dumps(page_query)}
        req.sort = '[("publish_sequence_no", 1)]'
        req.max_results = page_size
        cursor = service.get(req=req, lookup=None)
        items = list(cursor)
        if len(items):
            # advance the paging cursor
            sequence_no = items[len(items) - 1]["publish_sequence_no"]
        logger.info(
            "Fetched No. of Items: {} for page: {} "
            "For import into legal archive.".format(len(items), (page + 1))
        )
        yield items
def get_expired_items(self, page_size):
    """Yield pages of expired published items not yet moved to legal archive.

    Pages through the 'published' resource ordered by publish_sequence_no,
    using the last sequence number of each page as the cursor for the next.

    :param int page_size: number of items fetched per page
    :return: generator yielding lists of items
    """
    query = {
        'query': {
            'filtered': {
                'filter': {
                    'and': [
                        {'range': {'expiry': {'lt': 'now'}}},
                        {'term': {'moved_to_legal': False}},
                        {'not': {'term': {'state': CONTENT_STATE.SCHEDULED}}}
                    ]
                }
            }
        }
    }

    service = get_resource_service('published')
    req = ParsedRequest()
    req.args = {'source': json.dumps(query)}
    req.sort = '[("publish_sequence_no", 1)]'
    cursor = service.get(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, page_size))
        # lowest sequence number is the starting cursor for page 0
        sequence_no = cursor[0]['publish_sequence_no']
        logger.info('Number of items to move to legal archive: {}, pages={}'.format(count, no_of_pages))

    for page in range(0, no_of_pages):
        logger.info('Fetching published items '
                    'for page number: {} sequence no: {}'.format((page + 1), sequence_no))
        req = ParsedRequest()
        page_query = deepcopy(query)
        # page 0 includes the starting sequence number (gte); later pages
        # start strictly after the last one yielded (gt).  The unconditional
        # pre-assignment of sequence_filter before this branch was dead code
        # and has been removed.
        if page == 0:
            sequence_filter = {'range': {'publish_sequence_no': {'gte': sequence_no}}}
        else:
            sequence_filter = {'range': {'publish_sequence_no': {'gt': sequence_no}}}
        page_query['query']['filtered']['filter']['and'].append(sequence_filter)

        req.args = {'source': json.dumps(page_query)}
        req.sort = '[("publish_sequence_no", 1)]'
        req.max_results = page_size
        cursor = service.get(req=req, lookup=None)
        items = list(cursor)
        if len(items):
            # advance the paging cursor
            sequence_no = items[len(items) - 1]['publish_sequence_no']
        logger.info('Fetched No. of Items: {} for page: {} '
                    'For import into legal archive.'.format(len(items), (page + 1)))
        yield items
def get_expired_items(self, page_size):
    """Yield pages of expired published items not yet moved to legal archive.

    Pages through the 'published' resource ordered by publish_sequence_no,
    using the last sequence number of each page as the cursor for the next.

    :param int page_size: number of items fetched per page
    :return: generator yielding lists of items
    """
    query = {
        'query': {
            'filtered': {
                'filter': {
                    'and': [
                        {'range': {'expiry': {'lt': 'now'}}},
                        {'term': {'moved_to_legal': False}},
                        {'not': {'term': {'state': CONTENT_STATE.SCHEDULED}}}
                    ]
                }
            }
        }
    }

    service = get_resource_service('published')
    req = ParsedRequest()
    req.args = {'source': json.dumps(query)}
    req.sort = '[("publish_sequence_no", 1)]'
    cursor = service.get(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, page_size))
        # lowest sequence number is the starting cursor for page 0
        sequence_no = cursor[0]['publish_sequence_no']
        logger.info('Number of items to move to legal archive: {}, pages={}'.format(count, no_of_pages))

    for page in range(0, no_of_pages):
        logger.info('Fetching published items '
                    'for page number: {} sequence no: {}'.format((page + 1), sequence_no))
        req = ParsedRequest()
        page_query = deepcopy(query)
        # page 0 includes the starting sequence number (gte); later pages
        # start strictly after the last one yielded (gt).  The unconditional
        # pre-assignment of sequence_filter before this branch was dead code
        # and has been removed.
        if page == 0:
            sequence_filter = {'range': {'publish_sequence_no': {'gte': sequence_no}}}
        else:
            sequence_filter = {'range': {'publish_sequence_no': {'gt': sequence_no}}}
        page_query['query']['filtered']['filter']['and'].append(sequence_filter)

        req.args = {'source': json.dumps(page_query)}
        req.sort = '[("publish_sequence_no", 1)]'
        req.max_results = page_size
        cursor = service.get(req=req, lookup=None)
        items = list(cursor)
        if len(items):
            # advance the paging cursor
            sequence_no = items[len(items) - 1]['publish_sequence_no']
        logger.info('Fetched No. of Items: {} for page: {} '
                    'For import into legal archive.'.format(len(items), (page + 1)))
        yield items
def get_publish_queue_items(self, page_size, expired_items=None):
    """Yield pages of publish queue items that are not moved to legal.

    :param int page_size: batch size
    :param list expired_items: optional item ids to restrict the query to
    :return list: publish queue items
    """
    if expired_items is None:
        expired_items = []

    query = {"moved_to_legal": False}

    if expired_items:
        query["item_id"] = {"$in": expired_items}
    else:
        query["state"] = {"$in": [QueueState.SUCCESS.value, QueueState.CANCELED.value, QueueState.FAILED.value]}

    service = get_resource_service("publish_queue")
    req = ParsedRequest()
    req.sort = '[("_id", 1)]'
    req.where = json.dumps(query)
    cursor = service.get(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, page_size))
        # lowest id is the starting cursor for page 0
        queue_id = cursor[0][config.ID_FIELD]
        logger.info("Number of items to move to legal archive publish queue: {}, pages={}"
                    .format(count, no_of_pages))

    for page in range(0, no_of_pages):
        logger.info("Fetching publish queue items "
                    "for page number: {}. queue_id: {}".format((page + 1), queue_id))
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        # page 0 includes the starting id ($gte); later pages must start
        # strictly after the last id already yielded ($gt), otherwise the
        # boundary item is returned twice (was always $gte before — same
        # fix pattern as the archive paging code elsewhere in this project).
        # NOTE(review): str() on the id assumes string-comparable ids in the
        # where clause — confirm against the resource's id type.
        if page == 0:
            query["_id"] = {"$gte": str(queue_id)}
        else:
            query["_id"] = {"$gt": str(queue_id)}
        req.where = json.dumps(query)
        req.max_results = page_size
        cursor = service.get(req=req, lookup=None)
        items = list(cursor)
        if len(items) > 0:
            # advance the paging cursor
            queue_id = items[len(items) - 1][config.ID_FIELD]
        logger.info("Fetched No. of Items: {} for page: {} "
                    "For import in to legal archive publish_queue.".format(len(items), (page + 1)))
        yield items
def get_items(self, now):
    """Get the items from the archive collection that have expiry in future
    (relative to ``now`` plus the configured expiry window) and state is
    published, corrected, killed or recalled.

    Yields pages of ``self.default_page_size`` items, paging on ``unique_id``.

    :param datetime now: current date time
    :return list: list of expired items
    """
    logger.info('Fetching expired items from archive collection.')
    # shift the cut-off forward by the expiry window before querying
    now = now + timedelta(minutes=self.expiry_minutes)
    query = {
        'expiry': {'$gte': date_to_str(now)},
        ITEM_STATE: {'$in': [
            CONTENT_STATE.PUBLISHED,
            CONTENT_STATE.CORRECTED,
            CONTENT_STATE.KILLED,
            CONTENT_STATE.RECALLED
        ]}
    }
    req = ParsedRequest()
    req.sort = '[("unique_id", 1)]'
    req.where = json.dumps(query)
    cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, self.default_page_size))
        # lowest unique_id is the starting cursor for page 0
        unique_id = cursor[0]['unique_id']
        logger.info('Number of items to modify: {}, pages={}'.format(count, no_of_pages))
    else:
        logger.info('No items to modify.')
    for page in range(0, no_of_pages):
        logger.info('Fetching items for page number: {} unique_id: {}'.
                    format((page + 1), unique_id))
        req = ParsedRequest()
        req.sort = '[("unique_id", 1)]'
        # page 0 includes the starting id ($gte); later pages start strictly
        # after the last id already yielded ($gt) to avoid duplicates
        if page == 0:
            query['unique_id'] = {'$gte': unique_id}
        else:
            query['unique_id'] = {'$gt': unique_id}
        req.where = json.dumps(query)
        req.max_results = self.default_page_size
        cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
        items = list(cursor)
        if len(items) > 0:
            # advance the paging cursor to the last id of this batch
            unique_id = items[len(items) - 1]['unique_id']
            logger.info('Fetched No. of Items: {} for page: {}'.format(len(items), (page + 1)))
        yield items
def get_expired_items(self, now):
    """Return a mongo cursor over expired content from the published collection.

    :param now: current datetime used to build the expiry query
    :return: cursor over matching published items (at most 100)
    """
    logger.info('Get expired content from published')
    lookup = self.get_query_for_expired_items(now)
    request = ParsedRequest()
    request.sort = '_created'
    request.max_results = 100
    return superdesk.get_resource_service('published').get_from_mongo(req=request, lookup=lookup)
def get_next_order_sequence(self, blog_id):
    """Return the next post order number for a blog and advance its counter.

    Atomically increments ``posts_order_sequence`` on the blog and returns
    the pre-increment value.  Blogs created before the sequence existed fall
    back to scanning their posts for the highest ``order``.

    :param blog_id: id of the blog; ``None`` yields 0
    :return int: order value to use for the next post
    """
    if blog_id is None:
        return 0
    # get next order sequence and increment it
    blog = get_resource_service('blogs').find_and_modify(
        query={'_id': blog_id},
        update={'$inc': {'posts_order_sequence': 1}},
        upsert=False)
    if blog:
        # NOTE(review): a stored sequence of 0 is treated the same as missing
        # here because `... or None` maps 0 to None — confirm intended
        order = blog and blog.get('posts_order_sequence') or None
        # support previous LB version when the sequence was not save into the blog
        if order is None:
            # find the highest order in the blog
            req = ParsedRequest()
            req.sort = '-order'
            req.max_results = 1
            post = next(self.get_from_mongo(req=req, lookup={'blog': blog_id}), None)
            if post and post.get('order') is not None:
                order = post.get('order') + 1
                # save the order into the blog
                get_resource_service('blogs').update(blog_id, {'posts_order_sequence': order + 1}, blog)
            else:
                order = 0
    else:
        order = 0
    return order
def purge_orphaned_item_audits(self):
    """
    Purge audit items older than the expiry that do not have an associated
    entry in the archive collection, scanning in batches of 1000 by _id.
    :return:
    """
    service = superdesk.get_resource_service('audit')
    current_id = None
    logger.info('Starting to purge audit logs of content items not in archive at {}'.format(utcnow()))
    # Scan the audit collection for items to delete
    while True:
        query = deepcopy(self.item_entry_query)
        query['$and'].append({'_updated': {'$lte': date_to_str(self.expiry)}})
        if current_id:
            # resume strictly after the last id processed in the previous batch
            query['$and'].append({'_id': {'$gt': current_id}})
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.projection = '{"_id": 1, "audit_id":1}'
        req.max_results = 1000
        audits = service.get_from_mongo(req=req, lookup=query)
        items = list([(item['_id'], item['audit_id']) for item in audits])
        if len(items) == 0:
            logger.info('Finished purging audit logs of content items not in archive at {}'.format(utcnow()))
            return
        logger.info('Found {} orphaned audit items at {}'.format(len(items), utcnow()))
        current_id = items[len(items) - 1][0]
        # audits whose referenced item id is NOT present in archive are orphans
        batch_ids = set([i[1] for i in items])
        archive_ids = self._get_archive_ids(batch_ids)
        ids = (batch_ids - archive_ids)
        audit_ids = [i[0] for i in items if i[1] in ids]
        logger.info('Deleting {} orphaned audit items at {}'.format(len(audit_ids), utcnow()))
        service.delete_ids_from_mongo(audit_ids)
def get_expired_items(self, expiry_datetime, invalid_only=False):
    """Get the expired items.

    Matches items whose expiry has passed and which either belong to a desk
    or are spiked without a desk; the ``invalid_only`` flag restricts the
    result to items flagged with an invalid expiry status (or excludes them).

    :param datetime expiry_datetime: expiry datetime
    :param bool invalid_only: True only invalid items
    :return pymongo.cursor: expired non published items.
    """
    conditions = [
        {'expiry': {'$lte': date_to_str(expiry_datetime)}},
        {'$or': [
            {'task.desk': {'$ne': None}},
            {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
        ]},
    ]
    if invalid_only:
        conditions.append({'expiry_status': 'invalid'})
    else:
        conditions.append({'expiry_status': {'$ne': 'invalid'}})

    request = ParsedRequest()
    request.max_results = config.MAX_EXPIRY_QUERY_LIMIT
    request.sort = 'expiry,_created'
    return self.get_from_mongo(req=request, lookup={'$and': conditions})
def purge_old_entries(self):
    """
    Purge entries older than the expiry that are not related to archive items,
    scanning in batches of 1000 ordered by _updated.
    :return:
    """
    service = superdesk.get_resource_service('audit')
    current_date = None
    while True:
        lookup = {
            '$and': [
                self.not_item_entry_query,
                {'_updated': {'$lte': date_to_str(self.expiry)}}
            ]
        }
        if current_date:
            # resume from the last _updated value seen; $gte re-examines
            # equal timestamps, and progress is guaranteed because matched
            # rows are deleted below
            lookup['$and'].append({'_updated': {'$gte': current_date}})
        req = ParsedRequest()
        req.sort = '[("_updated", 1)]'
        req.projection = '{"_id": 1, "_updated": 1}'
        req.max_results = 1000
        audits = service.get_from_mongo(req=req, lookup=lookup)
        if audits.count() == 0:
            break
        items = list([(item['_id'], item['_updated']) for item in audits])
        current_date = items[len(items) - 1][1]
        service.delete({'_id': {'$in': [i[0] for i in items]}})
def purge_orphaned_item_audits(self):
    """
    Purge audit items older than the expiry that do not have an associated
    entry in the archive collection, scanning in batches of 1000 by _id.
    :return:
    """
    service = superdesk.get_resource_service('audit')
    current_id = None
    # Scan the audit collection for items to delete
    while True:
        query = deepcopy(self.item_entry_query)
        query['$and'].append({'_updated': {'$lte': date_to_str(self.expiry)}})
        if current_id:
            # resume strictly after the last id processed in the previous batch
            query['$and'].append({'_id': {'$gt': current_id}})
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        # project every field _extract_item_id may read the item id from
        req.projection = '{"_id": 1, "extra.guid": 1, "extra._id": 1, "extra.item_id": 1, "extra.item": 1}'
        req.max_results = 1000
        audits = service.get_from_mongo(req=req, lookup=query)
        if audits.count() == 0:
            break
        items = list([(item['_id'], self._extract_item_id(item)) for item in audits])
        current_id = items[len(items) - 1][0]
        # audits whose referenced item id is NOT present in archive are orphans
        batch_ids = set([i[1] for i in items])
        archive_ids = self._get_archive_ids(batch_ids)
        ids = (batch_ids - archive_ids)
        audit_ids = [i[0] for i in items if i[1] in ids]
        service.delete({'_id': {'$in': audit_ids}})
def get_expired_items(self, expired_date_time, limit=100):
    """Fetch expired articles from the published collection.

    An article qualifies when ``can_be_removed`` is set, or when its expiry
    has passed, it is not scheduled, and post-publish actions are allowed.

    :param expired_date_time: cut-off datetime for expiry
    :param limit: maximum number of articles returned
    :return: expired articles from published collection
    """
    logger.info("Get expired content from published")
    expired_clause = {
        "$and": [
            {"expiry": {"$lte": expired_date_time}},
            {ITEM_STATE: {"$ne": CONTENT_STATE.SCHEDULED}},
            {"allow_post_publish_actions": True},
        ]
    }
    lookup = {"$or": [{"can_be_removed": True}, expired_clause]}
    request = ParsedRequest()
    request.sort = "_created"
    request.max_results = limit
    return superdesk.get_resource_service("published").get_from_mongo(req=request, lookup=lookup)
def get_next_order_sequence(self, blog_id):
    """Return the next post order number for the given blog.

    Atomically increments the blog's ``posts_order_sequence`` and returns
    the pre-increment value; legacy blogs without a stored sequence fall
    back to the highest existing post order.

    :param blog_id: id of the blog; ``None`` yields 0
    :return int: order value to use for the next post
    """
    if blog_id is None:
        return 0
    # bump the stored counter; find_and_modify returns the pre-update doc
    blog = get_resource_service('blogs').find_and_modify(
        query={'_id': blog_id},
        update={'$inc': {'posts_order_sequence': 1}},
        upsert=False)
    if not blog:
        return 0
    # a stored value of 0 is treated like a missing sequence (legacy data)
    order = blog.get('posts_order_sequence') or None
    if order is not None:
        return order
    # legacy blog: derive the sequence from the newest post's order
    req = ParsedRequest()
    req.sort = '-order'
    req.max_results = 1
    latest = next(self.get_from_mongo(req=req, lookup={'blog': blog_id}), None)
    if latest is None or latest.get('order') is None:
        return 0
    order = latest.get('order') + 1
    # persist the recovered sequence back onto the blog document
    get_resource_service('blogs').update(blog_id, {'posts_order_sequence': order + 1}, blog)
    return order
def get_history_items(self, last_id, gte, item_id, chunk_size=0):
    """Yield chunks of archive_history items ordered by (_id, version).

    :param last_id: resume strictly after this history id (or None)
    :param gte: only include entries created at/after this datetime (or None)
    :param item_id: restrict to a single item id (or None)
    :param int chunk_size: page size; 0 means the service default
    :return: generator yielding lists of history items
    """
    history_service = get_resource_service('archive_history')

    last_processed_id = last_id
    while True:
        req = ParsedRequest()
        req.sort = '[("_id", 1), ("version", 1)]'

        conditions = []
        if gte:
            conditions.append({'_created': {'$gte': date_to_str(gte)}})
        if item_id:
            conditions.append({'item_id': str(item_id)})
        if last_processed_id:
            # resume strictly after the last id already yielded
            conditions.append({'_id': {'$gt': str(last_processed_id)}})

        # MongoDB rejects an empty $and array, so only emit the clause when
        # at least one condition applies (previously {'$and': []} was sent
        # unconditionally, which fails when all filters are absent)
        req.where = json.dumps({'$and': conditions} if conditions else {})

        if chunk_size > 0:
            req.max_results = int(chunk_size)

        items = list(history_service.get(req=req, lookup=None))
        if len(items) < 1:
            break
        last_processed_id = items[-1][config.ID_FIELD]
        yield items
def get_filters(self): """Retrieve all of the available filter conditions and content filters if they have not yet been retrieved or they have been updated. This avoids the filtering functions having to repeatedly retireve the individual filter records. :return: """ # Get the most recent update time to the filter conditions and content_filters req = ParsedRequest() req.sort = '-_updated' req.max_results = 1 mindate = datetime.min.replace(tzinfo=pytz.UTC) latest_fc = next(get_resource_service('filter_conditions').get_from_mongo(req=req, lookup=None), {}).get('_updated', mindate) latest_cf = next(get_resource_service('content_filters').get_from_mongo(req=req, lookup=None), {}).get('_updated', mindate) if not self.filters or \ latest_fc > self.filters.get('latest_filter_conditions', mindate) or latest_fc == mindate or \ latest_cf > self.filters.get('latest_content_filters', mindate) or latest_cf == mindate: logger.debug('Getting content filters and filter conditions') self.filters = dict() self.filters['filter_conditions'] = dict() self.filters['content_filters'] = dict() for fc in get_resource_service('filter_conditions').get(req=None, lookup={}): self.filters['filter_conditions'][fc.get('_id')] = {'fc': fc} self.filters['latest_filter_conditions'] = fc.get('_updated') if fc.get('_updated') > self.filters.get( 'latest_filter_conditions', mindate) else self.filters.get('latest_filter_conditions', mindate) for cf in get_resource_service('content_filters').get(req=None, lookup={}): self.filters['content_filters'][cf.get('_id')] = {'cf': cf} self.filters['latest_content_filters'] = cf.get('_updated') if cf.get('_updated') > self.filters.get( 'latest_content_filters', mindate) else self.filters.get('latest_content_filters', mindate) else: logger.debug('Using chached content filters and filters conditions')
def get_expired_items(expired_date_time, limit=100):
    """Fetch expired articles from the published collection.

    An article qualifies when ``can_be_removed`` is set, or when its expiry
    has passed, it is not scheduled, and post-publish actions are allowed.

    :param expired_date_time: cut-off datetime for expiry
    :param limit: maximum number of articles returned
    :return: expired articles from published collection
    """
    logger.info('Get expired content from published')
    overdue_conditions = [
        {'expiry': {'$lte': expired_date_time}},
        {ITEM_STATE: {'$ne': CONTENT_STATE.SCHEDULED}},
        {'allow_post_publish_actions': True},
    ]
    lookup = {'$or': [{'can_be_removed': True}, {'$and': overdue_conditions}]}
    request = ParsedRequest()
    request.sort = '_created'
    request.max_results = limit
    return superdesk.get_resource_service('published').get_from_mongo(req=request, lookup=lookup)
def purge_old_entries(self):
    """
    Purge entries older than the expiry that are not related to archive items,
    scanning in batches of 1000 ordered by _id.
    :return:
    """
    service = superdesk.get_resource_service('audit')
    current_id = None
    logger.info('Starting to purge audit logs of none content items at {}'.format(utcnow()))
    while True:
        lookup = {'$and': [self.not_item_entry_query,
                           {'_updated': {'$lte': date_to_str(self.expiry)}}]}
        if current_id:
            # resume strictly after the last id deleted in the previous batch
            lookup['$and'].append({'_id': {'$gt': current_id}})
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.projection = '{"_id": 1}'
        req.max_results = 1000
        audits = service.get_from_mongo(req=req, lookup=lookup)
        items = list(item.get('_id') for item in audits)
        if len(items) == 0:
            logger.info('Finished purging audit logs of none content items at {}'.format(utcnow()))
            return
        logger.info('Found {} audit items at {}'.format(len(items), utcnow()))
        current_id = items[len(items) - 1]
        logger.info('Deleting {} old audit items'.format(len(items)))
        service.delete_ids_from_mongo(items)
def get_filters(self): """Retrieve all of the available filter conditions and content filters if they have not yet been retrieved or they have been updated. This avoids the filtering functions having to repeatedly retireve the individual filter records. :return: """ # Get the most recent update time to the filter conditions and content_filters req = ParsedRequest() req.sort = '-_updated' req.max_results = 1 mindate = datetime.min.replace(tzinfo=pytz.UTC) latest_fc = next(get_resource_service('filter_conditions').get_from_mongo(req=req, lookup=None), {}).get('_updated', mindate) latest_cf = next(get_resource_service('content_filters').get_from_mongo(req=req, lookup=None), {}).get('_updated', mindate) if not self.filters or \ latest_fc > self.filters.get('latest_filter_conditions', mindate) or latest_fc == mindate or \ latest_cf > self.filters.get('latest_content_filters', mindate) or latest_cf == mindate: logger.debug('Getting content filters and filter conditions') self.filters = dict() self.filters['filter_conditions'] = dict() self.filters['content_filters'] = dict() for fc in get_resource_service('filter_conditions').get(req=None, lookup={}): self.filters['filter_conditions'][fc.get('_id')] = {'fc': fc} self.filters['latest_filter_conditions'] = fc.get('_updated') if fc.get('_updated') > self.filters.get( 'latest_filter_conditions', mindate) else self.filters.get('latest_filter_conditions', mindate) for cf in get_resource_service('content_filters').get(req=None, lookup={}): self.filters['content_filters'][cf.get('_id')] = {'cf': cf} self.filters['latest_content_filters'] = cf.get('_updated') if cf.get('_updated') > self.filters.get( 'latest_content_filters', mindate) else self.filters.get('latest_content_filters', mindate) else: logger.debug('Using chached content filters and filters conditions')
def get_published_takes(self, takes_package):
    """Get all the published takes in the takes package.

    :param takes_package: takes package
    :return: List of publishes takes.
    """
    refs = self.get_package_refs(takes_package)
    if not refs:
        return []

    take_ids = [ref.get(RESIDREF) for ref in refs]
    lookup = {
        '$and': [
            {config.ID_FIELD: {'$in': take_ids}},
            {ITEM_STATE: {'$in': [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]}},
        ]
    }
    request = ParsedRequest()
    # order the takes by their sequence number
    request.sort = SEQUENCE
    return list(get_resource_service(ARCHIVE).get_from_mongo(req=request, lookup=lookup))
def get_overdue_scheduled_items(expired_date_time, resource, limit=100):
    """Fetch overdue scheduled articles from the given collection.

    An item is overdue when it is still in the 'scheduled' state and its
    publish_schedule is at or before *expired_date_time*.

    :param expired_date_time: DateTime that scheduled tate will be checked against
    :param resource: Name of the resource to check the data from
    :param limit: Number of return items
    :return: overdue scheduled articles from published collection
    """
    logger.info('Get overdue scheduled content from {}'.format(resource))
    lookup = {
        '$and': [
            {'publish_schedule': {'$lte': expired_date_time}},
            {ITEM_STATE: CONTENT_STATE.SCHEDULED},
        ]
    }
    request = ParsedRequest()
    request.sort = '_modified'
    request.max_results = limit
    return superdesk.get_resource_service(resource).get_from_mongo(req=request, lookup=lookup)
def find_articles_to_kill(self, lookup, include_other_takes=True):
    """Finds the article to kill.

    If the article is associated with Digital Story then Digital Story will
    also be fetched. If the Digital Story has more takes then all of them
    would be fetched.

    :param lookup: query to find the main article to be killed
    :type lookup: dict
    :param bool include_other_takes: also collect sibling takes of the package
    :return: list of articles to be killed (None when nothing matches lookup)
    :rtype: list
    """
    archived_doc = self.find_one(req=None, **lookup)

    if not archived_doc:
        return

    req = ParsedRequest()
    # fetch the highest archived version of the matched item
    req.sort = '[("%s", -1)]' % config.VERSION
    archived_doc = list(self.get(req=req, lookup={'item_id': archived_doc['item_id']}))[0]
    articles_to_kill = [archived_doc]
    takes_package_id = self._get_take_package_id(archived_doc)
    if takes_package_id:
        takes_package = self.get_archived_takes_package(takes_package_id,
                                                        archived_doc['item_id'],
                                                        archived_doc['_current_version'],
                                                        include_other_takes)
        articles_to_kill.append(takes_package)

        if include_other_takes:
            for takes_ref in self._get_package_refs(takes_package):
                # skip the take that is the article itself
                if takes_ref[RESIDREF] != archived_doc[GUID_FIELD]:
                    take = list(self.get(req=req, lookup={'item_id': takes_ref[RESIDREF]}))[0]
                    articles_to_kill.append(take)

    return articles_to_kill
def on_create(self, docs):
    """Set desk_order and expiry defaults when stages are created.

    When the new stage is flagged as working and/or incoming stage, the
    desk's previous default stage of that kind is cleared.
    """
    for doc in docs:
        desk = doc.get('desk')
        if not desk:
            doc['desk_order'] = 1
            continue

        # find the current highest desk_order on this desk
        req = ParsedRequest()
        req.sort = '-desk_order'
        req.max_results = 1
        prev_stage = self.get(req=req, lookup={'desk': doc['desk']})

        # an expiry of 0 means "no expiry"
        if doc.get('content_expiry') == 0:
            doc['content_expiry'] = None

        doc['desk_order'] = 1 if prev_stage.count() == 0 else prev_stage[0].get('desk_order', 1) + 1

        # if this new one is default then remove the old default
        if doc.get('working_stage', False):
            self.remove_old_default(desk, 'working_stage')
        if doc.get('default_incoming', False):
            self.remove_old_default(desk, 'default_incoming')
def _find_articles_to_kill(self, lookup):
    """
    Finds the article to kill.

    If the article is associated with Digital Story then Digital Story will
    also be fetched. If the Digital Story has more takes then all of them
    would be fetched.

    :param lookup: query to find the main article to be killed
    :type lookup: dict
    :return: list of articles to be killed (None when nothing matches lookup)
    :rtype: list
    """
    archived_doc = self.find_one(req=None, **lookup)

    # guard against a lookup that matches nothing; without this the
    # subscript below raises TypeError on None (the public
    # find_articles_to_kill variant already guards this way)
    if not archived_doc:
        return

    req = ParsedRequest()
    # fetch the highest archived version of the matched item
    req.sort = '[("%s", -1)]' % config.VERSION
    archived_doc = list(self.get(req=req, lookup={'item_id': archived_doc['item_id']}))[0]
    articles_to_kill = [archived_doc]
    takes_package_service = TakesPackageService()
    takes_package_id = takes_package_service.get_take_package_id(archived_doc)
    if takes_package_id:
        takes_package = list(self.get(req=req, lookup={'item_id': takes_package_id}))[0]
        articles_to_kill.append(takes_package)

        for takes_ref in takes_package_service.get_package_refs(takes_package):
            # skip the take that is the article itself
            if takes_ref[RESIDREF] != archived_doc[GUID_FIELD]:
                take = list(self.get(req=req, lookup={'item_id': takes_ref[RESIDREF]}))[0]
                articles_to_kill.append(take)

    return articles_to_kill
def _find_articles_to_kill(self, lookup):
    """
    Finds the article to kill.

    If the article is associated with Digital Story then Digital Story will
    also be fetched. If the Digital Story has more takes then all of them
    would be fetched.

    :param lookup: query to find the main article to be killed
    :type lookup: dict
    :return: list of articles to be killed (None when nothing matches lookup)
    :rtype: list
    """
    archived_doc = self.find_one(req=None, **lookup)

    # guard against a lookup that matches nothing; without this the
    # subscript below raises TypeError on None (the public
    # find_articles_to_kill variant already guards this way)
    if not archived_doc:
        return

    req = ParsedRequest()
    # fetch the highest archived version of the matched item
    req.sort = '[("%s", -1)]' % config.VERSION
    archived_doc = list(
        self.get(req=req, lookup={'item_id': archived_doc['item_id']}))[0]
    articles_to_kill = [archived_doc]
    takes_package_service = TakesPackageService()
    takes_package_id = takes_package_service.get_take_package_id(
        archived_doc)
    if takes_package_id:
        takes_package = list(
            self.get(req=req, lookup={'item_id': takes_package_id}))[0]
        articles_to_kill.append(takes_package)

        for takes_ref in takes_package_service.get_package_refs(
                takes_package):
            # skip the take that is the article itself
            if takes_ref[RESIDREF] != archived_doc[GUID_FIELD]:
                take = list(
                    self.get(req=req,
                             lookup={'item_id': takes_ref[RESIDREF]}))[0]
                articles_to_kill.append(take)

    return articles_to_kill
def get_expired_items(self, expiry_datetime, invalid_only=False):
    """Get the expired items.

    Where content state is not scheduled and the item matches given parameters

    :param datetime expiry_datetime: expiry datetime
    :param bool invalid_only: True only invalid items
    :return pymongo.cursor: expired non published items.
    """
    # restrict to (or exclude) items flagged with an invalid expiry status
    if invalid_only:
        status_clause = {'expiry_status': 'invalid'}
    else:
        status_clause = {'expiry_status': {'$ne': 'invalid'}}

    lookup = {
        '$and': [
            {'expiry': {'$lte': date_to_str(expiry_datetime)}},
            {'$or': [
                {'task.desk': {'$ne': None}},
                {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
            ]},
            status_clause,
        ]
    }
    request = ParsedRequest()
    request.max_results = config.MAX_EXPIRY_QUERY_LIMIT
    request.sort = 'expiry,_created'
    return self.get_from_mongo(req=request, lookup=lookup)
def on_create(self, docs):
    """Runs on stage create.

    Sets desk_order and expiry defaults; when the stage is flagged as
    working and/or incoming stage, the desk's previous default stage of
    that kind is cleared.
    """
    for doc in docs:
        desk = doc.get("desk")
        if not desk:
            doc["desk_order"] = 1
            continue

        # look up the stage with the highest desk_order on this desk
        req = ParsedRequest()
        req.sort = "-desk_order"
        req.max_results = 1
        existing = self.get(req=req, lookup={"desk": doc["desk"]})

        # an expiry of 0 means "no expiry"
        if doc.get("content_expiry") == 0:
            doc["content_expiry"] = None

        if existing.count() == 0:
            doc["desk_order"] = 1
        else:
            doc["desk_order"] = existing[0].get("desk_order", 1) + 1

        # if this new one is default then remove the old default
        if doc.get("working_stage", False):
            self.remove_old_default(desk, "working_stage")
        if doc.get("default_incoming", False):
            self.remove_old_default(desk, "default_incoming")
def on_create(self, docs):
    """Set desk_order and expiry defaults on newly created stages.

    When a stage is defined as working and/or incoming stage, the desk's
    previous default stage of that kind is removed.
    """
    for doc in docs:
        desk = doc.get('desk')
        if not desk:
            doc['desk_order'] = 1
            continue

        # fetch the stage currently holding the highest desk_order
        request = ParsedRequest()
        request.sort = '-desk_order'
        request.max_results = 1
        top_stage = self.get(req=request, lookup={'desk': doc['desk']})

        # an expiry of 0 means "no expiry"
        if doc.get('content_expiry') == 0:
            doc['content_expiry'] = None

        has_stages = top_stage.count() != 0
        doc['desk_order'] = top_stage[0].get('desk_order', 1) + 1 if has_stages else 1

        # if this new one is default then remove the old default
        if doc.get('working_stage', False):
            self.remove_old_default(desk, 'working_stage')
        if doc.get('default_incoming', False):
            self.remove_old_default(desk, 'default_incoming')
def purge_old_entries(self):
    """Delete audit entries older than ``self.expiry``.

    Works in batches of 1000 ids, capped at 100 batches per invocation so
    the command cannot loop forever.
    """
    service = superdesk.get_resource_service("audit")
    logger.info("Starting to purge audit logs at {}".format(utcnow()))
    # ObjectIds embed their creation time, so comparing against an id
    # built from the expiry datetime selects all older documents
    expiry_oid = ObjectId.from_datetime(self.expiry)
    for _ in range(100):  # make sure we don't get stuck
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.projection = '{"_id": 1}'
        req.max_results = 1000
        audits = service.get_from_mongo(
            req=req, lookup={"$and": [{"_id": {"$lt": expiry_oid}}]})
        ids = [audit.get("_id") for audit in audits]
        if not ids:
            logger.info("Finished purging audit logs at {}".format(utcnow()))
            return
        logger.info("Found {} audit items at {}".format(len(ids), utcnow()))
        service.delete_ids_from_mongo(ids)
    logger.warning("Audit purge didn't finish in 100 iterations.")
def get_published_items(self):
    """
    Get all items with queue state: "pending" that are not scheduled or scheduled time has lapsed.
    """
    schedule_field = "{}.utc_{}".format(SCHEDULE_SETTINGS, PUBLISH_SCHEDULE)
    query = {
        QUEUE_STATE: PUBLISH_STATE.PENDING,
        "$or": [
            {ITEM_STATE: {"$ne": CONTENT_STATE.SCHEDULED}},
            {ITEM_STATE: CONTENT_STATE.SCHEDULED, schedule_field: {"$lte": utcnow()}},
        ],
    }
    request = ParsedRequest()
    request.sort = "publish_sequence_no"
    request.max_results = 200
    published_service = get_resource_service(PUBLISHED)
    return list(published_service.get_from_mongo(req=request, lookup=query))
def get_expired_items(self, expiry_datetime):
    """Return expired items that either sit on a desk or are desk-less spiked.

    :param datetime expiry_datetime: expiry datetime
    :return pymongo.cursor: expired non published items.
    """
    already_expired = {'expiry': {'$lte': date_to_str(expiry_datetime)}}
    desk_or_spiked = {'$or': [
        {'task.desk': {'$ne': None}},
        {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
    ]}

    req = ParsedRequest()
    req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
    req.sort = 'expiry,_created'
    return self.get_from_mongo(req=req, lookup={'$and': [already_expired, desk_or_spiked]})
def get_queue_items(retries=False):
    """Fetch transmittable queue items, excluding pull-type destinations.

    With ``retries`` the query targets retrying items whose next attempt is
    due, otherwise pending ones.
    """
    not_pull = {'destination.delivery_type': {'$ne': 'pull'}}
    if retries:
        clauses = [
            {'state': QueueState.RETRYING.value},
            {'next_retry_attempt_at': {'$lte': utcnow()}},
            not_pull,
        ]
    else:
        clauses = [{'state': QueueState.PENDING.value}, not_pull]

    request = ParsedRequest()
    request.max_results = app.config.get('MAX_TRANSMIT_QUERY_LIMIT', 500)
    # ensure we publish in the correct sequence
    request.sort = '[("_created", 1), ("subscriber_id", 1), ("published_seq_num", 1)]'
    return get_resource_service(PUBLISH_QUEUE).get(req=request, lookup={'$and': clauses})
def get_queue_items(retries=False, subscriber_id=None, priority=None):
    """Fetch queue items for transmission, optionally narrowed to one subscriber."""
    lookup = _get_queue_lookup(retries, priority)
    if subscriber_id:
        lookup['$and'].append({'subscriber_id': subscriber_id})

    request = ParsedRequest()
    request.max_results = app.config.get('MAX_TRANSMIT_QUERY_LIMIT', 100)  # limit per subscriber now
    request.sort = '[("_created", 1), ("published_seq_num", 1)]'
    queue_service = get_resource_service(PUBLISH_QUEUE)
    return queue_service.get(req=request, lookup=lookup)
def get_published_items():
    """
    Returns a list of items marked for publishing.
    """
    pending = {QUEUE_STATE: PUBLISH_STATE.PENDING}
    request = ParsedRequest()
    request.sort = 'publish_sequence_no'
    request.max_results = 100
    published_service = get_resource_service(PUBLISHED)
    return list(published_service.get_from_mongo(req=request, lookup=pending))
def get_last_published_item(self, item_id):
    """Get the most recently versioned published item.

    :param item_id: Id of the planning item or event item
    :return: the latest published version, or None when nothing matches
    """
    request = ParsedRequest()
    request.sort = '-version'
    return self.find_one(req=request, item_id=item_id)
def get_archived_takes_package(self, package_id, take_id, version, include_other_takes=True):
    """Find the archived takes package that contains the given take.

    Packages are scanned newest version first; a package matches when one
    of its refs points at *take_id* (and, unless ``include_other_takes``,
    at the exact *version*).
    """
    req = ParsedRequest()
    req.sort = '[("%s", -1)]' % config.VERSION
    for take_package in self.get(req=req, lookup={'item_id': package_id}):
        for ref in self._get_package_refs(take_package):
            if ref[RESIDREF] != take_id:
                continue
            if include_other_takes or ref['_current_version'] == version:
                return take_package
def get_publish_queue_items(self, page_size, expired_items=None):
    """Yield pages of publish queue items that are not moved to legal.

    :param int page_size: batch size
    :param list expired_items: restrict the query to these item ids;
        otherwise all successful/canceled/failed queue entries are taken
    :return list: one page of publish queue items per iteration
    """
    # `expired_items=[]` was a shared mutable default argument; use the
    # None sentinel instead (behavior unchanged — the list is never mutated)
    if expired_items is None:
        expired_items = []

    query = {"moved_to_legal": False}

    if expired_items:
        query["item_id"] = {"$in": expired_items}
    else:
        query["state"] = {"$in": [QueueState.SUCCESS.value, QueueState.CANCELED.value, QueueState.FAILED.value]}

    service = get_resource_service("publish_queue")
    req = ParsedRequest()
    req.sort = '[("_id", 1)]'
    req.where = json.dumps(query)
    cursor = service.get(req=req, lookup=None)
    count = cursor.count()

    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, page_size))
        queue_id = cursor[0][config.ID_FIELD]

    logger.info("Number of items to move to legal archive publish queue: {}, pages={}".format(count, no_of_pages))

    for page in range(0, no_of_pages):
        logger.info(
            "Fetching publish queue items "
            "for page number: {}. queue_id: {}".format((page + 1), queue_id)
        )
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        # NOTE(review): '$gte' re-includes the boundary document on every
        # page after the first, and str() on an ObjectId changes the BSON
        # type used in the comparison — the sibling get_mongo_items helpers
        # switch to '$gt' after the first page; confirm intent before
        # changing the query itself.
        query["_id"] = {"$gte": str(queue_id)}
        req.where = json.dumps(query)
        req.max_results = page_size
        cursor = service.get(req=req, lookup=None)
        items = list(cursor)

        if len(items) > 0:
            queue_id = items[len(items) - 1][config.ID_FIELD]
            logger.info(
                "Fetched No. of Items: {} for page: {} "
                "For import in to legal archive publish_queue.".format(len(items), (page + 1))
            )
            yield items
def test_query_sort_by_name_case_sensetive(self):
    service = get_resource_service('concept_items')
    # default (binary) collation: all capitals sort before lowercase letters
    names = [
        'A Message to Garcia',
        'Bootstrap: Responsive Web Development',
        'Hobbit',
        'Lord of the rings',
        'The Little Prince',
        'and then there were none',
        'the Elegance of the Hedgehog',
        'Гайдамаки'
    ]

    req = ParsedRequest()
    req.sort = 'name'
    ascending = [item['name'] for item in service.get_from_mongo(req=req, lookup={})]
    self.assertEqual(ascending, names)

    # descending sort yields exactly the reverse order
    req = ParsedRequest()
    req.sort = '-name'
    names.reverse()
    descending = [item['name'] for item in service.get_from_mongo(req=req, lookup={})]
    self.assertEqual(descending, names)
def _get_max_date_from_publish_queue(self):
    """
    Get the max _updated date from legal_publish_queue collection

    :return datetime: _updated time of the newest entry, or None when empty
    """
    req = ParsedRequest()
    req.sort = '[("%s", -1)]' % config.LAST_UPDATED
    req.max_results = 1
    service = get_resource_service(LEGAL_PUBLISH_QUEUE_NAME)
    newest = list(service.get(req=req, lookup={}))
    if not newest:
        return None
    return newest[0][config.LAST_UPDATED]
def get_published_items():
    """
    Returns a list of items marked for publishing.
    """
    request = ParsedRequest()
    request.sort = 'publish_sequence_no'
    request.max_results = 100
    pending_lookup = {QUEUE_STATE: PUBLISH_STATE.PENDING}
    service = get_resource_service(PUBLISHED)
    return list(service.get_from_mongo(req=request, lookup=pending_lookup))
def get_mongo_items(self, mongo_collection_name, page_size):
    """Generate list of items from given mongo collection per page size.

    Pages by _id watermark rather than skip/limit, so each page is fetched
    with an indexed range query.

    :param mongo_collection_name: Name of the collection to get the items
    :param page_size: Size of every list in each iteration
    :return: list of items
    """
    bucket_size = int(page_size) if page_size else self.default_page_size
    print('Indexing data from mongo/{} to elastic/{}'.format(
        mongo_collection_name, mongo_collection_name))
    service = superdesk.get_resource_service(mongo_collection_name)
    req = ParsedRequest()
    req.sort = '[("%s", 1)]' % config.ID_FIELD
    cursor = service.get_from_mongo(req, {})
    count = cursor.count()
    no_of_buckets = len(range(0, count, bucket_size))
    # watermark starts at the smallest _id; each page advances it
    water_mark = cursor[0][config.ID_FIELD]
    print('Number of items to index: {}, pages={}'.format(
        count, no_of_buckets))

    for x in range(0, no_of_buckets):
        print('{} Page : {}'.format(time.strftime('%X %x %Z'), x + 1))
        s = time.time()
        req = ParsedRequest()
        req.sort = '[("%s", 1)]' % config.ID_FIELD
        req.max_results = bucket_size
        if x == 0:
            # first page includes the watermark document itself
            lookup = {config.ID_FIELD: {'$gte': water_mark}}
        else:
            # later pages start strictly after the last id already yielded
            lookup = {config.ID_FIELD: {'$gt': water_mark}}
        cursor = service.get_from_mongo(req, lookup)
        items = list(cursor)
        water_mark = items[len(items) - 1][config.ID_FIELD]
        print('{} Retrieved from Mongo in {:.3f} seconds to {}'.format(
            time.strftime('%X %x %Z'), time.time() - s, water_mark))
        yield items
def _get_max_date_from_publish_queue(self):
    """
    Get the max _updated date from legal_publish_queue collection

    :return datetime: _updated time
    """
    req = ParsedRequest()
    req.sort = '[("%s", -1)]' % config.LAST_UPDATED
    req.max_results = 1
    req.page = 1
    service = get_resource_service(LEGAL_PUBLISH_QUEUE_NAME)
    latest = list(service.get(req=req, lookup={}))
    return latest[0][config.LAST_UPDATED] if latest else None
def get_expired_items(self, page_size):
    """
    Get expired item that are not moved to legal

    Yields one page of at most ``page_size`` items per iteration.

    :return:
    """
    # elastic filtered query: already expired, not yet in legal archive,
    # and not in the 'scheduled' state
    query = {
        "query": {
            "filtered": {
                "filter": {
                    "and": [
                        {"range": {"expiry": {"lt": "now"}}},
                        {"term": {"moved_to_legal": False}},
                        {"not": {"term": {"state": CONTENT_STATE.SCHEDULED}}},
                    ]
                }
            }
        }
    }

    service = get_resource_service("published")
    req = ParsedRequest()
    req.args = {"source": json.dumps(query)}
    req.sort = '[("publish_sequence_no", 1)]'
    cursor = service.get(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = len(range(0, count, page_size))
    logger.info("Number of items to move to legal archive: {}, pages={}".format(count, no_of_pages))
    for page in range(0, no_of_pages):
        # NOTE(review): each iteration re-runs the same query with only
        # max_results and no offset — this relies on the consumer flipping
        # moved_to_legal so fetched items drop out of the result set;
        # confirm against the caller.
        req = ParsedRequest()
        req.args = {"source": json.dumps(query)}
        req.sort = '[("publish_sequence_no", 1)]'
        req.max_results = page_size
        cursor = service.get(req=req, lookup=None)
        items = list(cursor)
        logger.info("Fetched No. of Items: {} import in to legal archive.".format(len(items)))
        yield items
def get_published_takes(self, takes_package):
    """
    Get all the published takes in the takes packages.

    :param takes_package: takes package
    :return: List of publishes takes.
    """
    refs = self.get_package_refs(takes_package)
    if not refs:
        return []

    take_ids = [ref.get(RESIDREF) for ref in refs]
    lookup = self._get_published_items_query(take_ids)
    request = ParsedRequest()
    request.sort = SEQUENCE
    archive_service = get_resource_service(ARCHIVE)
    return list(archive_service.get_from_mongo(req=request, lookup=lookup))
def get_mongo_items(self, mongo_collection_name, page_size):
    """Generates list of items from given mongo collection per page size

    :param mongo_collection_name: Name of the collection to get the items
    :param page_size: Size of every list in each iteration
    :return: list of items
    """
    bucket_size = int(page_size) if page_size else self.default_page_size
    print('Indexing data from mongo/{} to elastic/{}'.format(mongo_collection_name, mongo_collection_name))
    service = superdesk.get_resource_service(mongo_collection_name)

    sort_by_id = '[("%s", 1)]' % config.ID_FIELD
    req = ParsedRequest()
    req.sort = sort_by_id
    cursor = service.get_from_mongo(req, {})
    count = cursor.count()
    no_of_buckets = len(range(0, count, bucket_size))
    water_mark = cursor[0][config.ID_FIELD]
    print('Number of items to index: {}, pages={}'.format(count, no_of_buckets))

    for bucket in range(no_of_buckets):
        print('{} Page : {}'.format(time.strftime('%X %x %Z'), bucket + 1))
        started = time.time()
        req = ParsedRequest()
        req.sort = sort_by_id
        req.max_results = bucket_size
        # first bucket includes the watermark document itself,
        # later buckets start strictly after it
        comparison = '$gte' if bucket == 0 else '$gt'
        items = list(service.get_from_mongo(req, {config.ID_FIELD: {comparison: water_mark}}))
        water_mark = items[-1][config.ID_FIELD]
        print('{} Retrieved from Mongo in {:.3f} seconds to {}'.format(time.strftime('%X %x %Z'),
                                                                       time.time() - started, water_mark))
        yield items
def on_create(self, docs):
    """Assign the next desk_order and a default expiry to each new stage."""
    for doc in docs:
        if not doc.get('desk'):
            doc['desk_order'] = 1
            continue

        req = ParsedRequest()
        req.sort = '-desk_order'
        req.max_results = 1
        highest = self.get(req=req, lookup={'desk': doc['desk']})

        # zero/missing expiry falls back to the global setting
        if doc.get('content_expiry', 0) == 0:
            doc['content_expiry'] = app.settings['CONTENT_EXPIRY_MINUTES']

        doc['desk_order'] = 1 if highest.count() == 0 else highest[0].get('desk_order', 1) + 1
def get_queue_items(retries=False):
    """Fetch transmittable queue items, excluding pull-type destinations."""
    exclude_pull = {"destination.delivery_type": {"$ne": "pull"}}
    if retries:
        clauses = [
            {"state": QueueState.RETRYING.value},
            {"next_retry_attempt_at": {"$lte": utcnow()}},
            exclude_pull,
        ]
    else:
        clauses = [{"state": QueueState.PENDING.value}, exclude_pull]

    request = ParsedRequest()
    request.max_results = app.config.get("MAX_TRANSMIT_QUERY_LIMIT", 500)
    # ensure we publish in the correct sequence
    request.sort = '[("_created", 1), ("subscriber_id", 1), ("published_seq_num", 1)]'
    return get_resource_service(PUBLISH_QUEUE).get(req=request, lookup={"$and": clauses})
def get_expired_items(self, expiry_datetime=None, expiry_days=None, max_results=None, include_children=True):
    """Get the expired items.

    Returns a generator for the list of expired items, sorting by `_id` and returning
    `max_results` per iteration.

    :param datetime expiry_datetime: Expiry date/time used to retrieve the list of items, defaults to `utcnow()`
    :param int expiry_days: Number of days content expires, defaults to `CONTENT_API_EXPIRY_DAYS`
    :param int max_results: Maximum results to retrieve per iteration, defaults to `MAX_EXPIRY_QUERY_LIMIT`
    :param boolean include_children: Include only root item if False, otherwise include the entire item chain
    :return list: expired content_api items
    """
    if expiry_datetime is None:
        expiry_datetime = utcnow()
    if expiry_days is None:
        expiry_days = app.settings['CONTENT_API_EXPIRY_DAYS']
    if max_results is None:
        max_results = app.settings['MAX_EXPIRY_QUERY_LIMIT']

    last_id = None
    # anything not updated within the expiry window counts as expired
    expire_at = date_to_str(expiry_datetime - timedelta(days=expiry_days))

    while True:
        query = {'$and': [{'_updated': {'$lte': expire_at}}]}
        if last_id is not None:
            # keyset pagination: resume strictly after the last _id yielded
            query['$and'].append({'_id': {'$gt': last_id}})
        if not include_children:
            # root items are the ones without an ancestors field
            query['$and'].append({'ancestors': {'$exists': False}})

        req = ParsedRequest()
        req.sort = '_id'
        req.where = json.dumps(query)
        req.max_results = max_results

        items = list(self.get_from_mongo(req=req, lookup=None))
        if not items:
            break
        last_id = items[-1]['_id']
        yield items
def get_published_items():
    """
    Get all items with queue state: "pending" that are not scheduled or scheduled time has lapsed.
    """
    schedule_key = "{}.utc_{}".format(SCHEDULE_SETTINGS, PUBLISH_SCHEDULE)
    lookup = {
        QUEUE_STATE: PUBLISH_STATE.PENDING,
        "$or": [
            {ITEM_STATE: {"$ne": CONTENT_STATE.SCHEDULED}},
            {ITEM_STATE: CONTENT_STATE.SCHEDULED, schedule_key: {"$lte": utcnow()}},
        ],
    }
    request = ParsedRequest()
    request.sort = "publish_sequence_no"
    request.max_results = 200
    return list(get_resource_service(PUBLISHED).get_from_mongo(req=request, lookup=lookup))
def on_create(self, docs):
    """Assign desk_order/expiry defaults and demote the old default incoming stage."""
    for doc in docs:
        desk_id = doc.get('desk')
        if not desk_id:
            doc['desk_order'] = 1
            continue

        req = ParsedRequest()
        req.sort = '-desk_order'
        req.max_results = 1
        highest = self.get(req=req, lookup={'desk': desk_id})

        # zero/missing expiry falls back to the global setting
        if doc.get('content_expiry', 0) == 0:
            doc['content_expiry'] = app.settings['CONTENT_EXPIRY_MINUTES']

        doc['desk_order'] = 1 if highest.count() == 0 else highest[0].get('desk_order', 1) + 1

        # if this new one is default need to remove the old default
        if doc.get('default_incoming', False):
            self.remove_old_default(desk_id, 'default_incoming')
def _get_items(self, resource, query, sort, keys, callback):
    """Fetch all docs matching *query* page by page.

    Returns a dict mapping ``callback(item)`` to a sub-dict of the *keys*
    present on each item.
    """
    req = ParsedRequest()
    service = get_resource_service(resource)
    count = service.get_from_mongo(req=req, lookup=query).count()
    no_of_buckets = len(range(0, count, self.default_page_size))
    req.sort = sort

    items = {}
    for bucket in range(no_of_buckets):
        skip = bucket * self.default_page_size
        logger.info('Page : {}, skip: {}'.format(bucket + 1, skip))
        cursor = service.get_from_mongo(req=req, lookup=query)
        cursor.skip(skip)
        cursor.limit(self.default_page_size)
        for item in cursor:
            items[callback(item)] = {key: item.get(key) for key in keys if key in item}
    return items
def get_queue_items(retries=False):
    """Return pending queue items — or due retrying ones when *retries* is set."""
    if retries:
        clauses = [
            {'state': QueueState.RETRYING.value},
            {'next_retry_attempt_at': {'$lte': utcnow()}},
        ]
    else:
        clauses = [{'state': QueueState.PENDING.value}]

    request = ParsedRequest()
    request.max_results = app.config.get('MAX_TRANSMIT_QUERY_LIMIT', 500)
    # ensure we publish in the correct sequence
    request.sort = '[("_created", 1), ("subscriber_id", 1), ("published_seq_num", 1)]'
    return get_resource_service(PUBLISH_QUEUE).get(req=request, lookup={'$and': clauses})
def get_published_takes(self, takes_package):
    """
    Get all the published takes in the takes packages.

    :param takes_package: takes package
    :return: List of publishes takes.
    """
    refs = self.get_package_refs(takes_package)
    if not refs:
        return []

    take_ids = [ref.get(RESIDREF) for ref in refs]
    lookup = {'$and': [
        {config.ID_FIELD: {'$in': take_ids}},
        {ITEM_STATE: {'$in': [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]}},
    ]}
    request = ParsedRequest()
    request.sort = SEQUENCE
    return list(get_resource_service(ARCHIVE).get_from_mongo(req=request, lookup=lookup))
def get_expired_items(self, expiry_datetime, invalid_only=False):
    """Get the expired items.

    Where content state is not scheduled and the item matches given parameters.
    Yields batches of items, paginating on ``unique_id``.

    :param datetime expiry_datetime: expiry datetime
    :param bool invalid_only: True only invalid items
    :return pymongo.cursor: expired non published items.
    """
    # keyset pagination: each batch resumes strictly after the highest
    # unique_id yielded so far
    unique_id = 0
    while True:
        req = ParsedRequest()
        req.sort = 'unique_id'

        query = {
            '$and': [
                {'expiry': {'$lte': date_to_str(expiry_datetime)}},
                {'$or': [
                    {'task.desk': {'$ne': None}},
                    {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None}
                ]}
            ]
        }
        query['$and'].append({'unique_id': {'$gt': unique_id}})

        if invalid_only:
            query['$and'].append({'expiry_status': 'invalid'})
        else:
            # default: skip items already flagged invalid
            query['$and'].append({'expiry_status': {'$ne': 'invalid'}})

        req.where = json.dumps(query)
        req.max_results = config.MAX_EXPIRY_QUERY_LIMIT

        items = list(self.get_from_mongo(req=req, lookup=None))
        if not len(items):
            break
        unique_id = items[-1]['unique_id']
        yield items
def get_expired_items(self, expiry_datetime):
    """
    Get the expired items where content state is not scheduled.

    :param datetime expiry_datetime: expiry datetime
    :return pymongo.cursor: expired non published items.
    """
    query = {
        '$and': [
            {'expiry': {'$lte': date_to_str(expiry_datetime)}},
            {'$or': [
                {'task.desk': {'$ne': None}},
                {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None}
            ]}
        ]
    }

    req = ParsedRequest()
    req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
    req.sort = 'expiry,_created'
    # BUG FIX: the request built above was previously discarded
    # (`req=None`), so the MAX_EXPIRY_QUERY_LIMIT cap and the
    # 'expiry,_created' sort were never applied to the query — the
    # identical sibling implementation passes `req=req`.
    return self.get_from_mongo(req=req, lookup=query)
def get_overdue_scheduled_items(expired_date_time, resource, limit=100):
    """
    Fetches the overdue scheduled articles from given collection.

    Overdue Conditions:
        1. it should be in 'scheduled' state
        2. publish_schedule is less than or equal to expired_date_time

    :param expired_date_time: DateTime that scheduled state will be checked against
    :param resource: Name of the resource to check the data from
    :param limit: Number of return items
    :return: overdue scheduled articles from published collection
    """
    logger.info('Get overdue scheduled content from {}'.format(resource))
    overdue = {'$and': [
        {'publish_schedule': {'$lte': expired_date_time}},
        {ITEM_STATE: CONTENT_STATE.SCHEDULED},
    ]}

    req = ParsedRequest()
    req.sort = '_modified'
    req.max_results = limit
    return superdesk.get_resource_service(resource).get_from_mongo(req=req, lookup=overdue)