def get_mongo_items(self, consistency_record):
    # get the records from mongo in chunks
    # temporarily clear the resource projection so the custom _etag/_updated
    # projection below takes effect; it is restored before returning
    projection = dict(superdesk.resources[self.resource_name].endpoint_schema['datasource']['projection'])
    superdesk.resources[self.resource_name].endpoint_schema['datasource']['projection'] = None
    service = superdesk.get_resource_service(self.resource_name)
    cursor = service.get_from_mongo(None, {})
    count = cursor.count()
    no_of_buckets = len(range(0, count, self.default_page_size))
    mongo_items = []
    updated_mongo_items = []
    request = ParsedRequest()
    request.projection = json.dumps({'_etag': 1, '_updated': 1})
    for x in range(0, no_of_buckets):
        skip = x * self.default_page_size
        print('Page : {}, skip: {}'.format(x + 1, skip))
        # skip records created after the elastic items were retrieved
        cursor = service.get_from_mongo(request, {'_created': {'$lte': consistency_record['started_at']}})
        cursor.skip(skip)
        cursor.limit(self.default_page_size)
        cursor = list(cursor)
        mongo_items.extend([(mongo_item['_id'], mongo_item['_etag']) for mongo_item in cursor])
        updated_mongo_items.extend([mongo_item['_id'] for mongo_item in cursor
                                    if mongo_item['_updated'] > consistency_record['started_at']])
    superdesk.resources[self.resource_name].endpoint_schema['datasource']['projection'] = projection
    return mongo_items, updated_mongo_items
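# Hedged usage sketch for get_mongo_items above; `checker` and the consistency
# record shape are assumptions inferred from how the method reads them, not a
# confirmed API.
consistency_record = {'started_at': utcnow()}
mongo_items, updated_mongo_items = checker.get_mongo_items(consistency_record)
# mongo_items: (_id, _etag) pairs for items created before the run started;
# updated_mongo_items: ids modified after the run started, whose etags may be stale.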
def on_delete_res_vocabularies(self, doc):
    req = ParsedRequest()
    req.projection = '{"label": 1}'
    res = self.get(req=req, lookup={'schema.' + doc[config.ID_FIELD]: {'$type': 3}})
    if res.count():
        payload = {'content_types': [doc_hateoas for doc_hateoas in map(self._build_hateoas, res)]}
        message = 'Vocabulary "%s" is used in %d content type(s)' % \
            (doc.get('display_name'), res.count())
        raise SuperdeskApiError.badRequestError(message, payload)
def find(self, resource, lookup, projection, **options):
    req = ParsedRequest()
    req.args = {}
    req.projection = projection
    if hasattr(self.data_layer, 'find'):
        return self.data_layer.find(resource, req, lookup)
    else:
        return self.data_layer.get(resource, req, lookup)
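# Hedged usage sketch for the find wrapper above (resource name, lookup and
# fields are hypothetical). Eve's ParsedRequest.projection is a JSON string,
# hence the json.dumps here.
import json

cursor = wrapper.find('archive', {'state': 'published'}, json.dumps({'headline': 1}))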
def on_delete_res_vocabularies(self, doc):
    req = ParsedRequest()
    req.projection = '{"label": 1}'
    res = self.get(req=req, lookup={"schema." + doc[config.ID_FIELD]: {"$type": 3}})
    if res.count():
        payload = {"content_types": [doc_hateoas for doc_hateoas in map(self._build_hateoas, res)]}
        message = _(
            "Vocabulary {vocabulary} is used in {count} content type(s)"
        ).format(vocabulary=doc.get("display_name"), count=res.count())
        raise SuperdeskApiError.badRequestError(message, payload)
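# Note on the lookup in the two handlers above: in MongoDB, {"$type": 3} matches
# fields holding an embedded document, so the query finds content types whose
# schema stores an object under this vocabulary's _id. A hypothetical matching
# document, for illustration only:
content_type = {
    "_id": "article",
    "label": "Article",
    "schema": {"genre": {"required": False}},  # "genre" is the vocabulary _id being deleted
}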
def purge_orphaned_item_audits(self):
    """Purge the audit items that do not have associated entries existing in archive.

    :return:
    """
    service = superdesk.get_resource_service('audit')
    current_id = None
    logger.info('Starting to purge audit logs of content items not in archive at {}'.format(utcnow()))
    # Scan the audit collection for items to delete
    while True:
        query = deepcopy(self.item_entry_query)
        query['$and'].append({'_updated': {'$lte': date_to_str(self.expiry)}})
        if current_id:
            query['$and'].append({'_id': {'$gt': current_id}})
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.projection = '{"_id": 1, "audit_id": 1}'
        req.max_results = 1000
        audits = service.get_from_mongo(req=req, lookup=query)
        items = list([(item['_id'], item['audit_id']) for item in audits])
        if len(items) == 0:
            logger.info('Finished purging audit logs of content items not in archive at {}'.format(utcnow()))
            return
        logger.info('Found {} orphaned audit items at {}'.format(len(items), utcnow()))
        current_id = items[len(items) - 1][0]
        batch_ids = set([i[1] for i in items])
        archive_ids = self._get_archive_ids(batch_ids)
        ids = batch_ids - archive_ids
        audit_ids = [i[0] for i in items if i[1] in ids]
        logger.info('Deleting {} orphaned audit items at {}'.format(len(audit_ids), utcnow()))
        service.delete_ids_from_mongo(audit_ids)
def purge_orphaned_item_audits(self):
    """Purge the audit items that do not have associated entries existing in archive.

    :return:
    """
    service = superdesk.get_resource_service("audit")
    current_id = None
    logger.info("Starting to purge audit logs of content items not in archive at {}".format(utcnow()))
    # Scan the audit collection for items to delete
    for _ in range(100):
        query = deepcopy(self.item_entry_query)
        query["$and"].append({"_id": {"$lte": ObjectId.from_datetime(self.expiry)}})
        if current_id:
            query["$and"].append({"_id": {"$gt": current_id}})
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.projection = '{"_id": 1, "audit_id": 1}'
        req.max_results = 1000
        audits = service.get_from_mongo(req=req, lookup=query)
        items = list([(item["_id"], item["audit_id"]) for item in audits])
        if len(items) == 0:
            logger.info("Finished purging audit logs of content items not in archive at {}".format(utcnow()))
            return
        logger.info("Found {} orphaned audit items at {}".format(len(items), utcnow()))
        current_id = items[len(items) - 1][0]
        batch_ids = set([i[1] for i in items])
        archive_ids = self._get_archive_ids(batch_ids)
        ids = batch_ids - archive_ids
        audit_ids = [i[0] for i in items if i[1] in ids]
        logger.info("Deleting {} orphaned audit items at {}".format(len(audit_ids), utcnow()))
        service.delete_ids_from_mongo(audit_ids)
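# Hedged sketch of the keyset pagination both purge variants rely on: batches are
# sorted by _id, the last _id seen becomes the cursor for the next query, and the
# second variant uses ObjectId.from_datetime to turn the expiry datetime into an
# _id upper bound (ObjectIds embed their creation time). Names below are illustrative.
from datetime import datetime, timedelta, timezone
from bson import ObjectId

expiry = datetime.now(timezone.utc) - timedelta(days=30)
upper_bound = ObjectId.from_datetime(expiry)  # matches docs created before expiry
query = {'$and': [{'_id': {'$lte': upper_bound}}]}
current_id = None  # after each batch: query['$and'].append({'_id': {'$gt': current_id}})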
def _prefetch_vocabularies(self):
    """Prefetch items from vocabularies."""
    # this method is called from `parse`, but it must be executed only once
    if self._vocabularies is not None:
        return
    self._vocabularies = {}
    req = ParsedRequest()
    req.projection = json.dumps({'items': 1})
    # prefetch vocabularies -> anp_genres
    self._vocabularies['anp_genres'] = superdesk.get_resource_service(
        'vocabularies').find_one(req=req, _id='anp_genres').get('items', [])
    # re-key by qcode to speed up lookups in later methods
    self._vocabularies['anp_genres'] = {s['qcode']: s for s in self._vocabularies['anp_genres']}
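# Hedged usage sketch: once the cache is built, later parse steps can resolve a
# genre by qcode in O(1). `parser` and the 'SPO' qcode are hypothetical.
parser._prefetch_vocabularies()
genre = parser._vocabularies['anp_genres'].get('SPO')  # None if the qcode is unknown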
def _validate_language(self, doc):
    # fetch languages from CVs
    req = ParsedRequest()
    req.projection = json.dumps({"items.qcode": 1})
    try:
        languages = (
            superdesk.get_resource_service("vocabularies").find_one(req=req, _id="languages").get("items", [])
        )
    except AttributeError:
        # find_one returns None when the CV is missing, so the chained .get raises
        raise SuperdeskApiError.badRequestError(
            message="Request is not valid",
            payload={"language": "Concept items require the 'languages' vocabulary to be set"},
        )
    languages_qcodes = [lang["qcode"] for lang in languages]
    if doc["language"] not in languages_qcodes:
        raise SuperdeskApiError.badRequestError(
            message="Request is not valid",
            payload={"language": "unallowed value '{}'".format(doc["language"])},
        )
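# Hedged sketch of the failure mode handled above: a missing 'languages' vocabulary
# is translated into a 400 response rather than an unhandled 500. Example document
# that passes, assuming 'en' is among the vocabulary qcodes (names hypothetical):
doc = {"language": "en"}
service._validate_language(doc)  # raises SuperdeskApiError.badRequestError otherwise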
def purge_old_entries(self):
    """Purge entries older than the expiry that are not related to archive items.

    :return:
    """
    service = superdesk.get_resource_service('audit')
    current_id = None
    logger.info('Starting to purge audit logs of non-content items at {}'.format(utcnow()))
    while True:
        lookup = {'$and': [self.not_item_entry_query, {'_updated': {'$lte': date_to_str(self.expiry)}}]}
        if current_id:
            lookup['$and'].append({'_id': {'$gt': current_id}})
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.projection = '{"_id": 1}'
        req.max_results = 1000
        audits = service.get_from_mongo(req=req, lookup=lookup)
        items = list(item.get('_id') for item in audits)
        if len(items) == 0:
            logger.info('Finished purging audit logs of non-content items at {}'.format(utcnow()))
            return
        logger.info('Found {} audit items at {}'.format(len(items), utcnow()))
        current_id = items[len(items) - 1]
        logger.info('Deleting {} old audit items'.format(len(items)))
        service.delete_ids_from_mongo(items)
def find(self, resource, filter, projection, **options):
    req = ParsedRequest()
    req.args = {}
    req.projection = projection
    return self.data_layer.find(resource, req, filter)
def find_one(self, resource, filter, projection):
    req = ParsedRequest()
    req.args = {}
    req.projection = projection
    return self.data_layer.find_one(resource, req, **filter)
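# Hedged usage sketch for the two data-layer wrappers above (resource and fields
# hypothetical). The projection is passed through to ParsedRequest, which expects
# a JSON string; note that find_one spreads the filter into keyword arguments.
import json

user = layer.find_one('users', {'username': 'admin'}, json.dumps({'username': 1, 'role': 1}))
active = layer.find('users', {'is_active': True}, json.dumps({'username': 1}))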
def _validate_associated_items(self, original_item, updates=None, validation_errors=None):
    """Validates associated items.

    This function ensures that unpublished content validates, that none of the content
    is locked, and that no killed, recalled or spiked content is included.

    :param original_item:
    :param updates:
    :param validation_errors: validation errors are appended if there are any.
    """
    if validation_errors is None:
        validation_errors = []
    if updates is None:
        updates = {}

    # merge associations
    associations = deepcopy(original_item.get(ASSOCIATIONS, {}))
    associations.update(updates.get(ASSOCIATIONS, {}))

    items = list(associations.values())
    if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE and self.publish_type == ITEM_PUBLISH:
        items.extend(self.package_service.get_residrefs(original_item))

    for item in items:
        if type(item) == dict and item.get(config.ID_FIELD):
            doc = item
            # enhance doc with lock_user
            req = ParsedRequest()
            req.args = {}
            req.projection = json.dumps({'lock_user': 1})
            try:
                doc.update({'lock_user': super().find_one(req=req, _id=item[config.ID_FIELD])['lock_user']})
            except (TypeError, KeyError):
                pass
        elif item:
            doc = super().find_one(req=None, _id=item)
        else:
            continue

        if not doc:
            continue

        if original_item[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self._validate_associated_items(doc, validation_errors=validation_errors)

        # make sure no items are killed or recalled or spiked or scheduled
        doc_item_state = doc.get(ITEM_STATE, CONTENT_STATE.PUBLISHED)
        if doc_item_state in {CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED,
                              CONTENT_STATE.SPIKED, CONTENT_STATE.SCHEDULED}:
            validation_errors.append('Item cannot contain associated {} item'.format(doc[ITEM_STATE]))

        if doc.get(EMBARGO):
            validation_errors.append('Item cannot have associated items with Embargo')

        # don't validate items that have already been published
        if doc_item_state not in [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED]:
            validate_item = {'act': self.publish_type, 'type': doc[ITEM_TYPE], 'validate': doc}
            if type(item) == dict:
                validate_item['embedded'] = True
            errors = get_resource_service('validate').post([validate_item], headline=True)
            if errors[0]:
                pre_errors = ['Associated item %s %s' % (doc.get('slugline', ''), error)
                              for error in errors[0]]
                validation_errors.extend(pre_errors)

        if config.PUBLISH_ASSOCIATED_ITEMS:
            # check the locks on the items
            if doc.get('lock_user'):
                if original_item['lock_user'] != doc['lock_user']:
                    validation_errors.extend(['{}: {}'.format(
                        doc.get('headline', doc['_id']),
                        _('packaged item is locked by another user'))])
                elif original_item['lock_user'] == doc['lock_user']:
                    validation_errors.extend(['{}: {}'.format(
                        doc.get('headline', doc['_id']),
                        _('packaged item is locked by you. Unlock it and try again'))])
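# Hedged usage sketch: errors are accumulated rather than raised one at a time, so
# a caller can report every problem at once. Surrounding names are hypothetical.
errors = []
service._validate_associated_items(original_item, updates=updates, validation_errors=errors)
if errors:
    raise SuperdeskApiError.badRequestError(message=', '.join(errors))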
def _users_aggregation(self, desk_id: str) -> List[Dict]:
    desks_service = superdesk.get_resource_service("desks")
    es_query: Dict[str, Any]
    es_assign_query: Dict[str, Any]
    desk_filter: Dict[str, Any]

    if desk_id == "all":
        desk_filter = {}
        es_query = {}
    else:
        desk_filter = {"_id": ObjectId(desk_id)}
        es_query = {"filter": {"term": {"task.desk": desk_id}}}

    req = ParsedRequest()
    req.projection = json.dumps({"members": 1})
    found = desks_service.get(req, desk_filter)
    members = set()
    for d in found:
        members.update({m["user"] for m in d["members"]})

    users_aggregation = app.data.pymongo().db.users.aggregate([
        {"$match": {"_id": {"$in": list(members)}}},
        {"$group": {"_id": "$role", "authors": {"$addToSet": "$_id"}}},
    ])

    # first we check archives for locked items
    es_query["aggs"] = {
        "desk_authors": {
            "filter": {"terms": {"version_creator": [str(m) for m in members]}},
            "aggs": {
                "authors": {
                    "terms": {"field": "version_creator"},
                    "aggs": {
                        "locked": {"filter": {"exists": {"field": "lock_user"}}},
                    },
                }
            },
        }
    }
    docs_agg = app.data.elastic.search(es_query, "archive", params={"size": 0})
    stats_by_authors = {}
    for a in docs_agg.hits["aggregations"]["desk_authors"]["authors"]["buckets"]:
        stats_by_authors[a["key"]] = {
            "locked": a["locked"]["doc_count"],
            "assigned": 0,
        }

    # then assignments
    if desk_id == "all":
        desk_filter = {}
        es_assign_query = {}
    else:
        desk_filter = {"_id": ObjectId(desk_id)}
        es_assign_query = {"filter": {"term": {"assigned_to.desk": desk_id}}}
    es_assign_query["aggs"] = {
        "desk_authors": {
            "filter": {"terms": {"assigned_to.user": [str(m) for m in members]}},
            "aggs": {
                "authors": {
                    "terms": {"field": "assigned_to.user"},
                }
            },
        }
    }
    try:
        assign_agg = app.data.elastic.search(es_assign_query, "assignments", params={"size": 0})
    except KeyError:
        logger.warning('Can\'t access "assignments" collection, planning is probably not installed')
    else:
        for a in assign_agg.hits["aggregations"]["desk_authors"]["authors"]["buckets"]:
            stats_by_authors.setdefault(a["key"], {"locked": 0})["assigned"] = a["doc_count"]

    overview = []
    for a in users_aggregation:
        role = a["_id"]
        authors_dict: Dict[str, Any] = {}
        role_dict = {
            "role": role,
            "authors": authors_dict,
        }
        authors = a["authors"]
        for author in authors:
            author = str(author)
            try:
                authors_dict[author] = stats_by_authors[author]
            except KeyError:
                logger.debug("No article found for {author}".format(author=author))
                authors_dict[author] = {"assigned": 0, "locked": 0}
        overview.append(role_dict)
    return overview
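# Hedged sketch of the structure _users_aggregation returns; ids and counts are
# invented for illustration, and users without a role group under None.
overview_example = [
    {"role": "editor", "authors": {"5d1a0b9c2e1f4a0012ab34cd": {"locked": 2, "assigned": 5}}},
    {"role": None, "authors": {"5d1b1cad3f2a5b0023bc45de": {"locked": 0, "assigned": 0}}},
]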
def get_from_mongo(self, req, lookup, projection=None):
    if req is None:
        req = ParsedRequest()
    if not req.projection and projection:
        req.projection = json.dumps(projection)
    return self.backend.get_from_mongo(self.datasource, req=req, lookup=lookup)
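# Hedged usage sketch (resource and fields hypothetical): pass the projection as a
# plain dict and let the helper serialize it for ParsedRequest, unless the caller
# already set req.projection.
items = service.get_from_mongo(None, {'state': 'published'}, projection={'headline': 1, 'state': 1})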
def find(self, resource, lookup, projection, **options):
    req = ParsedRequest()
    req.args = {}
    req.projection = projection
    return self.data_layer.get(resource, req, lookup)