def set_if_not_running(pipe):
    """Mark the task as running unless a non-stale running key already exists.

    Uses ``key``, ``update_schedule``, ``now``, ``name`` and ``id`` from the
    enclosing scope.

    :param pipe: redis pipeline used to read/write the running key
    :return bool: True when the key was (re)set, False when the task is
        considered to be still running.
    """
    last_updated = pipe.get(key)
    if last_updated:
        last_updated = get_date(str(last_updated))
        delta = last_updated + update_schedule
        if delta < now:
            # Stale lock: the previous run exceeded its schedule, take over.
            # NOTE: logger.warn is a deprecated alias -> use logger.warning.
            logger.warning('Overwriting running key for {}:{}'.format(name, id))
            pipe.set(key, date_to_str(now))
            return True
        else:
            logger.warning('Task {}:{} is already running. last_updated={}'.format(name, id, last_updated))
            return False
    else:
        pipe.set(key, date_to_str(now))
        return True
def _get_archive_items(self, start_date, end_date):
    """Gets the archive items from the mongo database that were updated today

    :return:
    """
    updated_range = {'$gte': date_to_str(start_date), '$lte': date_to_str(end_date)}
    published_states = [CONTENT_STATE.CORRECTED, CONTENT_STATE.PUBLISHED, CONTENT_STATE.KILLED]
    query = {
        '$and': [
            {'_updated': updated_range},
            {ITEM_STATE: {'$in': published_states}},
        ]
    }
    projection = [config.VERSION, 'versioncreated', 'state']
    return self._get_items(ARCHIVE, query, '_created', projection, self.__get_key)
def _prepare_response(resource, dct, last_modified=None, etag=None, status=200):
    """ Prepares the response object according to the client request and
    available renderers, making sure that all accessory directives (caching,
    etag, last-modified) are present.

    :param resource: the resource involved.
    :param dct: the dict that should be sent back as a response.
    :param last_modified: Last-Modified header value.
    :param etag: ETag header value.
    :param status: response status.

    .. versionchanged:: 0.0.5
       Support for Cross-Origin Resource Sharing (CORS).

    .. versionadded:: 0.0.4
    """
    if request.method == "OPTIONS":
        resp = app.make_default_options_response()
    else:
        # obtain the best match between client's request and available mime
        # types, along with the corresponding render function.
        mime, renderer = _best_mime()

        # invoke the render function and obtain the corresponding rendered item
        rendered = globals()[renderer](**dct)

        # build the main wsgi response object
        resp = make_response(rendered, status)
        resp.mimetype = mime

    # cache directives
    if request.method == "GET":
        if resource:
            cache_control = config.DOMAIN[resource]["cache_control"]
            expires = config.DOMAIN[resource]["cache_expires"]
        else:
            cache_control = config.CACHE_CONTROL
            expires = config.CACHE_EXPIRES
        if cache_control:
            resp.headers.add("Cache-Control", cache_control)
        if expires:
            resp.expires = time.time() + expires

    # etag and last-modified
    if etag:
        resp.headers.add("ETag", etag)
    if last_modified:
        resp.headers.add("Last-Modified", date_to_str(last_modified))

    if "Origin" in request.headers and config.X_DOMAINS is not None:
        if isinstance(config.X_DOMAINS, basestring):
            domains = [config.X_DOMAINS]
        else:
            domains = config.X_DOMAINS
        methods = app.make_default_options_response().headers["allow"]
        resp.headers.add("Access-Control-Allow-Origin", ", ".join(domains))
        resp.headers.add("Access-Control-Allow-Methods", methods)
        # BUG FIX: the CORS preflight-cache header is named
        # "Access-Control-Max-Age" (no "Allow" in it), and header values
        # must be strings, not ints.
        resp.headers.add("Access-Control-Max-Age", "21600")
    return resp
def xml_dict(cls, data):
    """ Renders a dict as XML.

    :param data: the data stream to be rendered as xml.

    .. versionchanged:: 0.5
       Always return ordered items (#441).

    .. versionchanged:: 0.2
       Leaf values are now properly escaped.

    .. versionadded:: 0.0.3
    """
    fragments = []
    for tag, value in OrderedDict(sorted(data.items())).items():
        # normalize date-like leaves to strings first
        if isinstance(value, datetime.datetime):
            value = date_to_str(value)
        elif isinstance(value, (datetime.time, datetime.date)):
            value = value.isoformat()
        values = value if isinstance(value, list) else [value]
        for entry in values:
            if isinstance(entry, dict):
                # nested documents carry their HATEOAS links inside the tag
                links = cls.xml_add_links(entry)
                fragments.append("<%s>" % tag)
                fragments.append(cls.xml_dict(entry))
                fragments.append(links)
                fragments.append("</%s>" % tag)
            else:
                fragments.append("<%s>%s</%s>" % (tag, utils.escape(entry), tag))
    return ''.join(fragments)
def set_if_not_running(pipe):
    """Mark the ingest update for the provider as running unless a non-stale
    running key already exists.

    Uses ``key``, ``update_schedule``, ``now`` and ``provider`` from the
    enclosing scope.

    :param pipe: redis pipeline used to read/write the running key
    :return bool: True when the key was (re)set, False when an update is
        considered to be still running.
    """
    last_updated = pipe.get(key)
    if last_updated:
        last_updated = get_date(str(last_updated))
        delta = last_updated + update_schedule
        if delta < now:
            # Stale lock: previous run exceeded its schedule, take over.
            # FIXES: logger.warn deprecated alias; "Overwritting" typo.
            logger.warning('Overwriting running key for provider {0}'.format(provider[superdesk.config.ID_FIELD]))
            pipe.set(key, date_to_str(now))
            return True
        else:
            logger.warning('Update ingest already running for provider {0}, last_updated={1}'.
                           format(provider[superdesk.config.ID_FIELD], last_updated))
            return False
    else:
        pipe.set(key, date_to_str(now))
        return True
def purge_old_entries(self):
    """
    Purge entries older than the expiry that are not related to archive items
    :return:
    """
    service = superdesk.get_resource_service('audit')
    last_seen_id = None
    logger.info('Starting to purge audit logs of none content items at {}'.format(utcnow()))
    while True:
        # base filter: non-item entries older than the expiry cutoff,
        # paged by _id so each pass picks up after the previous batch
        lookup = {'$and': [self.not_item_entry_query,
                           {'_updated': {'$lte': date_to_str(self.expiry)}}]}
        if last_seen_id:
            lookup['$and'].append({'_id': {'$gt': last_seen_id}})
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.projection = '{"_id": 1}'
        req.max_results = 1000
        batch = [entry.get('_id') for entry in service.get_from_mongo(req=req, lookup=lookup)]
        if not batch:
            logger.info('Finished purging audit logs of none content items at {}'.format(utcnow()))
            return
        logger.info('Found {} audit items at {}'.format(len(batch), utcnow()))
        last_seen_id = batch[-1]
        logger.info('Deleting {} old audit items'.format(len(batch)))
        service.delete_ids_from_mongo(batch)
def get_expired_items(self, expiry_datetime, invalid_only=False):
    """Get the expired items.

    Where content state is not scheduled and the item matches given parameters

    :param datetime expiry_datetime: expiry datetime
    :param bool invalid_only: True only invalid items
    :return pymongo.cursor: expired non published items.
    """
    expired_clause = {'expiry': {'$lte': date_to_str(expiry_datetime)}}
    desk_clause = {'$or': [
        {'task.desk': {'$ne': None}},
        {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
    ]}
    # invalid_only toggles between selecting only invalid items or excluding them
    if invalid_only:
        status_clause = {'expiry_status': 'invalid'}
    else:
        status_clause = {'expiry_status': {'$ne': 'invalid'}}
    query = {'$and': [expired_clause, desk_clause, status_clause]}
    req = ParsedRequest()
    req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
    req.sort = 'expiry,_created'
    return self.get_from_mongo(req=req, lookup=query)
def get_expired_items(self, expiry_datetime, invalid_only=False):
    """Get the expired items.

    Where content state is not scheduled and the item matches given parameters

    :param datetime expiry_datetime: expiry datetime
    :param bool invalid_only: True only invalid items
    :return pymongo.cursor: expired non published items.
    """
    conditions = [
        {'expiry': {'$lte': date_to_str(expiry_datetime)}},
        {'$or': [
            {'task.desk': {'$ne': None}},
            {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
        ]},
    ]
    # narrow to (or exclude) items already flagged with an invalid expiry
    conditions.append({'expiry_status': 'invalid'} if invalid_only
                      else {'expiry_status': {'$ne': 'invalid'}})
    req = ParsedRequest()
    req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
    req.sort = 'expiry,_created'
    return self.get_from_mongo(req=req, lookup={'$and': conditions})
def prepopulate_data(file_name, default_user=None):
    """Load a prepopulate JSON file and insert/patch the items it describes.

    :param file_name: JSON file name under ``apps/prepopulate``
    :param default_user: credentials used when an item specifies no username;
        defaults to ``get_default_user()``.
    :raises Exception: when a patch or post yields no result.

    BUG FIX: the default used to be ``default_user=get_default_user()``,
    which is evaluated once at import time; it is now resolved per call.
    """
    if default_user is None:
        default_user = get_default_user()
    placeholders = {'NOW()': date_to_str(utcnow())}
    users = {default_user['username']: default_user['password']}
    default_username = default_user['username']
    # renamed from `file` to avoid shadowing the builtin
    file_path = os.path.join(superdesk.app.config.get('APP_ABSPATH'), 'apps', 'prepopulate', file_name)
    with open(file_path, 'rt', encoding='utf8') as app_prepopulation:
        json_data = json.load(app_prepopulation)
        for item in json_data:
            resource = item.get('resource', None)
            service = get_resource_service(resource)
            username = item.get('username', None) or default_username
            set_logged_user(username, users[username])
            id_name = item.get('id_name', None)
            id_update = item.get('id_update', None)
            # substitute placeholders (NOW(), previously captured ids) in the raw data
            text = json.dumps(item.get('data', None))
            text = apply_placeholders(placeholders, text)
            data = json.loads(text)
            if resource:
                app.data.mongo._mongotize(data, resource)
            if resource == 'users':
                # remember credentials so later items can log in as this user
                users.update({data['username']: data['password']})
            if id_update:
                id_update = apply_placeholders(placeholders, id_update)
                res = service.patch(ObjectId(id_update), data)
                if not res:
                    raise Exception()
            else:
                ids = service.post([data])
                if not ids:
                    raise Exception()
                if id_name:
                    # expose the new id to subsequent items as a placeholder
                    placeholders[id_name] = str(ids[0])
            if app.config['VERSION'] in data:
                insert_versioning_documents(resource, data)
def purge_old_entries(self):
    """
    Purge entries older than the expiry that are not related to archive items
    :return:
    """
    service = superdesk.get_resource_service('audit')
    cursor_date = None
    while True:
        # non-item entries older than the cutoff, resuming from the last
        # _updated value seen in the previous batch
        lookup = {'$and': [self.not_item_entry_query,
                           {'_updated': {'$lte': date_to_str(self.expiry)}}]}
        if cursor_date:
            lookup['$and'].append({'_updated': {'$gte': cursor_date}})
        req = ParsedRequest()
        req.sort = '[("_updated", 1)]'
        req.projection = '{"_id": 1, "_updated": 1}'
        req.max_results = 1000
        audits = service.get_from_mongo(req=req, lookup=lookup)
        if audits.count() == 0:
            break
        batch = [(entry['_id'], entry['_updated']) for entry in audits]
        cursor_date = batch[-1][1]
        service.delete({'_id': {'$in': [pair[0] for pair in batch]}})
def get_history_items(self, last_id, gte, item_id, chunk_size=0):
    """Yield batches of archive_history entries matching the given filters.

    :param last_id: resume after this history entry id (or None)
    :param gte: only entries created at/after this datetime (or None)
    :param item_id: restrict to one item's history (or None)
    :param chunk_size: max entries per batch; 0 means service default
    """
    history_service = get_resource_service('archive_history')
    last_processed_id = last_id
    while True:
        req = ParsedRequest()
        req.sort = '[("_id", 1), ("version", 1)]'
        conditions = []
        if gte:
            conditions.append({'_created': {'$gte': date_to_str(gte)}})
        if item_id:
            conditions.append({'item_id': str(item_id)})
        if last_processed_id:
            # page forward from the last entry of the previous batch
            conditions.append({'_id': {'$gt': str(last_processed_id)}})
        req.where = json.dumps({'$and': conditions})
        if chunk_size > 0:
            req.max_results = int(chunk_size)
        batch = list(history_service.get(req=req, lookup=None))
        if len(batch) < 1:
            break
        last_processed_id = batch[-1][config.ID_FIELD]
        yield batch
def purge_orphaned_item_audits(self):
    """
    Purge the audit items that do not have associated entries existing in archive
    :return:
    """
    service = superdesk.get_resource_service('audit')
    last_seen_id = None
    # Scan the audit collection for items to delete
    while True:
        query = deepcopy(self.item_entry_query)
        query['$and'].append({'_updated': {'$lte': date_to_str(self.expiry)}})
        if last_seen_id:
            query['$and'].append({'_id': {'$gt': last_seen_id}})
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.projection = '{"_id": 1, "extra.guid": 1, "extra._id": 1, "extra.item_id": 1, "extra.item": 1}'
        req.max_results = 1000
        audits = service.get_from_mongo(req=req, lookup=query)
        if audits.count() == 0:
            break
        batch = [(entry['_id'], self._extract_item_id(entry)) for entry in audits]
        last_seen_id = batch[-1][0]
        # only delete audits whose referenced item no longer exists in archive
        referenced_ids = set(pair[1] for pair in batch)
        existing_ids = self._get_archive_ids(referenced_ids)
        orphaned = referenced_ids - existing_ids
        doomed = [pair[0] for pair in batch if pair[1] in orphaned]
        service.delete({'_id': {'$in': doomed}})
def xml_dict(data):
    """ Renders a dict as XML.

    :param data: the data stream to be rendered as xml.

    .. versionadded:: 0.0.3
    """
    parts = []
    for tag, value in data.items():
        # normalize date-like leaves before rendering
        if isinstance(value, datetime.datetime):
            value = date_to_str(value)
        elif isinstance(value, (datetime.time, datetime.date)):
            value = value.isoformat()
        values = value if isinstance(value, list) else [value]
        for entry in values:
            if isinstance(entry, dict):
                # nested documents carry their HATEOAS links inside the tag
                links = xml_add_links(entry)
                parts.append("<%s>" % tag)
                parts.append(xml_dict(entry))
                parts.append(links)
                parts.append("</%s>" % tag)
            else:
                # NOTE(review): leaf values are not escaped in this version
                parts.append("<%s>%s</%s>" % (tag, entry, tag))
    return ''.join(parts)
def xml_dict(data):
    """ Renders a dict as XML.

    :param data: the data stream to be rendered as xml.

    .. versionchanged:: 0.5
       Always return ordered items (#441).

    .. versionchanged:: 0.2
       Leaf values are now properly escaped.

    .. versionadded:: 0.0.3
    """
    rendered = ''
    # sort keys for deterministic output (#441)
    for tag in sorted(data):
        value = data[tag]
        if isinstance(value, datetime.datetime):
            value = date_to_str(value)
        elif isinstance(value, (datetime.time, datetime.date)):
            value = value.isoformat()
        if not isinstance(value, list):
            value = [value]
        for entry in value:
            if isinstance(entry, dict):
                # nested documents carry their HATEOAS links inside the tag
                links = xml_add_links(entry)
                rendered += "<%s>" % tag
                rendered += xml_dict(entry)
                rendered += links
                rendered += "</%s>" % tag
            else:
                rendered += "<%s>%s</%s>" % (tag, utils.escape(entry), tag)
    return rendered
def update_overdue_scheduled(self):
    """
    Updates the overdue scheduled content on archive collection.
    """
    logger.info('Updating overdue scheduled content')
    # bail out if another worker already holds the task lock
    if is_task_running("archive", "update_overdue_scheduled", UPDATE_OVERDUE_SCHEDULED_DEFAULT):
        return
    try:
        now = date_to_str(utcnow())
        item_update = {ITEM_STATE: CONTENT_STATE.PUBLISHED}
        for item in get_overdue_scheduled_items(now, ARCHIVE):
            logger.info(
                'updating overdue scheduled article with id {} and headline {} -- expired on: {} now: {}'
                .format(item[config.ID_FIELD], item['headline'], item['publish_schedule'], now))
            superdesk.get_resource_service(ARCHIVE).patch(item[config.ID_FIELD], item_update)
    finally:
        # always release the lock, even if patching raised
        mark_task_as_not_running("archive", "update_overdue_scheduled")
def render_xml(**d):
    """ XML render function.

    Dict values recurse under a singularized tag (trailing 's' stripped);
    lists are wrapped in the plural tag with each element rendered inside.
    This could surely use some further tinkering.
    """
    xml = ''
    for k, v in d.items():
        # normalize date-like values to strings first
        if isinstance(v, datetime.datetime):
            v = date_to_str(v)
        elif isinstance(v, (datetime.time, datetime.date)):
            v = v.isoformat()
        if type(v) is dict:
            singular = k.rstrip('s')
            xml += "<%s>" % singular
            xml += render_xml(**v)
            xml += "</%s>" % singular
        else:
            was_list = type(v) is list
            if was_list:
                # real lists get a wrapping plural tag
                xml += "<%s>" % k
            else:
                v = [v]
            for value in v:
                singular = k.rstrip('s')
                if type(value) is dict:
                    xml += "<%s>" % singular
                    xml += render_xml(**value)
                    xml += "</%s>" % singular
                else:
                    xml += "<%s>%s</%s>" % (str(singular), value, str(singular))
            if was_list:
                xml += "</%s>" % k
    return xml
def purge_orphaned_item_audits(self):
    """
    Purge the audit items that do not have associated entries existing in archive
    :return:
    """
    service = superdesk.get_resource_service('audit')
    last_seen_id = None
    logger.info('Starting to purge audit logs of content items not in archive at {}'.format(utcnow()))
    # Scan the audit collection for items to delete
    while True:
        query = deepcopy(self.item_entry_query)
        query['$and'].append({'_updated': {'$lte': date_to_str(self.expiry)}})
        if last_seen_id:
            query['$and'].append({'_id': {'$gt': last_seen_id}})
        req = ParsedRequest()
        req.sort = '[("_id", 1)]'
        req.projection = '{"_id": 1, "audit_id":1}'
        req.max_results = 1000
        batch = [(entry['_id'], entry['audit_id'])
                 for entry in service.get_from_mongo(req=req, lookup=query)]
        if not batch:
            logger.info('Finished purging audit logs of content items not in archive at {}'.format(utcnow()))
            return
        logger.info('Found {} orphaned audit items at {}'.format(len(batch), utcnow()))
        last_seen_id = batch[-1][0]
        # only delete audits whose referenced item no longer exists in archive
        referenced_ids = set(pair[1] for pair in batch)
        existing_ids = self._get_archive_ids(referenced_ids)
        orphaned = referenced_ids - existing_ids
        doomed = [pair[0] for pair in batch if pair[1] in orphaned]
        logger.info('Deleting {} orphaned audit items at {}'.format(len(doomed), utcnow()))
        service.delete_ids_from_mongo(doomed)
def test_query_getting_expired_content(self):
    """Only items whose expiry is now or in the past are returned; items
    with a future, null or missing expiry are ignored."""
    with self.app.app_context():
        # expiry offsets in days relative to now; None means "no expiry"
        for expiry in (get_expiry_date(-10), get_expiry_date(0), get_expiry_date(10),
                       get_expiry_date(20), get_expiry_date(30), None):
            self.app.data.insert(ARCHIVE, [{'expiry': expiry, 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'unique_id': 97, 'state': 'spiked'}])
        now = date_to_str(utcnow())
        expired_items = RemoveExpiredSpikeContent().get_expired_items(now)
        # FIX: assertEquals is a deprecated alias of assertEqual
        self.assertEqual(2, expired_items.count())
def get_expired_items(self, expiry_datetime):
    """
    Get the expired items where content state is not scheduled and

    :param datetime expiry_datetime: expiry datetime
    :return pymongo.cursor: expired non published items.
    """
    query = {
        '$and': [
            {'expiry': {'$lte': date_to_str(expiry_datetime)}},
            {'$or': [
                {'task.desk': {'$ne': None}},
                {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
            ]},
        ]
    }
    req = ParsedRequest()
    req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
    req.sort = 'expiry,_created'
    return self.get_from_mongo(req=req, lookup=query)
def test_query_getting_overdue_scheduled_content(self):
    """Only 'scheduled' items whose publish_schedule has passed count as
    overdue; published/spiked items and future schedules are ignored."""
    with self.app.app_context():
        fixtures = [
            {'publish_schedule': get_expiry_date(-10), 'state': 'published'},
            {'publish_schedule': get_expiry_date(-10), 'state': 'scheduled'},
            {'publish_schedule': get_expiry_date(0), 'state': 'spiked'},
            {'publish_schedule': get_expiry_date(10), 'state': 'scheduled'},
            {'unique_id': 97, 'state': 'spiked'},
        ]
        for doc in fixtures:
            self.app.data.insert(ARCHIVE, [doc])
        now = date_to_str(utcnow())
        overdueItems = get_overdue_scheduled_items(now, 'archive')
        # FIX: assertEquals is a deprecated alias of assertEqual
        self.assertEqual(1, overdueItems.count())
def update_overdue_scheduled(self):
    """
    Updates the overdue scheduled content on published collection.
    """
    logger.info("Updating overdue scheduled content")
    # skip if another worker already holds the lock
    if is_task_running("publish", "update_overdue_scheduled", UPDATE_OVERDUE_SCHEDULED_DEFAULT):
        return
    try:
        now = date_to_str(utcnow())
        for overdue in get_overdue_scheduled_items(now, "published"):
            logger.info(
                "updating overdue scheduled article with id {} and headline {} -- expired on: {} now: {}".format(
                    overdue[config.ID_FIELD], overdue["headline"], overdue["publish_schedule"], now
                )
            )
            superdesk.get_resource_service("published").update_published_items(
                overdue["item_id"], ITEM_STATE, CONTENT_STATE.PUBLISHED
            )
    finally:
        # always release the lock
        mark_task_as_not_running("publish", "update_overdue_scheduled")
def get_items(self, now):
    """Get the items from the archive collection that have expiry in
    future and state is published, corrected, killed

    :param datetime now: current date time
    :return list: list of expired items
    """
    logger.info('Fetching expired items from archive collection.')
    now = now + timedelta(minutes=self.expiry_minutes)
    query = {
        'expiry': {'$gte': date_to_str(now)},
        ITEM_STATE: {'$in': [
            CONTENT_STATE.PUBLISHED,
            CONTENT_STATE.CORRECTED,
            CONTENT_STATE.KILLED,
            CONTENT_STATE.RECALLED,
        ]},
    }
    req = ParsedRequest()
    req.sort = '[("unique_id", 1)]'
    req.where = json.dumps(query)
    cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, self.default_page_size))
        unique_id = cursor[0]['unique_id']
        logger.info('Number of items to modify: {}, pages={}'.format(count, no_of_pages))
    else:
        logger.info('No items to modify.')
    for page in range(0, no_of_pages):
        logger.info('Fetching items for page number: {} unique_id: {}'.format((page + 1), unique_id))
        req = ParsedRequest()
        req.sort = '[("unique_id", 1)]'
        # first page includes the starting id; later pages resume after it
        query['unique_id'] = {'$gte': unique_id} if page == 0 else {'$gt': unique_id}
        req.where = json.dumps(query)
        req.max_results = self.default_page_size
        cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
        batch = list(cursor)
        if batch:
            unique_id = batch[-1]['unique_id']
            logger.info('Fetched No. of Items: {} for page: {}'.format(len(batch), (page + 1)))
        yield batch
def remove_expired_sessions(self):
    """Delete auth sessions that have not been updated within the
    configured expiry window."""
    expiry_minutes = app.settings['SESSION_EXPIRY_MINUTES']
    cutoff = utcnow() - timedelta(minutes=expiry_minutes)
    logger.info('Deleting session not updated since {}'.format(cutoff))
    stale = get_resource_service('auth').get(
        req=None, lookup={'_updated': {'$lte': date_to_str(cutoff)}})
    for session in stale:
        get_resource_service('auth').delete_action({'_id': str(session['_id'])})
def remove_expired_sessions(self):
    """Delete auth sessions whose last update is older than the
    configured session expiry."""
    minutes = app.settings["SESSION_EXPIRY_MINUTES"]
    expiration_time = utcnow() - timedelta(minutes=minutes)
    logger.info("Deleting session not updated since {}".format(expiration_time))
    lookup = {"_updated": {"$lte": date_to_str(expiration_time)}}
    for session in get_resource_service("auth").get(req=None, lookup=lookup):
        get_resource_service("auth").delete_action({"_id": str(session["_id"])})
def get_query_for_expired_items(provider_id, expiration_date):
    """Build the (JSON-encoded) elastic filter selecting a provider's
    ingest items created before the expiration date."""
    provider_term = {'term': {'ingest.ingest_provider': provider_id}}
    created_range = {'range': {'ingest.versioncreated': {'lte': date_to_str(expiration_date)}}}
    return superdesk.json.dumps({'and': [provider_term, created_range]})
def remove_expired_content(self):
    """Delete archive items whose expiry has passed, batch by batch."""
    logger.info('Removing expired content')
    now = date_to_str(utcnow())
    # re-query after each batch until nothing expired remains
    items = self.get_expired_items(now)
    while items.count() > 0:
        for expired in items:
            logger.info('deleting {} expiry: {} now:{}'.format(expired['_id'], expired['expiry'], now))
            superdesk.get_resource_service('archive').delete_action({'_id': str(expired['_id'])})
        items = self.get_expired_items(now)
def remove_expired_spiked(self):
    """Delete spiked archive items whose expiry has passed, re-querying
    until no expired items remain."""
    logger.info('Expiring spiked content')
    now = date_to_str(utcnow())
    items = self.get_expired_items(now)
    while items.count() > 0:
        for doomed in items:
            logger.info('deleting {} expiry: {} now:{}'.format(doomed['_id'], doomed['expiry'], now))
            superdesk.get_resource_service('archive').delete_action({'_id': str(doomed['_id'])})
        items = self.get_expired_items(now)
def remove_expired_content(self):
    """Delete expired archive items in repeated passes until the expired
    query returns nothing."""
    logger.info("Removing expired content")
    now = date_to_str(utcnow())
    expired = self.get_expired_items(now)
    while expired.count() > 0:
        for doc in expired:
            logger.info("deleting {} expiry: {} now:{}".format(doc["_id"], doc["expiry"], now))
            superdesk.get_resource_service("archive").delete_action({"_id": str(doc["_id"])})
        expired = self.get_expired_items(now)
def default(self, obj):
    """JSON fallback serializer for date/time objects.

    datetimes are rendered via ``date_to_str`` (the configured RFC 1123
    format); bare dates/times fall back to ISO format.
    """
    if isinstance(obj, datetime.datetime):
        # convert any datetime to RFC 1123 format
        return date_to_str(obj)
    if isinstance(obj, (datetime.time, datetime.date)):
        # should not happen since the only supported date-like format
        # supported at dmain schema level is 'datetime' .
        return obj.isoformat()
    return json.JSONEncoder.default(self, obj)
def remove_expired_sessions(self):
    """Delete stale auth sessions, then refresh the online-users list."""
    cutoff = utcnow() - timedelta(minutes=app.settings['SESSION_EXPIRY_MINUTES'])
    logger.info('Deleting session not updated since {}'.format(cutoff))
    stale_sessions = get_resource_service('auth').get(
        req=None, lookup={'_updated': {'$lte': date_to_str(cutoff)}})
    for session in stale_sessions:
        get_resource_service('auth').delete_action({'_id': str(session['_id'])})
    # keep the online-users bookkeeping consistent with the deletions
    self._update_online_users()
def set_if_not_running(pipe):
    """Mark the task as running unless a non-stale running key exists.

    Uses ``key``, ``update_schedule``, ``now``, ``name`` and ``id`` from
    the enclosing scope.

    :param pipe: redis pipeline used to read/write the running key
    :return bool: True when the key was (re)set, False when the task is
        considered to be still running.
    """
    last_updated = pipe.get(key)
    if last_updated:
        last_updated = get_date(str(last_updated))
        delta = last_updated + update_schedule
        if delta < now:
            # Stale lock: previous run exceeded its schedule, take over.
            # FIX: logger.warn is a deprecated alias of logger.warning.
            logger.warning('Overwriting running key for {}:{}'.format(
                name, id))
            pipe.set(key, date_to_str(now))
            return True
        else:
            logger.warning(
                'Task {}:{} is already running. last_updated={}'.format(
                    name, id, last_updated))
            return False
    else:
        pipe.set(key, date_to_str(now))
        return True
def test_query_getting_overdue_scheduled_content(self):
    """Only 'scheduled' items with a past publish_schedule are overdue."""
    self.app.data.insert(ARCHIVE, [{'publish_schedule': get_expiry_date(-10), 'state': 'published'}])
    self.app.data.insert(ARCHIVE, [{'publish_schedule': get_expiry_date(-10), 'state': 'scheduled'}])
    self.app.data.insert(ARCHIVE, [{'publish_schedule': get_expiry_date(0), 'state': 'spiked'}])
    self.app.data.insert(ARCHIVE, [{'publish_schedule': get_expiry_date(10), 'state': 'scheduled'}])
    self.app.data.insert(ARCHIVE, [{'unique_id': 97, 'state': 'spiked'}])

    now = date_to_str(utcnow())
    overdueItems = get_overdue_scheduled_items(now, 'archive')
    # FIX: assertEquals is a deprecated alias of assertEqual
    self.assertEqual(1, overdueItems.count())
def remove_expired_items(self):
    """
    Removes the expired items from the database
    """
    now = date_to_str(utcnow())
    for expired in self.get_expired_items(now):
        logger.info('deleting article of type {} with id {} and headline {} -- expired on: {} now: {}'.
                    format(expired['type'], expired[config.ID_FIELD], expired['headline'],
                           expired['expiry'], now))
        superdesk.get_resource_service('published').remove_expired(expired)
def _bulk_find(resource, args):
    """Run a paginated mongo ``find`` on *resource* and return the result
    as a JSON Flask ``Response`` with ``_items``/``_meta``/``_links``.

    :param resource: collection name to query.
    :param args: request args mapping; recognizes ``where``, ``projection``,
        ``max_results``, ``page``, ``sort_by`` and ``sort_order``.
    """
    find_args = {
        'filter': process_where(args.get('where')),
        'projection': args.get('projection'),
    }
    try:
        limit = max(int(args['max_results']), 0)
    except (ValueError, KeyError):
        limit = 0
    if limit:
        find_args['limit'] = limit
    try:
        page = max(int(args['page']), 1)
    except (ValueError, KeyError):
        page = 1
    if limit and page > 1:
        find_args['skip'] = limit * (page - 1)
    try:
        find_args['sort'] = [(args['sort_by'], args['sort_order'])]
    except KeyError:
        # FIX: was a bare `except:` which also swallowed unrelated errors;
        # only a missing sort_by/sort_order should disable sorting.
        find_args['sort'] = None
    cursor = app.data.driver.db[resource].find(**find_args)
    total = cursor.count()
    # ceil(total / limit); a single page when no limit is set
    pages = ((total + limit - 1) // limit) if limit else 1
    items = list(cursor)
    meta = {'max_results': limit, 'total': total, 'page': page}
    links = {}
    if page > 1:
        links['prev'] = {'page': (page - 1)}
    if page < pages:
        links['next'] = {'page': (page + 1)}
    links['last'] = {'page': pages}
    for item in items:
        for k, v in item.iteritems():
            if isinstance(v, datetime):
                # date_to_str converts a datetime value to the format defined
                # in the configuration file
                item[k] = date_to_str(v)
            if isinstance(v, unicode):
                item[k] = str(v)
    msg = {'_items': items, '_meta': meta, '_links': links}
    msg_json = json.dumps(msg, default=json_util.default)
    return Response(response=msg_json, status=200, mimetype="application/json")
def remove_expired_sessions(self):
    """Delete auth sessions not updated within the expiry window, then
    refresh the online-users bookkeeping."""
    auth_service = get_resource_service("auth")
    cutoff = utcnow() - timedelta(minutes=app.settings["SESSION_EXPIRY_MINUTES"])
    logger.info(
        "Deleting session not updated since {}".format(cutoff))
    stale = auth_service.get(req=None, lookup={"_updated": {"$lte": date_to_str(cutoff)}})
    for session in stale:
        auth_service.delete({"_id": str(session["_id"])})
    self._update_online_users()
def test_query_getting_overdue_scheduled_content(self):
    """Only 'scheduled' items with a past publish_schedule are overdue."""
    with self.app.app_context():
        self.app.data.insert(ARCHIVE, [{"publish_schedule": get_expiry_date(-10), "state": "published"}])
        self.app.data.insert(ARCHIVE, [{"publish_schedule": get_expiry_date(-10), "state": "scheduled"}])
        self.app.data.insert(ARCHIVE, [{"publish_schedule": get_expiry_date(0), "state": "spiked"}])
        self.app.data.insert(ARCHIVE, [{"publish_schedule": get_expiry_date(10), "state": "scheduled"}])
        self.app.data.insert(ARCHIVE, [{"unique_id": 97, "state": "spiked"}])

        now = date_to_str(utcnow())
        overdueItems = get_overdue_scheduled_items(now, "archive")
        # FIX: assertEquals is a deprecated alias of assertEqual
        self.assertEqual(1, overdueItems.count())
def test_getitem_ifmatch_disabled_if_mod_since(self):
    """Regression test for #239: with IF_MATCH disabled, an
    If-Modified-Since request that warrants a 304 must not crash."""
    self.app.config['IF_MATCH'] = False
    # IMS needs to see as recent as possible since the test db has just
    # been built
    ims_headers = [("If-Modified-Since", date_to_str(datetime.now()))]
    response = self.test_client.get(self.item_id_url, headers=ims_headers)
    self.assert304(response.status_code)
def remove_expired_content(self):
    """Remove expired (spiked) archive items, re-querying after each
    batch until no expired items remain."""
    logger.info("Removing expired content if spiked")
    now = date_to_str(utcnow())
    expired = self.get_expired_items(now)
    while expired.count() > 0:
        for doc in expired:
            logger.info("deleting {} expiry: {} now:{}".format(doc["_id"], doc["expiry"], now))
            superdesk.get_resource_service(ARCHIVE).remove_expired(doc)
        expired = self.get_expired_items(now)
def remove_expired_items(self):
    """
    Removes the expired items from the database
    """
    now = date_to_str(utcnow())
    expired = self.get_expired_items(now)
    for doc in expired:
        logger.info(
            'deleting article of type {} with id {} and headline {} -- expired on: {} now: {}'
            .format(doc['type'], doc[config.ID_FIELD], doc['headline'], doc['expiry'], now))
        superdesk.get_resource_service('published').remove_expired(doc)
def get_expired_items(self, expiry_datetime):
    """Get the expired items

    Where end date is in the past
    """
    query = {
        'query': {
            'bool': {
                # skip events already flagged as expired
                'must_not': [{'term': {'expired': True}}],
            }
        },
        'filter': {
            'range': {'dates.end': {'lte': date_to_str(expiry_datetime)}}
        },
        'sort': [{'dates.start': 'asc'}],
        'size': get_max_recurrent_events(),
    }

    fetched = 0
    expected_total = -1
    while True:
        # page through the results using elastic's `from` offset
        query["from"] = fetched
        results = self.search(query)

        if expected_total < 0:
            # first pass: capture the total hit count; nothing to do if empty
            expected_total = results.count()
            if expected_total < 1:
                break

        batch = results.docs
        if not len(batch):
            break

        fetched += len(batch)
        yield list(batch)
def remove_expired_content(self):
    """Remove expired items from the published collection, re-querying
    after each batch until none remain."""
    logger.info('Removing expired content from published')
    now = date_to_str(utcnow())
    expired = self.get_expired_items(now)
    while expired.count() > 0:
        for doc in expired:
            logger.info('deleting article of type {} with id {} and headline {} -- expired on: {} now: {}'.
                        format(doc['type'], doc['_id'], doc['headline'], doc['expiry'], now))
            superdesk.get_resource_service('published').remove_expired(doc)
        expired = self.get_expired_items(now)
def test_query_getting_expired_content(self):
    """Only items whose expiry is now or in the past are returned."""
    with self.app.app_context():
        # offsets are days from now; None/missing expiry must be ignored
        for expiry in (get_expiry_date(-10), get_expiry_date(0), get_expiry_date(10),
                       get_expiry_date(20), get_expiry_date(30), None):
            self.app.data.insert(ARCHIVE, [{"expiry": expiry, "state": "spiked"}])
        self.app.data.insert(ARCHIVE, [{"unique_id": 97, "state": "spiked"}])

        now = date_to_str(utcnow())
        expired_items = RemoveExpiredSpikeContent().get_expired_items(now)
        # FIX: assertEquals is a deprecated alias of assertEqual
        self.assertEqual(2, expired_items.count())
def remove_expired_items(self):
    """
    Removes the expired items from the database
    """
    now = date_to_str(utcnow())
    for doc in self.get_expired_items(now):
        logger.info(
            'Removing article {{id: {}, version: {}, type: {}, headline: {}, expired_on: {} }}'
            .format(doc[config.ID_FIELD], doc[config.VERSION], doc[ITEM_TYPE],
                    doc.get('headline', ''), doc['expiry']))
        superdesk.get_resource_service('published').remove_expired(doc)
def test_query_getting_expired_content(self):
    """Only items whose expiry is now or in the past are returned."""
    with self.app.app_context():
        self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(-10), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(0), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(10), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(20), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(30), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'expiry': None, 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'unique_id': 97, 'state': 'spiked'}])

        now = date_to_str(utcnow())
        expiredItems = RemoveExpiredSpikeContent().get_expired_items(now)
        # FIX: assertEquals is a deprecated alias of assertEqual
        self.assertEqual(2, expiredItems.count())
def get_items(self, now):
    """Get the items from the archive collection that have expiry in
    future and state is published, corrected, killed

    :param datetime now: current date time
    :return list: list of expired items
    """
    logger.info('Fetching expired items from archive collection.')
    now = now + timedelta(minutes=self.expiry_minutes)
    published_states = [CONTENT_STATE.PUBLISHED, CONTENT_STATE.CORRECTED,
                        CONTENT_STATE.KILLED, CONTENT_STATE.RECALLED]
    query = {
        'expiry': {'$gte': date_to_str(now)},
        ITEM_STATE: {'$in': published_states},
    }
    req = ParsedRequest()
    req.sort = '[("unique_id", 1)]'
    req.where = json.dumps(query)
    cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
    count = cursor.count()
    no_of_pages = 0
    if count:
        no_of_pages = len(range(0, count, self.default_page_size))
        unique_id = cursor[0]['unique_id']
        logger.info('Number of items to modify: {}, pages={}'.format(count, no_of_pages))
    else:
        logger.info('No items to modify.')

    for page in range(0, no_of_pages):
        logger.info('Fetching items for page number: {} unique_id: {}'.
                    format((page + 1), unique_id))
        req = ParsedRequest()
        req.sort = '[("unique_id", 1)]'
        # include the starting unique_id on the first page only
        if page == 0:
            query['unique_id'] = {'$gte': unique_id}
        else:
            query['unique_id'] = {'$gt': unique_id}
        req.where = json.dumps(query)
        req.max_results = self.default_page_size
        page_cursor = get_resource_service(ARCHIVE).get_from_mongo(req=req, lookup=None)
        page_items = list(page_cursor)
        if len(page_items) > 0:
            unique_id = page_items[-1]['unique_id']
            logger.info('Fetched No. of Items: {} for page: {}'.format(len(page_items), (page + 1)))
        yield page_items
def get_query_for_expired_items(provider_id, expiration_date):
    """Build a JSON-encoded elastic bool query matching ingest items from
    ``provider_id`` whose ``_updated`` is at or before ``expiration_date``.
    """
    stale_enough = {'range': {'ingest._updated': {'lte': date_to_str(expiration_date)}}}
    from_provider = {'term': {'ingest.ingest_provider': provider_id}}
    query = {'bool': {'must': [stale_enough, from_provider]}}
    return superdesk.json.dumps(query)
def get_expired_items(self, expiry_datetime=None, expiry_days=None, max_results=None, include_children=True):
    """Get the expired items.

    Generator yielding batches of expired content_api items, sorted by
    `_id` ascending, `max_results` per batch; the last `_id` of each batch
    seeds the next query so iteration covers the whole collection.

    :param datetime expiry_datetime: Expiry date/time used to retrieve the list of items, defaults to `utcnow()`
    :param int expiry_days: Number of days content expires, defaults to `CONTENT_API_EXPIRY_DAYS`
    :param int max_results: Maximum results to retrieve per iteration, defaults to `MAX_EXPIRY_QUERY_LIMIT`
    :param boolean include_children: Include only root item if False, otherwise include the entire item chain
    :return list: expired content_api items
    """
    expiry_datetime = utcnow() if expiry_datetime is None else expiry_datetime
    expiry_days = app.settings["CONTENT_API_EXPIRY_DAYS"] if expiry_days is None else expiry_days
    max_results = app.settings["MAX_EXPIRY_QUERY_LIMIT"] if max_results is None else max_results

    expire_at = date_to_str(expiry_datetime - timedelta(days=expiry_days))
    last_id = None
    while True:
        clauses = [{"_updated": {"$lte": expire_at}}]
        if last_id is not None:
            clauses.append({"_id": {"$gt": last_id}})
        if not include_children:
            # Root items only: children carry an `ancestors` field.
            clauses.append({"ancestors": {"$exists": False}})
        req = ParsedRequest()
        req.sort = "_id"
        req.where = json.dumps({"$and": clauses})
        req.max_results = max_results
        batch = list(self.get_from_mongo(req=req, lookup=None))
        if not batch:
            return
        last_id = batch[-1]["_id"]
        yield batch
def _parse_date_param(params: Dict[str, Any], field: str, error_message: str):
    """Normalize and validate one date parameter of ``params`` in place.

    String values without a '+0000' suffix get one appended (mutating
    ``params``, which callers rely on); datetime values are serialized via
    ``date_to_str``. Falsy values pass through untouched.

    :param params: request parameters, possibly mutated
    :param field: key to normalize ('start_date' or 'end_date')
    :param error_message: message for the bad-request error on failure
    :raises SuperdeskApiError: when the value cannot be parsed as a date
    """
    try:
        value = params.get(field)
        if value:
            if isinstance(value, str):
                if not value.endswith('+0000'):
                    params[field] += '+0000'
                value = params[field]
                str_to_date(params[field])  # validating if date can be parsed
            elif isinstance(value, datetime):
                value = date_to_str(value)
        return value
    except Exception as e:
        logger.exception(e)
        raise SuperdeskApiError.badRequestError(error_message)


def get_date_params(params: Dict[str, Any]):
    """Extract and normalize the date-related request parameters.

    Was two copy-pasted try blocks differing only in the field name and
    error text; both now delegate to ``_parse_date_param``.

    :param params: request parameters; start/end date entries may be mutated
    :return tuple: (date_filter, start_date, end_date, tz_offset)
    :raises SuperdeskApiError: when either date value is invalid
    """
    date_filter = (params.get('date_filter') or '').strip().lower()
    tz_offset = get_time_zone(params)
    start_date = _parse_date_param(params, 'start_date', 'Invalid value for start date')
    end_date = _parse_date_param(params, 'end_date', 'Invalid value for end date')
    return date_filter, start_date, end_date, tz_offset
def get_expired_items(self, expiry_datetime, invalid_only=False):
    """Get the expired items.

    Generator yielding batches of expired, non-scheduled items sorted by
    `unique_id`; each batch's last id becomes the cursor for the next
    query, so iteration walks the whole collection.

    :param datetime expiry_datetime: expiry datetime
    :param bool invalid_only: True only invalid items
    :return pymongo.cursor: expired non published items.
    """
    cursor_id = 0
    while True:
        expired = {'expiry': {'$lte': date_to_str(expiry_datetime)}}
        # Items on a desk, or spiked items with no desk.
        on_desk_or_spiked = {'$or': [
            {'task.desk': {'$ne': None}},
            {ITEM_STATE: CONTENT_STATE.SPIKED, 'task.desk': None},
        ]}
        clauses = [expired, on_desk_or_spiked, {'unique_id': {'$gt': cursor_id}}]
        if invalid_only:
            clauses.append({'expiry_status': 'invalid'})
        else:
            clauses.append({'expiry_status': {'$ne': 'invalid'}})

        req = ParsedRequest()
        req.sort = 'unique_id'
        req.where = json.dumps({'$and': clauses})
        req.max_results = config.MAX_EXPIRY_QUERY_LIMIT
        batch = list(self.get_from_mongo(req=req, lookup=None))
        if not batch:
            break
        cursor_id = batch[-1]['unique_id']
        yield batch
def remove_expired_items(self):
    """Remove every already-expired item from the published collection,
    logging each article before it is deleted.
    """
    expired = self.get_expired_items(date_to_str(utcnow()))
    for item in expired:
        log_msg = "Removing article {{id: {}, version: {}, type: {}, headline: {}, expired_on: {} }}".format(
            item[config.ID_FIELD],
            item[config.VERSION],
            item[ITEM_TYPE],
            item.get("headline", ""),
            item["expiry"],
        )
        logger.info(log_msg)
        superdesk.get_resource_service("published").remove_expired(item)
def get_expired_items(self, expiry_datetime=None, expiry_days=None, max_results=None, include_children=True):
    """Get the expired items.

    Yields lists of expired content_api items `max_results` at a time,
    ordered by `_id`; pagination is keyset-based (resume after the last
    `_id` seen) rather than offset-based.

    :param datetime expiry_datetime: Expiry date/time used to retrieve the list of items, defaults to `utcnow()`
    :param int expiry_days: Number of days content expires, defaults to `CONTENT_API_EXPIRY_DAYS`
    :param int max_results: Maximum results to retrieve per iteration, defaults to `MAX_EXPIRY_QUERY_LIMIT`
    :param boolean include_children: Include only root item if False, otherwise include the entire item chain
    :return list: expired content_api items
    """
    if expiry_datetime is None:
        expiry_datetime = utcnow()
    if expiry_days is None:
        expiry_days = app.settings['CONTENT_API_EXPIRY_DAYS']
    if max_results is None:
        max_results = app.settings['MAX_EXPIRY_QUERY_LIMIT']

    expire_at = date_to_str(expiry_datetime - timedelta(days=expiry_days))

    def build_query(last_id):
        conditions = [{'_updated': {'$lte': expire_at}}]
        if last_id is not None:
            conditions.append({'_id': {'$gt': last_id}})
        if not include_children:
            # Restrict to root items: children carry an `ancestors` field.
            conditions.append({'ancestors': {'$exists': False}})
        return {'$and': conditions}

    last_id = None
    while True:
        req = ParsedRequest()
        req.sort = '_id'
        req.where = json.dumps(build_query(last_id))
        req.max_results = max_results
        page = list(self.get_from_mongo(req=req, lookup=None))
        if not page:
            break
        last_id = page[-1]['_id']
        yield page
def _prepare_response(resource, dct, last_modified=None, etag=None, status=200):
    """Render ``dct`` with the best-matching renderer and wrap it in a
    response object, adding the accessory directives (caching, etag,
    last-modified) that apply to the request.

    :param resource: the resource involved.
    :param dct: the dict that should be sent back as a response.
    :param last_modified: Last-Modified header value.
    :param etag: ETag header value.
    :param status: response status.

    .. versionadded:: 0.0.4
    """
    # Negotiate the mime type and its render function, then build the
    # wsgi response from the rendered payload.
    mime, renderer = _best_mime()
    resp = make_response(globals()[renderer](**dct), status)
    resp.mimetype = mime

    # Cache directives apply to GET only; per-resource settings win over
    # the global defaults.
    if request.method == 'GET':
        if resource:
            domain_cfg = config.DOMAIN[resource]
            cache_control = domain_cfg['cache_control']
            expires = domain_cfg['cache_expires']
        else:
            cache_control = config.CACHE_CONTROL
            expires = config.CACHE_EXPIRES
        if cache_control:
            resp.headers.add('Cache-Control', cache_control)
        if expires:
            resp.expires = time.time() + expires

    # Conditional-request headers.
    if etag:
        resp.headers.add('ETag', etag)
    if last_modified:
        resp.headers.add('Last-Modified', date_to_str(last_modified))

    return resp
def prepopulate_data(file_name):
    """Load fixture items from a prepopulate JSON file and post each one to
    its resource service.

    Placeholders (e.g. ``NOW()``, plus any ``id_name`` recorded from earlier
    items) are substituted into each item's serialized data before insert, so
    later fixtures can reference ids created by earlier ones.

    :param str file_name: file inside the app's prepopulate/prepopulate-data dir
    :raises Exception: when a post returns no ids for an item
    """
    placeholders = {'NOW()': date_to_str(utcnow())}
    # Renamed from `file`, which shadowed the builtin.
    data_path = os.path.join(superdesk.app.config.get('APP_ABSPATH'),
                             'prepopulate', 'prepopulate-data', file_name)
    with open_with_report(data_path, 'rt', encoding='utf8') as app_prepopulation:
        json_data = json.load(app_prepopulation)
        for item in json_data:
            service = get_resource_service(item.get('resource', None))
            id_name = item.get('id_name', None)
            text = json.dumps(item.get('data', None))
            text = apply_placeholders(placeholders, text)
            data = json.loads(text)
            if item.get('resource'):
                app.data.mongo._mongotize(data, item.get('resource'))
            ids = service.post([data])
            if not ids:
                # Was a bare `raise Exception()` with no context at all.
                raise Exception('prepopulate: failed to insert item for resource {!r}'.format(
                    item.get('resource')))
            if id_name:
                placeholders[id_name] = str(ids[0])