def find_one(self, endpoint_name, req, **lookup):
    """Find single item.

    The item is read from the backend returned by ``_backend``.  When a search
    backend is available the same lookup is repeated there; an item that
    exists only in the search backend is returned as a fallback, and an item
    missing from the search backend is inserted into it.

    :param endpoint_name: resource name
    :param req: parsed request
    :param lookup: additional filter
    :return: the item found, or ``None`` when neither backend has it
    """
    backend = self._backend(endpoint_name)
    item = backend.find_one(endpoint_name, req=req, **lookup)
    search_backend = self._lookup_backend(endpoint_name, fallback=True)
    if not search_backend:
        return item

    # set the parent for the parent child in elastic search
    self._set_parent(endpoint_name, item, lookup)
    item_search = search_backend.find_one(endpoint_name, req=req, **lookup)
    if item is None and item_search:
        item = item_search
        logger.warning(item_msg('item is only in elastic', item))
    elif item_search is None and item:
        logger.warning(item_msg('item is only in mongo', item))
        try:
            logger.info(item_msg('trying to add item to elastic', item))
            search_backend.insert(endpoint_name, [item])
        except RequestError as e:
            # best effort only: a failed re-index must not fail the lookup
            logger.error(item_msg('failed to add item into elastic error={}'.format(str(e)), item))
    return item
def _validate(self, doc, **kwargs):
    """Validate ``doc['validate']`` and translate schema errors into messages.

    :param doc: dict carrying the item to check under ``validate`` and,
        optionally, the action name under ``act``
    :param kwargs: when a ``headline`` keyword is passed, every message is
        prefixed with the item's headline (falling back to its ``_id``)
    :return: list of error message strings; empty when there are no errors
        or when no validator was found
    """
    use_headline = 'headline' in kwargs
    act = doc.get('act')
    validators = self._get_validators(doc)
    # The loop body always returns, so only the first validator is applied;
    # the ``else`` clause runs only when there were no validators at all.
    for validator in validators:
        item = doc['validate']
        validation_schema = self._get_validator_schema(validator)
        self._sanitize_fields(item, validator)
        self._process_media(item, validation_schema)
        self._process_sms(item, validation_schema)
        v = SchemaValidator()
        v.allow_unknown = True
        try:
            v.validate(item, validation_schema)
        except TypeError as e:
            logger.exception('Invalid validator schema value "%s" for ' % str(e))
        error_list = v.errors
        response = []
        for name in error_list:
            error = error_list[name]
            messages = []
            # Ignore dateline if item is corrected because it can't be changed after the item is published
            if act == 'correct' and name == 'dateline':
                continue
            elif act == 'kill' and item.get('profile') and name in ('headline', 'abstract', 'body_html'):
                continue
            elif name == 'extra':
                for field in error:
                    display_name = self._get_vocabulary_display_name(field)
                    if 'required' in error[field]:
                        messages.append(REQUIRED_ERROR.format(display_name))
                    else:
                        messages.append('{} {}'.format(display_name, error[field]))
            elif error == 'required field' or isinstance(error, (dict, list)):
                messages.append(REQUIRED_ERROR.format(name.upper()))
            elif error == 'min length is 1' or 'null value not allowed' in error:
                messages.append(REQUIRED_ERROR.format(name.upper()))
            elif 'min length is' in error:
                messages.append('{} is too short'.format(name.upper()))
            elif 'max length is' in error:
                messages.append('{} is too long'.format(name.upper()))
            else:
                messages.append('{} {}'.format(name.upper(), error))

            if use_headline:
                title = item.get('headline', item.get('_id'))
                response.extend('{}: {}'.format(title, message) for message in messages)
            else:
                response.extend(messages)
        return response
    else:
        # ``act`` is optional everywhere else, so do not fail on it here
        logger.warning('validator was not found for {}'.format(act))
        return []
def _change_request(self, endpoint_name, id, updates, original):
    """Apply ``updates`` to an item and mirror the result in the search backend.

    :param endpoint_name: resource name
    :param id: id of the item to update
    :param updates: changes to apply
    :param original: the item as it was before the change
    :return: ``updates``
    :raises SuperdeskApiError: not-found error when the item is missing in
        mongo; it is removed from the search backend first
    """
    backend = self._backend(endpoint_name)
    search_backend = self._lookup_backend(endpoint_name)

    try:
        backend.update(endpoint_name, id, updates, original)
    except eve.io.base.DataLayer.OriginalChangedError:
        if not backend.find_one(endpoint_name, req=None, _id=id) and search_backend:
            # item is in elastic, not in mongo - not good
            logger.warning("Item is missing in mongo resource=%s id=%s", endpoint_name, id)
            self.remove_from_search(endpoint_name, id)
            raise SuperdeskApiError.notFoundError()
        else:
            # item is there, but no change was done - ok
            logger.error('Item : {} not updated in collection {}. '
                         'Updates are : {}'.format(id, endpoint_name, updates))
            return updates

    if search_backend:
        doc = backend.find_one(endpoint_name, req=None, _id=id)
        if not doc:
            # there is no doc in mongo, remove it from elastic
            logger.warning("Item is missing in mongo resource=%s id=%s", endpoint_name, id)
            self.remove_from_search(endpoint_name, id)
            raise SuperdeskApiError.notFoundError()
        search_backend.update(endpoint_name, id, doc)

    return updates
def _update(self, provider, update):
    """Yield the items fetched for every article id of every channel.

    The lookup window starts at the provider's ``last_updated`` value, but
    never earlier than ``INGEST_EXPIRY_MINUTES`` before now.  Missing ``url``
    and ``auth_url`` entries in the provider config get default values.

    :param provider: ingest provider dict; its ``config`` entry is created
        or completed in place
    :param update: not used by this method
    :return: generator yielding the non-empty results of ``fetch_ingest``
    """
    updated = utcnow()
    last_updated = provider.get('last_updated')
    ttl_minutes = app.config['INGEST_EXPIRY_MINUTES']
    oldest_allowed = updated - datetime.timedelta(minutes=ttl_minutes)
    if not last_updated or last_updated < oldest_allowed:
        last_updated = oldest_allowed

    self.provider = provider
    provider_config = provider.get('config')
    if not provider_config:
        provider_config = {}
        provider['config'] = provider_config

    provider_config.setdefault('url', 'http://rmb.reuters.com/rmd/rest/xml')
    provider_config.setdefault('auth_url', 'https://commerce.reuters.com/rmd/rest/xml/login')
    self.URL = provider_config.get('url')

    for channel in self._get_channels():
        for item_id in self._get_article_ids(channel, last_updated, updated):
            try:
                items = self.fetch_ingest(item_id)
                if items:
                    yield items
            # if there was an exception processing the one of the bunch log it and continue
            except Exception as ex:
                logger.warning('Reuters item {} has not been retrieved'.format(item_id))
                logger.exception(ex)
def delete(self, endpoint_name, lookup):
    """Delete method to delete by using mongo query syntax.

    Matching documents are removed from the search backend first, so they
    stop showing up, and only then from mongo.  A document whose removal
    from the search backend fails unexpectedly is kept in mongo as well.

    :param endpoint_name: Name of the endpoint
    :param lookup: User mongo query syntax. example 1. ``{'_id':123}``, 2. ``{'item_id': {'$in': [123, 234]}}``
    :returns: nothing
    """
    backend = self._backend(endpoint_name)
    search_backend = self._lookup_backend(endpoint_name)
    docs = self.get_from_mongo(endpoint_name, lookup=lookup, req=ParsedRequest())
    ids = [doc[config.ID_FIELD] for doc in docs]
    logger.info("total documents to be removed {}".format(len(ids)))

    if search_backend and ids:
        removed_ids = []
        # first remove it from search backend, so it won't show up. when this is done - remove it from mongo
        for _id in ids:
            try:
                self.remove_from_search(endpoint_name, _id)
            except NotFoundError:
                # already gone from the search backend, still remove it from mongo
                logger.warning('item missing from elastic _id=%s' % (_id, ))
            except Exception:
                logger.exception('item can not be removed from elastic _id=%s' % (_id, ))
                continue
            removed_ids.append(_id)
    else:
        removed_ids = ids

    backend.remove(endpoint_name, {config.ID_FIELD: {'$in': removed_ids}})
    # report what was actually passed to the backend, not what was requested
    logger.info("Removed {} documents from {}.".format(len(removed_ids), endpoint_name))
    if not ids:
        logger.warning("No documents for {} resource were deleted using lookup {}".format(endpoint_name, lookup))
def _get_tree(self, endpoint, payload=None):
    """Get xml response for given API endpoint and payload.

    The auth token is added to a copy of ``payload``, so the caller's dict is
    left untouched and the token does not end up in the 404 error message.
    A request that times out is retried up to three times.

    :param endpoint: API endpoint, resolved through ``_get_absolute_url``
    :type endpoint: str
    :param payload: query parameters for the request
    :type payload: dict
    :return: parsed XML element of the response body
    :raises LookupError: when the API answers with status 404
    :raises IngestApiError: on timeout, redirect, request, unicode, parse or
        any other error
    """
    if payload is None:
        payload = {}

    params = dict(payload)
    params['token'] = self._get_auth_token(self.provider, update=True)
    url = self._get_absolute_url(endpoint)

    if not self.session:
        self.session = requests.Session()

    retries = 0
    while True:
        try:
            response = self.session.get(url, params=params, timeout=(30, 15))
        except requests.exceptions.Timeout as ex:
            if retries < 3:
                logger.warning('Reuters API timeout retrying, retries {}'.format(retries))
                retries += 1
                continue
            raise IngestApiError.apiTimeoutError(ex, self.provider)
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider)
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)

        if response.status_code == 404:
            raise LookupError(_('Not found {payload}').format(payload=payload))

        break

    try:
        return etree.fromstring(response.content)  # workaround for http mock lib
    except UnicodeEncodeError as error:
        traceback.print_exc()
        raise IngestApiError.apiUnicodeError(error, self.provider)
    except ParseError as error:
        traceback.print_exc()
        raise IngestApiError.apiParseError(error, self.provider)
    except Exception as error:
        traceback.print_exc()
        raise IngestApiError.apiGeneralError(error, self.provider)
def _validate(self, doc):
    """Validate ``doc['validate']`` against the validator found for ``doc``.

    :param doc: dict carrying the item to check under ``validate``; its
        ``validate_on_post`` entry is passed on when the schema is built
    :return: list of error message strings; empty when there are no errors
        or when no validator was found
    """
    validator = self._get_validator(doc)
    if validator is None:
        # use .get() so a missing type cannot turn the warning into a KeyError
        logger.warning('Validator was not found for type:{}'.format(doc.get(ITEM_TYPE)))
        return []

    validation_schema = self._get_validator_schema(validator, doc.get('validate_on_post'))
    v = SchemaValidator()
    v.allow_unknown = True
    try:
        v.validate(doc['validate'], validation_schema)
    except TypeError as e:
        logger.exception('Invalid validator schema value "%s" for ' % str(e))

    error_list = v.errors
    response = []
    for field in error_list:
        error = error_list[field]
        # If error is a list, only return the first error
        if isinstance(error, list):
            error = error[0]

        if error == 'empty values not allowed' or error == 'required field':
            response.append(REQUIRED_ERROR.format(field.upper()))
        else:
            response.append('{} {}'.format(field.upper(), error))
    return response
def find_one(self, endpoint_name, req, **lookup):
    """Find single item.

    Looks the item up in the backend returned by ``_backend`` and, when a
    search backend is available, in that one too.  An item present only in
    the search backend is returned as a fallback; an item missing from the
    search backend is inserted into it.

    :param endpoint_name: resource name
    :param req: parsed request
    :param lookup: additional filter
    :return: the item found, or ``None`` when neither backend has it
    """
    backend = self._backend(endpoint_name)
    item = backend.find_one(endpoint_name, req=req, **lookup)
    search_backend = self._lookup_backend(endpoint_name, fallback=True)
    if not search_backend:
        return item

    # set the parent for the parent child in elastic search
    self._set_parent(endpoint_name, item, lookup)
    item_search = search_backend.find_one(endpoint_name, req=req, **lookup)

    if item is None and item_search:
        item = item_search
        logger.warning(item_msg('item is only in elastic', item))
        return item

    if item_search is None and item:
        logger.warning(item_msg('item is only in mongo', item))
        try:
            logger.info(item_msg('trying to add item to elastic', item))
            search_backend.insert(endpoint_name, [item])
        except RequestError as e:
            # best effort only: a failed re-index must not fail the lookup
            logger.error(item_msg('failed to add item into elastic error={}'.format(str(e)), item))
    return item
def _change_request(self, endpoint_name, id, updates, original):
    """Apply ``updates`` to an item and mirror the result in the search backend.

    :param endpoint_name: resource name
    :param id: id of the item to update
    :param updates: changes to apply
    :param original: the item as it was before the change
    :return: ``updates``
    :raises SuperdeskApiError: not-found error when the item is missing in
        mongo; a copy found in the search backend is removed first
    """
    backend = self._backend(endpoint_name)
    search_backend = self._lookup_backend(endpoint_name)

    try:
        backend.update(endpoint_name, id, updates, original)
    except eve.io.base.DataLayer.OriginalChangedError:
        if not backend.find_one(endpoint_name, req=None, _id=id) and search_backend:
            # item is in elastic, not in mongo - not good
            logger.warning("Item is missing in mongo resource=%s id=%s", endpoint_name, id)
            item = search_backend.find_one(endpoint_name, req=None, _id=id)
            if item:
                self.remove_from_search(endpoint_name, item)
            raise SuperdeskApiError.notFoundError()
        else:
            # item is there, but no change was done - ok
            logger.error('Item : {} not updated in collection {}. '
                         'Updates are : {}'.format(id, endpoint_name, updates))
            return updates

    if search_backend:
        doc = backend.find_one(endpoint_name, req=None, _id=id)
        if not doc:
            # there is no doc in mongo, remove it from elastic
            logger.warning("Item is missing in mongo resource=%s id=%s", endpoint_name, id)
            item = search_backend.find_one(endpoint_name, req=None, _id=id)
            if item:
                self.remove_from_search(endpoint_name, item)
            raise SuperdeskApiError.notFoundError()
        search_backend.update(endpoint_name, id, doc)

    return updates
def delete_docs(self, endpoint_name, docs):
    """Delete using list of documents.

    Documents are removed from the search backend first, so they stop showing
    up, and only then from mongo.  A document whose removal from the search
    backend fails unexpectedly is left in mongo as well.

    :param endpoint_name: resource name
    :param docs: documents to delete; iterated twice, so it must be re-iterable
    :return: list of ids which were removed
    """
    backend = self._backend(endpoint_name)
    search_backend = self._lookup_backend(endpoint_name)
    ids = [doc[config.ID_FIELD] for doc in docs]
    logger.info("total documents to be removed {}".format(len(ids)))

    if search_backend and ids:
        removed_ids = []
        # first remove it from search backend, so it won't show up. when this is done - remove it from mongo
        for doc in docs:
            doc_id = doc[config.ID_FIELD]
            try:
                self.remove_from_search(endpoint_name, doc)
            except NotFoundError:
                # already gone from the search backend, still remove it from mongo
                logger.warning('item missing from elastic _id=%s' % (doc_id, ))
            except Exception:
                logger.exception('item can not be removed from elastic _id=%s' % (doc_id, ))
                continue
            removed_ids.append(doc_id)
    else:
        removed_ids = ids

    if removed_ids:
        backend.remove(endpoint_name, {config.ID_FIELD: {'$in': removed_ids}})
        logger.info("Removed %d documents from %s.", len(removed_ids), endpoint_name)
    else:
        logger.warning("No documents for %s resource were deleted.", endpoint_name)
    return removed_ids
def _update(self, provider, update):
    """Yield the items fetched for every article id of every channel.

    The lookup window starts at the provider's ``last_updated`` value, but
    never earlier than ``INGEST_EXPIRY_MINUTES`` before now.  Missing ``url``
    and ``auth_url`` entries in the provider config get default values.

    :param provider: ingest provider dict; its ``config`` entry is created
        or completed in place
    :param update: not used by this method
    :return: generator yielding the non-empty results of ``fetch_ingest``
    """
    updated = utcnow()
    last_updated = provider.get("last_updated")
    ttl_minutes = app.config["INGEST_EXPIRY_MINUTES"]
    window_start = updated - datetime.timedelta(minutes=ttl_minutes)
    if not last_updated or last_updated < window_start:
        last_updated = window_start

    self.provider = provider
    provider_config = provider.get("config")
    if not provider_config:
        provider_config = {}
        provider["config"] = provider_config

    provider_config.setdefault("url", "http://rmb.reuters.com/rmd/rest/xml")
    provider_config.setdefault("auth_url", "https://commerce.reuters.com/rmd/rest/xml/login")
    self.URL = provider_config.get("url")

    for channel in self._get_channels():
        for article_id in self._get_article_ids(channel, last_updated, updated):
            try:
                items = self.fetch_ingest(article_id)
                if items:
                    yield items
            # if there was an exception processing the one of the bunch log it and continue
            except Exception as ex:
                logger.warning("Reuters item {} has not been retrieved".format(article_id))
                logger.exception(ex)
def _change_request(self, endpoint_name, id, updates, original, change_request=False, push_notification=True):
    """Apply ``updates`` to an item and mirror the result in the search backend.

    :param endpoint_name: resource name
    :param id: id of the item to update
    :param updates: changes to apply
    :param original: the item as it was before the change
    :param change_request: when true the backend's ``_change_request`` is
        used instead of ``update``
    :param push_notification: when true an "updated" resource notification
        listing the changed keys is pushed after the write
    :return: ``updates``
    :raises SuperdeskApiError: not-found error when the item is missing in
        mongo; a copy found in the search backend is removed first
    """
    backend = self._backend(endpoint_name)
    search_backend = self._lookup_backend(endpoint_name)

    try:
        if change_request:
            # allows using mongo operations other than $set
            backend._change_request(endpoint_name, id, updates, original)
        else:
            backend.update(endpoint_name, id, updates, original)
        if push_notification:
            self._push_resource_notification(
                "updated", endpoint_name, _id=str(id), fields=get_diff_keys(updates, original)
            )
    except eve.io.base.DataLayer.OriginalChangedError:
        if not backend.find_one(endpoint_name, req=None, _id=id) and search_backend:
            # item is in elastic, not in mongo - not good
            logger.warning("Item is missing in mongo resource=%s id=%s", endpoint_name, id)
            item = search_backend.find_one(endpoint_name, req=None, _id=id)
            if item:
                self.remove_from_search(endpoint_name, item)
            raise SuperdeskApiError.notFoundError()
        else:
            # item is there, but no change was done
            logger.error(
                "Item was not updated in mongo, it has changed from the original.",
                extra=dict(
                    id=id,
                    resource=endpoint_name,
                    updates=updates,
                    original=original,
                ),
            )
            return updates

    if search_backend:
        doc = backend.find_one(endpoint_name, req=None, _id=id)
        if not doc:
            # there is no doc in mongo, remove it from elastic
            logger.warning("Item is missing in mongo resource=%s id=%s", endpoint_name, id)
            item = search_backend.find_one(endpoint_name, req=None, _id=id)
            if item:
                self.remove_from_search(endpoint_name, item)
            raise SuperdeskApiError.notFoundError()
        search_backend.update(endpoint_name, id, doc)

    return updates
def _validate(self, doc, **kwargs):
    """Validate ``doc['validate']`` and translate schema errors into messages.

    :param doc: dict carrying the item to check under ``validate`` and,
        optionally, the action name under ``act``
    :param kwargs: when a ``headline`` keyword is passed, every message is
        prefixed with the item's headline (falling back to its ``_id``)
    :return: list of error message strings; empty when there are no errors
        or when no validator was found
    """
    use_headline = 'headline' in kwargs
    act = doc.get('act')
    validators = self._get_validators(doc)
    # The loop body always returns, so only the first validator is applied;
    # the ``else`` clause runs only when there were no validators at all.
    for validator in validators:
        item = doc['validate']
        validation_schema = self._get_validator_schema(validator)
        self._sanitize_fields(item, validator)
        self._set_default_subject_scheme(item)
        self._process_media(item, validation_schema)
        self._process_sms(item, validation_schema)
        v = SchemaValidator()
        v.allow_unknown = True
        try:
            v.validate(item, validation_schema)
        except TypeError as e:
            logger.exception('Invalid validator schema value "%s" for ' % str(e))
        error_list = v.errors
        response = []
        for name in error_list:
            error = error_list[name]
            messages = []
            # Ignore dateline if item is corrected because it can't be changed after the item is published
            if act == 'correct' and name == 'dateline':
                continue
            elif act == 'kill' and item.get('profile') and name in ('headline', 'abstract', 'body_html'):
                continue
            elif name == 'extra':
                for field in error:
                    display_name = self._get_vocabulary_display_name(field)
                    if 'required' in error[field]:
                        messages.append(REQUIRED_ERROR.format(display_name))
                    else:
                        messages.append('{} {}'.format(display_name, error[field]))
            elif error == 'required field' or isinstance(error, (dict, list)):
                messages.append(REQUIRED_ERROR.format(name.upper()))
            elif error == 'min length is 1' or 'null value not allowed' in error:
                messages.append(REQUIRED_ERROR.format(name.upper()))
            elif 'min length is' in error:
                messages.append('{} is too short'.format(name.upper()))
            elif 'max length is' in error:
                messages.append('{} is too long'.format(name.upper()))
            else:
                messages.append('{} {}'.format(name.upper(), error))

            if use_headline:
                title = item.get('headline', item.get('_id'))
                response.extend('{}: {}'.format(title, message) for message in messages)
            else:
                response.extend(messages)
        return response
    else:
        # ``act`` is optional everywhere else, so do not fail on it here
        logger.warning('validator was not found for {}'.format(act))
        return []
def _get_tree(self, endpoint, payload=None):
    """Get xml response for given API endpoint and payload.

    The auth token is added to a copy of ``payload``, so the caller's dict is
    left untouched and the token does not end up in the 404 error message.
    A request that times out is retried up to three times.

    :param endpoint: API endpoint, resolved through ``_get_absolute_url``
    :type endpoint: str
    :param payload: query parameters for the request
    :type payload: dict
    :return: parsed XML element of the response body
    :raises LookupError: when the API answers with status 404
    :raises IngestApiError: on timeout, redirect, request, unicode, parse or
        any other error
    """
    if payload is None:
        payload = {}

    params = dict(payload)
    params['token'] = self._get_auth_token(self.provider, update=True)
    url = self._get_absolute_url(endpoint)

    if not self.session:
        self.session = requests.Session()

    retries = 0
    while True:
        try:
            response = self.session.get(url, params=params, timeout=(30, 15))
        except requests.exceptions.Timeout as ex:
            if retries < 3:
                logger.warning('Reuters API timeout retrying, retries {}'.format(retries))
                retries += 1
                continue
            raise IngestApiError.apiTimeoutError(ex, self.provider)
        except requests.exceptions.TooManyRedirects as ex:
            # Tell the user their URL was bad and try a different one
            raise IngestApiError.apiRedirectError(ex, self.provider)
        except requests.exceptions.RequestException as ex:
            # catastrophic error. bail.
            raise IngestApiError.apiRequestError(ex, self.provider)
        except Exception as error:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(error, self.provider)

        if response.status_code == 404:
            raise LookupError('Not found %s' % payload)

        break

    try:
        return etree.fromstring(response.content)  # workaround for http mock lib
    except UnicodeEncodeError as error:
        traceback.print_exc()
        raise IngestApiError.apiUnicodeError(error, self.provider)
    except ParseError as error:
        traceback.print_exc()
        raise IngestApiError.apiParseError(error, self.provider)
    except Exception as error:
        traceback.print_exc()
        raise IngestApiError.apiGeneralError(error, self.provider)
def delete(self, endpoint_name, lookup):
    """Delete method to delete by using mongo query syntax.

    The matching documents are read from mongo ordered by ``_id`` and handed
    to ``delete_docs``.

    :param endpoint_name: Name of the endpoint
    :param lookup: User mongo query syntax. example 1. ``{'_id':123}``, 2. ``{'item_id': {'$in': [123, 234]}}``
    :returns: Returns list of ids which were removed.
    """
    cursor = self.get_from_mongo(endpoint_name, lookup=lookup, req=ParsedRequest())
    docs = list(cursor.sort("_id", 1))
    removed_ids = self.delete_docs(endpoint_name, docs)
    if docs and not removed_ids:
        logger.warning("No documents for %s resource were deleted using lookup %s", endpoint_name, lookup)
    return removed_ids
def search(self, endpoint_name, source):
    """Search for items using search backend

    :param string endpoint_name
    :param dict source: query, passed JSON-encoded as the ``source`` request argument
    :return: result of the search backend's ``find``, or ``None`` when the
        endpoint has no search backend
    """
    req = ParsedRequest()
    req.args = {'source': json.dumps(source)}
    search_backend = self._lookup_backend(endpoint_name)
    if not search_backend:
        logger.warning('there is no search backend for %s' % endpoint_name)
        return None
    return search_backend.find(endpoint_name, req, {})
def find_one(self, endpoint_name, req, **lookup):
    """Find single item.

    Looks the item up in the backend returned by ``_backend`` and, when a
    search backend is available, in that one too.  An item present only in
    the search backend is returned as a fallback; an item missing from the
    search backend is inserted into it.

    :param endpoint_name: resource name
    :param req: parsed request
    :param lookup: additional filter
    :return: the item found, or ``None`` when neither backend has it
    """
    backend = self._backend(endpoint_name)
    item = backend.find_one(endpoint_name, req=req, **lookup)
    search_backend = self._lookup_backend(endpoint_name, fallback=True)
    if not search_backend:
        return item

    item_search = search_backend.find_one(endpoint_name, req=req, **lookup)
    if item is None and item_search:
        item = item_search
        logger.warning(item_msg('item is only in elastic', item))
    elif item_search is None and item:
        logger.warning(item_msg('item is only in mongo', item))
        try:
            logger.info(item_msg('trying to add item to elastic', item))
            search_backend.insert(endpoint_name, [item])
        except RequestError as e:
            # best effort only: a failed re-index must not fail the lookup
            logger.error(item_msg('failed to add item into elastic error={}'.format(str(e)), item))
    return item
def delete(self, endpoint_name, lookup):
    """
    Delete method to delete by using mongo query syntax

    The matching documents are removed from mongo first and then, when a
    search backend exists, from the search backend.

    :param endpoint_name: Name of the endpoint
    :param lookup: User mongo query syntax. example 1. {'_id':123}, 2. {'item_id': {'$in': [123, 234]}}
    :returns: nothing
    """
    backend = self._backend(endpoint_name)
    search_backend = self._lookup_backend(endpoint_name)
    docs = self.get_from_mongo(endpoint_name, lookup=lookup, req=ParsedRequest())
    ids = [doc[config.ID_FIELD] for doc in docs]
    backend.remove(endpoint_name, {config.ID_FIELD: {'$in': ids}})
    if not ids:
        logger.warning("No documents for {} resource were deleted using lookup {}".format(endpoint_name, lookup))
    elif search_backend:
        self._remove_documents_from_search_backend(endpoint_name, ids)
def get_parent_id(doc):
    """Return the id of the item at the start of ``doc``'s rewrite chain.

    Follows ``rewrite_of`` links recursively.  Parents are looked up in
    ``docs`` and, when missing there, loaded through ``service`` and cached
    in ``docs``; both names come from the enclosing scope.

    :param doc: item to resolve
    :return: id of the first item without ``rewrite_of``, or ``None`` when a
        parent could not be loaded
    """
    parent_id = doc.get('rewrite_of')
    if not parent_id:
        return doc[config.ID_FIELD]

    if parent_id not in docs:
        # If the parent item was not part of this stats iteration
        # then load it now
        parent = service.find_one(req=None, _id=parent_id)
        if not parent:
            # Stats entry for the parent item was not found
            logger.warning('Failed to get parent item {}'.format(parent_id))
            return None
        docs[parent_id] = parent

    return get_parent_id(docs[parent_id])
def _validate(self, doc, **kwargs):
    """Validate ``doc['validate']`` and translate schema errors into messages.

    :param doc: dict carrying the item to check under ``validate`` and,
        optionally, the action name under ``act``
    :param kwargs: when a ``headline`` keyword is passed, every message is
        prefixed with the item's headline (falling back to its ``_id``)
    :return: list of error message strings; empty when there are no errors
        or when no validator was found
    """
    use_headline = 'headline' in kwargs
    act = doc.get('act')
    validators = self._get_validators(doc)
    # The loop body always returns, so only the first validator is applied;
    # the ``else`` clause runs only when there were no validators at all.
    for validator in validators:
        item = doc['validate']
        self._sanitize_fields(item, validator)
        v = SchemaValidator()
        v.allow_unknown = True
        try:
            v.validate(item, self._get_validator_schema(validator))
        except TypeError as e:
            logger.exception('Invalid validator schema value "%s" for ' % str(e))
        error_list = v.errors
        response = []
        for name in error_list:
            error = error_list[name]
            # Ignore dateline if item is corrected because it can't be changed after the item is published
            if act == 'correct' and name == 'dateline':
                continue
            elif act == 'kill' and item.get('profile') and name in ('headline', 'abstract', 'body_html'):
                continue
            elif error == 'required field' or isinstance(error, (dict, list)) or error == 'min length is 1':
                message = '{} is a required field'.format(name.upper())
            elif 'min length is' in error:
                message = '{} is too short'.format(name.upper())
            elif 'max length is' in error:
                message = '{} is too long'.format(name.upper())
            else:
                message = '{} {}'.format(name.upper(), error)

            if use_headline:
                message = '{}: {}'.format(item.get('headline', item.get('_id')), message)
            response.append(message)
        return response
    else:
        # ``act`` is optional everywhere else, so do not fail on it here
        logger.warning('validator was not found for {}'.format(act))
        return []
def __get_redis(app_ctx):
    """Constructs Redis Client object.

    When the configured URL is rejected with a ``ValueError``, its scheme
    (everything before the first ``//``) is swapped for ``redis:`` and the
    connection is attempted once more.

    :param app_ctx: object whose ``config`` mapping holds ``REDIS_URL``
    :return: Redis Client object
    """
    redis_url = app_ctx.config['REDIS_URL']
    try:
        return redis.from_url(redis_url)
    except ValueError as e:
        logger.warning('Failed to connect to redis using a connection string: {}'.format(e))

        # Newer Redis clients will not accept 'amqp' as the scheme
        # Attempt to mock a redis scheme instead
        protocol = redis_url.split('//')[0]
        # Swap only the leading scheme: ``str.replace`` would also rewrite the
        # same text further inside the URL (e.g. in credentials) and would
        # insert the new scheme between every character for an empty scheme.
        new_url = 'redis:' + redis_url[len(protocol):]
        return redis.from_url(new_url)
def _validate(self, doc, **kwargs):
    """Validate ``doc["validate"]`` and translate schema errors into messages.

    :param doc: dict carrying the item to check under ``validate`` and,
        optionally, the action name under ``act``
    :param kwargs: when a ``headline`` keyword is passed, every message is
        prefixed with the item's headline (falling back to its ``_id``)
    :return: list of error message strings; empty when there are no errors
        or when no validator was found
    """
    use_headline = "headline" in kwargs
    act = doc.get("act")
    validators = self._get_validators(doc)
    # The loop body always returns, so only the first validator is applied;
    # the ``else`` clause runs only when there were no validators at all.
    for validator in validators:
        item = doc["validate"]
        self._sanitize_fields(item, validator)
        v = SchemaValidator()
        v.allow_unknown = True
        try:
            v.validate(item, self._get_validator_schema(validator))
        except TypeError as e:
            logger.exception('Invalid validator schema value "%s" for ' % str(e))
        error_list = v.errors
        response = []
        for name in error_list:
            error = error_list[name]
            # Ignore dateline if item is corrected because it can't be changed after the item is published
            if act == "correct" and name == "dateline":
                continue
            elif act == "kill" and item.get("profile") and name in ("headline", "abstract", "body_html"):
                continue
            elif error == "required field" or isinstance(error, (dict, list)) or error == "min length is 1":
                message = "{} is a required field".format(name.upper())
            elif "min length is" in error:
                message = "{} is too short".format(name.upper())
            elif "max length is" in error:
                message = "{} is too long".format(name.upper())
            else:
                message = "{} {}".format(name.upper(), error)

            if use_headline:
                message = "{}: {}".format(item.get("headline", item.get("_id")), message)
            response.append(message)
        return response
    else:
        # ``act`` is optional everywhere else, so do not fail on it here
        logger.warning("validator was not found for {}".format(act))
        return []
def delete(self, endpoint_name, lookup):
    """
    Delete method to delete by using mongo query syntax

    The matching documents are removed from mongo; when that reports at least
    one removal and a search backend exists, they are removed there as well.

    :param endpoint_name: Name of the endpoint
    :param lookup: User mongo query syntax. example 1. {'_id':123}, 2. {'item_id': {'$in': [123, 234]}}
    :returns: Returns the mongo remove command response. {'n': 12, 'ok': 1}
    """
    backend = self._backend(endpoint_name)
    search_backend = self._lookup_backend(endpoint_name)
    docs = self.get_from_mongo(endpoint_name, lookup=lookup, req=ParsedRequest())
    ids = [doc[config.ID_FIELD] for doc in docs]
    res = backend.remove(endpoint_name, {config.ID_FIELD: {'$in': ids}})
    if res:
        removed_count = res.get('n', 0)
        if removed_count > 0 and search_backend is not None:
            self._remove_documents_from_search_backend(endpoint_name, ids)
        if removed_count == 0:
            logger.warning("No documents for {} resource were deleted using lookup {}".format(endpoint_name, lookup))
    return res
def _change_request(self, endpoint_name, id, updates, original):
    """Apply ``updates`` to an item and mirror the result in the search backend.

    :param endpoint_name: resource name
    :param id: id of the item to update
    :param updates: changes to apply
    :param original: the item as it was before the change
    :return: ``updates``
    :raises SuperdeskApiError: not-found error when the backend reports a
        changed original and the item no longer exists in mongo
    """
    backend = self._backend(endpoint_name)
    search_backend = self._lookup_backend(endpoint_name)

    try:
        backend.update(endpoint_name, id, updates, original)
    except eve.io.base.DataLayer.OriginalChangedError:
        if not backend.find_one(endpoint_name, req=None, _id=id):
            # item is in elastic, not in mongo - not good
            logger.warning("Item is missing in mongo resource=%s id=%s", endpoint_name, id)
            self._remove_documents_from_search_backend(endpoint_name, [id])
            raise SuperdeskApiError.notFoundError()
        else:
            # item is there, but no change was done - ok
            return updates

    if search_backend:
        doc = backend.find_one(endpoint_name, req=None, _id=id)
        search_backend.update(endpoint_name, id, doc)

    return updates
def _change_request(self, endpoint_name, id, updates, original):
    """Apply ``updates`` to an item and mirror the result in the search backend.

    :param endpoint_name: resource name
    :param id: id of the item to update
    :param updates: changes to apply
    :param original: the item as it was before the change
    :return: ``updates``
    :raises SuperdeskApiError: not-found error when the backend reports a
        changed original and the item no longer exists in mongo
    """
    backend = self._backend(endpoint_name)
    search_backend = self._lookup_backend(endpoint_name)

    try:
        backend.update(endpoint_name, id, updates, original)
    except eve.io.base.DataLayer.OriginalChangedError:
        still_in_mongo = backend.find_one(endpoint_name, req=None, _id=id)
        if still_in_mongo:
            # item is there, but no change was done - ok
            return updates
        # item is in elastic, not in mongo - not good
        logger.warning("Item is missing in mongo resource=%s id=%s", endpoint_name, id)
        self._remove_documents_from_search_backend(endpoint_name, [id])
        raise SuperdeskApiError.notFoundError()

    if search_backend:
        doc = backend.find_one(endpoint_name, req=None, _id=id)
        search_backend.update(endpoint_name, id, doc)

    return updates
def _update(self, provider):
    """Yield the items fetched for every article id of every channel.

    The lookup window starts at the provider's ``last_updated`` value, but
    never earlier than ``INGEST_EXPIRY_MINUTES`` before now.  Missing ``url``
    and ``auth_url`` entries in the provider config get default values.

    :param provider: ingest provider dict; its ``config`` entry is created
        or completed in place
    :return: generator yielding the non-empty results of ``fetch_ingest``
    """
    updated = utcnow()
    last_updated = provider.get('last_updated')
    ttl_minutes = app.config['INGEST_EXPIRY_MINUTES']
    window_start = updated - datetime.timedelta(minutes=ttl_minutes)
    if not last_updated or last_updated < window_start:
        last_updated = window_start

    self.provider = provider
    provider_config = provider.get('config')
    if not provider_config:
        provider_config = {}
        provider['config'] = provider_config

    provider_config.setdefault('url', 'http://rmb.reuters.com/rmd/rest/xml')
    provider_config.setdefault('auth_url', 'https://commerce.reuters.com/rmd/rest/xml/login')
    self.URL = provider_config.get('url')

    for channel in self._get_channels():
        for article_id in self._get_article_ids(channel, last_updated, updated):
            try:
                items = self.fetch_ingest(article_id)
                if items:
                    yield items
            # if there was an exception processing the one of the bunch log it and continue
            except Exception as ex:
                logger.warning('Reuters item {} has not been retrieved'.format(article_id))
                logger.exception(ex)
def get_attachment_public_url(attachment: Dict[str, Any]) -> Optional[str]:
    """Returns the file url for the attachment provided

    When the dict carries an ``attachment`` key it is treated as a reference
    and the attachment itself is loaded from the ``attachments`` resource.

    :param dict attachment: The attachment to get the file URL
    :rtype: str
    :return: None if the attachment is not public, cannot be found or has no
        media, otherwise the public URL to the file
    """
    if attachment.get("attachment"):
        # retrieve object reference
        attachment_ref = attachment["attachment"]
        attachment = superdesk.get_resource_service("attachments").find_one(req=None, _id=attachment_ref)
        if not attachment:
            # the lookup found nothing, so there is nothing to build a URL from
            logger.warning('Attachment "%s" was not found', attachment_ref)
            return None

    if attachment.get("internal"):
        return None

    if not attachment.get("media"):
        # All attachments should have a `media` attribute set
        # The provided attachment dict must be invalid
        attachment_id = str(attachment.get("_id"))
        logger.warning(f'Attachment "{attachment_id}" has no media attribute set')
        return None

    return current_app.media.url_for_external(attachment["media"], RESOURCE)
def _get_article_ids(self, channel, last_updated, updated):
    """
    Get article ids which should be upserted also save the poll token that is returned.

    :param channel: channel to request
    :param last_updated: start of the date range, used when the channel has no poll token
    :param updated: end of the date range, used when the channel has no poll token
    :return: set of article ids; empty on a status 30 response, when no poll
        token is returned or when the poll token did not change
    """
    ids = set()
    payload = {"channel": channel, "fieldsRef": "id"}

    # check if the channel has a pollToken if not fall back to dateRange
    last_poll_token = self._get_poll_token(channel)
    if last_poll_token is not None:
        logger.info("Reuters requesting channel {} with poll token {}".format(channel, last_poll_token))
        payload["pollToken"] = last_poll_token
    else:
        payload["dateRange"] = "%s-%s" % (self._format_date(last_updated), self._format_date(updated))
        logger.info("Reuters requesting channel {} with dateRange {}".format(channel, payload["dateRange"]))

    tree = self._get_tree("items", payload)
    status_code = tree.find("status").get("code") if tree.tag == "results" else tree.get("code")

    # check the returned status
    if status_code != "10":
        logger.warning("Reuters channel request returned status code {}".format(status_code))
        # status code 30 indicates failure
        if status_code == "30":
            # look the error element up once and tolerate a response without it
            error = tree.find("error")
            error_code = error.get("code") if error is not None else None
            error_text = error.text if error is not None else None
            # invalid token
            logger.warning("Reuters error on channel {} code {} {}".format(channel, error_code, error_text))
            if error_code == "2100":
                self._save_poll_token(channel, None)
                logger.warning("Reuters channel invalid token reseting {}".format(status_code))
            return ids

    # extract the returned poll token if there is one
    poll_token = tree.find("pollToken")
    if poll_token is None:
        logger.info("Reuters channel {} retrieved no token".format(channel))
        return ids

    if poll_token.text == last_poll_token:
        # the token has not changed, so nothing new
        logger.info("Reuters channel {} nothing new".format(channel))
        return ids

    # a new token indicated new content
    logger.info("Reuters channel {} new token {}".format(channel, poll_token.text))
    self._save_poll_token(channel, poll_token.text)

    for result in tree.findall("result"):
        article_id = result.find("id").text
        ids.add(article_id)
        logger.info("Reuters id : {}".format(article_id))

    return ids
def _get_article_ids(self, channel, last_updated, updated):
    """
    Get article ids which should be upserted also save the poll token that is returned.

    :param channel: channel to request
    :param last_updated: start of the date range, used when the channel has no poll token
    :param updated: end of the date range, used when the channel has no poll token
    :return: set of article ids; empty on a status 30 response, when no poll
        token is returned or when the poll token did not change
    """
    ids = set()
    payload = {'channel': channel, 'fieldsRef': 'id'}

    # check if the channel has a pollToken if not fall back to dateRange
    last_poll_token = self._get_poll_token(channel)
    if last_poll_token is not None:
        logger.info("Reuters requesting channel {} with poll token {}".format(channel, last_poll_token))
        payload['pollToken'] = last_poll_token
    else:
        payload['dateRange'] = "%s-%s" % (self._format_date(last_updated), self._format_date(updated))
        logger.info("Reuters requesting channel {} with dateRange {}".format(channel, payload['dateRange']))

    tree = self._get_tree('items', payload)
    status_code = tree.find('status').get('code') if tree.tag == 'results' else tree.get('code')

    # check the returned status
    if status_code != '10':
        logger.warning("Reuters channel request returned status code {}".format(status_code))
        # status code 30 indicates failure
        if status_code == '30':
            # look the error element up once and tolerate a response without it
            error = tree.find('error')
            error_code = error.get('code') if error is not None else None
            error_text = error.text if error is not None else None
            # invalid token
            logger.warning("Reuters error on channel {} code {} {}".format(channel, error_code, error_text))
            if error_code == '2100':
                self._save_poll_token(channel, None)
                logger.warning("Reuters channel invalid token reseting {}".format(status_code))
            return ids

    # extract the returned poll token if there is one
    poll_token = tree.find('pollToken')
    if poll_token is None:
        logger.info("Reuters channel {} retrieved no token".format(channel))
        return ids

    if poll_token.text == last_poll_token:
        # the token has not changed, so nothing new
        logger.info("Reuters channel {} nothing new".format(channel))
        return ids

    # a new token indicated new content
    logger.info("Reuters channel {} new token {}".format(channel, poll_token.text))
    self._save_poll_token(channel, poll_token.text)

    for result in tree.findall('result'):
        article_id = result.find('id').text
        ids.add(article_id)
        logger.info("Reuters id : {}".format(article_id))

    return ids
def _validate(self, doc, fields=False, **kwargs):
    """Validate ``doc['validate']`` and translate the errors into messages.

    The item is pre-processed in place (sanitising, default subject scheme,
    media, sms and media metadata handling) and then checked against the schema
    of the first validator found for ``doc``; the loop body returns at the end
    of its first iteration. Before returning, the ``item_validate`` signal is
    sent with a copy of the item taken before any processing, the messages and
    the raw validator errors, so custom code can do additional validation.

    NOTE(review): block nesting was rebuilt from whitespace-collapsed source;
    confirm the for/else layout against the canonical file.

    :param doc: dict holding the item under ``validate`` and the action under ``act``
    :param fields: when true, return the raw validator errors as well
    :param kwargs: if a ``headline`` key is present every message is prefixed
        with the item's headline, or its ``_id`` when it has no headline
    :return: list of messages, or ``(messages, errors)`` when ``fields`` is
        true; ``[]`` or ``([], {})`` when no validator was found
    """
    # make a copy for signal before validation processing
    item = deepcopy(doc['validate'])
    use_headline = 'headline' in kwargs
    validators = self._get_validators(doc)
    for validator in validators:
        validation_schema = self._get_validator_schema(validator)
        self._sanitize_fields(doc['validate'], validator)
        self._set_default_subject_scheme(doc['validate'])
        self._process_media(doc['validate'], validation_schema)
        self._process_sms(doc['validate'], validation_schema)
        self._process_media_metadata(doc['validate'], validation_schema)
        v = SchemaValidator()
        v.allow_unknown = True
        try:
            v.validate(doc['validate'], validation_schema)
        except TypeError as e:
            # best effort: a broken schema is logged and whatever errors the
            # validator holds are still reported below
            logger.exception('Invalid validator schema value "%s" for ' % str(e))
        error_list = v.errors
        act = doc.get('act')
        response = []
        for name in error_list:
            error = error_list[name]
            messages = []
            # Ignore dateline if item is corrected because it can't be changed after the item is published
            if act == 'correct' and name == 'dateline':
                continue
            elif act == 'kill' and doc['validate'].get('profile', None) and \
                    name in ('headline', 'abstract', 'body_html'):
                continue
            elif name == 'extra':
                # errors of the ``extra`` field are reported per nested field
                for field in error:
                    display_name = self._get_vocabulary_display_name(field)
                    if 'required' in error[field]:
                        messages.append(ERROR_MESSAGES[REQUIRED_ERROR].format(display_name))
                    else:
                        error_field = self.get_error_field_name(display_name)
                        messages.append('{} {}'.format(error_field, error[field]))
            elif 'required field' in error or isinstance(error, (dict, list)):
                # nested (dict/list) errors are reported as a required field too
                display_name = self._get_vocabulary_display_name(name)
                error_field = self.get_error_field_name(display_name)
                messages.append(ERROR_MESSAGES[REQUIRED_ERROR].format(error_field.upper()))
            elif 'min length is 1' == error or 'null value not allowed' in error:
                messages.append(ERROR_MESSAGES[REQUIRED_ERROR].format(name.upper()))
            elif 'min length is' in error:
                error_field = self.get_error_field_name(name)
                messages.append(ERROR_MESSAGES[TOO_SHORT].format(error_field.upper()))
            elif 'max length is' in error:
                error_field = self.get_error_field_name(name)
                messages.append(ERROR_MESSAGES[TOO_LONG].format(error_field.upper()))
            else:
                # use the mapped message when there is one, the raw error otherwise
                error_field = self.get_error_field_name(name)
                messages.append('{} {}'.format(
                    error_field.upper(),
                    ERROR_MESSAGES[error] if ERROR_MESSAGES.get(error) else error))

            for message in messages:
                if use_headline:
                    headline = '{}: {}'.format(
                        doc['validate'].get('headline', doc['validate'].get('_id')), message)
                    response.append(headline)
                else:
                    response.append(message)

        # let custom code do additional validation
        item_validate.send(self, item=item, response=response, error_fields=v.errors)

        if fields:
            return response, v.errors
        return response
    else:
        # only reached when there was no validator to iterate over; ``act`` is
        # read with .get() here as in the rest of the method so a missing key
        # cannot turn this warning into a KeyError
        logger.warning('validator was not found for {}'.format(doc.get('act')))
        if fields:
            return [], {}
        return []