def apply_rule_set(item, provider, rule_set=None):
    """Apply the text replacement rules of a rule set to ``item['body_html']``.

    When ``rule_set`` is not given, the rule set referenced by
    ``provider['rule_set']`` (if any) is loaded from the ``rule_sets``
    resource. Without a rule set, or without a ``body_html`` field, the item
    is returned untouched.

    :param item: Item to be ingested; its ``body_html`` is rewritten in place
    :param provider: provider object from whom the item was received
    :param rule_set: optional rule set with a ``rules`` list of
        ``{'old': ..., 'new': ...}`` entries
    :return: item
    :raises ProviderError: ``ruleError`` wrapping any exception raised here
    """
    try:
        needs_lookup = rule_set is None and provider.get('rule_set') is not None
        if needs_lookup:
            rule_set = superdesk.get_resource_service('rule_sets').find_one(_id=provider['rule_set'], req=None)

        if not rule_set or 'body_html' not in item:
            return item

        text = item['body_html']
        for replacement in rule_set['rules']:
            text = text.replace(replacement['old'], replacement['new'])
        item['body_html'] = text
        return item
    except Exception as ex:
        raise ProviderError.ruleError(ex, provider)
def create(self, docs, **kwargs):
    """Fetch the requested external items into the archive.

    Each entry of ``docs`` names an external item (``guid``) and a
    destination (``desk`` and optionally ``stage``). The item is fetched,
    copied under a freshly generated id, passed to ``send_to`` and posted to
    the archive resource.

    :param docs: list of fetch requests
    :return: list of the ids generated for the new archive items
    :raises SuperdeskApiError: bad request when a request has no ``desk``
    :raises ProviderError: ``externalProviderError`` when ``self.fetch``
        raises ``FileNotFoundError``
    """
    created_ids = []
    provider = self.get_provider()

    for request in docs:
        desk_id = request.get('desk')
        if not desk_id:
            # if no desk is selected then it is bad request
            raise SuperdeskApiError.badRequestError("Destination desk cannot be empty.")

        try:
            source_item = self.fetch(request['guid'])
        except FileNotFoundError as ex:
            raise ProviderError.externalProviderError(ex, provider)

        fetched = dict(source_item)
        fetched_id = generate_guid(type=GUID_TAG)
        created_ids.append(fetched_id)
        fetched['_id'] = fetched_id
        generate_unique_id_and_name(fetched)

        if provider:
            fetched['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])

        fetched[config.VERSION] = 1
        send_to(doc=fetched, update=None, desk_id=desk_id, stage_id=request.get('stage'))
        fetched[ITEM_STATE] = request.get(ITEM_STATE, CONTENT_STATE.FETCHED)
        # both references point back at the item the copy was made from
        fetched[INGEST_ID] = fetched[FAMILY_ID] = source_item['_id']

        remove_unwanted(fetched)
        set_original_creator(fetched)

        superdesk.get_resource_service(ARCHIVE).post([fetched])
        insert_into_versions(fetched.get('_id'))

    return created_ids
def create(self, docs, **kwargs):
    """Fetch the requested external items into the archive.

    Each entry of ``docs`` names an external item (``guid``) and a
    destination (``desk`` and optionally ``stage``/``state``). The item is
    fetched, converted with ``fetch_item`` and posted to the archive
    resource. When at least one item was fetched and a provider is known,
    the provider's ``last_item_update`` is set to the current time.

    :param docs: list of fetch requests
    :return: list of the guids of the new archive items
    :raises SuperdeskApiError: bad request when a request has no ``desk``
    :raises ProviderError: ``externalProviderError`` when ``self.fetch``
        raises ``FileNotFoundError``
    """
    new_guids = []
    provider = self.get_provider()

    for doc in docs:
        if not doc.get('desk'):
            # if no desk is selected then it is bad request
            raise SuperdeskApiError.badRequestError(_("Destination desk cannot be empty."))

        try:
            archived_doc = self.fetch(doc['guid'])
        except FileNotFoundError as ex:
            raise ProviderError.externalProviderError(ex, provider)

        dest_doc = fetch_item(archived_doc, doc.get('desk'), doc.get('stage'), state=doc.get('state'))
        new_guids.append(dest_doc['guid'])

        if provider:
            dest_doc['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])

        superdesk.get_resource_service(ARCHIVE).post([dest_doc])
        insert_into_versions(dest_doc.get('_id'))

    # The loop tolerates a missing provider, so the timestamp update must too;
    # calling ``provider.get`` on ``None`` would fail after the items were
    # already posted.
    if new_guids and provider:
        get_resource_service('search_providers').system_update(
            provider.get(config.ID_FIELD), {'last_item_update': utcnow()}, provider)

    return new_guids
def _update(self, provider):
    """Parse the files found in the provider's configured path and yield the items.

    Files are visited in the order returned by ``get_sorted_files``. A file
    is parsed only when ``self.is_latest_content`` accepts its modification
    time; every visited file is handed to ``self.move_file`` with
    ``success`` reflecting whether processing failed.

    :param provider: ingest provider; the directory is read from
        ``provider['config']['path']``
    :return: generator yielding one single-item list per parsed file
    :raises ParserError: ``ZCZCParserError`` when that error is caught while
        processing a file
    :raises ProviderError: ``ingestError`` wrapping any other unexpected exception
    """
    self.provider = provider
    self.path = provider.get('config', {}).get('path', None)

    if not self.path:
        logger.info('No path')
        return []

    for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created):
        try:
            filepath = os.path.join(self.path, filename)
            if not os.path.isfile(filepath):
                continue

            modified_at = datetime.fromtimestamp(os.lstat(filepath).st_mtime, tz=utc)
            if not self.is_latest_content(modified_at, provider.get('last_updated')):
                # not considered latest content: moved as a success without parsing
                self.move_file(self.path, filename, provider=provider, success=True)
                continue

            item = self.parser.parse_file(filepath, self)
            self.move_file(self.path, filename, provider=provider, success=True)
            yield [item]
        except ParserError.ZCZCParserError as ex:
            logger.exception("Ingest Type: Teletype - File: {0} could not be processed".format(filename))
            self.move_file(self.path, filename, provider=provider, success=False)
            raise ParserError.ZCZCParserError(ex, provider)
        except ParserError:
            # other parser errors only mark the file as failed; the loop continues
            self.move_file(self.path, filename, provider=provider, success=False)
        except Exception as ex:
            self.move_file(self.path, filename, provider=provider, success=False)
            raise ProviderError.ingestError(ex, provider)
def apply_rule_set(item, provider, rule_set=None):
    """Run the replacement rules of a rule set over the item's ``body_html``.

    If no ``rule_set`` is passed, the one referenced by
    ``provider['rule_set']`` is looked up in the ``rule_sets`` resource. If
    there is no rule set, or the item has no ``body_html``, the item is
    returned without any change.

    :param item: Item to be ingested; its ``body_html`` is rewritten in place
    :param provider: provider object from whom the item was received
    :param rule_set: optional rule set with a ``rules`` list of
        ``{'old': ..., 'new': ...}`` entries
    :return: item
    :raises ProviderError: ``ruleError`` wrapping any exception raised here
    """
    try:
        if rule_set is None:
            referenced = provider.get('rule_set')
            if referenced is not None:
                rule_set = superdesk.get_resource_service('rule_sets').find_one(
                    _id=provider['rule_set'], req=None)

        applicable = rule_set and 'body_html' in item
        if applicable:
            rewritten = item['body_html']
            for rule in rule_set['rules']:
                rewritten = rewritten.replace(rule['old'], rule['new'])
            item['body_html'] = rewritten

        return item
    except Exception as ex:
        raise ProviderError.ruleError(ex, provider)
def _update(self, provider): self.provider = provider self.path = provider.get('config', {}).get('path', None) if not self.path: return for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created): try: if os.path.isfile(os.path.join(self.path, filename)): filepath = os.path.join(self.path, filename) stat = os.lstat(filepath) last_updated = datetime.fromtimestamp(stat.st_mtime, tz=utc) if self.is_latest_content(last_updated, provider.get('last_updated')): with open(os.path.join(self.path, filename), 'r') as f: item = self.parser.parse_message(etree.fromstring(f.read()), provider) self.add_timestamps(item) self.move_file(self.path, filename, provider=provider, success=True) yield [item] else: self.move_file(self.path, filename, provider=provider, success=True) except etreeParserError as ex: logger.exception("Ingest Type: AFP - File: {0} could not be processed".format(filename), ex) self.move_file(self.path, filename, provider=provider, success=False) raise ParserError.newsmlOneParserError(ex, provider) except ParserError as ex: self.move_file(self.path, filename, provider=provider, success=False) except Exception as ex: self.move_file(self.path, filename, provider=provider, success=False) raise ProviderError.ingestError(ex, provider) push_notification('ingest:update')
def remove_expired(self, provider):
    """Remove the provider's expired data and push an ``ingest:cleaned`` notification.

    :param provider: ingest provider passed on to ``remove_expired_data``
    :raises ProviderError: ``expiredContentError`` wrapping any failure,
        which is logged first
    """
    try:
        remove_expired_data(provider)
        push_notification('ingest:cleaned')
    except Exception as failure:
        logger.exception(failure)
        raise ProviderError.expiredContentError(failure, provider)
def filter_expired_items(provider, items):
    """Keep only the items created within the provider's retention period.

    The retention period is ``provider['days_to_keep']`` days, defaulting to
    ``DAYS_TO_KEEP``. Items without ``versioncreated`` are compared using
    the current time.

    :param provider: ingest provider details
    :param items: list of items received from the provider
    :return: list of items whose ``versioncreated`` is after the cut-off
    :raises ProviderError: ``providerFilterExpiredContentError`` wrapping any failure
    """
    try:
        retention_days = provider.get('days_to_keep', DAYS_TO_KEEP)
        cutoff = utcnow() - timedelta(days=retention_days)

        kept = []
        for entry in items:
            if entry.get('versioncreated', utcnow()) > cutoff:
                kept.append(entry)
        return kept
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
def filter_expired_items(provider, items):
    """Filter out items that are expired or of a type the provider does not accept.

    An item is kept when it is not expired and its type (``ITEM_TYPE``,
    default ``'text'``) is listed in ``provider['content_types']``.

    :param provider: Ingest Provider Details.
    :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
    :param items: list of items received from the provider
    :type items: list
    :return: list of items which can be saved into ingest collection
    :rtype: list
    :raises ProviderError: ``providerFilterExpiredContentError`` wrapping any failure
    """

    def is_not_expired(item):
        """Return True when the item's expiry lies in the future."""
        if item.get('expiry') or item.get('versioncreated'):
            # Compute the fallback lazily: a ``dict.get`` default is evaluated
            # even when the key exists, so an item carrying only ``expiry``
            # used to fail with KeyError on ``versioncreated``.
            expiry = item.get('expiry') or item['versioncreated'] + delta
            if expiry.tzinfo:
                return expiry > utcnow()
            else:
                # naive datetimes are compared against naive local time
                return expiry > datetime.now()
        return False

    try:
        delta = timedelta(minutes=provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']))
        filtered_items = [item for item in items if is_not_expired(item) and
                          item.get(ITEM_TYPE, 'text') in provider.get('content_types', [])]

        if len(items) != len(filtered_items):
            logger.debug('Received {0} articles from provider {1}, but only {2} are eligible to be saved in ingest'
                         .format(len(items), provider['name'], len(filtered_items)))

        return filtered_items
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
def process_iptc_codes(item, provider):
    """Ensure that the higher level IPTC codes are present by inserting them if missing.

    For example if given 15039001 (Formula One) make sure that 15039000
    (motor racing) and 15000000 (sport) are there as well. Only subjects with
    an eight character ``qcode`` are expanded; the item is modified in place.

    :param item: A story item with a ``subject`` list
    :param provider: provider the item came from, used for error reporting
    :raises ProviderError: ``iptcError`` wrapping any failure, including a
        parent code missing from ``subject_codes``
    """
    try:
        def iptc_already_exists(code):
            return any('qcode' in entry and code == entry['qcode'] for entry in item['subject'])

        # Iterate over a snapshot: parent codes are appended to the live list
        # below and must not be fed back into this loop.
        for subject in list(item['subject']):
            if 'qcode' in subject and len(subject['qcode']) == 8:
                top_qcode = subject['qcode'][:2] + '000000'
                if not iptc_already_exists(top_qcode):
                    item['subject'].append({'qcode': top_qcode, 'name': subject_codes[top_qcode]})

                mid_qcode = subject['qcode'][:5] + '000'
                if not iptc_already_exists(mid_qcode):
                    item['subject'].append({'qcode': mid_qcode, 'name': subject_codes[mid_qcode]})
    except Exception as ex:
        raise ProviderError.iptcError(ex, provider)
def _update(self, provider): self.provider = provider self.path = provider.get('config', {}).get('path', None) if not self.path: return [] for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created): try: filepath = os.path.join(self.path, filename) if os.path.isfile(filepath): stat = os.lstat(filepath) last_updated = datetime.fromtimestamp(stat.st_mtime, tz=utc) if self.is_latest_content(last_updated, provider.get('last_updated')): with open(filepath, 'r') as f: item = self.parser.parse_message(etree.fromstring(f.read()), provider) self.move_file(self.path, filename, provider=provider, success=True) yield [item] else: self.move_file(self.path, filename, provider=provider, success=True) except etreeParserError as ex: logger.exception("Ingest Type: AAP - File: {0} could not be processed".format(filename)) self.move_file(self.path, filename, provider=provider, success=False) raise ParserError.nitfParserError(ex, provider) except ParserError as ex: self.move_file(self.path, filename, provider=provider, success=False) except Exception as ex: self.move_file(self.path, filename, provider=provider, success=False) raise ProviderError.ingestError(ex, provider) push_notification('ingest:update')
def process_iptc_codes(item, provider):
    """Ensure that the higher level IPTC codes are present by inserting them if missing.

    For example if given 15039001 (Formula One) make sure that 15039000
    (motor racing) and 15000000 (sport) are there as well. Only subjects with
    an eight character ``qcode`` are expanded; the item is modified in place.

    :param item: A story item with a ``subject`` list
    :param provider: provider the item came from, used for error reporting
    :raises ProviderError: ``iptcError`` wrapping any failure, including a
        parent code missing from ``subject_codes``
    """
    try:
        def iptc_already_exists(code):
            return any('qcode' in entry and code == entry['qcode'] for entry in item['subject'])

        # Work from a snapshot so that the parent codes appended below are
        # not visited by the loop that is adding them.
        original_subjects = list(item['subject'])
        for subject in original_subjects:
            if 'qcode' not in subject or len(subject['qcode']) != 8:
                continue

            top_qcode = subject['qcode'][:2] + '000000'
            if not iptc_already_exists(top_qcode):
                item['subject'].append({'qcode': top_qcode, 'name': subject_codes[top_qcode]})

            mid_qcode = subject['qcode'][:5] + '000'
            if not iptc_already_exists(mid_qcode):
                item['subject'].append({'qcode': mid_qcode, 'name': subject_codes[mid_qcode]})
    except Exception as ex:
        raise ProviderError.iptcError(ex, provider)
def create(self, docs, **kwargs):
    """Fetch the requested external items into the archive.

    The ingest provider with source ``aapmm`` is looked up first; when it
    carries a ``config`` with a ``username``, the credentials are set on
    ``self.backend``. Each entry of ``docs`` is then fetched from the
    backend, copied under a freshly generated id, passed to ``send_to`` and
    posted to the archive resource.

    :param docs: list of fetch requests with ``guid``, ``desk`` and optionally ``stage``
    :return: list of the ids generated for the new archive items
    :raises SuperdeskApiError: bad request when a request has no ``desk``
    :raises ProviderError: ``externalProviderError`` when the backend raises
        ``FileNotFoundError``
    """
    created_ids = []
    provider = get_resource_service("ingest_providers").find_one(source="aapmm", req=None)

    has_credentials = bool(provider) and "config" in provider and "username" in provider["config"]
    if has_credentials:
        provider_config = provider["config"]
        self.backend.set_credentials(provider_config["username"], provider_config["password"])

    for request in docs:
        desk_id = request.get("desk")
        if not desk_id:
            # if no desk is selected then it is bad request
            raise SuperdeskApiError.badRequestError("Destination desk cannot be empty.")

        try:
            source_item = self.backend.find_one_raw(request["guid"], request["guid"])
        except FileNotFoundError as ex:
            raise ProviderError.externalProviderError(ex, provider)

        fetched = dict(source_item)
        fetched_id = generate_guid(type=GUID_TAG)
        created_ids.append(fetched_id)
        fetched["_id"] = fetched_id
        generate_unique_id_and_name(fetched)

        if provider:
            fetched["ingest_provider"] = str(provider[superdesk.config.ID_FIELD])

        fetched[config.VERSION] = 1
        send_to(doc=fetched, update=None, desk_id=desk_id, stage_id=request.get("stage"))
        fetched[ITEM_STATE] = request.get(ITEM_STATE, CONTENT_STATE.FETCHED)
        # both references point back at the item the copy was made from
        fetched[INGEST_ID] = fetched[FAMILY_ID] = source_item["_id"]

        remove_unwanted(fetched)
        set_original_creator(fetched)

        superdesk.get_resource_service(ARCHIVE).post([fetched])
        insert_into_versions(fetched.get("_id"))

    return created_ids
def filter_expired_items(provider, items):
    """Filter out expired items from the list of articles to be ingested.

    Filters both expired items and items whose type (``ITEM_TYPE``, default
    ``'text'``) is not in ``provider['content_types']``.

    :param provider: Ingest Provider Details.
    :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
    :param items: list of items received from the provider
    :type items: list
    :return: list of items which can be saved into ingest collection
    :rtype: list
    :raises ProviderError: ``providerFilterExpiredContentError`` wrapping any failure
    """

    def is_not_expired(item):
        """Return True when the item's expiry lies in the future."""
        if item.get('expiry') or item.get('versioncreated'):
            # Compute the fallback lazily: a ``dict.get`` default is evaluated
            # even when the key exists, so an item carrying only ``expiry``
            # used to fail with KeyError on ``versioncreated``.
            expiry = item.get('expiry') or item['versioncreated'] + delta
            if expiry.tzinfo:
                return expiry > utcnow()
            else:
                # naive datetimes are compared against naive local time
                return expiry > datetime.now()
        return False

    try:
        delta = timedelta(minutes=provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']))
        filtered_items = [item for item in items if is_not_expired(item) and
                          item.get(ITEM_TYPE, 'text') in provider.get('content_types', [])]

        if len(items) != len(filtered_items):
            logger.debug('Received {0} articles from provider {1}, but only {2} are eligible to be saved in ingest'
                         .format(len(items), provider['name'], len(filtered_items)))

        return filtered_items
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
class TestProviderService(FeedingService):
    """Feeding service named ``test`` whose update never returns any items."""

    NAME = 'test'

    # error description built from ProviderError.anpaError
    ERRORS = [ProviderError.anpaError(None, None).get_error_description()]

    def _update(self, provider, update):
        """Return an empty list regardless of the arguments."""
        return []
def test_ingest_provider_closed_when_critical_error_raised(self):
    """Closing a provider with an anpaError marks it as ``is_closed``."""
    name = 'AAP'

    before = self._get_provider(name)
    self.assertFalse(before.get('is_closed'))

    service = self._get_provider_service(before)
    service.provider = before
    service.close_provider(before, ProviderError.anpaError())

    # re-read the provider to observe the change made by close_provider
    after = self._get_provider(name)
    self.assertTrue(after.get('is_closed'))
def test_ingest_provider_closed_when_critical_error_raised(self):
    """Closing a provider with an anpaError marks it as ``is_closed``."""
    name = "AAP"

    initial = self._get_provider(name)
    self.assertFalse(initial.get("is_closed"))

    service = self._get_provider_service(initial)
    service.provider = initial
    service.close_provider(initial, ProviderError.anpaError())

    # re-read the provider to observe the change made by close_provider
    reloaded = self._get_provider(name)
    self.assertTrue(reloaded.get("is_closed"))
def filter_expired_items(provider, items):
    """Keep only the items created within the provider's retention period.

    The retention period is ``provider['days_to_keep']`` days, defaulting to
    ``DAYS_TO_KEEP``. Items without ``versioncreated`` are compared using
    the current time.

    :param provider: ingest provider details
    :param items: list of items received from the provider
    :return: list of items whose ``versioncreated`` is after the cut-off
    :raises ProviderError: ``providerFilterExpiredContentError`` wrapping any failure
    """
    try:
        keep_for_days = provider.get('days_to_keep', DAYS_TO_KEEP)
        oldest_allowed = utcnow() - timedelta(days=keep_for_days)

        still_valid = []
        for candidate in items:
            created = candidate.get('versioncreated', utcnow())
            if created > oldest_allowed:
                still_valid.append(candidate)
        return still_valid
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
def run(self, provider_type=None):
    """Remove expired ingest data for every provider, optionally limited to one type.

    An ``ingest:cleaned`` notification is pushed after each processed
    provider, whether or not the removal succeeded.

    :param provider_type: when given, only providers whose ``type`` equals
        it are processed
    :raises ProviderError: ``expiredContentError`` wrapping the first
        failure, which is logged first
    """
    providers = superdesk.get_resource_service('ingest_providers').get(req=None, lookup={})

    for provider in providers:
        selected = not provider_type or provider_type == provider.get('type')
        if not selected:
            continue

        try:
            remove_expired_data(provider)
        except Exception as failure:
            logger.exception(failure)
            raise ProviderError.expiredContentError(failure, provider)
        finally:
            push_notification('ingest:cleaned')
def run(self, provider=None):
    """Validate and store a new ingest provider given as a JSON string.

    ``content_expiry`` defaults to the application's
    ``INGEST_EXPIRY_MINUTES``. The data is validated against the
    ``ingest_providers`` schema before being posted to the
    ``ingest_providers`` resource.

    :param provider: JSON document describing the provider; nothing is done
        when it is empty
    :return: the stored provider data, or ``None`` when no provider was given
    :raises ProviderError: ``providerAddError`` when the data is invalid or
        any step fails
    """
    if not provider:
        return None

    # kept outside the try so the error handler can report it even when the
    # JSON cannot be parsed
    data = {}
    try:
        data = superdesk.json.loads(provider)
        data.setdefault('content_expiry', superdesk.app.config['INGEST_EXPIRY_MINUTES'])

        validator = superdesk.app.validator(superdesk.app.config['DOMAIN']['ingest_providers']['schema'],
                                            'ingest_providers')
        if validator.validate(data):
            get_resource_service('ingest_providers').post([data])
            return data

        validation_errors = str(validator.errors)
    except Exception as ex:
        raise ProviderError.providerAddError(ex, data)

    # Raised outside the try block: raising it inside let the generic handler
    # catch the ProviderError and wrap it in a second providerAddError.
    ex = Exception('Failed to add Provider as the data provided is invalid. Errors: {}'
                   .format(validation_errors))
    raise ProviderError.providerAddError(exception=ex, provider=data)
def test_ingest_provider_closed_when_critical_error_raised(self):
    """Closing a provider with an anpaError marks it as ``is_closed``."""
    name = 'AAP'

    with self.app.app_context():
        before = self._get_provider(name)
        self.assertFalse(before.get('is_closed'))

        service = self._get_provider_service(before)
        service.provider = before
        service.close_provider(before, ProviderError.anpaError())

        # re-read the provider to observe the change made by close_provider
        after = self._get_provider(name)
        self.assertTrue(after.get('is_closed'))
def process_anpa_category(item, provider):
    """Attach the vocabulary name to the item's ``anpa-category``.

    The ``categories`` vocabulary is searched for the first active entry
    whose ``value`` equals the item's category qcode, ignoring case. On a
    match ``item['anpa-category']`` is replaced by a dict with the item's
    qcode and the vocabulary name.

    :param item: item being ingested; modified in place
    :param provider: provider the item came from, used for error reporting
    :raises ProviderError: ``anpaError`` wrapping any failure
    """
    try:
        vocabulary = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='categories')
        if not vocabulary:
            return

        for candidate in vocabulary['items']:
            if candidate['is_active'] is not True:
                continue
            if item['anpa-category']['qcode'].lower() != candidate['value'].lower():
                continue

            item['anpa-category'] = {'qcode': item['anpa-category']['qcode'], 'name': candidate['name']}
            break
    except Exception as ex:
        raise ProviderError.anpaError(ex, provider)
def create(self, docs, **kwargs):
    """Fetch the requested items from the search provider into the archive.

    The search provider named ``PROVIDER_NAME`` must exist and be open; its
    ``config`` (when present) is handed to ``self.backend`` as credentials.
    Each entry of ``docs`` is fetched from the backend, copied under a
    freshly generated id, passed to ``send_to``, posted to the archive
    resource, and the provider's ``last_item_update`` is refreshed.

    :param docs: list of fetch requests with ``guid``, ``desk`` and optionally ``stage``
    :return: list of the ids generated for the new archive items
    :raises SuperdeskApiError: bad request when the provider is missing or
        closed, or when a request has no ``desk``
    :raises ProviderError: ``externalProviderError`` when the backend raises
        ``FileNotFoundError``
    """
    providers_service = get_resource_service('search_providers')
    search_provider = providers_service.find_one(search_provider=PROVIDER_NAME, req=None)

    if not search_provider or search_provider.get('is_closed', False):
        raise SuperdeskApiError.badRequestError('No search provider found or the search provider is closed.')

    if 'config' in search_provider:
        self.backend.set_credentials(search_provider['config'])

    created_ids = []
    for request in docs:
        desk_id = request.get('desk')
        if not desk_id:
            # if no desk is selected then it is bad request
            raise SuperdeskApiError.badRequestError("Destination desk cannot be empty.")

        try:
            source_item = self.backend.find_one_raw(request['guid'], request['guid'])
        except FileNotFoundError as ex:
            raise ProviderError.externalProviderError(ex, search_provider)

        fetched = dict(source_item)
        fetched_id = generate_guid(type=GUID_TAG)
        created_ids.append(fetched_id)
        fetched[config.ID_FIELD] = fetched_id
        generate_unique_id_and_name(fetched)

        if search_provider:
            fetched['ingest_provider'] = str(search_provider[config.ID_FIELD])

        fetched[config.VERSION] = 1
        send_to(doc=fetched, update=None, desk_id=desk_id, stage_id=request.get('stage'))
        fetched[ITEM_STATE] = request.get(ITEM_STATE, CONTENT_STATE.FETCHED)
        # both references point back at the item the copy was made from
        fetched[INGEST_ID] = fetched[FAMILY_ID] = source_item[config.ID_FIELD]
        fetched[ITEM_OPERATION] = ITEM_FETCH

        remove_unwanted(fetched)
        set_original_creator(fetched)

        superdesk.get_resource_service(ARCHIVE).post([fetched])
        insert_into_versions(fetched[config.ID_FIELD])

        # NOTE(review): the timestamp is refreshed once per fetched doc -
        # confirm it was not meant to run a single time after the loop.
        get_resource_service('search_providers').system_update(
            search_provider[config.ID_FIELD], {'last_item_update': utcnow()}, search_provider)

    return created_ids
def run(self, provider_type=None):
    """Remove expired ingest data for every provider, optionally limited to one type.

    An ``ingest:cleaned`` notification is pushed after each processed
    provider, whether or not the removal succeeded.

    :param provider_type: when given, only providers whose ``type`` equals
        it are processed
    :raises ProviderError: ``expiredContentError`` wrapping the first
        failure, which is logged first
    """
    ingest_providers = superdesk.get_resource_service('ingest_providers')

    for provider in ingest_providers.get(req=None, lookup={}):
        type_matches = not provider_type or provider_type == provider.get('type')
        if not type_matches:
            continue

        try:
            remove_expired_data(provider)
        except Exception as problem:
            logger.exception(problem)
            raise ProviderError.expiredContentError(problem, provider)
        finally:
            push_notification('ingest:cleaned')
def filter_expired_items(provider, items):
    """Filter out expired items from the list of articles to be ingested.

    An item's expiry is its ``expiry`` value or, when that is missing,
    ``versioncreated`` plus the provider's ``content_expiry`` minutes
    (default ``INGEST_EXPIRY_MINUTES``).

    :param provider: ingest provider details
    :param items: list of items received from the provider
    :return: list of items whose expiry lies in the future
    :raises ProviderError: ``providerFilterExpiredContentError`` wrapping any failure
    """

    def is_not_expired(item):
        """Return True when the item's timezone-aware expiry lies in the future."""
        if item.get("expiry") or item.get("versioncreated"):
            # Compute the fallback lazily: a ``dict.get`` default is evaluated
            # even when the key exists, so an item carrying only ``expiry``
            # used to fail with KeyError on ``versioncreated``.
            expiry = item.get("expiry") or item["versioncreated"] + delta
            if expiry.tzinfo:
                return expiry > utcnow()
        # items without dates, or with a naive expiry, count as expired
        return False

    try:
        delta = timedelta(minutes=provider.get("content_expiry", INGEST_EXPIRY_MINUTES))
        return [item for item in items if is_not_expired(item)]
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
def run(self, provider=None):
    """Store a new ingest provider given as a JSON string.

    ``name`` and ``source`` default to the provider's ``type`` and
    ``content_expiry`` to ``INGEST_EXPIRY_MINUTES``; the result is saved to
    the ``ingest_providers`` collection.

    :param provider: JSON document describing the provider; nothing is done
        when it is empty
    :return: the saved provider data, or ``None`` when no provider was given
    :raises ProviderError: ``providerAddError`` wrapping any failure
    """
    if not provider:
        return None

    # kept outside the try so the error handler can report it even when the
    # JSON cannot be parsed
    data = {}
    try:
        data = superdesk.json.loads(provider)
        for field in ('name', 'source'):
            data.setdefault(field, data['type'])
        data.setdefault('content_expiry', INGEST_EXPIRY_MINUTES)

        superdesk.get_db()['ingest_providers'].save(data)
        return data
    except Exception as ex:
        raise ProviderError.providerAddError(ex, data)
def run(self, provider=None):
    """Store a new ingest provider given as a JSON string.

    ``name`` and ``source`` default to the provider's ``type`` and
    ``days_to_keep`` to ``DAYS_TO_KEEP``; the result is saved to the
    ``ingest_providers`` collection.

    :param provider: JSON document describing the provider; nothing is done
        when it is empty
    :return: the saved provider data, or ``None`` when no provider was given
    :raises ProviderError: ``providerAddError`` wrapping any failure
    """
    if not provider:
        return None

    # kept outside the try so the error handler can report it even when the
    # JSON cannot be parsed
    data = {}
    try:
        data = superdesk.json.loads(provider)
        for field in ('name', 'source'):
            data.setdefault(field, data['type'])
        data.setdefault('days_to_keep', DAYS_TO_KEEP)

        collection = superdesk.get_db()['ingest_providers']
        collection.save(data)
        return data
    except Exception as ex:
        raise ProviderError.providerAddError(ex, data)
def filter_expired_items(provider, items):
    """Filter out expired items from the list of articles to be ingested.

    An item's expiry is its ``expiry`` value or, when that is missing,
    ``versioncreated`` plus the provider's ``content_expiry`` minutes
    (default ``app.config['INGEST_EXPIRY_MINUTES']``).

    :param provider: ingest provider details
    :param items: list of items received from the provider
    :return: list of items whose expiry lies in the future
    :raises ProviderError: ``providerFilterExpiredContentError`` wrapping any failure
    """

    def is_not_expired(item):
        """Return True when the item's timezone-aware expiry lies in the future."""
        if item.get('expiry') or item.get('versioncreated'):
            # Compute the fallback lazily: a ``dict.get`` default is evaluated
            # even when the key exists, so an item carrying only ``expiry``
            # used to fail with KeyError on ``versioncreated``.
            expiry = item.get('expiry') or item['versioncreated'] + delta
            if expiry.tzinfo:
                return expiry > utcnow()
        # items without dates, or with a naive expiry, count as expired
        return False

    try:
        delta = timedelta(minutes=provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']))
        return [item for item in items if is_not_expired(item)]
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
def process_anpa_category(item, provider):
    """Attach the vocabulary name to the item's ``anpa-category``.

    The ``categories`` vocabulary is searched for the first active entry
    whose ``value`` equals the item's category qcode, ignoring case. On a
    match ``item["anpa-category"]`` is replaced by a dict with the item's
    qcode and the vocabulary name.

    :param item: item being ingested; modified in place
    :param provider: provider the item came from, used for error reporting
    :raises ProviderError: ``anpaError`` wrapping any failure
    """
    try:
        vocabulary = superdesk.get_resource_service("vocabularies").find_one(req=None, _id="categories")
        if not vocabulary:
            return

        for entry in vocabulary["items"]:
            if entry["is_active"] is not True:
                continue
            if item["anpa-category"]["qcode"].lower() != entry["value"].lower():
                continue

            item["anpa-category"] = {"qcode": item["anpa-category"]["qcode"], "name": entry["name"]}
            break
    except Exception as ex:
        raise ProviderError.anpaError(ex, provider)
def filter_expired_items(provider, items):
    """Filter out expired items from the list of articles to be ingested.

    An item's expiry is its ``expiry`` value or, when that is missing,
    ``versioncreated`` plus the provider's ``content_expiry`` minutes
    (default ``app.config['INGEST_EXPIRY_MINUTES']``).

    :param provider: ingest provider details
    :param items: list of items received from the provider
    :return: list of items whose expiry lies in the future
    :raises ProviderError: ``providerFilterExpiredContentError`` wrapping any failure
    """

    def is_not_expired(item):
        """Return True when the item's timezone-aware expiry lies in the future."""
        if item.get('expiry') or item.get('versioncreated'):
            # Compute the fallback lazily: a ``dict.get`` default is evaluated
            # even when the key exists, so an item carrying only ``expiry``
            # used to fail with KeyError on ``versioncreated``.
            expiry = item.get('expiry') or item['versioncreated'] + delta
            if expiry.tzinfo:
                return expiry > utcnow()
        # items without dates, or with a naive expiry, count as expired
        return False

    try:
        delta = timedelta(minutes=provider.get(
            'content_expiry', app.config['INGEST_EXPIRY_MINUTES']))
        return [item for item in items if is_not_expired(item)]
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
def _request(self, url, data):
    """Perform a POST request to the given url with ``data`` encoded as JSON.

    The request carries ``self._headers`` and authenticates with
    ``self._user`` / ``self._password``.

    :param url: address to post to
    :param data: payload, serialised with ``json.dumps``
    :return: the response object when the status code is in the 2xx range
    :raises ProviderError: ``externalProviderError`` for any other status
        code; the failure is logged first
    """
    response = requests.post(
        url,
        data=json.dumps(data),
        headers=self._headers,
        auth=(self._user, self._password),
    )

    if 200 <= response.status_code < 300:
        return response

    logger.error('error fetching url=%s status=%s content=%s' % (url, response.status_code, response.content or ''))
    raise ProviderError.externalProviderError("Scanpix request can't be performed")
def process_anpa_category(item, provider):
    """Fill in name and canonical qcode for each of the item's ANPA categories.

    Every entry of ``item['anpa_category']`` is matched, ignoring case,
    against the active entries of the ``categories`` vocabulary. The first
    match supplies the entry's ``name`` and replaces its ``qcode``.

    :param item: item being ingested; its categories are modified in place
    :param provider: provider the item came from, used for error reporting
    :raises ProviderError: ``anpaError`` wrapping any failure
    """
    try:
        vocabulary = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='categories')
        if not vocabulary:
            return

        for item_category in item['anpa_category']:
            for known in vocabulary['items']:
                if known['is_active'] is not True:
                    continue
                if item_category['qcode'].lower() != known['qcode'].lower():
                    continue

                item_category['name'] = known['name']
                # make the case of the qcode match what we hold in our dictionary
                item_category['qcode'] = known['qcode']
                break
    except Exception as ex:
        raise ProviderError.anpaError(ex, provider)
def run(self, provider):
    """Validate and store a new ingest provider given as a JSON string.

    ``content_expiry`` defaults to the application's
    ``INGEST_EXPIRY_MINUTES``. The data is validated against the
    ``ingest_providers`` schema before being posted to the
    ``ingest_providers`` resource.

    :param provider: JSON document describing the provider
    :return: the stored provider data
    :raises ProviderError: ``providerAddError`` when the data is invalid or
        any step fails
    """
    # kept outside the try so the error handler can report it even when the
    # JSON cannot be parsed
    data = {}
    try:
        data = superdesk.json.loads(provider)
        data.setdefault("content_expiry", superdesk.app.config["INGEST_EXPIRY_MINUTES"])

        validator = superdesk.app.validator(
            superdesk.app.config["DOMAIN"]["ingest_providers"]["schema"], "ingest_providers")
        if validator.validate(data):
            get_resource_service("ingest_providers").post([data])
            return data

        validation_errors = str(validator.errors)
    except Exception as ex:
        raise ProviderError.providerAddError(ex, data)

    # Raised outside the try block: raising it inside let the generic handler
    # catch the ProviderError and wrap it in a second providerAddError.
    ex = Exception(
        "Failed to add Provider as the data provided is invalid. Errors: {}"
        .format(validation_errors))
    raise ProviderError.providerAddError(exception=ex, provider=data)
def remove_expired(self, provider):
    """Remove the provider's expired data while holding the ``ingest:gc`` lock.

    Nothing is done when the lock cannot be acquired. Otherwise the expired
    data is removed, an ``ingest:cleaned`` notification is pushed and the
    lock is released, even on failure.

    :param provider: ingest provider passed on to ``remove_expired_data``
    :raises ProviderError: ``expiredContentError`` wrapping any failure,
        which is logged first
    """
    gc_lock = 'ingest:gc'
    acquired = lock(gc_lock, expire=300)
    if not acquired:
        return

    try:
        remove_expired_data(provider)
        push_notification('ingest:cleaned')
    except Exception as failure:
        logger.exception(failure)
        raise ProviderError.expiredContentError(failure, provider)
    finally:
        unlock(gc_lock)
def process_anpa_category(item, provider):
    """Attach the vocabulary name to the item's ``anpa-category``.

    The ``categories`` vocabulary is searched for the first active entry
    whose ``value`` equals the item's category qcode, ignoring case. On a
    match ``item['anpa-category']`` is replaced by a dict with the item's
    qcode and the vocabulary name.

    :param item: item being ingested; modified in place
    :param provider: provider the item came from, used for error reporting
    :raises ProviderError: ``anpaError`` wrapping any failure
    """
    try:
        vocabularies = superdesk.get_resource_service('vocabularies')
        categories = vocabularies.find_one(req=None, _id='categories')
        if not categories:
            return

        for option in categories['items']:
            if option['is_active'] is not True:
                continue
            if item['anpa-category']['qcode'].lower() != option['value'].lower():
                continue

            item['anpa-category'] = {
                'qcode': item['anpa-category']['qcode'],
                'name': option['name'],
            }
            break
    except Exception as ex:
        raise ProviderError.anpaError(ex, provider)
def test_raise_ruleError(self):
    """``ProviderError.ruleError`` has code 2003, keeps the cause and logs one error."""
    with assert_raises(ProviderError) as error_context:
        try:
            raise Exception("Testing ruleError")
        except Exception as cause:
            raise ProviderError.ruleError(cause, self.provider)

    raised = error_context.exception
    self.assertEqual(raised.code, 2003)
    self.assertEqual(raised.message, "Rule could not be applied")
    self.assertIsNotNone(raised.system_exception)
    self.assertEqual(raised.system_exception.args[0], "Testing ruleError")

    error_log = self.mock_logger_handler.messages['error']
    self.assertEqual(len(error_log), 1)
    self.assertEqual(error_log[0],
                     "ProviderError Error 2003 - Rule could not be applied: "
                     "Testing ruleError on channel TestProvider")
def test_raise_anpaError(self):
    """``ProviderError.anpaError`` has code 2005, keeps the cause and logs one error."""
    with assert_raises(ProviderError) as error_context:
        try:
            raise Exception("Testing anpaError")
        except Exception as cause:
            raise ProviderError.anpaError(cause, self.provider)

    raised = error_context.exception
    self.assertEqual(raised.code, 2005)
    self.assertEqual(raised.message, "Anpa category error")
    self.assertIsNotNone(raised.system_exception)
    self.assertEqual(raised.system_exception.args[0], "Testing anpaError")

    error_log = self.mock_logger_handler.messages['error']
    self.assertEqual(len(error_log), 1)
    self.assertEqual(error_log[0],
                     "ProviderError Error 2005 - Anpa category error: "
                     "Testing anpaError on channel TestProvider")
def test_raise_providerFilterExpiredContentError(self):
    """``providerFilterExpiredContentError`` has code 2006, keeps the cause and logs one error."""
    with assert_raises(ProviderError) as error_context:
        try:
            raise Exception("Testing providerFilterExpiredContentError")
        except Exception as cause:
            raise ProviderError.providerFilterExpiredContentError(cause, self.provider)

    raised = error_context.exception
    self.assertEqual(raised.code, 2006)
    self.assertEqual(raised.message, "Expired content could not be filtered")
    self.assertIsNotNone(raised.system_exception)
    self.assertEqual(raised.system_exception.args[0], "Testing providerFilterExpiredContentError")

    error_log = self.mock_logger_handler.messages['error']
    self.assertEqual(len(error_log), 1)
    self.assertEqual(error_log[0],
                     "ProviderError Error 2006 - Expired content could not be filtered: "
                     "Testing providerFilterExpiredContentError on channel TestProvider")
def process_anpa_category(item, provider):
    """Normalise the item's ANPA categories against the ``categories`` vocabulary.

    Every entry of ``item['anpa_category']`` is matched, ignoring case,
    against the active vocabulary entries. Matched entries receive the
    vocabulary's ``name`` and ``qcode``; entries unknown to the system are
    removed from the item.

    :param item: item being ingested; its category list is modified in place
    :param provider: provider the item came from, used for error reporting
    :raises ProviderError: ``anpaError`` wrapping any failure
    """
    try:
        anpa_categories = superdesk.get_resource_service('vocabularies').find_one(req=None, _id='categories')
        if anpa_categories:
            # Collect the survivors instead of calling ``remove`` inside the
            # loop: removing from the list being iterated makes the loop skip
            # the entry that follows each removed one.
            known_categories = []
            for item_category in item['anpa_category']:
                mapped_category = [c for c in anpa_categories['items'] if
                                   c['is_active'] is True and item_category['qcode'].lower() == c['qcode'].lower()]
                # if the category is not known to the system remove it from the item
                if not mapped_category:
                    continue

                item_category['name'] = mapped_category[0]['name']
                # make the case of the qcode match what we hold in our dictionary
                item_category['qcode'] = mapped_category[0]['qcode']
                known_categories.append(item_category)

            # slice assignment keeps the same list object on the item
            item['anpa_category'][:] = known_categories
    except Exception as ex:
        raise ProviderError.anpaError(ex, provider)
def process_iptc_codes(item, provider):
    """Ensure that the higher level IPTC codes are present by inserting them if missing.

    For example if given 15039001 (Formula One) make sure that 15039000
    (motor racing) and 15000000 (sport) are there as well. Only subjects
    whose ``qcode`` is eight digits are expanded. A parent code missing from
    ``subject_codes`` is logged as a warning and the rest of that subject is
    skipped. The item is modified in place.

    :param item: A story item with a ``subject`` list
    :param provider: provider the item came from, used for error reporting
    :raises ProviderError: ``iptcError`` wrapping any other failure
    """
    try:
        def iptc_already_exists(code):
            return any("qcode" in entry and code == entry["qcode"] for entry in item["subject"])

        # Iterate over a snapshot: parent codes are appended to the live list
        # below and must not be fed back into this loop.
        for subject in list(item["subject"]):
            if "qcode" in subject and len(subject["qcode"]) == 8 and subject["qcode"].isdigit():
                top_qcode = subject["qcode"][:2] + "000000"
                if not iptc_already_exists(top_qcode):
                    try:
                        item["subject"].append({"qcode": top_qcode, "name": subject_codes[top_qcode]})
                    except KeyError:
                        logger.warning("missing qcode in subject_codes: {qcode}".format(qcode=top_qcode))
                        # an unknown top level code also skips the mid level check
                        continue

                mid_qcode = subject["qcode"][:5] + "000"
                if not iptc_already_exists(mid_qcode):
                    try:
                        item["subject"].append({"qcode": mid_qcode, "name": subject_codes[mid_qcode]})
                    except KeyError:
                        logger.warning("missing qcode in subject_codes: {qcode}".format(qcode=mid_qcode))
                        continue
    except Exception as ex:
        raise ProviderError.iptcError(ex, provider)
def test_raise_providerAddError(self):
    """``ProviderError.providerAddError`` has code 2001, keeps the cause and logs one error."""
    with assert_raises(ProviderError) as error_context:
        try:
            ex = Exception("Testing providerAddError")
            raise ex
        except Exception:
            raise ProviderError.providerAddError(ex, self.provider)

    exception = error_context.exception
    # assertEqual reports both values on failure, unlike assertTrue(a == b)
    self.assertEqual(exception.code, 2001)
    self.assertEqual(exception.message, "Provider could not be saved")
    self.assertIsNotNone(exception.system_exception)
    # ``assertEquals`` is a deprecated alias that newer Python versions removed
    self.assertEqual(exception.system_exception.args[0], "Testing providerAddError")

    self.assertEqual(len(self.mock_logger_handler.messages["error"]), 1)
    self.assertEqual(
        self.mock_logger_handler.messages["error"][0],
        "ProviderError Error 2001 - Provider could not be saved: "
        "Testing providerAddError on channel TestProvider",
    )
def test_raise_ingestError(self):
    """``ProviderError.ingestError`` has code 2004, names the provider and logs one error."""
    with assert_raises(ProviderError) as error_context:
        try:
            ex = Exception("Testing ingestError")
            raise ex
        except Exception:
            raise ProviderError.ingestError(ex, self.provider)

    exception = error_context.exception
    # assertEqual reports both values on failure, unlike assertTrue(a == b)
    self.assertEqual(exception.code, 2004)
    self.assertEqual(exception.message, "Ingest error")
    self.assertEqual(exception.provider_name, "TestProvider")
    self.assertIsNotNone(exception.system_exception)
    # ``assertEquals`` is a deprecated alias that newer Python versions removed
    self.assertEqual(exception.system_exception.args[0], "Testing ingestError")

    self.assertEqual(len(self.mock_logger_handler.messages["error"]), 1)
    self.assertEqual(
        self.mock_logger_handler.messages["error"][0],
        "ProviderError Error 2004 - Ingest error: "
        "Testing ingestError on channel TestProvider",
    )
def create(self, docs, **kwargs):
    """Fetch the requested items from the search provider into the archive.

    The search provider named ``PROVIDER_NAME`` must exist and be open; its
    ``config`` (when present) is handed to ``self.backend`` as credentials.
    Each entry of ``docs`` is fetched from the backend, copied under a
    freshly generated id, passed to ``send_to``, posted to the archive
    resource, and the provider's ``last_item_update`` is refreshed.

    :param docs: list of fetch requests with ``guid``, ``desk`` and optionally ``stage``
    :return: list of the ids generated for the new archive items
    :raises SuperdeskApiError: bad request when the provider is missing or
        closed, or when a request has no ``desk``
    :raises ProviderError: ``externalProviderError`` when the backend raises
        ``FileNotFoundError``
    """
    search_provider = get_resource_service('search_providers').find_one(search_provider=PROVIDER_NAME, req=None)

    unavailable = not search_provider or search_provider.get('is_closed', False)
    if unavailable:
        raise SuperdeskApiError.badRequestError('No search provider found or the search provider is closed.')

    if 'config' in search_provider:
        self.backend.set_credentials(search_provider['config'])

    created_ids = []
    for request in docs:
        desk_id = request.get('desk')
        if not desk_id:
            # if no desk is selected then it is bad request
            raise SuperdeskApiError.badRequestError("Destination desk cannot be empty.")

        try:
            source_item = self.backend.find_one_raw(request['guid'], request['guid'])
        except FileNotFoundError as ex:
            raise ProviderError.externalProviderError(ex, search_provider)

        copy = dict(source_item)
        copy_id = generate_guid(type=GUID_TAG)
        created_ids.append(copy_id)
        copy[config.ID_FIELD] = copy_id
        generate_unique_id_and_name(copy)

        if search_provider:
            copy['ingest_provider'] = str(search_provider[config.ID_FIELD])

        copy[config.VERSION] = 1
        send_to(doc=copy, update=None, desk_id=desk_id, stage_id=request.get('stage'))
        copy[ITEM_STATE] = request.get(ITEM_STATE, CONTENT_STATE.FETCHED)
        # both references point back at the item the copy was made from
        copy[INGEST_ID] = copy[FAMILY_ID] = source_item[config.ID_FIELD]
        copy[ITEM_OPERATION] = ITEM_FETCH

        remove_unwanted(copy)
        set_original_creator(copy)

        superdesk.get_resource_service(ARCHIVE).post([copy])
        insert_into_versions(copy[config.ID_FIELD])

        # NOTE(review): the timestamp is refreshed once per fetched doc -
        # confirm it was not meant to run a single time after the loop.
        get_resource_service('search_providers').system_update(search_provider[config.ID_FIELD],
                                                               {'last_item_update': utcnow()}, search_provider)

    return created_ids
def create(self, docs, **kwargs):
    """Fetch the requested external items into the archive.

    Each entry of ``docs`` names an external item (``guid``) and a
    destination (``desk`` and optionally ``stage``). The item is fetched,
    copied under a freshly generated id, passed to ``send_to`` and posted to
    the archive resource. When at least one item was fetched and a provider
    is known, the provider's ``last_item_update`` is set to the current time.

    :param docs: list of fetch requests
    :return: list of the ids generated for the new archive items
    :raises SuperdeskApiError: bad request when a request has no ``desk``
    :raises ProviderError: ``externalProviderError`` when ``self.fetch``
        raises ``FileNotFoundError``
    """
    new_guids = []
    provider = self.get_provider()

    for doc in docs:
        if not doc.get('desk'):
            # if no desk is selected then it is bad request
            raise SuperdeskApiError.badRequestError(_("Destination desk cannot be empty."))

        try:
            archived_doc = self.fetch(doc['guid'])
        except FileNotFoundError as ex:
            raise ProviderError.externalProviderError(ex, provider)

        dest_doc = dict(archived_doc)
        new_id = generate_guid(type=GUID_TAG)
        new_guids.append(new_id)
        dest_doc['_id'] = new_id
        generate_unique_id_and_name(dest_doc)

        if provider:
            dest_doc['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])

        dest_doc[config.VERSION] = 1
        send_to(doc=dest_doc, update=None, desk_id=doc.get('desk'), stage_id=doc.get('stage'))
        dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
        dest_doc[INGEST_ID] = archived_doc['_id']
        dest_doc[FAMILY_ID] = archived_doc['_id']
        dest_doc[ITEM_OPERATION] = ITEM_FETCH

        remove_unwanted(dest_doc)
        set_original_creator(dest_doc)

        superdesk.get_resource_service(ARCHIVE).post([dest_doc])
        insert_into_versions(dest_doc.get('_id'))

    # The loop tolerates a missing provider, so the timestamp update must too;
    # calling ``provider.get`` on ``None`` would fail after the items were
    # already posted.
    if new_guids and provider:
        get_resource_service('search_providers').system_update(
            provider.get(config.ID_FIELD), {'last_item_update': utcnow()}, provider)

    return new_guids
def create(self, docs, **kwargs):
    """Copy items from the ``aapmm`` backend into the archive.

    The ingest provider whose source is ``aapmm`` is looked up; when it has a
    ``config`` containing ``username`` the backend credentials are set from
    it. Every request in ``docs`` is then read from the backend, copied, given
    a fresh id, passed to ``send_to`` with the requested desk and stage and
    posted to the archive service.

    :param docs: fetch requests; each must carry ``guid`` and ``desk``,
        ``stage`` is optional
    :param kwargs: accepted for interface compatibility, not used here
    :return: list with the newly generated id of every archived copy
    :raises: the error built by ``SuperdeskApiError.badRequestError`` when a
        request has no desk; the error built by
        ``ProviderError.externalProviderError`` when the backend raises
        ``FileNotFoundError``
    """
    fetched_ids = []
    provider = get_resource_service('ingest_providers').find_one(
        source='aapmm', req=None)

    has_credentials = (
        provider
        and 'config' in provider
        and 'username' in provider['config']
    )
    if has_credentials:
        self.backend.set_credentials(
            provider['config']['username'], provider['config']['password'])

    for request in docs:
        desk_id = request.get('desk')
        if not desk_id:
            # a fetch without a destination desk is a bad request
            raise SuperdeskApiError.badRequestError(
                "Destination desk cannot be empty.")

        guid = request['guid']
        try:
            source_item = self.backend.find_one_raw(guid, guid)
        except FileNotFoundError as ex:
            raise ProviderError.externalProviderError(ex, provider)

        # Work on a shallow copy so the item returned by the backend keeps its own id.
        archive_copy = dict(source_item)
        archive_id = generate_guid(type=GUID_TAG)
        fetched_ids.append(archive_id)
        archive_copy['_id'] = archive_id
        generate_unique_id_and_name(archive_copy)

        if provider:
            archive_copy['ingest_provider'] = str(
                provider[superdesk.config.ID_FIELD])

        archive_copy[config.VERSION] = 1
        send_to(doc=archive_copy, update=None, desk_id=desk_id,
                stage_id=request.get('stage'))
        archive_copy[ITEM_STATE] = request.get(ITEM_STATE, CONTENT_STATE.FETCHED)

        # Both back-references point at the id of the item the copy was made from.
        original_id = source_item['_id']
        archive_copy[INGEST_ID] = original_id
        archive_copy[FAMILY_ID] = original_id
        remove_unwanted(archive_copy)
        set_original_creator(archive_copy)

        superdesk.get_resource_service(ARCHIVE).post([archive_copy])
        insert_into_versions(archive_copy.get('_id'))

    return fetched_ids
def filter_expired_items(provider, items):
    """Return only the items that may be saved into the ingest collection.

    An item is kept when ``is_not_expired`` accepts it for the provider's
    expiry window and its ``ITEM_TYPE`` value (``'text'`` when absent) is
    listed in ``provider['content_types']``.

    The expiry window is ``provider['content_expiry']`` minutes. When that
    value is missing, zero, negative or not convertible to ``int``,
    ``app.config['INGEST_EXPIRY_MINUTES']`` is used instead; an unparsable or
    negative value is additionally removed from ``provider``.

    :param provider: Ingest Provider Details.
    :type provider: dict :py:class: `superdesk.io.ingest_provider_model.IngestProviderResource`
    :param items: list of items received from the provider
    :type items: list
    :return: list of items which can be saved into ingest collection
    :rtype: list
    :raises: the error built by
        ``ProviderError.providerFilterExpiredContentError`` for any failure
    """
    try:
        expiry_minutes = None
        try:
            parsed_expiry = int(provider['content_expiry'])
        except ValueError:
            logger.warning(
                'invalid content_expiry: content_expiry={value}'.format(
                    value=provider['content_expiry']))
            del provider['content_expiry']
        except (TypeError, KeyError):
            # no provider mapping or no expiry configured: use the default
            pass
        else:
            if parsed_expiry < 0:
                del provider['content_expiry']
            else:
                expiry_minutes = parsed_expiry

        max_age = timedelta(
            minutes=expiry_minutes or app.config['INGEST_EXPIRY_MINUTES'])

        eligible = []
        for candidate in items:
            if not is_not_expired(candidate, max_age):
                continue
            allowed_types = provider.get('content_types', [])
            if candidate.get(ITEM_TYPE, 'text') in allowed_types:
                eligible.append(candidate)

        received, kept = len(items), len(eligible)
        if received != kept:
            logger.warning(
                'Received {0} articles from provider {1}, but only {2} are eligible to be saved in ingest'
                .format(received, provider['name'], kept))

        return eligible
    except Exception as ex:
        raise ProviderError.providerFilterExpiredContentError(ex, provider)
def ingest_item(item, provider, rule_set=None):
    """Prepare ``item`` and store it through the ``ingest`` resource service.

    The item gets its ``_id`` (defaulting to its ``guid``), ``ingest_provider``
    and ``source`` filled in, the rule set is applied via ``apply_rule_set``
    and, when renditions are present, ``update_renditions`` is called with the
    href prepared by the registered provider. An item already found under its
    guid is replaced with ``put``; otherwise it is posted, falling back to
    ``put`` when the post raises ``HTTPException``.

    :param item: item to be ingested; must contain ``guid``
    :param provider: provider the item was received from; must contain ``_id``
    :param rule_set: optional rule set passed on to ``apply_rule_set``
    :raises ProviderError: re-raised unchanged; any other exception is wrapped
        with ``ProviderError.ingestError``
    """
    try:
        item.setdefault('_id', item['guid'])
        providers[provider.get('type')].provider = provider

        item['ingest_provider'] = str(provider['_id'])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, STATE_INGESTED)

        if 'anpa-category' in item:
            process_anpa_category(item, provider)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service('ingest')

        if item.get('ingest_provider_sequence') is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        rend = item.get('renditions', {})
        if rend:
            # Prefer the 'baseImage' rendition, otherwise take the first one available.
            base_image_rend = rend.get('baseImage') or next(iter(rend.values()))
            if base_image_rend:
                href = providers[provider.get('type')].prepare_href(base_image_rend['href'])
                update_renditions(item, href)

        old_item = ingest_service.find_one(_id=item['guid'], req=None)

        if old_item:
            ingest_service.put(item['guid'], item)
        else:
            try:
                ingest_service.post([item])
            except HTTPException as e:
                # The exception is a %-style argument, so the message needs a
                # placeholder; without one the logging module reports a
                # formatting error instead of emitting this record.
                logger.error("Exception while persisting item in ingest collection: %s", e)
                ingest_service.put(item['guid'], item)
    except ProviderError:
        raise
    except Exception as ex:
        raise ProviderError.ingestError(ex, provider)
def mock_update(provider):
    """Always fail: raise the error built by ``ProviderError.anpaError()``.

    :param provider: ignored; present only to match the signature of an update call
    """
    failure = ProviderError.anpaError()
    raise failure
from superdesk.errors import SuperdeskApiError, ProviderError
from superdesk.io import register_feeding_service, registered_feeding_services
from .tests import setup_providers, teardown_providers
from superdesk.io.feeding_services import FeedingService
from superdesk.io.commands.remove_expired_content import get_expired_items, RemoveExpiredContent
from superdesk.celery_task_utils import mark_task_as_not_running, is_task_running
from test_factory import SuperdeskTestCase


class TestProviderService(FeedingService):
    """Feeding service used by the tests; its update yields no items."""

    def _update(self, provider):
        return []


# Register the test service under the 'test' type together with the single
# error description it declares.
_test_service_errors = [ProviderError.anpaError(None, None).get_error_description()]
register_feeding_service('test', TestProviderService(), _test_service_errors)


class CeleryTaskRaceTest(SuperdeskTestCase):
    """Checks the running-task bookkeeping for two consecutive ingest updates."""

    def test_the_second_update_fails_if_already_running(self):
        provider = {'_id': 'abc', 'name': 'test provider', 'update_schedule': {'minutes': 1}}
        name, provider_id = provider['name'], provider['_id']

        # Nothing has been marked as running yet, so there is nothing to remove.
        was_removed = mark_task_as_not_running(name, provider_id)
        self.assertFalse(was_removed)

        # The first check is expected to report "not running".
        first_check = is_task_running(name, provider_id, {'minutes': 1})
        self.assertFalse(first_check, 'Failed to mark ingest update as running')

        # The second check must report the task as already running.
        second_check = is_task_running(name, provider_id, {'minutes': 1})
        self.assertTrue(second_check, 'Ingest update marked as running, possible race condition')