def step_impl_fetch_from_provider_ingest(context, provider_name, guid):
    """Behave step: ingest one fixture file identified by *guid* via the named provider.

    Registers a ``provider_name.guid`` placeholder for each ingested item so later
    steps can reference the generated ``_id``.
    """
    with context.app.test_request_context(context.app.config['URL_PREFIX']):
        providers_service = get_resource_service('ingest_providers')
        provider = providers_service.find_one(name=provider_name, req=None)
        feeding_service = get_feeding_service(provider['feeding_service'])
        fixture_path = os.path.join(provider.get('config', {}).get('path', ''), guid)

        parser = feeding_service.get_feed_parser(provider)
        # XML parsers are handed a parsed element tree; every other parser
        # receives the path of the fixture file and reads it itself.
        if not isinstance(parser, XMLFeedParser):
            parsed = parser.parse(fixture_path, provider)
        else:
            with open(fixture_path, 'rb') as fixture:
                root = etree.etree.fromstring(fixture.read())
                parsed = parser.parse(root, provider)

        items = parsed if isinstance(parsed, list) else [parsed]
        for item in items:
            item['versioncreated'] = utcnow()
            item['expiry'] = utcnow() + timedelta(minutes=20)

        failed = context.ingest_items(items, provider, feeding_service)
        assert len(failed) == 0, failed

        # Re-fetch the provider before system_update: ingest may have touched it,
        # and a stale _etag would make the update fail.
        provider = providers_service.find_one(name=provider_name, req=None)
        providers_service.system_update(provider['_id'], {LAST_ITEM_UPDATE: utcnow()}, provider)

        for item in items:
            set_placeholder(context, '{}.{}'.format(provider_name, item['guid']), item['_id'])
def update_provider(provider, rule_set=None, routing_scheme=None, sync=False):
    """Fetch items from ingest provider, ingest them into Superdesk and update the provider.

    :param provider: Ingest Provider data
    :param rule_set: Translation Rule Set if one is associated with Ingest Provider.
    :param routing_scheme: Routing Scheme if one is associated with Ingest Provider.
    :param sync: Running in sync mode from cli.
    """
    # Only one update per provider may run at a time; give the lock a little
    # headroom past the expected update time so it outlives a slow run.
    lock_name = get_lock_id('ingest', provider['name'], provider[superdesk.config.ID_FIELD])
    if not lock(lock_name, expire=UPDATE_TTL + 10):
        if sync:
            logger.error('update is already running for %s', provider['name'])
        return
    try:
        feeding_service = get_feeding_service(provider['feeding_service'])
        update = {LAST_UPDATED: utcnow()}

        if sync:
            provider[LAST_UPDATED] = utcnow() - timedelta(days=9999)  # import everything again

        # Feeding services may return either a list of batches or a generator.
        # Normalize the list case so the send()-based loop below works for both.
        generator = feeding_service.update(provider, update)
        if isinstance(generator, list):
            generator = (items for items in generator)
        failed = None
        while True:
            try:
                # Feed the previous batch's failures back into the generator so the
                # feeding service can react to them (a plain genexp just ignores it).
                items = generator.send(failed)
                failed = ingest_items(items, provider, feeding_service, rule_set, routing_scheme)
                update_last_item_updated(update, items)
            except StopIteration:
                break

        # Some Feeding Services update the collection and by this time the _etag might have been changed.
        # So it's necessary to fetch it once again. Otherwise, OriginalChangedError is raised.
        ingest_provider_service = superdesk.get_resource_service('ingest_providers')
        provider = ingest_provider_service.find_one(req=None, _id=provider[superdesk.config.ID_FIELD])
        ingest_provider_service.system_update(provider[superdesk.config.ID_FIELD], update, provider)

        # Nothing new arrived and the provider has been idle too long: alert admins.
        if LAST_ITEM_UPDATE not in update and get_is_idle(provider):
            admins = superdesk.get_resource_service('users').get_users_by_user_type('administrator')
            notify_and_add_activity(
                ACTIVITY_EVENT,
                'Provider {{name}} has gone strangely quiet. Last activity was on {{last}}',
                resource='ingest_providers', user_list=admins, name=provider.get('name'),
                last=provider[LAST_ITEM_UPDATE].replace(tzinfo=timezone.utc).astimezone(tz=None).strftime("%c"))

        logger.info('Provider {0} updated'.format(provider[superdesk.config.ID_FIELD]))

        if LAST_ITEM_UPDATE in update:  # Only push a notification if there has been an update
            push_notification('ingest:update', provider_id=str(provider[superdesk.config.ID_FIELD]))
    except Exception as e:
        logger.error("Failed to ingest file: {error}".format(error=e))
        raise IngestFileError(3000, e, provider)
    finally:
        unlock(lock_name)
def _test_config(self, updates, original=None):
    """Run the feeding service's config test against *original* merged with *updates*.

    Silently returns when no feeding service is configured, since there is
    nothing to test yet in that case.
    """
    provider = {}
    if original:
        provider = original.copy()
    provider.update(updates)
    try:
        service = get_feeding_service(provider['feeding_service'])
    except KeyError:
        return
    service.config_test(provider)
def remove_expired_data(provider):
    """Remove expired data for provider"""
    logger.info('Removing expired content for provider: %s' % provider.get('_id', 'Detached items'))

    # Providers without a resolvable feeding service fall back to the
    # generic 'ingest' collection.
    try:
        service = get_feeding_service(provider['feeding_service'])
        ingest_collection = getattr(service, 'service', 'ingest')
    except KeyError:
        ingest_collection = 'ingest'

    ingest_service = superdesk.get_resource_service(ingest_collection)
    items = get_expired_items(provider, ingest_collection)

    ids = [doc['_id'] for doc in items]
    # The first pass consumed the cursor; rewind before collecting media ids.
    items.rewind()
    file_ids = [rendition.get('media')
                for doc in items
                if not doc.get('archived')
                for rendition in doc.get('renditions', {}).values()
                if rendition.get('media')]

    if ids:
        logger.info('Removing items %s' % ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info('Deleting file: %s' % file_id)
        superdesk.app.media.delete(file_id)

    logger.info('Removed expired content for provider: {0} count: {1}'.format(
        provider.get('_id', 'Detached items'), len(ids)))

    remove_expired_from_elastic(ingest_collection)
def remove_expired_data(provider):
    """Remove expired data for provider"""
    logger.info("Removing expired content for provider: %s" % provider.get("_id", "Detached items"))

    # Resolve which collection this provider ingests into; anything without a
    # known feeding service is cleaned out of the generic "ingest" collection.
    try:
        feeding_service = get_feeding_service(provider["feeding_service"])
    except KeyError:
        ingest_collection = "ingest"
    else:
        ingest_collection = feeding_service.service if hasattr(feeding_service, "service") else "ingest"

    ingest_service = superdesk.get_resource_service(ingest_collection)
    expired = get_expired_items(provider, ingest_collection)

    ids = [expired_item["_id"] for expired_item in expired]
    expired.rewind()  # cursor was consumed building ids; reset for the media pass
    file_ids = []
    for expired_item in expired:
        if expired_item.get("archived"):
            continue
        for rendition in expired_item.get("renditions", {}).values():
            media = rendition.get("media")
            if media:
                file_ids.append(media)

    if ids:
        logger.info("Removing items %s" % ids)
        ingest_service.delete({"_id": {"$in": ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info("Deleting file: %s" % file_id)
        superdesk.app.media.delete(file_id)

    logger.info("Removed expired content for provider: {0} count: {1}".format(
        provider.get("_id", "Detached items"), len(ids)))

    remove_expired_from_elastic(ingest_collection)
def _get_provider_service(self, provider):
    """Return the feeding service registered for *provider*'s feeding_service name."""
    service_name = provider["feeding_service"]
    return get_feeding_service(service_name)
def test_ingest_update_same_event(self):
    # Fixture: an NTB "newscalendar" event with a fixed guid, so ingesting it a
    # second time exercises the update path rather than creating a new event.
    xml = ET.fromstring(
        """<?xml version="1.0" encoding="ISO-8859-1" standalone="yes"?>
        <document>
            <guid>NTB-123456</guid>
            <time>2016-08-10T15:02:02</time>
            <publiseres>True</publiseres>
            <ntbId>NBRP160810_144545_ja_00</ntbId>
            <service>newscalendar</service>
            <title>Original Content</title>
            <location>Fr. Nansens plass 17, Tromsø, Troms</location>
            <timeStart>2016-09-05T09:00:00</timeStart>
            <timeEnd>2016-09-05T16:00:00</timeEnd>
            <alldayevent>False</alldayevent>
            <priority>5</priority>
            <regions>
                <region>Norge</region>
            </regions>
            <districts>
                <district parent="Norge">Troms</district>
            </districts>
            <category>Innenriks</category>
            <subcategory>Redplan element</subcategory>
            <subjects>
                <subject>Kriminalitet og rettsvesen</subject>
                <subject parent="Kriminalitet">Drap;Rettssaker</subject>
            </subjects>
            <emailwriter>[email protected]</emailwriter>
            <messagetype>Redplan redaksjon</messagetype>
            <geo>
                <latitude>69.65482639999999</latitude>
                <longitude>18.96509590000005</longitude>
            </geo>
            <content>Original Content</content>
            <mediaList>
                <media id="" mediaType="" mimeType="ukjent">
                    <caption></caption>
                </media>
            </mediaList>
        </document>""")
    with self.app.test_request_context(self.app.config['URL_PREFIX']):
        # ingest event
        events = self.get_parsed_documents(
            registered_feed_parsers.get('ntb_event_xml'), xml)
        provider = get_resource_service('ingest_providers').find_one(
            req=None, _id=self.providers.get('ntbevent'))
        self.ingest_items(events, provider, get_feeding_service('event_file'))
        ingested_event = get_resource_service('events').find_one(
            req=None, _id='NTB-123456')
        # NOTE(review): assertTrue(a, b) treats the second argument as the failure
        # MESSAGE, not an expected value — each check below only verifies that the
        # first argument is truthy, so the "expected" values are never compared.
        # These look like they were meant to be assertEqual; confirm the actual
        # types (parsed dates are presumably datetimes, not strings) before fixing.
        self.assertTrue(ingested_event['_id'], 'NTB-123456')
        self.assertTrue(ingested_event['name'], 'Original Content')
        self.assertTrue(ingested_event['dates']['start'], '2016-09-05T09:00:00')
        self.assertTrue(ingested_event['dates']['end'], '2016-09-05T16:00:00')
        self.assertTrue(
            ingested_event['_planning_schedule'][0]['scheduled'],
            '2016-09-05T09:00:00')

        # ingest updated event
        events = self.get_parsed_documents(
            registered_feed_parsers.get('ntb_event_xml'), xml)
        events[0]['dates']['start'] = '2016-09-06T10:00:00'
        events[0]['dates']['end'] = '2016-09-06T14:00:00'
        events[0]['name'] = 'Updated Content'
        self.ingest_items(events, provider, get_feeding_service('event_file'))
        ingested_event = get_resource_service('events').find_one(
            req=None, _id='NTB-123456')
        # NOTE(review): same assertTrue misuse as above — nothing here actually
        # asserts that the update took effect, and the "expected" dates are the
        # pre-update values (plus a likely typo, 2016-09-16T16:00:00).
        self.assertTrue(ingested_event['_id'], 'NTB-123456')
        self.assertTrue(ingested_event['name'], 'Updated Content')
        self.assertTrue(ingested_event['dates']['start'], '2016-09-05T09:00:00')
        self.assertTrue(ingested_event['dates']['end'], '2016-09-05T16:00:00')
        self.assertTrue(
            ingested_event['_planning_schedule'][0]['scheduled'],
            '2016-09-16T16:00:00')