def __new__(metacls, name, bases, attrs):
    """Create the class and, when it declares ``PROVIDER``, register it.

    A class carrying a ``PROVIDER`` attribute must also declare an
    ``ERRORS`` attribute, and its provider name must not already be
    registered.
    """
    declared = attrs.get('PROVIDER')
    if declared is None:
        # Plain (non-provider) class: nothing to validate or register.
        return super().__new__(metacls, name, bases, attrs)

    if 'ERRORS' not in attrs:
        raise AttributeError(
            "Provider class {} must define "
            "the ERRORS list attribute.".format(name))
    if declared in providers:
        raise TypeError(
            "PROVIDER {} already exists ({}).".format(
                declared, providers[declared]))

    created = super().__new__(metacls, name, bases, attrs)
    register_provider(declared, created, created.ERRORS)
    return created
def __new__(metacls, name, bases, attrs):
    """Build the class; provider-declaring classes are validated and registered."""
    pname = attrs.get('PROVIDER')
    is_provider = pname is not None
    if is_provider:
        # Provider classes must ship an ERRORS attribute ...
        if 'ERRORS' not in attrs:
            raise AttributeError(
                "Provider class {} must define "
                "the ERRORS list attribute.".format(name))
        # ... and must not clash with an already-registered provider.
        if pname in providers:
            raise TypeError(
                "PROVIDER {} already exists ({}).".format(
                    pname, providers[pname]))
    built = super().__new__(metacls, name, bases, attrs)
    if is_provider:
        register_provider(pname, built, built.ERRORS)
    return built
except Exception as error: traceback.print_exc() raise error if response.status_code == 404: raise LookupError('Not found %s' % payload) try: # workaround for httmock lib # return etree.fromstring(response.text.encode('utf-8')) return etree.fromstring(response.content) except UnicodeEncodeError as error: traceback.print_exc() raise error def get_url(self, endpoint): """Get API url for given endpoint.""" return '/'.join([self.URL, endpoint]) def format_date(self, date): """Format date for API usage.""" return date.strftime(self.DATE_FORMAT) def prepare_href(self, href): (scheme, netloc, path, params, query, fragment) = urlparse(href) new_href = urlunparse((scheme, netloc, path, '', '', '')) return '%s?auth_token=%s' % (new_href, self.get_token()) register_provider(PROVIDER, ReutersIngestService())
from superdesk.errors import SuperdeskApiError, ProviderError
from superdesk.io import register_provider
from .tests import setup_providers, teardown_providers
from superdesk.io.ingest_service import IngestService
from superdesk.io.commands.remove_expired_content import get_expired_items, RemoveExpiredContent
from superdesk.celery_task_utils import mark_task_as_not_running, is_task_running
from test_factory import SuperdeskTestCase


class TestProviderService(IngestService):
    """Minimal ingest service used only to register a 'test' provider."""

    def update(self, provider):
        # No-op: these tests never need actual ingested items.
        return []


register_provider(
    'test', TestProviderService(),
    [ProviderError.anpaError(None, None).get_error_description()])


class CeleryTaskRaceTest(SuperdeskTestCase):

    def test_the_second_update_fails_if_already_running(self):
        provider = {'_id': 'abc',
                    'name': 'test provider',
                    'update_schedule': {'minutes': 1}}

        # Nothing is running yet, so there is no marker to remove.
        was_running = mark_task_as_not_running(provider['name'], provider['_id'])
        self.assertFalse(was_running)

        # First attempt acquires the "running" marker ...
        already_running = is_task_running(
            provider['name'], provider['_id'], {'minutes': 1})
        self.assertFalse(already_running,
                         'Failed to mark ingest update as running')

        # ... so the second attempt must see it and refuse to start.
        already_running = is_task_running(
            provider['name'], provider['_id'], {'minutes': 1})
        self.assertTrue(
            already_running,
            'Ingest update marked as running, possible race condition')
from nose.tools import assert_raises from superdesk import get_resource_service from superdesk.utc import utcnow from superdesk.tests import setup from superdesk.errors import SuperdeskApiError from superdesk.io import register_provider from superdesk.io.tests import setup_providers, teardown_providers from superdesk.io.ingest_service import IngestService class TestProviderService(IngestService): def update(self, provider): return [] register_provider('test', TestProviderService()) class UpdateIngestTest(TestCase): def setUp(self): setup(context=self) setup_providers(self) def tearDown(self): teardown_providers(self) def _get_provider(self, provider_name): return get_resource_service('ingest_providers').find_one( name=provider_name, req=None) def _get_provider_service(self, provider):
"""Create a new content package from given content items. The package's `main` group contains only the references to given items, not the items themselves. In the list of references, the reference to the text item preceeds the references to image items. :param dict text_item: item representing the text content :param list image_items: list of items (dicts) representing the images related to the text content :return: the created content package :rtype: dict """ package = { "type": "composite", "groups": [ {"id": "root", "role": "grpRole:NEP", "refs": [{"idRef": "main"}]}, {"id": "main", "role": "main", "refs": []}, ], } item_references = package["groups"][1]["refs"] item_references.append({"residRef": text_item["guid"]}) for image in image_items: item_references.append({"residRef": image["guid"]}) return package register_provider(PROVIDER, RssIngestService(), errors)
def _update(self, provider):
    """Ingest items from the provider's configured directory.

    Files newer than the provider's last update are parsed and yielded
    one item-batch at a time; every handled file is moved aside
    (success or failure), and parse errors are logged rather than
    aborting the run.
    """
    self.provider = provider
    self.path = provider.get('config', {}).get('path', None)
    if not self.path:
        # No directory configured; nothing to ingest.
        return
    for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created):
        try:
            filepath = os.path.join(self.path, filename)
            if not os.path.isfile(filepath):
                continue
            file_mtime = datetime.fromtimestamp(os.lstat(filepath).st_mtime, tz=utc)
            if self.is_latest_content(file_mtime, provider.get('last_updated')):
                with open(filepath, 'r') as f:
                    item = self.parser.parse_message(etree.fromstring(f.read()))
                self.add_timestamps(item)
                self.move_file(self.path, filename, success=True)
                yield [item]
            else:
                # Already seen in an earlier run; archive without re-parsing.
                self.move_file(self.path, filename, success=True)
        except Exception as err:
            logger.exception(err)
            self.move_file(self.path, filename, success=False)
    push_notification('ingest:update')


register_provider(PROVIDER, AFPIngestService())
# -*- coding: utf-8; -*- # # This file is part of Superdesk. # # Copyright 2013, 2014 Sourcefabric z.u. and contributors. # # For the full copyright and license information, please see the # AUTHORS and LICENSE files distributed with this source code, or # at https://www.sourcefabric.org/superdesk/license import apps.io.aap # NOQA import apps.io.afp # NOQA import apps.io.dpa # NOQA import apps.io.reuters # NOQA from superdesk.io import register_provider register_provider('search', None, [])
if not filename.lower().endswith(self.FILE_SUFFIX): continue if last_updated: item_last_updated = datetime.strptime(facts['modify'], self.DATE_FORMAT).replace(tzinfo=utc) if item_last_updated < last_updated: continue dest = os.path.join(config['dest_path'], filename) try: with open(dest, 'xb') as f: ftp.retrbinary('RETR %s' % filename, f.write) except FileExistsError: continue xml = etree.parse(dest).getroot() parser = get_xml_parser(xml) if not parser: raise IngestFtpError.ftpUnknownParserError(Exception('Parser not found'), provider, filename) items.append(parser.parse_message(xml, provider)) return items except IngestFtpError: raise except Exception as ex: raise IngestFtpError.ftpError(ex, provider) register_provider('ftp', FTPService())
except imaplib.IMAP4.error: raise IngestEmailError.emailLoginError(imaplib.IMAP4.error, provider) rv, data = imap.select(config.get('mailbox', None), readonly=False) if rv == 'OK': rv, data = imap.search(None, config.get('filter', None)) if rv == 'OK': new_items = [] for num in data[0].split(): rv, data = imap.fetch(num, '(RFC822)') if rv == 'OK': try: new_items.append( self.parser.parse_email(data, provider)) except IngestEmailError: continue imap.close() imap.logout() except IngestEmailError: raise except Exception as ex: raise IngestEmailError.emailError(ex, provider) return new_items def prepare_href(self, href): return url_for_media(href) register_provider(PROVIDER, EmailReaderService(), errors)
from superdesk.utc import utcnow
from superdesk.tests import setup
from superdesk.errors import SuperdeskApiError
from superdesk.io import register_provider
from superdesk.io.tests import setup_providers, teardown_providers
from superdesk.io.ingest_service import IngestService
from superdesk.io.commands.update_ingest import is_scheduled, update_provider, filter_expired_items, apply_rule_set


class TestProviderService(IngestService):
    """Stub ingest service registered under the 'test' provider name."""

    def update(self, provider):
        # Tests drive ingest explicitly; there is nothing to fetch.
        return []


register_provider('test', TestProviderService())


class UpdateIngestTest(TestCase):

    def setUp(self):
        setup(context=self)
        setup_providers(self)

    def tearDown(self):
        teardown_providers(self)

    def _get_provider(self, provider_name):
        # Look the provider up by name in the ingest_providers resource.
        return get_resource_service('ingest_providers').find_one(
            name=provider_name, req=None)

    def _get_provider_service(self, provider):
        # Services are keyed by provider type on the test case.
        return self.provider_services[provider.get('type')]
self.DATE_FORMAT).replace(tzinfo=utc) if item_last_updated < last_updated: continue dest = os.path.join(config['dest_path'], filename) try: with open(dest, 'xb') as f: ftp.retrbinary('RETR %s' % filename, f.write) except FileExistsError: continue xml = etree.parse(dest).getroot() parser = get_xml_parser(xml) if not parser: raise IngestFtpError.ftpUnknownParserError( Exception('Parser not found'), provider, filename) parsed = parser.parse_message(xml, provider) if isinstance(parsed, dict): parsed = [parsed] items.append(parsed) return items except IngestFtpError: raise except Exception as ex: raise IngestFtpError.ftpError(ex, provider) register_provider('ftp', FTPService(), errors)
:param dict data: parsed data of a single feed entry :param field_aliases: (optional) field name aliases. Used for content fields that are named differently in retrieved data. :type field_aliases: dict or None :return: created content item :rtype: dict """ if field_aliases is None: field_aliases = {} else: field_aliases = merge_dicts(field_aliases) item = dict(type='text') for field in self.item_fields: data_field_name = field_aliases.get( field.name_in_data, field.name_in_data ) field_value = data.get(data_field_name) if (field.type is datetime) and field_value: field_value = utcfromtimestamp(timegm(field_value)) item[field.name] = field_value return item register_provider(PROVIDER, RssIngestService(), errors)
if last_updated: item_last_updated = datetime.strptime( facts['modify'], self.DATE_FORMAT).replace(tzinfo=utc) if item_last_updated < last_updated: continue dest = '%s/%s' % (config['dest_path'], filename) try: with open(dest, 'xb') as f: ftp.retrbinary('RETR %s' % filename, f.write) except FileExistsError: continue xml = etree.parse(dest).getroot() parser = get_xml_parser(xml) if not parser: raise IngestFtpError.ftpUnknownParserError( Exception('Parser not found'), provider, filename) items.append(parser.parse_message(xml, provider)) return items except IngestFtpError: raise except Exception as ex: raise IngestFtpError.ftpError(ex, provider) register_provider('ftp', FTPService())
from superdesk.tests import setup from superdesk.errors import SuperdeskApiError, ProviderError from superdesk.io import register_provider from superdesk.io.tests import setup_providers, teardown_providers from superdesk.io.ingest_service import IngestService from superdesk.io.commands.remove_expired_content import get_expired_items from superdesk.celery_task_utils import mark_task_as_not_running, is_task_running class TestProviderService(IngestService): def update(self, provider): return [] register_provider( 'test', TestProviderService(), [ProviderError.anpaError(None, None).get_error_description()]) class CeleryTaskRaceTest(TestCase): def setUp(self): setup(context=self) def test_the_second_update_fails_if_already_running(self): provider = { '_id': 'abc', 'name': 'test provider', 'update_schedule': { 'minutes': 1 } }
for filename in os.listdir(self.path): try: if os.path.isfile(os.path.join(self.path, filename)): filepath = os.path.join(self.path, filename) stat = os.lstat(filepath) last_updated = datetime.fromtimestamp(stat.st_mtime, tz=utc) if self.is_latest_content(last_updated, provider.get('updated')): with open(os.path.join(self.path, filename), 'r') as f: item = self.parser.parse_message( etree.fromstring(f.read())) item['firstcreated'] \ = normalize_date(item.get('firstcreated'), self.tz) item['versioncreated'] \ = normalize_date(item.get('versioncreated'), self.tz) self.move_file(self.path, filename, success=True) yield [item] else: self.move_file(self.path, filename, success=True) except Exception as err: logger.exception(err) self.move_file(self.path, filename, success=False) push_notification('ingest:update') register_provider(PROVIDER, AAPIngestService())
def parse_file(self, filename, provider):
    """Parse a single teletype file; on failure, move it aside and raise."""
    try:
        path = provider.get('config', {}).get('path', None)
        if not path:
            return []
        return [self.parser.parse_file(os.path.join(path, filename))]
    except Exception as ex:
        self.move_file(self.path, filename, provider=provider, success=False)
        raise ParserError.parseFileError('Teletype', filename, ex, provider)


# NOTE(review): this second parse_file definition would shadow the one above
# if both share a scope — presumably they belong to two different classes in
# the original layout; confirm against the full file before merging them.
def parse_file(self, filename, provider):
    """Parse a single teletype file, passing the provider to the parser."""
    try:
        path = provider.get('config', {}).get('path', None)
        if not path:
            return []
        return [self.parser.parse_file(os.path.join(path, filename), provider)]
    except Exception as ex:
        raise ParserError.parseFileError('Teletype', filename, ex, provider)


register_provider(PROVIDER, TeletypeIngestService(), errors)
except Exception as error: traceback.print_exc() raise error if response.status_code == 404: raise LookupError('Not found %s' % payload) try: # workaround for httmock lib # return etree.fromstring(response.text.encode('utf-8')) return etree.fromstring(response.content) except UnicodeEncodeError as error: traceback.print_exc() raise error def get_url(self, endpoint): """Get API url for given endpoint.""" return '/'.join([self.URL, endpoint]) def format_date(self, date): """Format date for API usage.""" return date.strftime(self.DATE_FORMAT) def prepare_href(self, href): (scheme, netloc, path, params, query, fragment) = urlparse(href) new_href = urlunparse((scheme, netloc, path, '', '', '')) return '%s?auth_token=%s' % (new_href, self.get_token()) register_provider(PROVIDER, ReutersIngestService())
imap = imaplib.IMAP4_SSL(host=server, port=port) try: imap.login(config.get('user', None), config.get('password', None)) except imaplib.IMAP4.error: raise IngestEmailError.emailLoginError(imaplib.IMAP4.error, provider) rv, data = imap.select(config.get('mailbox', None), readonly=False) if rv == 'OK': rv, data = imap.search(None, config.get('filter', None)) if rv == 'OK': new_items = [] for num in data[0].split(): rv, data = imap.fetch(num, '(RFC822)') if rv == 'OK': try: new_items.append(self.parser.parse_email(data, provider)) except IngestEmailError: continue imap.close() imap.logout() except IngestEmailError: raise except Exception as ex: raise IngestEmailError.emailError(ex, provider) return new_items def prepare_href(self, href): return url_for_media(href) register_provider(PROVIDER, EmailReaderService(), errors)