Пример #1
0
 def setUp(self):
     """Parse ``fixtures/afp.xml`` (located beside this module) into ``self.item``."""
     here = os.path.dirname(os.path.realpath(__file__))
     xml_path = os.path.join(here, 'fixtures', 'afp.xml')
     source = {'name': 'Test'}
     with open(xml_path) as handle:
         # Bind the parser method first so evaluation order matches a direct call.
         parse = NewsMLOneParser().parse_message
         root = etree.fromstring(handle.read())
         self.item = parse(root, source)
Пример #2
0
class AFPIngestService(FileIngestService):
    """AFP Ingest Service.

    Scans the directory configured on the provider, parses each file with
    ``NewsMLOneParser`` and yields the parsed items one file at a time.
    """

    PROVIDER = 'afp'

    # Descriptions of the two error kinds that ``_update`` raises.
    ERRORS = [ParserError.newsmlOneParserError().get_error_description(),
              ProviderError.ingestError().get_error_description()]

    def __init__(self):
        self.parser = NewsMLOneParser()

    def _update(self, provider):
        """Parse the files in the provider's directory and yield the results.

        :param provider: provider dict; ``provider['config']['path']`` is the directory to scan and
            ``provider['last_updated']`` is handed to ``is_latest_content``.
        :return: generator yielding a one-element list ``[item]`` for each parsed file.
        :raises ParserError: when a file is not well-formed XML.
        :raises ProviderError: on any other unexpected failure while handling a file.
        """
        self.provider = provider
        self.path = provider.get('config', {}).get('path', None)
        if not self.path:
            return

        for filename in get_sorted_files(self.path, sort_by=FileSortAttributes.created):
            try:
                filepath = os.path.join(self.path, filename)
                if not os.path.isfile(filepath):
                    continue

                stat = os.lstat(filepath)
                last_updated = datetime.fromtimestamp(stat.st_mtime, tz=utc)
                if not self.is_latest_content(last_updated, provider.get('last_updated')):
                    # Rejected by is_latest_content: moved as a success without being parsed.
                    self.move_file(self.path, filename, provider=provider, success=True)
                    continue

                # Read everything first so the handle is closed before the file is
                # moved and before control leaves this generator at the yield.
                with open(filepath, 'r') as f:
                    content = f.read()
                item = self.parser.parse_message(etree.fromstring(content), provider)

                self.add_timestamps(item)
                self.move_file(self.path, filename, provider=provider, success=True)
                yield [item]
            except etreeParserError as ex:
                # logger.exception already records the active exception; passing it as a
                # %-argument to a message without placeholders breaks log formatting.
                logger.exception("Ingest Type: AFP - File: %s could not be processed", filename)
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ParserError.newsmlOneParserError(ex, provider) from ex
            except ParserError:
                # Not re-raised: the file is moved as a failure and the loop continues.
                self.move_file(self.path, filename, provider=provider, success=False)
            except Exception as ex:
                self.move_file(self.path, filename, provider=provider, success=False)
                raise ProviderError.ingestError(ex, provider) from ex

        push_notification('ingest:update')
Пример #3
0
 def setUp(self):
     """Load the AFP fixture file and keep the parsed result on ``self.item``."""
     base_dir = os.path.dirname(os.path.realpath(__file__))
     fixture_path = os.path.join(base_dir, 'fixtures', 'afp.xml')
     test_provider = {'name': 'Test'}
     with open(fixture_path) as stream:
         # Parser is created before the XML is read, as in a single chained call.
         parse_message = NewsMLOneParser().parse_message
         tree = etree.fromstring(stream.read())
         self.item = parse_message(tree, test_provider)
Пример #4
0
class TestCase(unittest.TestCase):
    """Checks the item produced by ``NewsMLOneParser`` for ``fixtures/afp.xml``."""

    def setUp(self):
        # Parse the fixture stored next to this module; every test reads self.item.
        dirname = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(dirname, 'fixtures', 'afp.xml')
        provider = {'name': 'Test'}
        with open(fixture) as f:
            self.item = NewsMLOneParser().parse_message(etree.fromstring(f.read()), provider)

    # NOTE: assertEqual is used throughout; the assertEquals alias was removed in Python 3.12.

    def test_headline(self):
        self.assertEqual(self.item.get('headline'), 'Sweden court accepts receivership for Saab carmaker')

    def test_dateline(self):
        self.assertEqual(self.item.get('dateline'), 'STOCKHOLM, Aug 29, 2014 (AFP) -')

    def test_slugline(self):
        self.assertEqual(self.item.get('slugline'), 'Sweden-SAAB')

    def test_byline(self):
        self.assertEqual(self.item.get('byline'), '')

    def test_language(self):
        self.assertEqual(self.item.get('language'), 'en')

    def test_guid(self):
        self.assertEqual(self.item.get('guid'), 'urn:newsml:afp.com:20140829T135002Z:TX-PAR-FXW86:1')

    def test_coreitemvalues(self):
        self.assertEqual(self.item.get('type'), 'text')
        self.assertEqual(self.item.get('urgency'), '4')
        self.assertEqual(self.item.get('version'), '1')
        self.assertEqual(self.item.get('versioncreated'), datetime.datetime(2014, 8, 29, 13, 49, 51))
        self.assertEqual(self.item.get('firstcreated'), datetime.datetime(2014, 8, 29, 13, 49, 51))
        self.assertEqual(self.item.get('pubstatus'), 'usable')

    def test_subjects(self):
        subjects = self.item.get('subject')
        self.assertEqual(len(subjects), 5)
        self.assertIn({'name': 'automotive equipment', 'qcode': '04011002'}, subjects)
        self.assertIn({'name': 'bankruptcy', 'qcode': '04016007'}, subjects)
        self.assertIn({'name': 'economy, business and finance', 'qcode': '04000000'}, subjects)
        self.assertIn({'name': 'quarterly or semiannual financial statement', 'qcode': '04016038'}, subjects)
        self.assertIn({'name': 'manufacturing and engineering', 'qcode': '04011000'}, subjects)

    def test_usageterms(self):
        self.assertEqual(self.item.get('usageterms'), 'NO ARCHIVAL USE')

    def test_genre(self):
        genres = self.item.get('genre')
        self.assertIn({'name': 'business'}, genres)
        self.assertIn({'name': 'bankruptcy'}, genres)

    def test_content_is_text(self):
        body = self.item.get('body_html')
        self.assertIsInstance(body, str)
        # The pattern is a regex, so '.' matches any character in the tag name.
        self.assertNotRegex(body, '<body.content>')
Пример #5
0
 def setUp(self):
     """Read ``fixtures/afp.xml`` relative to this file and store the parsed item."""
     module_dir = os.path.dirname(os.path.realpath(__file__))
     afp_fixture = os.path.join(module_dir, "fixtures", "afp.xml")
     provider_stub = {"name": "Test"}
     with open(afp_fixture) as fixture_file:
         # Resolve the bound method before reading so call ordering is unchanged.
         parse = NewsMLOneParser().parse_message
         document = etree.fromstring(fixture_file.read())
         self.item = parse(document, provider_stub)
Пример #6
0
class TestCase(unittest.TestCase):
    """Checks the item produced by ``NewsMLOneParser`` for ``fixtures/afp.xml``."""

    def setUp(self):
        # Parse the fixture stored next to this module; every test reads self.item.
        dirname = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(dirname, "fixtures", "afp.xml")
        provider = {"name": "Test"}
        with open(fixture) as f:
            self.item = NewsMLOneParser().parse_message(etree.fromstring(f.read()), provider)

    # NOTE: assertEqual is used throughout; the assertEquals alias was removed in Python 3.12.

    def test_headline(self):
        self.assertEqual(self.item.get("headline"), "Sweden court accepts receivership for Saab carmaker")

    def test_dateline(self):
        self.assertEqual(self.item.get("dateline"), "STOCKHOLM, Aug 29, 2014 (AFP) -")

    def test_slugline(self):
        self.assertEqual(self.item.get("slugline"), "Sweden-SAAB")

    def test_byline(self):
        self.assertEqual(self.item.get("byline"), "")

    def test_language(self):
        self.assertEqual(self.item.get("language"), "en")

    def test_guid(self):
        self.assertEqual(self.item.get("guid"), "urn:newsml:afp.com:20140829T135002Z:TX-PAR-FXW86:1")

    def test_coreitemvalues(self):
        self.assertEqual(self.item.get("type"), "text")
        self.assertEqual(self.item.get("urgency"), "4")
        self.assertEqual(self.item.get("version"), "1")
        self.assertEqual(self.item.get("versioncreated"), datetime.datetime(2014, 8, 29, 13, 49, 51))
        self.assertEqual(self.item.get("firstcreated"), datetime.datetime(2014, 8, 29, 13, 49, 51))
        self.assertEqual(self.item.get("pubstatus"), "Usable")

    def test_subjects(self):
        subjects = self.item.get("subject")
        self.assertEqual(len(subjects), 5)
        self.assertIn({"name": "automotive equipment", "qcode": "04011002"}, subjects)
        self.assertIn({"name": "bankruptcy", "qcode": "04016007"}, subjects)
        self.assertIn({"name": "economy, business and finance", "qcode": "04000000"}, subjects)
        self.assertIn({"name": "quarterly or semiannual financial statement", "qcode": "04016038"}, subjects)
        self.assertIn({"name": "manufacturing and engineering", "qcode": "04011000"}, subjects)

    def test_usageterms(self):
        self.assertEqual(self.item.get("usageterms"), "NO ARCHIVAL USE")

    def test_genre(self):
        genres = self.item.get("genre")
        self.assertIn({"name": "business"}, genres)
        self.assertIn({"name": "bankruptcy"}, genres)

    def test_content_is_text(self):
        body = self.item.get("body_html")
        self.assertIsInstance(body, str)
        # The pattern is a regex, so "." matches any character in the tag name.
        self.assertNotRegex(body, "<body.content>")
Пример #7
0
 def __init__(self):
     """Attach a fresh ``NewsMLOneParser`` instance as ``self.parser``."""
     newsml_parser = NewsMLOneParser()
     self.parser = newsml_parser
Пример #8
0
class TestCase(unittest.TestCase):
    """Checks the item produced by ``NewsMLOneParser`` for ``fixtures/afp.xml``."""

    def setUp(self):
        # Parse the fixture stored next to this module; every test reads self.item.
        dirname = os.path.dirname(os.path.realpath(__file__))
        fixture = os.path.join(dirname, 'fixtures', 'afp.xml')
        with open(fixture) as f:
            self.item = NewsMLOneParser().parse_message(
                etree.fromstring(f.read()))

    # NOTE: assertEqual is used throughout; the assertEquals alias was removed in Python 3.12.

    def test_headline(self):
        self.assertEqual(
            self.item.get('headline'),
            'Sweden court accepts receivership for Saab carmaker')

    def test_dateline(self):
        self.assertEqual(self.item.get('dateline'),
                         'STOCKHOLM, Aug 29, 2014 (AFP) -')

    def test_slugline(self):
        self.assertEqual(self.item.get('slugline'), 'Sweden-SAAB')

    def test_byline(self):
        self.assertEqual(self.item.get('byline'), '')

    def test_language(self):
        self.assertEqual(self.item.get('language'), 'en')

    def test_guid(self):
        self.assertEqual(
            self.item.get('guid'),
            'urn:newsml:afp.com:20140829T135002Z:TX-PAR-FXW86:1')

    def test_coreitemvalues(self):
        self.assertEqual(self.item.get('type'), 'text')
        self.assertEqual(self.item.get('urgency'), '4')
        self.assertEqual(self.item.get('version'), '1')
        self.assertEqual(self.item.get('versioncreated'),
                         datetime.datetime(2014, 8, 29, 13, 49, 51))
        self.assertEqual(self.item.get('firstcreated'),
                         datetime.datetime(2014, 8, 29, 13, 49, 51))
        self.assertEqual(self.item.get('pubstatus'), 'Usable')

    def test_subjects(self):
        subjects = self.item.get('subject')
        self.assertEqual(len(subjects), 5)
        self.assertIn({
            'name': 'automotive equipment',
            'qcode': '04011002'
        }, subjects)
        self.assertIn({
            'name': 'bankruptcy',
            'qcode': '04016007'
        }, subjects)
        self.assertIn(
            {
                'name': 'economy, business and finance',
                'qcode': '04000000'
            }, subjects)
        self.assertIn(
            {
                'name': 'quarterly or semiannual financial statement',
                'qcode': '04016038'
            }, subjects)
        self.assertIn(
            {
                'name': 'manufacturing and engineering',
                'qcode': '04011000'
            }, subjects)

    def test_usageterms(self):
        self.assertEqual(self.item.get('usageterms'), 'NO ARCHIVAL USE')

    def test_genre(self):
        genres = self.item.get('genre')
        self.assertIn({'name': 'business'}, genres)
        self.assertIn({'name': 'bankruptcy'}, genres)
Пример #9
0
 def __init__(self):
     """Instantiate ``NewsMLOneParser`` and keep it on the instance as ``parser``."""
     parser_instance = NewsMLOneParser()
     self.parser = parser_instance
Пример #10
0
class AFPIngestService(FileIngestService):
    """AFP Ingest Service.

    Scans the directory configured on the provider, parses each file with
    ``NewsMLOneParser`` and yields the parsed items one file at a time.
    """

    PROVIDER = 'afp'

    # Descriptions of the two error kinds that ``_update`` raises.
    ERRORS = [
        ParserError.newsmlOneParserError().get_error_description(),
        ProviderError.ingestError().get_error_description()
    ]

    def __init__(self):
        self.parser = NewsMLOneParser()

    def _update(self, provider):
        """Parse the files in the provider's directory and yield the results.

        :param provider: provider dict; ``provider['config']['path']`` is the
            directory to scan and ``provider['last_updated']`` is handed to
            ``is_latest_content``.
        :return: generator yielding a one-element list ``[item]`` for each
            parsed file.
        :raises ParserError: when a file is not well-formed XML.
        :raises ProviderError: on any other unexpected failure while
            handling a file.
        """
        self.provider = provider
        self.path = provider.get('config', {}).get('path', None)
        if not self.path:
            return

        for filename in get_sorted_files(self.path,
                                         sort_by=FileSortAttributes.created):
            try:
                filepath = os.path.join(self.path, filename)
                if not os.path.isfile(filepath):
                    continue

                stat = os.lstat(filepath)
                last_updated = datetime.fromtimestamp(stat.st_mtime,
                                                      tz=utc)
                if not self.is_latest_content(last_updated,
                                              provider.get('last_updated')):
                    # Rejected by is_latest_content: moved as a success
                    # without being parsed.
                    self.move_file(self.path,
                                   filename,
                                   provider=provider,
                                   success=True)
                    continue

                # Read everything first so the handle is closed before the
                # file is moved and before control leaves this generator at
                # the yield.
                with open(filepath, 'r') as f:
                    content = f.read()
                item = self.parser.parse_message(etree.fromstring(content),
                                                 provider)

                self.add_timestamps(item)
                self.move_file(self.path,
                               filename,
                               provider=provider,
                               success=True)
                yield [item]
            except etreeParserError as ex:
                # logger.exception already records the active exception;
                # passing it as a %-argument to a message without
                # placeholders breaks log formatting.
                logger.exception(
                    "Ingest Type: AFP - File: %s could not be processed",
                    filename)
                self.move_file(self.path,
                               filename,
                               provider=provider,
                               success=False)
                raise ParserError.newsmlOneParserError(ex, provider) from ex
            except ParserError:
                # Not re-raised: the file is moved as a failure and the loop
                # continues.
                self.move_file(self.path,
                               filename,
                               provider=provider,
                               success=False)
            except Exception as ex:
                self.move_file(self.path,
                               filename,
                               provider=provider,
                               success=False)
                raise ProviderError.ingestError(ex, provider) from ex

        push_notification('ingest:update')