示例#1
0
    def _update(self, provider):
        """Download new files from the provider's FTP directory and parse them.

        Logs in to ``config['host']``, changes to ``config['path']`` and walks
        the MLSD listing. Entries whose name does not end with
        ``self.FILE_SUFFIX``, or whose ``modify`` fact is older than
        ``provider['last_updated']``, are skipped. Remaining files are saved
        under ``config['dest_path']`` (a new temp directory is created when the
        key is missing) and parsed with the parser chosen by ``get_xml_parser``.

        A file that already exists locally is treated as already ingested and
        skipped. If a transfer fails midway, the partial local file is removed
        before the error propagates, so the next run retries it instead of
        skipping it forever.

        :param provider: provider dict; the 'config' and 'last_updated' keys are read.
        :return: list with one ``parse_message`` result per downloaded file.
        """
        # Local import keeps this method self-contained; harmless if the
        # module already imports os.
        import os

        config = provider.get('config', {})
        last_updated = provider.get('last_updated')

        if 'dest_path' not in config:
            config['dest_path'] = tempfile.mkdtemp(prefix='superdesk_ingest_')

        items = []
        with ftplib.FTP(config.get('host')) as ftp:
            ftp.login(config.get('username'), config.get('password'))
            ftp.cwd(config.get('path', ''))

            for filename, facts in ftp.mlsd():
                if not filename.endswith(self.FILE_SUFFIX):
                    continue

                if last_updated:
                    item_last_updated = datetime.strptime(facts['modify'], self.DATE_FORMAT).replace(tzinfo=utc)
                    if item_last_updated < last_updated:
                        continue

                dest = '%s/%s' % (config['dest_path'], filename)

                try:
                    # Exclusive-create mode: an existing file means it was
                    # fetched by an earlier run.
                    local_file = open(dest, 'xb')
                except FileExistsError:
                    continue

                try:
                    with local_file:
                        ftp.retrbinary('RETR %s' % filename, local_file.write)
                except Exception:
                    # Best-effort cleanup of the truncated file; the original
                    # download error is what the caller needs to see.
                    try:
                        os.remove(dest)
                    except OSError:
                        pass
                    raise

                xml = etree.parse(dest).getroot()
                items.append(get_xml_parser(xml).parse_message(xml))
        return items
示例#2
0
    def _update(self, provider):
        """Download new files from the provider's FTP directory and parse them.

        Logs in to ``config['host']``, changes to ``config['path']``, applies
        the ``config['passive']`` setting (default ``False``) and walks the
        MLSD listing. An entry is skipped when its ``type`` fact is not
        ``'file'``, when its lower-cased name does not end with
        ``self.FILE_SUFFIX``, or when its ``modify`` fact is older than
        ``provider['last_updated']``. Remaining files are saved under
        ``config['dest_path']`` (a new temp directory is created when the key
        is missing) and parsed.

        A file that already exists locally is treated as already ingested and
        skipped. If a transfer fails midway, the partial local file is removed
        before the error propagates, so the next run retries it instead of
        skipping it forever.

        :param provider: provider dict; the 'config' and 'last_updated' keys are read.
        :return: list with one entry per downloaded file; a dict returned by
            ``parse_message`` is wrapped in a single-element list first.
        :raises IngestFtpError: ``ftpUnknownParserError`` when no parser matches
            a file; any other failure is wrapped with ``ftpError``.
        """
        config = provider.get('config', {})
        last_updated = provider.get('last_updated')

        if 'dest_path' not in config:
            config['dest_path'] = tempfile.mkdtemp(prefix='superdesk_ingest_')

        items = []
        try:
            with ftplib.FTP(config.get('host')) as ftp:
                ftp.login(config.get('username'), config.get('password'))
                ftp.cwd(config.get('path', ''))
                ftp.set_pasv(config.get('passive', False))

                for filename, facts in ftp.mlsd():
                    if facts.get('type', '') != 'file':
                        continue

                    if not filename.lower().endswith(self.FILE_SUFFIX):
                        continue

                    if last_updated:
                        item_last_updated = datetime.strptime(
                            facts['modify'],
                            self.DATE_FORMAT).replace(tzinfo=utc)
                        if item_last_updated < last_updated:
                            continue

                    dest = os.path.join(config['dest_path'], filename)

                    try:
                        # Exclusive-create mode: an existing file means it was
                        # fetched by an earlier run.
                        local_file = open(dest, 'xb')
                    except FileExistsError:
                        continue

                    try:
                        with local_file:
                            ftp.retrbinary(
                                'RETR %s' % filename, local_file.write)
                    except Exception:
                        # Best-effort cleanup of the truncated file; the
                        # original download error is re-raised below.
                        try:
                            os.remove(dest)
                        except OSError:
                            pass
                        raise

                    xml = etree.parse(dest).getroot()
                    parser = get_xml_parser(xml)
                    if not parser:
                        raise IngestFtpError.ftpUnknownParserError(
                            Exception('Parser not found'), provider, filename)
                    parsed = parser.parse_message(xml, provider)
                    if isinstance(parsed, dict):
                        parsed = [parsed]

                    items.append(parsed)
            return items
        except IngestFtpError:
            # Already a domain error; do not wrap it a second time.
            raise
        except Exception as ex:
            raise IngestFtpError.ftpError(ex, provider) from ex
示例#3
0
    def _update(self, provider):
        """Fetch and parse files that appeared on the provider's FTP server.

        Steps: log in to ``config['host']``, change to ``config['path']``, set
        passive mode from ``config['passive']`` (default ``False``), then
        iterate the MLSD listing. Only entries whose ``type`` fact is
        ``'file'``, whose lower-cased name ends with ``self.FILE_SUFFIX`` and
        whose ``modify`` fact is not older than ``provider['last_updated']``
        are downloaded into ``config['dest_path']`` (a temp directory is
        created when that key is absent) and parsed.

        Files already present locally are skipped as previously ingested. A
        transfer that fails midway has its partial local file deleted before
        the error propagates; otherwise the leftover file would make every
        later run skip it.

        :param provider: provider dict; the 'config' and 'last_updated' keys are read.
        :return: list with one entry per downloaded file; a dict returned by
            ``parse_message`` is wrapped in a single-element list first.
        :raises IngestFtpError: ``ftpUnknownParserError`` when no parser matches
            a file; any other failure is wrapped with ``ftpError``.
        """
        config = provider.get('config', {})
        last_updated = provider.get('last_updated')

        if 'dest_path' not in config:
            config['dest_path'] = tempfile.mkdtemp(prefix='superdesk_ingest_')

        items = []
        try:
            with ftplib.FTP(config.get('host')) as ftp:
                ftp.login(config.get('username'), config.get('password'))
                ftp.cwd(config.get('path', ''))
                ftp.set_pasv(config.get('passive', False))

                for filename, facts in ftp.mlsd():
                    if facts.get('type', '') != 'file':
                        continue

                    if not filename.lower().endswith(self.FILE_SUFFIX):
                        continue

                    if last_updated:
                        item_last_updated = datetime.strptime(facts['modify'], self.DATE_FORMAT).replace(tzinfo=utc)
                        if item_last_updated < last_updated:
                            continue

                    dest = os.path.join(config['dest_path'], filename)

                    try:
                        # 'x' mode refuses to overwrite: an existing file was
                        # downloaded by a previous run.
                        local_file = open(dest, 'xb')
                    except FileExistsError:
                        continue

                    try:
                        with local_file:
                            ftp.retrbinary('RETR %s' % filename, local_file.write)
                    except Exception:
                        # Remove the truncated file (best effort) so the next
                        # run retries it; the download error is re-raised.
                        try:
                            os.remove(dest)
                        except OSError:
                            pass
                        raise

                    xml = etree.parse(dest).getroot()
                    parser = get_xml_parser(xml)
                    if not parser:
                        raise IngestFtpError.ftpUnknownParserError(Exception('Parser not found'),
                                                                   provider, filename)
                    parsed = parser.parse_message(xml, provider)
                    if isinstance(parsed, dict):
                        parsed = [parsed]

                    items.append(parsed)
            return items
        except IngestFtpError:
            # Already a domain error; do not wrap it a second time.
            raise
        except Exception as ex:
            raise IngestFtpError.ftpError(ex, provider) from ex
示例#4
0
 def setUpFixture(self, filename):
     """Load the fixture and keep its tree and first parsed item on the test.

     ``self.tree`` holds the result of ``get_etree(filename)``; ``self.item``
     holds element 0 of what the matching parser's ``parse_message`` returns
     for that tree and a ``{'name': 'Test'}`` provider.
     """
     tree = get_etree(filename)
     self.tree = tree
     parser = get_xml_parser(tree)
     messages = parser.parse_message(tree, {'name': 'Test'})
     self.item = messages[0]
示例#5
0
 def test_get_xml_parser_newsml12(self):
     """get_xml_parser yields a NewsMLOneParser for the 'afp.xml' fixture."""
     parser = get_xml_parser(get_etree('afp.xml'))
     self.assertIsInstance(parser, NewsMLOneParser)
示例#6
0
 def test_get_xml_parser_nitf(self):
     """get_xml_parser yields a NITFParser for the 'nitf-fishing.xml' fixture."""
     parser = get_xml_parser(get_etree('nitf-fishing.xml'))
     self.assertIsInstance(parser, NITFParser)
示例#7
0
 def test_get_xml_parser_newsmlg2(self):
     """get_xml_parser yields a NewsMLTwoParser for the 'snep.xml' fixture."""
     parser = get_xml_parser(get_etree('snep.xml'))
     self.assertIsInstance(parser, NewsMLTwoParser)
示例#8
0
 def setUpFixture(self, filename):
     """Load the fixture and keep its tree and first parsed item on the test.

     ``self.tree`` holds the result of ``get_etree(filename)``; ``self.item``
     holds element 0 of what the matching parser's ``parse_message`` returns
     for that tree.
     """
     tree = get_etree(filename)
     self.tree = tree
     parser = get_xml_parser(tree)
     messages = parser.parse_message(tree)
     self.item = messages[0]