Пример #1
0
def init_app(app):
    """Configure and register the Newsworthy feeding service for ``app``.

    Sets the default value of the service's ``url`` field from
    ``app.config['SERVER_URL']``, then registers the feeding service, its
    ``ninjs`` parser and the ``newsworthy`` webhook resource.

    :param app: application object; ``app.config['SERVER_URL']`` is read and
        ``app`` is passed on to ``NewsworthyWebhookResource``.
    :raises ValueError: if ``NewsworthyFeedingService.fields`` contains no
        field whose ``id`` is ``'url'``.
    """
    # we have to set URL field here, because config is not available at time
    # of parsing
    url = join(app.config['SERVER_URL'], 'newsworthy')
    # Find the field by id instead of by position and fail explicitly:
    # an ``assert`` would be stripped when Python runs with -O.
    url_field = next(
        (field for field in NewsworthyFeedingService.fields if field.get('id') == 'url'),
        None,
    )
    if url_field is None:
        raise ValueError("NewsworthyFeedingService has no field with id 'url'")
    url_field['default_value'] = url
    # init_app can be called several times during tests
    # so we skip registration if we have an AlreadyExistsError
    try:
        register_feeding_service(NewsworthyFeedingService)
    except superdesk.errors.AlreadyExistsError:
        pass
    else:
        # first registration only: also register the parser and the webhook
        register_feeding_service_parser(NewsworthyFeedingService.NAME, 'ninjs')
        service = NewsworthyWebhookService()
        resource = NewsworthyWebhookResource("newsworthy", app=app, service=service)
        resource.authentication = NewsworthyFeedingServiceAuth
Пример #2
0
        # we need to access config to set the URL, so we do it here
        field = next(f for f in cls.fields if f["type"] == "url_request")
        field["url"] = join(app.config["SERVER_URL"], "login", "google",
                            "{URL_ID}")

    def _test(self, provider):
        """Run ``_update`` for ``provider`` in test mode.

        Calls ``self._update(provider, update=None, test=True)`` and discards
        whatever it returns.
        """
        self._update(provider, update=None, test=True)

    def authenticate(self, provider: dict, config: dict) -> imaplib.IMAP4_SSL:
        """Open an IMAP connection to GMail authenticated via XOAUTH2.

        The token is looked up in the ``oauth2_token`` resource by
        ``provider["url_id"]`` and refreshed first when it expires in less
        than 600 seconds.

        :param provider: ingest provider; ``url_id`` and ``name`` are read.
        :param config: not used by this method.
        :return: the connection on which ``XOAUTH2`` authentication succeeded.
        :raises: the error built by ``IngestEmailError.notConfiguredError``
            when no token is stored for the provider; any error raised by the
            token refresh or by the IMAP connection/authentication.
        """
        oauth2_token_service = superdesk.get_resource_service("oauth2_token")
        token = oauth2_token_service.find_one(req=None, _id=provider["url_id"])
        if token is None:
            raise IngestEmailError.notConfiguredError(ValueError(
                l_("You need to log in first")),
                                                      provider=provider)

        # refresh tokens that expire within the next 10 minutes
        if token["expires_at"].timestamp() < time.time() + 600:
            logger.info("Refreshing token for {provider_name}".format(
                provider_name=provider["name"]))
            token = oauth.refresh_google_token(token["_id"])

        auth_string = "user={email}\x01auth=Bearer {token}\x01\x01".format(
            email=token["email"], token=token["access_token"])

        # Connect only once the token is ready, so a failing refresh cannot
        # leave an open socket behind.
        imap = imaplib.IMAP4_SSL("imap.gmail.com")
        try:
            imap.authenticate("XOAUTH2", lambda __: auth_string.encode())
        except Exception:
            # do not leak the connection when authentication fails
            imap.shutdown()
            raise
        return imap


# Runs at import time: register the GMail feeding service, then pair its
# NAME with the "email_rfc822" parser.
register_feeding_service(GMailFeedingService)
register_feeding_service_parser(GMailFeedingService.NAME, "email_rfc822")
Пример #3
0
        """

        payload = {'id': id}
        tree = self._get_tree('item', payload)

        parser = self.get_feed_parser(self.provider, tree)
        items = parser.parse(tree, self.provider)

        return items

    def _fetch_items_in_package(self, item):
        """
        Parse every item referenced by ``residRef`` in the package groups.

        Walks ``item['groups']`` and each group's ``refs`` in order, calls
        ``self._parse_items`` for every ref carrying a ``residRef`` key and
        returns all results in one flat list.
        """
        fetched = []
        for group in item.get('groups', []):
            # lazily pick the referenced ids so parsing stays interleaved with iteration
            resid_refs = (ref['residRef'] for ref in group.get('refs', []) if 'residRef' in ref)
            for resid_ref in resid_refs:
                fetched += self._parse_items(resid_ref)

        return fetched

    def prepare_href(self, href, mimetype=None):
        """Return ``href`` reduced to scheme, host and path, with an auth token appended.

        Params, query string and fragment of ``href`` are dropped and replaced
        by ``?auth_token=<token>``, where the token comes from
        ``self._get_auth_token(self.provider, update=True)``.
        ``mimetype`` is accepted but not used.
        """
        parts = urlparse(href)
        bare_href = urlunparse((parts.scheme, parts.netloc, parts.path, '', '', ''))
        auth_token = self._get_auth_token(self.provider, update=True)
        return '%s?auth_token=%s' % (bare_href, auth_token)


# Runs at import time: register the Reuters HTTP feeding service, then pair
# its NAME with the 'newsml2' parser.
register_feeding_service(ReutersHTTPFeedingService)
register_feeding_service_parser(ReutersHTTPFeedingService.NAME, 'newsml2')
Пример #4
0
                                    "media":
                                    media_id,
                                    "filename":
                                    fileName,
                                    "title":
                                    'attachment',
                                    "description":
                                    "email's attachment"
                                }])
                                if ids:
                                    attachments.append(
                                        {'attachment': next(iter(ids), None)})
                            except Exception as ex:
                                logger.error(
                                    "cannot add attachment for %s, %s" %
                                    (fileName, ex.args[0]))
                                app.media.delete(media_id)

                if attachments:
                    for item in items:
                        if item['type'] == 'text':
                            item['attachments'] = attachments
                            item[
                                'ednote'] = 'The story has %s attachment(s)' % str(
                                    len(attachments))


# Runs at import time: register the Belga e-mail feeding service, then pair
# its NAME with the parser named by EMailRFC822FeedParser.NAME.
register_feeding_service(EmailBelgaFeedingService)
register_feeding_service_parser(EmailBelgaFeedingService.NAME,
                                EMailRFC822FeedParser.NAME)
Пример #5
0
        :param items: dict with events, ntbId used as a key
        :type items: dict
        :return: a list of events
        """

        req = ParsedRequest()
        req.projection = json.dumps({'ntb_id': 1, 'guid': 1, ITEM_STATE: 1})
        req.max_results = len(items)

        existing_items = superdesk.get_resource_service('events').get_from_mongo(
            req,
            {
                'ntb_id': {
                    '$in': [ntb_id for ntb_id in items.keys()]
                }
            }
        )
        for existing_item in existing_items:
            if existing_item.get(ITEM_STATE) == WORKFLOW_STATE.INGESTED:
                # update event
                items[existing_item['ntb_id']][GUID_FIELD] = existing_item[GUID_FIELD]
            else:
                # remove event when it has a state different from 'ingested'
                del items[existing_item['ntb_id']]

        return [items[i] for i in items.keys()]


# Runs at import time: register the NTB events API feeding service, then
# pair its NAME with the 'ntb_events_api_xml' parser.
register_feeding_service(NTBEventsApiFeedingService)
register_feeding_service_parser(NTBEventsApiFeedingService.NAME, 'ntb_events_api_xml')
Пример #6
0
        for item in items:
            if embed:
                urls = re.findall(r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-;]|[\[\]?@_~]|'
                                  r'(?:%[0-9a-fA-F][0-9a-fA-F]))+',
                                  item.get('body_html', ''))
                for url in set(urls):
                    embed_content = self._create_embed(url, key)
                    if embed_content:
                        item['body_html'] += '<!-- EMBED START Twitter -->'
                        item['body_html'] += embed_content
                        item['body_html'] += '<!-- EMBED END Twitter -->'
        return [items]

    def _create_embed(self, url, key):
        """
        Get embed html from iframely service for provided url

        :param url: address of the page to embed; sent as the ``url`` query parameter
        :param key: iframely API key; sent as the ``api_key`` query parameter
        :return: the ``html`` value of the JSON response on HTTP 200 (``''`` when
            the key is absent), ``''`` for any other status except 403
        :raises: ``IngestTwitterBelgaError.TwitterInvalidIframelyKey()`` on HTTP 403
        """
        # Let requests build the query string so that characters such as
        # '&', '?' or '#' inside ``url`` are percent-encoded instead of
        # corrupting the request.
        response = requests.get(
            'https://iframe.ly/api/oembed',
            params={'url': url, 'api_key': key},
        )
        if response.status_code == 200:
            # decode the body only here: error responses need not be JSON
            return response.json().get('html', '')
        if response.status_code == 403:
            raise IngestTwitterBelgaError.TwitterInvalidIframelyKey()
        # when turn off setting: On URL errors, don't repeat it as HTTP status (use code 200 instead)
        # iframely will return 417 response on URL error
        return ''


# Runs at import time: register the Belga Twitter feeding service; None is
# passed as the parser name.
register_feeding_service(TwitterBelgaFeedingService)
register_feeding_service_parser(TwitterBelgaFeedingService.NAME, None)
Пример #7
0
    ]

    def __init__(self):
        """Create the service with an empty ``fields_cache`` dict."""
        # NOTE(review): no parent initialiser is called here — confirm that is intended
        self.fields_cache = {}

    def _update(self, provider, update):
        """Build the Wufoo request data for ``provider`` and hand it to the feed parser.

        :param provider: ingest provider; ``config['wufoo_username']`` and
            ``config['wufoo_api_key']`` are read.
        :param update: passed through to the parser under the ``"update"`` key.
        :return: a single-element list wrapping whatever the parser returns.
        :raises: the error built by the matching ``IngestApiError`` factory
            when obtaining the feed parser fails.
        """
        user = provider['config']['wufoo_username']
        wufoo_data = {
            "url": WUFOO_URL.format(subdomain=user),
            "user": user,
            "api_key": provider['config']['wufoo_api_key'],
            "form_query_entries_tpl": WUFOO_QUERY_FORM + WUFOO_QUERY_ENTRIES,
            "update": update}
        try:
            parser = self.get_feed_parser(provider, None)
        except requests.exceptions.Timeout as ex:
            raise IngestApiError.apiTimeoutError(ex, provider)
        except requests.exceptions.TooManyRedirects as ex:
            raise IngestApiError.apiRedirectError(ex, provider)
        except requests.exceptions.RequestException as ex:
            raise IngestApiError.apiRequestError(ex, provider)
        except Exception as error:
            traceback.print_exc()
            # Use the ``provider`` argument like the handlers above; reading
            # ``self.provider`` could raise AttributeError and hide ``error``.
            raise IngestApiError.apiGeneralError(error, provider)
        items = parser.parse(wufoo_data, provider)
        return [items]


# Runs at import time: register the Wufoo feeding service, then pair its
# NAME with the 'wufoo' parser.
register_feeding_service(WufooFeedingService)
register_feeding_service_parser(WufooFeedingService.NAME, 'wufoo')
Пример #8
0
                try:
                    rv, data = imap.search(None,
                                           config.get('filter', '(UNSEEN)'))
                    if rv != 'OK':
                        raise IngestEmailError.emailFilterError()
                    for num in data[0].split():
                        rv, data = imap.fetch(num, '(RFC822)')
                        if rv == 'OK' and not test:
                            try:
                                parser = self.get_feed_parser(provider, data)
                                new_items.append(parser.parse(data, provider))
                                rv, data = imap.store(num, '+FLAGS', '\\Seen')
                            except IngestEmailError:
                                continue
                finally:
                    imap.close()
            finally:
                imap.logout()
        except IngestEmailError:
            raise
        except Exception as ex:
            raise IngestEmailError.emailError(ex, provider)
        return new_items

    def prepare_href(self, href, mimetype=None):
        """Return ``url_for_media(href, mimetype)``.

        :param href: media reference handed to ``url_for_media``.
        :param mimetype: optional mime type, forwarded unchanged.
        """
        return url_for_media(href, mimetype)


# Runs at import time: register the e-mail feeding service, then pair its
# NAME with the 'email_rfc822' parser.
register_feeding_service(EmailFeedingService)
register_feeding_service_parser(EmailFeedingService.NAME, 'email_rfc822')
                                if not byline.startswith('By '):
                                    byline_prefix = 'By '
                                byline_found = elem_text.lower().startswith(
                                    '{}{}'.format(byline_prefix,
                                                  byline).lower())
                            else:
                                byline_found = elem_text.startswith('By ')
                                if byline_found:
                                    item['byline'] = elem_text

                    # remove the byline from the body text
                    if not byline_found:
                        elements.append('<%s>%s</%s>' % (tag, elem_text, tag))

                    line_counter += 1

            content = dict()
            content['contenttype'] = tree.attrib['contenttype']
            if len(elements) > 0:
                content['content'] = "\n".join(elements)
            elif body.text:
                content['content'] = '<pre>' + body.text + '</pre>'
                content['format'] = CONTENT_TYPE.PREFORMATTED
            return content


# Runs at import time: register a parser instance under its NAME, then pair
# that parser name with the Reuters HTTP feeding service.
register_feed_parser(ReutersNewsMLTwoFeedParser.NAME,
                     ReutersNewsMLTwoFeedParser())
register_feeding_service_parser(ReutersHTTPFeedingService.NAME,
                                ReutersNewsMLTwoFeedParser.NAME)
Пример #10
0
                    raise IngestEmailError.emailMailboxError()
                try:
                    rv, data = imap.search(None, config.get('filter', '(UNSEEN)'))
                    if rv != 'OK':
                        raise IngestEmailError.emailFilterError()
                    for num in data[0].split():
                        rv, data = imap.fetch(num, '(RFC822)')
                        if rv == 'OK' and not test:
                            try:
                                parser = self.get_feed_parser(provider, data)
                                new_items.append(parser.parse(data, provider))
                                rv, data = imap.store(num, '+FLAGS', '\\Seen')
                            except IngestEmailError:
                                continue
                finally:
                    imap.close()
            finally:
                imap.logout()
        except IngestEmailError:
            raise
        except Exception as ex:
            raise IngestEmailError.emailError(ex, provider)
        return new_items

    def prepare_href(self, href, mimetype=None):
        """Delegate to ``url_for_media`` and return its result.

        :param href: media reference passed as the first argument.
        :param mimetype: optional mime type passed as the second argument.
        """
        return url_for_media(href, mimetype)


# Import-time registration of the e-mail feeding service and of the
# 'email_rfc822' parser name for it.
register_feeding_service(EmailFeedingService)
register_feeding_service_parser(EmailFeedingService.NAME, 'email_rfc822')
Пример #11
0
            raise IngestApiError.apiRequestError(Exception('error while parsing the request answer'))

        try:
            if root_elt.xpath('(//error/text())[1]')[0] != '0':
                err_msg = root_elt.xpath('(//errormsg/text())[1]')[0]
                raise IngestApiError.apiRequestError(Exception('error code returned by API: {msg}'.format(msg=err_msg)))
        except IndexError:
            raise IngestApiError.apiRequestError(Exception('Invalid XML, <error> element not found'))

        parser = self.get_feed_parser(provider)
        items = []
        for elt in root_elt.xpath('//RBNews'):
            item = parser.parse(elt, provider)
            items.append(item)
            if not url_override:
                try:
                    queue_id = elt.xpath('.//ServiceQueueId/text()')[0]
                except IndexError:
                    raise IngestApiError.apiRequestError(Exception('missing ServiceQueueId element'))
                ack_params = {'user': user, 'password': password, 'servicequeueid': queue_id}
                try:
                    requests.get(URL_ACK, params=ack_params)
                except Exception:
                    raise IngestApiError.apiRequestError(Exception('error while doing the request'))

        return [items]


# Runs at import time: register the Ritzau feeding service, then pair its
# NAME with the 'ritzau' parser.
register_feeding_service(RitzauFeedingService)
register_feeding_service_parser(RitzauFeedingService.NAME, 'ritzau')
Пример #12
0
#
# This file is part of Superdesk.
#
# Copyright 2013, 2014 Sourcefabric z.u. and contributors.
#
# For the full copyright and license information, please see the
# AUTHORS and LICENSE files distributed with this source code, or
# at https://www.sourcefabric.org/superdesk/license

from superdesk.io.feeding_services import FTPFeedingService
from superdesk.io.registry import register_feeding_service, register_feeding_service_parser


class NTBEventsFTPFeedingService(FTPFeedingService):
    """
    Feeding Service class which can read events from NTB via FTP.

    Defines no methods of its own: behaviour comes from ``FTPFeedingService``
    and only the identifying attributes below are set.
    """

    # name under which parsers are registered for this service in this module
    NAME = 'ntb_events_ftp'
    # presumably the human-readable name of the service — TODO confirm
    label = 'NTB Events FTP'
    # NOTE(review): looks like the resource that ingested items go to — confirm
    service = 'events'


# Runs at import time: register the NTB events FTP feeding service, then
# pair its NAME with each of four parser names.
register_feeding_service(NTBEventsFTPFeedingService)
register_feeding_service_parser(NTBEventsFTPFeedingService.NAME,
                                'ntb_events_api_xml')
register_feeding_service_parser(NTBEventsFTPFeedingService.NAME,
                                'ntb_event_xml')
register_feeding_service_parser(NTBEventsFTPFeedingService.NAME, 'ics20')
register_feeding_service_parser(NTBEventsFTPFeedingService.NAME, 'ntb_nifs')
Пример #13
0
                item_details['media_link'] = media_link

        return item_details

    def _fetch_media_link(self, source_id, item_id):
        """
        Fetch a list of all available renditions for an item and return a link to file

        The first rendition whose ``kind`` is in ``ALLOWED_MEDIA_KINDS``, whose
        ``mimeType`` is in ``ALLOWED_MEDIA_MIMETYPES`` and which carries a
        non-empty ``id`` is used; renditions without an id are skipped.

        :param source_id: value for the ``source_id`` placeholder of both URL templates
        :param item_id: value for the ``item_id`` placeholder of both URL templates
        :return str or None: link to the image or None
        """
        # fetch media renditions
        media_renditions = self.get_url(
            url=self.HTTP_ITEM_MEDIA_LIST_URL.format(source_id=source_id, item_id=item_id)
        )
        for rend in media_renditions:
            if rend.get('kind') not in self.ALLOWED_MEDIA_KINDS:
                continue
            if rend.get('mimeType') not in self.ALLOWED_MEDIA_MIMETYPES:
                continue
            media_id = rend.get('id')
            if media_id:
                return self.HTTP_ITEM_MEDIA_DETAILS_URL.format(
                    source_id=source_id,
                    item_id=item_id,
                    media_id=media_id
                )
            # A matching rendition without an id must not end the search:
            # a later rendition may still be usable.
        return None


# Runs at import time: register the ANP news API feeding service, then pair
# its NAME with the 'anp_news_api' parser.
register_feeding_service(ANPNewsApiFeedingService)
register_feeding_service_parser(ANPNewsApiFeedingService.NAME, 'anp_news_api')
Пример #14
0
def init_app(_app):
    """Register ``CNAFeedingService``; ``None`` is passed as its parser name.

    :param _app: not used.
    """
    register_feeding_service(CNAFeedingService)
    register_feeding_service_parser(CNAFeedingService.NAME, None)
Пример #15
0
            'firstcreated':
            text_item['firstcreated'],
            'versioncreated':
            text_item['versioncreated'],
            'headline':
            text_item.get('headline', ''),
            'groups': [{
                'id': 'root',
                'role': 'grpRole:NEP',
                'refs': [{
                    'idRef': 'main'
                }],
            }, {
                'id': 'main',
                'role': 'main',
                'refs': [],
            }]
        }

        item_references = package['groups'][1]['refs']
        item_references.append({'residRef': text_item['guid']})

        for image in image_items:
            item_references.append({'residRef': image['guid']})

        return package


# Runs at import time: register the RSS feeding service; None is passed as
# the parser name.
register_feeding_service(RSSFeedingService)
register_feeding_service_parser(RSSFeedingService.NAME, None)
Пример #16
0
        try:
            root_elt = etree.fromstring(r.text)
        except Exception:
            raise IngestApiError.apiRequestError(Exception('error while parsing the request answer'))

        try:
            if root_elt.xpath('(//error/text())[1]')[0] != '0':
                err_msg = root_elt.xpath('(//errormsg/text())[1]')[0]
                raise IngestApiError.apiRequestError(Exception('error code returned by API: {msg}'.format(msg=err_msg)))
        except IndexError:
            raise IngestApiError.apiRequestError(Exception('Invalid XML, <error> element not found'))

        parser = self.get_feed_parser(provider)
        items = []
        for elt in root_elt.xpath('//RBNews'):
            item = parser.parse(elt, provider)
            items.append(item)
            if not url_override:
                try:
                    queue_id = elt.xpath('.//ServiceQueueId/text()')[0]
                except IndexError:
                    raise IngestApiError.apiRequestError(Exception('missing ServiceQueueId element'))
                ack_params = {'user': user, 'password': password, 'servicequeueid': queue_id}
                self.get_url(URL_ACK, params=ack_params)

        return [items]


# Import-time registration of the Ritzau feeding service and of the
# 'ritzau' parser name for it.
register_feeding_service(RitzauFeedingService)
register_feeding_service_parser(RitzauFeedingService.NAME, 'ritzau')
Пример #17
0
                        location[0]['name'],
                        'address.line':
                        location[0]['address']['line'],
                        'address.country':
                        location[0]['address']['country'],
                    }))
                if saved_location and status == 'UPDATED':
                    location_service.patch(
                        saved_location[0][superdesk.config.ID_FIELD],
                        location[0])
                elif not saved_location:
                    _location = deepcopy(location)
                    location_service.post(_location)
                    item['location'][0]['qcode'] = _location[0]['guid']

            old_item = events_service.find_one(guid=item[GUID_FIELD], req=None)
            if not old_item:
                if not status:
                    item.setdefault('firstcreated', datetime.now())
                    item.setdefault('versioncreated', datetime.now())
                    list_items.append(item)
            else:
                old_item.update(item)
                list_items.append(old_item)
        return list_items


# Runs at import time: register the spreadsheet feeding service, then pair
# its NAME with the 'belgaspreadsheet' parser.
register_feeding_service(SpreadsheetFeedingService)
register_feeding_service_parser(SpreadsheetFeedingService.NAME,
                                'belgaspreadsheet')
Пример #18
0
        """

        payload = {"id": id}
        tree = self._get_tree("item", payload)

        parser = self.get_feed_parser(self.provider, tree)
        items = parser.parse(tree, self.provider)

        return items

    def _fetch_items_in_package(self, item):
        """
        Gather the parsed items for every ``residRef`` reference of a package.

        Groups and their refs are visited in order; refs without a
        ``residRef`` key are ignored, the others are passed to
        ``self._parse_items`` and the results concatenated.
        """
        collected = []
        for group in item.get("groups", []):
            for ref in group.get("refs", []):
                if "residRef" not in ref:
                    continue
                collected.extend(self._parse_items(ref["residRef"]))

        return collected

    def prepare_href(self, href, mimetype=None):
        """Strip ``href`` down to scheme, host and path and append the auth token.

        The token is obtained from
        ``self._get_auth_token(self.provider, update=True)`` and added as the
        only query parameter, ``auth_token``. ``mimetype`` is not used.
        """
        scheme, netloc, path = urlparse(href)[:3]
        stripped = urlunparse((scheme, netloc, path, "", "", ""))
        token = self._get_auth_token(self.provider, update=True)
        return "%s?auth_token=%s" % (stripped, token)


# Import-time registration of the Reuters HTTP feeding service and of the
# "newsml2" parser name for it.
register_feeding_service(ReutersHTTPFeedingService)
register_feeding_service_parser(ReutersHTTPFeedingService.NAME, "newsml2")
                            byline = item.get('byline') or ''
                            if byline:
                                byline_prefix = ''
                                if not byline.startswith('By '):
                                    byline_prefix = 'By '
                                byline_found = elem_text.lower().startswith('{}{}'.format(byline_prefix,
                                                                                          byline).lower())
                            else:
                                byline_found = elem_text.startswith('By ')
                                if byline_found:
                                    item['byline'] = elem_text

                    # remove the byline from the body text
                    if not byline_found:
                        elements.append('<%s>%s</%s>' % (tag, elem_text, tag))

                    line_counter += 1

            content = dict()
            content['contenttype'] = tree.attrib['contenttype']
            if len(elements) > 0:
                content['content'] = "\n".join(elements)
            elif body.text:
                content['content'] = '<pre>' + body.text + '</pre>'
                content['format'] = CONTENT_TYPE.PREFORMATTED
            return content


# Import-time registration: a parser instance is registered under its NAME,
# and that parser name is paired with the Reuters HTTP feeding service.
register_feed_parser(ReutersNewsMLTwoFeedParser.NAME, ReutersNewsMLTwoFeedParser())
register_feeding_service_parser(ReutersHTTPFeedingService.NAME, ReutersNewsMLTwoFeedParser.NAME)