示例#1
0
    def _fetch_data(self, config, provider):
        """Fetch all result pages published since the provider's last update.

        Pages are requested ``offset_jump`` results at a time until the
        offset reaches the ``"total"`` count reported in the response body.

        :param dict config: provider configuration; ``url`` and ``api_key``
            are read from it
        :param provider: ingest provider; its ``last_updated`` value
            (defaulting to ``utcfromtimestamp(0)``) is sent as the ``start``
            parameter, and it is passed along when raising ingest errors
        :return: raw response bodies (``response.text``), one per page
        :rtype: list

        :raises IngestApiError: if the connection fails, the response is not
            OK, or the response body carries no ``"total"`` count
        """
        url = config['url']
        api_key = config['api_key']

        last_update = provider.get(
            'last_updated', utcfromtimestamp(0)).strftime('%Y-%m-%dT%H:%M:%S')

        # Results are paginated so we'll read this many at a time
        offset_jump = 10

        params = {'start': last_update, 'limit': offset_jump}
        headers = {'apikey': api_key}

        items = []

        offset = 0
        while True:
            params['offset'] = offset

            try:
                response = requests.get(url,
                                        params=params,
                                        headers=headers,
                                        timeout=30)
            except requests.exceptions.ConnectionError as err:
                raise IngestApiError.apiConnectionError(exception=err,
                                                        provider=provider)

            if not response.ok:
                if re.match('Error: No API Key provided', response.text):
                    raise IngestApiError.apiAuthError(Exception(response.text),
                                                      provider)
                elif response.status_code == 404:
                    raise IngestApiError.apiNotFoundError(
                        Exception(response.reason), provider)
                else:
                    raise IngestApiError.apiGeneralError(
                        Exception(response.reason), provider)

            # The total number of results is given to us in json; read the
            # field via a regex so we don't have to convert the whole thing
            # to json pointlessly. A missing count is reported as an ingest
            # error instead of failing with AttributeError on a None match.
            total_match = re.search(r'"total": *([0-9]+)', response.text)
            if total_match is None:
                raise IngestApiError.apiGeneralError(
                    Exception(response.text), provider)

            num_results = int(total_match.group(1))

            if num_results > 0:
                items.append(response.text)

            # Advance before comparing: the page just read covered results
            # [offset, offset + offset_jump), so stop once that range reaches
            # the total rather than requesting one more, empty, page.
            offset += offset_jump
            if offset >= num_results:
                return items
示例#2
0
    def _test(self, provider):
        """Check that the provider's configured endpoint answers a request.

        Sends a single GET to the configured URL and raises if the
        connection fails or the response is not OK. Returns nothing on
        success.

        :param provider: ingest provider; its ``config`` must contain ``url``
            and ``api_key``. Also passed along when raising ingest errors.

        :raises IngestApiError: if the connection fails or the response is
            not OK (a 404 is reported as a not-found error)
        """
        config = provider.get('config', {})
        url = config['url']
        api_key = config['api_key']

        # limit the data to a single article and filter out all article fields
        # to save bandwidth
        params = {'limit': 1, 'fields': 'id'}
        headers = {'apikey': api_key}

        try:
            response = requests.get(url,
                                    params=params,
                                    headers=headers,
                                    timeout=30)
        except requests.exceptions.ConnectionError as err:
            # pass the provider like every other error raised here does
            raise IngestApiError.apiConnectionError(exception=err,
                                                    provider=provider)

        if response.ok:
            return

        if response.status_code == 404:
            raise IngestApiError.apiNotFoundError(
                Exception(response.reason), provider)
        raise IngestApiError.apiGeneralError(
            Exception(response.reason), provider)
示例#3
0
    def _fetch_data(self, config, provider):
        """Fetch all result pages published since the provider's last update.

        Requests are issued ``offset_jump`` results at a time; paging stops
        once the offset reaches the ``"total"`` count found in the response.

        :param dict config: provider configuration; ``url`` and ``api_key``
            are read from it
        :param provider: ingest provider; its ``last_updated`` value
            (defaulting to ``utcfromtimestamp(0)``) is sent as the ``start``
            parameter, and it is passed along when raising ingest errors
        :return: raw response bodies (``response.text``), one per page
        :rtype: list

        :raises IngestApiError: if the connection fails, the response is not
            OK, or the response body carries no ``"total"`` count
        """
        url = config['url']
        api_key = config['api_key']

        last_update = provider.get('last_updated', utcfromtimestamp(0)).strftime('%Y-%m-%dT%H:%M:%S')

        # Results are paginated so we'll read this many at a time
        offset_jump = 10

        params = {'start': last_update, 'limit': offset_jump}
        headers = {'apikey': api_key}

        items = []

        offset = 0
        while True:
            params['offset'] = offset

            try:
                response = requests.get(url, params=params, headers=headers, timeout=30)
            except requests.exceptions.ConnectionError as err:
                raise IngestApiError.apiConnectionError(exception=err, provider=provider)

            if not response.ok:
                if re.match('Error: No API Key provided', response.text):
                    raise IngestApiError.apiAuthError(
                        Exception(response.text), provider)
                elif response.status_code == 404:
                    raise IngestApiError.apiNotFoundError(
                        Exception(response.reason), provider)
                else:
                    raise IngestApiError.apiGeneralError(
                        Exception(response.reason), provider)

            # The total number of results is given to us in json; pull it out
            # with a regex so the whole body need not be parsed. If the count
            # is absent, raise an ingest error rather than letting a None
            # match blow up with AttributeError.
            found = re.search(r'"total": *([0-9]+)', response.text)
            if found is None:
                raise IngestApiError.apiGeneralError(
                    Exception(response.text), provider)

            num_results = int(found.group(1))

            if num_results > 0:
                items.append(response.text)

            # Move past the page just read before testing for the end, so no
            # extra request is made beyond the last page of results.
            offset += offset_jump
            if offset >= num_results:
                return items
    def _fetch_data(self, config, provider):
        """Download the feed found at the configured URL.

        When ``auth_required`` is set in the configuration, the configured
        username/password are sent as HTTP auth credentials and also stored
        on ``self.auth_info``.

        :param dict config: resource configuration (``url`` plus optional
            ``auth_required``, ``username`` and ``password``)
        :param provider: data provider instance, forwarded to the ingest
            errors raised from here
        :return: body of the response (``response.content``)

        :raises IngestApiError: if the request fails or the response is not
            OK (authentication error, resource not found, etc.)
        """
        url = config['url']

        credentials = None
        if config.get('auth_required', False):
            credentials = (config.get('username'), config.get('password'))
            self.auth_info = {
                'username': config.get('username', ''),
                'password': config.get('password', '')
            }

        try:
            response = requests.get(url, auth=credentials, timeout=30)
        except requests.exceptions.ConnectionError as conn_err:
            raise IngestApiError.apiConnectionError(exception=conn_err,
                                                    provider=provider)
        except requests.exceptions.RequestException as req_err:
            raise IngestApiError.apiURLError(exception=req_err,
                                             provider=provider)

        if response.ok:
            return response.content

        # Not OK: translate the status code into the matching ingest error.
        status = response.status_code
        if status in (401, 403):
            raise IngestApiError.apiAuthError(Exception(response.reason),
                                              provider)
        if status == 404:
            raise IngestApiError.apiNotFoundError(
                Exception(response.reason), provider)
        raise IngestApiError.apiGeneralError(
            Exception(response.reason), provider)
示例#5
0
    def _fetch_data(self, config, provider):
        """Retrieve the feed content from the URL given in ``config``.

        If the configuration has ``auth_required`` set, the username and
        password from the configuration are used as HTTP auth credentials
        and are remembered on ``self.auth_info``.

        :param dict config: resource configuration (``url`` plus optional
            ``auth_required``, ``username`` and ``password``)
        :param provider: data provider instance, handed to any ingest error
            raised here
        :return: body of the response (``response.content``)

        :raises IngestApiError: if the request fails or the response is not
            OK (authentication error, resource not found, etc.)
        """
        url = config['url']

        needs_auth = config.get('auth_required', False)
        auth = None
        if needs_auth:
            auth = (config.get('username'), config.get('password'))
            self.auth_info = {
                'username': config.get('username', ''),
                'password': config.get('password', '')
            }

        try:
            response = requests.get(url, auth=auth, timeout=30)
        except requests.exceptions.ConnectionError as error:
            raise IngestApiError.apiConnectionError(exception=error, provider=provider)
        except requests.exceptions.RequestException as error:
            raise IngestApiError.apiURLError(exception=error, provider=provider)

        if response.ok:
            return response.content

        # Anything below is a failed response; pick the error by status code.
        if response.status_code in (401, 403):
            raise IngestApiError.apiAuthError(
                Exception(response.reason), provider)
        if response.status_code == 404:
            raise IngestApiError.apiNotFoundError(
                Exception(response.reason), provider)
        raise IngestApiError.apiGeneralError(
            Exception(response.reason), provider)
示例#6
0
    def _test(self, provider):
        """Check that the provider's configured endpoint answers a request.

        Issues one GET against the configured URL; raises when the
        connection fails or the response is not OK, and returns nothing
        otherwise.

        :param provider: ingest provider; its ``config`` must contain ``url``
            and ``api_key``. Also passed along when raising ingest errors.

        :raises IngestApiError: if the connection fails or the response is
            not OK (a 404 is reported as a not-found error)
        """
        config = provider.get('config', {})
        url = config['url']
        api_key = config['api_key']

        # limit the data to a single article and filter out all article fields
        # to save bandwidth
        params = {'limit': 1, 'fields': 'id'}
        headers = {'apikey': api_key}

        try:
            response = requests.get(url, params=params, headers=headers, timeout=30)
        except requests.exceptions.ConnectionError as err:
            # include the provider, as the other errors raised here do
            raise IngestApiError.apiConnectionError(exception=err, provider=provider)

        if response.ok:
            return

        if response.status_code == 404:
            raise IngestApiError.apiNotFoundError(
                Exception(response.reason), provider)
        raise IngestApiError.apiGeneralError(
            Exception(response.reason), provider)
示例#7
0
class NTBEventsApiFeedingService(HTTPFeedingServiceBase):
    """
    Feeding Service class which can read events from NTB API using HTTP
    """

    NAME = 'ntb_events_api'
    ERRORS = [
        SuperdeskIngestError.notConfiguredError().get_error_description(),
        IngestApiError.apiTimeoutError().get_error_description(),
        IngestApiError.apiConnectionError().get_error_description(),
    ]
    # upper bound on the number of requests made by a single `_update` call
    REQUESTS_PER_UPDATE = 4
    # sent as `search.showNumResults` with every request
    EVENTS_PER_REQUEST = 25
    HTTP_TIMEOUT = 20

    label = 'NTB Events API'
    fields = [
        {
            'id': 'url', 'type': 'text', 'label': 'Feed URL',
            'placeholder': 'Feed URL', 'required': True
        }] + HTTPFeedingServiceBase.AUTH_FIELDS
    service = 'events'

    def _update(self, provider, update):
        """
        Fetch events from external API.

        Up to ``REQUESTS_PER_UPDATE`` requests are made, starting at the
        offset stored under ``provider['private']['search']['offset']``
        (0 if absent) and stopping early at the first empty page. When events
        were fetched, the advanced offset is written to ``update['private']``;
        otherwise the provider is marked as closed via ``update``.

        :param provider: Ingest Provider Details.
        :type provider: dict
        :param update: Any update that is required on provider.
        :type update: dict
        :return: a one-element list wrapping the events which can be saved
            (the element is empty when nothing was fetched).
        """
        all_items = OrderedDict()
        self._provider = provider
        provider_private = self._provider.get('private', {})
        offset = provider_private.get('search', {}).get('offset', 0)

        for _ in range(self.REQUESTS_PER_UPDATE):
            # continue right after the events collected so far
            response = self._send_request(offset + len(all_items))
            xml = etree.fromstring(response.content)
            items = self._parse_events(xml=xml)

            if not items:
                break
            all_items.update(items)

        if all_items:
            # remember where the next update has to resume
            update['private'] = {
                'search': {
                    'offset': offset + len(all_items)
                }
            }
            all_items = self._filter_items(all_items)
        else:
            update['is_closed'] = True
            update['last_closed'] = {
                'closed_at': utcnow(),
                'message': 'Ingesting was finished.'
            }

        return [all_items]

    def _send_request(self, offset):
        """
        Execute http request to external API

        :param offset: offset provided in request payload
        :type offset: int
        :return: http response
        :raises IngestApiError.apiTimeoutError
        :raises IngestApiError.apiConnectionError
        :raises IngestApiError.apiRequestError
        :raises IngestApiError.apiGeneralError
        :raises IngestApiError.apiAuthError
        :raises IngestApiError.apiNotFoundError
        """
        payload = {
            'search.offset': offset,
            'search.showNumResults': self.EVENTS_PER_REQUEST
        }
        url = self._provider['config']['url'].strip()

        return self.get_url(url, params=payload)

    def _parse_events(self, xml):
        """
        Parse xml document and return the events it contains

        :param xml: xml document
        :type xml: lxml.etree._Element
        :return: ordered dict of parsed events, keyed by their ``ntb_id``
        """
        parser = self.get_feed_parser(self._provider, article=xml)
        return OrderedDict(
            (item['ntb_id'], item) for item in parser.parse(xml)
        )

    def _filter_items(self, items):
        """
        Reconcile fetched events with the events which already exist in the db.

        A fetched event whose stored counterpart is still in the ``ingested``
        state receives the stored guid; a fetched event whose stored
        counterpart is in any other state is removed. ``items`` is modified
        in place.

        :param items: dict with events, ntbId used as a key
        :type items: dict
        :return: a list of the remaining events
        """

        req = ParsedRequest()
        req.projection = json.dumps({'ntb_id': 1, 'guid': 1, ITEM_STATE: 1})
        req.max_results = len(items)

        existing_items = superdesk.get_resource_service('events').get_from_mongo(
            req,
            {
                'ntb_id': {
                    '$in': list(items)
                }
            }
        )
        for existing_item in existing_items:
            ntb_id = existing_item['ntb_id']
            if ntb_id not in items:
                # already removed because another stored event carries the
                # same ntb_id; avoid a KeyError on the second occurrence
                continue

            if existing_item.get(ITEM_STATE) == WORKFLOW_STATE.INGESTED:
                # update event
                items[ntb_id][GUID_FIELD] = existing_item[GUID_FIELD]
            else:
                # remove event when it has a state different from 'ingested'
                del items[ntb_id]

        return list(items.values())
    def get_url(self, url=None, **kwargs):
        """Do an HTTP Get on URL

        Credentials from ``self.config`` are added as ``auth`` when
        authentication is required, and ``self.HTTP_DEFAULT_PARAMETERS`` are
        merged into the query parameters (explicitly given ``params`` win on
        conflict). ``self.HTTP_TIMEOUT`` is used unless a ``timeout`` is given
        in ``kwargs``.

        :param string url: url to use (None to use self.HTTP_URL)
        :param **kwargs: extra parameter for requests
        :return requests.Response: response
        :raises SuperdeskIngestError: if authentication is required but the
            user or the password is not configured
        :raises IngestApiError: if the request fails or the response is
            not OK
        """
        if not url:
            url = self.HTTP_URL
        config = self.config
        user = config.get('username')
        password = config.get('password')
        if user:
            user = user.strip()
        if password:
            password = password.strip()

        auth_required = config.get('auth_required', self.HTTP_AUTH)
        if auth_required is None:
            # auth_required may not be used in the feeding service;
            # in this case we use authentication only if user
            # and password are set.
            auth_required = bool(user and password)

        if auth_required:
            if not user:
                raise SuperdeskIngestError.notConfiguredError(
                    "user is not configured")
            if not password:
                raise SuperdeskIngestError.notConfiguredError(
                    "password is not configured")
            kwargs.setdefault('auth', (user, password))

        params = kwargs.pop("params", None)
        default_params = self.HTTP_DEFAULT_PARAMETERS
        if default_params:
            # start from the defaults so that conflicting params given in
            # arguments override them; build a new dict so neither the
            # caller's params nor the defaults are mutated
            merged = dict(default_params)
            merged.update(params or {})
            params = merged
        if params:
            kwargs["params"] = params

        # let callers override the timeout instead of colliding with it
        kwargs.setdefault('timeout', self.HTTP_TIMEOUT)

        try:
            response = requests.get(url, **kwargs)
        except requests.exceptions.Timeout as exception:
            raise IngestApiError.apiTimeoutError(exception, self.provider)
        except requests.exceptions.ConnectionError as exception:
            raise IngestApiError.apiConnectionError(exception, self.provider)
        except requests.exceptions.RequestException as exception:
            raise IngestApiError.apiRequestError(exception, self.provider)
        except Exception as exception:
            traceback.print_exc()
            raise IngestApiError.apiGeneralError(exception, self.provider)

        if not response.ok:
            exception = Exception(response.reason)
            if response.status_code in (401, 403):
                raise IngestApiError.apiAuthError(exception, self.provider)
            elif response.status_code == 404:
                raise IngestApiError.apiNotFoundError(exception, self.provider)
            else:
                raise IngestApiError.apiGeneralError(exception, self.provider)

        return response