Пример #1
0
 def __init__(self, *args, **kwargs):
     """ Set up the request helper, the endpoint wrappers and the live-stream plumbing.

         Every positional and keyword argument is handed unchanged to the
         local ``Config`` class defined below. The stream process is only
         created here, it is not started.
     """
     class Config(object):
         # Plain holder for the connection settings given to the constructor.
         def __init__(self, user, apikey, ssl=True, proxies=None, timeout=None, verify=None):
             self.user, self.key = user, apikey
             self.ssl = ssl
             self.proxies, self.timeout, self.verify = proxies, timeout, verify

     settings = Config(*args, **kwargs)
     self.config = settings

     # One authenticated request helper is shared by every endpoint wrapper.
     credentials = DatasiftAuth(settings.user, settings.key)
     self.request = PartialRequest(
         credentials,
         ssl=settings.ssl,
         proxies=settings.proxies,
         timeout=settings.timeout,
         verify=settings.verify)
     self.push = Push(self.request)
     self.historics = Historics(self.request)
     self.historics_preview = HistoricsPreview(self.request)
     self.managed_sources = ManagedSources(self.request)
     self.list = List(self.request)

     # No callbacks registered yet, nothing opened, nothing subscribed.
     self._on_delete = self._on_open = self._on_closed = self._on_ds_message = None
     self.opened = False
     self.subscriptions = {}

     # Live stream: the credentials travel in the query string of the websocket URL.
     scheme = "wss" if settings.ssl else "ws"
     query = urlencode({"username": settings.user, "api_key": settings.key})
     endpoint = "%s://%s?%s" % (scheme, WEBSOCKET_HOST, query)
     self.factory = LiveStreamFactory(endpoint, debug=False, useragent=USER_AGENT)
     self._stream_process_started = False
     self._stream_process = Process(target=self._stream)
Пример #2
0
    def __init__(self, *args, **kwargs):
        """ Create the API helpers and prepare the live-stream consumer.

            The arguments are forwarded as-is to the nested ``Config`` class.
            The consumer process is built but left unstarted.
        """
        class Config(object):
            # Simple record of the connection options supplied by the caller.
            def __init__(self, user, apikey, ssl=True, proxies=None, timeout=None, verify=None):
                self.user, self.key = user, apikey
                self.ssl = ssl
                self.proxies, self.timeout, self.verify = proxies, timeout, verify

        options = Config(*args, **kwargs)
        self.config = options

        # Every endpoint wrapper below reuses this single request helper.
        auth = DatasiftAuth(options.user, options.key)
        self.request = PartialRequest(
            auth,
            ssl=options.ssl,
            proxies=options.proxies,
            timeout=options.timeout,
            verify=options.verify,
        )
        self.push = Push(self.request)
        self.historics = Historics(self.request)
        self.historics_preview = HistoricsPreview(self.request)
        self.managed_sources = ManagedSources(self.request)

        # Stream state starts empty: no callbacks, not opened, no subscriptions.
        self._on_delete = self._on_open = self._on_closed = self._on_ds_message = None
        self.opened = False
        self.subscriptions = {}

        # Websocket endpoint carries the credentials as URL-encoded query parameters.
        scheme = "wss" if options.ssl else "ws"
        query = urlencode({"username": options.user, "api_key": options.key})
        endpoint = "%s://%s?%s" % (scheme, WEBSOCKET_HOST, query)
        self.factory = LiveStreamFactory(endpoint, debug=False, useragent=USER_AGENT)
        self._stream_process_started = False
        self._stream_process = Process(target=self._stream)
Пример #3
0
class Client(object):
    """ Datasift client class.

        Used to interact with the DataSift_ REST API.

        .. _DataSift: http://www.datasift.com/

        :param user: username for the DataSift platform
        :type user: str
        :param apikey: API key for the DataSift platform
        :type apikey: str
        :param ssl: (optional) whether to enable SSL, default is True
        :type ssl: bool
        :param proxies: (optional) dict of proxies for requests to use, of the form {"https": "http://*****:*****@myproxyserver:port/" }
        :type proxies: dict
        :param timeout: (optional) seconds to wait for HTTP connections
        :type timeout: float
        :param verify: (optional) whether to verify SSL certificates
        :type verify: bool

        :ivar push: instance of :class:`~datasift.push.Push`
        :ivar historics: instance of :class:`~datasift.historics.Historics`
        :ivar historics_preview: instance of :class:`~datasift.historics_preview.HistoricsPreview`
        :ivar managed_sources: instance of :class:`~datasift.managed_sources.ManagedSources`
        :ivar list: instance of ``List``, built from the same request object as the other endpoints

    """
    def __init__(self, *args, **kwargs):
        """ Build the REST endpoint helpers and prepare (but do not start) the live stream.

            Positional and keyword arguments are forwarded unchanged to the nested
            ``Config`` class, so they follow the parameters documented on this class.
        """
        class Config(object):
            def __init__(self, user, apikey, ssl=True, proxies=None, timeout=None, verify=None):
                self.user = user
                self.key = apikey
                self.ssl = ssl
                self.proxies = proxies
                self.timeout = timeout
                self.verify = verify
        config = Config(*args, **kwargs)
        self.config = config
        self.request = PartialRequest(
            DatasiftAuth(config.user, config.key),
            ssl=config.ssl,
            proxies=config.proxies,
            timeout=config.timeout,
            verify=config.verify)
        self.push = Push(self.request)
        self.historics = Historics(self.request)
        self.historics_preview = HistoricsPreview(self.request)
        self.managed_sources = ManagedSources(self.request)
        self.list = List(self.request)
        # User-supplied callbacks. These are instance attributes, so any method
        # with the same name would be shadowed; the internal open dispatcher is
        # therefore called _handle_open.
        self._on_delete = None
        self._on_open = None
        self._on_closed = None
        self._on_ds_message = None
        self.opened = False
        self.subscriptions = {}
        # configure live stream
        websocket_protocol = "wss" if self.config.ssl else "ws"
        host = "%s://%s?%s" % (
            websocket_protocol,
            WEBSOCKET_HOST,
            urlencode(dict(username=config.user, api_key=config.key)))
        self.factory = LiveStreamFactory(host, debug=False, useragent=USER_AGENT)
        self._stream_process = Process(target=self._stream)
        self._stream_process_started = False

    def start_stream_subscriber(self):
        """ Starts the stream consumer's main loop.

            Called when the stream consumer has been set up with the correct callbacks.
            Does nothing if the consumer has already been started.
        """
        if not self._stream_process_started:  # pragma: no cover
            self._stream_process_started = True
            if sys.platform.startswith("win"):
                # if we're on windows we can't expect multiprocessing to work,
                # so consume in this process instead of also starting the sub-process
                self._stream()
            else:
                self._stream_process.start()

    def subscribe(self, stream):
        """ Subscribe to a stream.

            :param stream: stream to subscribe to
            :type stream: str
            :raises: :class:`~datasift.exceptions.StreamSubscriberNotStarted`, :class:`~datasift.exceptions.DeleteRequired`, :class:`~datasift.exceptions.StreamNotConnected`

            Used as a decorator, eg.::

                @client.subscribe(stream)
                def subscribe_to_hash(msg):
                    print(msg)

            The decorated function is returned unchanged, so the decorated name stays callable.
        """
        if not self._stream_process_started:
            raise StreamSubscriberNotStarted()

        def real_decorator(func):
            if not self._on_delete:
                raise DeleteRequired("""An on_delete function is required. You must process delete messages and remove
                 them from your system (if stored) in order to remain compliant with the ToS""")
            if hasattr(self.factory, 'datasift') and 'send_message' in self.factory.datasift:  # pragma: no cover
                self.subscriptions[stream] = func
                self.factory.datasift['send_message'](json.dumps({"action": "subscribe", "hash": stream}).encode("utf8"))
            else:  # pragma: no cover
                raise StreamNotConnected('The client is not connected to DataSift, unable to subscribe to stream')
            # hand the function back so the decorated name is not rebound to None
            return func

        return real_decorator

    def on_open(self, func):
        """ Function to set the callback for the opening of a stream.

            Can be called manually::

                def open_callback(data):
                    setup_stream()
                client.on_open(open_callback)

            or as a decorator::

                @client.on_open
                def open_callback():
                    setup_stream()
        """
        self._on_open = func
        if self.opened:  # pragma: no cover
            # NOTE(review): a late registration passes the client as an argument,
            # while _handle_open calls the callback with no arguments - confirm
            # which signature callbacks are meant to have.
            self._on_open(self)
        return func

    def on_closed(self, func):
        """ Function to set the callback for the closing of a stream.

            Can be called manually::

                def close_callback(was_clean, code, reason):
                    teardown_stream()
                client.on_closed(close_callback)

            or as a decorator::

                @client.on_closed
                def close_callback(was_clean, code, reason):
                    teardown_stream()
        """
        self._on_closed = func
        return func

    def on_delete(self, func):
        """ Function to set the callback for the deletion of an item on an active stream.

            Can be called manually::

                def delete_callback(interaction):
                    delete(interaction)
                client.on_delete(delete_callback)

            or as a decorator::

                @client.on_delete
                def delete_callback(interaction):
                    delete(interaction)
        """
        self._on_delete = func
        return func

    def on_ds_message(self, func):
        """ Function to set the callback for an incoming interaction.

            Can be called manually::

                def message_callback(interaction):
                    process(interaction)
                client.on_ds_message(message_callback)

            or as a decorator::

                @client.on_ds_message
                def message_callback(interaction):
                    process(interaction)
        """
        self._on_ds_message = func
        return func

    def _handle_open(self):  # pragma: no cover
        """Mark the stream as opened, then call the user's open callback if one is set."""
        self.opened = True
        if self._on_open:
            self._on_open()

    def _on_close(self, was_clean, code, reason):  # pragma: no cover
        """Forward the close details to the user's close callback if one is set."""
        if self._on_closed:
            self._on_closed(was_clean, code, reason)

    def _on_message(self, msg, binary):  # pragma: no cover
        """Decode one UTF-8 JSON message and route it to the matching callback.

        Messages whose ``data`` contains ``deleted`` go to the delete callback,
        messages carrying ``status`` go to the DataSift-message callback, and
        anything else is handed to the subscriber registered for its ``hash``.
        """
        interaction = json.loads(msg.decode("utf8"))
        outputmapper(interaction)
        if 'data' in interaction and 'deleted' in interaction['data']:
            if not self._on_delete:
                raise DeleteRequired()  # really should never happen since we check on subscribe but just in case
            self._on_delete(interaction)
        elif 'status' in interaction:
            if self._on_ds_message:
                self._on_ds_message(interaction)
        else:
            stream = interaction['hash']
            if stream in self.subscriptions:
                self.subscriptions[stream](interaction['data'])

    def _stream(self):  # pragma: no cover
        """Runs in a sub-process to perform stream consumption"""
        self.factory.protocol = LiveStream
        self.factory.datasift = {
            # self._on_open is the user's callback (instance attribute); register
            # the dispatcher so self.opened is maintained as well
            'on_open': self._handle_open,
            'on_close': self._on_close,
            'on_message': self._on_message,
            'send_message': None
        }
        if self.config.ssl:
            from datasift.twisted_ssl import ClientContextFactory
            connectWS(self.factory, contextFactory=ClientContextFactory())
        else:
            connectWS(self.factory)
        reactor.run()

    def compile(self, csdl):
        """ Compile the given CSDL.

            Uses API documented at http://dev.datasift.com/docs/api/1/compile

            Raises a DataSiftApiException for any error given by the REST API, including CSDL compilation.

            :param csdl: CSDL to compile
            :type csdl: str
            :returns: dict with extra response data
            :rtype: :class:`~datasift.request.DictResponse`
            :raises: :class:`~datasift.exceptions.DataSiftApiException`, :class:`requests.exceptions.HTTPError`
        """
        return self.request.post('compile', data=dict(csdl=csdl))

    def validate(self, csdl):
        """ Checks if the given CSDL is valid.

            Uses API documented at http://dev.datasift.com/docs/api/1/validate

            :param csdl: CSDL to validate
            :type csdl: str
            :returns: dict with extra response data
            :rtype: :class:`~datasift.request.DictResponse`
            :raises: :class:`~datasift.exceptions.DataSiftApiException`, :class:`requests.exceptions.HTTPError`
        """
        return self.request.post('validate', data=dict(csdl=csdl))

    def is_valid(self, csdl):
        """ Checks if the given CSDL is valid.

            Uses API documented at http://dev.datasift.com/docs/api/1/validate

            :param csdl: CSDL to validate
            :type csdl: str
            :returns: Boolean indicating the validity of the CSDL
            :rtype: bool
            :raises: :class:`~datasift.exceptions.DataSiftApiException`, :class:`requests.exceptions.HTTPError`
        """
        try:
            self.validate(csdl)
        except DataSiftApiException as e:
            # only a 400 response means "invalid CSDL"; anything else is a real failure
            if e.response.status_code == 400:
                return False
            raise
        return True

    def usage(self, period='hour'):
        """ Check the number of objects processed and delivered for a given time period

            Uses API documented at http://dev.datasift.com/docs/api/1/usage

            :param period: (optional) time period to measure usage for, can be one of "day", "hour" or "current" (5 minutes), default is hour
            :type period: str
            :returns: dict with extra response data
            :rtype: :class:`~datasift.request.DictResponse`
            :raises: :class:`~datasift.exceptions.DataSiftApiException`, :class:`requests.exceptions.HTTPError`
        """
        return self.request.get('usage', params=dict(period=period))

    def dpu(self, hash):
        """ Calculate the DPU cost of consuming a stream.

            Uses API documented at http://dev.datasift.com/docs/api/1/dpu

            :param hash: target CSDL filter hash
            :type hash: str
            :returns: dict with extra response data
            :rtype: :class:`~datasift.request.DictResponse`
            :raises: :class:`~datasift.exceptions.DataSiftApiException`, :class:`requests.exceptions.HTTPError`
        """
        return self.request.get('dpu', params=dict(hash=hash))

    def balance(self):
        """ Determine your credit or DPU balance

            Uses API documented at http://dev.datasift.com/docs/api/1/balance

            :returns: dict with extra response data
            :rtype: :class:`~datasift.request.DictResponse`
            :raises: :class:`~datasift.exceptions.DataSiftApiException`, :class:`requests.exceptions.HTTPError`
        """
        return self.request.get('balance')

    def pull(self, subscription_id, size=None, cursor=None):
        """ Pulls a series of interactions from the queue for the given subscription ID.

            Uses API documented at http://dev.datasift.com/docs/api/1/pull

            :param subscription_id: The ID of the subscription to pull interactions for
            :type subscription_id: str
            :param size: the max amount of data to pull in bytes, only sent when truthy
            :type size: int
            :param cursor: an ID to use as the point in the queue from which to start fetching data, only sent when truthy
            :type cursor: str
            :returns: dict with extra response data
            :rtype: :class:`~datasift.request.ResponseList`
            :raises: :class:`~datasift.exceptions.DataSiftApiException`, :class:`requests.exceptions.HTTPError`
        """
        params = {'id': subscription_id}
        if size:
            params['size'] = size
        if cursor:
            params['cursor'] = cursor
        raw = self.request('get', 'pull', params=params)

        def pull_parser(headers, data):
            pull_type = headers.get("X-DataSift-Format")
            if pull_type in ("json_meta", "json_array"):
                return json.loads(data)
            # any other format is parsed as one JSON document per line; blank
            # lines (including a completely empty body) are skipped rather than
            # handed to json.loads, which would raise on them
            return [json.loads(line) for line in data.strip().split("\n") if line.strip()]

        return self.request.build_response(raw, parser=pull_parser)