Example #1
def streaming_change_generator(namespace_id, poll_interval, timeout,
                               transaction_pointer, exclude_types=None):
    """
    Poll the transaction log for the given `namespace_id` until `timeout`
    expires, and yield each time new entries are detected.
    Arguments
    ---------
    namespace_id: int
        Id of the namespace for which to check changes.
    poll_interval: float
        How often to check for changes.
    timeout: float
        How many seconds to allow the connection to remain open.
    transaction_pointer: int, optional
        Yield transaction rows starting after the transaction with id equal to
        `transaction_pointer`.

    """
    encoder = APIEncoder()
    start_time = time.time()
    while time.time() - start_time < timeout:
        with session_scope() as db_session:
            deltas, new_pointer = format_transactions_after_pointer(
                namespace_id, transaction_pointer, db_session, 100,
                _format_transaction_for_delta_sync, exclude_types)
        if new_pointer is not None and new_pointer != transaction_pointer:
            transaction_pointer = new_pointer
            for delta in deltas:
                yield encoder.cereal(delta) + '\n'
        else:
            gevent.sleep(poll_interval)
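For context, here is a minimal sketch of how the generator above might be consumed. Everything except `streaming_change_generator` itself (the `tail_deltas` wrapper, the argument values, and the 'object'/'event' keys assumed in each delta payload) is illustrative rather than taken from the original code.

import json

def tail_deltas(namespace_id, cursor):
    # Drive the generator and re-parse each newline-delimited JSON chunk.
    # Argument values are placeholders; the delta key names are assumptions.
    stream = streaming_change_generator(namespace_id,
                                        poll_interval=1.0,
                                        timeout=1800,
                                        transaction_pointer=cursor)
    for chunk in stream:
        delta = json.loads(chunk)
        yield delta.get('object'), delta.get('event')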
Example #2
def format_output(public_snapshot, include_body):
    # Because we're using a snapshot of the message API representation in the
    # transaction log, we can just return that directly (without the 'body'
    # field if include_body is False).
    encoder = APIEncoder()
    return encoder.cereal({k: v for k, v in public_snapshot.iteritems()
                           if k != 'body' or include_body})
Example #3
def format_output(public_snapshot, include_body):
    # Because we're using a snapshot of the message API representation in the
    # transaction log, we can just return that directly (without the 'body'
    # field if include_body is False).
    encoder = APIEncoder()
    return encoder.cereal({
        k: v
        for k, v in public_snapshot.iteritems() if k != 'body' or include_body
    })
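The two variants above are the same function formatted differently. As a quick, hypothetical check of the filtering behaviour (this assumes `APIEncoder.cereal` returns a JSON string, which is not shown in these snippets):

import json

snapshot = {'id': 'abc123', 'subject': 'Hello', 'body': '<html>...</html>'}

with_body = json.loads(format_output(snapshot, include_body=True))
without_body = json.loads(format_output(snapshot, include_body=False))

assert 'body' in with_body
assert 'body' not in without_body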
Example #4
def streaming_change_generator(
    namespace,
    poll_interval,
    timeout,
    transaction_pointer,
    exclude_types=None,
    include_types=None,
    exclude_folders=True,
    exclude_metadata=True,
    exclude_account=True,
    expand=False,
    is_n1=False,
):
    """
    Poll the transaction log for the given `namespace_id` until `timeout`
    expires, and yield each time new entries are detected.
    Arguments
    ---------
    namespace_id: int
        Id of the namespace for which to check changes.
    poll_interval: float
        How often to check for changes.
    timeout: float
        How many seconds to allow the connection to remain open.
    transaction_pointer: int, optional
        Yield transaction rows starting after the transaction with id equal to
        `transaction_pointer`.

    """
    encoder = APIEncoder(is_n1=is_n1)
    start_time = time.time()
    while time.time() - start_time < timeout:
        with session_scope(namespace.id) as db_session:
            deltas, new_pointer = format_transactions_after_pointer(
                namespace,
                transaction_pointer,
                db_session,
                100,
                exclude_types,
                include_types,
                exclude_folders,
                exclude_metadata,
                exclude_account,
                expand=expand,
                is_n1=is_n1,
            )

        if new_pointer is not None and new_pointer != transaction_pointer:
            transaction_pointer = new_pointer
            for delta in deltas:
                yield encoder.cereal(delta) + "\n"
        else:
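            # While waiting for new entries, emit a bare newline so the
            # consumer sees periodic activity on an otherwise idle stream.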
            yield "\n"
            gevent.sleep(poll_interval)
Example #5
        def g():
            encoder = APIEncoder()

            with session_scope(self.account_id) as db_session:
                for imap_uids in self._search(db_session, search_query):
                    query = db_session.query(Message) \
                        .join(ImapUid) \
                        .filter(ImapUid.account_id == self.account_id,
                                ImapUid.msg_uid.in_(imap_uids)) \
                        .order_by(desc(Message.received_date))

                    yield encoder.cereal(query.all()) + '\n'
Example #6
        def g():
            encoder = APIEncoder()

            with session_scope(self.account_id) as db_session:
                try:
                    for imap_uids in self._search(db_session, search_query):
                        query = (
                            db_session.query(Message).join(ImapUid).filter(
                                ImapUid.account_id == self.account_id,
                                ImapUid.msg_uid.in_(imap_uids),
                            ).order_by(desc(Message.received_date)))
                        yield encoder.cereal(query.all()) + "\n"
                except Exception as e:
                    self.log.error("Error while streaming messages", error=e)
Example #7
        def g():
            encoder = APIEncoder()

            with session_scope(self.account_id) as db_session:
                for imap_uids in self._search(db_session, search_query):
                    query = (db_session.query(Thread).join(
                        Message,
                        Message.thread_id == Thread.id).join(ImapUid).filter(
                            ImapUid.account_id == self.account_id,
                            ImapUid.msg_uid.in_(imap_uids),
                            Thread.id == Message.thread_id,
                        ).order_by(desc(Message.received_date)))

                    yield encoder.cereal(query.all()) + "\n"
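The g() generators above all stream newline-delimited JSON chunks. A hypothetical sketch of handing such a generator to a streaming HTTP response (Flask is an assumption here; the point is only that the generator is passed through without buffering):

from flask import Response

def stream_search_results(g):
    # Flask iterates the generator lazily, so each chunk is flushed to the
    # client as it is produced instead of being buffered in memory.
    return Response(g(), mimetype='application/json')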
Example #8
class WebhookService(object):
    """Asynchronously consumes the transaction log and executes registered
    webhooks."""
    def __init__(self, poll_interval=1, chunk_size=22):
        self.workers = defaultdict(set)
        self.log = get_logger()
        self.poll_interval = poll_interval
        self.chunk_size = chunk_size
        self.minimum_id = 0
        self.poller = None
        self.polling = False
        self.encoder = APIEncoder()
        self._on_startup()

    @property
    def all_active_workers(self):
        worker_sets = self.workers.values()
        if not worker_sets:
            return set()
        return set.union(*worker_sets)

    def register_hook(self, namespace_id, parameters):
        """Register a new webhook.

        Parameters
        ----------
        namespace_id: int
            ID for the namespace to apply the webhook on.
        parameters: dictionary
            Dictionary of the hook parameters.
        """

        # TODO(emfree) do more meaningful parameter validation here
        # (or in the calling code in the API)

        if urlparse.urlparse(parameters.get('callback_url')).scheme != 'https':
            raise ValueError('callback_url MUST be https!')

        with session_scope() as db_session:
            lens = Lens(
                namespace_id=namespace_id,
                subject=parameters.get('subject'),
                thread_public_id=parameters.get('thread'),
                to_addr=parameters.get('to'),
                from_addr=parameters.get('from'),
                cc_addr=parameters.get('cc'),
                bcc_addr=parameters.get('bcc'),
                any_email=parameters.get('any_email'),
                started_before=parameters.get('started_before'),
                started_after=parameters.get('started_after'),
                last_message_before=parameters.get('last_message_before'),
                last_message_after=parameters.get('last_message_after'),
                filename=parameters.get('filename'))

            hook = Webhook(
                namespace_id=namespace_id,
                lens=lens,
                callback_url=parameters.get('callback_url'),
                failure_notify_url=parameters.get('failure_notify_url'),
                include_body=parameters.get('include_body', False),
                active=parameters.get('active', True),
                min_processed_id=self.minimum_id - 1)

            db_session.add(hook)
            db_session.add(lens)
            db_session.commit()
            if hook.active:
                self._start_hook(hook, db_session)
            return self.encoder.cereal(hook, pretty=True)

    def start_hook(self, hook_public_id):
        with session_scope() as db_session:
            hook = db_session.query(Webhook). \
                filter_by(public_id=hook_public_id).one()
            self._start_hook(hook, db_session)

    def _start_hook(self, hook, db_session):
        self.log.info('Starting hook with public id {}'.format(hook.public_id))
        if any(worker.id == hook.id for worker in self.all_active_workers):
            # Hook already has a worker
            return 'OK hook already running'
        hook.min_processed_id = self.minimum_id - 1
        hook.active = True
        namespace_id = hook.namespace_id
        worker = WebhookWorker(hook)
        self.workers[namespace_id].add(worker)
        if not worker.started:
            worker.start()
        db_session.commit()
        if not self.polling:
            self._start_polling()
        return 'OK hook started'

    def stop_hook(self, hook_public_id):
        self.log.info('Stopping hook with public id {}'.format(hook_public_id))
        with session_scope() as db_session:
            hook = db_session.query(Webhook). \
                filter_by(public_id=hook_public_id).one()
            hook.active = False
            db_session.commit()
            for worker in self.workers[hook.namespace_id]:
                if worker.public_id == hook_public_id:
                    self.workers[hook.namespace_id].remove(worker)
                    worker.kill()
                    break

        if not set.union(*self.workers.values()):
            # Kill the transaction log poller if there are no active hooks.
            self._stop_polling()
        return 'OK hook stopped'

    def _on_startup(self):
        self._load_hooks()
        for worker in itertools.chain(*self.workers.values()):
            if not worker.started:
                worker.start()
        # Needed for workers to actually start up.
        gevent.sleep(0)
        if self.all_active_workers:
            self._start_polling()

    def _start_polling(self):
        self.log.info('Start polling')
        self.minimum_id = min(hook.min_processed_id + 1
                              for hook in self.all_active_workers)
        self.poller = gevent.spawn(self._poll)
        self.polling = True

    def _stop_polling(self):

        self.log.info('Stop polling')
        self.poller.kill()
        self.polling = False

    def _poll(self):
        """Poll the transaction log forever and publish events. Only runs when
        there are actually active webhooks."""
        while True:
            self._process_log()
            gevent.sleep(self.poll_interval)

    def _process_log(self):
        """Scan the transaction log `self.chunk_size` entries at a time,
        publishing matching events to registered hooks."""
        with session_scope() as db_session:
            self.log.info('Scanning tx log from id: {}'.format(
                self.minimum_id))
            unprocessed_txn_count = db_session.query(
                func.count(Transaction.id)).filter(
                    Transaction.table_name == 'message',
                    Transaction.id > self.minimum_id).scalar()
            if unprocessed_txn_count:
                self.log.debug('Total of {0} transactions to process'.format(
                    unprocessed_txn_count))

            max_tx_id, = db_session.query(func.max(Transaction.id)).one()
            if max_tx_id is None:
                max_tx_id = 0
            query = db_session.query(Transaction). \
                filter(Transaction.table_name == 'message',
                       Transaction.command == 'insert'). \
                order_by(asc(Transaction.id))
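            # safer_yield_per (not shown here) presumably pages through the
            # query in id order, chunk_size rows at a time, starting past
            # self.minimum_id.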
            for transaction in safer_yield_per(query, Transaction.id,
                                               self.minimum_id,
                                               self.chunk_size):
                namespace_id = transaction.namespace_id
                for worker in self.workers[namespace_id]:
                    if worker.match(transaction):
                        worker.enqueue(EventData(transaction))
                self.minimum_id = transaction.id + 1
            self.log.debug('Processed tx. setting min id to {0}'.format(
                self.minimum_id))

    def _load_hooks(self):
        """Load stored hook parameters from the database. Run once on
        startup."""
        with session_scope() as db_session:
            all_hooks = db_session.query(Webhook).filter_by(active=True).all()
            for hook in all_hooks:
                namespace_id = hook.namespace_id
                self.workers[namespace_id].add(WebhookWorker(hook))
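For illustration, a hypothetical registration call against the service defined above (the namespace id, URLs, and filter values are made up; the only hard requirement in the code is an https callback_url):

service = WebhookService(poll_interval=1, chunk_size=100)
response = service.register_hook(namespace_id=1, parameters={
    'callback_url': 'https://example.com/new-message-hook',
    'any_email': 'alice@example.com',
    'include_body': False,
})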
Example #9
        def g():
            encoder = APIEncoder()

            with session_scope(self.account_id) as db_session:
                yield encoder.cereal(self.search_threads(db_session, search_query)) + '\n'
Example #10
        def g():
            encoder = APIEncoder()

            with session_scope(self.account_id) as db_session:
                yield encoder.cereal(
                    self.search_threads(db_session, search_query)) + '\n'
Example #11
class WebhookService():
    """Asynchronously consumes the transaction log and executes registered
    webhooks."""
    def __init__(self, poll_interval=1, chunk_size=22):
        self.workers = defaultdict(set)
        self.log = get_logger()
        self.poll_interval = poll_interval
        self.chunk_size = chunk_size
        self.minimum_id = -1
        self.poller = None
        self.polling = False
        self.encoder = APIEncoder()
        self._on_startup()

    @property
    def all_active_workers(self):
        worker_sets = self.workers.values()
        if not worker_sets:
            return set()
        return set.union(*worker_sets)

    def register_hook(self, namespace_id, parameters):
        """Register a new webhook.

        Parameters
        ----------
        namespace_id: int
            ID for the namespace to apply the webhook on.
        parameters: dictionary
            Dictionary of the hook parameters.
        """

        # TODO(emfree) do more meaningful parameter validation here
        # (or in the calling code in the API)

        if urlparse.urlparse(parameters.get('callback_url')).scheme != 'https':
            raise ValueError('callback_url MUST be https!')

        with session_scope() as db_session:
            lens = Lens(
                namespace_id=namespace_id,
                subject=parameters.get('subject'),
                thread_public_id=parameters.get('thread'),
                to_addr=parameters.get('to'),
                from_addr=parameters.get('from'),
                cc_addr=parameters.get('cc'),
                bcc_addr=parameters.get('bcc'),
                any_email=parameters.get('any_email'),
                started_before=parameters.get('started_before'),
                started_after=parameters.get('started_after'),
                last_message_before=parameters.get('last_message_before'),
                last_message_after=parameters.get('last_message_after'),
                filename=parameters.get('filename'))

            hook = Webhook(
                namespace_id=namespace_id,
                lens=lens,
                callback_url=parameters.get('callback_url'),
                failure_notify_url=parameters.get('failure_notify_url'),
                include_body=parameters.get('include_body', False),
                active=parameters.get('active', True),
                min_processed_id=self.minimum_id)

            db_session.add(hook)
            db_session.add(lens)
            db_session.commit()
            if hook.active:
                self._start_hook(hook, db_session)
            return self.encoder.cereal(hook, pretty=True)

    def start_hook(self, hook_public_id):
        with session_scope() as db_session:
            hook = db_session.query(Webhook). \
                filter_by(public_id=hook_public_id).one()
            self._start_hook(hook, db_session)

    def _start_hook(self, hook, db_session):
        self.log.info('Starting hook with public id {}'.format(hook.public_id))
        if any(worker.id == hook.id for worker in self.all_active_workers):
            # Hook already has a worker
            return 'OK hook already running'
        hook.min_processed_id = self.minimum_id
        hook.active = True
        namespace_id = hook.namespace_id
        worker = WebhookWorker(hook)
        self.workers[namespace_id].add(worker)
        if not worker.started:
            worker.start()
        db_session.commit()
        if not self.polling:
            self._start_polling()
        return 'OK hook started'

    def stop_hook(self, hook_public_id):
        self.log.info('Stopping hook with public id {}'.format(hook_public_id))
        with session_scope() as db_session:
            hook = db_session.query(Webhook). \
                filter_by(public_id=hook_public_id).one()
            hook.active = False
            db_session.commit()
            for worker in self.workers[hook.namespace_id]:
                if worker.public_id == hook_public_id:
                    self.workers[hook.namespace_id].remove(worker)
                    worker.kill()
                    break

        if not set.union(*self.workers.values()):
            # Kill the transaction log poller if there are no active hooks.
            self._stop_polling()
        return 'OK hook stopped'

    def _on_startup(self):
        self._load_hooks()
        for worker in itertools.chain(*self.workers.values()):
            if not worker.started:
                worker.start()
        # Needed for workers to actually start up.
        gevent.sleep(0)
        if self.all_active_workers:
            self._start_polling()

    def _start_polling(self):
        self.log.info('Start polling')
        self.minimum_id = min(hook.min_processed_id for hook in
                              self.all_active_workers)
        self.poller = gevent.spawn(self._poll)
        self.polling = True

    def _stop_polling(self):
        self.log.info('Stop polling')
        self.poller.kill()
        self.polling = False

    def _poll(self):
        """Poll the transaction log forever and publish events. Only runs when
        there are actually active webhooks."""
        while True:
            self._process_log()
            gevent.sleep(self.poll_interval)

    def _process_log(self):
        """Scan the transaction log `self.chunk_size` entries at a time,
        publishing matching events to registered hooks."""
        with session_scope() as db_session:
            self.log.info('Scanning tx log from id: {}'.
                          format(self.minimum_id))
            unprocessed_txn_count = db_session.query(
                func.count(Transaction.id)).filter(
                Transaction.table_name == 'message',
                Transaction.id > self.minimum_id).scalar()
            if unprocessed_txn_count:
                self.log.debug('Total of {0} transactions to process'.
                               format(unprocessed_txn_count))

            max_tx_id, = db_session.query(func.max(Transaction.id)).one()
            if max_tx_id is None:
                max_tx_id = 0
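            # Walk the log in windows of chunk_size ids so a large backlog is
            # fetched in bounded queries rather than one unbounded scan.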
            for pointer in range(self.minimum_id, max_tx_id, self.chunk_size):
                # TODO(emfree) add the right index to make this query more
                # performant.
                for transaction in db_session.query(Transaction). \
                        filter(Transaction.table_name == 'message',
                               Transaction.command == 'insert',
                               Transaction.id > pointer,
                               Transaction.id <= pointer + self.chunk_size). \
                        order_by(asc(Transaction.id)):
                    namespace_id = transaction.namespace_id
                    for worker in self.workers[namespace_id]:
                        if worker.match(transaction):
                            worker.enqueue(EventData(transaction))
                    self.minimum_id = transaction.id
            self.log.debug('Processed tx. setting min id to {0}'.
                           format(self.minimum_id))

    def _load_hooks(self):
        """Load stored hook parameters from the database. Run once on
        startup."""
        with session_scope() as db_session:
            all_hooks = db_session.query(Webhook).filter_by(active=True).all()
            for hook in all_hooks:
                namespace_id = hook.namespace_id
                self.workers[namespace_id].add(WebhookWorker(hook))