import time

import gevent

# APIEncoder, session_scope, format_transactions_after_pointer and
# _format_transaction_for_delta_sync are assumed to come from the
# surrounding application code.


def streaming_change_generator(namespace_id, poll_interval, timeout,
                               transaction_pointer, exclude_types=None):
    """
    Poll the transaction log for the given `namespace_id` until `timeout`
    expires, and yield each time new entries are detected.

    Arguments
    ---------
    namespace_id: int
        Id of the namespace for which to check changes.
    poll_interval: float
        How often (in seconds) to check for changes.
    timeout: float
        How many seconds to allow the connection to remain open.
    transaction_pointer: int
        Yield transaction rows starting after the transaction with id equal
        to `transaction_pointer`.
    exclude_types: list, optional
        Transaction types to exclude from the results.
    """
    encoder = APIEncoder()
    start_time = time.time()
    while time.time() - start_time < timeout:
        with session_scope() as db_session:
            deltas, new_pointer = format_transactions_after_pointer(
                namespace_id, transaction_pointer, db_session, 100,
                _format_transaction_for_delta_sync, exclude_types)
        if new_pointer is not None and new_pointer != transaction_pointer:
            transaction_pointer = new_pointer
            for delta in deltas:
                yield encoder.cereal(delta) + '\n'
        else:
            gevent.sleep(poll_interval)
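# Hedged client-side sketch: the generator above emits newline-delimited
# JSON, so a consumer can decode one delta per line. `resp` stands in for a
# hypothetical `requests` streaming response; nothing here is part of the
# module above.
import json


def read_deltas(resp):
    for line in resp.iter_lines():
        if not line:
            continue  # skip blank keep-alive lines
        yield json.loads(line)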
def format_output(public_snapshot, include_body):
    # Because we're using a snapshot of the message API representation in the
    # transaction log, we can just return that directly (without the 'body'
    # field if include_body is False).
    encoder = APIEncoder()
    return encoder.cereal({k: v for k, v in public_snapshot.iteritems()
                           if k != 'body' or include_body})
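# Illustration with a hypothetical snapshot dict: the 'body' key is dropped
# unless include_body is True.
snapshot = {'id': 'abc123', 'subject': 'Hello', 'body': '<html>hi</html>'}
format_output(snapshot, include_body=False)  # serialized without 'body'
format_output(snapshot, include_body=True)   # serialized with the full body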
# A variant of streaming_change_generator with per-namespace sessions, more
# filtering options, and bare-newline keep-alives while idle.
def streaming_change_generator(
    namespace,
    poll_interval,
    timeout,
    transaction_pointer,
    exclude_types=None,
    include_types=None,
    exclude_folders=True,
    exclude_metadata=True,
    exclude_account=True,
    expand=False,
    is_n1=False,
):
    """
    Poll the transaction log for the given `namespace` until `timeout`
    expires, and yield each time new entries are detected.

    Arguments
    ---------
    namespace: Namespace
        Namespace for which to check changes.
    poll_interval: float
        How often (in seconds) to check for changes.
    timeout: float
        How many seconds to allow the connection to remain open.
    transaction_pointer: int
        Yield transaction rows starting after the transaction with id equal
        to `transaction_pointer`.
    """
    encoder = APIEncoder(is_n1=is_n1)
    start_time = time.time()
    while time.time() - start_time < timeout:
        with session_scope(namespace.id) as db_session:
            deltas, new_pointer = format_transactions_after_pointer(
                namespace,
                transaction_pointer,
                db_session,
                100,
                exclude_types,
                include_types,
                exclude_folders,
                exclude_metadata,
                exclude_account,
                expand=expand,
                is_n1=is_n1,
            )
        if new_pointer is not None and new_pointer != transaction_pointer:
            transaction_pointer = new_pointer
            for delta in deltas:
                yield encoder.cereal(delta) + "\n"
        else:
            # Emit a bare newline as a keep-alive so idle connections
            # aren't closed by intermediaries.
            yield "\n"
            gevent.sleep(poll_interval)
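# A minimal sketch of serving the generator over HTTP. The Flask app, route,
# and parameter values are illustrative assumptions, not the application's
# actual API; `namespace` would be resolved from the authenticated request.
from flask import Flask, Response

app = Flask(__name__)  # stand-in app for the sketch


@app.route("/delta/streaming")
def delta_streaming():
    generator = streaming_change_generator(
        namespace, poll_interval=1.0, timeout=1800, transaction_pointer=0)
    # Flask streams each yielded '\n'-terminated JSON chunk to the client.
    return Response(generator, mimetype="application/json")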
def g():
    encoder = APIEncoder()
    with session_scope(self.account_id) as db_session:
        for imap_uids in self._search(db_session, search_query):
            query = db_session.query(Message) \
                .join(ImapUid) \
                .filter(ImapUid.account_id == self.account_id,
                        ImapUid.msg_uid.in_(imap_uids)) \
                .order_by(desc(Message.received_date))
            yield encoder.cereal(query.all()) + '\n'
# Variant of the generator above that logs errors instead of letting them
# propagate mid-stream.
def g():
    encoder = APIEncoder()
    with session_scope(self.account_id) as db_session:
        try:
            for imap_uids in self._search(db_session, search_query):
                query = (
                    db_session.query(Message)
                    .join(ImapUid)
                    .filter(
                        ImapUid.account_id == self.account_id,
                        ImapUid.msg_uid.in_(imap_uids),
                    )
                    .order_by(desc(Message.received_date))
                )
                yield encoder.cereal(query.all()) + "\n"
        except Exception as e:
            self.log.error("Error while streaming messages", error=e)
# Thread-search counterpart of the message generators above.
def g():
    encoder = APIEncoder()
    with session_scope(self.account_id) as db_session:
        for imap_uids in self._search(db_session, search_query):
            query = (
                db_session.query(Thread)
                .join(Message, Message.thread_id == Thread.id)
                .join(ImapUid)
                .filter(
                    ImapUid.account_id == self.account_id,
                    ImapUid.msg_uid.in_(imap_uids),
                    Thread.id == Message.thread_id,
                )
                .order_by(desc(Message.received_date))
            )
            yield encoder.cereal(query.all()) + "\n"
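# Each `g()` above is a closure over `self` and `search_query`, so it is
# presumably defined inside a search-backend method and handed to the HTTP
# layer as a generator. A hedged sketch of that pattern (the method name
# `stream_results` is an assumption):
def stream_results(self, search_query):
    def g():
        # ... one of the generator bodies shown above ...
        yield "\n"
    return g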
import itertools
import urlparse
from collections import defaultdict

from sqlalchemy import asc, func

# APIEncoder, EventData, Lens, Transaction, Webhook, WebhookWorker,
# get_logger, safer_yield_per and session_scope are assumed to come from the
# surrounding application code.


class WebhookService(object):
    """Asynchronously consumes the transaction log and executes registered
    webhooks."""

    def __init__(self, poll_interval=1, chunk_size=22):
        self.workers = defaultdict(set)
        self.log = get_logger()
        self.poll_interval = poll_interval
        self.chunk_size = chunk_size
        self.minimum_id = 0
        self.poller = None
        self.polling = False
        self.encoder = APIEncoder()
        self._on_startup()

    @property
    def all_active_workers(self):
        worker_sets = self.workers.values()
        if not worker_sets:
            return set()
        return set.union(*worker_sets)

    def register_hook(self, namespace_id, parameters):
        """Register a new webhook.

        Parameters
        ----------
        namespace_id: int
            ID for the namespace to apply the webhook on.
        parameters: dictionary
            Dictionary of the hook parameters.
        """
        # TODO(emfree) do more meaningful parameter validation here
        # (or in the calling code in the API)
        if urlparse.urlparse(parameters.get('callback_url')).scheme \
                != 'https':
            raise ValueError('callback_url MUST be https!')

        with session_scope() as db_session:
            lens = Lens(
                namespace_id=namespace_id,
                subject=parameters.get('subject'),
                thread_public_id=parameters.get('thread'),
                to_addr=parameters.get('to'),
                from_addr=parameters.get('from'),
                cc_addr=parameters.get('cc'),
                bcc_addr=parameters.get('bcc'),
                any_email=parameters.get('any_email'),
                started_before=parameters.get('started_before'),
                started_after=parameters.get('started_after'),
                last_message_before=parameters.get('last_message_before'),
                last_message_after=parameters.get('last_message_after'),
                filename=parameters.get('filename'))

            hook = Webhook(
                namespace_id=namespace_id,
                lens=lens,
                callback_url=parameters.get('callback_url'),
                failure_notify_url=parameters.get('failure_notify_url'),
                include_body=parameters.get('include_body', False),
                active=parameters.get('active', True),
                min_processed_id=self.minimum_id - 1)

            db_session.add(hook)
            db_session.add(lens)
            db_session.commit()
            if hook.active:
                self._start_hook(hook, db_session)
            return self.encoder.cereal(hook, pretty=True)

    def start_hook(self, hook_public_id):
        with session_scope() as db_session:
            hook = db_session.query(Webhook). \
                filter_by(public_id=hook_public_id).one()
            self._start_hook(hook, db_session)

    def _start_hook(self, hook, db_session):
        self.log.info('Starting hook with public id {}'
                      .format(hook.public_id))
        if any(worker.id == hook.id for worker in self.all_active_workers):
            # Hook already has a worker
            return 'OK hook already running'

        hook.min_processed_id = self.minimum_id - 1
        hook.active = True
        namespace_id = hook.namespace_id
        worker = WebhookWorker(hook)
        self.workers[namespace_id].add(worker)
        if not worker.started:
            worker.start()
        db_session.commit()
        if not self.polling:
            self._start_polling()
        return 'OK hook started'

    def stop_hook(self, hook_public_id):
        self.log.info('Stopping hook with public id {}'
                      .format(hook_public_id))
        with session_scope() as db_session:
            hook = db_session.query(Webhook). \
                filter_by(public_id=hook_public_id).one()
            hook.active = False
            db_session.commit()

            for worker in self.workers[hook.namespace_id]:
                if worker.public_id == hook_public_id:
                    self.workers[hook.namespace_id].remove(worker)
                    worker.kill()
                    break

        if not set.union(*self.workers.values()):
            # Kill the transaction log poller if there are no active hooks.
            self._stop_polling()
        return 'OK hook stopped'

    def _on_startup(self):
        self._load_hooks()
        for worker in itertools.chain(*self.workers.values()):
            if not worker.started:
                worker.start()
        # Needed for workers to actually start up.
        gevent.sleep(0)
        if self.all_active_workers:
            self._start_polling()

    def _start_polling(self):
        self.log.info('Start polling')
        self.minimum_id = min(hook.min_processed_id + 1
                              for hook in self.all_active_workers)
        self.poller = gevent.spawn(self._poll)
        self.polling = True

    def _stop_polling(self):
        self.log.info('Stop polling')
        self.poller.kill()
        self.polling = False

    def _poll(self):
        """Poll the transaction log forever and publish events. Only runs
        when there are actually active webhooks."""
        while True:
            self._process_log()
            gevent.sleep(self.poll_interval)

    def _process_log(self):
        """Scan the transaction log `self.chunk_size` entries at a time,
        publishing matching events to registered hooks."""
        with session_scope() as db_session:
            self.log.info('Scanning tx log from id: {}'.format(
                self.minimum_id))
            unprocessed_txn_count = db_session.query(
                func.count(Transaction.id)).filter(
                Transaction.table_name == 'message',
                Transaction.id > self.minimum_id).scalar()
            if unprocessed_txn_count:
                self.log.debug('Total of {0} transactions to process'.format(
                    unprocessed_txn_count))

            max_tx_id, = db_session.query(func.max(Transaction.id)).one()
            if max_tx_id is None:
                max_tx_id = 0

            query = db_session.query(Transaction). \
                filter(Transaction.table_name == 'message',
                       Transaction.command == 'insert'). \
                order_by(asc(Transaction.id))
            for transaction in safer_yield_per(query, Transaction.id,
                                               self.minimum_id,
                                               self.chunk_size):
                namespace_id = transaction.namespace_id
                for worker in self.workers[namespace_id]:
                    if worker.match(transaction):
                        worker.enqueue(EventData(transaction))
                self.minimum_id = transaction.id + 1
                self.log.debug('Processed tx. setting min id to {0}'.format(
                    self.minimum_id))

    def _load_hooks(self):
        """Load stored hook parameters from the database. Run once on
        startup."""
        with session_scope() as db_session:
            all_hooks = db_session.query(Webhook).filter_by(active=True).all()
            for hook in all_hooks:
                namespace_id = hook.namespace_id
                self.workers[namespace_id].add(WebhookWorker(hook))
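# `safer_yield_per` above is a project helper; a hedged sketch of what it
# plausibly does (this reimplementation is an assumption, not the project's
# actual code): page through `query` by a monotonically increasing id column
# so the full result set is never loaded into memory at once.
def safer_yield_per(query, id_field, start_id, count):
    cur_id = start_id
    while True:
        results = query.filter(id_field >= cur_id).limit(count).all()
        if not results:
            return
        for result in results:
            yield result
        cur_id = results[-1].id + 1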
def g():
    encoder = APIEncoder()
    with session_scope(self.account_id) as db_session:
        yield encoder.cereal(
            self.search_threads(db_session, search_query)) + '\n'
# A variant of WebhookService that seeds min_processed_id differently
# (starting at -1) and scans the transaction log in manual id-range chunks
# instead of using safer_yield_per.
class WebhookService(object):
    """Asynchronously consumes the transaction log and executes registered
    webhooks."""

    def __init__(self, poll_interval=1, chunk_size=22):
        self.workers = defaultdict(set)
        self.log = get_logger()
        self.poll_interval = poll_interval
        self.chunk_size = chunk_size
        self.minimum_id = -1
        self.poller = None
        self.polling = False
        self.encoder = APIEncoder()
        self._on_startup()

    @property
    def all_active_workers(self):
        worker_sets = self.workers.values()
        if not worker_sets:
            return set()
        return set.union(*worker_sets)

    def register_hook(self, namespace_id, parameters):
        """Register a new webhook.

        Parameters
        ----------
        namespace_id: int
            ID for the namespace to apply the webhook on.
        parameters: dictionary
            Dictionary of the hook parameters.
        """
        # TODO(emfree) do more meaningful parameter validation here
        # (or in the calling code in the API)
        if urlparse.urlparse(parameters.get('callback_url')).scheme \
                != 'https':
            raise ValueError('callback_url MUST be https!')

        with session_scope() as db_session:
            lens = Lens(
                namespace_id=namespace_id,
                subject=parameters.get('subject'),
                thread_public_id=parameters.get('thread'),
                to_addr=parameters.get('to'),
                from_addr=parameters.get('from'),
                cc_addr=parameters.get('cc'),
                bcc_addr=parameters.get('bcc'),
                any_email=parameters.get('any_email'),
                started_before=parameters.get('started_before'),
                started_after=parameters.get('started_after'),
                last_message_before=parameters.get('last_message_before'),
                last_message_after=parameters.get('last_message_after'),
                filename=parameters.get('filename'))

            hook = Webhook(
                namespace_id=namespace_id,
                lens=lens,
                callback_url=parameters.get('callback_url'),
                failure_notify_url=parameters.get('failure_notify_url'),
                include_body=parameters.get('include_body', False),
                active=parameters.get('active', True),
                min_processed_id=self.minimum_id)

            db_session.add(hook)
            db_session.add(lens)
            db_session.commit()
            if hook.active:
                self._start_hook(hook, db_session)
            return self.encoder.cereal(hook, pretty=True)

    def start_hook(self, hook_public_id):
        with session_scope() as db_session:
            hook = db_session.query(Webhook). \
                filter_by(public_id=hook_public_id).one()
            self._start_hook(hook, db_session)

    def _start_hook(self, hook, db_session):
        self.log.info('Starting hook with public id {}'
                      .format(hook.public_id))
        if any(worker.id == hook.id for worker in self.all_active_workers):
            # Hook already has a worker
            return 'OK hook already running'

        hook.min_processed_id = self.minimum_id
        hook.active = True
        namespace_id = hook.namespace_id
        worker = WebhookWorker(hook)
        self.workers[namespace_id].add(worker)
        if not worker.started:
            worker.start()
        db_session.commit()
        if not self.polling:
            self._start_polling()
        return 'OK hook started'

    def stop_hook(self, hook_public_id):
        self.log.info('Stopping hook with public id {}'
                      .format(hook_public_id))
        with session_scope() as db_session:
            hook = db_session.query(Webhook). \
                filter_by(public_id=hook_public_id).one()
            hook.active = False
            db_session.commit()

            for worker in self.workers[hook.namespace_id]:
                if worker.public_id == hook_public_id:
                    self.workers[hook.namespace_id].remove(worker)
                    worker.kill()
                    break

        if not set.union(*self.workers.values()):
            # Kill the transaction log poller if there are no active hooks.
            self._stop_polling()
        return 'OK hook stopped'

    def _on_startup(self):
        self._load_hooks()
        for worker in itertools.chain(*self.workers.values()):
            if not worker.started:
                worker.start()
        # Needed for workers to actually start up.
        gevent.sleep(0)
        if self.all_active_workers:
            self._start_polling()

    def _start_polling(self):
        self.log.info('Start polling')
        self.minimum_id = min(hook.min_processed_id
                              for hook in self.all_active_workers)
        self.poller = gevent.spawn(self._poll)
        self.polling = True

    def _stop_polling(self):
        self.log.info('Stop polling')
        self.poller.kill()
        self.polling = False

    def _poll(self):
        """Poll the transaction log forever and publish events. Only runs
        when there are actually active webhooks."""
        while True:
            self._process_log()
            gevent.sleep(self.poll_interval)

    def _process_log(self):
        """Scan the transaction log `self.chunk_size` entries at a time,
        publishing matching events to registered hooks."""
        with session_scope() as db_session:
            self.log.info('Scanning tx log from id: {}'.
                          format(self.minimum_id))
            unprocessed_txn_count = db_session.query(
                func.count(Transaction.id)).filter(
                Transaction.table_name == 'message',
                Transaction.id > self.minimum_id).scalar()
            if unprocessed_txn_count:
                self.log.debug('Total of {0} transactions to process'.
                               format(unprocessed_txn_count))

            max_tx_id, = db_session.query(func.max(Transaction.id)).one()
            if max_tx_id is None:
                max_tx_id = 0
            for pointer in range(self.minimum_id, max_tx_id,
                                 self.chunk_size):
                # TODO(emfree) add the right index to make this query more
                # performant.
                for transaction in db_session.query(Transaction). \
                        filter(Transaction.table_name == 'message',
                               Transaction.command == 'insert',
                               Transaction.id > pointer,
                               Transaction.id <= pointer +
                               self.chunk_size). \
                        order_by(asc(Transaction.id)):
                    namespace_id = transaction.namespace_id
                    for worker in self.workers[namespace_id]:
                        if worker.match(transaction):
                            worker.enqueue(EventData(transaction))
                    self.minimum_id = transaction.id
                    self.log.debug('Processed tx. setting min id to {0}'.
                                   format(self.minimum_id))

    def _load_hooks(self):
        """Load stored hook parameters from the database. Run once on
        startup."""
        with session_scope() as db_session:
            all_hooks = db_session.query(Webhook).filter_by(active=True).all()
            for hook in all_hooks:
                namespace_id = hook.namespace_id
                self.workers[namespace_id].add(WebhookWorker(hook))
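# A minimal usage sketch for either WebhookService variant; the namespace id,
# URL, and parameter values are illustrative:
service = WebhookService(poll_interval=1, chunk_size=100)
hook_json = service.register_hook(namespace_id=1, parameters={
    'callback_url': 'https://example.com/hooks/new-mail',  # https required
    'any_email': 'alice@example.com',
    'include_body': False,
})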