def to_mutation(row):
    """Convert a raw queue event row into a ``MutationOperation``.

    ``row`` is unpacked as ``(id, payload, timestamp, transaction)``. The
    payload has the form ``<version>:<body>``; only version ``'0'`` is
    understood, in which case the body is unpickled into
    ``((schema, table), operation, primary_key_columns, (old, new),
    configuration_version)``.

    ``operation`` is looked up by name as an attribute of
    ``MutationOperation``. Each of the ``old`` / ``new`` row states is
    converted with ``row_converter.to_protobuf`` and passed along only when
    it is truthy.

    Raises:
        RuntimeError: if the payload version is not ``'0'``.
        AssertionError: if neither an old nor a new row state is present.
    """
    # Local name avoids shadowing the ``id`` builtin.
    event_id, payload, timestamp, transaction = row

    version, payload = payload.split(':', 1)
    if version != '0':
        # Exceptions do not apply %-formatting to their arguments the way
        # logging calls do, so the message has to be interpolated here.
        raise RuntimeError('Cannot parse payload version: %s' % (version,))

    # NOTE(review): pickle.loads can execute arbitrary code on hostile input;
    # this presumably relies on payloads coming from a trusted producer --
    # confirm.
    (schema, table), operation, primary_key_columns, (old, new), configuration_version = pickle.loads(payload)

    states = {}
    if old:
        states['old'] = row_converter.to_protobuf(old)
    if new:
        states['new'] = row_converter.to_protobuf(new)

    assert states, 'at least one state must be set'

    return MutationOperation(
        id=event_id,
        schema=schema,
        table=table,
        operation=getattr(MutationOperation, operation),
        identity_columns=primary_key_columns,
        timestamp=to_timestamp(timestamp),
        transaction=transaction,
        **states
    )
def publish(self, **kwargs):
    """Wrap ``kwargs`` in a ``Message`` and hand it to the receiver.

    The message header carries this publisher's ``id``, the next value from
    ``self.sequence`` and the current time converted with ``to_timestamp``.
    The receiver is called with a one-element tuple holding the message.
    """
    deliver = self.receiver
    header = Header(
        publisher=self.id,
        sequence=next(self.sequence),
        timestamp=to_timestamp(time.time()),
    )
    message = Message(header=header, **kwargs)
    deliver((message,))
def to_mutation(row):
    """Build a ``MutationOperation`` from one raw queue event row.

    ``row`` is unpacked as ``(id, payload, timestamp, transaction)``. The
    payload is split once on ``':'`` into a version prefix and a body; only
    version ``'0'`` is accepted, and its body is unpickled into
    ``((schema, table), operation, primary_key_columns, (old, new),
    configuration_version)``.

    ``operation`` names an attribute of ``MutationOperation``. The ``old``
    and ``new`` row states are converted with ``row_converter.to_protobuf``
    and included only when truthy.

    Raises:
        RuntimeError: if the payload version is not ``'0'``.
        AssertionError: if neither an old nor a new row state is present.
    """
    # Local name avoids shadowing the ``id`` builtin.
    event_id, payload, timestamp, transaction = row

    version, payload = payload.split(':', 1)
    if version != '0':
        # Unlike logging calls, exception constructors do not interpolate
        # their arguments, so format the message explicitly.
        raise RuntimeError('Cannot parse payload version: %s' % (version,))

    # NOTE(review): pickle.loads can execute arbitrary code on hostile input;
    # this presumably relies on payloads coming from a trusted producer --
    # confirm.
    (schema, table), operation, primary_key_columns, (old, new), configuration_version = pickle.loads(payload)

    states = {}
    if old:
        states['old'] = row_converter.to_protobuf(old)
    if new:
        states['new'] = row_converter.to_protobuf(new)

    assert states, 'at least one state must be set'

    return MutationOperation(
        id=event_id,
        schema=schema,
        table=table,
        operation=getattr(MutationOperation, operation),
        identity_columns=primary_key_columns,
        timestamp=to_timestamp(timestamp),
        transaction=transaction,
        **states
    )
def publish(self, **kwargs):
    """Send a single ``Message`` built from ``kwargs`` to the receiver.

    A ``Header`` is stamped with this publisher's ``id``, the next sequence
    value and the current time (via ``to_timestamp``); the resulting message
    is passed to ``self.receiver`` inside a one-element tuple.
    """
    receiver = self.receiver
    stamped_header = Header(
        publisher=self.id,
        sequence=next(self.sequence),
        timestamp=to_timestamp(time.time()),
    )
    outgoing = (Message(header=stamped_header, **kwargs),)
    receiver(outgoing)
def run(self):
    """Register as a queue consumer, then relay batches until stopped.

    After registering with ``pgq.register_consumer``, the loop waits on the
    stop event (0.01 per pass) and, for every available batch, publishes a
    begin operation followed by each event converted with ``to_mutation``,
    closes the batch with ``pgq.finish_batch`` and commits. Any exception is
    logged and stored on the result; a clean stop stores ``None``.
    """
    publisher = Publisher(self.stream.push)

    try:
        logger.debug('Started worker.')

        # TODO: this connection needs to timeout in case the lock cannot be
        # grabbed or the connection cannot be established to avoid never
        # exiting
        logger.info('Registering as queue consumer...')
        with self.database.connection() as conn:
            with conn.cursor() as cur:
                register_params = (self.cluster.get_queue_name(self.set), self.consumer)
                cur.execute("SELECT * FROM pgq.register_consumer(%s, %s)", register_params)
                (is_new,) = cur.fetchone()
                registration = 'new' if is_new else 'existing'
                logger.info(
                    'Registered as queue consumer: %s (%s registration).',
                    self.consumer,
                    registration,
                )
                conn.commit()

        logger.info('Ready to relay events.')
        # A truthy wait() result means a stop was requested.
        while not self.__stop_requested.wait(0.01):
            # TODO: this needs a timeout as well
            # TODO: this probably should have a lock on consumption
            with self.database.connection() as conn:
                # Ask the queue whether a batch is ready to be relayed.
                with conn.cursor() as cur:
                    next_batch_params = (self.cluster.get_queue_name(self.set), self.consumer)
                    cur.execute(
                        "SELECT batch_id FROM pgq.next_batch_info(%s, %s)",
                        next_batch_params,
                    )
                    (batch_id,) = cur.fetchone()
                    if batch_id is None:
                        # Nothing to consume on this pass.
                        conn.commit()
                        continue

                # Load the boundaries (ticks) of the batch.
                with conn.cursor() as cur:
                    cur.execute(BATCH_INFO_STATEMENT, (batch_id,))
                    details = cur.fetchone()
                    start_id, start_snapshot, start_timestamp, end_id, end_snapshot, end_timestamp = details

                batch = BatchIdentifier(
                    id=batch_id,
                    node=self.database.id.bytes,
                )

                start_tick = Tick(
                    id=start_id,
                    snapshot=to_snapshot(start_snapshot),
                    timestamp=to_timestamp(start_timestamp),
                )
                end_tick = Tick(
                    id=end_id,
                    snapshot=to_snapshot(end_snapshot),
                    timestamp=to_timestamp(end_timestamp),
                )
                begin = BeginOperation(start=start_tick, end=end_tick)

                with publisher.batch(batch, begin) as publish:
                    # A named cursor is used for the events so the entire
                    # event block does not have to be loaded into memory at
                    # once.
                    with conn.cursor('events') as cur:
                        cur.execute(
                            "SELECT ev_id, ev_data, extract(epoch from ev_time), ev_txid FROM pgq.get_batch_events(%s)",
                            (batch_id,),
                        )

                        # TODO: Publish these in chunks, the full ack + RTT is
                        # a performance killer
                        for event_row in cur:
                            publish(to_mutation(event_row))

                    with conn.cursor() as cur:
                        cur.execute("SELECT * FROM pgq.finish_batch(%s)", (batch_id,))
                        (finished,) = cur.fetchone()

                    # XXX: Not sure why this could happen?
                    if not finished:
                        raise RuntimeError('Could not close batch!')

                # XXX: Since this is outside of the batch block, downstream
                # consumers need to be able to handle receiving the same
                # transaction multiple times, probably by checking a metadata
                # table before starting to apply a batch.
                conn.commit()

                logger.debug('Successfully relayed batch: %s.', FormattedBatchIdentifier(batch))

    except Exception as error:
        logger.exception('Caught exception in worker: %s', error)
        self.__result.set_exception(error)
    else:
        logger.debug('Stopped.')
        self.__result.set_result(None)
def test_timetamp_conversion():
    """Check that a float epoch value converts to the expected Timestamp."""
    expected = Timestamp(
        seconds=1438814328,
        # The nanos value differs from the literal's fractional digits due
        # to floating point arithmetic.
        nanos=940597057,
    )
    assert to_timestamp(1438814328.940597) == expected
def run(self):
    """Register as a queue consumer, then relay batches until stopped.

    After registering with ``pgq.register_consumer``, the loop waits on the
    stop event (0.01 per pass) and, for every available batch, publishes a
    begin operation followed by each event converted with ``to_mutation``,
    closes the batch with ``pgq.finish_batch`` and commits. Any exception is
    logged and stored on the result; a clean stop stores ``None``.
    """
    publisher = Publisher(self.handler.push)

    try:
        logger.debug('Started worker.')

        # TODO: this connection needs to timeout in case the lock cannot be
        # grabbed or the connection cannot be established to avoid never
        # exiting
        logger.info('Registering as queue consumer...')
        with self.database.connection() as conn:
            with conn.cursor() as cur:
                register_params = (self.cluster.get_queue_name(self.set), self.consumer)
                cur.execute("SELECT * FROM pgq.register_consumer(%s, %s)", register_params)
                (is_new,) = cur.fetchone()
                registration = 'new' if is_new else 'existing'
                logger.info(
                    'Registered as queue consumer: %s (%s registration).',
                    self.consumer,
                    registration,
                )
                conn.commit()

        logger.info('Ready to relay events.')
        # A truthy wait() result means a stop was requested.
        while not self.__stop_requested.wait(0.01):
            # TODO: this needs a timeout as well
            # TODO: this probably should have a lock on consumption
            with self.database.connection() as conn:
                # Ask the queue whether a batch is ready to be relayed.
                with conn.cursor() as cur:
                    next_batch_params = (self.cluster.get_queue_name(self.set), self.consumer)
                    cur.execute(
                        "SELECT batch_id FROM pgq.next_batch_info(%s, %s)",
                        next_batch_params,
                    )
                    (batch_id,) = cur.fetchone()
                    if batch_id is None:
                        # Nothing to consume on this pass.
                        conn.commit()
                        continue

                # Load the boundaries (ticks) of the batch.
                with conn.cursor() as cur:
                    cur.execute(BATCH_INFO_STATEMENT, (batch_id,))
                    details = cur.fetchone()
                    start_id, start_snapshot, start_timestamp, end_id, end_snapshot, end_timestamp = details

                batch = BatchIdentifier(
                    id=batch_id,
                    node=self.database.id.bytes,
                )

                start_tick = Tick(
                    id=start_id,
                    snapshot=to_snapshot(start_snapshot),
                    timestamp=to_timestamp(start_timestamp),
                )
                end_tick = Tick(
                    id=end_id,
                    snapshot=to_snapshot(end_snapshot),
                    timestamp=to_timestamp(end_timestamp),
                )
                begin = BeginOperation(start=start_tick, end=end_tick)

                with publisher.batch(batch, begin) as publish:
                    # A named cursor is used for the events so the entire
                    # event block does not have to be loaded into memory at
                    # once.
                    with conn.cursor('events') as cur:
                        cur.execute(
                            "SELECT ev_id, ev_data, extract(epoch from ev_time), ev_txid FROM pgq.get_batch_events(%s)",
                            (batch_id,),
                        )
                        for event_row in cur:
                            publish(to_mutation(event_row))

                    with conn.cursor() as cur:
                        cur.execute("SELECT * FROM pgq.finish_batch(%s)", (batch_id,))
                        (finished,) = cur.fetchone()

                    # XXX: Not sure why this could happen?
                    if not finished:
                        raise RuntimeError('Could not close batch!')

                # XXX: Since this is outside of the batch block, downstream
                # consumers need to be able to handle receiving the same
                # transaction multiple times, probably by checking a metadata
                # table before starting to apply a batch.
                conn.commit()

                logger.debug('Successfully relayed batch %s.', batch)

    except Exception as error:
        logger.exception('Caught exception in worker: %s', error)
        self.__result.set_exception(error)
    else:
        logger.debug('Stopped.')
        self.__result.set_result(None)