import asyncio
import logging
import math
from functools import partial
from inspect import iscoroutinefunction
from typing import Callable

from confluent_kafka import Consumer, Message

# AsyncProducer, get_call_repr and _decode_msg_value are assumed to be
# provided elsewhere in this project; their imports are omitted here.

LOGGER = logging.getLogger(__name__)


class AsyncWorker(object):
    """Fetches messages from a Kafka topic and processes them.

    :param consumer_topic: Name of the Kafka topic to consume from.
    :type consumer_topic: str
    :param service: Service function executed every time a job is processed.
        The service must accept a single str or dict argument.
    :type service: callable
    :param consumer_conf: Config for the Kafka consumer.
    :type consumer_conf: dict
    :param failed_topic: Kafka topic to which unprocessed messages from
        consumer_topic are produced.
    :type failed_topic: str
    :param producer_conf: Config for the Kafka producer that publishes
        unprocessed messages.
    :type producer_conf: dict
    """

    def __init__(self, consumer_topic: str, service: Callable,
                 consumer_conf: dict, failed_topic: str, producer_conf: dict):
        self._consumer_topic = consumer_topic
        self._consumer = Consumer(consumer_conf)
        self._service = service
        # Use naming like <project name>_<version>_<consumer_topic><retry/failed>.
        self._failed_topic = failed_topic
        self._producer = AsyncProducer(producer_conf)

    def __repr__(self):
        """Return the string representation of the worker.

        :return: String representation of the worker.
        :rtype: str
        """
        return 'Worker(Consumer={}, consume_topic={})'.format(
            self._consumer, self._consumer_topic)

    def __del__(self):  # pragma: no cover
        # noinspection PyBroadException
        try:
            self._consumer.close()
        except Exception:
            pass

    async def _exec_service(self, message_value):
        # Await the service if it is a coroutine function; call it directly
        # otherwise.
        if iscoroutinefunction(self._service):
            res = await self._service(message_value)
        else:
            res = self._service(message_value)
        return res

    async def _process_message(self, msg: Message):
        """De-serialize the message and execute the service.

        :param msg: Kafka message.
        :type msg: confluent_kafka.Message
        """
        LOGGER.info(
            'Processing Message(topic={}, partition={}, offset={}) ...'.format(
                msg.topic(), msg.partition(), msg.offset()))
        service_repr = get_call_repr(self._service)
        LOGGER.info('Executing job {}'.format(service_repr))
        try:
            message_value = _decode_msg_value(msg.value())
            res = await self._exec_service(message_value)
        except KeyboardInterrupt:
            LOGGER.error('Job was interrupted: {}'.format(msg.offset()))
        except Exception as err:
            LOGGER.exception('Job {} raised an exception: {}'.format(
                msg.offset(), err))
            # Forward the unprocessed message to the failed topic.
            await self._producer.produce(topic=self._failed_topic,
                                         value=msg.value(),
                                         error=str(err))
        else:
            LOGGER.info('Job {} returned: {}'.format(msg.offset(), res))

    @property
    def consumer_topic(self):
        """Return the name of the Kafka topic.

        :return: Name of the Kafka topic.
        :rtype: str
        """
        return self._consumer_topic

    @property
    def consumer(self):
        """Return the Kafka consumer instance.

        :return: Kafka consumer instance.
        :rtype: confluent_kafka.Consumer
        """
        return self._consumer

    @property
    def service(self):
        """Return the service function.

        :return: Callback function, or None if not set.
        :rtype: callable | None
        """
        return self._service

    async def start(self,
                    max_messages: float = math.inf,
                    commit_offsets: bool = True) -> int:
        """Start processing Kafka messages and executing jobs.

        :param max_messages: Maximum number of Kafka messages to process
            before stopping. If not set, the worker runs until interrupted.
        :type max_messages: int
        :param commit_offsets: If True, consumer offsets are committed every
            time a message is processed (default: True).
        :type commit_offsets: bool
        :return: Total number of messages processed.
        :rtype: int
        """
        LOGGER.info('Starting {} ...'.format(self))
        self._consumer.unsubscribe()
        self._consumer.subscribe([self.consumer_topic])
        LOGGER.info('Trying to get messages from position: {}'.format(
            self._consumer.position(self._consumer.assignment())))
        messages_processed = 0
        loop = asyncio.get_event_loop()
        while messages_processed < max_messages:
            # Consume in an executor so other coroutines can run while the
            # worker waits for messages.
            messages = await loop.run_in_executor(
                None, partial(self._consumer.consume, 10, 2.0))
            LOGGER.debug('Trying to get messages from position: {}'.format(
                self._consumer.position(self._consumer.assignment())))
            if not messages:
                LOGGER.debug('No messages found')
                continue
            for msg in messages:
                if msg.error():
                    LOGGER.error('Consumer error: {}'.format(msg.error()))
                    # Skip messages that carry a consumer error.
                    continue
                LOGGER.info('Got message with offset {}'.format(msg.offset()))
                asyncio.create_task(self._process_message(msg))
                if commit_offsets:
                    self._consumer.commit()
                messages_processed += 1
        self._consumer.close()
        return messages_processed
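
# Example usage: a minimal sketch, not part of the original module. The
# broker address, topic names, configs, and the handle_message service below
# are illustrative assumptions.
#
#     async def handle_message(value):
#         print('Processing:', value)
#
#     worker = AsyncWorker(
#         consumer_topic='jobs',
#         service=handle_message,
#         consumer_conf={'bootstrap.servers': 'localhost:9092',
#                        'group.id': 'jobs_worker',
#                        'auto.offset.reset': 'earliest'},
#         failed_topic='myproject_1_jobs_failed',
#         producer_conf={'bootstrap.servers': 'localhost:9092'},
#     )
#     asyncio.run(worker.start(max_messages=100))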
import uuid

from confluent_kafka import Consumer
from google.protobuf.message import DecodeError

# Wrapper and Response are generated protobuf message classes; their import
# path is project-specific and omitted here.


def _make_consumer(broker):
    """Build a consumer in its own group that reads from the earliest offset."""
    return Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        # 'group.id': 'achieved_throughput_measurer',
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000,
    })


def compute_achieved_throughput(broker, partitions_with_offsets, result_dict):
    input_consumer = _make_consumer(broker)
    output_consumer = _make_consumer(broker)

    # Resume from previously recorded offsets when available; otherwise
    # subscribe from the beginning.
    if 'input' in partitions_with_offsets and len(
            partitions_with_offsets['input']) > 0:
        input_consumer.assign(partitions_with_offsets['input'])
    else:
        input_consumer.subscribe(['read', 'update', 'transfer'])

    if 'output' in partitions_with_offsets and len(
            partitions_with_offsets['output']) > 0:
        output_consumer.assign(partitions_with_offsets['output'])
    else:
        output_consumer.subscribe(['responses'])

    # First pass: record the operation and input timestamp of every request.
    while True:
        msgs = input_consumer.consume(timeout=5, num_messages=500)
        if len(msgs) == 0:
            break
        for msg in msgs:
            try:
                wrapped = Wrapper()
                wrapped.ParseFromString(msg.value())
                result = {
                    'operation': msg.topic(),
                    'input_time': msg.timestamp()[1],
                }
                result_dict[wrapped.request_id] = result
            except DecodeError:
                print('Could not decode message at offset {}'.format(
                    msg.offset()))

    partitions_with_offsets['input'] = input_consumer.position(
        input_consumer.assignment())
    input_consumer.close()

    # Second pass: match responses to requests and measure the time span
    # between the first and last matched response.
    total_messages = 0
    start_time = 0
    end_time = 0
    first = True

    while True:
        msgs = output_consumer.consume(timeout=5, num_messages=500)
        if len(msgs) == 0:
            break
        for msg in msgs:
            response = Response()
            response.ParseFromString(msg.value())
            key = response.request_id
            status_code = response.status_code
            if key in result_dict:
                if first:
                    start_time = msg.timestamp()[1] / 1000
                    first = False
                total_messages += 1
                end_time = msg.timestamp()[1] / 1000
                result_dict[key]['output_time'] = msg.timestamp()[1]
                result_dict[key]['status_code'] = status_code

    partitions_with_offsets['output'] = output_consumer.position(
        output_consumer.assignment())
    output_consumer.close()

    print('Total messages considered: ' + str(total_messages))

    if total_messages == 0 or end_time - start_time == 0:
        return 0

    return total_messages / (end_time - start_time)
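
# Example usage: a minimal sketch, not part of the original source. The
# broker address is an illustrative assumption; passing the same dicts
# across calls lets the next call resume from the recorded partition
# offsets.
#
#     offsets = {}
#     results = {}
#     throughput = compute_achieved_throughput('localhost:9092', offsets,
#                                              results)
#     print('Achieved throughput: {:.2f} msg/s'.format(throughput))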