Example No. 1
import uuid

from confluent_kafka import Consumer


def get_partitions_with_offsets(broker):
    input_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    output_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    input_consumer.subscribe(['read', 'update', 'transfer'])
    output_consumer.subscribe(['responses'])

    # The first consume() call triggers the group rebalance, so assignment()
    # below returns the partitions this consumer now owns.
    msgs = input_consumer.consume(timeout=5, num_messages=100)
    if len(msgs) == 0:
        print("returned empty")
        return {}

    partitions_with_offsets = {'input': [], 'output': []}

    input_partitions = input_consumer.assignment()
    for p in input_partitions:
        # Keep only the high watermark; the low watermark is not needed here.
        _, h = input_consumer.get_watermark_offsets(p)
        p.offset = h
        partitions_with_offsets['input'].append(p)

    # Consume (and discard) once only to trigger partition assignment
    # on the output consumer.
    output_consumer.consume(timeout=5, num_messages=100)
    output_partitions = output_consumer.assignment()
    for p in output_partitions:
        _, h = output_consumer.get_watermark_offsets(p)
        p.offset = h
        partitions_with_offsets['output'].append(p)

    return partitions_with_offsets
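
A minimal sketch of how this helper might be called (the broker address is an assumption; the returned dict maps 'input'/'output' to lists of confluent_kafka.TopicPartition objects whose offsets point at the current high watermarks):

offsets = get_partitions_with_offsets('localhost:9092')
for side, partitions in offsets.items():
    for tp in partitions:
        print(side, tp.topic, tp.partition, tp.offset)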
Example No. 2
import asyncio
import logging
import math
from functools import partial
from inspect import iscoroutinefunction
from typing import Callable

from confluent_kafka import Consumer, Message

# AsyncProducer, get_call_repr and _decode_msg_value are helpers defined
# elsewhere in the same project.

LOGGER = logging.getLogger(__name__)


class AsyncWorker(object):
    """
    Fetches from Kafka topics and processes them.

    :param consumer_topic: Name of the Kafka topic for consume.
    :type consumer_topic: str
    :param service: Service function which is executed every time when job is processed.
    Service must get as argument str or dict type object.
    :type service: callable
    :param consumer_conf: config for Kafka consumer.
    :type consumer_conf: dict
    :param failed_topic: Kafka topic for produce unprocessed messages from consumer_topic.
    :type failed_topic: str
    :param producer_conf: config for Kafka producer for producing unprocessed messages.
    :type producer_conf: dict
    """
    def __init__(self, consumer_topic: str, service: Callable,
                 consumer_conf: dict, failed_topic: str, producer_conf: dict):

        self._consumer_topic = consumer_topic
        self._consumer = Consumer(consumer_conf)
        self._service = service
        self._failed_topic = failed_topic  # use naming like <project name>_<version>_<consumer_topic><retry/failed>
        self._producer = AsyncProducer(producer_conf)

    def __repr__(self):
        """Return the string representation of the worker.
        :return: String representation of the worker.
        :rtype: str
        """

        return 'Worker(Consumer={}, consume_topic={})'.format(
            self._consumer, self._consumer_topic)

    def __del__(self):  # pragma: no cover
        # noinspection PyBroadException
        try:
            self._consumer.close()
        except Exception:
            pass

    async def _exec_service(self, message_value):
        if iscoroutinefunction(self._service):
            res = await self._service(message_value)
        else:
            res = self._service(message_value)
        return res

    async def _process_message(self, msg: Message):
        """
        De-serialize the message and execute the service.

        :param msg: Kafka message.
        :type msg: confluent_kafka.Message
        """
        LOGGER.info(
            'Processing Message(topic={}, partition={}, offset={}) ...'.format(
                msg.topic(), msg.partition(), msg.offset()))
        service_repr = get_call_repr(self._service)
        LOGGER.info('Executing job {}'.format(service_repr))
        try:
            message_value = _decode_msg_value(msg.value())
            res = await self._exec_service(message_value)

        except KeyboardInterrupt:
            LOGGER.error('Job was interrupted: {}'.format(msg.offset()))

        except Exception as err:
            LOGGER.exception('Job {} raised an exception: {}'.format(
                msg.offset(), err))

            await self._producer.produce(topic=self._failed_topic,
                                         value=msg.value(),
                                         error=str(err))
        else:
            LOGGER.info('Job {} returned: {}'.format(msg.offset(), res))

    @property
    def consumer_topic(self):
        """Return the name of the Kafka topic.
        :return: Name of the Kafka topic.
        :rtype: str
        """
        return self._consumer_topic

    @property
    def consumer(self):
        """Return the Kafka consumer instance.
        :return: Kafka consumer instance.
        :rtype: confluent_kafka.Consumer
        """
        return self._consumer

    @property
    def service(self):
        """Return the service function.
        :return: Callback function, or None if not set.
        :rtype: callable | None
        """
        return self._service

    async def start(self,
                    max_messages: float = math.inf,
                    commit_offsets: bool = True) -> int:
        """Start processing Kafka messages and executing jobs.
        :param max_messages: Maximum number of Kafka messages to process before stopping. If not set, worker runs until
        interrupted.

        :type max_messages: int
        :param commit_offsets: If set to True, consumer offsets are committed every time a message is processed
        (default: True).
        :type commit_offsets: bool
        :return: Total number of messages processed.
        :rtype: int
        """
        LOGGER.info('Starting {} ...'.format(self))

        self._consumer.unsubscribe()
        self._consumer.subscribe([self.consumer_topic])
        LOGGER.info(" Try get messages from position: {}".format(
            self._consumer.position(self._consumer.assignment())))
        messages_processed = 0
        while messages_processed < max_messages:
            loop = asyncio.get_event_loop()
            # Run the blocking consume() in an executor so other coroutines
            # (the message-processing tasks) can make progress meanwhile.
            messages = await loop.run_in_executor(
                None, partial(self._consumer.consume, 10, 2.0))
            LOGGER.debug(" Try get messages from position: {}".format(
                self._consumer.position(self._consumer.assignment())))
            if not messages:
                LOGGER.debug("Messages not found")
                continue
            for msg in messages:
                if msg.error():
                    LOGGER.error("Consumer error: {}".format(msg.error()))
                    continue  # skip messages carrying an error
                LOGGER.info("Got message with offset {}".format(msg.offset()))
                asyncio.create_task(self._process_message(msg))
            if commit_offsets:
                self._consumer.commit()

            # Count every message in the batch, not just the batch itself.
            messages_processed += len(messages)
        self._consumer.close()
        return messages_processed
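
A minimal usage sketch, assuming a local broker; the topic names, group id, and echo-style service function are illustrative, and AsyncProducer is assumed to accept a plain config dict the way Consumer does:

import asyncio

def echo_service(value):
    # Stand-in service: real services receive a str or dict and do the work.
    return value

async def main():
    worker = AsyncWorker(
        consumer_topic='jobs',
        service=echo_service,
        consumer_conf={'bootstrap.servers': 'localhost:9092',
                       'group.id': 'jobs_worker',
                       'auto.offset.reset': 'earliest'},
        failed_topic='myproject_v1_jobs_failed',
        producer_conf={'bootstrap.servers': 'localhost:9092'},
    )
    processed = await worker.start(max_messages=100)
    print('Processed {} messages'.format(processed))

asyncio.run(main())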
Example No. 3
import uuid

from confluent_kafka import Consumer
from google.protobuf.message import DecodeError

# Wrapper and Response are the project's generated protobuf message classes.


def compute_achieved_throughput(broker, partitions_with_offsets, result_dict):
    # partitions_with_offsets doubles as an in/out parameter: it supplies the
    # offsets to resume from and receives the final positions at the end, so
    # it must not be reset here.
    input_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    output_consumer = Consumer({
        'bootstrap.servers': broker,
        'group.id': str(uuid.uuid4()),
        'auto.offset.reset': 'earliest',
        'enable.auto.commit': True,
        'auto.commit.interval.ms': 1000,
        'api.version.request': True,
        'max.poll.interval.ms': 60000
    })

    if 'input' in partitions_with_offsets and len(
            partitions_with_offsets['input']) > 0:
        input_consumer.assign(partitions_with_offsets['input'])
    else:
        input_consumer.subscribe(['read', 'update', 'transfer'])

    if 'output' in partitions_with_offsets and len(
            partitions_with_offsets['output']) > 0:
        output_consumer.assign(partitions_with_offsets['output'])
    else:
        output_consumer.subscribe(['responses'])

    while True:
        msgs = input_consumer.consume(timeout=5, num_messages=500)
        if len(msgs) == 0:
            break
        for msg in msgs:
            try:
                wrapped = Wrapper()
                wrapped.ParseFromString(msg.value())

                result = {
                    'operation': msg.topic(),
                    'input_time': msg.timestamp()[1],
                }
                result_dict[wrapped.request_id] = result
            except DecodeError:
                print("Could not decode message; skipping it")

    partitions_with_offsets['input'] = input_consumer.position(
        input_consumer.assignment())
    input_consumer.close()

    # Track the first and last matching response timestamps (in seconds)
    # so throughput can be computed as messages / elapsed time.
    total_messages = 0
    start_time = 0
    end_time = 0
    first = True

    while True:
        msgs = output_consumer.consume(timeout=5, num_messages=500)
        if len(msgs) == 0:
            break
        for msg in msgs:
            response = Response()
            response.ParseFromString(msg.value())
            key = response.request_id
            status_code = response.status_code
            if key in result_dict:
                if first:
                    start_time = msg.timestamp()[1] / 1000
                    first = False
                total_messages += 1
                end_time = msg.timestamp()[1] / 1000
                result_dict[key]['output_time'] = msg.timestamp()[1]
                result_dict[key]['status_code'] = status_code

    partitions_with_offsets['output'] = output_consumer.position(
        output_consumer.assignment())
    output_consumer.close()

    print("Total messages considered: " + str(total_messages))

    if total_messages == 0 or end_time - start_time == 0:
        return 0

    return total_messages / (end_time - start_time)
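
Examples No. 1 and No. 3 form a two-pass measurement: snapshot the high-watermark offsets first, then replay the topics from those offsets and compute throughput as matched responses per second. A minimal driver sketch (the broker address is an assumption):

broker = 'localhost:9092'
results = {}
offsets = get_partitions_with_offsets(broker)
throughput = compute_achieved_throughput(broker, offsets, results)
print('Achieved throughput: {:.2f} msg/s'.format(throughput))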