Example #1
    async def __consume(self, loop) -> _QueueInternalResult:
        def deserializer(serialized):
            return json.loads(serialized)

        consumer = None
        try:
            consumer = AIOKafkaConsumer(
                self._config.optional_param["topic_name"],
                loop=loop,
                group_id="youyaku_ai_group",
                # isolation_level="read_committed",
                bootstrap_servers=self._config.get_url(),
                value_deserializer=deserializer,
                auto_offset_reset="earliest",
                enable_auto_commit=False,
            )
            await consumer.start()

            # Check the message position against the end offset; if there is no new data, return an empty result
            # TODO: only a single partition is handled here; support for multiple partitions is needed
            partition = list(consumer.assignment())[0]
            position = await consumer.position(partition=partition)
            offset_dict = await consumer.end_offsets(partitions=[partition])
            end = offset_dict[partition]
            if position == end:
                return _QueueInternalResult(result=[], e=None)

            # データを一つ取得
            data = await consumer.getone()
            messages = [data.value]
            await consumer.commit()
        except Exception as e:
            return _QueueInternalResult(result=None, e=e)
        finally:
            if consumer is not None:
                await consumer.stop()
        return _QueueInternalResult(result=messages, e=None)
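
For reference, a minimal self-contained sketch of the same check-then-fetch pattern; the topic name, group id, and broker address are placeholders, and newer aiokafka releases drop the loop argument used above:

import asyncio
import json

from aiokafka import AIOKafkaConsumer


async def consume_one(topic: str, servers: str) -> list:
    # Mirror __consume above: compare the position with the end offset and
    # fetch a single record only when the partition is not exhausted.
    consumer = AIOKafkaConsumer(
        topic,
        bootstrap_servers=servers,
        group_id="example_group",
        value_deserializer=json.loads,
        auto_offset_reset="earliest",
        enable_auto_commit=False,
    )
    await consumer.start()
    try:
        while not consumer.assignment():
            await asyncio.sleep(0.1)  # wait for the group to assign partitions
        partition = list(consumer.assignment())[0]
        position = await consumer.position(partition)
        end = (await consumer.end_offsets([partition]))[partition]
        if position == end:
            return []
        record = await consumer.getone()
        await consumer.commit()
        return [record.value]
    finally:
        await consumer.stop()


# asyncio.run(consume_one("example_topic", "localhost:9092"))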
Example #2
class BaseKafkaTableBuilder(object):
    """Table builder.

    Builds table using single consumer consuming linearly
    from raw topic.
    """
    def __init__(self, topic, loop):
        self.topic = topic
        self.consumer = None
        self.messages = []
        self.loop = loop
        self.table = defaultdict(int)
        self.key_tps = defaultdict(set)
        self._assignment = None

    async def build(self):
        await self._init_consumer()
        await self._build_table()

    def get_key(self, message):
        return json.loads(message.key.decode())

    def get_value(self, message):
        return json.loads(message.value.decode())

    async def _init_consumer(self):
        if not self.consumer:
            self.consumer = AIOKafkaConsumer(
                self.topic,
                loop=self.loop,
                bootstrap_servers=bootstrap_servers,
                auto_offset_reset="earliest",
            )
            await self.consumer.start()
            self._assignment = self.consumer.assignment()

    async def _build_table(self):
        while True:
            message = await self.consumer.getone()
            self.messages.append(message)
            await self._apply(message)
            if await self._positions() == self._highwaters():
                print("Done building table")
                return

    async def _apply(self, message):
        print(message)

    async def _positions(self):
        assert self.consumer
        return {
            tp: await self.consumer.position(tp)
            for tp in self._assignment
        }

    def _highwaters(self):
        assert self.consumer
        return {tp: self.consumer.highwater(tp) for tp in self._assignment}
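
A hedged usage sketch: CountingTableBuilder is a hypothetical subclass that overrides the print-only _apply hook to aggregate counts (assuming message keys deserialize to hashable values), and bootstrap_servers is the module-level global that _init_consumer above reads; the topic name and broker address are placeholders:

import asyncio

bootstrap_servers = "localhost:9092"  # assumed; _init_consumer reads this global


class CountingTableBuilder(BaseKafkaTableBuilder):
    async def _apply(self, message):
        # Aggregate into the table instead of just printing, as the base class does.
        self.table[self.get_key(message)] += 1


async def main() -> None:
    builder = CountingTableBuilder("raw-topic", asyncio.get_running_loop())
    await builder.build()  # consumes until positions reach the highwater marks
    print(dict(builder.table))


# asyncio.run(main())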
Example #3
async def consume_events(app: web.Application) -> None:
    """The main Kafka consumer, which routes messages to processing functions
    or tasks.
    """
    logger = structlog.get_logger(app["safir/config"].logger_name)

    registry = RegistryApi(
        session=app["safir/http_session"],
        url=app["safir/config"].schema_registry_url,
    )
    deserializer = Deserializer(registry=registry)

    consumer_settings = {
        "bootstrap_servers": app["safir/config"].kafka_broker_url,
        "group_id": app["safir/config"].kafka_consumer_group_id,
        "auto_offset_reset": "latest",
        "security_protocol": app["safir/config"].kafka_protocol,
    }
    if consumer_settings["security_protocol"] == "SSL":
        consumer_settings["ssl_context"] = app["safir/kafka_ssl_context"]
    consumer = AIOKafkaConsumer(
        loop=asyncio.get_event_loop(), **consumer_settings
    )

    topic_names = get_configured_topics(app)

    scheduler = await aiojobs.create_scheduler()

    try:
        await consumer.start()
        logger.info("Started Kafka consumer")

        logger.info("Subscribing to Kafka topics", names=topic_names)
        consumer.subscribe(topic_names)

        partitions = consumer.assignment()
        while len(partitions) == 0:
            # Wait for the consumer to get partition assignment
            await asyncio.sleep(1.0)
            partitions = consumer.assignment()
        logger.info(
            "Got initial partition assignment for Kafka topics",
            partitions=[str(p) for p in partitions],
        )

        async for message in consumer:
            try:
                value_info = await deserializer.deserialize(
                    message.value, include_schema=True
                )
            except Exception:
                logger.exception(
                    "Failed to deserialize a Kafka message value",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
                continue

            try:
                await route_message(
                    app=app,
                    scheduler=scheduler,
                    message=value_info["message"],
                    schema_id=value_info["id"],
                    schema=value_info["schema"],
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
            except Exception:
                logger.exception(
                    "Failed to route a Kafka message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )

    except asyncio.CancelledError:
        logger.info("consume_events task got cancelled")
    finally:
        logger.info("consume_events task cancelling")
        await consumer.stop()
        await scheduler.close()
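
The assignment-polling loop above (repeated in the two templatebot consumers below) can be factored into a small helper; a sketch:

import asyncio

from aiokafka import AIOKafkaConsumer


async def wait_for_assignment(consumer: AIOKafkaConsumer, interval: float = 1.0):
    # Poll until the group coordinator hands the consumer its partitions,
    # exactly as the inline while-loop above does.
    partitions = consumer.assignment()
    while len(partitions) == 0:
        await asyncio.sleep(interval)
        partitions = consumer.assignment()
    return partitions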
Example #4
class AIOKafkaRPCClient(object):
    log = logging.getLogger(__name__)

    def __init__(self,
                 kafka_servers='localhost:9092',
                 in_topic='aiokafkarpc_in',
                 out_topic='aiokafkarpc_out',
                 out_partitions=(0, ),
                 max_bytes=1048576,
                 translation_table=[],
                 *,
                 loop):
        self.call = CallObj(self._call_wrapper)

        self._topic_in = in_topic
        self._loop = loop
        self._waiters = {}
        self._out_topic = out_topic
        self._out_partitions = out_partitions
        self.lock = False

        default, ext_hook = get_msgpack_hooks(translation_table)
        self.__consumer = AIOKafkaConsumer(
            self._out_topic,
            loop=loop,
            bootstrap_servers=kafka_servers,
            group_id=None,
            fetch_max_bytes=max_bytes,
            key_deserializer=lambda x: x.decode("utf-8"),
            enable_auto_commit=True,
            value_deserializer=lambda x: msgpack.unpackb(
                x, ext_hook=ext_hook, encoding="utf-8"))

        self.__producer = AIOKafkaProducer(
            bootstrap_servers=kafka_servers,
            loop=loop,
            max_request_size=max_bytes,
            enable_idempotence=False,
            key_serializer=lambda x: x.encode("utf-8"),
            value_serializer=lambda x: msgpack.packb(x, default=default))

    async def run(self):
        await self.__producer.start()
        await self.__consumer.start()

        # FIXME manual partition assignment does not work correctly in aiokafka
        # self.__consumer.assign(
        # [TopicPartition(self._out_topic, p) for p in self._out_partitions])
        #
        # ensure that topic partitions exists
        for tp in self.__consumer.assignment():
            await self.__consumer.position(tp)
        self._consume_task = self._loop.create_task(self.__consume_routine())

    async def close(self, timeout=10):
        await self.__producer.stop()
        if self._waiters:
            await asyncio.wait(self._waiters.values(),
                               loop=self._loop,
                               timeout=timeout)

        self._consume_task.cancel()
        try:
            await self._consume_task
        except asyncio.CancelledError:
            pass
        await self.__consumer.stop()

        for fut in self._waiters.values():
            fut.set_exception(asyncio.TimeoutError())

    def _call_wrapper(self, method):
        async def rpc_call(*args, **kw_args):
            call_id = uuid.uuid4().hex
            ptid = random.choice(self._out_partitions)
            request = (method, args, kw_args, ptid)
            fut = asyncio.Future(loop=self._loop)
            fut.add_done_callback(lambda fut: self._waiters.pop(call_id))
            self._waiters[call_id] = fut
            try:
                await self.__producer.send(self._topic_in,
                                           request,
                                           key=call_id)
            except Exception as err:
                self.log.error("send RPC request failed: %s", err)
                self._waiters[call_id].set_exception(err)
            return await self._waiters[call_id]

        return rpc_call

    async def __consume_routine(self):
        while True:
            message = await self.__consumer.getone()
            call_id = message.key
            response = message.value

            fut = self._waiters.get(call_id)
            if fut is None:
                continue
            if "error" in response:
                self.log.debug(response.get("stacktrace"))
                fut.set_exception(RPCError(response["error"]))
            else:
                fut.set_result(response["result"])
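
A hedged usage sketch of the client: remote_add is a hypothetical method exposed by the RPC server, and the broker address is a placeholder; call proxies attribute access through _call_wrapper, so each method access yields a coroutine that sends a request and awaits the matching response:

import asyncio


async def main() -> None:
    client = AIOKafkaRPCClient(kafka_servers="localhost:9092",
                               loop=asyncio.get_running_loop())
    await client.run()
    try:
        result = await client.call.remote_add(1, 2)  # hypothetical remote method
        print(result)
    finally:
        await client.close()


# asyncio.run(main())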
Example #5
class KafkaConsumer(BaseEventConsumer):
    def __init__(
        self,
        settings: object,
        app: BaseApp,
        serializer: BaseEventSerializer,
        event_topics: List[str],
        event_group: str,
        position: str,
    ) -> None:
        if not hasattr(settings, 'KAFKA_BOOTSTRAP_SERVER'):
            raise Exception('Missing KAFKA_BOOTSTRAP_SERVER config')

        self.max_retries = 10
        if hasattr(settings, 'EVENTY_CONSUMER_MAX_RETRIES'):
            self.max_retries = settings.EVENTY_CONSUMER_MAX_RETRIES

        self.retry_interval = 1000
        if hasattr(settings, 'EVENTY_CONSUMER_RETRY_INTERVAL'):
            self.retry_interval = settings.EVENTY_CONSUMER_RETRY_INTERVAL

        self.retry_backoff_coeff = 2
        if hasattr(settings, 'EVENTY_CONSUMER_RETRY_BACKOFF_COEFF'):
            self.retry_backoff_coeff = settings.EVENTY_CONSUMER_RETRY_BACKOFF_COEFF

        self.app = app
        self.event_topics = event_topics
        self.event_group = event_group
        self.position = position
        self.consumer = None
        self.current_position_checkpoint_callback = None
        self.end_position_checkpoint_callback = None
        bootstrap_servers = settings.KAFKA_BOOTSTRAP_SERVER

        consumer_args: Dict[str, Any]
        consumer_args = {
            'loop': asyncio.get_event_loop(),
            'bootstrap_servers': [bootstrap_servers],
            'enable_auto_commit': False,
            'group_id': self.event_group,
            'value_deserializer': serializer.decode,
            'auto_offset_reset': self.position
        }

        try:
            self.consumer = AIOKafkaConsumer(*self.event_topics,
                                             **consumer_args)

        except Exception as e:
            logger.error(
                f"Unable to connect to the Kafka broker {bootstrap_servers} : {e}"
            )
            raise e

    def set_current_position_checkpoint_callback(self, checkpoint_callback):
        self.current_position_checkpoint_callback = checkpoint_callback

    def set_end_position_checkpoint_callback(self, checkpoint_callback):
        self.end_position_checkpoint_callback = checkpoint_callback

    async def current_position(self):
        # Warning: this method returns last committed offsets for each assigned partition
        position = {}
        for partition in self.consumer.assignment():
            offset = await self.consumer.committed(partition) or 0
            position[partition] = offset
        return position

    async def consumer_position(self):
        # Warning: this method returns current offsets for each assigned partition
        position = {}
        for partition in self.consumer.assignment():
            position[partition] = await self.consumer.position(partition)
        return position

    async def end_position(self):
        position = {}
        for partition in self.consumer.assignment():
            offset = (await self.consumer.end_offsets([partition]))[partition]
            position[partition] = offset
        return position

    async def is_checkpoint_reached(self, checkpoint):
        for partition in self.consumer.assignment():
            position = (await self.consumer.position(partition))
            if position < checkpoint[partition]:
                return False
        return True

    async def start(self):
        logger.info(
            f'Starting kafka consumer on topic {self.event_topics} with group {self.event_group}'
        )
        try:
            await self.consumer.start()
        except Exception as e:
            logger.error(
                f'An error occurred while starting kafka consumer '
                f'on topic {self.event_topics} with group {self.event_group}: {e}'
            )
            sys.exit(1)

        current_position_checkpoint = None
        end_position_checkpoint = None
        if self.event_group is not None:
            current_position = await self.current_position()
            end_position = await self.end_position()
            logger.debug(f'Current position : {current_position}')
            logger.debug(f'End position : {end_position}')

            if self.position == 'earliest':
                current_position_checkpoint = current_position
                end_position_checkpoint = end_position
                await self.consumer.seek_to_beginning()

        async for msg in self.consumer:
            retries = 0
            sleep_duration_in_ms = self.retry_interval
            while True:
                try:
                    event = msg.value
                    corr_id = event.correlation_id

                    logger.info(f"[CID:{corr_id}] Start handling {event.name}")
                    await event.handle(app=self.app, corr_id=corr_id)
                    logger.info(f"[CID:{corr_id}] End handling {event.name}")

                    if self.event_group is not None:
                        logger.debug(
                            f"[CID:{corr_id}] Commit Kafka transaction")
                        await self.consumer.commit()

                    logger.debug(
                        f"[CID:{corr_id}] Continue with the next message")
                    # break the retry loop
                    break
                except Exception:
                    logger.exception(
                        f'[CID:{corr_id}] An error occurred while handling received message.'
                    )

                    if retries != self.max_retries:
                        # increase the number of retries
                        retries = retries + 1

                        sleep_duration_in_s = int(sleep_duration_in_ms / 1000)
                        logger.info(
                            f"[CID:{corr_id}] Sleeping {sleep_duration_in_s}s a before retrying..."
                        )
                        await asyncio.sleep(sleep_duration_in_s)

                        # increase the sleep duration
                        sleep_duration_in_ms = sleep_duration_in_ms * self.retry_backoff_coeff

                    else:
                        logger.error(
                            f'[CID:{corr_id}] Unable to handle message within {1 + self.max_retries} tries. Stopping process'
                        )
                        sys.exit(1)

            if current_position_checkpoint and await self.is_checkpoint_reached(
                    current_position_checkpoint):
                logger.info('Current position checkpoint reached')
                if self.current_position_checkpoint_callback:
                    await self.current_position_checkpoint_callback()
                current_position_checkpoint = None

            if end_position_checkpoint and await self.is_checkpoint_reached(
                    end_position_checkpoint):
                logger.info('End position checkpoint reached')
                if self.end_position_checkpoint_callback:
                    await self.end_position_checkpoint_callback()
                end_position_checkpoint = None
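
The retry loop above doubles the sleep between attempts (retry_backoff_coeff defaults to 2). A small sketch of the schedule it produces:

def backoff_schedule(retry_interval_ms: int = 1000, coeff: int = 2,
                     max_retries: int = 10):
    # Yields the sleep durations, in whole seconds, used before each retry:
    # 1, 2, 4, 8, ... with the defaults above.
    duration_ms = retry_interval_ms
    for _ in range(max_retries):
        yield int(duration_ms / 1000)
        duration_ms *= coeff


# list(backoff_schedule()) -> [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]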
Example #6
async def consume_kafka(app):
    """Consume Kafka messages directed to templatebot's functionality."""
    logger = structlog.get_logger(app["root"]["api.lsst.codes/loggerName"])

    registry = RegistryApi(
        session=app["root"]["api.lsst.codes/httpSession"],
        url=app["root"]["templatebot/registryUrl"],
    )
    deserializer = Deserializer(registry=registry)

    consumer_settings = {
        "bootstrap_servers": app["root"]["templatebot/brokerUrl"],
        "group_id": app["root"]["templatebot/slackGroupId"],
        "auto_offset_reset": "latest",
        "ssl_context": app["root"]["templatebot/kafkaSslContext"],
        "security_protocol": app["root"]["templatebot/kafkaProtocol"],
    }
    consumer = AIOKafkaConsumer(loop=asyncio.get_event_loop(),
                                **consumer_settings)

    try:
        await consumer.start()
        logger.info("Started Kafka consumer", **consumer_settings)

        topic_names = [
            app["root"]["templatebot/appMentionTopic"],
            app["root"]["templatebot/messageImTopic"],
            app["root"]["templatebot/interactionTopic"],
        ]
        logger.info("Subscribing to Kafka topics", names=topic_names)
        consumer.subscribe(topic_names)

        logger.info("Finished subscribing ot Kafka topics", names=topic_names)

        partitions = consumer.assignment()
        logger.info("Waiting on partition assignment", names=topic_names)
        while len(partitions) == 0:
            # Wait for the consumer to get partition assignment
            await asyncio.sleep(1.0)
            partitions = consumer.assignment()
        logger.info(
            "Initial partition assignment",
            partitions=[str(p) for p in partitions],
        )

        async for message in consumer:
            logger.info(
                "Got Kafka message from sqrbot",
                topic=message.topic,
                partition=message.partition,
                offset=message.offset,
            )
            try:
                message_info = await deserializer.deserialize(message.value)
            except Exception:
                logger.exception(
                    "Failed to deserialize a message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
                continue

            event = message_info["message"]
            logger.debug(
                "New message",
                topic=message.topic,
                partition=message.partition,
                offset=message.offset,
                contents=event,
            )

            try:
                await route_event(
                    event=message_info["message"],
                    app=app,
                    schema_id=message_info["id"],
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
            except Exception:
                logger.exception(
                    "Failed to handle message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )

    except asyncio.CancelledError:
        logger.info("consume_kafka task got cancelled")
    finally:
        logger.info("consume_kafka task cancelling")
        await consumer.stop()
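
The templatebot/kafkaSslContext value above is a prebuilt ssl.SSLContext. aiokafka ships a helper for constructing one; a sketch with placeholder certificate paths:

from aiokafka.helpers import create_ssl_context

ssl_context = create_ssl_context(
    cafile="/etc/kafka/ca.crt",        # CA that signed the broker certificates
    certfile="/etc/kafka/client.crt",  # client certificate
    keyfile="/etc/kafka/client.key",   # client private key
)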
Example #7
async def consume_events(app):
    """Consume events from templatebot-related topics in SQuaRE Events (Kafka).

    Notes
    -----
    Templatebot has *two* Kafka consumers. This is one, and the other is
    in `templatebot.slack`. The Slack consumer only listens to topics from
    Slack (SQuaRE Bot), and is focused on responding to Slack-based workflows.
    This consumer is focused on backend-driven events, such as the
    ``templatebot-render_ready`` topic.
    """
    logger = structlog.get_logger(app["root"]["api.lsst.codes/loggerName"])

    registry = RegistryApi(
        session=app["root"]["api.lsst.codes/httpSession"],
        url=app["root"]["templatebot/registryUrl"],
    )
    deserializer = Deserializer(registry=registry)

    consumer_settings = {
        "bootstrap_servers": app["root"]["templatebot/brokerUrl"],
        "group_id": app["root"]["templatebot/eventsGroupId"],
        "auto_offset_reset": "latest",
        "ssl_context": app["root"]["templatebot/kafkaSslContext"],
        "security_protocol": app["root"]["templatebot/kafkaProtocol"],
    }
    consumer = AIOKafkaConsumer(loop=asyncio.get_event_loop(),
                                **consumer_settings)

    try:
        await consumer.start()
        logger.info("Started Kafka consumer for events", **consumer_settings)

        topic_names = [app["root"]["templatebot/renderreadyTopic"]]
        logger.info("Subscribing to Kafka topics", names=topic_names)
        consumer.subscribe(topic_names)

        partitions = consumer.assignment()
        while len(partitions) == 0:
            # Wait for the consumer to get partition assignment
            await asyncio.sleep(1.0)
            partitions = consumer.assignment()
        logger.info(
            "Initial partition assignment for event topics",
            partitions=[str(p) for p in partitions],
        )

        async for message in consumer:
            try:
                message_info = await deserializer.deserialize(
                    message.value, include_schema=True)
            except Exception:
                logger.exception(
                    "Failed to deserialize an event message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
                continue

            event = message_info["message"]
            logger.debug(
                "New event message",
                topic=message.topic,
                partition=message.partition,
                offset=message.offset,
                contents=event,
            )

            try:
                await route_event(
                    app=app,
                    event=message_info["message"],
                    schema_id=message_info["id"],
                    schema=message_info["schema"],
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )
            except Exception:
                logger.exception(
                    "Failed to handle event message",
                    topic=message.topic,
                    partition=message.partition,
                    offset=message.offset,
                )

    except asyncio.CancelledError:
        logger.info("consume_events task got cancelled")
    finally:
        logger.info("consume_events task cancelling")
        await consumer.stop()
Example #8
class KafkaConsumer(BaseConsumer):
    """KafkaConsumer is a client that publishes records to the Kafka cluster.
    """
    _client: KafkaClient
    serializer: BaseSerializer
    _bootstrap_servers: Union[str, List[str]]
    _client_id: str
    _topics: List[str]
    _group_id: str
    _auto_offset_reset: str
    _max_retries: int
    _retry_interval: int
    _retry_backoff_coeff: int
    _isolation_level: str
    _assignors_data: Dict[str, Any]
    _store_manager: BaseStoreManager
    _running: bool
    _kafka_consumer: AIOKafkaConsumer
    _transactional_manager: KafkaTransactionalManager

    __current_offsets: Dict[str, BasePositioning]
    __last_offsets: Dict[str, BasePositioning]
    __last_committed_offsets: Dict[str, BasePositioning]

    _loop: asyncio.AbstractEventLoop
    logger: Logger

    def __init__(self, client: KafkaClient, serializer: BaseSerializer, topics: List[str],
                 loop: asyncio.AbstractEventLoop, client_id: str = None, group_id: str = None,
                 auto_offset_reset: str = 'earliest', max_retries: int = 10, retry_interval: int = 1000,
                 retry_backoff_coeff: int = 2, assignors_data: Dict[str, Any] = None,
                 store_manager: BaseStoreManager = None, isolation_level: str = 'read_uncommitted',
                 transactional_manager: KafkaTransactionalManager = None) -> None:
        """
        KafkaConsumer constructor

        Args:
            client (KafkaClient): Initialization class (contains client_id / bootstrap_servers)
            serializer (BaseSerializer): Serializer used to encode & decode events
            topics (List[str]): List of topics to subscribe to
            loop (asyncio.AbstractEventLoop): Asyncio loop
            client_id (str): Client name (if None, KafkaConsumer uses the KafkaClient client_id)
            group_id (str): Name of the consumer group to use for fetching and committing offsets.
                            If None, offset commits are disabled
            auto_offset_reset (str): A policy for resetting offsets on OffsetOutOfRange errors: 'earliest' moves to
                                     the oldest available message, 'latest' moves to the most recent.
                                     Any other value raises an exception
            max_retries (int): Number of retries before critical failure
            retry_interval (int): Interval before the next retry
            retry_backoff_coeff (int): Backoff coefficient for subsequent retries
            assignors_data (Dict[str, Any]): Dict with assignor information, more details in
                                             StatefulsetPartitionAssignor
            store_manager (BaseStoreManager): If set, the consumer calls initialize_store_manager();
                                              otherwise listen_event is started
            isolation_level (str): Controls how to read messages written transactionally. If set to read_committed,
                                   only transactional messages which have been committed are returned.
                                   If set to read_uncommitted, all messages are returned, even transactional messages
                                   which have been aborted. Non-transactional messages are returned unconditionally
                                   in either mode.

        Returns:
            None
        """
        super().__init__()
        self.logger = getLogger('tonga')

        # Register KafkaClient
        self._client = client

        # Set default assignors_data if is None
        if assignors_data is None:
            assignors_data = {}

        # Create client_id
        if client_id is None:
            self._client_id = self._client.client_id + '-' + str(self._client.cur_instance)
        else:
            self._client_id = client_id

        if isinstance(serializer, BaseSerializer):
            self.serializer = serializer
        else:
            raise BadSerializer

        self._bootstrap_servers = self._client.bootstrap_servers
        self._topics = topics
        self._group_id = group_id
        self._auto_offset_reset = auto_offset_reset
        self._max_retries = max_retries
        self._retry_interval = retry_interval
        self._retry_backoff_coeff = retry_backoff_coeff
        self._isolation_level = isolation_level
        self._assignors_data = assignors_data
        self._store_manager = store_manager
        self._running = False
        self._loop = loop

        self.__current_offsets = dict()
        self.__last_offsets = dict()
        self.__last_committed_offsets = dict()

        self._transactional_manager = transactional_manager

        try:
            self.logger.info(json.dumps(assignors_data))
            statefulset_assignor = StatefulsetPartitionAssignor(bytes(json.dumps(assignors_data), 'utf-8'))
            self._kafka_consumer = AIOKafkaConsumer(*self._topics, loop=self._loop,
                                                    bootstrap_servers=self._bootstrap_servers,
                                                    client_id=self._client_id, group_id=group_id,
                                                    value_deserializer=self.serializer.decode,
                                                    auto_offset_reset=self._auto_offset_reset,
                                                    isolation_level=self._isolation_level, enable_auto_commit=False,
                                                    key_deserializer=KafkaKeySerializer.decode,
                                                    partition_assignment_strategy=[statefulset_assignor])
        except KafkaError as err:
            self.logger.exception('%s', err.__str__())
            raise err
        except ValueError as err:
            self.logger.exception('%s', err.__str__())
            raise AioKafkaConsumerBadParams
        self.logger.debug('Create new consumer %s, group_id %s', self._client_id, group_id)

    async def start_consumer(self) -> None:
        """
        Start consumer

        Returns:
            None

        Raises:
            AttributeError: KafkaConsumerError
            ValueError: If KafkaError or KafkaTimeoutError is raised, the exception value is contained
                        in KafkaConsumerError.msg
        """
        if self._running:
            raise KafkaConsumerAlreadyStartedError
        for retry in range(2):
            try:
                await self._kafka_consumer.start()
                self._running = True
                self.logger.debug('Start consumer : %s, group_id : %s, retry : %s', self._client_id, self._group_id,
                                  retry)
            except KafkaTimeoutError as err:
                self.logger.exception('%s', err.__str__())
                await asyncio.sleep(1)
            except KafkaConnectionError as err:
                self.logger.exception('%s', err.__str__())
                await asyncio.sleep(1)
            except KafkaError as err:
                self.logger.exception('%s', err.__str__())
                raise err
            else:
                break
        else:
            raise ConsumerConnectionError

    async def stop_consumer(self) -> None:
        """
        Stop consumer

        Returns:
            None

        Raises:
            AttributeError: KafkaConsumerError
            ValueError: If KafkaError is raised, the exception value is contained
                        in KafkaConsumerError.msg
        """
        if not self._running:
            raise KafkaConsumerNotStartedError
        try:
            await self._kafka_consumer.stop()
            self._running = False
            self.logger.debug('Stop consumer : %s, group_id : %s', self._client_id, self._group_id)
        except KafkaTimeoutError as err:
            self.logger.exception('%s', err.__str__())
            raise ConsumerKafkaTimeoutError
        except KafkaError as err:
            self.logger.exception('%s', err.__str__())
            raise err

    def is_running(self) -> bool:
        return self._running

    async def get_last_committed_offsets(self) -> Dict[str, BasePositioning]:
        """
        Get last committed offsets

        Returns:
            Dict[str, KafkaPositioning]: Contains all assigned partitions with last committed offsets
        """
        last_committed_offsets: Dict[str, BasePositioning] = dict()
        self.logger.debug('Get last committed offsets')
        if self._group_id is None:
            raise IllegalOperation
        for tp in self._kafka_consumer.assignment():
            offset = await self._kafka_consumer.committed(tp)
            last_committed_offsets[KafkaPositioning.make_class_assignment_key(tp.topic, tp.partition)] = \
                KafkaPositioning(tp.topic, tp.partition, offset)
        return last_committed_offsets

    async def get_current_offsets(self) -> Dict[str, BasePositioning]:
        """
        Get current offsets

        Returns:
            Dict[str, KafkaPositioning]: Contains all assigned partitions with current offsets
        """
        current_offsets: Dict[str, BasePositioning] = dict()
        self.logger.debug('Get current offsets')
        for tp in self._kafka_consumer.assignment():
            try:
                offset = await self._kafka_consumer.position(tp)
                current_offsets[KafkaPositioning.make_class_assignment_key(tp.topic, tp.partition)] = \
                    KafkaPositioning(tp.topic, tp.partition, offset)
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise err
        return current_offsets

    async def get_beginning_offsets(self) -> Dict[str, BasePositioning]:
        """
        Get beginning offsets

        Returns:
            Dict[str, KafkaPositioning]: Contains all assigned partitions with beginning offsets
        """
        beginning_offsets: Dict[str, BasePositioning] = dict()
        self.logger.debug('Get beginning offsets')
        for tp in self._kafka_consumer.assignment():
            try:
                offset = (await self._kafka_consumer.beginning_offsets([tp]))[tp]
                beginning_offsets[KafkaPositioning.make_class_assignment_key(tp.topic, tp.partition)] = \
                    KafkaPositioning(tp.topic, tp.partition, offset)
            except KafkaTimeoutError as err:
                self.logger.exception('%s', err.__str__())
                raise ConsumerKafkaTimeoutError
            except UnsupportedVersionError as err:
                self.logger.exception('%s', err.__str__())
                raise err
        return beginning_offsets

    async def get_last_offsets(self) -> Dict[str, BasePositioning]:
        """
        Get last offsets

        Returns:
            Dict[str, KafkaPositioning]: Contains all assigned partitions with last offsets
        """
        last_offsets: Dict[str, BasePositioning] = dict()
        self.logger.debug('Get last offsets')
        for tp in self._kafka_consumer.assignment():
            try:
                offset = (await self._kafka_consumer.end_offsets([tp]))[tp]
                last_offsets[KafkaPositioning.make_class_assignment_key(tp.topic, tp.partition)] = \
                    KafkaPositioning(tp.topic, tp.partition, offset)
            except KafkaTimeoutError as err:
                self.logger.exception('%s', err.__str__())
                raise ConsumerKafkaTimeoutError
            except UnsupportedVersionError as err:
                self.logger.exception('%s', err.__str__())
                raise err
        return last_offsets

    async def load_offsets(self, mod: str = 'earliest') -> None:
        """
        This method is called before consuming topics; it assigns positions to the consumer

        Args:
            mod: Start position of consumer (earliest, latest, committed)

        Returns:
            None
        """
        self.logger.debug('Load offset mod : %s', mod)
        if not self._running:
            await self.start_consumer()

        if mod == 'latest':
            await self.seek_to_end()
        elif mod == 'earliest':
            await self.seek_to_beginning()
        elif mod == 'committed':
            await self.seek_to_last_commit()
        else:
            raise KafkaConsumerError

        self.__current_offsets = await self.get_current_offsets()
        self.__last_offsets = await self.get_last_offsets()

        if self._group_id is not None:
            self.__last_committed_offsets = await self.get_last_committed_offsets()
            for key, kafka_positioning in self.__last_committed_offsets.items():
                if kafka_positioning.get_current_offset() is None:
                    self.logger.debug('Seek to beginning, no committed offsets was found')
                    await self.seek_to_beginning(kafka_positioning)

    async def debug_print_all_msg(self):
        """
        Debug method, useful for displaying all messages contained in the assigned topic/partitions

        Returns:
            None
        """
        while True:
            message = await self._kafka_consumer.getone()
            self.logger.info('----------------------------------------------------------------------------------------')
            self.logger.info('Topic %s, Partition %s, Offset %s, Key %s, Value %s, Headers %s',
                             message.topic, message.partition, message.offset, message.key, message.value,
                             message.headers)
            self.logger.info('----------------------------------------------------------------------------------------')

    async def listen_records(self, mod: str = 'earliest') -> None:
        """
        Listens for records from the assigned topics / partitions

        Args:
            mod: Start position of consumer (earliest, latest, committed)

        Returns:
            None
        """
        if not self._running:
            await self.load_offsets(mod)

        self.pprint_consumer_offsets()

        async for msg in self._kafka_consumer:
            # Debug Display
            self.logger.debug("---------------------------------------------------------------------------------")
            self.logger.debug('New Message on consumer %s, Topic %s, Partition %s, Offset %s, '
                              'Key %s, Value %s, Headers %s', self._client_id, msg.topic, msg.partition,
                              msg.offset, msg.key, msg.value, msg.headers)
            self.pprint_consumer_offsets()
            self.logger.debug("---------------------------------------------------------------------------------")

            key = KafkaPositioning.make_class_assignment_key(msg.topic, msg.partition)
            self.__current_offsets[key].set_current_offset(msg.offset)
            if self._transactional_manager is not None:
                self._transactional_manager.set_ctx(KafkaTransactionContext(msg.topic, msg.partition,
                                                                            msg.offset, self._group_id))
            # self.last_offsets = await self.get_last_offsets()

            sleep_duration_in_ms = self._retry_interval
            for retries in range(0, self._max_retries):
                try:
                    record_class = msg.value['record_class']
                    handler_class = msg.value['handler_class']

                    if handler_class is None:
                        self.logger.debug('Empty handler')
                        break

                    self.logger.debug('Event name : %s  Event content :\n%s',
                                      record_class.event_name(), record_class.__dict__)

                    # Calls handle if event is instance BaseHandler
                    if isinstance(handler_class, BaseEventHandler):
                        transactional = await handler_class.handle(event=record_class)
                    elif isinstance(handler_class, BaseCommandHandler):
                        transactional = await handler_class.execute(event=record_class)
                    elif isinstance(handler_class, BaseResultHandler):
                        transactional = await handler_class.on_result(event=record_class)
                    else:
                        # Otherwise raise KafkaConsumerUnknownHandler
                        raise UnknownHandler

                    # If result is none (no transactional process), check if consumer has an
                    # group_id (mandatory to commit in Kafka)
                    if transactional is None and self._group_id is not None:
                        # Check if next commit was possible (Kafka offset)
                        if self.__last_committed_offsets[key] is None or \
                                self.__last_committed_offsets[key].get_current_offset() <= \
                                self.__current_offsets[key].get_current_offset():

                            self.logger.debug('Commit msg %s in topic %s partition %s offset %s',
                                              record_class.event_name(), msg.topic, msg.partition,
                                              self.__current_offsets[key].get_current_offset() + 1)
                            tp = self.__current_offsets[key].to_topics_partition()
                            await self._kafka_consumer.commit(
                                {tp: self.__current_offsets[key].get_current_offset() + 1})
                            self.__last_committed_offsets[key].set_current_offset(msg.offset + 1)

                    # Transactional process no commit
                    elif transactional:
                        self.logger.debug('Transaction end')
                        self.__current_offsets = await self.get_current_offsets()
                        self.__last_committed_offsets = await self.get_last_committed_offsets()
                    # Otherwise raise KafkaConsumerUnknownHandlerReturn
                    elif transactional is None and self._group_id is None:
                        pass
                    else:
                        raise UnknownHandlerReturn

                    # Break if everything was successfully processed
                    break
                except UninitializedStore as err:
                    self.logger.exception('%s', err.__str__())
                    retries = 0
                    await asyncio.sleep(10)
                except IllegalStateError as err:
                    self.logger.exception('%s', err.__str__())
                    raise NoPartitionAssigned
                except ValueError as err:
                    self.logger.exception('%s', err.__str__())
                    raise OffsetError
                except CommitFailedError as err:
                    self.logger.exception('%s', err.__str__())
                    raise err
                except (KafkaError, HandlerException) as err:
                    self.logger.exception('%s', err.__str__())
                    sleep_duration_in_s = int(sleep_duration_in_ms / 1000)
                    await asyncio.sleep(sleep_duration_in_s)
                    sleep_duration_in_ms = sleep_duration_in_ms * self._retry_backoff_coeff
                    if retries == self._max_retries - 1:
                        await self.stop_consumer()
                        self.logger.error('Max retries, close consumer and exit')
                        exit(1)

    async def _refresh_offsets(self) -> None:
        """
        This method refresh __current_offsets / __last_offsets / __last_committed_offsets

        Returns:
            None
        """
        self.logger.debug('Call refresh offsets')

        self.__current_offsets = await self.get_current_offsets()
        self.__last_offsets = await self.get_last_offsets()

        if self._group_id is not None:
            self.__last_committed_offsets = await self.get_last_committed_offsets()
        else:
            raise IllegalOperation

    async def check_if_store_is_ready(self) -> None:
        """ If store is ready consumer set store initialize flag to true

        Returns:
            None
        """

        # Check if the local store is initialized
        self.logger.info('Started check_if_store_is_ready')
        if not self._store_manager.get_local_store().get_persistency().is_initialize():
            key = KafkaPositioning.make_class_assignment_key(self._store_manager.get_topic_store(),
                                                             self._client.cur_instance)
            if self.__last_offsets[key].get_current_offset() == 0:
                self._store_manager.__getattribute__('_initialize_local_store').__call__()
                self.logger.info('Local store was initialized')
            elif self.__current_offsets[key].get_current_offset() == self.__last_offsets[key].get_current_offset():
                self._store_manager.__getattribute__('_initialize_local_store').__call__()
                self.logger.info('Local store was initialized')

        # Check if the global store is initialized
        if not self._store_manager.get_global_store().get_persistency().is_initialize():
            for key, positioning in self.__last_offsets.items():
                if self._client.cur_instance != positioning.get_partition():
                    if positioning.get_current_offset() == 0:
                        continue
                    elif positioning.get_current_offset() == self.__current_offsets[key].get_current_offset():
                        continue
                    else:
                        break
            else:
                self._store_manager.__getattribute__('_initialize_global_store').__call__()
                self.logger.info('Global store was initialized')

    async def listen_store_records(self, rebuild: bool = False) -> None:
        """
        Listens for store-construction events

        Args:
            rebuild (bool): if True, the consumer seeks to the first offset to rebuild its own state

        Returns:
            None
        """
        if self._store_manager is None:
            raise KeyError

        self.logger.info('Start listen store records')

        await self.start_consumer()

        await self._store_manager.__getattribute__('_initialize_stores').__call__()

        if not self._running:
            raise KafkaConsumerError('Fail to start tongaConsumer', 500)

        # Check if store is ready
        await self._refresh_offsets()

        await self.check_if_store_is_ready()
        self.pprint_consumer_offsets()

        async for msg in self._kafka_consumer:
            positioning_key = KafkaPositioning.make_class_assignment_key(msg.topic, msg.partition)
            self.__current_offsets[positioning_key].set_current_offset(msg.offset)

            # Debug Display
            self.logger.debug("---------------------------------------------------------------------------------")
            self.logger.debug('New Message on consumer %s, Topic %s, Partition %s, Offset %s, '
                              'Key %s, Value %s, Headers %s', self._client_id, msg.topic, msg.partition,
                              msg.offset, msg.key, msg.value, msg.headers)
            self.pprint_consumer_offsets()
            self.logger.debug("---------------------------------------------------------------------------------")

            # Check if store is ready
            await self.check_if_store_is_ready()

            sleep_duration_in_ms = self._retry_interval
            for retries in range(0, self._max_retries):
                try:
                    record_class: BaseRecord = msg.value['record_class']
                    handler_class: BaseStoreRecordHandler = msg.value['handler_class']

                    self.logger.debug('Store event name : %s\nEvent content :\n%s\n',
                                      record_class.event_name(), record_class.__dict__)

                    positioning = self.__current_offsets[positioning_key]
                    if self._client.cur_instance == msg.partition:
                        # Calls local_state_handler if event is instance BaseStorageBuilder
                        if rebuild and not self._store_manager.get_local_store().get_persistency().is_initialize():
                            if isinstance(record_class, StoreRecord):
                                self.logger.debug('Call local_store_handler')
                                await handler_class.local_store_handler(store_record=record_class,
                                                                        positioning=positioning)
                            else:
                                raise UnknownStoreRecordHandler
                    elif self._client.cur_instance != msg.partition:
                        if isinstance(record_class, StoreRecord):
                            self.logger.debug('Call global_store_handler')
                            await handler_class.global_store_handler(store_record=record_class, positioning=positioning)
                        else:
                            raise UnknownStoreRecordHandler

                    # Check if store is ready
                    await self.check_if_store_is_ready()

                    # Break if everything was successfully processed
                    break
                except IllegalStateError as err:
                    self.logger.exception('%s', err.__str__())
                    raise NoPartitionAssigned
                except ValueError as err:
                    self.logger.exception('%s', err.__str__())
                    raise OffsetError
                except CommitFailedError as err:
                    self.logger.exception('%s', err.__str__())
                    raise err
                except (KafkaError, HandlerException) as err:
                    self.logger.exception('%s', err.__str__())
                    sleep_duration_in_s = int(sleep_duration_in_ms / 1000)
                    await asyncio.sleep(sleep_duration_in_s)
                    sleep_duration_in_ms = sleep_duration_in_ms * self._retry_backoff_coeff
                    if retries == self._max_retries - 1:
                        await self.stop_consumer()
                        self.logger.error('Max retries, close consumer and exit')
                        exit(1)

    def is_lag(self) -> bool:
        """
        Does the consumer have lag?

        Returns:
            bool: True if the consumer is lagging, False if it is up to date
        """
        if self.__last_offsets == self.__current_offsets:
            return False
        return True

    async def seek_to_beginning(self, positioning: BasePositioning = None) -> None:
        """
        Seek to the first offset, mod 'earliest'.
        If positioning is None the consumer seeks all assigned partitions to the beginning

        Args:
            positioning (BasePositioning): Positioning class containing (topic name / partition number)

        Returns:
            None
        """
        if not self._running:
            await self.start_consumer()
        if positioning is not None:
            try:
                await self._kafka_consumer.seek_to_beginning(positioning.to_topics_partition())
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise NoPartitionAssigned
            except TypeError as err:
                self.logger.exception('%s', err.__str__())
                raise TopicPartitionError
            self.logger.debug('Seek to beginning for topic : %s, partition : %s', positioning.get_topics(),
                              positioning.get_partition())
        else:
            try:
                await self._kafka_consumer.seek_to_beginning()
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise NoPartitionAssigned
            self.logger.debug('Seek to beginning for all topics & partitions')

    async def seek_to_end(self, positioning: BasePositioning = None) -> None:
        """
        Seek to the latest offset, mod 'latest'.
        If positioning is None the consumer seeks all assigned partitions to the end

        Args:
            positioning (BasePositioning): Positioning class containing (topic name / partition number)

        Returns:
            None
        """
        if not self._running:
            await self.start_consumer()
        if positioning is not None:
            try:
                await self._kafka_consumer.seek_to_end(positioning.to_topics_partition())
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise NoPartitionAssigned
            except TypeError as err:
                self.logger.exception('%s', err.__str__())
                raise TopicPartitionError
            self.logger.debug('Seek to end for topic : %s, partition : %s', positioning.get_topics(),
                              positioning.get_partition())
        else:
            try:
                await self._kafka_consumer.seek_to_end()
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise NoPartitionAssigned
            self.logger.debug('Seek to end for all topics & partitions')

    async def seek_to_last_commit(self, positioning: BasePositioning = None) -> None:
        """
        Seek to the last committed offsets, mod 'committed'.
        If positioning is None the consumer seeks all assigned partitions to the last committed offset

        Args:
            positioning (BasePositioning): Positioning class containing (topic name / partition number / offset number)

        Returns:
            None
        """
        if self._group_id is None:
            raise IllegalOperation
        if not self._running:
            await self.start_consumer()
        if positioning:
            try:
                await self._kafka_consumer.seek_to_committed(positioning.to_topics_partition())
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise NoPartitionAssigned
            except TypeError as err:
                self.logger.exception('%s', err.__str__())
                raise TopicPartitionError
            self.logger.debug('Seek to last committed for topic : %s, partition : %s', positioning.get_topics(),
                              positioning.get_partition())
        else:
            try:
                await self._kafka_consumer.seek_to_committed()
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise NoPartitionAssigned
            self.logger.debug('Seek to last committed for all topics & partitions')

    async def seek_custom(self, positioning: BasePositioning) -> None:
        """
        Seek to custom offsets

        Args:
            positioning (BasePositioning): Positioning class containing (topic name / partition number / offset number)

        Returns:
            None
        """
        if not self._running:
            await self.start_consumer()
        if positioning is not None:
            try:
                await self._kafka_consumer.seek(positioning.to_topics_partition(), positioning.get_current_offset())
            except ValueError as err:
                self.logger.exception('%s', err.__str__())
                raise OffsetError
            except TypeError as err:
                self.logger.exception('%s', err.__str__())
                raise TopicPartitionError
            except IllegalStateError as err:
                self.logger.exception('%s', err.__str__())
                raise NoPartitionAssigned
            self.logger.debug('Custom seek for topic : %s, partition : %s, offset : %s',
                              positioning.get_topics(), positioning.get_partition(), positioning.get_current_offset())
        else:
            raise KafkaConsumerError

    async def _make_manual_commit(self, to_commit: List[BasePositioning]):
        commits = {}
        for positioning in to_commit:
            commits[positioning.to_topics_partition()] = positioning.get_current_offset()

        await self._kafka_consumer.commit(commits)

    async def subscriptions(self) -> frozenset:
        """
        Get the set of subscribed topics

        Returns:
            frozenset: Set of subscribed topics

        """
        if not self._running:
            await self.start_consumer()
        return self._kafka_consumer.subscription()

    def pprint_consumer_offsets(self) -> None:
        """
        Debug tool, prints all consumer positions

        Returns:
            None
        """
        self.logger.debug('Client ID = %s', self._client_id)

        self.logger.debug('Current Offset = %s', [positioning.pprint() for key, positioning in
                                                  self.__current_offsets.items()])
        self.logger.debug('Last Offset = %s', [positioning.pprint() for key, positioning in
                                               self.__last_offsets.items()])

        self.logger.debug('Last committed offset = %s', [positioning.pprint() for key, positioning in
                                                         self.__last_committed_offsets.items()])

    def get_consumer(self) -> AIOKafkaConsumer:
        """
        Get aiokafka consumer

        Returns:
            AIOKafkaConsumer: Current instance of AIOKafkaConsumer
        """
        return self._kafka_consumer

    def get_offset_bundle(self) -> Dict[str, Dict[str, BasePositioning]]:
        """
        Return a bundle with the current, latest, and last committed positioning for each assigned
        topic/partition as a dict

        Returns:
            Dict[str, Dict[str, BasePositioning]]: Contains current_offset / last_offset / last_committed_offset
        """
        return {
            'current_offset': self.__current_offsets.copy(),
            'last_offset': self.__last_offsets.copy(),
            'last_committed_offset': self.__last_committed_offsets.copy()
        }

    def get_current_offset(self) -> Dict[str, BasePositioning]:
        """
        Return current offset of each assigned topic/partition

        Returns:
            Dict[str, BasePositioning]: Dict contains current offset of each assigned partition
        """
        return self.__current_offsets.copy()

    def get_last_offset(self) -> Dict[str, BasePositioning]:
        """
        Return last offset of each assigned topic/partition

        Returns:
            Dict[str, BasePositioning]: Dict contains latest offset of each assigned partition
        """
        return self.__last_offsets.copy()

    def get_last_committed_offset(self) -> Dict[str, BasePositioning]:
        """
        Return last committed offset of each assigned topic/partition

        Returns:
            Dict[str, BasePositioning]: Dict contains last committed offset of each assigned partition
        """
        return self.__last_committed_offsets.copy()
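
The manual commit in listen_records above passes msg.offset + 1 because Kafka stores the offset of the next record to read. A minimal sketch of that pattern against the raw aiokafka API:

from aiokafka import AIOKafkaConsumer, TopicPartition


async def commit_next(consumer: AIOKafkaConsumer, topic: str,
                      partition: int, offset: int) -> None:
    # Kafka commit semantics store the position of the *next* record, so the
    # processed message's offset is incremented by one.
    tp = TopicPartition(topic, partition)
    await consumer.commit({tp: offset + 1})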
Example #9
async def test_kafka_consumer_partition(
        asyncio_kafka_consumer: AIOKafkaConsumer):
    assert [
        TopicPartition(KAFKA_TEST_TOPIC, KAFKA_PARTITION),
    ] == sorted(list(asyncio_kafka_consumer.assignment()))
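
A sketch of the fixture this test assumes (it is not shown in the source): a started consumer manually assigned to a single partition, under pytest-asyncio; the constants and broker address are placeholders:

import pytest
from aiokafka import AIOKafkaConsumer, TopicPartition

KAFKA_TEST_TOPIC = "test-topic"  # placeholder; the real constant is not shown
KAFKA_PARTITION = 0


@pytest.fixture
async def asyncio_kafka_consumer():
    consumer = AIOKafkaConsumer(bootstrap_servers="localhost:9092")
    await consumer.start()
    # Manual assignment gives the test a deterministic single-partition view.
    consumer.assign([TopicPartition(KAFKA_TEST_TOPIC, KAFKA_PARTITION)])
    yield consumer
    await consumer.stop()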