Example No. 1
async def get_offsets_scope_of_topic_and_set_to_start(consumer, topic):
    partition_set = consumer.partitions_for_topic(topic)
    logging.info((topic, partition_set))
    await consumer.seek_to_end()
    last_offset = None
    for partition in partition_set:
        tp = TopicPartition(topic, partition)
        offset = await consumer.position(tp)
        if (offset and not last_offset) or (offset and last_offset
                                            and offset > last_offset):
            last_offset = offset
        logging.info((topic, partition, last_offset))

    await consumer.seek_to_beginning()
    first_offset = None
    for partition in partition_set:
        tp = TopicPartition(topic, partition)
        offset = await consumer.position(tp)
        if (offset and not first_offset) or (offset and first_offset
                                             and offset < first_offset):
            first_offset = offset
        logging.info((topic, partition, first_offset))
    if last_offset and not first_offset:
        first_offset = 0
    return first_offset, last_offset
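
A minimal usage sketch for the helper above, assuming a locally reachable broker; the broker address and topic name are placeholders, and the consumer is assigned explicitly so that position() and the seeks have partitions to operate on:

import asyncio
import logging

from aiokafka import AIOKafkaConsumer, TopicPartition


async def main():
    logging.basicConfig(level=logging.INFO)
    consumer = AIOKafkaConsumer(bootstrap_servers='localhost:9092')  # placeholder broker
    await consumer.start()
    try:
        # partitions_for_topic() may return None until topic metadata arrives
        partitions = consumer.partitions_for_topic('my-topic')
        consumer.assign([TopicPartition('my-topic', p) for p in partitions])
        first, last = await get_offsets_scope_of_topic_and_set_to_start(
            consumer, 'my-topic')
        logging.info('offset range: %s..%s', first, last)
    finally:
        await consumer.stop()

asyncio.run(main())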
Example No. 2
async def test_initialize_store_builder(get_store_manager):
    store_builder = get_store_manager

    await store_builder.initialize_store_builder()

    assigned_partitions = list()
    last_offsets = dict()
    assigned_partitions.append(TopicPartition('test-store', 0))
    last_offsets[TopicPartition('test-store', 0)] = 0
    test_store_metadata_local = BaseStoreMetaData(assigned_partitions,
                                                  last_offsets, 0, 1)

    local_store = store_builder.get_local_store()
    local_store_metadata = await local_store.get_metadata()

    assert local_store_metadata.to_dict() == test_store_metadata_local.to_dict()

    assigned_partitions = list()
    last_offsets = dict()
    for i in range(0, 1):
        assigned_partitions.append(TopicPartition('test-store', 0))
    for j in range(0, 1):
        last_offsets[TopicPartition('test-store', 0)] = 0
    test_store_metadata_global = BaseStoreMetaData(assigned_partitions,
                                                   last_offsets, 0, 1)

    global_store = store_builder.get_global_store()
    global_store_metadata = await global_store.get_metadata()

    assert global_store_metadata.to_dict() == test_store_metadata_global.to_dict()
Example No. 3
async def test_local_memory_get_metadata(get_local_memory_store_connection):
    local_memory_store = get_local_memory_store_connection
    assigned_partitions = [TopicPartition('test', 2)]
    last_offsets = {TopicPartition('test', 2): 0}
    current_instance = 2
    nb_replica = 4
    db_meta = await local_memory_store.get_metadata()
    local_meta = BaseStoreMetaData(assigned_partitions, last_offsets,
                                   current_instance, nb_replica).to_dict()
    assert db_meta.to_dict() == local_meta
Example No. 4
async def test_global_memory_store_set_store_position(get_global_memory_store_connection):
    global_memory_store = get_global_memory_store_connection

    assigned_partitions = [TopicPartition('test', 2)]
    last_offsets = {TopicPartition('test', 2): 0}
    current_instance = 2
    nb_replica = 4
    await global_memory_store.set_store_position(current_instance, nb_replica, assigned_partitions, last_offsets)

    db_meta = BaseStoreMetaData(assigned_partitions, last_offsets, current_instance, nb_replica)
    r_db_meta = await global_memory_store.get_metadata()
    assert r_db_meta.to_dict() == db_meta.to_dict()
Example No. 5
async def test_global_memory_update_metadata_tp_offset(get_global_memory_store_connection):
    global_memory_store = get_global_memory_store_connection
    tp = TopicPartition('test', 2)
    await global_memory_store.update_metadata_tp_offset(tp, 4)

    assigned_partitions = [TopicPartition('test', 2)]
    last_offsets = {TopicPartition('test', 2): 4}
    current_instance = 2
    nb_replica = 4
    global_meta = BaseStoreMetaData(assigned_partitions, last_offsets, current_instance, nb_replica).to_dict()

    db_meta = await global_memory_store.get_metadata()
    assert db_meta.to_dict() == global_meta
Example No. 6
async def test_global_memory_store_get_all(get_global_memory_store_connection):
    global_memory_store = get_global_memory_store_connection
    await global_memory_store.global_set('test1', b'value1')
    await global_memory_store.global_set('test2', b'value2')

    assigned_partitions = [TopicPartition('test', 2)]
    last_offsets = {TopicPartition('test', 2): 0}
    current_instance = 2
    nb_replica = 4
    meta = BaseStoreMetaData(assigned_partitions, last_offsets, current_instance, nb_replica)

    assert await global_memory_store.get_all() == {'test1': b'value1', 'test2': b'value2',
                                                   'metadata': bytes(str(meta.to_dict()), 'utf-8')}
Example No. 7
    async def seek_custom(self,
                          topic: str = None,
                          partition: int = None,
                          offset: int = None) -> None:
        """
        Seek to custom offsets

        Args:
            topic (str): Topic name
            partition (int): Partition value
            offset (int): Offset value

        Returns:
            None
        """
        if not self._running:
            await self.start_consumer()
        if partition is not None and topic is not None and offset is not None:
            try:
                await self._kafka_consumer.seek(
                    TopicPartition(topic, partition), offset)
            except ValueError as err:
                self.logger.exception(f'{err.__str__()}')
                raise OffsetError
            except TypeError as err:
                self.logger.exception(f'{err.__str__()}')
                raise TopicPartitionError
            except IllegalStateError as err:
                self.logger.exception(f'{err.__str__()}')
                raise NoPartitionAssigned
            self.logger.debug(
                f'Custom seek for topic : {topic}, partition : {partition}, offset : {offset}'
            )
        else:
            raise KafkaConsumerError('seek_custom requires topic, partition and offset arguments', 500)
Example No. 8
    async def seek_to_end(self,
                          partition: int = None,
                          topic: str = None) -> None:
        """
        Seek to the latest offset (mode 'latest')

        Args:
            partition (int): Partition value
            topic (str): Topic name

        Returns:
            None
        """
        if not self._running:
            await self.start_consumer()
        if partition is not None and topic is not None:
            try:
                await self._kafka_consumer.seek_to_end(
                    TopicPartition(topic, partition))
            except IllegalStateError as err:
                self.logger.exception(f'{err.__str__()}')
                raise NoPartitionAssigned
            except TypeError as err:
                self.logger.exception(f'{err.__str__()}')
                raise TopicPartitionError
            self.logger.debug(
                f'Seek to end for topic : {topic}, partition : {partition}')
        else:
            try:
                await self._kafka_consumer.seek_to_end()
            except IllegalStateError as err:
                self.logger.exception(f'{err.__str__()}')
                raise NoPartitionAssigned
            self.logger.debug('Seek to end for all topics & partitions')
Example No. 9
async def consume_by_topic(websocket, topic):
    consumer = await get_consumer()
    await consumer.start()
    tp = TopicPartition(topic, 0)
    consumer.assign([tp])

    current_offsets = await consumer.end_offsets([tp])
    target_offset = current_offsets[tp] - 200
    target_offset = 0 if target_offset < 0 else target_offset
    consumer.seek(tp, target_offset)

    try:
        counter = 0
        while websocket.client_state == WebSocketState.CONNECTED:
            msg_dict = await consumer.getmany(tp)

            for _, msgs in msg_dict.items():
                item_cnt = len(msgs)
                if item_cnt:
                    logging.info(f"retrieved {item_cnt} items at once")
                for msg in msgs:
                    await websocket.send_text(
                        json.dumps({
                            "message": msg.value.decode(),
                            "timestamp": msg.timestamp
                        }))

            counter += 1
            if counter % 10 == 0:
                await websocket.send_text("ping")
                await websocket.receive_text()
    except WebSocketDisconnect as e:
        logging.info("Exited: %s", e)
    finally:
        await consumer.stop()
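
For context, a hedged sketch of how this coroutine could be mounted as a FastAPI websocket endpoint; the app, the route path, and the get_consumer() factory used above are assumptions, not part of the original:

from fastapi import FastAPI, WebSocket

app = FastAPI()


@app.websocket('/ws/{topic}')
async def topic_stream(websocket: WebSocket, topic: str):
    # Accept the connection before streaming messages from the topic
    await websocket.accept()
    await consume_by_topic(websocket, topic)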
Example No. 10
async def seek_to_offset(consumer: AIOKafkaConsumer, topic: str, start: int = -1):
    """
    Seek to the last message in topic.
    """
    partition_number, offset = -1, -1
    # Loop through partitions and find the latest offset
    for p in consumer.partitions_for_topic(topic):
        tp = TopicPartition(topic, p)
        committed = await consumer.committed(tp)
        await consumer.seek_to_end(tp)
        last_offset = await consumer.position(tp)
        # print("topic: {} partition: {} committed: {} last: {}".format(topic, p, committed, last_offset))
        if offset < last_offset:
            offset = last_offset
            partition_number = p
    tp = TopicPartition(topic, partition_number)
    consumer.seek(tp, offset - start)
Example No. 11
    async def _seek_offsets(self, consumer, topics, offsets):
        for topic in topics:
            parts = consumer.partitions_for_topic(topic)
            for part in parts:
                tp = TopicPartition(topic, part)
                end_offsets = await consumer.end_offsets([tp])
                max_offset = end_offsets[tp]
                if offsets < 0 or offsets > max_offset:
                    raise ValueError("offset out of range")
                consumer.seek(tp, offsets)
Example No. 12
    async def fetch_report(self):
        '''Get a single report in a context,
        commit only if handled successfully.
        '''
        msg = await self._cons.getone()
        tp = TopicPartition(msg.topic, msg.partition)

        site = read_report(msg.value)
        print('Received', site, flush=True)
        yield site

        # commit() advances the consumer group offset; committed() only reads it
        await self._cons.commit({tp: msg.offset + 1})
Example No. 13
    async def _seek(self, topic, step):
        partitions = self._consumer.partitions_for_topic(topic)
        if not partitions:
            return
        pid = partitions.pop()
        tp = TopicPartition(topic, pid)

        try:
            position = await self._consumer.position(tp)
        except IllegalStateError:
            position = 0

        if position > 0:
            self._consumer.seek(tp, position + step)
Example No. 14
    async def _do_some_work(self, work, topics, group_id, offsets, listener, bootstrap_servers, enable_commit, **kwargs):
        consumer = AIOKafkaConsumer(loop=self.loop,
                                    bootstrap_servers=bootstrap_servers,
                                    group_id=group_id,
                                    fetch_max_wait_ms=self.fetch_max_wait_ms,
                                    max_partition_fetch_bytes=self.max_partition_fetch_bytes,
                                    request_timeout_ms=self.request_timeout_ms,
                                    auto_offset_reset=self.auto_offset_reset,
                                    enable_auto_commit=self.enable_auto_commit,
                                    auto_commit_interval_ms=self.auto_commit_interval_ms,
                                    check_crcs=self.check_crcs,
                                    metadata_max_age_ms=self.metadata_max_age_ms,
                                    heartbeat_interval_ms=self.heartbeat_interval_ms,
                                    session_timeout_ms=self.session_timeout_ms,
                                    exclude_internal_topics=self.exclude_internal_topics,
                                    connections_max_idle_ms=self.connections_max_idle_ms,
                                    **kwargs)
        consumer.subscribe(topics=topics, listener=listener)
        await consumer.start()
        if offsets is not None:
            await self._seek_offsets(consumer, topics, offsets)
        try:
            async for msg in consumer:
                try:
                    if msg is None:
                        continue
                    await work(msg)
                    if enable_commit:
                        meta = "Some utf-8 metadata"
                        tp = TopicPartition(msg.topic, msg.partition)
                        offsets = {tp: OffsetAndMetadata(msg.offset + 1, meta)}
                        await consumer.commit(offsets)
                except OffsetOutOfRangeError as err:
                    # On out-of-range, rewind the affected partitions and keep consuming
                    tps = err.args[0].keys()
                    await consumer.seek_to_beginning(*tps)
                    continue
                except Exception:
                    root_logger.error(traceback.format_exc())
                    continue
        finally:
            await consumer.stop()
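
The work callback receives one ConsumerRecord per iteration, as follows from the await work(msg) call above; a hedged sketch of a callback and an invocation, where the worker object and its configuration attributes are assumptions:

async def handle(msg):
    # msg is an aiokafka ConsumerRecord
    print(msg.topic, msg.partition, msg.offset, msg.value)

# Hypothetical invocation from inside the owning class's event loop:
# await self._do_some_work(work=handle, topics=['my-topic'], group_id='my-group',
#                          offsets=None, listener=None,
#                          bootstrap_servers='localhost:9092', enable_commit=True)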
Example No. 15
    async def _run(self):
        self.status = RedisStatus(prefix=self.stat_cfg.prefix,
                                  host=self.stat_cfg.host,
                                  pwd=self.stat_cfg.pwd,
                                  db=self.stat_cfg.db)
        await self.status.open()

        for part in self.queue_para.partitions:
            topic_partition = TopicPartition(self.topic, part)
            offset = await self.status.read(self.topic, partition=part)
            logging.info(
                f"thread:{self.name} {self.topic} read offset {offset}")
            self.partitions[topic_partition] = offset

        await self.status.close()

        # tasks = [asyncio.ensure_future(self._queue_client(partition)) for partition in self.partitions]
        tasks = [
            asyncio.ensure_future(self._pull(self.partitions)),
            asyncio.ensure_future(self._detect())
        ]
        await asyncio.gather(*tasks)
Example No. 16
    async def delete_from_local_store(self, key: str) -> RecordMetadata:
        """
        Delete from local store

        Args:
            key (str): Object key as string

        Returns:
            RecordMetadata: metadata of the produced delete record
        """
        if self._local_store.is_initialized():
            store_builder = StoreRecord(key=key, ctype='del', value=b'')
            try:
                record_metadata: RecordMetadata = await self._store_producer.send_and_await(store_builder,
                                                                                            self._topic_store)
                await self._local_store.update_metadata_tp_offset(TopicPartition(record_metadata.topic,
                                                                                 record_metadata.partition),
                                                                  record_metadata.offset)
            except (KeyErrorSendEvent, ValueErrorSendEvent, TypeErrorSendEvent, FailToSendEvent):
                raise FailToSendStoreRecord
            await self._local_store.delete(key)
            return record_metadata
        else:
            raise UninitializedStore
Example No. 17
async def pull(loop, server, topic, group_id, batch_size=1, shuffle=False):
    client = AIOKafkaConsumer(
        topic,
        loop=loop,
        bootstrap_servers=server,
        group_id=group_id,
        auto_offset_reset='earliest',
        enable_auto_commit=False,
    )
    await client.start()

    partitions = client.partitions_for_topic(topic)
    while partitions is None:
        await asyncio.sleep(0.1)
        partitions = client.partitions_for_topic(topic)

    partitions = list(partitions)
    partitions = [TopicPartition(topic, partition) for partition in partitions]
    #current_offsets = await client.beginning_offsets(partitions)
    end_offsets = await client.end_offsets(partitions)
    current_partition = 0
    done = False

    async def next_partition(current_partition):
        current_partition += 1  # todo recursive
        if current_partition >= len(partitions):
            return None

        current_offset = await client.position(partitions[current_partition])
        if current_offset >= end_offsets[partitions[current_partition]]:
            current_partition = await next_partition(current_partition)
        print("remaining record: {}, partition: {}".format(
            remaining_records, current_partition))
        return current_partition

    current_offset = await client.position(partitions[current_partition])
    if current_offset >= end_offsets[partitions[current_partition]]:
        done = True

    while done is False:
        remaining_records = batch_size
        batch = []
        while remaining_records > 0:
            msg = await client.getone(partitions[current_partition])
            batch.append(msg)
            remaining_records -= 1

            current_offset = await client.position(
                partitions[current_partition])
            if current_offset >= end_offsets[partitions[current_partition]]:
                current_partition = await next_partition(current_partition)
                print("remaining record: {}, partition: {}".format(
                    remaining_records, current_partition))
                if current_partition is None:
                    done = True
                    break

        if len(batch) > 0:
            yield (batch)
        '''
        data = await client.getmany(max_records=batch_size)
        print(data)
        #for tp, messages in data.items():
        messages = data[topic]
        if len(messages) > 0:
            batch = []
            for msg in messages:
                batch.append(msg)
            yield(batch)
        else:
            done = True
        '''

    await client.stop()
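
Because pull is an async generator, it is consumed with async for; a minimal sketch, with placeholder connection details:

import asyncio


async def main():
    loop = asyncio.get_running_loop()
    # server, topic and group id are placeholders
    async for batch in pull(loop, 'localhost:9092', 'my-topic', 'my-group',
                            batch_size=32):
        print('got batch of', len(batch), 'messages')

asyncio.run(main())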
Example No. 18
    async def listen_event(self, mod: str = 'earliest') -> None:
        """
        Listens for events from the assigned topic / partitions

        Args:
            mod: Start position of consumer (earliest, latest, committed)

        Returns:
            None
        """
        if not self._running:
            await self.load_offsets(mod)

        self.pprint_consumer_offsets()

        # await self.getone()
        async for msg in self._kafka_consumer:
            # Debug Display
            self.logger.debug(
                "---------------------------------------------------------------------------------"
            )
            self.logger.debug(
                f'New Message on consumer {self._client_id}, Topic {msg.topic}, '
                f'Partition {msg.partition}, Offset {msg.offset}, Key {msg.key}, Value {msg.value},'
                f'Headers {msg.headers}')
            self.pprint_consumer_offsets()
            self.logger.debug(
                "---------------------------------------------------------------------------------"
            )

            tp = TopicPartition(msg.topic, msg.partition)
            self.__current_offsets[tp] = msg.offset
            # self.last_offsets = await self.get_last_offsets()

            sleep_duration_in_ms = self._retry_interval
            for retries in range(0, self._max_retries):
                try:
                    decode_dict = msg.value
                    event_class = decode_dict['event_class']
                    handler_class = decode_dict['handler_class']

                    logging.debug(
                        f'Event name : {event_class.event_name()}  Event content :\n{event_class.__dict__}'
                    )

                    # Calls handle if event is instance BaseHandler
                    if isinstance(handler_class, BaseEventHandler):
                        result = await handler_class.handle(
                            event=event_class,
                            group_id=self._group_id,
                            tp=tp,
                            offset=msg.offset)
                    elif isinstance(handler_class, BaseCommandHandler):
                        result = await handler_class.execute(
                            event=event_class,
                            group_id=self._group_id,
                            tp=tp,
                            offset=msg.offset)
                    elif isinstance(handler_class, BaseResultHandler):
                        result = await handler_class.on_result(
                            event=event_class,
                            group_id=self._group_id,
                            tp=tp,
                            offset=msg.offset)
                    else:
                        # Otherwise raise KafkaConsumerUnknownHandler
                        raise UnknownHandler

                    # If result is None (no transactional process), check if the consumer has
                    # a group_id (mandatory to commit in Kafka)
                    if result is None and self._group_id is not None:
                        # Check if next commit was possible (Kafka offset)
                        if self.__last_committed_offsets[tp] is None or \
                                self.__last_committed_offsets[tp] <= self.__current_offsets[tp]:
                            self.logger.debug(
                                f'Commit msg {event_class.event_name()} in topic {msg.topic} partition '
                                f'{msg.partition} offset {self.__current_offsets[tp] + 1}'
                            )
                            await self._kafka_consumer.commit(
                                {tp: self.__current_offsets[tp] + 1})
                            self.__last_committed_offsets[tp] = msg.offset + 1

                    # Transactional process, no commit
                    elif result == 'transaction':
                        self.logger.debug('Transaction end')
                        self.__current_offsets = await self.get_current_offsets()
                        self.__last_committed_offsets = await self.get_last_committed_offsets()
                    # Otherwise raise KafkaConsumerUnknownHandlerReturn
                    else:
                        raise UnknownHandlerReturn

                    # Break if everything was successfully processed
                    break
                except IllegalStateError as err:
                    self.logger.exception(f'{err.__str__()}')
                    raise NoPartitionAssigned
                except ValueError as err:
                    self.logger.exception(f'{err.__str__()}')
                    raise OffsetError
                except CommitFailedError as err:
                    self.logger.exception(f'{err.__str__()}')
                    raise err
                except (KafkaError, HandlerException) as err:
                    self.logger.exception(f'{err.__str__()}')
                    sleep_duration_in_s = int(sleep_duration_in_ms / 1000)
                    await asyncio.sleep(sleep_duration_in_s)
                    sleep_duration_in_ms = sleep_duration_in_ms * self._retry_backoff_coeff
                    if retries == self._max_retries - 1:
                        await self.stop_consumer()
                        logging.error('Max retries reached, closing consumer and exiting')
                        exit(1)
Example No. 19
    async def listen_store_records(self, rebuild: bool = False) -> None:
        """
        Listens for events for store construction

        Args:
            rebuild (bool): if True, the consumer seeks to the first offset to rebuild its own state

        Returns:
            None
        """
        if self._store_builder is None:
            raise KeyError

        self.logger.info('Start listen store records')

        await self.start_consumer()
        await self._store_builder.initialize_store_builder()

        if not self._running:
            raise KafkaConsumerError('Fail to start tongaConsumer', 500)

        # Check if store is ready
        self.check_if_store_is_ready()
        self.pprint_consumer_offsets()

        async for msg in self._kafka_consumer:
            # Debug Display
            self.logger.debug(
                "---------------------------------------------------------------------"
            )
            self.logger.debug(
                f'New Message on store builder consumer {self._client_id}, Topic {msg.topic}, '
                f'Partition {msg.partition}, Offset {msg.offset}, Key {msg.key}, Value {msg.value},'
                f'Headers {msg.headers}')
            self.pprint_consumer_offsets()
            self.logger.debug(
                "---------------------------------------------------------------------"
            )

            # Check if store is ready
            self.check_if_store_is_ready()

            tp = TopicPartition(msg.topic, msg.partition)
            self.__current_offsets[tp] = msg.offset

            sleep_duration_in_ms = self._retry_interval
            for retries in range(0, self._max_retries):
                try:
                    decode_dict = msg.value
                    event_class: BaseModel = decode_dict['event_class']
                    handler_class: BaseStoreRecordHandler = decode_dict[
                        'handler_class']

                    logging.debug(
                        f'Store event name : {event_class.event_name()}\nEvent '
                        f'content :\n{event_class.__dict__}\n')

                    result = None
                    if msg.partition == self._store_builder.get_current_instance():
                        # Calls local_store_handler if event is an instance of BaseStoreRecord
                        if rebuild and not self._store_builder.get_local_store().is_initialized():
                            if isinstance(event_class, BaseStoreRecord):
                                result = await handler_class.local_store_handler(
                                    store_record=event_class,
                                    group_id=self._group_id,
                                    tp=tp,
                                    offset=msg.offset)
                            else:
                                raise UnknownStoreRecordHandler
                    elif msg.partition != self._store_builder.get_current_instance():
                        if isinstance(event_class, BaseStoreRecord):
                            result = await handler_class.global_store_handler(
                                store_record=event_class,
                                group_id=self._group_id,
                                tp=tp,
                                offset=msg.offset)
                        else:
                            raise UnknownStoreRecordHandler

                    # If result is none (no transactional process), check if consumer has an
                    # group_id (mandatory to commit in Kafka)
                    # TODO Add commit store later V2
                    # if result is None and self._group_id is not None:
                    #     # Check if next commit was possible (Kafka offset)
                    #     if self.__last_committed_offsets[tp] is None or \
                    #             self.__last_committed_offsets[tp] <= self.__current_offsets[tp]:
                    #         self.logger.debug(f'Commit msg {event_class.event_name()} in topic {msg.topic} partition '
                    #                           f'{msg.partition} offset {self.__current_offsets[tp] + 1}')
                    #         await self._kafka_consumer.commit({tp: self.__current_offsets[tp] + 1})
                    #         self.__last_committed_offsets[tp] = msg.offset + 1
                    # # Otherwise raise ValueError
                    # else:
                    #     raise ValueError

                    # Break if everything was successfully processed
                    break
                except IllegalStateError as err:
                    self.logger.exception(f'{err.__str__()}')
                    raise NoPartitionAssigned
                except ValueError as err:
                    self.logger.exception(f'{err.__str__()}')
                    raise OffsetError
                except CommitFailedError as err:
                    self.logger.exception(f'{err.__str__()}')
                    raise err
                except (KafkaError, HandlerException) as err:
                    self.logger.exception(f'{err.__str__()}')
                    sleep_duration_in_s = int(sleep_duration_in_ms / 1000)
                    await asyncio.sleep(sleep_duration_in_s)
                    sleep_duration_in_ms = sleep_duration_in_ms * self._retry_backoff_coeff
                    if retries == self._max_retries - 1:
                        await self.stop_consumer()
                        logging.error('Max retries reached, closing consumer and exiting')
                        exit(1)
Example No. 20
async def kafka_consumer_prepare(brokers,
                                 topic: str,
                                 default_offspec="-0",
                                 parts_offspecs: dict = {},
                                 **consumer_kw):
    """
    :param brokers: bootstrap servers
    :param str topic: topic to consume from (... to assign with)
    :param dict parts_offspecs: (partition-id -> offset-spec), where offset-spec: "[+-]<integer>"

    :return: AIOKafkaConsumer
    """
    if isinstance(brokers, str):
        brokers = brokers.split(",")
    brokers = list(brokers)
    assert brokers, "Empty list of kafka brokers"

    # connect & fetch metadata
    c = AIOKafkaConsumer(bootstrap_servers=",".join(brokers),
                         enable_auto_commit=False,
                         group_id=None,
                         **consumer_kw)
    await c.start()

    all_topics = await c.topics()
    assert topic in all_topics, str((topic, all_topics))
    topic_parts = c.partitions_for_topic(topic)

    # perform seeking for each partition
    # --
    tp_ofssp = {
        TopicPartition(topic, _part_id):
        parts_offspecs.get(_part_id, default_offspec)
        for _part_id in topic_parts
    }
    c.assign(list(tp_ofssp))
    from pprint import pprint
    pprint(tp_ofssp)
    pprint(parts_offspecs)
    # NB: "restart" is necessary for the assignment to propagate
    #     across all the aiokafka's (sic!) abstraction layers
    await c.start()
    assert c.assignment().union(tp_ofssp) == tp_ofssp.keys()

    # --
    tp_ranges = {
        tp: [beg, None]
        for tp, beg in (await c.beginning_offsets(list(tp_ofssp))).items()
    }
    for tp, end in (await c.end_offsets(list(tp_ofssp))).items():
        tp_ranges[tp][-1] = end

    for tp, offspec in tp_ofssp.items():
        beg, end = tp_ranges[tp]
        offs = None  # resolved integer offset
        # ---
        if isinstance(offspec, int):
            offs = int(offspec)
        elif isinstance(offspec, str):
            offs = int(offspec[1:] or offspec)
            if (len(offspec) > 1):
                if offspec[0] in ("+", " "):
                    offs += beg
                elif offspec[0] == "-":
                    offs = end - offs
                else:
                    try:
                        offs = int(offspec)  # trivial case: string-encoded int
                    except ValueError:
                        raise ValueError(
                            "Invalid partition offset specifier: "
                            "unknown prefix '%s' (0x%.2x)" %
                            (offspec[0], ord(offspec[0])), offspec)
        else:
            raise ValueError("Invalid partition offset specifier: %r" % (offspec,),
                             offspec, type(offspec))
        # ---
        if offs < beg:
            offs = beg
        elif offs > end:
            offs = end
        # ---
        assert isinstance(offs, int)
        c.seek(tp, offs)
        #print(">> seek(%s:%d, %9d)" % (tp.topic, tp.partition, offs))

    # --
    return c
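
Following the parsing logic above, "-0" seeks to the end (tail mode), "-100" starts 100 records before the end, "+5" starts 5 records after the beginning, and a plain integer is an absolute offset. A hedged usage sketch, with placeholder broker and topic names:

import asyncio


async def main():
    # broker and topic names are placeholders
    consumer = await kafka_consumer_prepare('localhost:9092', 'events',
                                            default_offspec='-100',
                                            parts_offspecs={0: '+0'})
    try:
        async for msg in consumer:
            print(msg.partition, msg.offset, msg.value)
    finally:
        await consumer.stop()

asyncio.run(main())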
Example No. 21
def commit_offset(consumer: AIOKafkaConsumer, msg: ConsumerRecord):
    tp = TopicPartition(msg.topic, msg.partition)
    asyncio.create_task(consumer.commit({tp: msg.offset + 1}))
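
create_task fires the commit without blocking the caller, but the task's result is never awaited, so a failed commit passes silently and the task can be garbage-collected before completion. A hedged variant (the function name is an assumption) keeps a reference and logs failures:

import asyncio
import logging

from aiokafka import AIOKafkaConsumer, ConsumerRecord, TopicPartition


def commit_offset_checked(consumer: AIOKafkaConsumer,
                          msg: ConsumerRecord) -> asyncio.Task:
    tp = TopicPartition(msg.topic, msg.partition)
    task = asyncio.create_task(consumer.commit({tp: msg.offset + 1}))

    def _log_failure(t: asyncio.Task) -> None:
        # exception() would raise if the task was cancelled, so check first
        if not t.cancelled() and t.exception() is not None:
            logging.error('commit failed: %s', t.exception())

    task.add_done_callback(_log_failure)
    return task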
Example No. 22
    async def initialize_store_builder(self) -> None:
        """
        Initializes the store builder and connects the local & global stores with the tonga consumer.
        This function seeks to the last committed offset if store metadata exists.

        Returns:
            None
        """
        # Initialize local store
        self._logger.info('Start initialize store builder')
        if isinstance(self._local_store, LocalStoreMemory):
            # If _local_store is an instance of LocalStoreMemory, auto seek to earliest position for rebuild
            self._logger.info('LocalStoreMemory seek to earliest')
            assigned_partitions = list()
            last_offsets = dict()
            assigned_partitions.append(TopicPartition(self._topic_store, self._current_instance))
            last_offsets[TopicPartition(self._topic_store, self._current_instance)] = 0
            await self._local_store.set_store_position(self._current_instance, self._nb_replica, assigned_partitions,
                                                       last_offsets)
            try:
                await self._store_consumer.load_offsets('earliest')
            except (TopicPartitionError, NoPartitionAssigned) as err:
                self._logger.exception(f'{err.__str__()}')
                raise CanNotInitializeStore
        else:
            try:
                # Try to get local_store_metadata, seek at last read offset
                local_store_metadata = await self._local_store.get_metadata()
            except StoreKeyNotFound:
                # If metadata doesn't exist in DB, auto seek to earliest position for rebuild
                assigned_partitions = list()
                last_offsets = dict()
                assigned_partitions.append(TopicPartition(self._topic_store, self._current_instance))
                last_offsets[TopicPartition(self._topic_store, self._current_instance)] = 0
                await self._local_store.set_store_position(self._current_instance, self._nb_replica,
                                                           assigned_partitions, last_offsets)
                try:
                    await self._store_consumer.load_offsets('earliest')
                except (TopicPartitionError, NoPartitionAssigned) as err:
                    self._logger.exception(f'{err.__str__()}')
                    raise CanNotInitializeStore
            else:
                # If metadata exists in DB, auto seek to last position
                try:
                    last_offset = local_store_metadata.last_offsets[
                        TopicPartition(self._topic_store, self._current_instance)]
                    await self._store_consumer.seek_custom(self._topic_store, self._current_instance, last_offset)
                except (OffsetError, TopicPartitionError, NoPartitionAssigned) as err:
                    self._logger.exception(f'{err.__str__()}')
                    raise CanNotInitializeStore
                await self._local_store.set_store_position(self._current_instance, self._nb_replica,
                                                           local_store_metadata.assigned_partitions,
                                                           local_store_metadata.last_offsets)

        # Initialize global store
        if isinstance(self._global_store, GlobalStoreMemory):
            # If _global_store is an instance of GlobalStoreMemory, auto seek to earliest position for rebuild
            self._logger.info('GlobalStoreMemory seek to earliest')
            assigned_partitions = list()
            last_offsets = dict()
            for i in range(0, self._nb_replica):
                assigned_partitions.append(TopicPartition(self._topic_store, i))
            for j in range(0, self._nb_replica):
                last_offsets[TopicPartition(self._topic_store, j)] = 0
            await self._global_store.set_store_position(self._current_instance, self._nb_replica, assigned_partitions,
                                                        last_offsets)
            try:
                await self._store_consumer.load_offsets('earliest')
            except (TopicPartitionError, NoPartitionAssigned) as err:
                self._logger.exception(f'{err.__str__()}')
                raise CanNotInitializeStore
        else:
            try:
                global_store_metadata = await self._global_store.get_metadata()
            except StoreKeyNotFound:
                # If metadata doesn't exist in DB
                assigned_partitions = list()
                last_offsets = dict()
                for i in range(0, self._nb_replica):
                    assigned_partitions.append(TopicPartition(self._topic_store, i))
                for j in range(0, self._nb_replica):
                    last_offsets[TopicPartition(self._topic_store, j)] = 0
                await self._global_store.set_store_position(self._current_instance, self._nb_replica,
                                                            assigned_partitions, last_offsets)
                try:
                    await self._store_consumer.load_offsets('earliest')
                except (TopicPartitionError, NoPartitionAssigned) as err:
                    self._logger.exception(f'{err.__str__()}')
                    raise CanNotInitializeStore
            else:
                # If metadata exists in DB
                for tp, offset in global_store_metadata.last_offsets.items():
                    try:
                        await self._store_consumer.seek_custom(tp.topic, tp.partition, offset)
                    except (OffsetError, TopicPartitionError, NoPartitionAssigned) as err:
                        self._logger.exception(f'{err.__str__()}')
                        raise CanNotInitializeStore
                await self._global_store.set_store_position(self._current_instance, self._nb_replica,
                                                            global_store_metadata.assigned_partitions,
                                                            global_store_metadata.last_offsets)
Example No. 23
    def to_topics_partition(self) -> TopicPartition:
        return TopicPartition(topic=self._topic, partition=self._partition)
Example No. 24
    def assign(self, cluster: ClusterMetadata, members: Dict[str, ConsumerProtocolMemberMetadata]) \
            -> Dict[str, ConsumerProtocolMemberAssignment]:
        """Assign function was call by aiokafka for assign consumer on right topic partition.

        Args:
            cluster (ClusterMetadata):  Kafka-python cluster metadata (more detail in kafka-python documentation)
            members (Dict[str, ConsumerProtocolMemberMetadata]): members dict which contains
                                                                ConsumerProtocolMemberMetadata
                                                                (more detail in kafka-python documentation)

        Returns:
            Dict[str, ConsumerProtocolMemberAssignment]: dict which contains members and their assignment
                                                         protocol (more detail in kafka-python documentation)
        """
        self.logger.info('Statefulset Partition Assignor')
        self.logger.debug('Cluster = %s\nMembers = %s', cluster, members)

        # Get all topics
        all_topics: Set = set()
        for key, metadata in members.items():
            self.logger.debug('Key = %s\nMetadata = %s', key, metadata)
            all_topics.update(metadata.subscription)

        # Get all partitions by topic name
        all_topic_partitions = []
        for topic in all_topics:
            partitions = cluster.partitions_for_topic(topic)
            if partitions is None:
                self.logger.warning('No partition metadata for topic %s', topic)
                continue
            for partition in partitions:
                all_topic_partitions.append(TopicPartition(topic, partition))
        # Sort partitions
        all_topic_partitions.sort()

        # Create default dict with lambda
        assignment: DefaultDict[str, Any] = collections.defaultdict(lambda: collections.defaultdict(list))

        advanced_assignor_dict = self.get_advanced_assignor_dict(all_topic_partitions)

        for topic, partitions in advanced_assignor_dict.items():
            for member_id, member_data in members.items():
                # Loads member assignors data
                user_data = json.loads(member_data.user_data)
                # Get number of partitions by topic name
                topic_number_partitions = len(partitions)

                # Assignment logic when nb_replica equals topic_number_partitions (used by StoreBuilder
                # to assign each partition to the right instance)
                if user_data['nb_replica'] == topic_number_partitions:
                    if user_data['assignor_policy'] == 'all':
                        for partition in partitions:
                            assignment[member_id][topic].append(partition)
                    elif user_data['assignor_policy'] == 'only_own':
                        if user_data['instance'] in partitions:
                            assignment[member_id][topic].append(partitions[user_data['instance']])
                    else:
                        raise BadAssignorPolicy

                else:
                    raise NotImplementedError

        self.logger.debug('Assignment = %s', assignment)

        protocol_assignment = {}
        for member_id in members:
            protocol_assignment[member_id] = ConsumerProtocolMemberAssignment(self.version,
                                                                              sorted(assignment[member_id].items()),
                                                                              members[member_id].user_data)

        self.logger.debug('Protocol Assignment = %s', protocol_assignment)
        return protocol_assignment
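
Custom assignors like this one are handed to the consumer through partition_assignment_strategy. A hedged wiring sketch; whether the assignor is passed as a class or as a pre-configured instance depends on the surrounding library, and the class, topic, group and broker names below are placeholders inferred from the snippet's log messages:

from aiokafka import AIOKafkaConsumer

consumer = AIOKafkaConsumer(
    'test-store',
    bootstrap_servers='localhost:9092',  # placeholder broker
    group_id='store-builder',
    partition_assignment_strategy=[StatefulsetPartitionAssignor])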