async def get_offsets_scope_of_topic_and_set_to_start(consumer, topic):
    partition_set = consumer.partitions_for_topic(topic)
    logging.info((topic, partition_set))

    await consumer.seek_to_end()
    last_offset = None
    for partition in partition_set:
        tp = TopicPartition(topic, partition)
        offset = await consumer.position(tp)
        if (offset and not last_offset) or (offset and last_offset and offset > last_offset):
            last_offset = offset
        logging.info((topic, partition, last_offset))

    await consumer.seek_to_beginning()
    first_offset = None
    for partition in partition_set:
        tp = TopicPartition(topic, partition)
        offset = await consumer.position(tp)
        if (offset and not first_offset) or (offset and first_offset and offset < first_offset):
            first_offset = offset
        logging.info((topic, partition, first_offset))

    if last_offset and not first_offset:
        first_offset = 0
    return first_offset, last_offset
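# Hedged usage sketch for get_offsets_scope_of_topic_and_set_to_start. The broker address,
# topic name and the runner function below are illustrative assumptions, not values from the
# source. group_id=None makes aiokafka self-assign all partitions of the subscribed topic,
# which seek_to_end()/seek_to_beginning() without arguments rely on.
import logging
from aiokafka import AIOKafkaConsumer


async def _example_offsets_scope():
    consumer = AIOKafkaConsumer('example-topic',               # hypothetical topic
                                bootstrap_servers='localhost:9092',
                                group_id=None,
                                enable_auto_commit=False)
    await consumer.start()
    try:
        first, last = await get_offsets_scope_of_topic_and_set_to_start(consumer, 'example-topic')
        logging.info('offset range: %s -> %s', first, last)
    finally:
        await consumer.stop()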
async def test_initialize_store_builder(get_store_manager):
    store_builder = get_store_manager
    await store_builder.initialize_store_builder()

    assigned_partitions = list()
    last_offsets = dict()
    assigned_partitions.append(TopicPartition('test-store', 0))
    last_offsets[TopicPartition('test-store', 0)] = 0
    test_store_metadata_local = BaseStoreMetaData(assigned_partitions, last_offsets, 0, 1)

    local_store = store_builder.get_local_store()
    local_store_metadata = await local_store.get_metadata()
    assert local_store_metadata.to_dict() == test_store_metadata_local.to_dict()

    assigned_partitions = list()
    last_offsets = dict()
    for i in range(0, 1):
        assigned_partitions.append(TopicPartition('test-store', 0))
    for j in range(0, 1):
        last_offsets[TopicPartition('test-store', 0)] = 0
    test_store_metadata_global = BaseStoreMetaData(assigned_partitions, last_offsets, 0, 1)

    global_store = store_builder.get_global_store()
    global_store_metadata = await global_store.get_metadata()
    assert global_store_metadata.to_dict() == test_store_metadata_global.to_dict()
async def test_local_memory_get_metadata(get_local_memory_store_connection):
    local_memory_store = get_local_memory_store_connection
    assigned_partitions = [TopicPartition('test', 2)]
    last_offsets = {TopicPartition('test', 2): 0}
    current_instance = 2
    nb_replica = 4
    db_meta = await local_memory_store.get_metadata()
    local_meta = BaseStoreMetaData(assigned_partitions, last_offsets, current_instance, nb_replica).to_dict()
    assert db_meta.to_dict() == local_meta
async def test_global_memory_store_set_store_position(get_global_memory_store_connection):
    global_memory_store = get_global_memory_store_connection
    assigned_partitions = [TopicPartition('test', 2)]
    last_offsets = {TopicPartition('test', 2): 0}
    current_instance = 2
    nb_replica = 4
    await global_memory_store.set_store_position(current_instance, nb_replica, assigned_partitions, last_offsets)
    db_meta = BaseStoreMetaData(assigned_partitions, last_offsets, current_instance, nb_replica)
    r_db_meta = await global_memory_store.get_metadata()
    assert r_db_meta.to_dict() == db_meta.to_dict()
async def test_global_memory_update_metadata_tp_offset(get_global_memory_store_connection):
    global_memory_store = get_global_memory_store_connection
    tp = TopicPartition('test', 2)
    await global_memory_store.update_metadata_tp_offset(tp, 4)
    assigned_partitions = [TopicPartition('test', 2)]
    last_offsets = {TopicPartition('test', 2): 4}
    current_instance = 2
    nb_replica = 4
    global_meta = BaseStoreMetaData(assigned_partitions, last_offsets, current_instance, nb_replica).to_dict()
    db_meta = await global_memory_store.get_metadata()
    assert db_meta.to_dict() == global_meta
async def test_global_memory_store_get_all(get_global_memory_store_connection):
    global_memory_store = get_global_memory_store_connection
    await global_memory_store.global_set('test1', b'value1')
    await global_memory_store.global_set('test2', b'value2')
    assigned_partitions = [TopicPartition('test', 2)]
    last_offsets = {TopicPartition('test', 2): 0}
    current_instance = 2
    nb_replica = 4
    meta = BaseStoreMetaData(assigned_partitions, last_offsets, current_instance, nb_replica)
    assert await global_memory_store.get_all() == {
        'test1': b'value1',
        'test2': b'value2',
        'metadata': bytes(str(meta.to_dict()), 'utf-8')
    }
async def seek_custom(self, topic: str = None, partition: int = None, offset: int = None) -> None:
    """ Seek to a custom offset

    Args:
        topic (str): Topic name
        partition (int): Partition value
        offset (int): Offset value

    Returns:
        None
    """
    if not self._running:
        await self.start_consumer()
    if partition is not None and topic is not None and offset is not None:
        try:
            await self._kafka_consumer.seek(TopicPartition(topic, partition), offset)
        except ValueError as err:
            self.logger.exception(f'{err.__str__()}')
            raise OffsetError
        except TypeError as err:
            self.logger.exception(f'{err.__str__()}')
            raise TopicPartitionError
        except IllegalStateError as err:
            self.logger.exception(f'{err.__str__()}')
            raise NoPartitionAssigned
        self.logger.debug(f'Custom seek for topic : {topic}, partition : {partition}, offset : {offset}')
    else:
        raise KafkaConsumerError('Custom seek needs topic, partition and offset arguments', 500)
async def seek_to_end(self, partition: int = None, topic: str = None) -> None:
    """ Seek to the latest offset, mode 'latest'

    Args:
        partition (int): Partition value
        topic (str): Topic name

    Returns:
        None
    """
    if not self._running:
        await self.start_consumer()
    if partition is not None and topic is not None:
        try:
            await self._kafka_consumer.seek_to_end(TopicPartition(topic, partition))
        except IllegalStateError as err:
            self.logger.exception(f'{err.__str__()}')
            raise NoPartitionAssigned
        except TypeError as err:
            self.logger.exception(f'{err.__str__()}')
            raise TopicPartitionError
        self.logger.debug(f'Seek to end for topic : {topic}, partition : {partition}')
    else:
        try:
            await self._kafka_consumer.seek_to_end()
        except IllegalStateError as err:
            self.logger.exception(f'{err.__str__()}')
            raise NoPartitionAssigned
        self.logger.debug('Seek to end for all topics & partitions')
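# Hedged usage sketch for the seek helpers above. 'consumer' is assumed to be an instance of
# the consumer class these methods belong to, already started and assigned; the topic name,
# partition and offset are illustrative only.
async def _example_seek(consumer):
    # Jump to a specific offset on one partition
    await consumer.seek_custom(topic='example-topic', partition=0, offset=42)
    # Jump to the end of a single partition...
    await consumer.seek_to_end(partition=0, topic='example-topic')
    # ...or to the end of every assigned partition
    await consumer.seek_to_end()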
async def consume_by_topic(websocket, topic):
    consumer = await get_consumer()
    await consumer.start()
    tp = TopicPartition(topic, 0)
    consumer.assign([tp])
    current_offsets = await consumer.end_offsets([tp])
    # Start at most 200 messages behind the end of the partition
    target_offset = current_offsets[tp] - 200
    target_offset = 0 if target_offset < 0 else target_offset
    consumer.seek(tp, target_offset)
    try:
        counter = 0
        while websocket.client_state == WebSocketState.CONNECTED:
            msg_dict = await consumer.getmany(tp)
            for _, msgs in msg_dict.items():
                item_cnt = len(msgs)
                if item_cnt:
                    logging.info(f"retrieved {item_cnt} items at once")
                for msg in msgs:
                    await websocket.send_text(
                        json.dumps({
                            "message": msg.value.decode(),
                            "timestamp": msg.timestamp
                        }))
                    counter += 1
                    if counter % 10 == 0:
                        await websocket.send_text("ping")
                        await websocket.receive_text()
    except WebSocketDisconnect as e:
        logging.info("Exited: %s", e)
    finally:
        await consumer.stop()
async def seek_to_offset(consumer: AIOKafkaConsumer, topic: str, start: int = -1):
    """ Seek to the last message in topic. """
    partition_number, offset = -1, -1
    # Loop through partitions and find the latest offset
    for p in consumer.partitions_for_topic(topic):
        tp = TopicPartition(topic, p)
        committed = await consumer.committed(tp)
        await consumer.seek_to_end(tp)
        last_offset = await consumer.position(tp)
        # print("topic: {} partition: {} committed: {} last: {}".format(topic, p, committed, last_offset))
        if offset < last_offset:
            offset = last_offset
            partition_number = p
    tp = TopicPartition(topic, partition_number)
    consumer.seek(tp, offset - start)
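# Hedged usage sketch for seek_to_offset; the broker address and topic are illustrative
# assumptions. Note that with the default start=-1 the consumer ends up positioned one past
# the end offset, so start=1 is used here to re-read the most recent message (assuming the
# topic already contains at least one message).
from aiokafka import AIOKafkaConsumer


async def _example_seek_to_offset():
    consumer = AIOKafkaConsumer('example-topic',
                                bootstrap_servers='localhost:9092',
                                group_id=None)
    await consumer.start()
    try:
        await seek_to_offset(consumer, 'example-topic', start=1)
        msg = await consumer.getone()
        print(msg.offset, msg.value)
    finally:
        await consumer.stop()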
async def _seek_offsets(self, consumer, topics, offsets):
    for topic in topics:
        parts = consumer.partitions_for_topic(topic)
        for part in parts:
            tp = TopicPartition(topic, part)
            max_offsets = await consumer.end_offsets([tp])
            max_offsets = max_offsets[tp]
            if 0 > offsets or offsets > max_offsets:
                raise ValueError("offset out of range for partition")
            consumer.seek(tp, offsets)
async def fetch_report(self):
    '''Get a single report in a context, commit only if handled successfully.
    '''
    msg = await self._cons.getone()
    tp = TopicPartition(msg.topic, msg.partition)
    site = read_report(msg.value)
    print('Received', site, flush=True)
    yield site
    # Commit the offset only after the caller has finished handling the report
    await self._cons.commit({tp: msg.offset + 1})
async def _seek(self, topic, step):
    partition = self._consumer.partitions_for_topic(topic)
    if not partition:
        return
    pid = partition.pop()
    tp = TopicPartition(topic, pid)
    try:
        position = await self._consumer.position(tp)
    except IllegalStateError:
        position = 0
    if position > 0:
        self._consumer.seek(tp, position + step)
async def _do_some_work(self, work, topics, group_id, offsets, listener,
                        bootstrap_servers, enable_commit, **kwargs):
    consumer = AIOKafkaConsumer(
        loop=self.loop,
        bootstrap_servers=bootstrap_servers,
        group_id=group_id,
        fetch_max_wait_ms=self.fetch_max_wait_ms,
        max_partition_fetch_bytes=self.max_partition_fetch_bytes,
        request_timeout_ms=self.request_timeout_ms,
        auto_offset_reset=self.auto_offset_reset,
        enable_auto_commit=self.enable_auto_commit,
        auto_commit_interval_ms=self.auto_commit_interval_ms,
        check_crcs=self.check_crcs,
        metadata_max_age_ms=self.metadata_max_age_ms,
        heartbeat_interval_ms=self.heartbeat_interval_ms,
        session_timeout_ms=self.session_timeout_ms,
        exclude_internal_topics=self.exclude_internal_topics,
        connections_max_idle_ms=self.connections_max_idle_ms,
        **kwargs)
    consumer.subscribe(topics=topics, listener=listener)
    await consumer.start()
    if offsets is not None:
        await self._seek_offsets(consumer, topics, offsets)
    try:
        async for msg in consumer:
            try:
                if msg is None:
                    continue
                await work(msg)
                if enable_commit:
                    meta = "Some utf-8 metadata"
                    tp = TopicPartition(msg.topic, msg.partition)
                    offsets = {tp: OffsetAndMetadata(msg.offset + 1, meta)}
                    await consumer.commit(offsets)
            except OffsetOutOfRangeError as err:
                tps = err.args[0].keys()
                await consumer.seek_to_beginning(*tps)
                continue
            except Exception:
                root_logger.error(f'{traceback.format_exc()}')
                continue
    except Exception as e:
        raise e
    finally:
        await consumer.stop()
async def _run(self):
    self.status = RedisStatus(prefix=self.stat_cfg.prefix,
                              host=self.stat_cfg.host,
                              pwd=self.stat_cfg.pwd,
                              db=self.stat_cfg.db)
    await self.status.open()
    for part in self.queue_para.partitions:
        topic_partition = TopicPartition(self.topic, part)
        offset = await self.status.read(self.topic, partition=part)
        logging.info(f"thread:{self.name} {self.topic} read offset {offset}")
        self.partitions[topic_partition] = offset
    await self.status.close()
    # tasks = [asyncio.ensure_future(self._queue_client(partition)) for partition in self.partitions]
    tasks = [
        asyncio.ensure_future(self._pull(self.partitions)),
        asyncio.ensure_future(self._detect())
    ]
    await asyncio.gather(*tasks)
async def delete_from_local_store(self, key: str) -> RecordMetadata:
    """ Delete a key from the local store

    Args:
        key (str): Object key as string

    Returns:
        RecordMetadata: Metadata of the delete record sent to the store topic
    """
    if self._local_store.is_initialized():
        store_builder = StoreRecord(key=key, ctype='del', value=b'')
        try:
            record_metadata: RecordMetadata = await self._store_producer.send_and_await(store_builder,
                                                                                        self._topic_store)
            await self._local_store.update_metadata_tp_offset(
                TopicPartition(record_metadata.topic, record_metadata.partition), record_metadata.offset)
        except (KeyErrorSendEvent, ValueErrorSendEvent, TypeErrorSendEvent, FailToSendEvent):
            raise FailToSendStoreRecord
        await self._local_store.delete(key)
        return record_metadata
    else:
        raise UninitializedStore
async def pull(loop, server, topic, group_id, batch_size=1, shuffle=False):
    client = AIOKafkaConsumer(
        topic,
        loop=loop,
        bootstrap_servers=server,
        group_id=group_id,
        auto_offset_reset='earliest',
        enable_auto_commit=False,
    )
    await client.start()

    # Wait until partition metadata for the topic is available
    partitions = client.partitions_for_topic(topic)
    while partitions is None:
        await asyncio.sleep(0.1)
        partitions = client.partitions_for_topic(topic)
    partitions = list(partitions)
    partitions = [TopicPartition(topic, partition) for partition in partitions]

    # current_offsets = await client.beginning_offsets(partitions)
    end_offsets = await client.end_offsets(partitions)
    current_partition = 0
    done = False

    async def next_partition(current_partition):
        current_partition += 1
        # todo recursive
        if current_partition >= len(partitions):
            return None
        current_offset = await client.position(partitions[current_partition])
        if current_offset >= end_offsets[partitions[current_partition]]:
            current_partition = await next_partition(current_partition)
            print("remaining record: {}, partition: {}".format(remaining_records, current_partition))
        return current_partition

    current_offset = await client.position(partitions[current_partition])
    if current_offset >= end_offsets[partitions[current_partition]]:
        done = True

    while done is False:
        remaining_records = batch_size
        batch = []
        while remaining_records > 0:
            msg = await client.getone(partitions[current_partition])
            batch.append(msg)
            remaining_records -= 1
            current_offset = await client.position(partitions[current_partition])
            if current_offset >= end_offsets[partitions[current_partition]]:
                current_partition = await next_partition(current_partition)
                print("remaining record: {}, partition: {}".format(remaining_records, current_partition))
                if current_partition is None:
                    done = True
                    break
        if len(batch) > 0:
            yield batch

    '''
    data = await client.getmany(max_records=batch_size)
    print(data)
    #for tp, messages in data.items():
    messages = data[topic]
    if len(messages) > 0:
        batch = []
        for msg in messages:
            batch.append(msg)
        yield(batch)
    else:
        done = True
    '''
    await client.stop()
async def listen_event(self, mod: str = 'earliest') -> None:
    """ Listens for events from assigned topic / partitions

    Args:
        mod: Start position of consumer (earliest, latest, committed)

    Returns:
        None
    """
    if not self._running:
        await self.load_offsets(mod)

    self.pprint_consumer_offsets()
    # await self.getone()
    async for msg in self._kafka_consumer:
        # Debug display
        self.logger.debug("---------------------------------------------------------------------------------")
        self.logger.debug(f'New Message on consumer {self._client_id}, Topic {msg.topic}, '
                          f'Partition {msg.partition}, Offset {msg.offset}, Key {msg.key}, Value {msg.value},'
                          f'Headers {msg.headers}')
        self.pprint_consumer_offsets()
        self.logger.debug("---------------------------------------------------------------------------------")

        tp = TopicPartition(msg.topic, msg.partition)
        self.__current_offsets[tp] = msg.offset
        # self.last_offsets = await self.get_last_offsets()

        sleep_duration_in_ms = self._retry_interval
        for retries in range(0, self._max_retries):
            try:
                decode_dict = msg.value
                event_class = decode_dict['event_class']
                handler_class = decode_dict['handler_class']

                logging.debug(f'Event name : {event_class.event_name()}  Event content :\n{event_class.__dict__}')

                # Calls handle if event handler is an instance of BaseEventHandler
                if isinstance(handler_class, BaseEventHandler):
                    result = await handler_class.handle(event=event_class, group_id=self._group_id, tp=tp,
                                                        offset=msg.offset)
                elif isinstance(handler_class, BaseCommandHandler):
                    result = await handler_class.execute(event=event_class, group_id=self._group_id, tp=tp,
                                                         offset=msg.offset)
                elif isinstance(handler_class, BaseResultHandler):
                    result = await handler_class.on_result(event=event_class, group_id=self._group_id, tp=tp,
                                                           offset=msg.offset)
                else:
                    # Otherwise raise KafkaConsumerUnknownHandler
                    raise UnknownHandler

                # If result is None (no transactional process), check if consumer has a
                # group_id (mandatory to commit in Kafka)
                if result is None and self._group_id is not None:
                    # Check if next commit is possible (Kafka offset)
                    if self.__last_committed_offsets[tp] is None or \
                            self.__last_committed_offsets[tp] <= self.__current_offsets[tp]:
                        self.logger.debug(f'Commit msg {event_class.event_name()} in topic {msg.topic} partition '
                                          f'{msg.partition} offset {self.__current_offsets[tp] + 1}')
                        await self._kafka_consumer.commit({tp: self.__current_offsets[tp] + 1})
                        self.__last_committed_offsets[tp] = msg.offset + 1
                # Transactional process, no commit
                elif result == 'transaction':
                    self.logger.debug('Transaction end')
                    self.__current_offsets = await self.get_current_offsets()
                    self.__last_committed_offsets = await self.get_last_committed_offsets()
                # Otherwise raise KafkaConsumerUnknownHandlerReturn
                else:
                    raise UnknownHandlerReturn

                # Break if everything was successfully processed
                break
            except IllegalStateError as err:
                self.logger.exception(f'{err.__str__()}')
                raise NoPartitionAssigned
            except ValueError as err:
                self.logger.exception(f'{err.__str__()}')
                raise OffsetError
            except CommitFailedError as err:
                self.logger.exception(f'{err.__str__()}')
                raise err
            except (KafkaError, HandlerException) as err:
                self.logger.exception(f'{err.__str__()}')
                sleep_duration_in_s = int(sleep_duration_in_ms / 1000)
                await asyncio.sleep(sleep_duration_in_s)
                sleep_duration_in_ms = sleep_duration_in_ms * self._retry_backoff_coeff
                # Stop the consumer once the last retry has been exhausted
                if retries == self._max_retries - 1:
                    await self.stop_consumer()
                    logging.error('Max retries reached, close consumer and exit')
                    exit(1)
async def listen_store_records(self, rebuild: bool = False) -> None:
    """ Listens for events for store construction

    Args:
        rebuild (bool): if True, the consumer seeks to the first offset to rebuild its own state

    Returns:
        None
    """
    if self._store_builder is None:
        raise KeyError

    self.logger.info('Start listen store records')

    await self.start_consumer()
    await self._store_builder.initialize_store_builder()

    if not self._running:
        raise KafkaConsumerError('Fail to start tongaConsumer', 500)

    # Check if store is ready
    self.check_if_store_is_ready()
    self.pprint_consumer_offsets()

    async for msg in self._kafka_consumer:
        # Debug display
        self.logger.debug("---------------------------------------------------------------------")
        self.logger.debug(f'New Message on store builder consumer {self._client_id}, Topic {msg.topic}, '
                          f'Partition {msg.partition}, Offset {msg.offset}, Key {msg.key}, Value {msg.value},'
                          f'Headers {msg.headers}')
        self.pprint_consumer_offsets()
        self.logger.debug("---------------------------------------------------------------------")

        # Check if store is ready
        self.check_if_store_is_ready()

        tp = TopicPartition(msg.topic, msg.partition)
        self.__current_offsets[tp] = msg.offset

        sleep_duration_in_ms = self._retry_interval
        for retries in range(0, self._max_retries):
            try:
                decode_dict = msg.value
                event_class: BaseModel = decode_dict['event_class']
                handler_class: BaseStoreRecordHandler = decode_dict['handler_class']

                logging.debug(f'Store event name : {event_class.event_name()}\nEvent '
                              f'content :\n{event_class.__dict__}\n')

                result = None
                if msg.partition == self._store_builder.get_current_instance():
                    # Calls local_store_handler if event is an instance of BaseStoreRecord
                    if rebuild and not self._store_builder.get_local_store().is_initialized():
                        if isinstance(event_class, BaseStoreRecord):
                            result = await handler_class.local_store_handler(store_record=event_class,
                                                                             group_id=self._group_id, tp=tp,
                                                                             offset=msg.offset)
                        else:
                            raise UnknownStoreRecordHandler
                elif msg.partition != self._store_builder.get_current_instance():
                    if isinstance(event_class, BaseStoreRecord):
                        result = await handler_class.global_store_handler(store_record=event_class,
                                                                          group_id=self._group_id, tp=tp,
                                                                          offset=msg.offset)
                    else:
                        raise UnknownStoreRecordHandler

                # If result is None (no transactional process), check if consumer has a
                # group_id (mandatory to commit in Kafka)
                # TODO Add commit store later V2
                # if result is None and self._group_id is not None:
                #     # Check if next commit is possible (Kafka offset)
                #     if self.__last_committed_offsets[tp] is None or \
                #             self.__last_committed_offsets[tp] <= self.__current_offsets[tp]:
                #         self.logger.debug(f'Commit msg {event_class.event_name()} in topic {msg.topic} partition '
                #                           f'{msg.partition} offset {self.__current_offsets[tp] + 1}')
                #         await self._kafka_consumer.commit({tp: self.__current_offsets[tp] + 1})
                #         self.__last_committed_offsets[tp] = msg.offset + 1
                #     # Otherwise raise ValueError
                #     else:
                #         raise ValueError

                # Break if everything was successfully processed
                break
            except IllegalStateError as err:
                self.logger.exception(f'{err.__str__()}')
                raise NoPartitionAssigned
            except ValueError as err:
                self.logger.exception(f'{err.__str__()}')
                raise OffsetError
            except CommitFailedError as err:
                self.logger.exception(f'{err.__str__()}')
                raise err
            except (KafkaError, HandlerException) as err:
                self.logger.exception(f'{err.__str__()}')
                sleep_duration_in_s = int(sleep_duration_in_ms / 1000)
                await asyncio.sleep(sleep_duration_in_s)
                sleep_duration_in_ms = sleep_duration_in_ms * self._retry_backoff_coeff
                # Stop the consumer once the last retry has been exhausted
                if retries == self._max_retries - 1:
                    await self.stop_consumer()
                    logging.error('Max retries reached, close consumer and exit')
                    exit(1)
async def kafka_consumer_prepare(brokers, topic: str, default_offspec="-0", parts_offspecs: dict = {}, **consumer_kw):
    """
    :param brokers: bootstrap servers
    :param str topic: topic to consume from (... to assign with)
    :param dict parts_offspecs: (partition-id -> offset-spec), where offset-spec: "[+-]<integer>"
    :return: AIOKafkaConsumer
    """
    if isinstance(brokers, str):
        brokers = brokers.split(",")
    brokers = list(brokers)
    assert brokers, "Empty list of kafka brokers"

    # connect & fetch metadata
    c = AIOKafkaConsumer(bootstrap_servers=",".join(brokers), enable_auto_commit=False, group_id=None, **consumer_kw)
    await c.start()
    all_topics = await c.topics()
    assert topic in all_topics, str((topic, all_topics))
    topic_parts = c.partitions_for_topic(topic)

    # perform seeking for each partition
    # --
    tp_ofssp = {
        TopicPartition(topic, _part_id): parts_offspecs.get(_part_id, default_offspec)
        for _part_id in topic_parts
    }
    c.assign(list(tp_ofssp))
    from pprint import pprint
    pprint(tp_ofssp)
    pprint(parts_offspecs)
    await c.start()  # NB: "restart" is necessary for the assignment to propagate
    # across all the aiokafka's (sic!) abstraction layers
    assert c.assignment().union(tp_ofssp) == tp_ofssp.keys()
    # --
    tp_ranges = {
        tp: [beg, None]
        for tp, beg in (await c.beginning_offsets(list(tp_ofssp))).items()
    }
    for tp, end in (await c.end_offsets(list(tp_ofssp))).items():
        tp_ranges[tp][-1] = end

    for tp, offspec in tp_ofssp.items():
        beg, end = tp_ranges[tp]
        offs = None  #: int
        # ---
        if isinstance(offspec, int):
            offs = int(offspec)
        elif isinstance(offspec, str):
            offs = int(offspec[1:] or offspec)
            if len(offspec) > 1:
                if offspec[0] in ("+", " "):
                    offs += beg
                elif offspec[0] == "-":
                    offs = end - offs
                else:
                    try:
                        offs = int(offspec)  # trivial case: string-encoded int
                    except ValueError:
                        raise ValueError(
                            "Invalid partition offset specifier: "
                            "unknown prefix '%s' (0x%.2x)" % (offspec[0], ord(offspec[0])), offspec)
        else:
            raise ValueError("Invalid partition offset specifier: %r" % (offspec,), offspec, type(offspec))
        # ---
        if offs < beg:
            offs = beg
        elif offs > end:
            offs = end
        # ---
        assert isinstance(offs, int)
        c.seek(tp, offs)
        # print(">> seek(%s:%d, %9d)" % (tp.topic, tp.partition, offs))
    # --
    return c
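# Hedged usage sketch for kafka_consumer_prepare; the broker list, topic and offset specs are
# illustrative assumptions. Per the offset-spec convention parsed above, "-100" means "100
# messages before the end of the partition" and "+0" means "the beginning of the partition".
async def _example_prepare():
    consumer = await kafka_consumer_prepare("localhost:9092", "example-topic",
                                            default_offspec="-0",
                                            parts_offspecs={0: "-100", 1: "+0"})
    try:
        batch = await consumer.getmany(timeout_ms=1000)
        for tp, records in batch.items():
            print(tp.partition, len(records))
    finally:
        await consumer.stop()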
def commit_offset(consumer: AIOKafkaConsumer, msg: ConsumerRecord):
    tp = TopicPartition(msg.topic, msg.partition)
    asyncio.create_task(consumer.commit({tp: msg.offset + 1}))
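# Hedged usage sketch for commit_offset: it schedules the commit as a fire-and-forget task,
# so it must be called from inside a running event loop. The broker address, topic and group
# id are illustrative assumptions.
from aiokafka import AIOKafkaConsumer


async def _example_commit_offset():
    consumer = AIOKafkaConsumer('example-topic',
                                bootstrap_servers='localhost:9092',
                                group_id='example-group',
                                enable_auto_commit=False)
    await consumer.start()
    try:
        async for msg in consumer:
            print(msg.offset, msg.value)   # stand-in for real message handling
            commit_offset(consumer, msg)   # commit the offset of the message just handled
    finally:
        await consumer.stop()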
async def initialize_store_builder(self) -> None:
    """ Initializes the store builder and connects the local & global store with the tonga consumer.
    This function seeks to the last committed offset if store_metadata exists.

    Returns:
        None
    """
    # Initialize local store
    self._logger.info('Start initialize store builder')
    if isinstance(self._local_store, LocalStoreMemory):
        # If _local_store is an instance of LocalStoreMemory, auto seek to earliest position for rebuild
        self._logger.info('LocalStoreMemory seek to earliest')
        assigned_partitions = list()
        last_offsets = dict()
        assigned_partitions.append(TopicPartition(self._topic_store, self._current_instance))
        last_offsets[TopicPartition(self._topic_store, self._current_instance)] = 0
        await self._local_store.set_store_position(self._current_instance, self._nb_replica,
                                                   assigned_partitions, last_offsets)
        try:
            await self._store_consumer.load_offsets('earliest')
        except (TopicPartitionError, NoPartitionAssigned) as err:
            self._logger.exception(f'{err.__str__()}')
            raise CanNotInitializeStore
    else:
        try:
            # Try to get local_store_metadata, seek to the last read offset
            local_store_metadata = await self._local_store.get_metadata()
        except StoreKeyNotFound:
            # If metadata doesn't exist in DB, auto seek to earliest position for rebuild
            assigned_partitions = list()
            last_offsets = dict()
            assigned_partitions.append(TopicPartition(self._topic_store, self._current_instance))
            last_offsets[TopicPartition(self._topic_store, self._current_instance)] = 0
            await self._local_store.set_store_position(self._current_instance, self._nb_replica,
                                                       assigned_partitions, last_offsets)
            try:
                await self._store_consumer.load_offsets('earliest')
            except (TopicPartitionError, NoPartitionAssigned) as err:
                self._logger.exception(f'{err.__str__()}')
                raise CanNotInitializeStore
        else:
            # If metadata exists in DB, auto seek to the last position
            try:
                last_offset = local_store_metadata.last_offsets[
                    TopicPartition(self._topic_store, self._current_instance)]
                await self._store_consumer.seek_custom(self._topic_store, self._current_instance, last_offset)
            except (OffsetError, TopicPartitionError, NoPartitionAssigned) as err:
                self._logger.exception(f'{err.__str__()}')
                raise CanNotInitializeStore
            await self._local_store.set_store_position(self._current_instance, self._nb_replica,
                                                       local_store_metadata.assigned_partitions,
                                                       local_store_metadata.last_offsets)

    # Initialize global store
    if isinstance(self._global_store, GlobalStoreMemory):
        # If _global_store is an instance of GlobalStoreMemory, auto seek to earliest position for rebuild
        self._logger.info('GlobalStoreMemory seek to earliest')
        assigned_partitions = list()
        last_offsets = dict()
        for i in range(0, self._nb_replica):
            assigned_partitions.append(TopicPartition(self._topic_store, i))
        for j in range(0, self._nb_replica):
            last_offsets[TopicPartition(self._topic_store, j)] = 0
        await self._global_store.set_store_position(self._current_instance, self._nb_replica,
                                                    assigned_partitions, last_offsets)
        try:
            await self._store_consumer.load_offsets('earliest')
        except (TopicPartitionError, NoPartitionAssigned) as err:
            self._logger.exception(f'{err.__str__()}')
            raise CanNotInitializeStore
    else:
        try:
            global_store_metadata = await self._global_store.get_metadata()
        except StoreKeyNotFound:
            # If metadata doesn't exist in DB
            assigned_partitions = list()
            last_offsets = dict()
            for i in range(0, self._nb_replica):
                assigned_partitions.append(TopicPartition(self._topic_store, self._current_instance))
            for j in range(0, self._nb_replica):
                last_offsets[TopicPartition(self._topic_store, self._current_instance)] = 0
            await self._global_store.set_store_position(self._current_instance, self._nb_replica,
                                                        assigned_partitions, last_offsets)
            try:
                await self._store_consumer.load_offsets('earliest')
            except (TopicPartitionError, NoPartitionAssigned) as err:
                self._logger.exception(f'{err.__str__()}')
                raise CanNotInitializeStore
        else:
            # If metadata exists in DB
            for tp, offset in global_store_metadata.last_offsets.items():
                try:
                    await self._store_consumer.seek_custom(tp.topic, tp.partition, offset)
                except (OffsetError, TopicPartitionError, NoPartitionAssigned) as err:
                    self._logger.exception(f'{err.__str__()}')
                    raise CanNotInitializeStore
            await self._global_store.set_store_position(self._current_instance, self._nb_replica,
                                                        global_store_metadata.assigned_partitions,
                                                        global_store_metadata.last_offsets)
def to_topics_partition(self) -> TopicPartition:
    return TopicPartition(topic=self._topic, partition=self._partition)
def assign(self, cluster: ClusterMetadata, members: Dict[str, ConsumerProtocolMemberMetadata]) \
        -> Dict[str, ConsumerProtocolMemberAssignment]:
    """Assign function called by aiokafka to assign each consumer to the right topic partitions.

    Args:
        cluster (ClusterMetadata): Kafka-python cluster metadata (more detail in kafka-python documentation)
        members (Dict[str, ConsumerProtocolMemberMetadata]): members dict which contains
            ConsumerProtocolMemberMetadata (more detail in kafka-python documentation)

    Returns:
        Dict[str, ConsumerProtocolMemberAssignment]: dict which contains members and assignment protocol
            (more detail in kafka-python documentation)
    """
    self.logger.info('Statefulset Partition Assignor')
    self.logger.debug('Cluster = %s\nMembers = %s', cluster, members)

    # Get all topics
    all_topics: Set = set()
    for key, metadata in members.items():
        self.logger.debug('Key = %s\nMetadata = %s', key, metadata)
        all_topics.update(metadata.subscription)

    # Get all partitions by topic name
    all_topic_partitions = []
    for topic in all_topics:
        partitions = cluster.partitions_for_topic(topic)
        if partitions is None:
            self.logger.warning('No partition metadata for topic %s', topic)
            continue
        for partition in partitions:
            all_topic_partitions.append(TopicPartition(topic, partition))
    # Sort partitions
    all_topic_partitions.sort()

    # Create default dict with lambda
    assignment: DefaultDict[str, Any] = collections.defaultdict(lambda: collections.defaultdict(list))

    advanced_assignor_dict = self.get_advanced_assignor_dict(all_topic_partitions)

    for topic, partitions in advanced_assignor_dict.items():
        for member_id, member_data in members.items():
            # Load member assignor data
            user_data = json.loads(member_data.user_data)
            # Get number of partitions by topic name
            topic_number_partitions = len(partitions)

            # Assignment logic when nb_replica equals topic_number_partitions (used by StoreBuilder to
            # assign each partition to the right instance; in this case nb_replica is the same as
            # topic_number_partitions)
            if user_data['nb_replica'] == topic_number_partitions:
                if user_data['assignor_policy'] == 'all':
                    for partition in partitions:
                        assignment[member_id][topic].append(partition)
                elif user_data['assignor_policy'] == 'only_own':
                    if user_data['instance'] in partitions:
                        assignment[member_id][topic].append(partitions[user_data['instance']])
                else:
                    raise BadAssignorPolicy
            else:
                raise NotImplementedError

    self.logger.debug('Assignment = %s', assignment)

    protocol_assignment = {}
    for member_id in members:
        protocol_assignment[member_id] = ConsumerProtocolMemberAssignment(self.version,
                                                                          sorted(assignment[member_id].items()),
                                                                          members[member_id].user_data)
    self.logger.debug('Protocol Assignment = %s', protocol_assignment)
    return protocol_assignment