def test_generate_new_offsets():
    offsets = [
        Offset('group_1', 'topic_1', 0, 100),
        Offset('group_1', 'topic_1', 1, 200),
        Offset('group_1', 'topic_1', 2, 500),
        Offset('group_1', 'topic_1', 3, 505),
    ]
    # All four partitions carry the same message sequence.
    messages = [
        Message(1, 'key_1', 'val_1', 123456789, None),
        Message(99, 'key_2', 'val_2', 123456789, None),
        Message(101, 'key_3', 'val_3', 123456789, None),
        Message(199, 'key_4', 'val_4', 123456789, None),
        Message(499, 'key_5', 'val_5', 123456789, None),
    ]
    partitions = [
        Partition('topic_1', 0, messages),
        Partition('topic_1', 1, messages),
        Partition('topic_1', 2, messages),
        Partition('topic_1', 3, messages),
    ]
    new_offsets = generate_new_offsets(offsets, partitions)
    assert new_offsets == [
        Offset(consumer_group='group_1', topic='topic_1', partition=0, value=2),
        Offset(consumer_group='group_1', topic='topic_1', partition=1, value=4),
        Offset(consumer_group='group_1', topic='topic_1', partition=2, value=5),
        Offset(consumer_group='group_1', topic='topic_1', partition=3, value=5),
    ]
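
# For reference, a minimal sketch of generate_new_offsets consistent with
# the test above. This is a hypothetical reconstruction, not the actual
# ksnap implementation: it assumes _calculate_new_offset returns the count
# of dumped messages below the current offset value (see the parametrized
# test at the end of this section) and that partitions are looked up by a
# (topic, partition) dict, as the imported _generate_partition_dict helper
# suggests.
def generate_new_offsets_sketch(offsets, partitions):
    partition_dict = {(p.topic, p.name): p for p in partitions}
    new_offsets = []
    for offset in offsets:
        partition = partition_dict[(offset.topic, offset.partition)]
        # Re-point the consumer group at its position within the dumped,
        # gap-free message sequence.
        new_value = _calculate_new_offset(offset.value, partition)
        new_offsets.append(
            Offset(offset.consumer_group, offset.topic,
                   offset.partition, new_value))
    return new_offsets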
def test_partition_from_file(tmpdir):
    partition = Partition(
        'topic_name_1', 1, [Message(1, 'Key', 'Message_1', 123456789)])
    file_path = os.path.join(tmpdir, 'topic_name_1_1.sqlite3')
    partition.to_file(file_path)
    from_file_partition = Partition.from_file(file_path)
    assert from_file_partition.topic == partition.topic
    assert from_file_partition.name == partition.name
    assert from_file_partition.messages == partition.messages
def backup(self):
    # Read topic messages from kafka broker
    if self.config.kafka_library == 'confluent':
        reader = ConfluentKafkaReader(self.config.brokers)
    else:
        reader = PythonKafkaReader(self.config.brokers)
    if self.config.ignore_missing_topics:
        logger.debug('Filter out topics that are not in Kafka broker')
        broker_topic_names = reader.list_topics()
        topics = []
        for t in self.config.topics:
            if t not in broker_topic_names:
                logger.debug(f'Ignore topic {t} since it is '
                             'missing in kafka broker')
                continue
            topics.append(t)
    else:
        topics = self.config.topics
    reader.subscribe(topics)
    msg_dict = reader.read(timeout=self.config.consumer_timeout)
    partitions = [
        Partition(topic, partition_no, msgs)
        for (topic, partition_no), msgs in msg_dict.items()
    ]
    # Fetch consumer group offsets
    admin_client = ConfluentAdminClient(self.config.brokers)
    offsets = admin_client.get_consumer_offsets(
        topics, no_of_threads=self.config.threads)
    # Write topic messages and consumer offsets to disk
    data_flow_manager = DataFlowManager(self.config.data)
    data_flow_manager.write(offsets, partitions)
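
# A hedged usage sketch of the backup flow. The snapshot class and the
# Config field names below are assumptions inferred from the attributes
# backup() reads off self.config; they are not confirmed ksnap API.
#
#     config = Config(
#         brokers='localhost:9092',
#         topics=['topic_1', 'topic_2'],
#         data='/var/backups/kafka',    # root dir handed to DataFlowManager
#         kafka_library='confluent',    # or anything else for PythonKafkaReader
#         ignore_missing_topics=True,
#         consumer_timeout=10,
#         threads=4,
#     )
#     KafkaSnapshot(config).backup()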
def test_partition_to_file(tmpdir):
    partition = Partition(
        'topic_name_1', 1,
        [Message(1, 'Key', 'Message_1', 123456789, [('key', b'val_1')])])
    file_path = os.path.join(tmpdir, 'topic_name_1_1.sqlite3')
    partition.to_file(file_path)
    conn = sqlite3.connect(file_path)
    cursor = conn.cursor()
    cursor.execute('SELECT * FROM metadata LIMIT 1')
    metadata = cursor.fetchone()
    assert metadata == ('topic_name_1', 1)
    cursor.execute('SELECT * FROM data')
    # fetchall() rather than fetchmany(): fetchmany() defaults to a single
    # row, which would let extra rows slip past the length assertion below.
    messages = cursor.fetchall()
    msgs = [Message.from_row(*m) for m in messages]
    assert msgs
    assert len(msgs) == 1
    assert msgs == [Message(1, 'Key', 'Message_1', 123456789,
                            [('key', b'val_1')])]
    conn.close()
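
# The two partition tests pin down the on-disk layout: a metadata table
# holding (topic, partition name) and a data table whose rows round-trip
# through Message.from_row in constructor-argument order. A hypothetical
# sketch of to_file under those assumptions; the column names, the Message
# attribute names (inferred from its constructor order), and the
# pickle-based header serialization are guesses, not the actual ksnap
# schema.
def to_file_sketch(partition, file_path):
    import pickle
    conn = sqlite3.connect(file_path)
    cursor = conn.cursor()
    cursor.execute('CREATE TABLE metadata (topic TEXT, name INTEGER)')
    # "offset" is quoted because it is an SQL keyword.
    cursor.execute('CREATE TABLE data ("offset" INTEGER, key TEXT, '
                   'value TEXT, timestamp INTEGER, headers BLOB)')
    cursor.execute('INSERT INTO metadata VALUES (?, ?)',
                   (partition.topic, partition.name))
    cursor.executemany(
        'INSERT INTO data VALUES (?, ?, ?, ?, ?)',
        [(m.offset, m.key, m.value, m.timestamp, pickle.dumps(m.headers))
         for m in partition.messages])
    conn.commit()
    conn.close()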
def test_data_flow_manager_write(tmpdir):
    data_flow_manager = DataFlowManager(tmpdir)
    offsets = [
        Offset('group_1', 'topic_1', 0, 10),
        Offset('group_1', 'topic_2', 0, 1)
    ]
    partitions = [
        Partition('topic_1', 0, [Message(0, 'key_1', 'val_1', 123456789, None)]),
        Partition('topic_1', 1, [Message(0, 'key_1', 'val_1', 123456789, None)]),
        Partition('topic_2', 0, [Message(0, 'key_1', 'val_1', 123456789, None)]),
    ]
    data_flow_manager.write(offsets, partitions)
    assert os.path.isdir(os.path.join(tmpdir, 'partitions'))
    assert len(os.listdir(os.path.join(tmpdir, 'partitions'))) == 3
    assert os.path.isfile(os.path.join(tmpdir, 'offsets.sqlite3'))
def test_data_flow_manager_read(tmpdir):
    data_flow_manager = DataFlowManager(tmpdir)
    offsets = [
        Offset('group_1', 'topic_1', 0, 10),
        Offset('group_1', 'topic_2', 0, 1)
    ]
    partitions = [
        Partition('topic_1', 0, [Message(0, 'key_1', 'val_1', 123456789, None)]),
        Partition('topic_1', 1, [Message(0, 'key_1', 'val_1', 123456789, None)]),
        Partition('topic_2', 0, [Message(0, 'key_1', 'val_1', 123456789, None)]),
    ]
    data_flow_manager.write(offsets, partitions)
    from_disk_offsets, from_disk_partitions = data_flow_manager.read()
    assert from_disk_offsets == offsets
    assert from_disk_partitions == partitions
def read(
    self, topics: Optional[List[str]] = None
) -> Tuple[List[Offset], List[Partition]]:
    partitions = []
    for file_name in os.listdir(self.partition_file_dir):
        if not file_name.endswith('sqlite3'):
            continue
        file_path = os.path.join(self.partition_file_dir, file_name)
        partition = Partition.from_file(file_path)
        logger.debug(f'Read {len(partition.messages)} messages for topic: '
                     f'{partition.topic} partition: {partition.name} '
                     'from disk')
        # TODO: Consider using filename as filter, or write required
        # metadata about filename and topic/partition in one main sqlite3
        # file.
        if topics and partition.topic not in topics:
            continue
        partitions.append(partition)
    offset_manager = OffsetManager.from_file(self.offset_file_path)
    # Only fall back to the full offset list when no topic filter is given;
    # an unconditional assignment here would silently discard the filter.
    if topics:
        offsets = [o for o in offset_manager.offsets if o.topic in topics]
    else:
        offsets = offset_manager.offsets
    return (offsets, partitions)
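
# A short usage sketch of the read path, assuming a DataFlowManager rooted
# at a backup directory with the layout the write tests above assert
# (a partitions/ directory of sqlite3 files plus offsets.sqlite3):
#
#     manager = DataFlowManager('/var/backups/kafka')
#     # Read everything back from disk.
#     offsets, partitions = manager.read()
#     # Or restrict the restore to a subset of topics; both partitions
#     # and consumer offsets are filtered by topic.
#     offsets, partitions = manager.read(topics=['topic_1'])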
def test_partition():
    assert Partition('topic_name', 1, [])
import os
import sqlite3

import pytest

from ksnap.message import Message
from ksnap.partition import Partition
from ksnap.offset import (Offset, OffsetManager, _calculate_new_offset,
                          _generate_partition_dict, generate_new_offsets)

# A single partition shared by the parametrized cases below.
PARTITION = Partition(
    'topic_1', 1,
    [Message(0, 'key_1', 'val_1', 123456789, None),
     Message(99, 'key_2', 'val_2', 123456789, None),
     Message(100, 'key_3', 'val_3', 123456789, None),
     Message(200, 'key_4', 'val_4', 123456789, None),
     Message(499, 'key_5', 'val_5', 123456789, None)])


@pytest.mark.parametrize(
    'curr_offset_val, partition, expected',
    [
        (100, PARTITION, 2),
        (1, PARTITION, 1),
        (500, PARTITION, 5),
        (600, PARTITION, 5),
    ]
)
def test_calculate_new_offset(curr_offset_val, partition, expected):
    new_offset = _calculate_new_offset(curr_offset_val, partition)
    assert new_offset == expected
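
# A minimal sketch of _calculate_new_offset consistent with the cases
# above: the new offset is the number of dumped messages whose original
# broker offset lies strictly below the consumer group's committed offset.
# This is a hypothetical reconstruction, assuming messages are sorted by
# offset and that a Message's first field is accessible as .offset.
def _calculate_new_offset_sketch(curr_offset_val, partition):
    from bisect import bisect_left
    broker_offsets = [m.offset for m in partition.messages]
    # bisect_left returns the count of offsets strictly below
    # curr_offset_val, which is exactly the consumer's position in the
    # re-ingested, gap-free message sequence.
    return bisect_left(broker_offsets, curr_offset_val)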