def test_transaction_api():
    """
    Exercise the error paths of the transactional producer API.

    init_transactions() is expected to time out, after which every other
    transactional call is expected to be rejected with a state error.
    """

    def expect_error(exc_info, expected_code, retriable):
        # All failures checked here must be non-fatal and must not require
        # an abort; only the error code and the retriable flag vary.
        kafka_error = exc_info.value.args[0]
        assert kafka_error.code() == expected_code
        assert kafka_error.retriable() is retriable
        assert kafka_error.fatal() is False
        assert kafka_error.txn_requires_abort() is False

    p = Producer({"transactional.id": "test"})

    with pytest.raises(KafkaException) as ex:
        p.init_transactions(0.5)
    expect_error(ex, KafkaError._TIMED_OUT, True)

    # Any subsequent APIs will fail since init did not succeed.
    with pytest.raises(KafkaException) as ex:
        p.begin_transaction()
    expect_error(ex, KafkaError._STATE, False)

    # The group metadata is captured before the consumer is closed.
    consumer = Consumer({"group.id": "testgroup"})
    group_metadata = consumer.consumer_group_metadata()
    consumer.close()

    with pytest.raises(KafkaException) as ex:
        p.send_offsets_to_transaction([TopicPartition("topic", 0, 123)],
                                      group_metadata)
    expect_error(ex, KafkaError._STATE, False)

    # commit first, then abort - both take the same timeout argument.
    for timed_call in (p.commit_transaction, p.abort_transaction):
        with pytest.raises(KafkaException) as ex:
            timed_call(0.5)
        expect_error(ex, KafkaError._STATE, False)
def main(args):
    """
    Run a consume-transform-produce loop with transactional commits.

    Messages are read from one partition of the input topic, transformed with
    process_input() and produced to the output topic. The consumer position
    is committed through the producer's transaction every 100 messages and
    once more when the end of the input is reached.

    :param args: object providing ``brokers``, ``group_id``, ``input_topic``,
                 ``input_partition`` and ``output_topic`` attributes.
    """
    bootstrap = args.brokers
    group = args.group_id
    src_topic = args.input_topic
    src_partition = args.input_partition
    dst_topic = args.output_topic

    consumer_conf = {
        'bootstrap.servers': bootstrap,
        'group.id': group,
        'auto.offset.reset': 'earliest',
        # Committed offsets must only advance as part of the transaction:
        # they are handed to the producer via send_offsets_to_transaction().
        'enable.auto.commit': False,
        'enable.partition.eof': True,
    }
    consumer = Consumer(consumer_conf)

    # Before KIP-447 support every input partition needs its own
    # transactional producer, hence assign() to a single partition instead
    # of subscribe(). The more involved alternative is creating a producer
    # per partition from subscribe's rebalance callback.
    consumer.assign([TopicPartition(src_topic, src_partition)])

    producer = Producer({
        'bootstrap.servers': bootstrap,
        'transactional.id': 'eos-transactions.py'
    })

    def commit_with_offsets():
        # Committing the consumer position together with the produced
        # output in one transaction is what provides EOS.
        producer.send_offsets_to_transaction(
            consumer.position(consumer.assignment()),
            consumer.consumer_group_metadata())
        producer.commit_transaction()

    # Initialise the transactional producer and open the first transaction.
    producer.init_transactions()
    producer.begin_transaction()

    reached_eof = {}
    pending = 0

    print("=== Starting Consume-Transform-Process loop ===")
    while True:
        # Serve delivery reports of earlier produce() calls.
        producer.poll(0)

        # Fetch the next input message, if any.
        msg = consumer.poll(timeout=1.0)
        if msg is None:
            continue

        tp_key = (msg.topic(), msg.partition())
        error = msg.error()
        if error:
            # Every error event is skipped; only partition EOF is acted on.
            if error.code() != KafkaError._PARTITION_EOF:
                continue
            reached_eof[tp_key] = True
            print("=== Reached the end of {} [{}] at {}====".format(
                tp_key[0], tp_key[1], msg.offset()))
            if len(reached_eof) == len(consumer.assignment()):
                print("=== Reached end of input ===")
                break
            continue

        # A real message arrived, so this partition is no longer at EOF.
        reached_eof.pop(tp_key, None)
        pending += 1

        # Transform the message and forward it to the output topic.
        out_key, out_value = process_input(msg)
        producer.produce(dst_topic, out_value, out_key,
                         on_delivery=delivery_report)

        if pending % 100 != 0:
            continue

        print(
            "=== Committing transaction with {} messages at input offset {} ==="
            .format(pending, msg.offset()))
        commit_with_offsets()

        # Open the next transaction and restart the batch counter.
        producer.begin_transaction()
        pending = 0

    print("=== Committing final transaction with {} messages ===".format(
        pending))
    # Commit whatever was processed since the last batch commit.
    commit_with_offsets()

    consumer.close()
'bootstrap.servers': KAFKA_BOOTSTRAP_SERVERS, 'group.id': f"TS-joiner_{__name__}_2", 'auto.offset.reset': 'earliest', 'enable.auto.commit': False, 'enable.auto.offset.store': False }) kafka_consumer.subscribe([KAFKA_TOPIC_IN_1, KAFKA_TOPIC_IN_2]) kafka_consumer.assign([TopicPartition(KAFKA_TOPIC_IN_1), TopicPartition(KAFKA_TOPIC_IN_2)]) # Create a Kafka producer kafka_producer = Producer({'bootstrap.servers': KAFKA_BOOTSTRAP_SERVERS, "transactional.id": 'eos-transactions.py'}) # Initialize producer transaction. kafka_producer.init_transactions() # Start producer transaction. kafka_producer.begin_transaction() def delivery_report(err, msg): """ Delivery callback for Kafka Produce. Called once for each message produced to indicate delivery result. Triggered by poll() or flush(). """ if err is not None: print('Message delivery failed: {}'.format(err)) else: if VERBOSE: # get the sent message using msg.value() print(f"Message '{msg.key().decode('utf-8')}' \tdelivered to topic '{msg.topic()}' [{msg.partition()}].") # define customized function for join def join_fct(record_left, record_right):
return account_event def create_price_event(): price = random.choice(list(range(1, 100))) price_event = { "id": f"{uuid.uuid4()}", "newPrice": price, "stock": random.choice(["StockA", "StockB"]), } return price_event def get_account_number(): return str(random.choice(list(range(1000, 1003)))) for i in range(1): event = create_order_event() message = json.dumps(event) print(f'Sending event {message}') producer.begin_transaction() producer.produce('incoming-orders', f'{message}'.encode('utf-8'), event['transactionId'].encode("utf-8")) producer.flush() producer.commit_transaction() time.sleep(0.5)
class KafkaProducerConfluent:
    """
    Producer built on top of confluent_kafka.

    Meant to be used as a context manager: the underlying ``Producer`` is
    created in ``__enter__`` and flushed (or, in transactional mode,
    committed/aborted) in ``__exit__``.
    """

    # ---- Initialisation ----

    def __init__(self,
                 hosts=None,
                 configuration=None,
                 use_tx=False,
                 one_topic_name=None,
                 auto_flush_size=0,
                 flush_is_bad=False):
        """
        :param hosts: value for ``bootstrap.servers``; when given it overrides
            whatever the configuration contains
        :param configuration: librdkafka settings, see
            https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
            The mapping is copied, the caller's object is never modified.
        :param use_tx: wrap everything produced inside the ``with`` block in
            one transaction
        :param one_topic_name: default topic used when ``put_data`` is called
            without a topic
        :param auto_flush_size: flush after this many produced messages;
            0 disables automatic flushing
        :param flush_is_bad: when automatic flushing is off, flush only after
            the local queue reports ``BufferError``
        """
        if configuration is None:
            self.configuration = {
                'client.id':
                default_cfg.DEFAULT_CONNECTION_OPTION_ADMIN['client_id'],
                'socket.timeout.ms':
                default_cfg.DEFAULT_BROKER_TIMEOUT_MS_OPERATIONS
            }
        else:
            # Work on a copy so the caller's dict is not mutated below.
            self.configuration = dict(configuration)

        if use_tx:
            # The transactional API needs a transactional.id; keep the one
            # supplied by the caller, otherwise generate one.
            self.configuration.setdefault('transactional.id', str(uuid4()))

        if hosts:
            self.configuration['bootstrap.servers'] = hosts
        elif not self.configuration.get('bootstrap.servers'):
            self.configuration['bootstrap.servers'] = GeneralConfig.KAFKA_URL

        self.use_tx = use_tx
        self.topic_part_itr = None
        self.topic_parts = None
        self.one_topic_name = one_topic_name

        self.auto_flush = bool(auto_flush_size)
        self.auto_flush_size = auto_flush_size
        self.auto_flush_itr = 0
        self.flush_is_bad = flush_is_bad

    # ---- Context manager ----

    def __enter__(self):
        """Create the producer, load partition info and open a transaction if requested."""
        self.auto_flush_itr = 0
        self.producer = Producer(self.configuration)
        self.update_partition_settings(name_topic=self.one_topic_name)

        if self.use_tx:
            # NOTE(review): this abort runs before init_transactions(), so it
            # presumably always fails; kept as the original best-effort call.
            try:
                self.producer.abort_transaction(
                    default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
            except Exception:
                pass
            self.producer.init_transactions(
                default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
            self.producer.begin_transaction()

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Finish the work started in ``__enter__``.

        In transactional mode the transaction is aborted when the block raised
        and committed otherwise; without transactions the producer is flushed.
        Exceptions from the ``with`` block are not suppressed.

        :param exc_type: exception class raised inside the block, or None
        :param exc_val: exception instance, or None
        :param exc_tb: traceback, or None
        :return: None
        """
        self.auto_flush_itr = 0
        if self.use_tx:
            if exc_type:
                self.producer.abort_transaction()
            else:
                # commit_transaction flushes under the hood
                self.producer.commit_transaction(
                    default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
        else:
            self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)

    # ---- Helper operations ----

    def get_list_topics(self):
        """
        All topics.

        :return: the ``topics`` mapping of the cluster metadata, or None when
            the lookup fails for any reason
        """
        try:
            return self.producer.list_topics().topics
        except Exception:
            return None

    def get_one_topic(self, name):
        """
        A single topic by name.

        :param name: topic name
        :return: the ``topics`` mapping of the metadata requested for that
            topic, or None when the lookup fails for any reason
        """
        try:
            return self.producer.list_topics(topic=name).topics
        except Exception:
            return None

    def update_partition_settings(self, name_topic=None):
        """
        Refresh the cached partition lists used for round-robin distribution.

        Only topics with more than one partition are cached. If the metadata
        lookup fails the cache is left as it is.

        :param name_topic: refresh only this topic; all topics when None
        :return: None
        """
        if self.topic_parts is None:
            self.topic_part_itr = {}
            self.topic_parts = {}

        if name_topic is None:
            topics = self.get_list_topics()
        else:
            # Drop the stale entry first; it is re-added below if the topic
            # still has several partitions.
            self.topic_parts.pop(name_topic, None)
            self.topic_part_itr.pop(name_topic, None)
            topics = self.get_one_topic(name_topic)

        if not topics:
            # Lookup failed (None) or returned nothing: nothing to cache.
            return

        for name, topic_obj in topics.items():
            list_partitions = list(topic_obj.partitions)
            if len(list_partitions) <= 1:
                continue
            self.topic_parts[name] = list_partitions
            self.topic_part_itr[name] = 0

    def put_data(self,
                 key,
                 value,
                 topic=None,
                 callback=None,
                 partition=None,
                 poll_time=0):
        """
        Queue a message for delivery to the broker and poll the producer.

        The value is serialised with ``jsd`` right away; topic name and key
        are always converted to strings.

        :param key: message key; leave empty to let Kafka pick the partition
        :param value: message value
        :param topic: topic name; ``self.one_topic_name`` is used when omitted
        :param callback: delivery callback, ``func(err, msg)``
        :param partition: partition number; when omitted, keyed messages are
            spread round-robin over the cached partitions of the topic
        :param poll_time: timeout handed to ``producer.poll``
        :return: None
        """
        dict_args = self._put_validation_and_transform(key=key,
                                                       value=value,
                                                       topic=topic,
                                                       callback=callback,
                                                       partition=partition)
        self._put_data_default(dict_args=dict_args, poll_time=poll_time)

    def _put_validation_and_transform(self,
                                      key,
                                      value,
                                      topic=None,
                                      callback=None,
                                      partition=None):
        """
        Build the keyword arguments for ``producer.produce``.

        :raises AttributeError: when neither ``topic`` nor
            ``self.one_topic_name`` is set
        :return: dict with ``topic`` and ``value`` and, when applicable,
            ``key``, ``callback`` and ``partition``
        """
        if topic is None and self.one_topic_name is None:
            raise AttributeError('NEED TOPIC NAME!')
        if topic is None:
            topic = self.one_topic_name

        dict_args = {
            'topic': str(topic),
            'value': jsd(value),
        }

        if key:
            # Must be an assignment: the former ``dict_args['key']: str(key)``
            # was a bare annotation and silently dropped the key.
            dict_args['key'] = str(key)
        if callback:
            dict_args['callback'] = callback

        if partition is not None:
            # Explicit partition; ``is not None`` keeps partition 0 valid.
            dict_args['partition'] = partition
            return dict_args

        # No explicit partition: rotate over the cached partitions.
        top_name = dict_args['topic']
        topic_parts = (self.topic_parts or {}).get(top_name)
        if topic_parts:
            current_position = (self.topic_part_itr.get(top_name, 0)
                                % len(topic_parts))
            if key:
                # A partition is only pinned when the message has a key
                dict_args['partition'] = topic_parts[current_position]
            self.topic_part_itr[top_name] = ((current_position + 1)
                                             % len(topic_parts))

        return dict_args

    def _produce(self, dict_args, poll_time=0, do_poll=True):
        """Produce one message using the configured flush strategy."""
        if self.flush_is_bad and not self.auto_flush:
            try:
                self.producer.produce(**dict_args)
            except BufferError:
                # Local queue is full: wait for it to drain, then hand the
                # message over again so that it is not lost.
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
                self.producer.produce(**dict_args)
        else:
            self.producer.produce(**dict_args)

        if do_poll:
            self.producer.poll(poll_time)

        if self.auto_flush:
            # Flush after every ``auto_flush_size`` produced messages.
            self.auto_flush_itr += 1
            if self.auto_flush_itr >= self.auto_flush_size:
                self.auto_flush_itr = 0
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)

    def _put_data_default(self, dict_args, poll_time=0):
        """Produce the prepared message and poll the producer afterwards."""
        self._produce(dict_args, poll_time=poll_time, do_poll=True)

    def put_data_direct(self,
                        key,
                        value,
                        topic=None,
                        callback=None,
                        partition=None):
        """
        Direct insert with data transformation; ``poll`` is not called.

        Takes the same arguments as ``put_data`` except ``poll_time``.
        """
        dict_args = self._put_validation_and_transform(key=key,
                                                       value=value,
                                                       topic=topic,
                                                       callback=callback,
                                                       partition=partition)
        self._produce(dict_args, do_poll=False)
def delivery_report(err, msg):
    """
    Report the delivery result of one produced message.

    Called once for each message produced to indicate delivery result.
    Triggered by poll() or flush().

    :param err: delivery error, or None on success
    :param msg: the message the report is about
    """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(),
                                                    msg.partition()))


# Throughput measurement: 65536 messages, each in its own transaction.
p.init_transactions()
starttime = datetime.datetime.now()
for loop in range(65536):
    p.begin_transaction()
    p.produce(topicfile, lines[0])
    p.commit_transaction()
endtime = datetime.datetime.now()

# time.sleep(5)
# for loop in range(1000):
#     p.begin_transaction()
#     p.produce(topicfile, str(datetime.datetime.timestamp(datetime.datetime.now())))
#     p.commit_transaction()
# endtime = datetime.datetime.now()

# Append the elapsed time to the results file; the context manager closes the
# file even if the write fails.
with open('./Performance/Throughput', 'a') as f:
    f.write('Time for 64k msgs = ' + str(endtime - starttime) + '\n')
class KafkaProducerChannel(Channel):
    """
    Represents kafka producer channel for communication.

    Keyword arguments read by the constructor:
      hosts         - broker address(es), used as ``bootstrap.servers``
      client_id     - used as ``client.id``
      retry_counter - maximum number of connection / send attempts (default 5)
    """

    def __init__(self, **kwargs):
        Channel.__init__(self)
        self._hosts = kwargs.get("hosts")
        self._client_id = kwargs.get("client_id")
        self._retry_counter = kwargs.get("retry_counter", 5)
        self._topic = None
        self._channel = None

    def get_topic(self):
        """ Return the topic messages are published to. """
        return self._topic

    def set_topic(self, topic):
        """ Set the topic to publish to; empty values are ignored. """
        if topic:
            self._topic = topic

    def init(self):
        """
        Initialize the object using configuration params passed.
        Establish connection with Kafka broker.

        connect() is attempted up to ``retry_counter`` times with an
        exponentially growing pause (1, 2, 4, ... seconds) between attempts.

        :raises ConnectionEstError: when no attempt succeeds or the retry
            counter is not a valid integer
        """
        self._channel = None
        try:
            max_attempts = int(self._retry_counter)
        except (TypeError, ValueError) as ex:
            Log.error(f"message bus producer initialization failed. {ex}")
            raise ConnectionEstError(
                f"Unable to connect to message bus broker. {ex}") from ex

        last_error = None
        for retry_count in range(max_attempts):
            try:
                self.connect()
            except ConnectionEstError as ex:
                # connect() reports every failure by raising; swallow it here
                # so that the remaining attempts are actually made.
                last_error = ex
                self._channel = None

            if self._channel is not None:
                Log.debug(f"message bus producer connection is Initialized."\
                f"Attempts:{retry_count+1}")
                return

            Log.warn(f"message bus producer connection Failed. Retry Attempt: {retry_count+1}" \
                f" in {2**retry_count} seconds")
            if retry_count + 1 < max_attempts:
                # No point in waiting after the final attempt.
                time.sleep(2**retry_count)

        Log.error(f"message bus producer initialization failed. {last_error}")
        raise ConnectionEstError(
            f"Unable to connect to message bus broker. {last_error}")

    def connect(self):
        """
        Initiate the connection with Kafka broker and open the
        necessary communication channel.

        The channel is only stored once the producer has been created and
        its transactions have been initialised.

        :raises ConnectionEstError: on any failure
        """
        conf = {
            'bootstrap.servers': str(self._hosts),
            'request.required.acks': 'all',
            'max.in.flight.requests.per.connection': 1,
            'client.id': self._client_id,
            # Pass the id as a plain string rather than a UUID object.
            'transactional.id': str(uuid.uuid4()),
            'enable.idempotence': True
        }
        try:
            producer = Producer(conf)
            producer.init_transactions()
        except Exception as ex:
            # Do not keep a producer whose transactions are not initialised.
            self._channel = None
            Log.error(f"Unable to connect to message bus broker. {ex}")
            raise ConnectionEstError(
                f"Unable to connect to message bus broker. {ex}") from ex
        self._channel = producer

    @classmethod
    def disconnect(cls):
        raise NotImplementedError(
            'disconnect not implemented for Kafka producer Channel')

    @classmethod
    def recv(cls, message=None):
        raise NotImplementedError(
            'recv not implemented for Kafka producer Channel')

    def channel(self):
        """ Return the underlying producer, or None when not connected. """
        return self._channel

    def send(self, message):
        """
        Publish the message to kafka broker topic.

        The message is produced inside its own transaction. Errors flagged as
        retriable or as requiring an abort make the open transaction get
        aborted and the whole send repeated, at most ``retry_counter`` times.
        Nothing is sent when the channel has not been initialised.

        :raises SendError: on any other Kafka error, when aborting fails, or
            when all attempts are used up
        """
        if self._channel is None:
            Log.warn(f"Message not published to Topic: {self._topic}, "
                     f"producer channel is not initialized")
            return

        max_attempts = max(int(self._retry_counter), 1)
        last_error = None
        for _ in range(max_attempts):
            in_transaction = False
            try:
                self._channel.begin_transaction()
                in_transaction = True
                self._channel.produce(self._topic, message)
                self._channel.commit_transaction()
            except KafkaException as e:
                error = e.args[0]
                if not (error.retriable() or error.txn_requires_abort()):
                    # Treat all other errors as fatal
                    Log.error(
                        f"Failed to publish message to topic : {self._topic}. {e}")
                    raise SendError(
                        f"Unable to send message to message bus broker. {e}") from e
                last_error = e
                if in_transaction:
                    # A new transaction cannot be begun while this one is
                    # open, so abort before the next attempt.
                    try:
                        self._channel.abort_transaction()
                    except KafkaException as abort_error:
                        Log.error(
                            f"Failed to publish message to topic : {self._topic}. {abort_error}")
                        raise SendError(
                            f"Unable to send message to message bus broker. {abort_error}"
                        ) from abort_error
                # TODO
                # rewind_consumer_offsets...()
                continue

            Log.info(f"Message Published to Topic: {self._topic},"\
                f"Msg Details: {message}")
            return

        Log.error(
            f"Failed to publish message to topic : {self._topic}. {last_error}")
        raise SendError(
            f"Unable to send message to message bus broker. {last_error}")

    @classmethod
    def recv_file(cls, remote_file, local_file):
        raise NotImplementedError(
            'recv_file not implemented for Kafka producer Channel')

    @classmethod
    def send_file(cls, local_file, remote_file):
        raise NotImplementedError(
            'send_file not implemented for Kafka producer Channel')

    @classmethod
    def acknowledge(cls, delivery_tag=None):
        raise NotImplementedError(
            'acknowledge not implemented for Kafka producer Channel')