def test_transaction_api():
    """Exercise the transactional API: with no broker reachable, every call
    fails and the wrapped KafkaError carries the expected classification."""
    p = Producer({"transactional.id": "test"})

    def expect_txn_error(excinfo, code, retriable):
        # Each transactional call raises KafkaException wrapping a KafkaError;
        # unpack it and verify its code and classification flags.
        error = excinfo.value.args[0]
        assert error.code() == code
        assert error.retriable() is retriable
        assert error.fatal() is False
        assert error.txn_requires_abort() is False

    with pytest.raises(KafkaException) as ex:
        p.init_transactions(0.5)
    expect_txn_error(ex, KafkaError._TIMED_OUT, True)

    # Any subsequent APIs will fail since init did not succeed.
    with pytest.raises(KafkaException) as ex:
        p.begin_transaction()
    expect_txn_error(ex, KafkaError._STATE, False)

    consumer = Consumer({"group.id": "testgroup"})
    group_metadata = consumer.consumer_group_metadata()
    consumer.close()

    with pytest.raises(KafkaException) as ex:
        p.send_offsets_to_transaction(
            [TopicPartition("topic", 0, 123)], group_metadata)
    expect_txn_error(ex, KafkaError._STATE, False)

    with pytest.raises(KafkaException) as ex:
        p.commit_transaction(0.5)
    expect_txn_error(ex, KafkaError._STATE, False)

    with pytest.raises(KafkaException) as ex:
        p.abort_transaction(0.5)
    expect_txn_error(ex, KafkaError._STATE, False)
class KafkaProducerConfluent:
    """
    Kafka producer built on confluent_kafka.

    The underlying Producer is created in ``__enter__`` — the class must be
    used as a context manager.  Supports an optional Kafka transaction
    (``use_tx``) and optional auto-flushing every ``auto_flush_size`` messages.
    """

    def __init__(self, hosts=None, configuration=None, use_tx=False,
                 one_topic_name=None, auto_flush_size=0, flush_is_bad=False):
        """
        :param hosts: bootstrap servers; overrides the configuration value.
            Falls back to ``GeneralConfig.KAFKA_URL`` when neither is set.
        :param configuration:
            https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        :param use_tx: produce inside a Kafka transaction.
        :param one_topic_name: default topic for put_data()/put_data_direct().
        :param auto_flush_size: flush after this many produced messages
            (0 disables auto-flush).
        :param flush_is_bad: when auto-flush is off, flush only on BufferError
            instead of every message.
        """
        if configuration is None:
            self.configuration = {
                'client.id': default_cfg.DEFAULT_CONNECTION_OPTION_ADMIN['client_id'],
                'socket.timeout.ms': default_cfg.DEFAULT_BROKER_TIMEOUT_MS_OPERATIONS
            }
            if use_tx:
                # A transactional producer requires a unique transactional.id.
                self.configuration['transactional.id'] = str(uuid4())
        else:
            # NOTE(review): a caller-supplied configuration is used verbatim —
            # with use_tx=True it must already contain 'transactional.id'.
            self.configuration = configuration

        if hosts:
            self.configuration['bootstrap.servers'] = hosts
        elif not self.configuration.get('bootstrap.servers'):
            self.configuration['bootstrap.servers'] = GeneralConfig.KAFKA_URL

        self.use_tx = use_tx
        self.topic_part_itr = None  # per-topic round-robin cursor
        self.topic_parts = None     # per-topic partition ids (only topics with > 1 partition)
        self.one_topic_name = one_topic_name
        self.auto_flush = bool(auto_flush_size)
        self.auto_flush_size = auto_flush_size
        self.auto_flush_itr = 0
        self.flush_is_bad = flush_is_bad

    # --- context management -------------------------------------------------

    def __enter__(self):
        """Create the Producer and, if transactional, start a transaction."""
        self.auto_flush_itr = 0
        self.producer = Producer(self.configuration)
        self.update_partition_settings(name_topic=self.one_topic_name)
        if self.use_tx:
            # Best effort: drop a possibly dangling transaction before init
            # (raises e.g. _STATE on a fresh producer — deliberately ignored).
            try:
                self.producer.abort_transaction(
                    default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
            except Exception:
                pass
            self.producer.init_transactions(
                default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
            self.producer.begin_transaction()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Commit the transaction on clean exit, abort it on error;
        without a transaction, just flush the delivery queue.

        :param exc_type: exception class (or None) raised inside the block
        :param exc_val: exception instance
        :param exc_tb: traceback
        :return: None (exceptions are not suppressed)
        """
        self.auto_flush_itr = 0
        if self.use_tx:
            if exc_type:
                self.producer.abort_transaction()
            else:
                # flush() is invoked internally by commit_transaction().
                self.producer.commit_transaction(
                    default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
        else:
            self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)

    # --- helpers ------------------------------------------------------------

    def get_list_topics(self):
        """Return metadata for all topics, or None on failure (best effort)."""
        try:
            return self.producer.list_topics().topics
        except Exception:
            return None

    def get_one_topic(self, name):
        """
        Return metadata for a single topic by name, or None on failure.

        :param name: topic name
        """
        try:
            return self.producer.list_topics(topic=name).topics
        except Exception:
            return None

    def update_partition_settings(self, name_topic=None):
        """
        Refresh the cached partition lists for all topics (or a single one).

        Topics with at most one partition are skipped — there is nothing
        to round-robin over.

        :param name_topic: refresh only this topic when given
        """
        if self.topic_parts is None:
            self.topic_part_itr = {}
            self.topic_parts = {}
        if name_topic is None:
            topics = self.get_list_topics()
        else:
            if self.topic_parts.get(name_topic) is not None:
                self.topic_parts.pop(name_topic)
            topics = self.get_one_topic(name_topic)
        if not topics:
            # BUG FIX: metadata fetch failed (get_*_topic(s) returned None) —
            # keep the current cache instead of crashing on None.items().
            return
        for name, topic_obj in topics.items():
            list_partitions = list(topic_obj.partitions)
            if len(list_partitions) <= 1:
                continue
            self.topic_parts[name] = list_partitions
            self.topic_part_itr[name] = 0

    # --- producing ----------------------------------------------------------

    def put_data(self, key, value, topic=None, callback=None, partition=None,
                 poll_time=0):
        """
        Queue a message for delivery to the broker.  The value is JSON-dumped;
        topic and key are coerced to str.

        :param key: message key; leave empty to let Kafka balance partitions
        :param value: message payload (serialized with jsd())
        :param topic: topic name; defaults to self.one_topic_name
        :param callback: func(err, msg) delivery callback
        :param partition: explicit partition number; otherwise round-robin
        :param poll_time: timeout passed to producer.poll()
        """
        dict_args = self._put_validation_and_transform(
            key=key, value=value, topic=topic, callback=callback,
            partition=partition)
        self._put_data_default(dict_args=dict_args, poll_time=poll_time)

    def _put_validation_and_transform(self, key, value, topic=None,
                                      callback=None, partition=None):
        """
        Build the keyword arguments for Producer.produce().

        :raises AttributeError: when no topic is given and no default topic
            is configured
        :return: dict of produce() arguments
        """
        if topic is None and self.one_topic_name is None:
            raise AttributeError('NEED TOPIC NAME!')
        if topic is None:
            topic = self.one_topic_name

        dict_args = {
            'topic': str(topic),
            'value': jsd(value),
        }
        if key:
            # BUG FIX: was ``dict_args['key']: str(key)`` — an annotation
            # statement, not an assignment, so the key was silently dropped.
            dict_args['key'] = str(key)
        if callback:
            dict_args['callback'] = callback

        if partition:
            # Explicit partition requested by the caller.
            dict_args['partition'] = partition
        else:
            # Spread load evenly over the cached partitions.
            top_name = dict_args['topic']
            topic_parts = self.topic_parts.get(top_name)
            if topic_parts:
                current_position = self.topic_part_itr[top_name]
                if key:
                    # A concrete partition only matters when a key is set.
                    dict_args['partition'] = topic_parts[current_position]
                current_position += 1
                if current_position >= len(topic_parts):
                    current_position = 0
                self.topic_part_itr[top_name] = current_position
        return dict_args

    def _put_data_default(self, dict_args, poll_time=0):
        """
        Produce with one of three delivery/back-pressure strategies.

        :param dict_args: arguments from _put_validation_and_transform()
        :param poll_time: timeout passed to producer.poll()
        """
        if self.auto_flush:
            # v3: flush after every auto_flush_size produced messages.
            self.producer.produce(**dict_args)
            self.producer.poll(poll_time)
            self.auto_flush_itr += 1
            if self.auto_flush_itr >= self.auto_flush_size:
                self.auto_flush_itr = 0
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
        elif self.flush_is_bad:
            # v2: flush only when the local queue overflows.
            # NOTE(review): the message that raised BufferError is not
            # re-produced after the flush — confirm this loss is acceptable.
            try:
                self.producer.produce(**dict_args)
                self.producer.poll(poll_time)
            except BufferError:
                # Wait for Kafka to drain the queue.
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
        else:
            # v1: plain produce + poll.
            self.producer.produce(**dict_args)
            self.producer.poll(poll_time)

    def put_data_direct(self, key, value, topic=None, callback=None,
                        partition=None):
        """
        Direct insert with data transformation; producer.poll() is not used.

        Parameters match put_data().
        """
        dict_args = self._put_validation_and_transform(
            key=key, value=value, topic=topic, callback=callback,
            partition=partition)
        if self.auto_flush:
            self.producer.produce(**dict_args)
            self.auto_flush_itr += 1
            if self.auto_flush_itr >= self.auto_flush_size:
                self.auto_flush_itr = 0
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
        elif self.flush_is_bad:
            # NOTE(review): same as _put_data_default — the overflowing
            # message is not re-produced after the flush.
            try:
                self.producer.produce(**dict_args)
            except BufferError:
                # Wait for Kafka to drain the queue.
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
        else:
            self.producer.produce(**dict_args)
# # from previous produce() calls. # producer.poll(0) with open('bcsample.json') as f: # return JSON object as a dictionary bcsample_data = json.load(f) for bc_data in bcsample_data: # record_key = "breadcrumb" # Choose a random number between 1 and 5 for each record’s key record_key = str(random.randint(1, 5)) record_value = json.dumps(bc_data) print("Producing record key: {}".format(record_key)) producer.begin_transaction() producer.produce(topic, key=record_key, value=record_value, on_delivery=acked) # p.poll() serves delivery reports (on_delivery) # from previous produce() calls. producer.poll(2000) # CShoose True/False randomly with equal probability if decision(0.5): print("Commiting record key: {}".format(record_key)) producer.commit_transaction() else: producer.abort_transaction() producer.flush() print("{} messages were produced to topic {}!".format(delivered_records, topic))
class KafkaProducerChannel(Channel):
    """
    Represents kafka producer channel for communication.

    Only send() is supported; the receive/file/ack operations raise, since
    this channel is producer-only.
    """

    def __init__(self, **kwargs):
        Channel.__init__(self)
        self._hosts = kwargs.get("hosts")              # bootstrap servers
        self._client_id = kwargs.get("client_id")
        self._retry_counter = kwargs.get("retry_counter", 5)  # max connect attempts
        self._topic = None
        self._channel = None                           # confluent Producer once connected

    def get_topic(self):
        """Return the currently configured topic (or None)."""
        return self._topic

    def set_topic(self, topic):
        """Set the publish topic; falsy values are ignored."""
        if topic:
            self._topic = topic

    def init(self):
        """
        Initialize the object using configuration params passed.
        Establish connection with Kafka broker.

        :raises ConnectionEstError: when the connection cannot be established
        """
        self._channel = None
        retry_count = 0
        try:
            # Retry with exponential backoff: 1, 2, 4, ... seconds.
            while self._channel is None and int(
                    self._retry_counter) > retry_count:
                self.connect()
                if self._channel is None:
                    Log.warn(f"message bus producer connection Failed. Retry Attempt: {retry_count+1}" \
                        f" in {2**retry_count} seconds")
                    time.sleep(2**retry_count)
                    retry_count += 1
                else:
                    Log.debug(f"message bus producer connection is Initialized."\
                        f"Attempts:{retry_count+1}")
        except Exception as ex:
            Log.error(f"message bus producer initialization failed. {ex}")
            raise ConnectionEstError(
                f"Unable to connect to message bus broker. {ex}")

    def connect(self):
        """
        Initiate the connection with Kafka broker and open the
        necessary communication channel.

        :raises ConnectionEstError: when the producer cannot be created
        """
        try:
            conf = {
                'bootstrap.servers': str(self._hosts),
                'request.required.acks': 'all',
                'max.in.flight.requests.per.connection': 1,
                'client.id': self._client_id,
                # BUG FIX: librdkafka configuration values must be strings —
                # a raw uuid.UUID object is rejected by Producer().
                'transactional.id': str(uuid.uuid4()),
                'enable.idempotence': True
            }
            self._channel = Producer(conf)
            self._channel.init_transactions()
        except Exception as ex:
            Log.error(f"Unable to connect to message bus broker. {ex}")
            raise ConnectionEstError(
                f"Unable to connect to message bus broker. {ex}")

    @classmethod
    def disconnect(cls):
        # BUG FIX: message previously said 'recv not implemented'.
        raise Exception('disconnect not implemented for Kafka producer Channel')

    @classmethod
    def recv(cls, message=None):
        raise Exception('recv not implemented for Kafka producer Channel')

    def channel(self):
        """Return the underlying Producer (None before init())."""
        return self._channel

    def send(self, message):
        """
        Publish the message to kafka broker topic.

        Each message is wrapped in its own transaction.

        :raises SendError: on non-retriable, non-abortable Kafka errors
        """
        try:
            if self._channel is not None:
                self._channel.begin_transaction()
                self._channel.produce(self._topic, message)
                self._channel.commit_transaction()
                Log.info(f"Message Published to Topic: {self._topic},"\
                    f"Msg Details: {message}")
        except KafkaException as e:
            if e.args[0].retriable():
                # Retriable error, try again.
                # NOTE(review): recursion is unbounded — a persistently
                # retriable error can exhaust the stack; consider a bounded
                # retry loop.
                self.send(message)
            elif e.args[0].txn_requires_abort():
                # Abort current transaction, begin a new transaction,
                # and rewind the consumer to start over.
                self._channel.abort_transaction()
                self.send(message)
                # TODO
                # rewind_consumer_offsets...()
            else:
                # Treat all other errors as fatal.
                Log.error(
                    f"Failed to publish message to topic : {self._topic}. {e}")
                raise SendError(
                    f"Unable to send message to message bus broker. {e}")

    @classmethod
    def recv_file(cls, remote_file, local_file):
        raise Exception('recv_file not implemented for Kafka producer Channel')

    @classmethod
    def send_file(cls, local_file, remote_file):
        raise Exception('send_file not implemented for Kafka producer Channel')

    @classmethod
    def acknowledge(cls, delivery_tag=None):
        # BUG FIX: message previously said 'send_file not implemented'.
        raise Exception('acknowledge not implemented for Kafka producer Channel')