Example #1
import pytest

from confluent_kafka import (Consumer, KafkaError, KafkaException, Producer,
                             TopicPartition)


def test_transaction_api():
    """ Exercise the transactional API """
    # No bootstrap.servers is configured, so broker-bound calls time out.
    p = Producer({"transactional.id": "test"})

    with pytest.raises(KafkaException) as ex:
        p.init_transactions(0.5)
    assert ex.value.args[0].code() == KafkaError._TIMED_OUT
    assert ex.value.args[0].retriable() is True
    assert ex.value.args[0].fatal() is False
    assert ex.value.args[0].txn_requires_abort() is False

    # Any subsequent APIs will fail since init did not succeed.
    with pytest.raises(KafkaException) as ex:
        p.begin_transaction()
    assert ex.value.args[0].code() == KafkaError._STATE
    assert ex.value.args[0].retriable() is False
    assert ex.value.args[0].fatal() is False
    assert ex.value.args[0].txn_requires_abort() is False

    consumer = Consumer({"group.id": "testgroup"})
    group_metadata = consumer.consumer_group_metadata()
    consumer.close()

    with pytest.raises(KafkaException) as ex:
        p.send_offsets_to_transaction([TopicPartition("topic", 0, 123)],
                                      group_metadata)
    assert ex.value.args[0].code() == KafkaError._STATE
    assert ex.value.args[0].retriable() is False
    assert ex.value.args[0].fatal() is False
    assert ex.value.args[0].txn_requires_abort() is False

    with pytest.raises(KafkaException) as ex:
        p.commit_transaction(0.5)
    assert ex.value.args[0].code() == KafkaError._STATE
    assert ex.value.args[0].retriable() is False
    assert ex.value.args[0].fatal() is False
    assert ex.value.args[0].txn_requires_abort() is False

    with pytest.raises(KafkaException) as ex:
        p.abort_transaction(0.5)
    assert ex.value.args[0].code() == KafkaError._STATE
    assert ex.value.args[0].retriable() is False
    assert ex.value.args[0].fatal() is False
    assert ex.value.args[0].txn_requires_abort() is False
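
# For contrast with the error paths exercised above: the same API on the
# happy path, a minimal sketch assuming a reachable broker (the address
# is hypothetical):
p = Producer({"bootstrap.servers": "localhost:9092",
              "transactional.id": "test"})
p.init_transactions()    # succeeds once the broker is reachable
p.begin_transaction()
p.produce("topic", b"value", b"key")
p.commit_transaction()   # flushes outstanding messages and commits atomically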
Example #2
from confluent_kafka import Consumer, KafkaError, Producer, TopicPartition


def main(args):
    brokers = args.brokers
    group_id = args.group_id
    input_topic = args.input_topic
    input_partition = args.input_partition
    output_topic = args.output_topic

    consumer = Consumer({
        'bootstrap.servers': brokers,
        'group.id': group_id,
        'auto.offset.reset': 'earliest',
        # Do not advance committed offsets outside of the transaction.
        # Consumer offsets are committed along with the transaction
        # using the producer's send_offsets_to_transaction() API.
        'enable.auto.commit': False,
        'enable.partition.eof': True,
    })

    # Prior to KIP-447 being supported, each input partition requires
    # its own transactional producer, so in this example we use
    # assign() with a single partition rather than subscribe().
    # A more complex alternative is to dynamically create a producer per
    # partition in subscribe()'s rebalance callback.
    consumer.assign([TopicPartition(input_topic, input_partition)])

    producer = Producer({
        'bootstrap.servers': brokers,
        'transactional.id': 'eos-transactions.py'
    })

    # Initialize producer transaction.
    producer.init_transactions()
    # Start producer transaction.
    producer.begin_transaction()

    eof = {}
    msg_cnt = 0
    print("=== Starting Consume-Transform-Process loop ===")
    while True:
        # serve delivery reports from previous produce()s
        producer.poll(0)

        # read message from input_topic
        msg = consumer.poll(timeout=1.0)
        if msg is None:
            continue

        topic, partition = msg.topic(), msg.partition()
        if msg.error():
            if msg.error().code() == KafkaError._PARTITION_EOF:
                eof[(topic, partition)] = True
                print("=== Reached the end of {} [{}] at {}====".format(
                    topic, partition, msg.offset()))

                if len(eof) == len(consumer.assignment()):
                    print("=== Reached end of input ===")
                    break
            continue
        # clear EOF if a new message has been received
        eof.pop((topic, partition), None)

        msg_cnt += 1

        # process message
        processed_key, processed_value = process_input(msg)

        # produce transformed message to output topic
        producer.produce(output_topic,
                         processed_value,
                         key=processed_key,
                         on_delivery=delivery_report)

        if msg_cnt % 100 == 0:
            print(
                "=== Committing transaction with {} messages at input offset {} ==="
                .format(msg_cnt, msg.offset()))
            # Send the consumer's current position to the transaction so the
            # input offsets are committed together with it; committing the
            # input and the output in one transaction is what provides EOS.
            producer.send_offsets_to_transaction(
                consumer.position(consumer.assignment()),
                consumer.consumer_group_metadata())

            # Commit the transaction
            producer.commit_transaction()

            # Begin new transaction
            producer.begin_transaction()
            msg_cnt = 0

    print("=== Committing final transaction with {} messages ===".format(
        msg_cnt))
    # commit processed message offsets to the transaction
    producer.send_offsets_to_transaction(
        consumer.position(consumer.assignment()),
        consumer.consumer_group_metadata())

    # commit transaction
    producer.commit_transaction()

    consumer.close()
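

# Example #2 calls two helpers it does not define. A minimal sketch of what
# they could look like; the transform itself is an assumption, only the names
# process_input and delivery_report come from the example:
def process_input(msg):
    # Toy transform: upper-case the value, keep the key unchanged.
    return msg.key(), msg.value().upper()


def delivery_report(err, msg):
    # Log per-message delivery results served by producer.poll().
    if err is not None:
        print('Delivery failed for {}: {}'.format(msg.topic(), err))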
Example #3
from confluent_kafka import Consumer, Producer, TopicPartition

kafka_consumer = Consumer({
    'bootstrap.servers': KAFKA_BOOTSTRAP_SERVERS,
    'group.id': f"TS-joiner_{__name__}_2",
    'auto.offset.reset': 'earliest',
    'enable.auto.commit': False,
    'enable.auto.offset.store': False
})
# subscribe() lets the group coordinator assign partitions; calling assign()
# afterwards would override the subscription, so it is not used here.
kafka_consumer.subscribe([KAFKA_TOPIC_IN_1, KAFKA_TOPIC_IN_2])

# Create a Kafka producer
kafka_producer = Producer({'bootstrap.servers': KAFKA_BOOTSTRAP_SERVERS,
                           "transactional.id": 'eos-transactions.py'})
# Initialize producer transaction.
kafka_producer.init_transactions()
# Start producer transaction.
kafka_producer.begin_transaction()


def delivery_report(err, msg):
    """ Delivery callback for Kafka Produce. Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        if VERBOSE:
            # get the sent message using msg.value()
            print(f"Message '{msg.key().decode('utf-8')}'  \tdelivered to topic '{msg.topic()}' [{msg.partition()}].")


# define customized function for join
def join_fct(record_left, record_right):
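    # The body of join_fct is truncated in the source. A minimal sketch under
    # assumed record shapes (dicts sharing a timestamp "ts" and a "value"
    # field; these field names are assumptions, not from the source):
    return {
        'ts': record_left.get('ts'),
        'value_1': record_left.get('value'),
        'value_2': record_right.get('value'),
    }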
Example #4
    return account_event


def create_price_event():
    price = random.choice(list(range(1, 100)))
    price_event = {
        "id": f"{uuid.uuid4()}",
        "newPrice": price,
        "stock": random.choice(["StockA", "StockB"]),
    }
    return price_event


def get_account_number():
    return str(random.choice(list(range(1000, 1003))))


for i in range(1):
    event = create_order_event()

    message = json.dumps(event)
    print(f'Sending event {message}')

    producer.begin_transaction()
    producer.produce('incoming-orders', message.encode('utf-8'),
                     event['transactionId'].encode('utf-8'))
    producer.flush()

    producer.commit_transaction()

    time.sleep(0.5)
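
# The truncated top of Example #4 must have created and initialized the
# transactional producer: without init_transactions(), begin_transaction()
# above would fail with a _STATE error (see Example #1). A sketch of the
# assumed setup (broker address and transactional.id are assumptions):
import json
import random
import time
import uuid

from confluent_kafka import Producer

producer = Producer({
    'bootstrap.servers': 'localhost:9092',        # assumed broker
    'transactional.id': 'order-event-generator',  # assumed id
})
producer.init_transactions()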
Example #5
class KafkaProducerConfluent:
    """
    Producer, based on confluent_kafka.
    """
    """
    Initialization
    """
    def __init__(self,
                 hosts=None,
                 configuration=None,
                 use_tx=False,
                 one_topic_name=None,
                 auto_flush_size=0,
                 flush_is_bad=False):
        """

        :param configuration:
        https://github.com/edenhill/librdkafka/blob/master/CONFIGURATION.md
        """

        if configuration is None:
            self.configuration = {
                'client.id':
                default_cfg.DEFAULT_CONNECTION_OPTION_ADMIN['client_id'],
                'socket.timeout.ms':
                default_cfg.DEFAULT_BROKER_TIMEOUT_MS_OPERATIONS
            }

            if use_tx:
                self.configuration['transactional.id'] = str(uuid4())
        else:
            self.configuration = configuration

        if hosts:
            self.configuration['bootstrap.servers'] = hosts
        else:
            if not self.configuration.get('bootstrap.servers'):
                self.configuration[
                    'bootstrap.servers'] = GeneralConfig.KAFKA_URL

        self.use_tx = use_tx
        self.topic_part_itr = None
        self.topic_parts = None
        self.one_topic_name = one_topic_name

        self.auto_flush = bool(auto_flush_size)

        self.auto_flush_size = auto_flush_size
        self.auto_flush_itr = 0
        self.flush_is_bad = flush_is_bad

    """
    Context manager
    """

    def __enter__(self):

        self.auto_flush_itr = 0
        self.producer = Producer(self.configuration)
        self.update_partition_settings(name_topic=self.one_topic_name)

        if self.use_tx:
            try:
                # Defensively abort any previous transaction; on a fresh
                # producer this raises a state error, which is ignored.
                self.producer.abort_transaction(
                    default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
            except Exception:
                pass

            self.producer.init_transactions(
                default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
            self.producer.begin_transaction()

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """
        Teardown on exit

        :param exc_type:
        :param exc_val:
        :param exc_tb:
        :return:
        """

        self.auto_flush_itr = 0
        if self.use_tx:
            if exc_type:
                self.producer.abort_transaction()
            else:
                # commit_transaction() flushes the delivery queue internally
                self.producer.commit_transaction(
                    default_cfg.DEFAULT_TRANSACTION_TIMEOUT_SEC)
        else:
            self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)

        del self

    """
    Helper operations
    """

    def get_list_topics(self):
        """
        All topics
        :return:
        """
        try:
            res = self.producer.list_topics().topics
            return res
        except Exception:
            return None

    def get_one_topic(self, name):
        """
        One topic, looked up by name
        :param name:
        :return:
        """
        try:
            res = self.producer.list_topics(topic=name).topics
            return res
        except Exception:
            return None

    def update_partition_settings(self, name_topic=None):
        """
        Refresh the partition settings for all topics

        :param name_topic: refresh only this topic when given
        :return:
        """

        if self.topic_parts is None:
            self.topic_part_itr = {}
            self.topic_parts = {}

        if name_topic is None:
            topics = self.get_list_topics()
        else:
            if self.topic_parts.get(name_topic) is not None:
                self.topic_parts.pop(name_topic)

            topics = self.get_one_topic(name_topic)

        if topics is None:
            # list_topics() failed; keep the previous partition settings
            return

        for name, topic_obj in topics.items():
            list_partitions = list(topic_obj.partitions)
            if len(list_partitions) <= 1:
                continue

            self.topic_parts[name] = list_partitions
            self.topic_part_itr[name] = 0

    def put_data(self,
                 key,
                 value,
                 topic=None,
                 callback=None,
                 partition=None,
                 poll_time=0):
        """
        Queue data for delivery to the message broker.
        To keep things simple, the value is dumped to a JSON string right
        away. Topic name and key must be strings.

        :param key: message key. Leave it empty to let Kafka balance
            messages across partitions itself.
        :param value: message value

        :param topic: topic name; when not given, the main topic
            self.one_topic_name is used
        :param partition: topic partition (int); when not given, load is
            balanced across partitions

        :param callback: func(err, msg): if err is not None...
        :return:
        """

        dict_args = self._put_validation_and_transform(key=key,
                                                       value=value,
                                                       topic=topic,
                                                       callback=callback,
                                                       partition=partition)

        self._put_data_default(dict_args=dict_args, poll_time=poll_time)

    def _put_validation_and_transform(self,
                                      key,
                                      value,
                                      topic=None,
                                      callback=None,
                                      partition=None):
        """
        Builds the dict of produce() arguments shared by the different
        insertion algorithms
        """

        if topic is None and self.one_topic_name is None:
            raise AttributeError('NEED TOPIC NAME!')

        if topic is None:
            topic = self.one_topic_name

        dict_args = {
            'topic': str(topic),
            'value': jsd(value),
        }

        if key:
            dict_args['key'] = str(key)

        if callback:
            dict_args['callback'] = callback

        if partition:
            # Explicitly set the partition

            dict_args['partition'] = partition
        else:
            # Spread the load evenly across partitions

            top_name = dict_args['topic']
            topic_parts = self.topic_parts.get(top_name)
            if topic_parts:

                current_position = self.topic_part_itr[top_name]

                if key:
                    # An explicit partition is only needed when a key is set
                    dict_args['partition'] = topic_parts[current_position]

                current_position += 1
                if current_position >= len(topic_parts):
                    current_position = 0

                self.topic_part_itr[top_name] = current_position

        return dict_args

    def _put_data_default(self, dict_args, poll_time=0):
        """
        The original insertion logic, with later refinements
        """

        if self.auto_flush:
            # Third version: auto-flush after every auto_flush_size messages

            self.producer.produce(**dict_args)
            self.producer.poll(poll_time)

            self.auto_flush_itr = self.auto_flush_itr + 1
            if self.auto_flush_itr >= self.auto_flush_size:
                self.auto_flush_itr = 0
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
        else:
            if self.flush_is_bad:
                # Second version: flush only when the local queue overflows
                try:
                    self.producer.produce(**dict_args)
                    self.producer.poll(poll_time)
                except BufferError:
                    # Wait for Kafka to drain the queue
                    self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
            else:
                # First version
                self.producer.produce(**dict_args)
                self.producer.poll(poll_time)

    def put_data_direct(self,
                        key,
                        value,
                        topic=None,
                        callback=None,
                        partition=None):
        """
        Direct insertion with data transformation; poll() is not called
        """

        dict_args = self._put_validation_and_transform(key=key,
                                                       value=value,
                                                       topic=topic,
                                                       callback=callback,
                                                       partition=partition)

        if self.auto_flush:
            self.producer.produce(**dict_args)

            self.auto_flush_itr = self.auto_flush_itr + 1
            if self.auto_flush_itr >= self.auto_flush_size:
                self.auto_flush_itr = 0
                self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
        else:
            if self.flush_is_bad:
                try:
                    self.producer.produce(**dict_args)
                except BufferError:
                    # Wait for Kafka to drain the queue
                    self.producer.flush(default_cfg.DEFAULT_FLUSH_TIMER_SEC)
            else:
                self.producer.produce(**dict_args)
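
# A minimal usage sketch for the class above, assuming this project's
# default_cfg / GeneralConfig modules are importable and a broker is
# reachable; the topic name is hypothetical:
with KafkaProducerConfluent(use_tx=True, one_topic_name='demo-topic') as kp:
    kp.put_data(key='k1', value={'hello': 'world'})
# A clean exit commits the transaction; an exception aborts it (see __exit__).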

Example #6

def delivery_report(err, msg):
    """ Called once for each message produced to indicate delivery result.
        Triggered by poll() or flush(). """
    if err is not None:
        print('Message delivery failed: {}'.format(err))
    else:
        print('Message delivered to {} [{}]'.format(msg.topic(),
                                                    msg.partition()))


p.init_transactions()
starttime = datetime.datetime.now()
for loop in range(65536):
    p.begin_transaction()
    p.produce(topicfile, lines[0])
    p.commit_transaction()
endtime = datetime.datetime.now()

# time.sleep(5)

# for loop in range(1000):
#     p.begin_transaction()
#     p.produce(topicfile, str(datetime.datetime.timestamp(datetime.datetime.now())))
#     p.commit_transaction()
# endtime = datetime.datetime.now()
f = open('./Performance/Throughput', 'a')
f.write('Time for 64k msgs = ' + str(endtime - starttime) + '\n')
f.close()
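
# The loop above assumes a transactional producer p, a topic name topicfile
# and lines read from an input file. A sketch of that setup (broker address,
# topic and file name are assumptions):
import datetime

from confluent_kafka import Producer

topicfile = 'throughput-test'                 # assumed topic name
with open('./Performance/input.txt') as src:  # assumed input file
    lines = src.readlines()

p = Producer({'bootstrap.servers': 'localhost:9092',    # assumed broker
              'transactional.id': 'throughput-bench'})  # assumed id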
Example #7
class KafkaProducerChannel(Channel):
    """
    Represents kafka producer channel for communication.
    """
    def __init__(self, **kwargs):
        Channel.__init__(self)
        self._hosts = kwargs.get("hosts")
        self._client_id = kwargs.get("client_id")
        self._retry_counter = kwargs.get("retry_counter", 5)
        self._topic = None
        self._channel = None

    def get_topic(self):
        return self._topic

    def set_topic(self, topic):
        if topic:
            self._topic = topic

    def init(self):
        """
        Initialize the object using the configuration params passed.
        Establish a connection with the Kafka broker.
        """
        self._channel = None
        retry_count = 0
        try:
            while self._channel is None and int(
                    self._retry_counter) > retry_count:
                self.connect()
                if self._channel is None:
                    Log.warn(f"message bus producer connection Failed. Retry Attempt: {retry_count+1}" \
                        f" in {2**retry_count} seconds")
                    time.sleep(2**retry_count)
                    retry_count += 1
                else:
                    Log.debug(f"message bus producer connection is Initialized."\
                    f"Attempts:{retry_count+1}")
        except Exception as ex:
            Log.error(f"message bus producer initialization failed. {ex}")
            raise ConnectionEstError(
                f"Unable to connect to message bus broker. {ex}")

    def connect(self):
        """
        Initiate the connection with Kafka broker and open the
        necessary communication channel.
        """
        try:
            conf = {
                'bootstrap.servers': str(self._hosts),
                'request.required.acks': 'all',
                'max.in.flight.requests.per.connection': 1,
                'client.id': self._client_id,
                # Config property values must be strings; str() the UUID.
                'transactional.id': str(uuid.uuid4()),
                'enable.idempotence': True
            }
            self._channel = Producer(conf)
            self._channel.init_transactions()
        except Exception as ex:
            Log.error(f"Unable to connect to message bus broker. {ex}")
            raise ConnectionEstError(
                f"Unable to connect to message bus broker. {ex}")

    @classmethod
    def disconnect(self):
        raise Exception('disconnect not implemented for Kafka producer Channel')

    @classmethod
    def recv(self, message=None):
        raise Exception('recv not implemented for Kafka producer Channel')

    def channel(self):
        return self._channel

    def send(self, message):
        """
        Publish the message to kafka broker topic.
        """
        try:
            if self._channel is not None:
                self._channel.begin_transaction()
                self._channel.produce(self._topic, message)
                self._channel.commit_transaction()
                Log.info(f"Message Published to Topic: {self._topic},"\
                    f"Msg Details: {message}")
        except KafkaException as e:
            if e.args[0].retriable():
                """Retriable error, try again"""
                self.send(message)
            elif e.args[0].txn_requires_abort():
                """
                Abort current transaction, begin a new transaction,
                and rewind the consumer to start over.
                """
                self._channel.abort_transaction()
                self.send(message)
                #TODO
                #rewind_consumer_offsets...()
            else:
                """Treat all other errors as fatal"""
                Log.error(
                    f"Failed to publish message to topic : {self._topic}. {e}")
                raise SendError(
                    f"Unable to send message to message bus broker. {e}")

    @classmethod
    def recv_file(self, remote_file, local_file):
        raise Exception('recv_file not implemented for Kafka producer Channel')

    @classmethod
    def send_file(self, local_file, remote_file):
        raise Exception('send_file not implemented for Kafka producer Channel')

    @classmethod
    def acknowledge(self, delivery_tag=None):
        raise Exception('acknowledge not implemented for Kafka producer Channel')
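
# A minimal usage sketch for KafkaProducerChannel, assuming the surrounding
# project supplies Channel, Log, ConnectionEstError and SendError; the host
# and topic names are hypothetical:
channel = KafkaProducerChannel(hosts='localhost:9092', client_id='sender-1')
channel.init()               # connects with exponential backoff
channel.set_topic('events')  # assumed topic name
channel.send('{"hello": "world"}')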