Exemplo n.º 1
0
    def test_id_allocator_leader_isolation(self):
        """
        Isolate the id allocator leader. This test validates whether the
        cluster is still available (messages can be produced and consumed)
        while the `kafka_internal/id_allocator` leader is network-isolated.
        """
        admin = Admin(self.redpanda)
        self._expect_available()

        # Find which node is the leader for the id allocator partition.
        admin.wait_stable_configuration(namespace='kafka_internal',
                                        topic='id_allocator',
                                        replication=3)
        initial_leader_id = admin.get_partition_leader(
            namespace='kafka_internal', topic='id_allocator', partition=0)
        leader_node = self.redpanda.get_node(initial_leader_id)
        self.logger.info(
            f"kafka_internal/id_allocator/0 leader: {initial_leader_id}, node: {leader_node.account.hostname}"
        )

        self._expect_available()

        with FailureInjector(self.redpanda) as fi:
            # Isolate the id_allocator leader; reuse the node resolved
            # above instead of looking it up a second time.
            fi.inject_failure(
                FailureSpec(FailureSpec.FAILURE_ISOLATE, leader_node))

            # Expect messages to be produced and consumed without a timeout.
            connection = self.ping_pong()
            connection.ping_pong(timeout_s=10, retries=10)
            for _ in range(127):
                connection.ping_pong()
Exemplo n.º 2
0
class TxAdminTest(RedpandaTest):
    """Tests for the transaction-related admin API endpoints."""
    topics = (TopicSpec(name="tx_test",
                        partition_count=3,
                        replication_factor=3), )

    def __init__(self, test_context):
        # Effectively disable the automatic expired-transaction reaper
        # (huge tx_timeout_delay_ms / abort interval) and the leader
        # balancer so the tests fully control tx and leadership state.
        super(TxAdminTest,
              self).__init__(test_context=test_context,
                             num_brokers=3,
                             extra_rp_conf={
                                 "enable_idempotence": True,
                                 "enable_transactions": True,
                                 "tx_timeout_delay_ms": 10000000,
                                 "abort_timed_out_transactions_interval_ms":
                                 10000000,
                                 'enable_leader_balancer': False
                             })

        self.admin = Admin(self.redpanda)

    def extract_pid(self, tx):
        """Return the (producer id, epoch) pair identifying a transaction."""
        return (tx["producer_id"]["id"], tx["producer_id"]["epoch"])

    @cluster(num_nodes=3)
    def test_simple_get_transaction(self):
        """Two ongoing txs must be reported as active on every partition."""
        producer1 = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': '0',
        })
        producer2 = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': '1',
        })
        producer1.init_transactions()
        producer2.init_transactions()
        producer1.begin_transaction()
        producer2.begin_transaction()

        for topic in self.topics:
            for partition in range(topic.partition_count):
                producer1.produce(topic.name, '0', '0', partition)
                producer2.produce(topic.name, '0', '1', partition)

        producer1.flush()
        producer2.flush()

        expected_pids = None

        for topic in self.topics:
            for partition in range(topic.partition_count):
                txs_info = self.admin.get_transactions(topic.name, partition,
                                                       "kafka")
                assert ('expired_transactions' not in txs_info)
                # Capture the pid set from the first partition; every other
                # partition must report exactly the same transactions.
                if expected_pids is None:
                    expected_pids = set(
                        map(self.extract_pid, txs_info['active_transactions']))
                    assert (len(expected_pids) == 2)

                assert (len(expected_pids) == len(
                    txs_info['active_transactions']))
                for tx in txs_info['active_transactions']:
                    assert (self.extract_pid(tx) in expected_pids)
                    assert (tx['status'] == 'ongoing')
                    assert (tx['timeout_ms'] == 60000)

    @cluster(num_nodes=3)
    def test_expired_transaction(self):
        '''
        Problem: rm_stm contains a timer to run try_abort_old_txs.
        The timer is rearmed on begin_tx to the smallest transaction
        deadline, and after try_abort_old_txs to one of the settings
        abort_timed_out_transactions_interval_ms or tx_timeout_delay_ms.
        If we sleep until the transaction timeout and then try to get the
        expired transaction, the timer can fire before our request and
        clean up the expired transactions first.

        How to solve:
        0) Run a transaction
        1) Change the leader for the partitions
        (The new leader did not get requests for begin_tx,
        so its timer is armed to one of the settings)
        2) Get expired transactions
        '''
        assert (len(self.redpanda.nodes) >= 2)

        producer1 = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': '0',
            'transaction.timeout.ms': '30000'
        })
        producer1.init_transactions()
        producer1.begin_transaction()

        for topic in self.topics:
            for partition in range(topic.partition_count):
                producer1.produce(topic.name, '0', '0', partition)

        producer1.flush()

        # Move leadership for every partition to another node. Use
        # topic.name (not self.topic, which is always the first topic)
        # so the loop is correct for any number of topics.
        for topic in self.topics:
            for partition in range(topic.partition_count):
                old_leader = self.admin.get_partition_leader(
                    namespace="kafka", topic=topic.name, partition=partition)

                self.admin.transfer_leadership_to(namespace="kafka",
                                                  topic=topic.name,
                                                  partition=partition,
                                                  target=None)

                def leader_is_changed():
                    # -1 means no leader is elected yet; wait for a real,
                    # different leader.
                    new_leader = self.admin.get_partition_leader(
                        namespace="kafka",
                        topic=topic.name,
                        partition=partition)
                    return (new_leader != -1) and (new_leader != old_leader)

                wait_until(leader_is_changed,
                           timeout_sec=30,
                           backoff_sec=2,
                           err_msg="Failed to establish current leader")

        expected_pids = None

        for topic in self.topics:
            for partition in range(topic.partition_count):
                txs_info = self.admin.get_transactions(topic.name, partition,
                                                       "kafka")
                assert ('active_transactions' not in txs_info)
                if expected_pids is None:
                    expected_pids = set(
                        map(self.extract_pid,
                            txs_info['expired_transactions']))
                    assert (len(expected_pids) == 1)

                assert (len(expected_pids) == len(
                    txs_info['expired_transactions']))
                for tx in txs_info['expired_transactions']:
                    assert (self.extract_pid(tx) in expected_pids)
                    assert (tx['status'] == 'ongoing')
                    assert (tx['timeout_ms'] == -1)

    @cluster(num_nodes=3)
    def test_mark_transaction_expired(self):
        """Expiring one tx via the admin API must leave only the other
        transaction active on every partition."""
        producer1 = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': '0',
        })
        producer2 = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': '1',
        })
        producer1.init_transactions()
        producer2.init_transactions()
        producer1.begin_transaction()
        producer2.begin_transaction()

        for topic in self.topics:
            for partition in range(topic.partition_count):
                producer1.produce(topic.name, '0', '0', partition)
                producer2.produce(topic.name, '0', '1', partition)

        producer1.flush()
        producer2.flush()

        # Query one explicit partition instead of relying on loop
        # variables leaked from the loops above; all partitions host
        # both transactions.
        txs_info = self.admin.get_transactions(self.topics[0].name, 0,
                                               "kafka")

        expected_pids = set(
            map(self.extract_pid, txs_info['active_transactions']))
        assert (len(expected_pids) == 2)

        # Pick one of the two transactions to expire.
        abort_tx = list(expected_pids)[0]
        expected_pids.discard(abort_tx)

        for topic in self.topics:
            for partition in range(topic.partition_count):
                self.admin.mark_transaction_expired(topic.name, partition, {
                    "id": abort_tx[0],
                    "epoch": abort_tx[1]
                }, "kafka")

                txs_info = self.admin.get_transactions(topic.name, partition,
                                                       "kafka")
                assert ('expired_transactions' not in txs_info)

                assert (len(expected_pids) == len(
                    txs_info['active_transactions']))
                for tx in txs_info['active_transactions']:
                    assert (self.extract_pid(tx) in expected_pids)
                    assert (tx['status'] == 'ongoing')
                    assert (tx['timeout_ms'] == 60000)
Exemplo n.º 3
0
class TxAdminTest(RedpandaTest):
    """Tests for the transaction-related admin API endpoints."""
    topics = (TopicSpec(partition_count=3, replication_factor=3),
              TopicSpec(partition_count=3, replication_factor=3))

    def __init__(self, test_context):
        # Effectively disable the automatic expired-transaction reaper
        # (huge tx_timeout_delay_ms / abort interval) and the leader
        # balancer so the tests fully control tx and leadership state.
        super(TxAdminTest,
              self).__init__(test_context=test_context,
                             num_brokers=3,
                             extra_rp_conf={
                                 "enable_idempotence": True,
                                 "enable_transactions": True,
                                 "tx_timeout_delay_ms": 10000000,
                                 "abort_timed_out_transactions_interval_ms":
                                 10000000,
                                 'enable_leader_balancer': False
                             })

        self.admin = Admin(self.redpanda)

    def extract_pid(self, tx):
        """Return the (producer id, epoch) pair identifying a transaction."""
        return (tx["producer_id"]["id"], tx["producer_id"]["epoch"])

    @cluster(num_nodes=3)
    def test_simple_get_transaction(self):
        """Two ongoing txs must be reported as active on every partition."""
        producer1 = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': '0',
        })
        producer2 = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': '1',
        })
        producer1.init_transactions()
        producer2.init_transactions()
        producer1.begin_transaction()
        producer2.begin_transaction()

        for topic in self.topics:
            for partition in range(topic.partition_count):
                producer1.produce(topic.name, '0', '0', partition)
                producer2.produce(topic.name, '0', '1', partition)

        producer1.flush()
        producer2.flush()

        expected_pids = None

        for topic in self.topics:
            for partition in range(topic.partition_count):
                txs_info = self.admin.get_transactions(topic.name, partition,
                                                       "kafka")
                assert ('expired_transactions' not in txs_info)
                # Capture the pid set from the first partition; every other
                # partition must report exactly the same transactions.
                if expected_pids is None:
                    expected_pids = set(
                        map(self.extract_pid, txs_info['active_transactions']))
                    assert (len(expected_pids) == 2)

                assert (len(expected_pids) == len(
                    txs_info['active_transactions']))
                for tx in txs_info['active_transactions']:
                    assert (self.extract_pid(tx) in expected_pids)
                    assert (tx['status'] == 'ongoing')
                    assert (tx['timeout_ms'] == 60000)

    @cluster(num_nodes=3)
    def test_expired_transaction(self):
        '''
        Problem: rm_stm contains a timer to run try_abort_old_txs.
        The timer is rearmed on begin_tx to the smallest transaction
        deadline, and after try_abort_old_txs to one of the settings
        abort_timed_out_transactions_interval_ms or tx_timeout_delay_ms.
        If we sleep until the transaction timeout and then try to get the
        expired transaction, the timer can fire before our request and
        clean up the expired transactions first.

        How to solve:
        0) Run a transaction
        1) Change the leader for the partitions
        (The new leader did not get requests for begin_tx,
        so its timer is armed to one of the settings)
        2) Get expired transactions
        '''
        assert (len(self.redpanda.nodes) >= 2)

        producer1 = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': '0',
            'transaction.timeout.ms': '900000'
        })
        producer1.init_transactions()
        producer1.begin_transaction()

        for topic in self.topics:
            for partition in range(topic.partition_count):
                producer1.produce(topic.name, '0', '0', partition)

        producer1.flush()

        # Move leadership for every partition to another node. Pass the
        # topic name (not the TopicSpec object) to the admin API, matching
        # the rest of this file.
        for topic in self.topics:
            for partition in range(topic.partition_count):
                old_leader = self.admin.get_partition_leader(
                    namespace="kafka", topic=topic.name, partition=partition)

                self.admin.transfer_leadership_to(namespace="kafka",
                                                  topic=topic.name,
                                                  partition=partition,
                                                  target=None)

                def leader_is_changed():
                    # -1 means no leader is elected yet; wait for a real,
                    # different leader.
                    new_leader = self.admin.get_partition_leader(
                        namespace="kafka",
                        topic=topic.name,
                        partition=partition)
                    return (new_leader != -1) and (new_leader != old_leader)

                wait_until(leader_is_changed,
                           timeout_sec=30,
                           backoff_sec=2,
                           err_msg="Failed to establish current leader")

        expected_pids = None

        for topic in self.topics:
            for partition in range(topic.partition_count):
                txs_info = self.admin.get_transactions(topic.name, partition,
                                                       "kafka")
                assert ('active_transactions' not in txs_info)
                if expected_pids is None:
                    expected_pids = set(
                        map(self.extract_pid,
                            txs_info['expired_transactions']))
                    assert (len(expected_pids) == 1)

                assert (len(expected_pids) == len(
                    txs_info['expired_transactions']))
                for tx in txs_info['expired_transactions']:
                    assert (self.extract_pid(tx) in expected_pids)
                    assert (tx['status'] == 'ongoing')
                    assert (tx['timeout_ms'] == -1)

    @cluster(num_nodes=3)
    def test_mark_transaction_expired(self):
        """Expiring one tx via the admin API must leave only the other
        transaction active on every partition."""
        producer1 = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': '0',
        })
        producer2 = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': '1',
        })
        producer1.init_transactions()
        producer2.init_transactions()
        producer1.begin_transaction()
        producer2.begin_transaction()

        for topic in self.topics:
            for partition in range(topic.partition_count):
                producer1.produce(topic.name, '0', '0', partition)
                producer2.produce(topic.name, '0', '1', partition)

        producer1.flush()
        producer2.flush()

        # Query one explicit partition instead of relying on loop
        # variables leaked from the loops above; all partitions host
        # both transactions.
        txs_info = self.admin.get_transactions(self.topics[0].name, 0,
                                               "kafka")

        expected_pids = set(
            map(self.extract_pid, txs_info['active_transactions']))
        assert (len(expected_pids) == 2)

        # Pick one of the two transactions to expire.
        abort_tx = list(expected_pids)[0]
        expected_pids.discard(abort_tx)

        for topic in self.topics:
            for partition in range(topic.partition_count):
                self.admin.mark_transaction_expired(topic.name, partition, {
                    "id": abort_tx[0],
                    "epoch": abort_tx[1]
                }, "kafka")

                txs_info = self.admin.get_transactions(topic.name, partition,
                                                       "kafka")
                assert ('expired_transactions' not in txs_info)

                assert (len(expected_pids) == len(
                    txs_info['active_transactions']))
                for tx in txs_info['active_transactions']:
                    assert (self.extract_pid(tx) in expected_pids)
                    assert (tx['status'] == 'ongoing')
                    assert (tx['timeout_ms'] == 60000)

    @cluster(num_nodes=3)
    def test_all_transactions(self):
        """get_all_transactions must report the single ongoing tx with all
        partitions of all topics attached to it."""
        tx_id = "0"
        producer = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': tx_id,
        })
        producer.init_transactions()
        producer.begin_transaction()

        for topic in self.topics:
            for partition in range(topic.partition_count):
                producer.produce(topic.name, '0', '0', partition)

        producer.flush()

        txs_info = self.admin.get_all_transactions()
        assert len(txs_info) == 1

        expected_partitions = dict()
        tx = txs_info[0]

        assert tx["transactional_id"] == tx_id
        assert tx["timeout_ms"] == 60000

        # Group the reported partitions per topic.
        for partition in tx["partitions"]:
            assert partition["ns"] == "kafka"
            if partition["topic"] not in expected_partitions:
                expected_partitions[partition["topic"]] = set()
            expected_partitions[partition["topic"]].add(
                partition["partition_id"])

        # Every partition of every topic must be part of the transaction.
        for topic in self.topics:
            assert len(
                expected_partitions[topic.name]) == topic.partition_count
            for partition in range(topic.partition_count):
                assert partition in expected_partitions[topic.name]

    @cluster(num_nodes=3)
    def test_delete_topic_from_ongoin_tx(self):
        """After deleting a topic mid-transaction, removing its partitions
        from the tx via the admin API must let the tx commit."""
        tx_id = "0"
        producer = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': tx_id,
        })
        producer.init_transactions()
        producer.begin_transaction()

        for topic in self.topics:
            for partition in range(topic.partition_count):
                producer.produce(topic.name, '0', '0', partition)

        producer.flush()

        txs_info = self.admin.get_all_transactions()
        assert len(
            txs_info) == 1, "Should be only one transaction in current time"

        rpk = RpkTool(self.redpanda)
        topic_name = self.topics[0].name
        rpk.delete_topic(topic_name)

        tx = txs_info[0]
        assert tx[
            "transactional_id"] == tx_id, f"Expected transactional_id: {tx_id}, but got {tx['transactional_id']}"

        # Detach the deleted topic's partitions from the transaction.
        for partition in tx["partitions"]:
            assert (partition["ns"] == "kafka")
            if partition["topic"] == topic_name:
                self.admin.delete_partition_from_transaction(
                    tx["transactional_id"], partition["ns"],
                    partition["topic"], partition["partition_id"],
                    partition["etag"])

        producer.commit_transaction()

        producer = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': tx_id,
        })
        producer.init_transactions()
        producer.begin_transaction()

        # Use != for string comparison; 'is not' tests identity, which is
        # unreliable for strings.
        for topic in self.topics:
            if topic.name != topic_name:
                for partition in range(topic.partition_count):
                    producer.produce(topic.name, '0', '0', partition)

        producer.commit_transaction()

    @cluster(num_nodes=3)
    def test_delete_topic_from_prepared_tx(self):
        """Deleting a topic before commit must fail the commit; cleaning
        the tx via the admin API must make the tx id usable again."""
        tx_id = "0"
        producer = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': tx_id,
        })
        producer.init_transactions()
        producer.begin_transaction()

        for topic in self.topics:
            for partition in range(topic.partition_count):
                producer.produce(topic.name, '0', '0', partition)

        producer.flush()

        rpk = RpkTool(self.redpanda)
        topic_name = self.topics[0].name
        rpk.delete_topic(topic_name)

        try:
            producer.commit_transaction()
            raise Exception("commit_transaction should fail")
        except ck.cimpl.KafkaException as e:
            kafka_error = e.args[0]
            assert kafka_error.code() == ck.cimpl.KafkaError.UNKNOWN

        producer = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': tx_id,
        })
        try:
            producer.init_transactions()
            raise Exception("init_transaction should fail")
        except ck.cimpl.KafkaException as e:
            kafka_error = e.args[0]
            assert kafka_error.code(
            ) == ck.cimpl.KafkaError.BROKER_NOT_AVAILABLE

        txs_info = self.admin.get_all_transactions()
        assert len(
            txs_info) == 1, "Should be only one transaction in current time"
        tx = txs_info[0]

        # Detach the deleted topic's partitions from the stuck transaction.
        for partition in tx["partitions"]:
            assert (partition["ns"] == "kafka")
            if partition["topic"] == topic_name:
                self.admin.delete_partition_from_transaction(
                    tx["transactional_id"], partition["ns"],
                    partition["topic"], partition["partition_id"],
                    partition["etag"])

        producer = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': tx_id,
        })
        producer.init_transactions()
        producer.begin_transaction()

        # Use != for string comparison; 'is not' tests identity, which is
        # unreliable for strings.
        for topic in self.topics:
            if topic.name != topic_name:
                for partition in range(topic.partition_count):
                    producer.produce(topic.name, '0', '0', partition)

        producer.commit_transaction()

    @cluster(num_nodes=3)
    def test_delete_non_existent_topic(self):
        """Removing a partition of an unknown topic from a tx must 400."""
        tx_id = "0"
        producer = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': tx_id,
        })
        producer.init_transactions()
        producer.begin_transaction()

        error_topic_name = "error_topic"

        for topic in self.topics:
            # Sanity check: the fake topic must not collide with a real one.
            assert error_topic_name != topic.name
            for partition in range(topic.partition_count):
                producer.produce(topic.name, '0', '0', partition)

        producer.flush()

        try:
            self.admin.delete_partition_from_transaction(
                tx_id, "kafka", error_topic_name, 0, 0)
        except requests.exceptions.HTTPError as e:
            assert e.response.text == '{"message": "Can not find partition({kafka/error_topic/0}) in transaction for delete", "code": 400}'

        producer.commit_transaction()

    @cluster(num_nodes=3)
    def test_delete_non_existent_tid(self):
        """Removing a partition from an unknown tx id must 500."""
        tx_id = "0"
        producer = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': tx_id,
        })
        producer.init_transactions()
        producer.begin_transaction()

        for topic in self.topics:
            for partition in range(topic.partition_count):
                producer.produce(topic.name, '0', '0', partition)

        producer.flush()

        txs_info = self.admin.get_all_transactions()
        tx = txs_info[0]

        topic_name = self.topics[0].name
        error_tx_id = "1"

        for partition in tx["partitions"]:
            if partition["topic"] == topic_name:
                try:
                    self.admin.delete_partition_from_transaction(
                        error_tx_id, partition["ns"], partition["topic"],
                        partition["partition_id"], partition["etag"])
                except requests.exceptions.HTTPError as e:
                    assert e.response.text == '{"message": "Unexpected tx_error error: Unknown server error", "code": 500}'

        producer.commit_transaction()

    @cluster(num_nodes=3)
    def test_delete_non_existent_etag(self):
        """Removing a partition with a wrong etag must 400."""
        tx_id = "0"
        producer = ck.Producer({
            'bootstrap.servers': self.redpanda.brokers(),
            'transactional.id': tx_id,
        })
        producer.init_transactions()
        producer.begin_transaction()

        for topic in self.topics:
            for partition in range(topic.partition_count):
                producer.produce(topic.name, '0', '0', partition)

        producer.flush()

        txs_info = self.admin.get_all_transactions()
        tx = txs_info[0]

        topic_name = self.topics[0].name

        for partition in tx["partitions"]:
            if partition["topic"] == topic_name:
                try:
                    self.admin.delete_partition_from_transaction(
                        tx_id, partition["ns"], partition["topic"],
                        partition["partition_id"], partition["etag"] + 100)
                except requests.exceptions.HTTPError as e:
                    # The original dropped this comparison's result; the
                    # response body must actually be asserted.
                    assert e.response.text == '{{"message": "Can not find partition({{{}/{}/{}}}) in transaction for delete", "code": 400}}'.format(
                        partition["ns"], partition["topic"],
                        partition["partition_id"])

        producer.commit_transaction()
Exemplo n.º 4
0
class LeadersInfoApiTest(RedpandaTest):
    """Tests for the leaders_info admin endpoints (reset and query)."""
    topics = (TopicSpec(partition_count=3, replication_factor=3), )

    def __init__(self, test_context):
        super(LeadersInfoApiTest, self).__init__(test_context=test_context,
                                                 num_brokers=3)

        self.admin = Admin(self.redpanda)

    @cluster(num_nodes=3)
    def reset_leaders_info_test(self):
        def check_reset_leaders():
            # After resetting the leaders table, the node should report
            # no leader (-1) for most partitions.
            node = self.redpanda.nodes[0]
            self.admin.reset_leaders_info(node)

            partition_without_leader = 0
            for partition in range(self.topics[0].partition_count):
                leader = self.admin.get_partition_leader(
                    namespace="kafka",
                    topic=self.topics[0].name,
                    partition=partition,
                    node=node)
                if leader == -1:
                    partition_without_leader += 1

            return partition_without_leader >= 2

        wait_until(check_reset_leaders,
                   timeout_sec=180,
                   backoff_sec=1,
                   err_msg="Can not reset leaders_table for nodes")

        def check_get_leaders():
            # Bug fix: the original returned from inside the loop, so
            # only partition 0 was ever compared. Both nodes must agree
            # on the leader of every partition.
            for partition in range(self.topics[0].partition_count):
                leader0 = self.admin.get_partition_leader(
                    namespace="kafka",
                    topic=self.topics[0].name,
                    partition=partition,
                    node=self.redpanda.nodes[0])

                leader1 = self.admin.get_partition_leader(
                    namespace="kafka",
                    topic=self.topics[0].name,
                    partition=partition,
                    node=self.redpanda.nodes[1])

                if leader0 != leader1:
                    return False

            return True

        wait_until(check_get_leaders,
                   timeout_sec=30,
                   backoff_sec=1,
                   err_msg="Can not refresh leaders")

    @cluster(num_nodes=3)
    def get_leaders_info_test(self):
        def check_reset_leaders():
            # After a reset the node's leaders table must be empty.
            node = self.redpanda.nodes[0]
            self.admin.reset_leaders_info(node)
            leaders = self.admin.get_leaders_info(node)
            return len(leaders) == 0

        wait_until(check_reset_leaders,
                   timeout_sec=180,
                   backoff_sec=1,
                   err_msg="Can not reset leaders_table for nodes")

        def check_get_leaders():
            # Two nodes must eventually converge to identical leader
            # tables; sort both for an order-independent comparison.
            def compare_key(e):
                return (e["ns"], e["topic"], e["partition_id"])

            leaders_node1 = self.admin.get_leaders_info(self.redpanda.nodes[0])
            leaders_node1.sort(key=compare_key)
            leaders_node2 = self.admin.get_leaders_info(self.redpanda.nodes[1])
            leaders_node2.sort(key=compare_key)
            return leaders_node1 == leaders_node2

        wait_until(check_get_leaders,
                   timeout_sec=180,
                   backoff_sec=1,
                   err_msg="Can not refresh leaders")