Exemplo n.º 1
0
    def test_commits_offsets(self, fetch):
        """Test that message offsets are persisted to ZK.

        ``fetch`` is configured below to return nine fake messages whose
        ``next_offset`` values are 3, 6, ..., 27 (presumably it is a mock
        injected by a patch decorator outside this view). After one message
        has been consumed the partition offset must be 3, and committing
        must store ``'3'`` at the partition's ZK path.
        """
        self._register_fake_brokers(1)
        t = Topic(self.c, 'testtopic')

        c = t.subscribe('group')
        # Always stop the consumer, even when an assertion below fails.
        try:
            msgs = []
            for i in xrange(1, 10):
                msg = mock.Mock()
                msg.next_offset = 3 * i
                msg.payload = str(i) * 3
                msgs.append(msg)
            fetch.return_value = msgs

            self.assertPassesWithMultipleAttempts(
                lambda: self.assertTrue(c.next_message(10) is not None), 5)
            self.assertEqual(len(c.partitions), 1)
            p = list(c.partitions)[0]

            self.assertEqual(p.offset, 3)
            c.commit_offsets()

            # Only the node data matters here; the znode stat is ignored.
            d, _ = self.client.get(p.path)
            self.assertEqual(d, '3')
        finally:
            c.stop_partitions()
Exemplo n.º 2
0
    def test_consumer_remembers_offset(self, fetch):
        """Test that offsets are successfully retrieved from zk.

        An offset is first committed through a standalone
        ``OwnedPartition`` built on a fake partition. A consumer that then
        subscribes to the same topic and group must report that offset and
        pass it to ``fetch``.
        """
        topic = 'testtopic'
        group = 'testgroup'
        offset = 10
        ev = Event()

        fake_partition = mock.Mock()
        fake_partition.cluster = self.c
        fake_partition.topic.name = topic
        fake_partition.broker.id = 0
        fake_partition.number = 0

        def fake_fetch(*args, **kwargs):
            # Signal that a fetch was attempted, then report no messages.
            ev.set()
            return ()

        fetch.side_effect = fake_fetch

        op = OwnedPartition(fake_partition, group)
        op._current_offset = offset
        op.commit_offset()

        self._register_fake_brokers(1)
        t = Topic(self.c, topic)
        c = t.subscribe(group)
        # Always stop the consumer, even when an assertion below fails.
        try:
            self.assertEqual(len(c.partitions), 1)
            p = list(c.partitions)[0]
            self.assertEqual(p.offset, offset)

            self.assertEqual(None, p.next_message(0))
            # Give the fetch up to one second to be attempted.
            ev.wait(1)
            fetch.assert_called_with(offset, ConsumerConfig.fetch_size)
        finally:
            c.stop_partitions()
Exemplo n.º 3
0
    def test_commits_offsets(self, fetch):
        """Test that message offsets are persisted to ZK.

        Nine fake messages are queued on ``fetch`` with ``next_offset``
        values 3, 6, ..., 27. Consuming a single message must move the
        partition offset to 3, and ``commit_offsets`` must write ``'3'``
        to the partition's ZK path.
        """
        self._register_fake_brokers(1)
        t = Topic(self.c, 'testtopic')

        c = t.subscribe('group')
        # Guarantee the consumer is stopped whatever the assertions do.
        try:
            msgs = []
            for i in xrange(1, 10):
                msg = mock.Mock()
                msg.next_offset = 3 * i
                msg.payload = str(i) * 3
                msgs.append(msg)
            fetch.return_value = msgs

            self.assertPassesWithMultipleAttempts(
                lambda: self.assertTrue(c.next_message(10) is not None),
                5
            )
            self.assertEqual(len(c.partitions), 1)
            p = list(c.partitions)[0]

            self.assertEqual(p.offset, 3)
            c.commit_offsets()

            # The znode stat returned alongside the data is not needed.
            d, _ = self.client.get(p.path)
            self.assertEqual(d, '3')
        finally:
            c.stop_partitions()
Exemplo n.º 4
0
    def test_empty_topic(self):
        """Test that a consumer of an empty topic reports itself as empty.

        """
        topic = 'topic'
        t = Topic(self.kafka_cluster, topic)

        consumer = t.subscribe('group2')
        # Stop the consumer's partitions once the check is done so the
        # subscription does not outlive this test.
        try:
            self.assertTrue(consumer.empty())
        finally:
            consumer.stop_partitions()
Exemplo n.º 5
0
    def test_empty_topic(self):
        """Test that a consumer of an empty topic reports itself as empty.

        """
        topic = self.get_topic().name
        t = Topic(self.kafka_cluster, topic)

        consumer = t.subscribe('group2')
        # Always stop the consumer, even when the assertion fails.
        try:
            self.assertTrue(consumer.empty())
        finally:
            consumer.stop_partitions()
Exemplo n.º 6
0
    def test_empty_topic(self):
        """Test that subscribing to an empty topic yields an empty consumer.

        """
        topic = self.get_topic().name
        t = Topic(self.kafka_cluster, topic)

        consumer = t.subscribe('group2')
        # Run the cleanup in ``finally`` so a failed assertion cannot
        # leave the consumer's partitions running.
        try:
            self.assertTrue(consumer.empty())
        finally:
            consumer.stop_partitions()
Exemplo n.º 7
0
    def test_too_many_consumers(self, *args):
        """Test graceful failure when # of consumers exceeds partitions

        One partition is registered and two subscriptions to the same
        group are attempted; ``NoAvailablePartitionsError`` must be raised
        somewhere along the way.
        """
        partition_count, consumer_count = 1, 2
        self._register_fake_brokers(partition_count)
        topic = Topic(self.c, 'testtopic')

        with self.assertRaises(NoAvailablePartitionsError):
            for _ in xrange(consumer_count):
                topic.subscribe('group1')
Exemplo n.º 8
0
    def test_too_many_consumers(self, *args):
        """Test graceful failure when # of consumers exceeds partitions

        Subscribes more consumers to a group than there are registered
        partitions and expects ``NoAvailablePartitionsError``.
        """
        available = 1
        wanted = 2
        self._register_fake_brokers(available)
        topic = Topic(self.c, 'testtopic')

        subscribed = []
        with self.assertRaises(NoAvailablePartitionsError):
            # Keep subscribing until the group runs out of partitions.
            for _ in xrange(wanted):
                subscribed.append(topic.subscribe('group1'))
Exemplo n.º 9
0
    def test_multiclient_rebalance(self, *args):
        """Test rebalancing with many connected clients

        This test is primarily good at ferreting out concurrency bugs
        and therefore doesn't test one specific thing, since such
        bugs are fundamentally hard to trap.

        To test, it simulates 10 consumers connecting over time, rebalancing,
        and eventually disconnecting one by one. In order to accomplish
        this, it creates a separate kazoo client for each consumer. This
        is required because otherwise there is too much thread contention
        over the single kazoo client. Some callbacks won't happen until
        much too late because there aren't enough threads to go around.

        """
        n_partitions = 10
        n_consumers = 10
        t = Topic(self.c, 'testtopic')

        # with self.client, new clients don't see the brokers -- unsure why
        from kazoo.client import KazooClient
        zk_hosts = ','.join(
            ['%s:%s' % (h, p) for h, p in self.client.hosts.hosts])
        zkclient = KazooClient(hosts=zk_hosts)
        zkclient.start()
        try:
            self._register_fake_brokers(n_partitions, client=zkclient)
            zkclient.ensure_path('/brokers/topics')

            # bring up consumers
            consumers = []
            for _ in xrange(n_consumers):
                newclient = KazooClient(hosts=zk_hosts)
                newclient.start()
                cluster = Cluster(newclient)
                topic = Topic(cluster, 'testtopic')
                consumers.append((newclient, topic.subscribe('group1')))
                time.sleep(1)

            time.sleep(5)  # let things settle

            # bring down consumers
            for client, consumer in consumers:
                consumer.stop_partitions()
                client.stop()
                # a little more time so we don't kill during rebalance
                time.sleep(2)
        finally:
            # The registration client is separate from the per-consumer
            # clients (each of those is stopped in the loop above) and
            # has to be shut down as well.
            zkclient.stop()
Exemplo n.º 10
0
    def test_configuration_with_nodes(self, cw):
        """Test that a topic's partition count is the sum over its brokers.

        ``nodes`` maps each broker id to the number of partitions that
        broker's partition set is configured with (both as strings). The
        ``cw`` argument is not used by the test body.
        """
        nodes = {
            '0': '5',
            '1': '2',
            '3': '4',
        }

        broker_map = brokers.BrokerMap(self.cluster)
        self.cluster.brokers = broker_map
        with mock.patch('samsa.brokers.DataWatch'):
            # Tell the BrokerMap which brokers it's managing.
            broker_map._configure(nodes.keys())

        with mock.patch('samsa.partitions.DataWatch'):
            topic = Topic(self.cluster, 'topic')
            # Tell the PartitionMap which brokers it knows of.
            topic.partitions._configure(nodes.keys())
            # Tell each PartitionSet how many partitions it's managing.
            for partition_set in topic.partitions._partition_sets:
                partition_set._configure(
                    nodes[str(partition_set.broker.id)], mock.Mock())

        expected_total = sum(int(count) for count in nodes.values())
        self.assertEqual(len(topic.partitions), expected_total)
Exemplo n.º 11
0
    def test_multiclient_rebalance(self, *args):
        """Test rebalancing with many connected clients

        This test is primarily good at ferreting out concurrency bugs
        and therefore doesn't test one specific thing, since such
        bugs are fundamentally hard to trap.

        To test, it simulates 10 consumers connecting over time, rebalancing,
        and eventually disconnecting one by one. In order to accomplish
        this, it creates a separate kazoo client for each consumer. This
        is required because otherwise there is too much thread contention
        over the single kazoo client. Some callbacks won't happen until
        much too late because there aren't enough threads to go around.

        """
        n_partitions = 10
        n_consumers = 10
        t = Topic(self.c, 'testtopic')

        # with self.client, new clients don't see the brokers -- unsure why
        from kazoo.client import KazooClient
        zk_hosts = ','.join(
            ['%s:%s' % (h, p) for h, p in self.client.hosts.hosts])
        zkclient = KazooClient(hosts=zk_hosts)
        zkclient.start()
        try:
            self._register_fake_brokers(n_partitions, client=zkclient)
            zkclient.ensure_path('/brokers/topics')

            # bring up consumers
            consumers = []
            for _ in xrange(n_consumers):
                newclient = KazooClient(hosts=zk_hosts)
                newclient.start()
                cluster = Cluster(newclient)
                topic = Topic(cluster, 'testtopic')
                consumers.append((newclient, topic.subscribe('group1')))
                time.sleep(1)

            time.sleep(5)  # let things settle

            # bring down consumers
            for client, consumer in consumers:
                consumer.stop_partitions()
                client.stop()
                # a little more time so we don't kill during rebalance
                time.sleep(2)
        finally:
            # Every per-consumer client is stopped in the loop above; the
            # client used to register the fake brokers still needs an
            # explicit shutdown.
            zkclient.stop()
Exemplo n.º 12
0
    def test_consumes(self):
        """Test that we can consume messages from kafka.

        Messages are produced twice. After each round the test polls until
        the consumer yields them, and it checks that the partition offset
        is positive after the first round and larger after the second.
        """
        topic = 'topic'
        messages = ['hello world', 'foobar']

        # publish `messages` to `topic`
        self.kafka.produce(topic, 0, messages)

        t = Topic(self.kafka_cluster, topic)

        # subscribe to `topic`
        consumer = t.subscribe('group2')

        def test():
            """Test that `consumer` can see `messages`.

            Catches assertion failures so we can retry while we wait for
            kafka to coalesce.

            """
            logger.debug('Running `test`...')
            try:
                self.assertEqual(
                    list(islice(consumer, 0, len(messages))),
                    messages
                )
                return True
            except AssertionError as e:
                logger.exception('Caught exception: %s', e)
                return False

        # Always stop the consumer, even when an assertion below fails.
        try:
            # wait for one second for :func:`test` to return true or
            # raise an error
            polling_timeout(test, 1)

            old_offset = [p.offset for p in consumer.partitions][0]
            # test that the offset of our 1 partition is not 0
            self.assertTrue(old_offset > 0)
            # and that consumer contains no more messages.
            self.assertTrue(consumer.empty())

            # repeat and see if offset grows.
            self.kafka.produce(topic, 0, messages)
            polling_timeout(test, 1)
            new_offset = [p.offset for p in consumer.partitions][0]
            self.assertTrue(new_offset > old_offset)
        finally:
            consumer.stop_partitions()
Exemplo n.º 13
0
    def test_consumes(self):
        """Test that we can consume messages from kafka.

        The same two messages are produced in two rounds. Each round is
        followed by a poll for the consumer to yield them, and the
        partition offset is expected to grow from one round to the next.
        """
        topic = self.get_topic().name
        messages = ['hello world', 'foobar']

        # publish `messages` to topic
        self.kafka.produce(topic, 0, messages)

        t = Topic(self.kafka_cluster, topic)

        # subscribe to topic
        consumer = t.subscribe('group2')

        def test():
            """Test that `consumer` can see `messages`.

            Catches assertion failures so we can retry while we wait for
            kafka to coalesce.

            """
            logger.debug('Running `test`...')
            try:
                self.assertEqual(list(islice(consumer, 0, len(messages))),
                                 messages)
                return True
            except AssertionError as e:
                logger.exception('Caught exception: %s', e)
                return False

        # Guarantee the consumer is stopped whatever the assertions do.
        try:
            # wait for one second for :func:`test` to return true or
            # raise an error
            polling_timeout(test, 1)

            old_offset = [p.offset for p in consumer.partitions][0]
            # test that the offset of our 1 partition is not 0
            self.assertTrue(old_offset > 0)
            # and that consumer contains no more messages.
            self.assertTrue(consumer.empty())

            # repeat and see if offset grows.
            self.kafka.produce(topic, 0, messages)
            polling_timeout(test, 1)
            new_offset = [p.offset for p in consumer.partitions][0]
            self.assertTrue(new_offset > old_offset)
        finally:
            consumer.stop_partitions()
Exemplo n.º 14
0
    def test_fetch_invalid_offset(self):
        """Test that fetching rolled-over offset skips to lowest valid offset.

        Bad offsets happen when kafka logs are rolled over automatically.
        This could happen when a consumer is offline for a long time and
        zookeeper has an old offset stored. It could also happen with a
        consumer near the end of a log that's being rolled over and its
        previous spot no longer exists.
        """
        topic = 'topic'
        messages = ['hello world', 'foobar']

        # publish `messages` to `topic`
        self.kafka.produce(topic, 0, messages)

        t = Topic(self.kafka_cluster, topic)

        # get the consumer and set the offset to -1
        consumer = t.subscribe('group2')
        # Always stop the consumer, even when a step below fails.
        try:
            list(consumer.partitions)[0]._next_offset = -1

            def test():
                """Test that `consumer` can see `messages`.

                Catches assertion failures so we can retry while we wait
                for kafka to coalesce.

                """
                logger.debug('Running `test`...')
                try:
                    self.assertEqual(
                        list(islice(consumer, 0, len(messages))),
                        messages
                    )
                    return True
                except AssertionError as e:
                    logger.exception('Caught exception: %s', e)
                    return False

            # wait for one second for :func:`test` to return true or
            # raise an error
            polling_timeout(test, 1)
        finally:
            consumer.stop_partitions()
Exemplo n.º 15
0
    def test_configuration_no_node(self, cw):
        """Test that a new topic has as many partitions as known brokers.

        TODO: the original note here was left unfinished.
        """
        broker_ids = ['0', '1', '2']

        known_brokers = brokers.BrokerMap(self.cluster)
        self.cluster.brokers = known_brokers

        # Configure the broker map with its data watch patched out.
        with mock.patch('samsa.brokers.DataWatch'):
            known_brokers._configure(broker_ids)

        # Build the topic with the partition data watch patched out.
        with mock.patch('samsa.partitions.DataWatch'):
            topic = Topic(self.cluster, 'topic')

        partition_total = len(topic.partitions)
        self.assertEqual(partition_total, len(known_brokers))
Exemplo n.º 16
0
    def test_broker_addition(self, rebalance):
        """Test adding a broker, and ensure all partitions are discovered

        A consumer subscribes while one broker with two partitions is
        registered. A second broker with two more partitions is then
        added, and both the topic and the consumer are expected to end up
        with four partitions.
        """
        topic_name = 'testtopic'
        topic = Topic(self.c, topic_name)

        # First broker, registered before the consumer subscribes.
        self._register_fake_broker(0, "creator:127.0.0.1:9092")
        self._register_fake_partitions(topic_name, n_partitions=2)

        subscriber = topic.subscribe('group1')

        # Second broker, registered while the consumer is subscribed.
        self._register_fake_broker(1, "creator:127.0.0.1:9093")
        self._register_fake_partitions(
            topic_name, n_partitions=2, brokers=['1'])

        # let watches resolve
        time.sleep(1)

        expected_total = 4
        self.assertEqual(len(topic.partitions), expected_total)
        self.assertEqual(len(subscriber.partitions), expected_total)
Exemplo n.º 17
0
    def test_fetch_invalid_offset(self):
        """Test that fetching rolled-over offset skips to lowest valid offset.

        Bad offsets happen when kafka logs are rolled over automatically.
        This could happen when a consumer is offline for a long time and
        zookeeper has an old offset stored. It could also happen with a
        consumer near the end of a log that's being rolled over and its
        previous spot no longer exists.
        """
        topic = self.get_topic().name
        messages = ['hello world', 'foobar']

        # publish `messages` to topic
        self.kafka.produce(topic, 0, messages)

        t = Topic(self.kafka_cluster, topic)

        # get the consumer and set the offset to -1
        consumer = t.subscribe('group2')
        # Guarantee the consumer is stopped whatever happens below.
        try:
            list(consumer.partitions)[0]._next_offset = -1

            def test():
                """Test that `consumer` can see `messages`.

                Catches assertion failures so we can retry while we wait
                for kafka to coalesce.

                """
                logger.debug('Running `test`...')
                try:
                    self.assertEqual(
                        list(islice(consumer, 0, len(messages))),
                        messages)
                    return True
                except AssertionError as e:
                    logger.exception('Caught exception: %s', e)
                    return False

            # wait for one second for :func:`test` to return true or
            # raise an error
            polling_timeout(test, 1)
        finally:
            consumer.stop_partitions()
Exemplo n.º 18
0
    def test_assigns_partitions(self, *args):
        """
        Test rebalance

        Subscribes several consumers to one group and checks that,
        between them, they own every registered partition exactly once.

        Adjust n_* to see how rebalancing performs.

        """
        n_partitions = 10
        n_consumers = 3
        self._register_fake_brokers(n_partitions)
        t = Topic(self.c, 'testtopic')

        consumers = [t.subscribe('group1') for _ in xrange(n_consumers)]

        partitions = []
        for c in consumers:
            partitions.extend(c.partitions)

        # test that the total number of owned partitions is right.
        self.assertEqual(len(partitions), n_partitions)
        # test that no partition is owned twice (all are distinct).
        self.assertEqual(len(set(partitions)), n_partitions)
Exemplo n.º 19
0
    def test_consumer_remembers_offset(self, fetch):
        """Test that offsets are successfully retrieved from zk.

        The test is currently disabled. It is reported as skipped rather
        than returning early, so the test run shows it is not exercised.
        The body below is kept for when the test is fixed.
        """
        # TODO: Fix this test
        self.skipTest('TODO: Fix this test')
        topic = 'testtopic'
        group = 'testgroup'
        offset = 10
        ev = Event()

        fake_partition = mock.Mock()
        fake_partition.cluster = self.c
        fake_partition.topic.name = topic
        fake_partition.broker.id = 0
        fake_partition.number = 0

        def fake_fetch(*args, **kwargs):
            # Signal that a fetch was attempted, then report no messages.
            ev.set()
            return ()

        fetch.side_effect = fake_fetch

        msgqueue = Queue.Queue()
        op = OwnedPartition(fake_partition, group, msgqueue)
        op._current_offset = offset
        op.commit_offset()

        self._register_fake_brokers(1)
        t = Topic(self.c, topic)
        c = t.subscribe(group)

        self.assertEqual(len(c.partitions), 1)
        p = list(c.partitions)[0]
        self.assertEqual(p.offset, offset)

        self.assertEqual(None, p.next_message(0))
        ev.wait(1)
        fetch.assert_called_with(offset, ConsumerConfig.fetch_size)
        c.stop_partitions()
Exemplo n.º 20
0
    def test_broker_addition(self, rebalance):
        """Test adding a broker, and ensure all partitions are discovered

        Registers one broker with two partitions, subscribes a consumer,
        then registers a second broker with two more partitions. After a
        short pause the topic and the consumer must both see four.
        """
        name = 'testtopic'
        per_broker = 2
        watched = Topic(self.c, name)

        self._register_fake_broker(0, "creator:127.0.0.1:9092")
        self._register_fake_partitions(name, n_partitions=per_broker)

        member = watched.subscribe('group1')

        self._register_fake_broker(1, "creator:127.0.0.1:9093")
        self._register_fake_partitions(name,
                                       n_partitions=per_broker,
                                       brokers=['1'])

        time.sleep(1)  # let watches resolve

        # Topic first, then consumer: both must report all four partitions.
        for owner in (watched, member):
            self.assertEqual(len(owner.partitions), 4)
Exemplo n.º 21
0
    def test_assigns_partitions(self, *args):
        """
        Test rebalance

        Three consumers join the same group; together they must own each
        of the ten registered partitions exactly once.

        Adjust n_* to see how rebalancing performs.

        """
        n_partitions = 10
        n_consumers = 3
        self._register_fake_brokers(n_partitions)
        t = Topic(self.c, 'testtopic')

        consumers = [t.subscribe('group1') for _ in xrange(n_consumers)]

        partitions = []
        for c in consumers:
            partitions.extend(c.partitions)

        # test that every partition is accounted for in the total.
        self.assertEqual(len(partitions), n_partitions)
        # test that there are no duplicates among the owned partitions.
        self.assertEqual(len(set(partitions)), n_partitions)