Example #1
    def test_get_leader_exceptions_when_noleader(self, protocol, conn):

        mock_conn(conn)

        brokers = [BrokerMetadata(0, "broker_1", 4567, None), BrokerMetadata(1, "broker_2", 5678, None)]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [(NO_ERROR, "topic_noleader", [(NO_LEADER, 0, -1, [], []), (NO_LEADER, 1, -1, [], [])])]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        client = SimpleClient(hosts=["broker_1:4567"])
        self.assertDictEqual(
            {TopicPartition("topic_noleader", 0): None, TopicPartition("topic_noleader", 1): None},
            client.topics_to_brokers,
        )

        # No leader partitions -- raise LeaderNotAvailableError
        with self.assertRaises(LeaderNotAvailableError):
            self.assertIsNone(client._get_leader_for_partition("topic_noleader", 0))
        with self.assertRaises(LeaderNotAvailableError):
            self.assertIsNone(client._get_leader_for_partition("topic_noleader", 1))

        # Unknown partitions -- raise UnknownTopicOrPartitionError
        with self.assertRaises(UnknownTopicOrPartitionError):
            self.assertIsNone(client._get_leader_for_partition("topic_noleader", 2))

        topics = [(NO_ERROR, "topic_noleader", [(NO_ERROR, 0, 0, [0, 1], [0, 1]), (NO_ERROR, 1, 1, [1, 0], [1, 0])])]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)
        self.assertEqual(brokers[0], client._get_leader_for_partition("topic_noleader", 0))
        self.assertEqual(brokers[1], client._get_leader_for_partition("topic_noleader", 1))
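The assertions above pin down the lookup semantics: a cached partition whose leader is None raises LeaderNotAvailableError, a partition absent from the cache raises UnknownTopicOrPartitionError, and otherwise the leader's BrokerMetadata is returned. A minimal standalone sketch of a lookup with those semantics (a hypothetical helper, not SimpleClient's actual implementation; import paths assume the kafka-python 1.x layout):

# Hypothetical sketch of the leader-lookup semantics asserted in the test above.
from kafka.errors import LeaderNotAvailableError, UnknownTopicOrPartitionError
from kafka.structs import TopicPartition


def lookup_leader(topics_to_brokers, topic, partition):
    key = TopicPartition(topic, partition)
    if key not in topics_to_brokers:
        # the partition was never reported in metadata
        raise UnknownTopicOrPartitionError(key)
    leader = topics_to_brokers[key]
    if leader is None:
        # the partition exists but has no elected leader yet
        raise LeaderNotAvailableError(key)
    return leader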
Example #2
    def test_get_leader_for_unassigned_partitions(self, protocol, conn):

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567, None),
            BrokerMetadata(1, 'broker_2', 5678, None)
        ]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [
            (NO_LEADER, 'topic_no_partitions', []),
            (UNKNOWN_TOPIC_OR_PARTITION, 'topic_unknown', []),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])

        self.assertDictEqual({}, client.topics_to_brokers)

        with self.assertRaises(LeaderNotAvailableError):
            client._get_leader_for_partition('topic_no_partitions', 0)

        with self.assertRaises(UnknownTopicOrPartitionError):
            client._get_leader_for_partition('topic_unknown', 0)
Example #3
    def test_send_produce_request_raises_when_noleader(self, protocol, conn):
        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567, None),
            BrokerMetadata(1, 'broker_2', 5678, None)
        ]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [
            (NO_ERROR, 'topic_noleader', [
                (NO_LEADER, 0, -1, [], []),
                (NO_LEADER, 1, -1, [], []),
            ]),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])

        requests = [ProduceRequestPayload(
            "topic_noleader", 0,
            [create_message("a"), create_message("b")])]

        with self.assertRaises(FailedPayloadsError):
            client.send_produce_request(requests)
Example #4
    def test_get_leader_for_partitions_reloads_metadata(self, protocol, conn):
        "Get leader for partitions reload metadata if it is not available"

        mock_conn(conn)

        brokers = [BrokerMetadata(0, "broker_1", 4567, None), BrokerMetadata(1, "broker_2", 5678, None)]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [(NO_LEADER, "topic_no_partitions", [])]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        client = SimpleClient(hosts=["broker_1:4567"])

        # topic metadata is loaded but empty
        self.assertDictEqual({}, client.topics_to_brokers)

        topics = [(NO_ERROR, "topic_one_partition", [(NO_ERROR, 0, 0, [0, 1], [0, 1])])]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        # calling _get_leader_for_partition (from any broker aware request)
        # will try loading metadata again for the same topic
        leader = client._get_leader_for_partition("topic_one_partition", 0)

        self.assertEqual(brokers[0], leader)
        self.assertDictEqual({TopicPartition("topic_one_partition", 0): brokers[0]}, client.topics_to_brokers)
Example #5
    def test_get_leader_for_partitions_reloads_metadata(self, protocol, conn):
        "Get leader for partitions reload metadata if it is not available"

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567),
            BrokerMetadata(1, 'broker_2', 5678)
        ]

        topics = [
            (NO_LEADER, 'topic_no_partitions', [])
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])

        # topic metadata is loaded but empty
        self.assertDictEqual({}, client.topics_to_brokers)

        topics = [
            (NO_ERROR, 'topic_one_partition', [
                (NO_ERROR, 0, 0, [0, 1], [0, 1])
            ])
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

        # calling _get_leader_for_partition (from any broker aware request)
        # will try loading metadata again for the same topic
        leader = client._get_leader_for_partition('topic_one_partition', 0)

        self.assertEqual(brokers[0], leader)
        self.assertDictEqual({
            TopicPartition('topic_one_partition', 0): brokers[0]},
            client.topics_to_brokers)
Example #6
 def test_correlation_rollover(self):
     with patch.object(SimpleClient, 'load_metadata_for_topics'):
         big_num = 2**31 - 3
         client = SimpleClient(hosts=(), correlation_id=big_num)
         self.assertEqual(big_num + 1, client._next_id())
         self.assertEqual(big_num + 2, client._next_id())
         self.assertEqual(0, client._next_id())
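The assertions above imply an increment-then-wrap counter whose correlation id rolls over at 2**31 rather than 2**32. A self-contained sketch with the same behavior (a hypothetical counter, not SimpleClient's actual code):

# Hypothetical counter reproducing the rollover the test asserts.
class CorrelationCounter(object):
    def __init__(self, correlation_id=0):
        self.correlation_id = correlation_id

    def next_id(self):
        # increment first, then wrap back into the signed 32-bit range
        self.correlation_id = (self.correlation_id + 1) % 2**31
        return self.correlation_id

counter = CorrelationCounter(2**31 - 3)
assert counter.next_id() == 2**31 - 2
assert counter.next_id() == 2**31 - 1
assert counter.next_id() == 0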
Example #7
    def test_has_metadata_for_topic(self, protocol, conn):

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567, None),
            BrokerMetadata(1, 'broker_2', 5678, None)
        ]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [
            (NO_LEADER, 'topic_still_creating', []),
            (UNKNOWN_TOPIC_OR_PARTITION, 'topic_doesnt_exist', []),
            (NO_ERROR, 'topic_noleaders', [
                (NO_LEADER, 0, -1, [], []),
                (NO_LEADER, 1, -1, [], []),
            ]),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])

        # Topics with no partitions return False
        self.assertFalse(client.has_metadata_for_topic('topic_still_creating'))
        self.assertFalse(client.has_metadata_for_topic('topic_doesnt_exist'))

        # Topics with partition metadata, but no leaders, return True
        self.assertTrue(client.has_metadata_for_topic('topic_noleaders'))
    def assert_message_count(self, topic, check_count, timeout=10,
                             partitions=None, at_least=False):
        hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                          for broker in self.brokers])

        client = SimpleClient(hosts, timeout=2)
        consumer = SimpleConsumer(client, None, topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)

        started_at = time.time()
        pending = -1
        while pending < check_count and (time.time() - started_at < timeout):
            try:
                pending = consumer.pending(partitions)
            except FailedPayloadsError:
                pass
            time.sleep(0.5)

        consumer.stop()
        client.close()

        if pending < check_count:
            self.fail('Too few pending messages: found %d, expected %d' %
                      (pending, check_count))
        elif pending > check_count and not at_least:
            self.fail('Too many pending messages: found %d, expected %d' %
                      (pending, check_count))
        return True
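A hedged usage sketch of the helper above, as it might be called from another test method on the same class; the topic name and expected count are placeholders:

# Hypothetical call site; 'failover-topic' and 100 are placeholder values.
self.assert_message_count('failover-topic', 100, partitions=[0], at_least=True)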
Example #9
    def test_send_broker_unaware_request(self):
        mocked_conns = {
            ('kafka01', 9092): MagicMock(),
            ('kafka02', 9092): MagicMock(),
            ('kafka03', 9092): MagicMock()
        }
        # inject BrokerConnection side effects
        mock_conn(mocked_conns[('kafka01', 9092)], success=False)
        mock_conn(mocked_conns[('kafka03', 9092)], success=False)
        future = Future()
        mocked_conns[('kafka02', 9092)].send.return_value = future
        mocked_conns[('kafka02', 9092)].recv.return_value = [('valid response', future)]

        def mock_get_conn(host, port, afi):
            return mocked_conns[(host, port)]

        # patch to avoid making requests before we want it
        with patch.object(SimpleClient, 'load_metadata_for_topics'):
            with patch.object(SimpleClient, '_get_conn', side_effect=mock_get_conn):

                client = SimpleClient(hosts='kafka01:9092,kafka02:9092')
                resp = client._send_broker_unaware_request(payloads=['fake request'],
                                                           encoder_fn=MagicMock(),
                                                           decoder_fn=lambda x: x)

                self.assertEqual('valid response', resp)
                mocked_conns[('kafka02', 9092)].recv.assert_called_once_with()
Example #10
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request([OffsetRequestPayload(topic, partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [ self.msg(x) for x in iterable ]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
Example #11
    def test_send_produce_request_raises_when_topic_unknown(self, protocol, conn):

        mock_conn(conn)

        brokers = [BrokerMetadata(0, "broker_1", 4567, None), BrokerMetadata(1, "broker_2", 5678, None)]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [(UNKNOWN_TOPIC_OR_PARTITION, "topic_doesnt_exist", [])]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        client = SimpleClient(hosts=["broker_1:4567"])

        requests = [ProduceRequestPayload("topic_doesnt_exist", 0, [create_message("a"), create_message("b")])]

        with self.assertRaises(FailedPayloadsError):
            client.send_produce_request(requests)
Example #12
    def test_ensure_topic_exists(self, decode_metadata_response, conn):

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567, None),
            BrokerMetadata(1, 'broker_2', 5678, None)
        ]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [
            (NO_LEADER, 'topic_still_creating', []),
            (UNKNOWN_TOPIC_OR_PARTITION, 'topic_doesnt_exist', []),
            (NO_ERROR, 'topic_noleaders', [
                (NO_LEADER, 0, -1, [], []),
                (NO_LEADER, 1, -1, [], []),
            ]),
        ]
        decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])

        with self.assertRaises(UnknownTopicOrPartitionError):
            client.ensure_topic_exists('topic_doesnt_exist', timeout=1)

        with self.assertRaises(KafkaTimeoutError):
            client.ensure_topic_exists('topic_still_creating', timeout=1)

        # This should not raise
        client.ensure_topic_exists('topic_noleaders', timeout=1)
Example #13
    def test_get_leader_exceptions_when_noleader(self, protocol, conn):

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567),
            BrokerMetadata(1, 'broker_2', 5678)
        ]

        topics = [
            (NO_ERROR, 'topic_noleader', [
                (NO_LEADER, 0, -1, [], []),
                (NO_LEADER, 1, -1, [], []),
            ]),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])
        self.assertDictEqual(
            {
                TopicPartition('topic_noleader', 0): None,
                TopicPartition('topic_noleader', 1): None
            },
            client.topics_to_brokers)

        # No leader partitions -- raise LeaderNotAvailableError
        with self.assertRaises(LeaderNotAvailableError):
            self.assertIsNone(client._get_leader_for_partition('topic_noleader', 0))
        with self.assertRaises(LeaderNotAvailableError):
            self.assertIsNone(client._get_leader_for_partition('topic_noleader', 1))

        # Unknown partitions -- raise UnknownTopicOrPartitionError
        with self.assertRaises(UnknownTopicOrPartitionError):
            self.assertIsNone(client._get_leader_for_partition('topic_noleader', 2))

        topics = [
            (NO_ERROR, 'topic_noleader', [
                (NO_ERROR, 0, 0, [0, 1], [0, 1]),
                (NO_ERROR, 1, 1, [1, 0], [1, 0])
            ]),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)
        self.assertEqual(brokers[0], client._get_leader_for_partition('topic_noleader', 0))
        self.assertEqual(brokers[1], client._get_leader_for_partition('topic_noleader', 1))
Example #14
    def test_get_leader_for_unassigned_partitions(self, protocol, conn):

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567, None),
            BrokerMetadata(1, 'broker_2', 5678, None)
        ]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [
            (NO_LEADER, 'topic_no_partitions', []),
            (UNKNOWN_TOPIC_OR_PARTITION, 'topic_unknown', []),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])

        self.assertDictEqual({}, client.topics_to_brokers)

        with self.assertRaises(LeaderNotAvailableError):
            client._get_leader_for_partition('topic_no_partitions', 0)

        with self.assertRaises(UnknownTopicOrPartitionError):
            client._get_leader_for_partition('topic_unknown', 0)
Example #15
def topic_offsets(kafka_brokers, topic):
    client = SimpleClient(insure_is_array(kafka_brokers))
    topic_partitions = client.topic_partitions
    if topic not in topic_partitions:
        raise KafkaException("topic {} doesn't exist".format(topic))
    partitions = topic_partitions[topic]
    offset_requests = [
        OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()
    ]
    offsets_responses = client.send_offset_request(offset_requests)
    client.close()
    partitions_and_offsets = {}
    for offset in offsets_responses:
        if offset.topic == topic:
            topic_offset = 0
            topic_partition = TopicPartition(topic=offset.topic,
                                             partition=offset.partition)
            if offset.offsets[0]:
                topic_offset = offset.offsets[0]
            partitions_and_offsets[topic_partition] = topic_offset

    return partitions_and_offsets
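A hedged usage sketch for the helper above, assuming a broker reachable at a placeholder address and an already existing topic (both values are placeholders):

# Hypothetical usage; broker address and topic name are placeholders.
offsets = topic_offsets(['localhost:9092'], 'example-topic')
for tp, latest in offsets.items():
    print('%s[%d] -> %d' % (tp.topic, tp.partition, latest))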
Example #16
    def test_send_broker_unaware_request_fail(self, load_metadata, conn):
        mocked_conns = {
            ('kafka01', 9092): MagicMock(),
            ('kafka02', 9092): MagicMock()
        }
        for val in mocked_conns.values():
            mock_conn(val, success=False)

        def mock_get_conn(host, port, afi):
            return mocked_conns[(host, port)]
        conn.side_effect = mock_get_conn

        client = SimpleClient(hosts=['kafka01:9092', 'kafka02:9092'])

        req = KafkaProtocol.encode_metadata_request()
        with self.assertRaises(KafkaUnavailableError):
            client._send_broker_unaware_request(payloads=['fake request'],
                                                encoder_fn=MagicMock(return_value='fake encoded message'),
                                                decoder_fn=lambda x: x)

        for key, conn in six.iteritems(mocked_conns):
            conn.send.assert_called_with('fake encoded message')
Example #17
    def test_send_broker_unaware_request_fail(self, load_metadata, conn):
        mocked_conns = {
            ('kafka01', 9092): MagicMock(),
            ('kafka02', 9092): MagicMock()
        }
        for val in mocked_conns.values():
            mock_conn(val, success=False)

        def mock_get_conn(host, port, afi):
            return mocked_conns[(host, port)]
        conn.side_effect = mock_get_conn

        client = SimpleClient(hosts=['kafka01:9092', 'kafka02:9092'])

        req = KafkaProtocol.encode_metadata_request()
        with self.assertRaises(KafkaUnavailableError):
            client._send_broker_unaware_request(payloads=['fake request'],
                                                encoder_fn=MagicMock(return_value='fake encoded message'),
                                                decoder_fn=lambda x: x)

        for key, conn in six.iteritems(mocked_conns):
            conn.send.assert_called_with('fake encoded message')
Example #18
    def test_send_produce_request_raises_when_topic_unknown(self, protocol, conn):

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567),
            BrokerMetadata(1, 'broker_2', 5678)
        ]

        topics = [
            (UNKNOWN_TOPIC_OR_PARTITION, 'topic_doesnt_exist', []),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])

        requests = [ProduceRequestPayload(
            "topic_doesnt_exist", 0,
            [create_message("a"), create_message("b")])]

        with self.assertRaises(FailedPayloadsError):
            client.send_produce_request(requests)
Example #19
    def test_send_produce_request_raises_when_topic_unknown(self, protocol, conn):

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567),
            BrokerMetadata(1, 'broker_2', 5678)
        ]

        topics = [
            (UNKNOWN_TOPIC_OR_PARTITION, 'topic_doesnt_exist', []),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])

        requests = [ProduceRequestPayload(
            "topic_doesnt_exist", 0,
            [create_message("a"), create_message("b")])]

        with self.assertRaises(UnknownTopicOrPartitionError):
            client.send_produce_request(requests)
Example #20
 def create_topic(self):
     client = SimpleClient(self.BOOTSTRAP_SERVER)
     broker_topics = client.topic_partitions
     admin_client = KafkaAdminClient(bootstrap_servers=self.BOOTSTRAP_SERVER, client_id='test')
     if self.TOPIC_NAME and self.TOPIC_NAME not in broker_topics:
         topic_list = [NewTopic(name=self.TOPIC_NAME, num_partitions=self.NUM_PARTITIONS,
                                replication_factor=self.REPLICATION_FACTOR)]
         try:
             admin_client.create_topics(new_topics=topic_list, validate_only=False)
         except Exception:
             raise Exception('Unable to create topic')
     elif self.TOPIC_NAME and self.TOPIC_NAME in broker_topics:
         print('Topic already created')
Example #21
    def test_load_metadata(self, protocol, conn):

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567, None),
            BrokerMetadata(1, 'broker_2', 5678, None)
        ]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [
            (NO_ERROR, 'topic_1', [
                (NO_ERROR, 0, 1, [1, 2], [1, 2])
            ]),
            (NO_ERROR, 'topic_noleader', [
                (NO_LEADER, 0, -1, [], []),
                (NO_LEADER, 1, -1, [], []),
            ]),
            (NO_LEADER, 'topic_no_partitions', []),
            (UNKNOWN_TOPIC_OR_PARTITION, 'topic_unknown', []),
            (NO_ERROR, 'topic_3', [
                (NO_ERROR, 0, 0, [0, 1], [0, 1]),
                (NO_ERROR, 1, 1, [1, 0], [1, 0]),
                (NO_ERROR, 2, 0, [0, 1], [0, 1])
            ])
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        # client loads metadata at init
        client = SimpleClient(hosts=['broker_1:4567'])
        self.assertDictEqual({
            TopicPartition('topic_1', 0): brokers[1],
            TopicPartition('topic_noleader', 0): None,
            TopicPartition('topic_noleader', 1): None,
            TopicPartition('topic_3', 0): brokers[0],
            TopicPartition('topic_3', 1): brokers[1],
            TopicPartition('topic_3', 2): brokers[0]},
            client.topics_to_brokers)

        # if we ask for metadata explicitly, it should raise errors
        with self.assertRaises(LeaderNotAvailableError):
            client.load_metadata_for_topics('topic_no_partitions')

        with self.assertRaises(UnknownTopicOrPartitionError):
            client.load_metadata_for_topics('topic_unknown')

        # This should not raise
        client.load_metadata_for_topics('topic_no_leader')
Example #22
    def setUp(self):
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('integration test requires KAFKA_VERSION')

        zk_chroot = random_string(10)
        replicas = 3
        partitions = 3

        # mini zookeeper, 3 kafka brokers
        self.zk = ZookeeperFixture.instance()
        kk_kwargs = {
            'zk_chroot': zk_chroot,
            'replicas': replicas,
            'partitions': partitions
        }
        self.brokers = [
            KafkaFixture.instance(i, self.zk, **kk_kwargs)
            for i in range(replicas)
        ]

        hosts = ['%s:%d' % (b.host, b.port) for b in self.brokers]
        self.client = SimpleClient(hosts, timeout=2)
        super(TestFailover, self).setUp()
Example #23
    def test_send_broker_unaware_request(self):
        mocked_conns = {("kafka01", 9092): MagicMock(), ("kafka02", 9092): MagicMock(), ("kafka03", 9092): MagicMock()}
        # inject BrokerConnection side effects
        mock_conn(mocked_conns[("kafka01", 9092)], success=False)
        mock_conn(mocked_conns[("kafka03", 9092)], success=False)
        future = Future()
        mocked_conns[("kafka02", 9092)].send.return_value = future
        mocked_conns[("kafka02", 9092)].recv.side_effect = lambda: future.success("valid response")

        def mock_get_conn(host, port, afi):
            return mocked_conns[(host, port)]

        # patch to avoid making requests before we want it
        with patch.object(SimpleClient, "load_metadata_for_topics"):
            with patch.object(SimpleClient, "_get_conn", side_effect=mock_get_conn):

                client = SimpleClient(hosts="kafka01:9092,kafka02:9092")
                resp = client._send_broker_unaware_request(
                    payloads=["fake request"], encoder_fn=MagicMock(), decoder_fn=lambda x: x
                )

                self.assertEqual("valid response", resp)
                mocked_conns[("kafka02", 9092)].recv.assert_called_once_with()
Example #24
        def replicationproducer(self):
            """Obtain a ``Producer`` instance to write to the replication log."""
            if not getattr(self, '_replicationproducer', None):
                client = SimpleClient(hosts,
                                      client_id=clientid,
                                      timeout=timeout)
                self._replicationproducer = vcsrproducer.Producer(
                    client,
                    topic,
                    batch_send=False,
                    req_acks=reqacks,
                    ack_timeout=acktimeout)

            return self._replicationproducer
Example #25
    def _get_highwater_offsets(self, kafka_hosts_ports):
        """
        Fetch highwater offsets for each topic/partition from Kafka cluster.

        Do this for all partitions in the cluster because even if it has no
        consumers, we may want to measure whether producers are successfully
        producing. No need to limit this for performance because fetching broker
        offsets from Kafka is a relatively inexpensive operation.
        """
        kafka_conn = SimpleClient(kafka_hosts_ports, timeout=self.kafka_timeout)
        try:
            broker_topics_partitions = kafka_conn.topics_to_brokers.keys()
            # batch a bunch of requests into a single network call
            offsets_request = [OffsetRequestPayload(topic, partition, -1, 1)
                for topic, partition in broker_topics_partitions]
            offsets_response = kafka_conn.send_offset_request(offsets_request)
            highwater_offsets = {(x.topic, x.partition): x.offsets[0] for x in offsets_response}
        finally:
            try:
                kafka_conn.close()
            except Exception:
                self.log.exception('Error cleaning up Kafka connection')
        return highwater_offsets
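The same request pattern in a standalone form, as a hedged sketch: the broker address is a placeholder and imports assume the kafka-python 1.x module layout.

# Hypothetical standalone sketch of the same highwater-offset fetch.
from kafka import SimpleClient
from kafka.structs import OffsetRequestPayload

client = SimpleClient('localhost:9092', timeout=5)
try:
    requests = [OffsetRequestPayload(topic, partition, -1, 1)
                for topic, partition in client.topics_to_brokers.keys()]
    responses = client.send_offset_request(requests)
    highwater = {(r.topic, r.partition): r.offsets[0] for r in responses}
finally:
    client.close()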
Example #26
 def __init__(self,
              queue,
              kf_ip_port='localhost',
              zk_ip_port='localhost',
              sleep_time=10):
     # connect to Kafka
     self.kafka_hosts = kf_ip_port
     self.broker = SimpleClient(hosts=self.kafka_hosts)
     # connect to ZooKeeper
     self.zookeepers_hosts = zk_ip_port
     self.zk = KazooClient(hosts=self.zookeepers_hosts, read_only=True)
     # where the data is stored
     self.queue = queue
     # polling interval
     self.sleep_time = sleep_time - 1
Example #27
    def test_send_produce_request_raises_when_noleader(self, protocol, conn):
        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567),
            BrokerMetadata(1, 'broker_2', 5678)
        ]

        topics = [
            (NO_ERROR, 'topic_noleader', [
                (NO_LEADER, 0, -1, [], []),
                (NO_LEADER, 1, -1, [], []),
            ]),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])

        requests = [ProduceRequestPayload(
            "topic_noleader", 0,
            [create_message("a"), create_message("b")])]

        with self.assertRaises(FailedPayloadsError):
            client.send_produce_request(requests)
Example #28
    def test_get_leader_for_partitions_reloads_metadata(self, protocol, conn):
        "Get leader for partitions reload metadata if it is not available"

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567, None),
            BrokerMetadata(1, 'broker_2', 5678, None)
        ]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [
            (NO_LEADER, 'topic_no_partitions', [])
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])

        # topic metadata is loaded but empty
        self.assertDictEqual({}, client.topics_to_brokers)

        topics = [
            (NO_ERROR, 'topic_one_partition', [
                (NO_ERROR, 0, 0, [0, 1], [0, 1])
            ])
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        # calling _get_leader_for_partition (from any broker aware request)
        # will try loading metadata again for the same topic
        leader = client._get_leader_for_partition('topic_one_partition', 0)

        self.assertEqual(brokers[0], leader)
        self.assertDictEqual({
            TopicPartition('topic_one_partition', 0): brokers[0]},
            client.topics_to_brokers)
Example #29
    def assert_message_count(self,
                             topic,
                             check_count,
                             timeout=10,
                             partitions=None,
                             at_least=False):
        hosts = ','.join(
            ['%s:%d' % (broker.host, broker.port) for broker in self.brokers])

        client = SimpleClient(hosts, timeout=2)
        consumer = SimpleConsumer(client,
                                  None,
                                  topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)

        started_at = time.time()
        pending = -1
        while pending < check_count and (time.time() - started_at < timeout):
            try:
                pending = consumer.pending(partitions)
            except FailedPayloadsError:
                pass
            time.sleep(0.5)

        consumer.stop()
        client.close()

        if pending < check_count:
            self.fail('Too few pending messages: found %d, expected %d' %
                      (pending, check_count))
        elif pending > check_count and not at_least:
            self.fail('Too many pending messages: found %d, expected %d' %
                      (pending, check_count))
        return True
Example #30
    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}
Example #31
    def test_send_produce_request_raises_when_noleader(self, protocol, conn):
        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567),
            BrokerMetadata(1, 'broker_2', 5678)
        ]

        topics = [
            (NO_ERROR, 'topic_noleader', [
                (NO_LEADER, 0, -1, [], []),
                (NO_LEADER, 1, -1, [], []),
            ]),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse(brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])

        requests = [ProduceRequestPayload(
            "topic_noleader", 0,
            [create_message("a"), create_message("b")])]

        with self.assertRaises(LeaderNotAvailableError):
            client.send_produce_request(requests)
Example #32
def send_to_kafka(message):
    producer = get_producer()
    try:
        producer.send(settings.KAFKA_TOPIC, message)
    except:
        client = SimpleClient(hosts=settings.KAFKA_SERVERS)
        client.ensure_topic_exists(settings.KAFKA_TOPIC)
        client.close()
        producer.send(settings.KAFKA_TOPIC, message)
    producer.close(10)
    def getTopics(self, once_sleep=60):
        '''
        Get the topics that need to be consumed; this can be controlled via a database table.
        :param once_sleep: refresh interval for the topic list
        :return:
        '''
        while True:
            if self.debug:
                debug_topic = self.configures.get("debugconf", "debug_topic")
                self.topics = [(item, "debug")
                               for item in debug_topic.split(",")]
            else:
                kafka_topics = set()
                saas_appkey = set()
                appkey_logpath = {}
                try:
                    from kafka import SimpleClient
                    hostname = self.configures.get("kafka", "hostname")
                    client = SimpleClient(hosts=hostname)
                    for topic in client.topics:
                        kafka_topics.add(topic)
                    client.close()
                    log.info("get kafka topics: %s" %
                             json.dumps(list(kafka_topics)))
                except:
                    logging.error(sys.exc_info())
                    continue

                try:
                    client = MysqlClient("saas_server")
                    topics = client.getTopics(group_id=self.group_id)
                    for topic, logpath in topics:
                        saas_appkey.add(topic)
                        appkey_logpath.setdefault(topic, set()).add(logpath)
                    client.closeMysql()
                    log.info("get mysql appkeys: %s" %
                             json.dumps(list(saas_appkey)))
                except:
                    logging.error(sys.exc_info())
                    continue
                self.topics = [(topic, logpath)
                               for topic in list(kafka_topics & saas_appkey)
                               for logpath in appkey_logpath[topic]]
            log.info("current topics: %s" % json.dumps(self.topics))
            time.sleep(once_sleep)
    def setUp(self):
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('integration test requires KAFKA_VERSION')

        zk_chroot = random_string(10)
        replicas = 3
        partitions = 3

        # mini zookeeper, 3 kafka brokers
        self.zk = ZookeeperFixture.instance()
        kk_args = [self.zk.host, self.zk.port, zk_chroot, replicas, partitions]
        self.brokers = [KafkaFixture.instance(i, *kk_args) for i in range(replicas)]

        hosts = ['%s:%d' % (b.host, b.port) for b in self.brokers]
        self.client = SimpleClient(hosts, timeout=2)
        super(TestFailover, self).setUp()
Example #35
def save_to_kafka(taobao_total_data):
    # save the data to Kafka
    num = 1
    while num < 3:
        try:
            kafka_client = SimpleClient('{}:{}'.format(settings.KAFKA_IP,
                                                       settings.KAFKA_PORT))
            producer = SimpleProducer(kafka_client)
            taobao_total_data = json.dumps(taobao_total_data)
            producer.send_messages('{}'.format(settings.KAFKA_TOPIC),
                                   taobao_total_data.encode("utf8"))
        except Exception as e:
            num += 1
            logger.error(e)
        else:
            break
Example #36
    def test_get_leader_exceptions_when_noleader(self, protocol, conn):

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567, None),
            BrokerMetadata(1, 'broker_2', 5678, None)
        ]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [
            (NO_ERROR, 'topic_noleader', [
                (NO_LEADER, 0, -1, [], []),
                (NO_LEADER, 1, -1, [], []),
            ]),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](
            resp0_brokers, topics)

        client = SimpleClient(hosts=['broker_1:4567'])
        self.assertDictEqual(
            {
                TopicPartition('topic_noleader', 0): None,
                TopicPartition('topic_noleader', 1): None
            }, client.topics_to_brokers)

        # No leader partitions -- raise LeaderNotAvailableError
        with self.assertRaises(LeaderNotAvailableError):
            self.assertIsNone(
                client._get_leader_for_partition('topic_noleader', 0))
        with self.assertRaises(LeaderNotAvailableError):
            self.assertIsNone(
                client._get_leader_for_partition('topic_noleader', 1))

        # Unknown partitions -- raise UnknownTopicOrPartitionError
        with self.assertRaises(UnknownTopicOrPartitionError):
            self.assertIsNone(
                client._get_leader_for_partition('topic_noleader', 2))

        topics = [
            (NO_ERROR, 'topic_noleader', [(NO_ERROR, 0, 0, [0, 1], [0, 1]),
                                          (NO_ERROR, 1, 1, [1, 0], [1, 0])]),
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](
            resp0_brokers, topics)
        self.assertEqual(brokers[0],
                         client._get_leader_for_partition('topic_noleader', 0))
        self.assertEqual(brokers[1],
                         client._get_leader_for_partition('topic_noleader', 1))
Example #37
def wait_for_kafka(hostport, timeout=60):
    """Wait for Kafka to start responding on the specified host:port string."""
    # Delay import to facilitate module use in limited virtualenvs.
    from kafka import SimpleClient

    start = time.time()
    while True:
        try:
            SimpleClient(hostport, client_id=b'dummy', timeout=1)
            return
        except Exception:
            pass

        if time.time() - start > timeout:
            raise Exception('Timeout reached waiting for Kafka')

        time.sleep(0.1)
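A hedged usage sketch; the host:port value is a placeholder for a locally started test broker:

# Hypothetical usage; 'localhost:9092' is a placeholder.
wait_for_kafka('localhost:9092', timeout=30)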
Example #38
    def test_producer_sync_fail_on_error(self):
        error = FailedPayloadsError('failure')
        with patch.object(SimpleClient, 'load_metadata_for_topics'):
            with patch.object(SimpleClient, 'ensure_topic_exists'):
                with patch.object(SimpleClient, 'get_partition_ids_for_topic', return_value=[0, 1]):
                    with patch.object(SimpleClient, '_send_broker_aware_request', return_value = [error]):

                        client = SimpleClient(MagicMock())
                        producer = SimpleProducer(client, async_send=False, sync_fail_on_error=False)

                        # This should not raise
                        (response,) = producer.send_messages('foobar', b'test message')
                        self.assertEqual(response, error)

                        producer = SimpleProducer(client, async_send=False, sync_fail_on_error=True)
                        with self.assertRaises(FailedPayloadsError):
                            producer.send_messages('foobar', b'test message')
Example #39
    def test_load_metadata(self, protocol, conn):

        mock_conn(conn)

        brokers = [
            BrokerMetadata(0, 'broker_1', 4567, None),
            BrokerMetadata(1, 'broker_2', 5678, None)
        ]
        resp0_brokers = list(map(itemgetter(0, 1, 2), brokers))

        topics = [
            (NO_ERROR, 'topic_1', [
                (NO_ERROR, 0, 1, [1, 2], [1, 2])
            ]),
            (NO_ERROR, 'topic_noleader', [
                (NO_LEADER, 0, -1, [], []),
                (NO_LEADER, 1, -1, [], []),
            ]),
            (NO_LEADER, 'topic_no_partitions', []),
            (UNKNOWN_TOPIC_OR_PARTITION, 'topic_unknown', []),
            (NO_ERROR, 'topic_3', [
                (NO_ERROR, 0, 0, [0, 1], [0, 1]),
                (NO_ERROR, 1, 1, [1, 0], [1, 0]),
                (NO_ERROR, 2, 0, [0, 1], [0, 1])
            ])
        ]
        protocol.decode_metadata_response.return_value = MetadataResponse[0](resp0_brokers, topics)

        # client loads metadata at init
        client = SimpleClient(hosts=['broker_1:4567'])
        self.assertDictEqual({
            TopicPartition('topic_1', 0): brokers[1],
            TopicPartition('topic_noleader', 0): None,
            TopicPartition('topic_noleader', 1): None,
            TopicPartition('topic_3', 0): brokers[0],
            TopicPartition('topic_3', 1): brokers[1],
            TopicPartition('topic_3', 2): brokers[0]},
            client.topics_to_brokers)

        # if we ask for metadata explicitly, it should raise errors
        with self.assertRaises(LeaderNotAvailableError):
            client.load_metadata_for_topics('topic_no_partitions')

        with self.assertRaises(UnknownTopicOrPartitionError):
            client.load_metadata_for_topics('topic_unknown')

        # This should not raise
        client.load_metadata_for_topics('topic_no_leader')
Example #40
    def __init__(self):
        config = ConfigReader("config.json")

        auth = OAuthHandler(config.get_key("CONSUMER_KEY"),
                            config.get_key("CONSUMER_SECRET"))
        auth.set_access_token(config.get_key("ACCESS_TOKEN_KEY"),
                              config.get_key("ACCESS_TOKEN_SECRET"))
        self.api = tweepy.API(auth_handler=auth,
                              wait_on_rate_limit=True,
                              wait_on_rate_limit_notify=True)
        self.rate_limits = self.tweep_rate_limits_to_dictionary(
            self.api.rate_limit_status())
        self.scrapers = []
        kafka_url = "{:s}:{:s}".format(config.get_key("KAFKA_HOST"),
                                       config.get_key("KAFKA_PORT"))
        try:
            kafka = SimpleClient(kafka_url, timeout=60)
        except KafkaUnavailableError as e:
            logging.error("Could not connect to Kafka2")
            raise e

        self.producer = SimpleProducer(kafka)
Example #41
    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port))

        timeout = time.time() + 30
        while time.time() < timeout:
            try:
                self.client.load_metadata_for_topics(self.topic, ignore_leadernotavailable=False)
                if self.client.has_metadata_for_topic(topic):
                    break
            except (LeaderNotAvailableError, InvalidTopicError):
                time.sleep(1)
        else:
            raise KafkaTimeoutError('Timeout loading topic metadata!')


        # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
        # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
        for partition in self.client.get_partition_ids_for_topic(self.topic):
            while True:
                try:
                    req = OffsetRequestPayload(self.topic, partition, -1, 100)
                    self.client.send_offset_request([req])
                    break
                except (NotLeaderForPartitionError, UnknownTopicOrPartitionError, FailedPayloadsError) as e:
                    if time.time() > timeout:
                        raise KafkaTimeoutError('Timeout loading topic metadata!')
                    time.sleep(.1)

        self._messages = {}
Example #42
def refresh_graph_data1():
    global list_result1, username1
    user_timeline1 = twitter.get_user_timeline(screen_name=username1, count=20)
    result1 = sentiment_analysis(user_timeline1)
    list_result1 = [result1.pos1, result1.neut1, result1.neg1]
    for tweet in user_timeline1:
        try:
            client = SimpleClient("localhost:9092")
            producer = SimpleProducer(client,
                                      async=False,
                                      batch_send_every_n=10,
                                      batch_send_every_t=2)
            print(tweet['text'])
            msg = tweet['text'].encode('utf-8')
            producer.send_messages(topic_name, msg)
            # producer.send('test', key=bytes('tweet', encoding='utf-8'), value=bytes(tweet['text'],encoding='utf-8'))
            # producer.flush()
            print('publish success')
        except Exception as ex:
            print('Exception in publishing message')
            print(str(ex))
    time.sleep(30)
    # list_result = [10,10,10]
    return jsonify(sResult1=list_result1)
Example #43
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port))

        timeout = time.time() + 30
        while time.time() < timeout:
            try:
                self.client.load_metadata_for_topics(self.topic, ignore_leadernotavailable=False)
                if self.client.has_metadata_for_topic(topic):
                    break
            except (LeaderNotAvailableError, InvalidTopicError):
                time.sleep(1)
        else:
            raise KafkaTimeoutError('Timeout loading topic metadata!')


        # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
        # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
        for partition in self.client.get_partition_ids_for_topic(self.topic):
            while True:
                try:
                    req = OffsetRequestPayload(self.topic, partition, -1, 100)
                    self.client.send_offset_request([req])
                    break
                except (NotLeaderForPartitionError, UnknownTopicOrPartitionError, FailedPayloadsError) as e:
                    if time.time() > timeout:
                        raise KafkaTimeoutError('Timeout loading topic metadata!')
                    time.sleep(.1)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request([OffsetRequestPayload(topic,
                                                                             partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
Example #44
class FeedManager(FlaskView):
    dockerClient = docker.from_env()
    mongoClient = pymongo.MongoClient(**mongo_params)
    forms: Database = mongoClient[os.getenv("FORM_DATABASE", "forms")]
    feeds: Database = mongoClient[os.getenv("PARAMETER_DATABASE", "params")]
    parameter_stats: Database = mongoClient[os.getenv("PARAM_STATS_DATABASE", "params_stats")]
    parameterSchemas = forms['parameterSchemas']
    admin = KafkaAdminClient(**kafka_params)
    kafkaClient = SimpleClient(hosts=kafka_params.get("bootstrap_servers")[0])
    feed_params: Database = mongoClient[os.getenv("PARAMETER_DATABASE", "params")]
    feed_ports = {name.get("name"): 8000+i for (i, name) in enumerate(feeds["feed"].find({}))}

    def getParameter(self, collection, name):
        params = self.feed_params[collection].find_one(filter={"name": name})
        if params is None:
            return Response(status=404)
        params.pop("_id")
        return Response(json.dumps(params), mimetype="application/json")

    def getParameterStatus(self, feedName):
        c = self.parameterSchemas.find({})
        payload = []
        for parameterName in [param.get("name") for param in c]:
            errors = self.parameter_stats[parameterName].count({"name": feedName})
            status = {
                "errors": errors,
                "name": parameterName
            }
            payload.append(status)
        return Response(json.dumps(payload), mimetype='application/json')

    def getParameterTypes(self):
        c = self.parameterSchemas.find({})

        data = [param.get("name") for param in c]
        return Response(json.dumps(data), mimetype="application/json")

    def getParameterSchema(self, parameterName):
        parameter = self.parameterSchemas.find_one({"name": parameterName})
        val = parameter['value']
        return Response(json.dumps(val), mimetype="application/json")

    @route("/setParameter/<string:collection>/<string:name>", methods=['PUT'])
    def setParameter(self, collection, name=None):
        value = request.get_json()
        param: dict = self.feed_params[collection].find_one({"name": name})
        value.update({"name": name})
        old = param
        if param is not None:
            self.feed_params[collection].replace_one(filter={"name": name}, replacement=value)
            old["name"] = "{}_{}".format(name, datetime.now().strftime("%d%m%Y"))
            old.pop("_id")
            self.feed_params[collection].insert(old)
        else:
            self.feed_params[collection].insert_one(value)
        return Response("ok", status=200)

    def getFeeds(self):
        c = self.feeds["feed"].find({})
        data = [param.get("name") for param in c]
        return Response(json.dumps(data), mimetype="application/json")

    def newFeed(self, feedName):
        port = len(self.feed_ports)
        self.feed_ports.update({feedName: 8000 + port})
        c = self.feeds["feed"].find({"name": feedName})
        if any(val.get("name") == feedName for val in c):
            pass
        else:
            self.feeds["feed"].insert_one({"name": feedName})
        return "ok"

    def startFeed(self, feedName):
        logging.info("starting feed {}".format(feedName))
        parameterSets = self.feeds.list_collection_names(include_system_collections=False)
        notSet = []
        for set in parameterSets:
            if self.feeds[set].find_one({"name": feedName}) is None:
                notSet.append(set)
        if len(notSet):
            payload = {"notSet": notSet, "status": False}
            return Response(json.dumps(payload), mimetype='application/json')
        else:
            try:
                queues_to_make = []
                queues_to_make.append(
                    NewTopic(name="{}-results".format(feedName), num_partitions=1, replication_factor=1))
                queues_to_make.append(
                    NewTopic(name="{}-items".format(feedName), num_partitions=1, replication_factor=1))
                self.admin.create_topics(queues_to_make)
            except TopicAlreadyExistsError:
                pass
            try:
                feed = self.dockerClient.containers.get(feedName)
                feed.start()
            except APIError as e:
                with open("./docker.env") as file:
                    string = file.read()
                    env_vars = list(filter(lambda item: item != "", string.split("\n")))

                image = self.dockerClient.images.get(feed_params['image'])
                feed: Container = self.dockerClient.containers.run(image,
                                                                   environment=["NAME={}".format(feedName),
                                                                                'BROWSER_PORT={}'.format(self.feed_ports.get(feedName))] + env_vars,
                                                                   detach=True,
                                                                   name=feedName,
                                                                   restart_policy={"Name": 'always'},
                                                                   network=os.getenv("NETWORK", "car_default"))
            return Response(json.dumps({"status": True}), status=200)

    def stopFeed(self, feedName):
        feed = self.dockerClient.containers.get(feedName)
        feed.stop()
        feed.remove()
        self.admin.delete_topics(["{}-{}".format(feedName, val) for val in ("items", "results")])
        return "ok"

    def feedStatus(self, feedName):
        try:
            feed = self.dockerClient.containers.get(feedName)
            if feed.status == 'running':
                status = True
            else:
                status = False
        except APIError as e:
            status = False
        return Response(json.dumps({"status": status}), mimetype='application/json')
Example #45
 def __init__(self, api=None):
     # create kafka producer and bind it to listener
     super(MyStreamListener, self).__init__(api)
     client = SimpleClient('localhost:9092')
     producer = SimpleProducer(client)
     self.kafProducer = producer
Example #46
# -*- coding: utf-8 -*-
from kafka import SimpleClient, SimpleProducer, KafkaConsumer

kafka = SimpleClient(
    "192.168.6.51  192.168.6.52   192.168.6.53  192.168.6.54  192.168.6.55")
producer = SimpleProducer(kafka)

kafka.close()
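The snippet above opens a producer but never sends anything; a hedged standalone sketch that does, with the broker address, topic name, and payload all placeholders:

# Hypothetical sketch: create a producer, send one message, then close.
from kafka import SimpleClient, SimpleProducer

client = SimpleClient('localhost:9092')
producer = SimpleProducer(client)
producer.send_messages('example-topic', b'hello kafka')
client.close()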
Example #47
        r.data_source.data['x'] = range(len(list(df['value'])))[-WINDOW_SIZE:]
        dots.data_source.data['y'] = list(df['value'])[-WINDOW_SIZE:]
        dots.data_source.data['x'] = range(len(list(
            df['value'])))[-WINDOW_SIZE:]
    else:
        r.data_source.data['y'] = list(df['value'])
        r.data_source.data['x'] = range(len(list(df['value'])))
        dots.data_source.data['y'] = list(df['value'])
        dots.data_source.data['x'] = range(len(list(df['value'])))


# A Kafka consumer listens for messages on the 'wave' topic and plots
# up-to-date results in a Bokeh plot
if __name__ == '__main__':
    # Initiate connection to Kafka (consumer) and Redis
    client = SimpleClient('localhost:9092')
    consumer = SimpleConsumer(client, None, 'wave')

    # push this plotting session to Bokeh page
    session = push_session(curdoc())

    # dataframe that is updated with all new data
    df = pd.DataFrame(columns=['time', 'value'])

    # data vars
    time, value = [0], [0]

    # figure that is updated with new data
    plot = figure()
    r = plot.line(time, value)
    dots = plot.circle(time, value, size=1, color='navy')
Example #48
def simple_client(kafka_broker):
    return SimpleClient(get_connect_str(kafka_broker))
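A hedged sketch of a test that consumes such a helper, assuming it is registered as a pytest fixture (e.g. wrapped with @pytest.fixture); the topic name is a placeholder:

# Hypothetical test using the fixture above; the topic name is a placeholder.
def test_client_metadata(simple_client):
    simple_client.ensure_topic_exists('example-topic')
    assert simple_client.has_metadata_for_topic('example-topic')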
Example #49
    def spoorer(self):  # connect to Kafka and fetch the topic list
        try:
            kafka_client = SimpleClient(self.kafka_hosts, timeout=self.timeout)
            # print kafka_client.topics
        except Exception as e:
            print "Error, cannot connect kafka broker."
            sys.exit(1)
        else:
            kafka_topics = kafka_client.topics
        finally:
            kafka_client.close()

        # connect to ZooKeeper and fetch the current consumer offsets
        try:
            zookeeper_client = KazooClient(hosts=self.zookeeper_hosts, read_only=True, timeout=self.timeout)
            zookeeper_client.start()
        except Exception as e:
            print "Error, cannot connect zookeeper server."
            sys.exit(1)

        try:
            groups = map(str,zookeeper_client.get_children(self.zookeeper_url + 'consumers'))
        except NoNodeError as e:
            print "Error, invalid zookeeper url."
            zookeeper_client.stop()
            sys.exit(2)
        else:
            for group in groups:
                if 'offsets' not in zookeeper_client.get_children(self.zookeeper_url + 'consumers/%s' % group):continue
                topic_path = 'consumers/%s/offsets' % (group)
                topics = list(map(str, zookeeper_client.get_children(self.zookeeper_url + topic_path)))
                if len(topics) == 0: continue
                for topic in topics:
                    # print topic
                    # print self.white_topic_group.keys()
                    if topic not in self.white_topic_group.keys():
                        continue
                    # elif group not in self.white_topic_group[topic].replace(' ','').split(','):
                    #     continue
                    partition_path = 'consumers/%s/offsets/%s' % (group,topic)
                    partitions = map(int,zookeeper_client.get_children(self.zookeeper_url + partition_path))

                    for partition in partitions:
                        base_path = 'consumers/%s/%s/%s/%s' % (group, '%s', topic, partition)
                        owner_path, offset_path = base_path % 'owners', base_path % 'offsets'
                        offset = zookeeper_client.get(self.zookeeper_url + offset_path)[0]

                        try:
                            owner = zookeeper_client.get(self.zookeeper_url + owner_path)[0]
                        except NoNodeError as e:
                            owner = 'null'
                        # store this consumer's progress in the metric dict
                        metric = {'datetime': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                                  'topic': topic, 'group': group, 'partition': int(partition),
                                  'logsize': None, 'offset': int(offset), 'lag': None, 'owner': owner}
                        self.result.append(metric)
        finally:
            zookeeper_client.stop()
        # fetch the logsize (latest offset) of every partition
        try:
            client = SimpleClient(self.kafka_hosts)
        except Exception as e:
            print "Error, cannot connect kafka broker."
            sys.exit(1)
        else:
            for kafka_topic in kafka_topics:
                self.kafka_logsize[kafka_topic] = {}
                partitions = client.topic_partitions[kafka_topic]
                offset_requests = [OffsetRequestPayload(kafka_topic, p, -1, 1) for p in partitions.keys()]
                offsets_responses = client.send_offset_request(offset_requests)
                for r in offsets_responses:
                    self.kafka_logsize[kafka_topic][r.partition] = r.offsets[0]

        # lag = logsize - current offset
        f1 = open(self.log_file,'w')
        # f2 = open(self.log_day_file,'a')
        # print self.result
        for metric in self.result:
            logsize = self.kafka_logsize[metric['topic']][metric['partition']]
            metric['logsize'] = int(logsize)
            metric['lag'] = int(logsize) - int(metric['offset'])
            f1.write(json.dumps(metric,sort_keys=True) + '\n')
            f1.flush()
            # f2.write(json.dumps(metric,sort_keys=True) + '\n')
            # f2.flush()
        # finally:
        f1.close()
        client.close()
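
Every line spoorer() writes to self.log_file is one JSON metric carrying logsize, offset and lag for a group/topic/partition. A small, hypothetical reader that flags partitions whose lag exceeds a threshold (the file name and threshold are placeholders):

import json

LAG_THRESHOLD = 1000  # messages; placeholder value

with open('kafka_lag.log') as f:
    for line in f:
        metric = json.loads(line)
        if metric['lag'] > LAG_THRESHOLD:
            print("lagging: group=%(group)s topic=%(topic)s "
                  "partition=%(partition)d lag=%(lag)d" % metric)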
Exemplo n.º 50
0
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:],
                               random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' %
                                       (self.server.host, self.server.port))

        timeout = time.time() + 30
        while time.time() < timeout:
            try:
                self.client.load_metadata_for_topics(
                    self.topic, ignore_leadernotavailable=False)
                if self.client.has_metadata_for_topic(topic):
                    break
            except (LeaderNotAvailableError, InvalidTopicError):
                time.sleep(1)
        else:
            raise KafkaTimeoutError('Timeout loading topic metadata!')

        # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
        # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
        for partition in self.client.get_partition_ids_for_topic(self.topic):
            while True:
                try:
                    req = OffsetRequestPayload(self.topic, partition, -1, 100)
                    self.client.send_offset_request([req])
                    break
                except (NotLeaderForPartitionError,
                        UnknownTopicOrPartitionError,
                        FailedPayloadsError) as e:
                    if time.time() > timeout:
                        raise KafkaTimeoutError(
                            'Timeout loading topic metadata!')
                    time.sleep(.1)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request(
                [OffsetRequestPayload(topic, partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')


class TestFailover(KafkaIntegrationTestCase):
    create_client = False

    def setUp(self):
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('integration test requires KAFKA_VERSION')

        zk_chroot = random_string(10)
        replicas = 3
        partitions = 3

        # mini zookeeper, 3 kafka brokers
        self.zk = ZookeeperFixture.instance()
        kk_args = [self.zk.host, self.zk.port]
        kk_kwargs = {'zk_chroot': zk_chroot, 'replicas': replicas,
                     'partitions': partitions}
        self.brokers = [KafkaFixture.instance(i, *kk_args, **kk_kwargs)
                        for i in range(replicas)]

        hosts = ['%s:%d' % (b.host, b.port) for b in self.brokers]
        self.client = SimpleClient(hosts, timeout=2)
        super(TestFailover, self).setUp()

    def tearDown(self):
        super(TestFailover, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        self.client.close()
        for broker in self.brokers:
            broker.close()
        self.zk.close()

    def test_switch_leader(self):
        topic = self.topic
        partition = 0

        # Testing the base Producer class here so that we can easily send
        # messages to a specific partition, kill the leader for that partition
        # and check that after another broker takes leadership the producer
        # is able to resume sending messages

        # require that the server commit messages to all in-sync replicas
        # so that failover doesn't lose any messages on server-side
        # and we can assert that server-side message count equals client-side
        # note: kafka-python renamed 'async' to 'async_send' ('async' is reserved in Python 3.7+)
        producer = Producer(self.client, async_send=False,
                            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT)

        # Send 100 random messages to a specific partition
        self._send_random_messages(producer, topic, partition, 100)

        # kill leader for partition
        self._kill_leader(topic, partition)

        # expect failure, but don't wait more than 60 secs to recover
        recovered = False
        started = time.time()
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
                log.debug("attempting to send 'success' message after leader killed")
                producer.send_messages(topic, partition, b'success')
                log.debug("success!")
                recovered = True
            except (FailedPayloadsError, ConnectionError, RequestTimedOutError,
                    NotLeaderForPartitionError):
                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we successfully sent the message
        self.assertTrue(recovered)

        # send some more messages to new leader
        self._send_random_messages(producer, topic, partition, 100)

        # count number of messages
        # Should be equal to 100 before + 1 recovery + 100 after
        # at_least=True because exactly once delivery isn't really a thing
        self.assert_message_count(topic, 201, partitions=(partition,),
                                  at_least=True)

    def test_switch_leader_async(self):
        topic = self.topic
        partition = 0

        # Test the base class Producer -- send_messages to a specific partition
        producer = Producer(self.client, async_send=True,
                            batch_send_every_n=15,
                            batch_send_every_t=3,
                            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT,
                            async_log_messages_on_error=False)

        # Send 10 random messages
        self._send_random_messages(producer, topic, partition, 10)
        self._send_random_messages(producer, topic, partition + 1, 10)

        # kill leader for partition
        self._kill_leader(topic, partition)

        log.debug("attempting to send 'success' message after leader killed")

        # in async mode, this should return immediately
        producer.send_messages(topic, partition, b'success')
        producer.send_messages(topic, partition + 1, b'success')

        # send to new leader
        self._send_random_messages(producer, topic, partition, 10)
        self._send_random_messages(producer, topic, partition + 1, 10)

        # Stop the producer and wait for it to shutdown
        producer.stop()
        started = time.time()
        timeout = 60
        while (time.time() - started) < timeout:
            if not producer.thread.is_alive():
                break
            time.sleep(0.1)
        else:
            self.fail('timeout waiting for producer queue to empty')

        # count number of messages
        # Should be equal to 10 before + 1 recovery + 10 after
        # at_least=True because exactly once delivery isn't really a thing
        self.assert_message_count(topic, 21, partitions=(partition,),
                                  at_least=True)
        self.assert_message_count(topic, 21, partitions=(partition + 1,),
                                  at_least=True)

    def test_switch_leader_keyed_producer(self):
        topic = self.topic

        producer = KeyedProducer(self.client, async_send=False)

        # Send 10 random messages
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)

        # kill leader for partition 0
        self._kill_leader(topic, 0)

        recovered = False
        started = time.time()
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
                key = random_string(3).encode('utf-8')
                msg = random_string(10).encode('utf-8')
                producer.send_messages(topic, key, msg)
                if producer.partitioners[topic].partition(key) == 0:
                    recovered = True
            except (FailedPayloadsError, ConnectionError, RequestTimedOutError,
                    NotLeaderForPartitionError):
                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we successfully sent the message
        self.assertTrue(recovered)

        # send some more messages just to make sure no more exceptions
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)

    def test_switch_leader_simple_consumer(self):
        producer = Producer(self.client, async_send=False)
        consumer = SimpleConsumer(self.client, None, self.topic, partitions=None, auto_commit=False, iter_timeout=10)
        self._send_random_messages(producer, self.topic, 0, 2)
        consumer.get_messages()
        self._kill_leader(self.topic, 0)
        consumer.get_messages()

    def _send_random_messages(self, producer, topic, partition, n):
        for j in range(n):
            msg = 'msg {0}: {1}'.format(j, random_string(10))
            log.debug('_send_random_message %s to %s:%d', msg, topic, partition)
            while True:
                try:
                    producer.send_messages(topic, partition, msg.encode('utf-8'))
                except Exception:
                    log.exception('failure in _send_random_messages - retrying')
                    continue
                else:
                    break

    def _kill_leader(self, topic, partition):
        leader = self.client.topics_to_brokers[TopicPartition(topic, partition)]
        broker = self.brokers[leader.nodeId]
        broker.close()
        return broker

    def assert_message_count(self, topic, check_count, timeout=10,
                             partitions=None, at_least=False):
        hosts = ','.join(['%s:%d' % (broker.host, broker.port)
                          for broker in self.brokers])

        client = SimpleClient(hosts, timeout=2)
        consumer = SimpleConsumer(client, None, topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)

        started_at = time.time()
        pending = -1
        while pending < check_count and (time.time() - started_at < timeout):
            try:
                pending = consumer.pending(partitions)
            except FailedPayloadsError:
                pass
            time.sleep(0.5)

        consumer.stop()
        client.close()

        if pending < check_count:
            self.fail('Too few pending messages: found %d, expected %d' %
                      (pending, check_count))
        elif pending > check_count and not at_least:
            self.fail('Too many pending messages: found %d, expected %d' %
                      (pending, check_count))
        return True
Exemplo n.º 52
0
def simple_client(kafka_broker):
    connect_str = 'localhost:' + str(kafka_broker.port)
    return SimpleClient(connect_str)
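
Both simple_client factories above follow the usual pytest pattern (the @pytest.fixture decorator presumably sits in the surrounding conftest and is not shown here). A hypothetical test consuming the fixture, just to illustrate how pytest injects it by name:

def test_client_loads_metadata(simple_client):
    # the fixture hands over a SimpleClient bound to the test broker
    simple_client.load_metadata_for_topics()
    assert simple_client.brokers
    simple_client.close()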
Exemplo n.º 53
0
class TestFailover(KafkaIntegrationTestCase):
    create_client = False

    def setUp(self):
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('integration test requires KAFKA_VERSION')

        zk_chroot = random_string(10)
        replicas = 3
        partitions = 3

        # mini zookeeper, 3 kafka brokers
        self.zk = ZookeeperFixture.instance()
        kk_kwargs = {
            'zk_chroot': zk_chroot,
            'replicas': replicas,
            'partitions': partitions
        }
        self.brokers = [
            KafkaFixture.instance(i, self.zk, **kk_kwargs)
            for i in range(replicas)
        ]

        hosts = ['%s:%d' % (b.host, b.port) for b in self.brokers]
        self.client = SimpleClient(hosts, timeout=2)
        super(TestFailover, self).setUp()

    def tearDown(self):
        super(TestFailover, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        self.client.close()
        for broker in self.brokers:
            broker.close()
        self.zk.close()

    def test_switch_leader(self):
        topic = self.topic
        partition = 0

        # Testing the base Producer class here so that we can easily send
        # messages to a specific partition, kill the leader for that partition
        # and check that after another broker takes leadership the producer
        # is able to resume sending messages

        # require that the server commit messages to all in-sync replicas
        # so that failover doesn't lose any messages on server-side
        # and we can assert that server-side message count equals client-side
        producer = Producer(self.client,
                            async_send=False,
                            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT)

        # Send 100 random messages to a specific partition
        self._send_random_messages(producer, topic, partition, 100)

        # kill leader for partition
        self._kill_leader(topic, partition)

        # expect failure, but don't wait more than 60 secs to recover
        recovered = False
        started = time.time()
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
                log.debug(
                    "attempting to send 'success' message after leader killed")
                producer.send_messages(topic, partition, b'success')
                log.debug("success!")
                recovered = True
            except (FailedPayloadsError, KafkaConnectionError,
                    RequestTimedOutError, NotLeaderForPartitionError):
                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we successfully sent the message
        self.assertTrue(recovered)

        # send some more messages to new leader
        self._send_random_messages(producer, topic, partition, 100)

        # count number of messages
        # Should be equal to 100 before + 1 recovery + 100 after
        # at_least=True because exactly once delivery isn't really a thing
        self.assert_message_count(topic,
                                  201,
                                  partitions=(partition, ),
                                  at_least=True)

    def test_switch_leader_async(self):
        topic = self.topic
        partition = 0

        # Test the base class Producer -- send_messages to a specific partition
        producer = Producer(self.client,
                            async_send=True,
                            batch_send_every_n=15,
                            batch_send_every_t=3,
                            req_acks=Producer.ACK_AFTER_CLUSTER_COMMIT,
                            async_log_messages_on_error=False)

        # Send 10 random messages
        self._send_random_messages(producer, topic, partition, 10)
        self._send_random_messages(producer, topic, partition + 1, 10)

        # kill leader for partition
        self._kill_leader(topic, partition)

        log.debug("attempting to send 'success' message after leader killed")

        # in async mode, this should return immediately
        producer.send_messages(topic, partition, b'success')
        producer.send_messages(topic, partition + 1, b'success')

        # send to new leader
        self._send_random_messages(producer, topic, partition, 10)
        self._send_random_messages(producer, topic, partition + 1, 10)

        # Stop the producer and wait for it to shutdown
        producer.stop()
        started = time.time()
        timeout = 60
        while (time.time() - started) < timeout:
            if not producer.thread.is_alive():
                break
            time.sleep(0.1)
        else:
            self.fail('timeout waiting for producer queue to empty')

        # count number of messages
        # Should be equal to 10 before + 1 recovery + 10 after
        # at_least=True because exactly once delivery isn't really a thing
        self.assert_message_count(topic,
                                  21,
                                  partitions=(partition, ),
                                  at_least=True)
        self.assert_message_count(topic,
                                  21,
                                  partitions=(partition + 1, ),
                                  at_least=True)

    def test_switch_leader_keyed_producer(self):
        topic = self.topic

        producer = KeyedProducer(self.client, async_send=False)

        # Send 10 random messages
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)

        # kill leader for partition 0
        self._kill_leader(topic, 0)

        recovered = False
        started = time.time()
        timeout = 60
        while not recovered and (time.time() - started) < timeout:
            try:
                key = random_string(3).encode('utf-8')
                msg = random_string(10).encode('utf-8')
                producer.send_messages(topic, key, msg)
                if producer.partitioners[topic].partition(key) == 0:
                    recovered = True
            except (FailedPayloadsError, KafkaConnectionError,
                    RequestTimedOutError, NotLeaderForPartitionError):
                log.debug("caught exception sending message -- will retry")
                continue

        # Verify we successfully sent the message
        self.assertTrue(recovered)

        # send some more messages just to make sure no more exceptions
        for _ in range(10):
            key = random_string(3).encode('utf-8')
            msg = random_string(10).encode('utf-8')
            producer.send_messages(topic, key, msg)

    def test_switch_leader_simple_consumer(self):
        producer = Producer(self.client, async_send=False)
        consumer = SimpleConsumer(self.client,
                                  None,
                                  self.topic,
                                  partitions=None,
                                  auto_commit=False,
                                  iter_timeout=10)
        self._send_random_messages(producer, self.topic, 0, 2)
        consumer.get_messages()
        self._kill_leader(self.topic, 0)
        consumer.get_messages()

    def _send_random_messages(self, producer, topic, partition, n):
        for j in range(n):
            msg = 'msg {0}: {1}'.format(j, random_string(10))
            log.debug('_send_random_message %s to %s:%d', msg, topic,
                      partition)
            while True:
                try:
                    producer.send_messages(topic, partition,
                                           msg.encode('utf-8'))
                except Exception:
                    log.exception(
                        'failure in _send_random_messages - retrying')
                    continue
                else:
                    break

    def _kill_leader(self, topic, partition):
        leader = self.client.topics_to_brokers[TopicPartition(
            topic, partition)]
        broker = self.brokers[leader.nodeId]
        broker.close()
        return broker

    def assert_message_count(self,
                             topic,
                             check_count,
                             timeout=10,
                             partitions=None,
                             at_least=False):
        hosts = ','.join(
            ['%s:%d' % (broker.host, broker.port) for broker in self.brokers])

        client = SimpleClient(hosts, timeout=2)
        consumer = SimpleConsumer(client,
                                  None,
                                  topic,
                                  partitions=partitions,
                                  auto_commit=False,
                                  iter_timeout=timeout)

        started_at = time.time()
        pending = -1
        while pending < check_count and (time.time() - started_at < timeout):
            try:
                pending = consumer.pending(partitions)
            except FailedPayloadsError:
                pass
            time.sleep(0.5)

        consumer.stop()
        client.close()

        if pending < check_count:
            self.fail('Too few pending messages: found %d, expected %d' %
                      (pending, check_count))
        elif pending > check_count and not at_least:
            self.fail('Too many pending messages: found %d, expected %d' %
                      (pending, check_count))
        return True