def _init_consumer(self, timeout=MIN_TIMEOUT, auto_offset_reset='smallest'):
    """Allow re-initing the consumer if necessary"""
    config = {
        'client_id': self._client_id,
        'bootstrap_servers': settings.KAFKA_BROKERS,
        'consumer_timeout_ms': timeout,
        'auto_offset_reset': auto_offset_reset,
        'enable_auto_commit': False,
        'api_version': settings.KAFKA_API_VERSION,
    }
    self._consumer = KafkaConsumer(**config)

    topic_partitions = []
    for topic in self.topics:
        for partition in self._consumer.partitions_for_topic(topic):
            topic_partitions.append(TopicPartition(topic, partition))

    self._consumer.assign(self._filter_partitions(topic_partitions))
    return self._consumer

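# A minimal standalone sketch of the same manual-assignment pattern with
# kafka-python's public API. The broker address and topic name below are
# hypothetical; partitions_for_topic() may return None if the topic does
# not exist yet.
from kafka import KafkaConsumer, TopicPartition

consumer = KafkaConsumer(
    bootstrap_servers='localhost:9092',
    enable_auto_commit=False,
    consumer_timeout_ms=1000,      # stop iterating after 1s of no messages
)
partitions = [
    TopicPartition('my-topic', p)
    for p in consumer.partitions_for_topic('my-topic')
]
consumer.assign(partitions)        # manual assignment, no consumer group
for message in consumer:
    print(message.topic, message.partition, message.offset)
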
def test_with_limited_retries(self):
    # lets create a queue and add 10 messages for 10 different partitions
    # to show how retries should work ideally
    for i in range(10):
        self.queue.put((TopicPartition("test", i), "msg %i" % i, "key %i" % i))

    def send_side_effect(reqs, *args, **kwargs):
        return [FailedPayloadsError(req) for req in reqs]

    self.client.send_produce_request.side_effect = send_side_effect

    self._run_process(3, 3)

    # the queue should be void at the end of the test
    self.assertEqual(self.queue.empty(), True)

    # there should be 16 non-void calls:
    # 3 initial batches of 3 msgs each + 1 initial batch of 1 msg +
    # 3 retries of the batches above = (1 + 3 retries) * 4 batches = 16
    self.assertEqual(self.client.send_produce_request.call_count, 16)

def __record_change_metric_in_datadog(self, metric, change, processing_time=None,
                                      add_case_type_tag=False):
    if change.metadata is not None:
        metric_tags = {
            'datasource': change.metadata.data_source_name,
            'pillow_name': self.get_name(),
        }

        if add_case_type_tag:
            metric_tags['case_type'] = 'NA'
            if settings.ENTERPRISE_MODE and change.metadata.document_type == 'CommCareCase':
                metric_tags['case_type'] = change.metadata.document_subtype

        metrics_counter(metric, tags=metric_tags)

        change_lag = (datetime.utcnow() - change.metadata.publish_timestamp).total_seconds()
        metrics_gauge(
            'commcare.change_feed.change_lag', change_lag,
            tags={
                'pillow_name': self.get_name(),
                'topic': _topic_for_ddog(
                    TopicPartition(change.topic, change.partition)
                    if change.partition is not None else change.topic
                ),
            }
        )

        if processing_time:
            tags = {'pillow_name': self.get_name()}
            metrics_counter('commcare.change_feed.processing_time.total',
                            processing_time, tags=tags)
            metrics_counter('commcare.change_feed.processing_time.count', tags=tags)

def _create_topic(self):
    self._logger.info("Creating topic: {0} with {1} partitions".format(
        self._topic, self._num_of_partitions))

    # Create partitions for the workers.
    self._partitions = [
        TopicPartition(self._topic, p)
        for p in range(int(self._num_of_partitions))
    ]

    # create partitioner
    self._partitioner = RoundRobinPartitioner(self._partitions)

    # get script path
    zk_conf = "{0}:{1}".format(self._zk_server, self._zk_port)
    create_topic_cmd = "{0}/kafka_topic.sh create {1} {2} {3}".format(
        os.path.dirname(os.path.abspath(__file__)),
        self._topic,
        zk_conf,
        self._num_of_partitions)

    # execute create topic cmd
    Util.execute_cmd(create_topic_cmd, self._logger)

def test_async_producer_not_leader(self):
    for i in range(10):
        self.queue.put((TopicPartition("test", i), "msg %i", "key %i"))

    # Mock offsets counter for closure
    offsets = collections.defaultdict(
        lambda: collections.defaultdict(lambda: 0))
    self.client.is_first_time = True

    def send_side_effect(reqs, *args, **kwargs):
        if self.client.is_first_time:
            self.client.is_first_time = False
            return [
                ProduceResponsePayload(req.topic, req.partition,
                                       NotLeaderForPartitionError.errno, -1)
                for req in reqs
            ]
        responses = []
        for req in reqs:
            offset = offsets[req.topic][req.partition]
            offsets[req.topic][req.partition] += len(req.messages)
            responses.append(
                ProduceResponsePayload(req.topic, req.partition, 0, offset))
        return responses

    self.client.send_produce_request.side_effect = send_side_effect

    self._run_process(2)

    # the queue should be void at the end of the test
    self.assertEqual(self.queue.empty(), True)

    # there should be 5 non-void calls: 1st failed batch of 3 msgs
    # + 3 batches of 3 msgs each + 1 batch of 1 msg = 1 + 3 + 1 = 5
    self.assertEqual(self.client.send_produce_request.call_count, 5)

def _proc_offsets_fetch_request(self, node_id, request):
    response = yield from self._send_req(node_id, request)
    offsets = {}
    for topic, partitions in response.topics:
        for partition, offset, metadata, error_code in partitions:
            tp = TopicPartition(topic, partition)
            error_type = Errors.for_code(error_code)
            if error_type is not Errors.NoError:
                error = error_type()
                log.debug("Error fetching offset for %s: %s", tp, error)
                if error_type is Errors.GroupLoadInProgressError:
                    # just retry
                    raise error
                elif error_type is Errors.NotCoordinatorForGroupError:
                    # re-discover the coordinator and retry
                    self.coordinator_dead()
                    raise error
                elif error_type in (Errors.UnknownMemberIdError,
                                    Errors.IllegalGenerationError):
                    # need to re-join group
                    self._subscription.mark_for_reassignment()
                    raise error
                elif error_type is Errors.UnknownTopicOrPartitionError:
                    log.warning("OffsetFetchRequest -- unknown topic %s",
                                topic)
                    continue
                else:
                    log.error("Unknown error fetching offsets for %s: %s",
                              tp, error)
                    raise error
            elif offset >= 0:
                # record the position with the offset
                # (-1 indicates no committed offset to fetch)
                offsets[tp] = OffsetAndMetadata(offset, metadata)
            else:
                log.debug("No committed offset for partition %s", tp)
    return offsets

def __record_change_metric_in_datadog(self, metric, change, processor=None,
                                      processing_time=None, add_case_type_tag=False):
    if change.metadata is not None:
        common_tags = [
            'datasource:{}'.format(change.metadata.data_source_name),
            'is_deletion:{}'.format(change.metadata.is_deletion),
            'pillow_name:{}'.format(self.get_name()),
            'processor:{}'.format(processor.__class__.__name__ if processor else "all_processors"),
        ]
        metric_tags = list(common_tags)
        if add_case_type_tag and settings.ENTERPRISE_MODE and change.metadata.document_type == 'CommCareCase':
            metric_tags.append('case_type:{}'.format(
                change.metadata.document_subtype))

        datadog_counter(metric, tags=metric_tags)

        change_lag = (datetime.utcnow() - change.metadata.publish_timestamp).total_seconds()
        datadog_gauge(
            'commcare.change_feed.change_lag', change_lag,
            tags=[
                'pillow_name:{}'.format(self.get_name()),
                _topic_for_ddog(
                    TopicPartition(change.topic, change.partition)
                    if change.partition is not None else change.topic
                ),
            ]
        )

        if processing_time:
            datadog_histogram('commcare.change_feed.processing_time',
                              processing_time, tags=common_tags)

def assign(cls, cluster, member_metadata):
    all_topics = set()
    for metadata in six.itervalues(member_metadata):
        all_topics.update(metadata.subscription)

    all_topic_partitions = []
    for topic in all_topics:
        partitions = cluster.partitions_for_topic(topic)
        if partitions is None:
            log.warning('No partition metadata for topic %s', topic)
            continue
        for partition in partitions:
            all_topic_partitions.append(TopicPartition(topic, partition))
    all_topic_partitions.sort()

    # construct {member_id: {topic: [partition, ...]}}
    assignment = collections.defaultdict(
        lambda: collections.defaultdict(list))

    member_iter = itertools.cycle(sorted(member_metadata.keys()))
    for partition in all_topic_partitions:
        member_id = next(member_iter)

        # Because we constructed all_topic_partitions from the set of
        # member subscribed topics, we should be safe assuming that
        # each topic in all_topic_partitions is in at least one member
        # subscription; otherwise this could yield an infinite loop
        while partition.topic not in member_metadata[member_id].subscription:
            member_id = next(member_iter)
        assignment[member_id][partition.topic].append(partition.partition)

    protocol_assignment = {}
    for member_id in member_metadata:
        protocol_assignment[member_id] = ConsumerProtocolMemberAssignment(
            cls.version,
            sorted(assignment[member_id].items()),
            b'')
    return protocol_assignment

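# Standalone illustration of the round-robin idea used above (plain Python,
# not the assignor API): cycle through sorted member ids and hand out sorted
# partitions one at a time. Member ids and the topic are hypothetical.
import collections
import itertools

members = ['consumer-a', 'consumer-b']
partitions = [('my-topic', p) for p in range(6)]

assignment = collections.defaultdict(list)
member_iter = itertools.cycle(sorted(members))
for tp in sorted(partitions):
    assignment[next(member_iter)].append(tp)

# consumer-a receives partitions 0, 2, 4; consumer-b receives 1, 3, 5
print(dict(assignment))
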
def process_args(self):
    self.brokers = self.get_opt('brokers')
    # TODO: add broker list validation back in
    # validate_hostport(self.brokers)
    log_option('brokers', self.brokers)
    self.timeout_ms = max(self.timeout * 1000 - 1000, 1000)

    list_topics = self.get_opt('list_topics')
    list_partitions = self.get_opt('list_partitions')
    if list_topics:
        self.print_topics()
        sys.exit(ERRORS['UNKNOWN'])

    self.topic = self.get_opt('topic')
    if self.topic:
        validate_chars(self.topic, 'topic', 'A-Za-z-')
    elif list_topics or list_partitions:
        pass
    else:
        self.usage('--topic not specified')

    if list_partitions:
        if self.topic:
            self.print_topic_partitions(self.topic)
        else:
            for topic in self.get_topics():
                self.print_topic_partitions(topic)
        sys.exit(ERRORS['UNKNOWN'])

    self.partition = self.get_opt('partition')
    # technically optional, will hash to a random partition,
    # but need to know which partition to get offset
    # if self.partition is not None:
    validate_int(self.partition, "partition", 0, 10000)
    self.topic_partition = TopicPartition(self.topic, self.partition)
    self.acks = self.get_opt('acks')
    log_option('acks', self.acks)
    self.validate_thresholds()

def test_fetch_committed_offsets(mocker, coordinator):
    # No partitions, no IO polling
    mocker.patch.object(coordinator._client, 'poll')
    assert coordinator.fetch_committed_offsets([]) == {}
    assert coordinator._client.poll.call_count == 0

    # general case -- send offset fetch request, get successful future
    mocker.patch.object(coordinator, 'ensure_coordinator_known')
    mocker.patch.object(coordinator, '_send_offset_fetch_request',
                        return_value=Future().success('foobar'))
    partitions = [TopicPartition('foobar', 0)]
    ret = coordinator.fetch_committed_offsets(partitions)
    assert ret == 'foobar'
    coordinator._send_offset_fetch_request.assert_called_with(partitions)
    assert coordinator._client.poll.call_count == 1

    # Failed future is raised if not retriable
    coordinator._send_offset_fetch_request.return_value = Future().failure(AssertionError)
    coordinator._client.poll.reset_mock()
    try:
        coordinator.fetch_committed_offsets(partitions)
    except AssertionError:
        pass
    else:
        assert False, 'Exception not raised when expected'
    assert coordinator._client.poll.call_count == 1

    coordinator._client.poll.reset_mock()
    coordinator._send_offset_fetch_request.side_effect = [
        Future().failure(Errors.RequestTimedOutError),
        Future().success('fizzbuzz')]

    ret = coordinator.fetch_committed_offsets(partitions)
    assert ret == 'fizzbuzz'
    assert coordinator._client.poll.call_count == 2  # call + retry

def _get_leader_for_partition(self, topic, partition):
    """
    Returns the leader for a partition or None if the partition exists
    but has no leader.

    UnknownTopicOrPartitionError will be raised if the topic or partition
    is not part of the metadata.

    LeaderNotAvailableError is raised if the server has metadata, but
    there is no current leader.
    """
    key = TopicPartition(topic, partition)

    # Use cached metadata if it is there
    if self.topics_to_brokers.get(key) is not None:
        return self.topics_to_brokers[key]

    # Otherwise refresh metadata

    # If topic does not already exist, this will raise
    # UnknownTopicOrPartitionError if not auto-creating
    # LeaderNotAvailableError otherwise until partitions are created
    self.load_metadata_for_topics(topic)

    # If the partition doesn't actually exist, raise
    if partition not in self.topic_partitions.get(topic, []):
        raise UnknownTopicOrPartitionError(key)

    # If there's no leader for the partition, raise
    leader = self.topic_partitions[topic][partition]
    if leader == -1:
        raise LeaderNotAvailableError((topic, partition))

    # Otherwise return the BrokerMetadata
    return self.brokers[leader]

def test_load_metadata(self, protocol, conn):
    mock_conn(conn)

    brokers = [
        BrokerMetadata(0, 'broker_1', 4567),
        BrokerMetadata(1, 'broker_2', 5678)
    ]

    topics = [
        (NO_ERROR, 'topic_1', [
            (NO_ERROR, 0, 1, [1, 2], [1, 2])
        ]),
        (NO_ERROR, 'topic_noleader', [
            (NO_LEADER, 0, -1, [], []),
            (NO_LEADER, 1, -1, [], []),
        ]),
        (NO_LEADER, 'topic_no_partitions', []),
        (UNKNOWN_TOPIC_OR_PARTITION, 'topic_unknown', []),
        (NO_ERROR, 'topic_3', [
            (NO_ERROR, 0, 0, [0, 1], [0, 1]),
            (NO_ERROR, 1, 1, [1, 0], [1, 0]),
            (NO_ERROR, 2, 0, [0, 1], [0, 1])
        ])
    ]
    protocol.decode_metadata_response.return_value = MetadataResponse(
        brokers, topics)

    # client loads metadata at init
    client = SimpleClient(hosts=['broker_1:4567'])
    self.assertDictEqual({
        TopicPartition('topic_1', 0): brokers[1],
        TopicPartition('topic_noleader', 0): None,
        TopicPartition('topic_noleader', 1): None,
        TopicPartition('topic_3', 0): brokers[0],
        TopicPartition('topic_3', 1): brokers[1],
        TopicPartition('topic_3', 2): brokers[0]},
        client.topics_to_brokers)

    # if we ask for metadata explicitly, it should raise errors
    with self.assertRaises(LeaderNotAvailableError):
        client.load_metadata_for_topics('topic_no_partitions')

    with self.assertRaises(UnknownTopicOrPartitionError):
        client.load_metadata_for_topics('topic_unknown')

    # This should not raise
    client.load_metadata_for_topics('topic_no_leader')

def test_proc_fetch_request(self):
    client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
    subscriptions = SubscriptionState('latest')
    fetcher = Fetcher(client, subscriptions, loop=self.loop)

    tp = TopicPartition('test', 0)
    tp_info = (tp.topic, [(tp.partition, 155, 100000)])
    req = FetchRequest(
        -1,  # replica_id
        100, 100, [tp_info])

    client.ready = mock.MagicMock()
    client.ready.side_effect = asyncio.coroutine(lambda a: True)
    client.force_metadata_update = mock.MagicMock()
    client.force_metadata_update.side_effect = asyncio.coroutine(
        lambda: False)
    client.send = mock.MagicMock()

    msg = Message(b"test msg")
    msg._encode_self()

    client.send.side_effect = asyncio.coroutine(
        lambda n, r: FetchResponse([('test', [(0, 0, 9, [(4, 10, msg)])])]))
    fetcher._in_flight.add(0)
    needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(needs_wake_up, False)

    state = TopicPartitionState()
    state.seek(0)
    subscriptions.assignment[tp] = state
    subscriptions.needs_partition_assignment = False
    fetcher._in_flight.add(0)
    needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(needs_wake_up, True)
    buf = fetcher._records[tp]
    self.assertEqual(buf.getone(), None)  # invalid offset, msg is ignored

    state.seek(4)
    fetcher._in_flight.add(0)
    fetcher._records.clear()
    needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(needs_wake_up, True)
    buf = fetcher._records[tp]
    self.assertEqual(buf.getone().value, b"test msg")

    # error -> no partition found
    client.send.side_effect = asyncio.coroutine(
        lambda n, r: FetchResponse([('test', [(0, 3, 9, [(4, 10, msg)])])]))
    fetcher._in_flight.add(0)
    fetcher._records.clear()
    needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(needs_wake_up, False)

    # error -> topic auth failed
    client.send.side_effect = asyncio.coroutine(
        lambda n, r: FetchResponse([('test', [(0, 29, 9, [(4, 10, msg)])])]))
    fetcher._in_flight.add(0)
    fetcher._records.clear()
    needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(needs_wake_up, True)
    with self.assertRaises(TopicAuthorizationFailedError):
        yield from fetcher.next_record([])

    # error -> unknown
    client.send.side_effect = asyncio.coroutine(
        lambda n, r: FetchResponse([('test', [(0, -1, 9, [(4, 10, msg)])])]))
    fetcher._in_flight.add(0)
    fetcher._records.clear()
    needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(needs_wake_up, False)

    # error -> offset out of range
    client.send.side_effect = asyncio.coroutine(
        lambda n, r: FetchResponse([('test', [(0, 1, 9, [(4, 10, msg)])])]))
    fetcher._in_flight.add(0)
    fetcher._records.clear()
    needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(needs_wake_up, False)
    self.assertEqual(state.is_fetchable(), False)

    state.seek(4)
    subscriptions._default_offset_reset_strategy = OffsetResetStrategy.NONE
    client.send.side_effect = asyncio.coroutine(
        lambda n, r: FetchResponse([('test', [(0, 1, 9, [(4, 10, msg)])])]))
    fetcher._in_flight.add(0)
    fetcher._records.clear()
    needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(needs_wake_up, True)
    with self.assertRaises(OffsetOutOfRangeError):
        yield from fetcher.next_record([])

    yield from fetcher.close()

def _proc_fetch_request(self, node_id, request):
    needs_wakeup = False
    try:
        response = yield from self._client.send(node_id, request)
    except Errors.KafkaError as err:
        log.error("Failed fetch messages from %s: %s", node_id, err)
        return False
    finally:
        self._in_flight.remove(node_id)

    fetch_offsets = {}
    for topic, partitions in request.topics:
        for partition, offset, _ in partitions:
            fetch_offsets[TopicPartition(topic, partition)] = offset

    for topic, partitions in response.topics:
        for partition, error_code, highwater, messages in partitions:
            tp = TopicPartition(topic, partition)
            error_type = Errors.for_code(error_code)
            if not self._subscriptions.is_fetchable(tp):
                # this can happen when a rebalance happened
                log.debug("Ignoring fetched records for partition %s"
                          " since it is no longer fetchable", tp)
            elif error_type is Errors.NoError:
                self._subscriptions.assignment[tp].highwater = highwater

                # we are interested in this fetch only if the beginning
                # offset matches the current consumed position
                fetch_offset = fetch_offsets[tp]
                partial = None
                if messages and \
                        isinstance(messages[-1][-1], PartialMessage):
                    partial = messages.pop()

                if messages:
                    log.debug(
                        "Adding fetched record for partition %s with"
                        " offset %d to buffered record list",
                        tp, fetch_offset)
                    try:
                        messages = collections.deque(
                            self._unpack_message_set(tp, messages))
                    except Errors.InvalidMessageError as err:
                        self._set_error(tp, err)
                        continue

                    self._records[tp] = FetchResult(
                        tp, messages=messages,
                        subscriptions=self._subscriptions,
                        backoff=self._prefetch_backoff,
                        loop=self._loop)

                    # We added at least 1 successful record
                    needs_wakeup = True
                elif partial:
                    # we did not read a single message from a non-empty
                    # buffer because that message's size is larger than
                    # fetch size, in this case record this exception
                    err = RecordTooLargeError(
                        "There are some messages at [Partition=Offset]: "
                        "%s=%s whose size is larger than the fetch size %s"
                        " and hence cannot be ever returned. "
                        "Increase the fetch size, or decrease the maximum "
                        "message size the broker will allow.",
                        tp, fetch_offset,
                        self._max_partition_fetch_bytes)
                    self._set_error(tp, err)
                    needs_wakeup = True
                    self._subscriptions.assignment[tp].position += 1

            elif error_type in (Errors.NotLeaderForPartitionError,
                                Errors.UnknownTopicOrPartitionError):
                self._client.force_metadata_update()
            elif error_type is Errors.OffsetOutOfRangeError:
                fetch_offset = fetch_offsets[tp]
                if self._subscriptions.has_default_offset_reset_policy():
                    self._subscriptions.need_offset_reset(tp)
                else:
                    err = Errors.OffsetOutOfRangeError({tp: fetch_offset})
                    self._set_error(tp, err)
                    needs_wakeup = True
                log.info(
                    "Fetch offset %s is out of range, resetting offset",
                    fetch_offset)
            elif error_type is Errors.TopicAuthorizationFailedError:
                log.warn("Not authorized to read from topic %s.", tp.topic)
                err = Errors.TopicAuthorizationFailedError(tp.topic)
                self._set_error(tp, err)
                needs_wakeup = True
            else:
                log.warn('Unexpected error while fetching data: %s',
                         error_type.__name__)
    return needs_wakeup

def test_consumer_rebalance_on_new_topic(self):
    # Test will create a consumer group and check if adding a new topic
    # will trigger a group rebalance and assign partitions
    pattern = "^another-autocreate-pattern-.*$"
    client = AIOKafkaClient(
        loop=self.loop, bootstrap_servers=self.hosts,
        client_id="test_autocreate")
    yield from client.bootstrap()
    listener1 = StubRebalanceListener(loop=self.loop)
    listener2 = StubRebalanceListener(loop=self.loop)
    consumer1 = AIOKafkaConsumer(
        loop=self.loop, bootstrap_servers=self.hosts,
        metadata_max_age_ms=200, group_id="test-autocreate-rebalance",
        heartbeat_interval_ms=100)
    consumer1.subscribe(pattern=pattern, listener=listener1)
    yield from consumer1.start()
    consumer2 = AIOKafkaConsumer(
        loop=self.loop, bootstrap_servers=self.hosts,
        metadata_max_age_ms=200, group_id="test-autocreate-rebalance",
        heartbeat_interval_ms=100)
    consumer2.subscribe(pattern=pattern, listener=listener2)
    yield from consumer2.start()
    yield from asyncio.sleep(0.5, loop=self.loop)
    # bootstrap will take care of the initial group assignment
    self.assertEqual(consumer1.assignment(), set())
    self.assertEqual(consumer2.assignment(), set())
    listener1.reset()
    listener2.reset()

    # Lets force autocreation of a topic
    my_topic = "another-autocreate-pattern-1"
    yield from client._wait_on_metadata(my_topic)

    # Wait for group to stabilize
    assign1 = yield from listener1.wait_assign()
    assign2 = yield from listener2.wait_assign()
    # We expect 2 partitions for autocreated topics
    my_partitions = set([
        TopicPartition(my_topic, 0), TopicPartition(my_topic, 1)])
    self.assertEqual(assign1 | assign2, my_partitions)
    self.assertEqual(
        consumer1.assignment() | consumer2.assignment(), my_partitions)

    # Lets add another topic
    listener1.reset()
    listener2.reset()
    my_topic2 = "another-autocreate-pattern-2"
    yield from client._wait_on_metadata(my_topic2)

    # Wait for group to stabilize
    assign1 = yield from listener1.wait_assign()
    assign2 = yield from listener2.wait_assign()
    # We expect 2 partitions for autocreated topics
    my_partitions = set([
        TopicPartition(my_topic, 0), TopicPartition(my_topic, 1),
        TopicPartition(my_topic2, 0), TopicPartition(my_topic2, 1)])
    self.assertEqual(assign1 | assign2, my_partitions)
    self.assertEqual(
        consumer1.assignment() | consumer2.assignment(), my_partitions)

    yield from consumer1.stop()
    yield from consumer2.stop()
    yield from client.close()

def _send_produce_req(self, node_id, batches):
    """Create produce request to node

    If the producer is configured with `retries` > 0 and the produce
    response contains "failed" partitions, the produce request for those
    partitions will be resent to the broker up to `retries` times, with
    `retry_timeout_ms` between attempts.

    Arguments:
        node_id (int): kafka broker identifier
        batches (dict): dictionary of {TopicPartition: MessageBatch}
    """
    self._in_flight.add(node_id)
    t0 = self._loop.time()
    while True:
        topics = collections.defaultdict(list)
        for tp, batch in batches.items():
            topics[tp.topic].append((tp.partition, batch.data()))

        request = ProduceRequest(
            required_acks=self._acks,
            timeout=self._request_timeout_ms,
            topics=list(topics.items()))

        try:
            response = yield from self.client.send(node_id, request)
        except KafkaError as err:
            for batch in batches.values():
                if not err.retriable or batch.expired():
                    batch.done(exception=err)
            log.warning("Got error produce response: %s", err)
            if not err.retriable:
                break
        else:
            if response is None:
                # noacks, just "done" batches
                for batch in batches.values():
                    batch.done()
                break

            for topic, partitions in response.topics:
                for partition, error_code, offset in partitions:
                    tp = TopicPartition(topic, partition)
                    error = Errors.for_code(error_code)
                    batch = batches.pop(tp, None)
                    if batch is None:
                        continue

                    if error is Errors.NoError:
                        batch.done(offset)
                    elif not getattr(error, 'retriable', False) or \
                            batch.expired():
                        batch.done(exception=error())
                    else:
                        # Ok, we can retry this batch
                        batches[tp] = batch
                        log.warning(
                            "Got error produce response on topic-partition"
                            " %s, retrying. Error: %s", tp, error)

        if batches:
            yield from asyncio.sleep(self._retry_backoff, loop=self._loop)
        else:
            break

    # if batches for node is processed in less than a linger seconds
    # then waiting for the remaining time
    sleep_time = self._linger_time - (self._loop.time() - t0)
    if sleep_time > 0:
        yield from asyncio.sleep(sleep_time, loop=self._loop)

    self._in_flight.remove(node_id)

def test_batch_done(self):
    tp0 = TopicPartition("test-topic", 0)
    tp1 = TopicPartition("test-topic", 1)
    tp2 = TopicPartition("test-topic", 2)
    tp3 = TopicPartition("test-topic", 3)

    def mocked_leader_for_partition(tp):
        if tp == tp0:
            return 0
        if tp == tp1:
            return 1
        if tp == tp2:
            return -1
        return None

    cluster = ClusterMetadata(metadata_max_age_ms=10000)
    cluster.leader_for_partition = mock.MagicMock()
    cluster.leader_for_partition.side_effect = mocked_leader_for_partition
    ma = MessageAccumulator(cluster, 1000, None, 1, self.loop)

    fut1 = yield from ma.add_message(
        tp2, None, b'msg for tp@2', timeout=2)
    fut2 = yield from ma.add_message(
        tp3, None, b'msg for tp@3', timeout=2)
    yield from ma.add_message(tp1, None, b'0123456789' * 70, timeout=2)
    with self.assertRaises(KafkaTimeoutError):
        yield from ma.add_message(tp1, None, b'0123456789' * 70, timeout=2)
    batches, _ = ma.drain_by_nodes(ignore_nodes=[])
    self.assertEqual(batches[1][tp1].expired(), True)
    with self.assertRaises(LeaderNotAvailableError):
        yield from fut1
    with self.assertRaises(NotLeaderForPartitionError):
        yield from fut2

    fut01 = yield from ma.add_message(
        tp0, b'key0', b'value#0', timeout=2)
    fut02 = yield from ma.add_message(
        tp0, b'key1', b'value#1', timeout=2)
    fut10 = yield from ma.add_message(
        tp1, None, b'0123456789' * 70, timeout=2)
    batches, _ = ma.drain_by_nodes(ignore_nodes=[])
    self.assertEqual(batches[0][tp0].expired(), False)
    self.assertEqual(batches[1][tp1].expired(), False)
    batch_data = batches[0][tp0].get_data_buffer()
    self.assertEqual(type(batch_data), io.BytesIO)
    batches[0][tp0].done(base_offset=10)

    class TestException(Exception):
        pass

    batches[1][tp1].done(exception=TestException())

    res = yield from fut01
    self.assertEqual(res.topic, "test-topic")
    self.assertEqual(res.partition, 0)
    self.assertEqual(res.offset, 10)
    res = yield from fut02
    self.assertEqual(res.topic, "test-topic")
    self.assertEqual(res.partition, 0)
    self.assertEqual(res.offset, 11)
    with self.assertRaises(TestException):
        yield from fut10

    fut01 = yield from ma.add_message(
        tp0, b'key0', b'value#0', timeout=2)
    batches, _ = ma.drain_by_nodes(ignore_nodes=[])
    batches[0][tp0].done(base_offset=None)
    res = yield from fut01
    self.assertEqual(res, None)

    # cancelling future
    fut01 = yield from ma.add_message(
        tp0, b'key0', b'value#2', timeout=2)
    batches, _ = ma.drain_by_nodes(ignore_nodes=[])
    fut01.cancel()
    batches[0][tp0].done(base_offset=21)  # no error in this case

def kafka_tp(kafka_topic, kafka_partition):
    return TopicPartition(kafka_topic, kafka_partition)

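# Hypothetical usage of a fixture like kafka_tp above (illustrative only):
# it assumes a separate `kafka_consumer` fixture that yields a KafkaConsumer
# already assigned to that TopicPartition. After seek_to_beginning(),
# position() returns the log start offset, which is 0 for a fresh topic.
def test_seek_to_beginning(kafka_consumer, kafka_tp):
    kafka_consumer.seek_to_beginning(kafka_tp)
    assert kafka_consumer.position(kafka_tp) == 0
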
def test_group(kafka_broker, topic):
    num_partitions = 4
    connect_str = 'localhost:' + str(kafka_broker.port)
    consumers = {}
    stop = {}
    messages = collections.defaultdict(lambda: collections.defaultdict(list))

    def consumer_thread(i):
        assert i not in consumers
        assert i not in stop
        stop[i] = threading.Event()
        consumers[i] = KafkaConsumer(topic,
                                     bootstrap_servers=connect_str,
                                     heartbeat_interval_ms=500)
        while not stop[i].is_set():
            for tp, records in six.iteritems(consumers[i].poll()):
                messages[i][tp].extend(records)
        consumers[i].close()
        del consumers[i]
        del stop[i]

    num_consumers = 4
    for i in range(num_consumers):
        t = threading.Thread(target=consumer_thread, args=(i,))
        t.daemon = True
        t.start()

    try:
        timeout = time.time() + 35
        while True:
            for c in range(num_consumers):

                # Verify all consumers have been created
                if c not in consumers:
                    break

                # Verify all consumers have an assignment
                elif not consumers[c].assignment():
                    break

                # Verify all consumers are in the same generation
                generations = set()
                for consumer in six.itervalues(consumers):
                    generations.add(consumer._coordinator.generation)
                if len(generations) != 1:
                    break

            # If all checks passed, log state and break while loop
            else:
                for c in range(num_consumers):
                    logging.info("[%s] %s %s: %s", c,
                                 consumers[c]._coordinator.generation,
                                 consumers[c]._coordinator.member_id,
                                 consumers[c].assignment())
                break

            assert time.time() < timeout, "timeout waiting for assignments"

        group_assignment = set()
        for c in range(num_consumers):
            assert len(consumers[c].assignment()) != 0
            assert set.isdisjoint(consumers[c].assignment(), group_assignment)
            group_assignment.update(consumers[c].assignment())

        assert group_assignment == set([
            TopicPartition(topic, partition)
            for partition in range(num_partitions)])

    finally:
        for c in range(num_consumers):
            stop[c].set()

def _kill_leader(self, topic, partition):
    leader = self.client.topics_to_brokers[TopicPartition(topic, partition)]
    broker = self.brokers[leader.nodeId]
    broker.close()
    return broker

def test_compacted_topic_consumption(self):
    # Compacted topics can have offsets skipped
    client = AIOKafkaClient(loop=self.loop, bootstrap_servers=[])
    client.ready = mock.MagicMock()
    client.ready.side_effect = asyncio.coroutine(lambda a: True)
    client.force_metadata_update = mock.MagicMock()
    client.force_metadata_update.side_effect = asyncio.coroutine(
        lambda: False)
    client.send = mock.MagicMock()

    subscriptions = SubscriptionState('latest')
    fetcher = Fetcher(client, subscriptions, loop=self.loop)

    tp = TopicPartition('test', 0)
    req = FetchRequest(
        -1,  # replica_id
        100, 100, [(tp.topic, [(tp.partition, 155, 100000)])])

    msg1 = Message(b"12345", key=b"1")
    msg1._encode_self()
    msg2 = Message(b"23456", key=b"2")
    msg2._encode_self()
    msg3 = Message(b"34567", key=b"3")
    msg3._encode_self()

    resp = FetchResponse(
        [('test', [(
            0, 0, 3000,          # partition, error_code, highwater_offset
            [(160, 5, msg1),     # offset, len_bytes, bytes
             (162, 5, msg2),
             (167, 5, msg3)])])])

    client.send.side_effect = asyncio.coroutine(lambda n, r: resp)
    state = TopicPartitionState()
    state.seek(155)
    state.drop_pending_message_set = False
    subscriptions.assignment[tp] = state
    subscriptions.needs_partition_assignment = False
    fetcher._in_flight.add(0)

    needs_wake_up = yield from fetcher._proc_fetch_request(0, req)
    self.assertEqual(needs_wake_up, True)
    buf = fetcher._records[tp]

    # Test successful getone
    first = buf.getone()
    self.assertEqual(state.position, 161)
    self.assertEqual(
        (first.value, first.key, first.offset),
        (msg1.value, msg1.key, 160))

    # Test successful getmany
    second, third = buf.getall()
    self.assertEqual(state.position, 168)
    self.assertEqual(
        (second.value, second.key, second.offset),
        (msg2.value, msg2.key, 162))
    self.assertEqual(
        (third.value, third.key, third.offset),
        (msg3.value, msg3.key, 167))

def test_consumer_topic(kafka_consumer_with_topic, kafka_topic, kafka_partition_count):
    assert kafka_topic in kafka_consumer_with_topic.topics()
    assignment_list = [TopicPartition(kafka_topic, p)
                       for p in range(0, kafka_partition_count)]
    assert kafka_consumer_with_topic.assignment() == set(assignment_list)

def _handle_fetch_response(self, request, response):
    """The callback for fetch completion"""
    #total_bytes = 0
    #total_count = 0

    fetch_offsets = {}
    for topic, partitions in request.topics:
        for partition, offset, _ in partitions:
            fetch_offsets[TopicPartition(topic, partition)] = offset

    for topic, partitions in response.topics:
        for partition, error_code, highwater, messages in partitions:
            tp = TopicPartition(topic, partition)
            error_type = Errors.for_code(error_code)
            if not self._subscriptions.is_fetchable(tp):
                # this can happen when a rebalance happened or a partition
                # consumption paused while fetch is still in-flight
                log.debug("Ignoring fetched records for partition %s"
                          " since it is no longer fetchable", tp)
            elif error_type is Errors.NoError:
                fetch_offset = fetch_offsets[tp]

                # we are interested in this fetch only if the beginning
                # offset matches the current consumed position
                position = self._subscriptions.assignment[tp].position
                if position is None or position != fetch_offset:
                    log.debug("Discarding fetch response for partition %s"
                              " since its offset %d does not match the"
                              " expected offset %d", tp, fetch_offset,
                              position)
                    continue

                partial = None
                if messages and isinstance(messages[-1][-1], PartialMessage):
                    partial = messages.pop()

                if messages:
                    log.debug("Adding fetched record for partition %s with"
                              " offset %d to buffered record list", tp,
                              position)
                    self._records.append((fetch_offset, tp, messages))
                    #last_offset, _, _ = messages[-1]
                    #self.sensors.records_fetch_lag.record(highwater - last_offset)
                elif partial:
                    # we did not read a single message from a non-empty
                    # buffer because that message's size is larger than
                    # fetch size, in this case record this exception
                    self._record_too_large_partitions[tp] = fetch_offset

                # TODO: bytes metrics
                #self.sensors.record_topic_fetch_metrics(tp.topic, num_bytes, parsed.size());
                #totalBytes += num_bytes;
                #totalCount += parsed.size();
            elif error_type in (Errors.NotLeaderForPartitionError,
                                Errors.UnknownTopicOrPartitionError):
                self._client.cluster.request_update()
            elif error_type is Errors.OffsetOutOfRangeError:
                fetch_offset = fetch_offsets[tp]
                if self._subscriptions.has_default_offset_reset_policy():
                    self._subscriptions.need_offset_reset(tp)
                else:
                    self._offset_out_of_range_partitions[tp] = fetch_offset
                log.info("Fetch offset %s is out of range, resetting offset",
                         fetch_offset)
            elif error_type is Errors.TopicAuthorizationFailedError:
                log.warn("Not authorized to read from topic %s.", tp.topic)
                self._unauthorized_topics.add(tp.topic)
            elif error_type is Errors.UnknownError:
                log.warn("Unknown error fetching data for topic-partition %s", tp)
            else:
                raise error_type('Unexpected error while fetching data')

    # TODO: metrics

def offsets():
    return {
        TopicPartition('foobar', 0): OffsetAndMetadata(123, b''),
        TopicPartition('foobar', 1): OffsetAndMetadata(234, b''),
    }

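# A hedged sketch of how a {TopicPartition: OffsetAndMetadata} mapping like
# the fixture above is used with kafka-python's public KafkaConsumer.commit()
# API. Broker address, group id, and topic are hypothetical.
from kafka import KafkaConsumer
from kafka.structs import TopicPartition, OffsetAndMetadata

consumer = KafkaConsumer(bootstrap_servers='localhost:9092',
                         group_id='example-group',
                         enable_auto_commit=False)
tp = TopicPartition('my-topic', 0)
consumer.assign([tp])
# commit an explicit offset for the assigned partition
consumer.commit({tp: OffsetAndMetadata(42, b'')})
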
def commit_offsets(self, offsets):
    """Commit specific offsets asynchronously.

    Arguments:
        offsets (dict {TopicPartition: OffsetAndMetadata}): what to commit

    Raises error on failure
    """
    self._subscription.needs_fetch_committed_offsets = True
    if not offsets:
        log.debug('No offsets to commit')
        return True

    if (yield from self.coordinator_unknown()):
        raise Errors.GroupCoordinatorNotAvailableError()
    node_id = self.coordinator_id

    # create the offset commit request
    offset_data = collections.defaultdict(list)
    for tp, offset in offsets.items():
        offset_data[tp.topic].append(
            (tp.partition, offset.offset, offset.metadata))

    request = OffsetCommitRequest(
        self.group_id,
        self.generation,
        self.member_id,
        OffsetCommitRequest.DEFAULT_RETENTION_TIME,
        [(topic, tp_offsets) for topic, tp_offsets in offset_data.items()])

    log.debug("Sending offset-commit request with %s to %s",
              offsets, node_id)

    response = yield from self._send_req(node_id, request)

    unauthorized_topics = set()
    for topic, partitions in response.topics:
        for partition, error_code in partitions:
            tp = TopicPartition(topic, partition)
            offset = offsets[tp]

            error_type = Errors.for_code(error_code)
            if error_type is Errors.NoError:
                log.debug("Committed offset %s for partition %s", offset, tp)
                if self._subscription.is_assigned(tp):
                    partition = self._subscription.assignment[tp]
                    partition.committed = offset.offset
            elif error_type is Errors.GroupAuthorizationFailedError:
                log.error("OffsetCommit failed for group %s - %s",
                          self.group_id, error_type.__name__)
                raise error_type()
            elif error_type is Errors.TopicAuthorizationFailedError:
                unauthorized_topics.add(topic)
            elif error_type in (Errors.OffsetMetadataTooLargeError,
                                Errors.InvalidCommitOffsetSizeError):
                # raise the error to the user
                log.info("OffsetCommit failed for group %s on partition %s"
                         " due to %s, will retry",
                         self.group_id, tp, error_type.__name__)
                raise error_type()
            elif error_type is Errors.GroupLoadInProgressError:
                # just retry
                log.info("OffsetCommit failed for group %s because group is"
                         " initializing (%s), will retry",
                         self.group_id, error_type.__name__)
                raise error_type()
            elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                                Errors.NotCoordinatorForGroupError,
                                Errors.RequestTimedOutError):
                log.info("OffsetCommit failed for group %s due to a"
                         " coordinator error (%s), will find new coordinator"
                         " and retry",
                         self.group_id, error_type.__name__)
                self.coordinator_dead()
                raise error_type()
            elif error_type in (Errors.UnknownMemberIdError,
                                Errors.IllegalGenerationError,
                                Errors.RebalanceInProgressError):
                # need to re-join group
                error = error_type(self.group_id)
                log.error("OffsetCommit failed for group %s due to group"
                          " error (%s), will rejoin",
                          self.group_id, error)
                self._subscription.mark_for_reassignment()
                raise error
            else:
                log.error("OffsetCommit failed for group %s on partition %s"
                          " with offset %s: %s",
                          self.group_id, tp, offset, error_type.__name__)
                raise error_type()

    if unauthorized_topics:
        log.error("OffsetCommit failed for unauthorized topics %s",
                  unauthorized_topics)
        raise Errors.TopicAuthorizationFailedError(unauthorized_topics)

def partitions():
    return [TopicPartition('foobar', 0),
            TopicPartition('foobar', 1)]

def partitions(self):
    return [
        TopicPartition(topic, partition)
        for topic, partitions in self.assignment  # pylint: disable-msg=no-member
        for partition in partitions
    ]

def update_metadata(self, metadata):
    # In the common case where we ask for a single topic and get back an
    # error, we should fail the future
    if len(metadata.topics) == 1 and metadata.topics[0][0] != 0:
        error_code, topic, _ = metadata.topics[0]
        error = Errors.for_code(error_code)(topic)
        return self.failed_update(error)

    if not metadata.brokers:
        log.warning("No broker metadata found in MetadataResponse")

    for node_id, host, port in metadata.brokers:
        self._brokers.update(
            {node_id: BrokerMetadata(node_id, host, port)})

    _new_partitions = {}
    _new_broker_partitions = collections.defaultdict(set)
    _new_unauthorized_topics = set()

    for error_code, topic, partitions in metadata.topics:
        error_type = Errors.for_code(error_code)
        if error_type is Errors.NoError:
            _new_partitions[topic] = {}
            for p_error, partition, leader, replicas, isr in partitions:
                _new_partitions[topic][partition] = PartitionMetadata(
                    topic=topic, partition=partition, leader=leader,
                    replicas=replicas, isr=isr, error=p_error)
                if leader != -1:
                    _new_broker_partitions[leader].add(
                        TopicPartition(topic, partition))
        elif error_type is Errors.LeaderNotAvailableError:
            log.warning("Topic %s is not available during auto-create"
                        " initialization", topic)
        elif error_type is Errors.UnknownTopicOrPartitionError:
            log.error("Topic %s not found in cluster metadata", topic)
        elif error_type is Errors.TopicAuthorizationFailedError:
            log.error("Topic %s is not authorized for this client", topic)
            _new_unauthorized_topics.add(topic)
        elif error_type is Errors.InvalidTopicError:
            log.error("'%s' is not a valid topic name", topic)
        else:
            log.error("Error fetching metadata for topic %s: %s",
                      topic, error_type)

    with self._lock:
        self._partitions = _new_partitions
        self._broker_partitions = _new_broker_partitions
        self.unauthorized_topics = _new_unauthorized_topics

    f = None
    if self._future:
        f = self._future
    self._future = None
    self._need_update = False

    now = time.time() * 1000
    self._last_refresh_ms = now
    self._last_successful_refresh_ms = now

    if f:
        f.success(self)
    log.debug("Updated cluster metadata to %s", self)

    for listener in self._listeners:
        listener(self)

def test_basic(self):
    cluster = ClusterMetadata(metadata_max_age_ms=10000)
    ma = MessageAccumulator(cluster, 1000, None, 30, self.loop)
    data_waiter = ma.data_waiter()
    done, _ = yield from asyncio.wait(
        [data_waiter], timeout=0.2, loop=self.loop)
    self.assertFalse(bool(done))  # no data in accumulator yet...

    tp0 = TopicPartition("test-topic", 0)
    tp1 = TopicPartition("test-topic", 1)
    yield from ma.add_message(tp0, b'key', b'value', timeout=2)
    yield from ma.add_message(tp1, None, b'value without key', timeout=2)

    done, _ = yield from asyncio.wait(
        [data_waiter], timeout=0.2, loop=self.loop)
    self.assertTrue(bool(done))

    batches, unknown_leaders_exist = ma.drain_by_nodes(ignore_nodes=[])
    self.assertEqual(batches, {})
    self.assertEqual(unknown_leaders_exist, True)

    def mocked_leader_for_partition(tp):
        if tp == tp0:
            return 0
        if tp == tp1:
            return 1
        return -1

    cluster.leader_for_partition = mock.MagicMock()
    cluster.leader_for_partition.side_effect = mocked_leader_for_partition

    batches, unknown_leaders_exist = ma.drain_by_nodes(ignore_nodes=[])
    self.assertEqual(len(batches), 2)
    self.assertEqual(unknown_leaders_exist, False)
    m_set0 = batches[0].get(tp0)
    self.assertEqual(type(m_set0), MessageBatch)
    m_set1 = batches[1].get(tp1)
    self.assertEqual(type(m_set1), MessageBatch)
    self.assertEqual(m_set0.expired(), False)

    data_waiter = ensure_future(ma.data_waiter(), loop=self.loop)
    done, _ = yield from asyncio.wait(
        [data_waiter], timeout=0.2, loop=self.loop)
    self.assertFalse(bool(done))  # no data in accumulator again...

    # testing batch overflow
    tp2 = TopicPartition("test-topic", 2)
    yield from ma.add_message(
        tp0, None, b'some short message', timeout=2)
    yield from ma.add_message(
        tp0, None, b'some other short message', timeout=2)
    yield from ma.add_message(
        tp1, None, b'0123456789' * 70, timeout=2)
    yield from ma.add_message(
        tp2, None, b'message to unknown leader', timeout=2)
    # next we try to add a message with len=500; as we have
    # buffer_size=1000 the coroutine will block until data is drained
    add_task = ensure_future(
        ma.add_message(tp1, None, b'0123456789' * 50, timeout=2),
        loop=self.loop)
    done, _ = yield from asyncio.wait(
        [add_task], timeout=0.2, loop=self.loop)
    self.assertFalse(bool(done))

    batches, unknown_leaders_exist = ma.drain_by_nodes(ignore_nodes=[1, 2])
    self.assertEqual(unknown_leaders_exist, True)
    m_set0 = batches[0].get(tp0)
    self.assertEqual(m_set0._builder._relative_offset, 2)
    m_set1 = batches[1].get(tp1)
    self.assertEqual(m_set1, None)

    done, _ = yield from asyncio.wait(
        [add_task], timeout=0.1, loop=self.loop)
    self.assertFalse(bool(done))  # we still have not drained data for tp1

    batches, unknown_leaders_exist = ma.drain_by_nodes(ignore_nodes=[])
    self.assertEqual(unknown_leaders_exist, True)
    m_set0 = batches[0].get(tp0)
    self.assertEqual(m_set0, None)
    m_set1 = batches[1].get(tp1)
    self.assertEqual(m_set1._builder._relative_offset, 1)

    done, _ = yield from asyncio.wait(
        [add_task], timeout=0.2, loop=self.loop)
    self.assertTrue(bool(done))

    batches, unknown_leaders_exist = ma.drain_by_nodes(ignore_nodes=[])
    self.assertEqual(unknown_leaders_exist, True)
    m_set1 = batches[1].get(tp1)
    self.assertEqual(m_set1._builder._relative_offset, 1)

def load_metadata_for_topics(self, *topics, **kwargs):
    """Fetch broker and topic-partition metadata from the server.

    Updates internal data: broker list, topic/partition list, and
    topic/partition -> broker map. This method should be called after
    receiving any error.

    Note: Exceptions *will not* be raised in a full refresh (i.e. no topic
    list). In this case, error codes will be logged as errors.
    Partition-level errors will also not be raised here (a single partition
    w/o a leader, for example).

    Arguments:
        *topics (optional): If a list of topics is provided,
            the metadata refresh will be limited to the specified topics
            only.
        ignore_leadernotavailable (bool): suppress LeaderNotAvailableError
            so that metadata is loaded correctly during auto-create.
            Default: False.

    Raises:
        UnknownTopicOrPartitionError: Raised for topics that do not exist,
            unless the broker is configured to auto-create topics.
        LeaderNotAvailableError: Raised for topics that do not exist yet,
            when the broker is configured to auto-create topics.
            Retry after a short backoff (topics/partitions are initializing).
    """
    if 'ignore_leadernotavailable' in kwargs:
        ignore_leadernotavailable = kwargs['ignore_leadernotavailable']
    else:
        ignore_leadernotavailable = False

    if topics:
        self.reset_topic_metadata(*topics)
    else:
        self.reset_all_metadata()

    resp = self.send_metadata_request(topics)

    log.debug('Updating broker metadata: %s', resp.brokers)
    log.debug('Updating topic metadata: %s',
              [topic for _, topic, _ in resp.topics])

    self.brokers = dict([(nodeId, BrokerMetadata(nodeId, host, port))
                         for nodeId, host, port in resp.brokers])

    for error, topic, partitions in resp.topics:
        # Errors expected for new topics
        if error:
            error_type = kafka.common.kafka_errors.get(error, UnknownError)
            if error_type in (UnknownTopicOrPartitionError,
                              LeaderNotAvailableError):
                log.error('Error loading topic metadata for %s: %s (%s)',
                          topic, error_type, error)
                if topic not in topics:
                    continue
                elif (error_type is LeaderNotAvailableError and
                      ignore_leadernotavailable):
                    continue
            raise error_type(topic)

        self.topic_partitions[topic] = {}
        for error, partition, leader, _, _ in partitions:
            self.topic_partitions[topic][partition] = leader

            # Populate topics_to_brokers dict
            topic_part = TopicPartition(topic, partition)

            # Check for partition errors
            if error:
                error_type = kafka.common.kafka_errors.get(error, UnknownError)

                # If No Leader, topics_to_brokers topic_partition -> None
                if error_type is LeaderNotAvailableError:
                    log.error('No leader for topic %s partition %d',
                              topic, partition)
                    self.topics_to_brokers[topic_part] = None
                    continue

                # If one of the replicas is unavailable -- ignore;
                # this error code is provided for admin purposes only.
                # We never talk to replicas, only the leader.
                elif error_type is ReplicaNotAvailableError:
                    log.debug('Some (non-leader) replicas not available'
                              ' for topic %s partition %d',
                              topic, partition)
                else:
                    raise error_type(topic_part)

            # If Known Broker, topic_partition -> BrokerMetadata
            if leader in self.brokers:
                self.topics_to_brokers[topic_part] = self.brokers[leader]

            # If Unknown Broker, fake BrokerMetadata so we don't lose the id
            # (not sure how this could happen. server could be in bad state)
            else:
                self.topics_to_brokers[topic_part] = BrokerMetadata(
                    leader, None, None)

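# A minimal usage sketch for the method above, assuming a reachable broker at
# localhost:9092 and an existing topic named 'my-topic'. SimpleClient is the
# legacy kafka-python client (removed in kafka-python 2.0).
from kafka import SimpleClient

client = SimpleClient(hosts='localhost:9092')
client.load_metadata_for_topics('my-topic', ignore_leadernotavailable=True)
# partition -> leader broker id, as populated by load_metadata_for_topics()
print(client.topic_partitions.get('my-topic'))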