def _init_client(self, wait_time=None):
    for i in range(self.max_retry):
        try:
            # If there is a client instance, but _init_client is called
            # again, most likely the connection has gone stale; close that
            # connection and reconnect.
            if self._client:
                self._client.close()

            if not wait_time:
                wait_time = self.wait_time
            time.sleep(wait_time)

            self._client = client.KafkaClient(self.uri)

            # When a client is re-initialized, the existing consumer and
            # producer should be reset as well.
            self._consumer = None
            self._producer = None
            LOG.debug("Successfully connected to Kafka server at topic: "
                      "\"%s\" partitions %s" % (self.topic, self.partitions))
            break
        except common.KafkaUnavailableError:
            LOG.error('Kafka server at %s is down.' % self.uri)
        except common.LeaderNotAvailableError:
            LOG.error('Kafka at %s has no leader available.' % self.uri)
        except Exception:
            LOG.error('Kafka at %s initialization failed.' % self.uri)

        # Wait a bit and try again to get a client.
        time.sleep(self.wait_time)
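# --- A minimal usage sketch, not part of the original code. It assumes the
# pre-1.0 kafka-python layout implied above (KafkaClient in kafka.client,
# exception types in kafka.common) and shows the attributes _init_client
# expects its owning class to provide. Class name and defaults are
# illustrative assumptions.
import logging
import time                     # used by _init_client above

from kafka import client, common  # common is used by _init_client above

LOG = logging.getLogger(__name__)


class KafkaConnection(object):

    def __init__(self, uri, topic, partitions=None, wait_time=1, max_retry=3):
        self.uri = uri                # e.g. 'localhost:9092'
        self.topic = topic
        self.partitions = partitions
        self.wait_time = wait_time    # seconds to sleep between attempts
        self.max_retry = max_retry
        self._client = None
        self._consumer = None
        self._producer = None

    # _init_client (defined above) would live here as a method; calling it
    # establishes the connection, retrying up to max_retry times.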
def healthcheck(self):
    url = CONF.kafka_healthcheck.kafka_url

    try:
        kafka_client = client.KafkaClient(hosts=url)
    except client.KafkaUnavailableError as ex:
        LOG.error(repr(ex))
        error_str = 'Could not connect to kafka at %s' % url
        return result.HealthCheckResult(healthy=False, message=error_str)

    # Verify the topics while the connection is still open, then
    # disconnect before returning the result.
    check_result = self._verify_topics(kafka_client)
    self._disconnect_gracefully(kafka_client)

    return check_result
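# Hedged sketches of the two helpers referenced above; their real bodies are
# not part of this snippet, so these are assumptions about their intent. The
# kafka_topics option name is likewise an assumption.
def _disconnect_gracefully(self, kafka_client):
    # Closing can itself fail on a broken socket; a health check should
    # never raise from cleanup.
    try:
        kafka_client.close()
    except Exception as ex:
        LOG.error(repr(ex))


def _verify_topics(self, kafka_client):
    # Assumed behaviour: report unhealthy if any configured topic is
    # missing from the broker metadata the client loaded on connect.
    expected = set(CONF.kafka_healthcheck.kafka_topics)
    missing = expected - set(kafka_client.topic_partitions)
    if missing:
        error_str = 'Missing kafka topics: %s' % ','.join(missing)
        return result.HealthCheckResult(healthy=False, message=error_str)
    return result.HealthCheckResult(healthy=True, message='OK')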
def replicationproducer(self):
    """Obtain a ``Producer`` instance to write to the replication log."""
    if not getattr(self, '_replicationproducer', None):
        # hosts, clientid, timeout, topic, reqacks and acktimeout are
        # expected to be defined in the enclosing scope, typically read
        # from configuration.
        client = kafkaclient.KafkaClient(hosts, client_id=clientid,
                                         timeout=timeout)
        self._replicationproducer = vcsrproducer.Producer(
            client, topic, batch_send=False, req_acks=reqacks,
            ack_timeout=acktimeout)

    return self._replicationproducer
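# A hedged sketch of where the free names above might come from; the config
# keys and values are assumptions for illustration only.
config = {
    'hosts': ['localhost:9092'],
    'clientid': 'vcsreplicator',
    'connecttimeout': 10,
    'topic': 'pushdata',
    'reqacks': -1,          # wait for all in-sync replicas to ack
    'acktimeoutms': 10000,
}

hosts = config['hosts']
clientid = config['clientid']
timeout = config['connecttimeout']
topic = config['topic']
reqacks = config['reqacks']
acktimeout = config['acktimeoutms']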
def _init_client(self, wait_time=None):
    last_exception = None
    for i in range(self.max_retry):
        try:
            # If there is a client instance, but _init_client is called
            # again, most likely the connection has gone stale; close that
            # connection and reconnect.
            if self._client:
                self._client.close()

            if not wait_time:
                wait_time = self.wait_time
            time.sleep(wait_time)

            self._client = client.KafkaClient(self.uri)

            # When a client is re-initialized, the existing producer
            # should be reset as well.
            self._producer = None
            return
        except common.KafkaUnavailableError as e:
            last_exception = e
            LOG.error('Kafka server at %s is down.' % self.uri)
            self.statsd_kafka_producer_error_count.increment(
                1, sample_rate=1.0)
        except common.LeaderNotAvailableError as e:
            last_exception = e
            LOG.error('Kafka at %s has no leader available.' % self.uri)
            self.statsd_kafka_producer_error_count.increment(
                1, sample_rate=1.0)
        except Exception as e:
            last_exception = e
            LOG.exception('Kafka at %s initialization failed.' % self.uri)
            self.statsd_kafka_producer_error_count.increment(
                1, sample_rate=1.0)

        # Wait a bit and try again to get a client.
        time.sleep(self.wait_time)

    # Do not swallow errors: surface the last failure to the caller.
    if last_exception:
        raise last_exception
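# Caller-side sketch (illustrative; conn is a hypothetical instance of the
# owning class): unlike the first variant, this one re-raises the last
# exception after exhausting max_retry, so callers must handle the failure
# rather than silently continuing with a stale client.
try:
    conn._init_client()
except Exception as e:
    LOG.error('Giving up on Kafka at %s: %s' % (conn.uri, e))
    raise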
def __enter__(self):
    self.kafka_conn = client.KafkaClient(self.connect_str)
    return self.kafka_conn
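# The matching __exit__ is not shown above; a minimal sketch, assuming the
# object is used purely as a context manager around one connection:
def __exit__(self, exc_type, exc_value, traceback):
    # Close the connection whether or not the block raised; returning
    # None (falsy) lets any exception propagate to the caller.
    self.kafka_conn.close()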
def check(self, instance):
    consumer_groups = self.read_config(instance, 'consumer_groups',
                                       cast=self._validate_consumer_groups)
    kafka_host_ports = self.read_config(instance, 'kafka_connect_str')
    full_output = self.read_config(instance, 'full_output', cast=bool)
    dimensions = {'component': 'kafka', 'service': 'kafka'}

    try:
        # Connect to Kafka
        kafka_conn = client.KafkaClient(kafka_host_ports)

        # Query Kafka for consumer offsets
        consumer_offsets = {}
        topics = collections.defaultdict(set)
        for consumer_group, topic_partitions in consumer_groups.iteritems():
            for topic, partitions in topic_partitions.iteritems():
                kafka_consumer = consumer.SimpleConsumer(kafka_conn,
                                                         consumer_group,
                                                         topic)
                # Remember the topic partitions that we've seen so that we
                # can look up their broker offsets later
                topics[topic].update(set(partitions))
                for partition in partitions:
                    try:
                        consumer_offsets[(consumer_group, topic, partition)] = \
                            kafka_consumer.offsets[partition]
                    except KeyError:
                        kafka_consumer.stop()
                        self.log.error('Error fetching consumer offset for '
                                       '{0} partition {1}'.format(topic,
                                                                  partition))
                kafka_consumer.stop()

        # Query Kafka for the broker offsets. This is done in a separate loop
        # so only one query is made per topic, even if multiple consumer
        # groups watch the same topic.
        broker_offsets = {}
        for topic, partitions in topics.items():
            offset_responses = []
            for p in partitions:
                try:
                    response = kafka_conn.send_offset_request(
                        [common.OffsetRequest(topic, p, -1, 1)])
                    offset_responses.append(response[0])
                except common.KafkaError as e:
                    self.log.error(
                        'Error fetching broker offset: {0}'.format(e))

            for resp in offset_responses:
                broker_offsets[(resp.topic, resp.partition)] = resp.offsets[0]
    finally:
        try:
            kafka_conn.close()
        except Exception:
            self.log.exception('Error cleaning up Kafka connection')

    # Report the broker data
    if full_output:
        broker_dimensions = dimensions.copy()
        for (topic, partition), broker_offset in broker_offsets.items():
            broker_dimensions.update({'topic': topic,
                                      'partition': partition})
            self.gauge('kafka.broker_offset', broker_offset,
                       dimensions=self._set_dimensions(broker_dimensions,
                                                       instance))

    # Report the consumer data
    consumer_dimensions = dimensions.copy()
    for (consumer_group, topic, partition), consumer_offset in consumer_offsets.items():
        # Get the broker offset
        broker_offset = broker_offsets.get((topic, partition))

        # Report the consumer offset and lag
        consumer_dimensions.update({'topic': topic,
                                    'partition': partition,
                                    'consumer_group': consumer_group})
        if full_output:
            self.gauge('kafka.consumer_offset', consumer_offset,
                       dimensions=self._set_dimensions(consumer_dimensions,
                                                       instance))
        self.gauge('kafka.consumer_lag', broker_offset - consumer_offset,
                   dimensions=self._set_dimensions(consumer_dimensions,
                                                   instance))
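# A hedged example of the instance structure check() expects; the group,
# topic, host, and partition values are illustrative assumptions.
example_instance = {
    'kafka_connect_str': 'localhost:9092',
    'full_output': True,
    'consumer_groups': {
        'my_consumer_group': {         # consumer group name
            'my_topic': [0, 1, 2],     # topic -> list of partitions
        },
    },
}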