Example #1
from kafka import SimpleClient
from kafka.common import OffsetRequestPayload


def kafka_get_topics_offsets(host, topic, port=9092):
    """Return available partitions and their offsets for the given topic.

    Args:
        host (str): Kafka host.
        topic (str): Kafka topic.
        port (int): Kafka port.

    Returns:
        [(int, int, int)]: [(partition, start_offset, end_offset)].
    """
    brokers = ['{}:{}'.format(host, port)]
    client = SimpleClient(brokers)

    offsets = []
    partitions = client.get_partition_ids_for_topic(topic)

    offsets_responses_end = client.send_offset_request([
        OffsetRequestPayload(topic, partition, -1, 1)
        for partition in partitions
    ])
    offsets_responses_start = client.send_offset_request([
        OffsetRequestPayload(topic, partition, -2, 1)
        for partition in partitions
    ])

    for start_offset, end_offset in zip(offsets_responses_start,
                                        offsets_responses_end):
        offsets.append((start_offset.partition, start_offset.offsets[0],
                        end_offset.offsets[0]))

    return offsets
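The third argument to OffsetRequestPayload is the standard Kafka offset sentinel (-1 requests the latest offset, -2 the earliest); the fourth is the maximum number of offsets to return. A minimal usage sketch, assuming a reachable broker on localhost and a hypothetical topic name:

# Hypothetical host/topic; any reachable broker and existing topic will do.
for partition, start, end in kafka_get_topics_offsets('localhost', 'my-topic'):
    print('partition=%d start=%d end=%d depth=%d'
          % (partition, start, end, end - start))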
Example #2
    def _get_highwater_offsets(self, kafka_hosts_ports):
        """
        Fetch highwater offsets for each topic/partition from Kafka cluster.

        Do this for all partitions in the cluster because even if a topic has
        no consumers, we may still want to measure whether producers are
        successfully producing. There is no need to limit this for
        performance: fetching broker offsets from Kafka is a relatively
        inexpensive operation.
        """
        kafka_conn = SimpleClient(kafka_hosts_ports,
                                  timeout=self.kafka_timeout)
        try:
            broker_topics_partitions = kafka_conn.topics_to_brokers.keys()
            # batch a bunch of requests into a single network call
            offsets_request = [
                OffsetRequestPayload(topic, partition, -1, 1)
                for topic, partition in broker_topics_partitions
            ]
            offsets_response = kafka_conn.send_offset_request(offsets_request)
            highwater_offsets = {(x.topic, x.partition): x.offsets[0]
                                 for x in offsets_response}
        finally:
            try:
                kafka_conn.close()
            except Exception:
                self.log.exception('Error cleaning up Kafka connection')
        return highwater_offsets
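Given the mapping returned above, per-partition consumer lag is the highwater mark minus the committed offset. A minimal sketch, where consumer_offsets is a hypothetical dict keyed the same way:

def compute_lag(highwater_offsets, consumer_offsets):
    # consumer_offsets: hypothetical {(topic, partition): committed offset}
    return {tp: hw - consumer_offsets[tp]
            for tp, hw in highwater_offsets.items()
            if tp in consumer_offsets}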
Example #3
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:],
                               random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' %
                                       (self.server.host, self.server.port))
            self.client_async = KafkaClient(
                bootstrap_servers='%s:%d' %
                (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request(
                [OffsetRequestPayload(topic, partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
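A sketch of how this base class is typically used, assuming the harness's zk/server fixtures are configured; the subclass name and assertion are hypothetical:

class TestTopicOffsets(KafkaIntegrationTestCase):
    def test_fresh_topic_starts_at_zero(self):
        # setUp created a unique topic, so partition 0 should be empty.
        self.assertEqual(self.current_offset(self.topic, 0), 0)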
Example #4
def process(spouts):
    '''
    Returns a named tuple of type PartitionsSummary.
    '''
    results = []
    total_depth = 0
    total_delta = 0
    brokers = []
    for s in spouts:
        for p in s.partitions:
            try:
                k = SimpleClient(p['broker']['host'] + ':' +
                                 str(p['broker']['port']))
            except socket.gaierror as e:
                raise ProcessorError('Failed to contact Kafka broker %s (%s)' %
                                     (p['broker']['host'], str(e)))
            earliest_off = [
                OffsetRequestPayload(p['topic'], p['partition'], -2, 1)
            ]
            latest_off = [
                OffsetRequestPayload(p['topic'], p['partition'], -1, 1)
            ]

            earliest = k.send_offset_request(earliest_off)[0].offsets[0]
            latest = k.send_offset_request(latest_off)[0].offsets[0]
            current = p['offset']
            k.close()  # one short-lived client per partition; close it when done

            brokers.append(p['broker']['host'])
            total_depth += latest - earliest
            total_delta += latest - current

            results.append(
                PartitionState._make([
                    p['broker']['host'], p['topic'], p['partition'], earliest,
                    latest, latest - earliest, s.id, current, latest - current
                ]))
    return PartitionsSummary(total_depth=total_depth,
                             total_delta=total_delta,
                             num_partitions=len(results),
                             num_brokers=len(set(brokers)),
                             partitions=tuple(results))
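PartitionState and PartitionsSummary are defined elsewhere in the project; a plausible reconstruction from the fields used above (an assumption, not the original definitions):

from collections import namedtuple

# Field order inferred from the PartitionState._make(...) call above.
PartitionState = namedtuple('PartitionState', [
    'broker', 'topic', 'partition', 'earliest', 'latest',
    'depth', 'spout', 'current', 'delta'])
PartitionsSummary = namedtuple('PartitionsSummary', [
    'total_depth', 'total_delta', 'num_partitions', 'num_brokers',
    'partitions'])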
Example #5
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request([OffsetRequestPayload(topic, partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
Example #6
    def _python_kafka_partitionoffset(self, topic):
        """
        Print the latest offset of each partition of the topic.
        """
        client = SimpleClient(self.brokers)
        partitions = client.topic_partitions[topic]
        offset_requests = [
            OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()
        ]
        offsets_responses = client.send_offset_request(offset_requests)
        for r in offsets_responses:
            print("partition = %s, offset = %s" % (r.partition, r.offsets[0]))
Example #7
    def getoffset(self, topic):
        '''Currently unused.'''
        from kafka import SimpleClient
        from kafka.common import OffsetRequestPayload
        client = SimpleClient(self.server)
        partitions = client.topic_partitions[topic]
        offset_requests = [
            OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()
        ]
        offsets_responses = client.send_offset_request(offset_requests)
        for r in offsets_responses:
            print("partition = %s, offset = %s" % (r.partition, r.offsets[0]))
Example #8
    def _python_kafka_offsetcount(self, topic):
        """
        Count the total number of messages (sum of the latest offsets
        of all partitions) of the topic.
        """
        client = SimpleClient(self.brokers)
        self.topic = topic
        partitions = client.topic_partitions[self.topic]
        offset_requests = [
            OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()
        ]
        offsets_responses = client.send_offset_request(offset_requests)
        totaloffset = 0
        for r in offsets_responses:
            totaloffset += r.offsets[0]
        return totaloffset
Example #9
class KafkaClient(object):
    def __init__(self, kafka_host, topic, group_id):
        self.client = SimpleClient(kafka_host)
        self.topic = topic
        try:
            self.partitions = self.client.topic_partitions[topic]
        except KeyError as ex:
            print('KeyError: {}'.format(ex))
            self.partitions = None
        self.group_id = group_id

    def close(self):
        self.client.close()

    def get_latest_offsets(self):
        request = [
            OffsetRequestPayload(self.topic, p, -1, 1)
            for p in self.partitions.keys()
        ]
        response = self.client.send_offset_request(request)
        offsets = {r.partition: r.offsets[0] for r in response}  # partition -> latest offset
        return offsets
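A usage sketch (address, topic, and group are placeholders). Note that this class name shadows kafka-python's own KafkaClient, so the wrapper and the library class cannot be imported under the same name:

kc = KafkaClient('localhost:9092', 'my-topic', 'my-group')
try:
    print(kc.get_latest_offsets())  # e.g. {0: 1523, 1: 1601}
finally:
    kc.close()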
Example #10
class KafkaClient(object):
    def __init__(self, kafka_host, topic, group_id):
        self.client = SimpleClient(kafka_host)
        self.topic = topic
        self.partitions = self.client.topic_partitions[topic]
        self.group_id = group_id

    def close(self):
        self.client.close()

    def get_tail_offsets(self):
        request = [
            OffsetRequestPayload(self.topic, p, -1, 1)
            for p in self.partitions.keys()
        ]
        response = self.client.send_offset_request(request)
        offsets = {r.partition: r.offsets[0]
                   for r in response}  # partition -> latest offset
        return offsets
Example #11
def count_kafka_mssg(topic, server):
    """Returns the total number of messages (sum of all partitions) in given kafka topic

    """
    client = SimpleClient(server)

    partitions = client.topic_partitions[topic]
    offset_requests = [
        OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()
    ]

    offsets_responses = client.send_offset_request(offset_requests)

    total_mssg = 0

    for r in offsets_responses:
        logging.info("partition = {}, offset = {}".format(
            r.partition, r.offsets[0]))
        total_mssg += int(r.offsets[0])

    return total_mssg
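SimpleClient and OffsetRequestPayload were removed in kafka-python 2.0. On kafka-python 1.3+ the same count can be computed with KafkaConsumer.end_offsets; a sketch under that assumption (like the original, it assumes the log begins at offset 0):

from kafka import KafkaConsumer, TopicPartition

def count_kafka_mssg_modern(topic, server):
    """Sum the end offsets across all partitions of the topic."""
    consumer = KafkaConsumer(bootstrap_servers=server)
    partitions = [TopicPartition(topic, p)
                  for p in consumer.partitions_for_topic(topic)]
    total = sum(consumer.end_offsets(partitions).values())
    consumer.close()
    return total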
Example #12
def topic_offsets(kafka_brokers, topic):
    client = SimpleClient(insure_is_array(kafka_brokers))
    topic_partitions = client.topic_partitions
    if topic not in topic_partitions:
        raise KafkaException("topic {} does not exist".format(topic))
    partitions = topic_partitions[topic]
    offset_requests = [
        OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()
    ]
    offsets_responses = client.send_offset_request(offset_requests)
    client.close()
    partitions_and_offsets = {}
    for offset in offsets_responses:
        if offset.topic == topic:
            topic_offset = 0
            topic_partition = TopicPartition(topic=offset.topic,
                                             partition=offset.partition)
            if offset.offsets[0]:
                topic_offset = offset.offsets[0]
            partitions_and_offsets[topic_partition] = topic_offset

    return partitions_and_offsets
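The returned dict is keyed by TopicPartition (from kafka.structs in kafka-python 1.x); insure_is_array is a project helper that is not shown. A usage sketch with a hypothetical broker list:

for tp, offset in topic_offsets(['broker1:9092'], 'my-topic').items():
    print('%s[%d] -> %d' % (tp.topic, tp.partition, offset))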
Example #13
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' % (self.server.host, self.server.port))

        timeout = time.time() + 30
        while time.time() < timeout:
            try:
                self.client.load_metadata_for_topics(self.topic, ignore_leadernotavailable=False)
                if self.client.has_metadata_for_topic(topic):
                    break
            except (LeaderNotAvailableError, InvalidTopicError):
                time.sleep(1)
        else:
            raise KafkaTimeoutError('Timeout loading topic metadata!')

        # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
        # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
        for partition in self.client.get_partition_ids_for_topic(self.topic):
            while True:
                try:
                    req = OffsetRequestPayload(self.topic, partition, -1, 100)
                    self.client.send_offset_request([req])
                    break
                except (NotLeaderForPartitionError, UnknownTopicOrPartitionError, FailedPayloadsError):
                    if time.time() > timeout:
                        raise KafkaTimeoutError('Timeout loading topic metadata!')
                    time.sleep(.1)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request([OffsetRequestPayload(topic,
                                                                             partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
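random_string comes from the project's test utilities and is not shown; a minimal stand-in (an assumption, not the original helper) could be:

import random
import string

def random_string(length):
    # Random lowercase suffix used to build unique per-test topic names.
    return ''.join(random.choice(string.ascii_lowercase)
                   for _ in range(length))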
Example #14
    def spoorer(self):  # connect to Kafka and fetch the topic list
        try:
            kafka_client = SimpleClient(self.kafka_hosts, timeout=self.timeout)
        except Exception:
            print("Error, cannot connect to Kafka broker.")
            sys.exit(1)
        else:
            kafka_topics = kafka_client.topics
            kafka_client.close()

        # connect to ZooKeeper and read the current consumer offsets
        try:
            zookeeper_client = KazooClient(hosts=self.zookeeper_hosts, read_only=True, timeout=self.timeout)
            zookeeper_client.start()
        except Exception:
            print("Error, cannot connect to ZooKeeper server.")
            sys.exit(1)

        try:
            groups = list(map(str, zookeeper_client.get_children(self.zookeeper_url + 'consumers')))
        except NoNodeError:
            print("Error, invalid ZooKeeper URL.")
            zookeeper_client.stop()
            sys.exit(2)
        else:
            for group in groups:
                if 'offsets' not in zookeeper_client.get_children(self.zookeeper_url + 'consumers/%s' % group):
                    continue
                topic_path = 'consumers/%s/offsets' % group
                topics = list(map(str, zookeeper_client.get_children(self.zookeeper_url + topic_path)))
                if len(topics) == 0:
                    continue
                for topic in topics:
                    if topic not in self.white_topic_group:
                        continue
                    # elif group not in self.white_topic_group[topic].replace(' ','').split(','):
                    #     continue
                    partition_path = 'consumers/%s/offsets/%s' % (group, topic)
                    partitions = list(map(int, zookeeper_client.get_children(self.zookeeper_url + partition_path)))

                    for partition in partitions:
                        base_path = 'consumers/%s/%s/%s/%s' % (group, '%s', topic, partition)
                        owner_path, offset_path = base_path % 'owners', base_path % 'offsets'
                        offset = zookeeper_client.get(self.zookeeper_url + offset_path)[0]

                        try:
                            owner = zookeeper_client.get(self.zookeeper_url + owner_path)[0]
                        except NoNodeError:
                            owner = 'null'
                        # store this partition's consumer progress in the metric dict
                        metric = {'datetime': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()),
                                  'topic': topic, 'group': group, 'partition': int(partition),
                                  'logsize': None, 'offset': int(offset), 'lag': None, 'owner': owner}
                        self.result.append(metric)
        finally:
            zookeeper_client.stop()
        # fetch the logsize (latest offset) of every partition
        try:
            client = SimpleClient(self.kafka_hosts)
        except Exception:
            print("Error, cannot connect to Kafka broker.")
            sys.exit(1)
        else:
            for kafka_topic in kafka_topics:
                self.kafka_logsize[kafka_topic] = {}
                partitions = client.topic_partitions[kafka_topic]
                offset_requests = [OffsetRequestPayload(kafka_topic, p, -1, 1) for p in partitions.keys()]
                offsets_responses = client.send_offset_request(offset_requests)
                for r in offsets_responses:
                    self.kafka_logsize[kafka_topic][r.partition] = r.offsets[0]

        # lag = logsize - current offset
        f1 = open(self.log_file, 'w')
        for metric in self.result:
            logsize = self.kafka_logsize[metric['topic']][metric['partition']]
            metric['logsize'] = int(logsize)
            metric['lag'] = int(logsize) - int(metric['offset'])
            f1.write(json.dumps(metric, sort_keys=True) + '\n')
            f1.flush()
        f1.close()
        client.close()
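The ZooKeeper nodes walked above follow the classic high-level-consumer layout, /consumers/<group>/offsets/<topic>/<partition>. A minimal sketch of reading one committed offset with kazoo (group, topic, and partition are placeholders):

from kazoo.client import KazooClient

def zk_committed_offset(zk_hosts, group, topic, partition):
    # Read the znode where the high-level consumer stored its progress.
    zk = KazooClient(hosts=zk_hosts, read_only=True)
    zk.start()
    try:
        value, _stat = zk.get('/consumers/%s/offsets/%s/%d'
                              % (group, topic, partition))
        return int(value.decode('utf-8'))
    finally:
        zk.stop()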
Example #15
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:],
                               random_string(10))
            self.topic = topic

        if self.create_client:
            self.client = SimpleClient('%s:%d' %
                                       (self.server.host, self.server.port))

        timeout = time.time() + 30
        while time.time() < timeout:
            try:
                self.client.load_metadata_for_topics(
                    self.topic, ignore_leadernotavailable=False)
                if self.client.has_metadata_for_topic(topic):
                    break
            except (LeaderNotAvailableError, InvalidTopicError):
                time.sleep(1)
        else:
            raise KafkaTimeoutError('Timeout loading topic metadata!')

        # Ensure topic partitions have been created on all brokers to avoid UnknownPartitionErrors
        # TODO: It might be a good idea to move this to self.client.ensure_topic_exists
        for partition in self.client.get_partition_ids_for_topic(self.topic):
            while True:
                try:
                    req = OffsetRequestPayload(self.topic, partition, -1, 100)
                    self.client.send_offset_request([req])
                    break
                except (NotLeaderForPartitionError,
                        UnknownTopicOrPartitionError,
                        FailedPayloadsError):
                    if time.time() > timeout:
                        raise KafkaTimeoutError(
                            'Timeout loading topic metadata!')
                    time.sleep(.1)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request(
                [OffsetRequestPayload(topic, partition, -1, 1)])
        except Exception:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
Example #16

port = 8086

## Kafka cluster name, used as the measurement name in InfluxDB
kafka_name = ""
zk_path = '/consumers/'
consumer_group = ''
kafka_brokers = sys.argv[2]
zk_clusters = sys.argv[4]
topic = sys.argv[6]

client = SimpleClient(kafka_brokers)

partitions = client.topic_partitions[topic]
offset_requests = [OffsetRequestPayload(topic, p, -1, 1) for p in partitions.keys()]

offsets_responses = client.send_offset_request(offset_requests)

zk = KazooClient(hosts=zk_clusters, read_only=True)
zk.start()

zk_path = zk_path + consumer_group
if zk.exists(zk_path):
    data, stat = zk.get(zk_path + "/offsets/" + topic + "/1")
    sum_lag = 0
    sum_offset = 0
    for r in offsets_responses:
        consumer_offset, stat = zk.get(zk_path + "/offsets/" + topic + "/" + str(r.partition))
        producer_offset = r.offsets[0]
        lag_partition = producer_offset - int(consumer_offset)
        #print("Partition: %s, lag: %s" % (r.partition, lag_partition))
        sum_offset = sum_offset + producer_offset