# These imports assume a legacy kafka-python release (SimpleClient-era API,
# where OffsetRequestPayload lives in kafka.common).
from kafka import KafkaClient
from kafka.common import OffsetRequestPayload


def _get_offsets_from_kafka(brokers, topic, offset_time):
    """Return a dict of Kafka offset responses keyed by
    "<topic>_<partition>".
    """
    client = KafkaClient(brokers)

    # get the partitions for the topic
    partitions = client.topic_partitions[topic]

    # https://cwiki.apache.org/confluence/display/KAFKA/
    # A+Guide+To+The+Kafka+Protocol#
    # AGuideToTheKafkaProtocol-OffsetRequest
    MAX_OFFSETS = 1
    offset_requests = [OffsetRequestPayload(topic, part_name, offset_time,
                                            MAX_OFFSETS)
                       for part_name in partitions.keys()]

    offsets_responses = client.send_offset_request(offset_requests)

    offset_dict = {}
    for response in offsets_responses:
        key = "_".join((response.topic, str(response.partition)))
        offset_dict[key] = response

    return offset_dict
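
The offset_time argument follows the Kafka offset API convention: -1 requests the latest offset for each partition, -2 the earliest. A minimal usage sketch; the broker address and topic name are placeholders:

latest = _get_offsets_from_kafka("localhost:9092", "my-topic", -1)
for key, response in latest.items():
    # MAX_OFFSETS is 1, so each response carries a single offset
    print("%s -> %d" % (key, response.offsets[0]))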
Example #2
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    bytes_topic = None
    zk = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            self.skipTest('Integration test requires KAFKA_VERSION')

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10))
            self.topic = topic
            self.bytes_topic = topic.encode('utf-8')

        if self.create_client:
            self.client = KafkaClient('%s:%d' % (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        try:
            offsets, = self.client.send_offset_request([ OffsetRequest(kafka_bytestring(topic), partition, -1, 1) ])
        except:
            # XXX: We've seen some UnknownErrors here and can't debug w/o server logs
            self.zk.child.dump_logs()
            self.server.child.dump_logs()
            raise
        else:
            return offsets.offsets[0]

    def msgs(self, iterable):
        return [ self.msg(x) for x in iterable ]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
Example #3
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:],
                               random_string(10).decode('utf-8'))
            self.topic = topic.encode('utf-8')

        if self.create_client:
            self.client = KafkaClient('%s:%d' %
                                      (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        offsets, = self.client.send_offset_request(
            [OffsetRequest(topic, partition, -1, 1)])
        return offsets.offsets[0]

    def msgs(self, iterable):
        return [self.msg(x) for x in iterable]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
Example #4
class KafkaIntegrationTestCase(unittest.TestCase):
    create_client = True
    topic = None
    server = None

    def setUp(self):
        super(KafkaIntegrationTestCase, self).setUp()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if not self.topic:
            topic = "%s-%s" % (self.id()[self.id().rindex(".") + 1:], random_string(10).decode('utf-8'))
            self.topic = topic.encode('utf-8')

        if self.create_client:
            self.client = KafkaClient('%s:%d' % (self.server.host, self.server.port))

        self.client.ensure_topic_exists(self.topic)

        self._messages = {}

    def tearDown(self):
        super(KafkaIntegrationTestCase, self).tearDown()
        if not os.environ.get('KAFKA_VERSION'):
            return

        if self.create_client:
            self.client.close()

    def current_offset(self, topic, partition):
        offsets, = self.client.send_offset_request([ OffsetRequest(topic, partition, -1, 1) ])
        return offsets.offsets[0]

    def msgs(self, iterable):
        return [ self.msg(x) for x in iterable ]

    def msg(self, s):
        if s not in self._messages:
            self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

        return self._messages[s].encode('utf-8')

    def key(self, k):
        return k.encode('utf-8')
Example #5
class Kafka(object):
    executor = ThreadPoolExecutor(20)

    def __init__(self, broker):
        self.broker = broker
        self.client = KafkaClient(broker, timeout=3)

    @run_on_executor
    def getPartition(self, topic):
        """ 指定topic返回partition列表 """

        return self.client.get_partition_ids_for_topic(topic)

    @run_on_executor
    def getLogsize(self, topic, partitions):
        """ 指定topic与partition列表, 返回logsize数据 """

        tp = self.client.send_offset_request(
            [OffsetRequestPayload(topic, p, -1, 1) for p in partitions])
        return {p.partition: p.offsets[0] for p in tp}

    @run_on_executor
    def getOffsets(self, topic, partitions, group):
        """ 指定topic、partition和group, 返回offsets数据 """

        try:
            # Try the zookeeper-storage API first to fetch the offsets.
            # If the group has no offsets there, it raises UnknownTopicOrPartitionError.
            tp = self.client.send_offset_fetch_request(
                group,
                [OffsetRequestPayload(topic, p, -1, 1) for p in partitions])
            offsets = {p.partition: p.offset for p in tp}

        except UnknownTopicOrPartitionError:
            # On that exception, fall back to the kafka-storage API for the offsets.
            consumer = KafkaConsumer(group_id=group,
                                     bootstrap_servers=self.broker,
                                     enable_auto_commit=False)
            tp = [TopicPartition(topic, p) for p in partitions]
            consumer.assign(tp)
            offsets = {p.partition: consumer.position(p) for p in tp}

        return offsets
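
A minimal sketch of driving these executor-backed methods from a Tornado coroutine; @run_on_executor returns futures that can be yielded. The broker, topic, and group names are placeholders:

from tornado import gen
from tornado.ioloop import IOLoop

@gen.coroutine
def report_lag():
    kafka = Kafka("localhost:9092")  # placeholder broker
    partitions = yield kafka.getPartition("my-topic")
    logsize = yield kafka.getLogsize("my-topic", partitions)
    offsets = yield kafka.getOffsets("my-topic", partitions, "my-group")
    # lag per partition = head offset (logsize) - committed consumer offset
    raise gen.Return({p: logsize[p] - offsets[p] for p in partitions})

# IOLoop.current().run_sync(report_lag)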
Example #6
    def handler(self):
        """ 查询指定Kafka集群Topic中每个Partition当前Logsize, 将Logsize写入LevelDB
            每次收集Logsize数据后会检测retention_day参数,删除过期数据
        """

        clusters = base.config["collector"]["clusters"]

        for cluster, metric in clusters.items():
            client = KafkaClient(metric["brokers"], timeout=3)

            for topic in metric["topics"]:
                partitions = client.get_partition_ids_for_topic(topic)
                payload = [
                    OffsetRequestPayload(topic, p, -1, 1) for p in partitions
                ]
                logsize = {
                    p.partition: p.offsets[0]
                    for p in client.send_offset_request(payload)
                }

                if logsize:
                    key = str(int(time.time())).encode("utf-8")
                    value = json.dumps(logsize).encode("utf-8")

                    db = base.init_leveldb(cluster=cluster, topic=topic)
                    db.Put(key, value)
                    deadline = base.config["collector"]["clusters"][cluster][
                        "retention_hour"] * 3600

                    for key, _ in db.RangeIter():
                        if time.time() - int(key) > deadline:
                            db.Delete(key)
                        else:
                            break

            client.close()
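
A hedged sketch of reading the stored series back out of LevelDB; base.init_leveldb and the key/value encoding mirror the handler above, while the cluster and topic names are placeholders:

db = base.init_leveldb(cluster="my-cluster", topic="my-topic")
for ts_key, raw in db.RangeIter():
    # keys are unix timestamps, values are JSON {partition: logsize} maps
    logsize = json.loads(raw.decode("utf-8"))
    print("%s %s" % (int(ts_key), logsize))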
Example #7
    def check(self, instance):
        consumer_groups = self.read_config(instance, 'consumer_groups',
                                           cast=self._validate_consumer_groups)
        zk_connect_str = self.read_config(instance, 'zk_connect_str')
        kafka_host_ports = self.read_config(instance, 'kafka_connect_str')

        # Construct the Zookeeper path pattern
        zk_prefix = instance.get('zk_prefix', '')
        zk_path_tmpl = zk_prefix + '/consumers/%s/offsets/%s/%s'

        # Connect to Zookeeper
        zk_conn = KazooClient(zk_connect_str, timeout=self.zk_timeout)
        zk_conn.start()

        try:
            # Query Zookeeper for consumer offsets
            consumer_offsets = {}
            topics = defaultdict(set)
            for consumer_group, topic_partitions in consumer_groups.iteritems():
                for topic, partitions in topic_partitions.iteritems():
                    # Remember the topic partitions that we've seen so that we can
                    # look up their broker offsets later
                    topics[topic].update(set(partitions))
                    for partition in partitions:
                        zk_path = zk_path_tmpl % (consumer_group, topic, partition)
                        try:
                            consumer_offset = int(zk_conn.get(zk_path)[0])
                            key = (consumer_group, topic, partition)
                            consumer_offsets[key] = consumer_offset
                        except NoNodeError:
                            self.log.warn('No zookeeper node at %s' % zk_path)
                        except Exception:
                            self.log.exception('Could not read consumer offset from %s' % zk_path)
        finally:
            try:
                zk_conn.stop()
                zk_conn.close()
            except Exception:
                self.log.exception('Error cleaning up Zookeeper connection')

        # Connect to Kafka
        kafka_conn = KafkaClient(kafka_host_ports, timeout=self.kafka_timeout)

        try:
            # Query Kafka for the broker offsets
            broker_offsets = {}
            for topic, partitions in topics.items():
                offset_responses = kafka_conn.send_offset_request([
                    OffsetRequest(topic, p, -1, 1) for p in partitions])

                for resp in offset_responses:
                    broker_offsets[(resp.topic, resp.partition)] = resp.offsets[0]
        finally:
            try:
                kafka_conn.close()
            except Exception:
                self.log.exception('Error cleaning up Kafka connection')

        # Report the broker data
        for (topic, partition), broker_offset in broker_offsets.items():
            broker_tags = ['topic:%s' % topic, 'partition:%s' % partition]
            self.gauge('kafka.broker_offset', broker_offset, tags=broker_tags)

        # Report the consumer
        for (consumer_group, topic, partition), consumer_offset in consumer_offsets.items():

            # Get the broker offset
            broker_offset = broker_offsets.get((topic, partition))

            # Report the consumer offset and lag
            tags = ['topic:%s' % topic, 'partition:%s' % partition,
                    'consumer_group:%s' % consumer_group]
            self.gauge('kafka.consumer_offset', consumer_offset, tags=tags)
            self.gauge('kafka.consumer_lag', broker_offset - consumer_offset,
                       tags=tags)
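
For reference, a hypothetical instance configuration for this check; consumer_groups maps consumer group -> topic -> partition list, and the connection strings are placeholders:

instance = {
    'zk_connect_str': 'localhost:2181',
    'kafka_connect_str': 'localhost:9092',
    'zk_prefix': '',
    'consumer_groups': {
        'my-group': {
            'my-topic': [0, 1, 2],
        },
    },
}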
Example #9
    def monitor(self):
        try:
            kafka_client = KafkaClient(KAFKA_HOSTS, timeout=self.timeout)
        except Exception:
            print "Error: cannot connect to Kafka broker."
            sys.exit(1)

        try:
            zookeeper_client = KazooClient(hosts=ZOO_HOSTS,
                                           read_only=True,
                                           timeout=self.timeout)
            zookeeper_client.start()
        except Exception:
            print "Error: cannot connect to ZooKeeper server."
            sys.exit(1)

        for group in CONSUMER_GROUPS:
            for topic in TOPIC_LIST:
                try:
                    partition_path = 'consumers/%s/offsets/%s' % (group, topic)
                    partitions = map(
                        int,
                        zookeeper_client.get_children(self.zookeeper_url +
                                                      partition_path))

                    for partition in partitions:
                        offset_path = 'consumers/%s/offsets/%s/%s' % (
                            group, topic, partition)
                        offset = zookeeper_client.get(self.zookeeper_url +
                                                      offset_path)[0]
                        if offset is None:
                            continue

                        obj = {
                            'timestamp': self.timestamp,
                            'group': group,
                            'topic': topic,
                            'partition': int(partition),
                            'metric': 'consumerlag:%s' % group,
                            'tags':
                            'topic=%s,partition=%s' % (topic, partition),
                            'offset': int(offset)
                        }

                        self.result.append(obj)
                except NoNodeError:
                    print "Error: failed to get offset for group[%s], topic[%s]" % (
                        group, topic)
                    continue

        zookeeper_client.stop()

        for kafka_topic in TOPIC_LIST:
            self.kafka_logsize[kafka_topic] = {}
            try:
                partitions = kafka_client.topic_partitions[kafka_topic]
                logsize_requests = [
                    OffsetRequestPayload(kafka_topic, p, -1, 1)
                    for p in partitions.keys()
                ]

                logsize_responses = kafka_client.send_offset_request(
                    logsize_requests)

                for r in logsize_responses:
                    self.kafka_logsize[kafka_topic][r.partition] = r.offsets[0]
            except Exception:
                print "Error: failed to get logsize for topic %s" % kafka_topic

        kafka_client.close()

        payload = []
        for obj in self.result:
            try:
                logsize = self.kafka_logsize[obj['topic']][obj['partition']]
                lag = int(logsize) - int(obj['offset'])
                item = {}
                item['endpoint'] = ENDPOINT
                item['metric'] = obj['metric']
                item['tags'] = obj['tags']
                item['timestamp'] = obj['timestamp']
                item['step'] = STEP
                item['value'] = lag
                item['counterType'] = 'GAUGE'

                payload.append(item)

            except Exception:
                print "Error: failed to compute lag for (%s/%s/%s)" % (
                    obj['group'], obj['topic'], obj['partition'])

        # 1. Print the computed lag details
        print "consumer-lag details:"
        print payload

        # 2. Report to falcon-agent
        if len(payload) > 0:
            requests.post(FALCON_AGENT_URL,
                          data=json.dumps(payload),
                          timeout=10)
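
For reference, each item posted to FALCON_AGENT_URL has the Open-Falcon gauge shape built above; the values here are illustrative:

[
    {
        "endpoint": "kafka-monitor-host",      # ENDPOINT
        "metric": "consumerlag:my-group",
        "tags": "topic=my-topic,partition=0",
        "timestamp": 1500000000,
        "step": 60,                            # STEP
        "value": 42,                           # logsize - consumer offset
        "counterType": "GAUGE",
    },
]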