Exemplo n.º 1
0
    def get_topic_config(self, topic):
        """Get configuration information for specified topic.

        :rtype : dict of configuration
        """
        topic_config_path = "/config/topics/{topic}".format(topic=topic)
        try:
            config_data = load_json(self.get(topic_config_path)[0])
        except NoNodeError as e:
            # Kafka version before 0.8.1 does not have "/config/topics/<topic_name>" path in ZK and
            # if the topic exists, return default dict instead of raising an Exception.
            # Ref: https://cwiki.apache.org/confluence/display/KAFKA/Kafka+data+structures+in+Zookeeper.
            matching_topics = self.get_topics(
                topic_name=topic,
                fetch_partition_state=False,
            )
            if not matching_topics:
                _log.error(
                    "topic {topic} not found.".format(topic=topic)
                )
                raise e
            _log.info("Configuration not available for topic {topic}.".format(topic=topic))
            config_data = {"config": {}}
        return config_data
Exemplo n.º 2
0
    def _get_entity_config(self, entity_type, entity_name, entity_exists):
        """Get configuration information for the specified entity.

        :entity_type : "brokers" or "topics"
        :entity_name : broker id or topic name
        :entity_exists : fn(entity_name) -> bool used to decide whether a
                            missing config node means "no config yet" (entity
                            exists) or "unknown entity" (raise)
        :rtype : dict of configuration
        """
        assert entity_type in (
            "brokers", "topics"), "Supported entities are brokers and topics"

        config_path = "/config/{entity_type}/{entity_name}".format(
            entity_type=entity_type, entity_name=entity_name)
        try:
            config_data = load_json(self.get(config_path)[0])
        except NoNodeError as e:
            if not entity_exists(entity_name):
                _log.error("{entity_type} {entity_name} not found".format(
                    entity_type=entity_type, entity_name=entity_name))
                raise e
            # Entity exists but has no config node yet; default to empty.
            _log.info(
                "Configuration not available for {entity_type} {entity_name}."
                .format(
                    entity_type=entity_type,
                    entity_name=entity_name,
                ))
            config_data = {"config": {}}

        return config_data
Exemplo n.º 3
0
    def get_topic_config(self, topic):
        """Get configuration information for specified topic.

        :rtype : dict of configuration
        """
        try:
            raw_config, _ = self.get(
                "/config/topics/{topic}".format(topic=topic))
            return load_json(raw_config)
        except NoNodeError as e:
            # Kafka version before 0.8.1 does not have "/config/topics/<topic_name>" path in ZK and
            # if the topic exists, return default dict instead of raising an Exception.
            # Ref: https://cwiki.apache.org/confluence/display/KAFKA/Kafka+data+structures+in+Zookeeper.
            if self.get_topics(topic_name=topic, fetch_partition_state=False):
                _log.info(
                    "Configuration not available for topic {topic}.".format(
                        topic=topic))
                return {"config": {}}
            _log.error("topic {topic} not found.".format(topic=topic))
            raise e
Exemplo n.º 4
0
 def get_broker_metadata(self, broker_id):
     """Return the decoded metadata znode for the given broker id."""
     broker_path = "/brokers/ids/{b_id}".format(b_id=broker_id)
     try:
         broker_json, _ = self.get(broker_path)
     except NoNodeError:
         _log.error("broker '{b_id}' not found.".format(b_id=broker_id))
         raise
     return load_json(broker_json)
Exemplo n.º 5
0
 def get_pending_plan(self):
     """Read the currently running plan on reassign_partitions node."""
     reassignment_path = '{admin}/{reassignment_node}'.format(
         admin=ADMIN_PATH,
         reassignment_node=REASSIGNMENT_NODE,
     )
     try:
         return load_json(self.get(reassignment_path)[0])
     except NoNodeError:
         # No reassignment currently pending.
         return {}
Exemplo n.º 6
0
 def get_pending_plan(self):
     """Read the currently running plan on reassign_partitions node."""
     node_path = '{admin}/{reassignment_node}'.format(
         admin=ADMIN_PATH, reassignment_node=REASSIGNMENT_NODE)
     try:
         plan_data, _ = self.get(node_path)
     except NoNodeError:
         # Missing node means no plan is in flight.
         return {}
     return load_json(plan_data)
Exemplo n.º 7
0
 def _fetch_partition_state(self, topic_id, partition_id):
     """Fetch partition-state for given topic-partition."""
     state_path = (
         "/brokers/topics/{topic_id}/partitions/{p_id}/state"
         .format(topic_id=topic_id, p_id=partition_id)
     )
     try:
         state_json, _ = self.get(state_path)
     except NoNodeError:
         # The partition has no data
         return {}
     return load_json(state_json)
Exemplo n.º 8
0
 def get_broker_metadata(self, broker_id):
     """Look up the broker znode and return its metadata as a dict."""
     try:
         raw, _stat = self.get("/brokers/ids/{b_id}".format(b_id=broker_id))
     except NoNodeError:
         _log.error(
             "broker '{b_id}' not found.".format(b_id=broker_id),
         )
         raise
     return load_json(raw)
Exemplo n.º 9
0
    def get_topic_config(self, topic):
        """Get configuration information for specified topic.

        :param topic: topic name whose config znode is read
        :rtype : dict of configuration
        :raises NoNodeError: if no config znode exists for the topic
        """
        try:
            config_data = load_json(
                self.get("/config/topics/{topic}".format(topic=topic))[0])
        except NoNodeError:
            _log.error("topic {topic} not found.".format(topic=topic))
            # Bare `raise` re-raises the active exception with its original
            # traceback; `raise e` would append a redundant frame here.
            raise
        return config_data
Exemplo n.º 10
0
 def get_broker_metadata(self, broker_id):
     """Fetch and decode the metadata znode for the given broker id.

     When the 'host' field is null (brokers configured with listeners
     only), the host is recovered from the first entry of 'endpoints'.

     :param broker_id: id of the broker to look up
     :rtype: dict of broker metadata
     :raises NoNodeError: if the broker znode does not exist
     """
     try:
         broker_json = load_json(
             self.get("/brokers/ids/{b_id}".format(b_id=broker_id))[0])
         if broker_json['host'] is None:
             # The previous pattern used [SSL|INTERNAL|PLAINTEXTSASL],
             # which is a character class matching single characters, not
             # the intended protocol-name alternation. Match any listener
             # scheme prefix (e.g. PLAINTEXT://, SASL_SSL://) instead.
             pattern = r'(?:[\w-]+://)?(?P<host>[^:/ ]+):?(?P<port>[0-9]*).*'
             result = re.search(pattern, broker_json['endpoints'][0])
             broker_json['host'] = result.group('host')
     except NoNodeError:
         _log.error("broker '{b_id}' not found.".format(b_id=broker_id))
         raise
     return broker_json
Exemplo n.º 11
0
 def execute_plan(self,
                  plan,
                  allow_rf_change=False,
                  allow_rf_mismatch=False):
     """Submit reassignment plan for execution.

     Validates *plan* against the current cluster plan and, if valid,
     writes it to the admin reassignment znode for Kafka to pick up.

     :param plan: dict with a 'partitions' list; each entry has at least
         'topic' and 'partition' keys.
     :param allow_rf_change: forwarded to validate_plan; allow plans that
         change replication factor.
     :param allow_rf_mismatch: forwarded to validate_plan; allow plans whose
         replication factor mismatches the base plan.
     :returns: True when the plan was written; False when the plan is
         invalid, a previous plan is still in progress, or the write failed.
     """
     reassignment_path = '{admin}/{reassignment_node}'\
         .format(admin=ADMIN_PATH, reassignment_node=REASSIGNMENT_NODE)
     plan_json = dump_json(plan)
     # Limit the base-plan fetch to only the topics touched by the proposed
     # plan, avoiding a read of the entire cluster state.
     topic_names_from_proposed_plan = set()
     for partition in plan['partitions']:
         topic_names_from_proposed_plan.add(partition['topic'])
     base_plan = self.get_cluster_plan(
         topic_names=list(topic_names_from_proposed_plan))
     if not validate_plan(plan,
                          base_plan,
                          allow_rf_change=allow_rf_change,
                          allow_rf_mismatch=allow_rf_mismatch):
         _log.error(
             'Given plan is invalid. Aborting new reassignment plan ... {plan}'
             .format(plan=plan))
         return False
     # Send proposed-plan to zookeeper
     try:
         _log.info('Sending plan to Zookeeper...')
         self.create(reassignment_path, plan_json, makepath=True)
         _log.info(
             'Re-assign partitions node in Zookeeper updated successfully '
             'with {plan}'.format(plan=plan), )
         return True
     except NodeExistsError:
         # The reassignment znode already exists: an earlier reassignment is
         # still running. Log what is in progress and bail out.
         _log.warning('Previous plan in progress. Exiting..')
         _log.warning(
             'Aborting new reassignment plan... {plan}'.format(plan=plan))
         in_progress_plan = load_json(self.get(reassignment_path)[0])
         in_progress_partitions = [
             '{topic}-{p_id}'.format(
                 topic=p_data['topic'],
                 p_id=str(p_data['partition']),
             ) for p_data in in_progress_plan['partitions']
         ]
         _log.warning(
             '{count} partition(s) reassignment currently in progress:-'.
             format(count=len(in_progress_partitions)), )
         _log.warning(
             '{partitions}. In Progress reassignment plan...'.format(
                 partitions=', '.join(in_progress_partitions), ), )
         return False
     except Exception as e:
         # Deliberate best-effort guard: any other failure is logged and
         # reported as False instead of propagating to the caller.
         _log.error(
             'Could not re-assign partitions {plan}. Error: {e}'.format(
                 plan=plan, e=e), )
         return False
Exemplo n.º 12
0
 def execute_plan(self, plan, allow_rf_change=False):
     """Submit reassignment plan for execution.

     Returns True when the plan was written to the reassignment znode,
     False when validation fails, a plan is already running, or the write
     errors out.
     """
     reassignment_path = '{admin}/{reassignment_node}'.format(
         admin=ADMIN_PATH, reassignment_node=REASSIGNMENT_NODE)
     plan_json = dump_json(plan)
     if not validate_plan(plan, self.get_cluster_plan(),
                          allow_rf_change=allow_rf_change):
         _log.error('Given plan is invalid. Aborting new reassignment plan ... {plan}'.format(plan=plan))
         return False
     # Send proposed-plan to zookeeper
     try:
         _log.info('Sending plan to Zookeeper...')
         self.create(reassignment_path, plan_json, makepath=True)
         _log.info(
             'Re-assign partitions node in Zookeeper updated successfully '
             'with {plan}'.format(plan=plan),
         )
         return True
     except NodeExistsError:
         # An earlier reassignment still occupies the znode; report it.
         _log.warning('Previous plan in progress. Exiting..')
         _log.warning('Aborting new reassignment plan... {plan}'.format(plan=plan))
         running_plan = load_json(self.get(reassignment_path)[0])
         running_partitions = []
         for p_data in running_plan['partitions']:
             running_partitions.append('{topic}-{p_id}'.format(
                 topic=p_data['topic'],
                 p_id=str(p_data['partition']),
             ))
         _log.warning(
             '{count} partition(s) reassignment currently in progress:-'
             .format(count=len(running_partitions)),
         )
         _log.warning(
             '{partitions}. In Progress reassignment plan...'.format(
                 partitions=', '.join(running_partitions),
             ),
         )
         return False
     except Exception as e:
         # Best-effort: swallow and report failure as False.
         _log.error(
             'Could not re-assign partitions {plan}. Error: {e}'
             .format(plan=plan, e=e),
         )
         return False
Exemplo n.º 13
0
 def get_json(self, path, watch=None):
     """Reads the data of the specified node and converts it to json."""
     data = self.get(path, watch)[0]
     if not data:
         return None
     return load_json(data)
Exemplo n.º 14
0
    def get_group_offsets(self, group, topic=None):
        """Fetch committed offsets of a consumer group, either for the given
        topic only or for every topic the group subscribes to.

        :param group: consumer group id
        :param topic: optional topic name; when None, all subscribed topics
            are fetched
        :returns: nested mapping of the form

        {
            'topic':
            {
                'partition': offset-value,
                ...
                ...
            }
        }
        """
        group_offsets = {}
        try:
            all_topics = self.get_my_subscribed_topics(group)
        except NoNodeError:
            # No offset information of given consumer-group
            _log.warning(
                "No topics subscribed to consumer-group {group}.".format(
                    group=group, ), )
            return group_offsets
        if topic:
            if topic in all_topics:
                topics = [topic]
            else:
                # Requested topic is not among the group's subscriptions;
                # report the mismatch and return the empty result.
                _log.error(
                    "Topic {topic} not found in topic list {topics} for consumer"
                    "-group {consumer_group}.".format(
                        topic=topic,
                        topics=', '.join(topic for topic in all_topics),
                        consumer_group=group,
                    ), )
                return group_offsets
        else:
            topics = all_topics
        for topic in topics:
            group_offsets[topic] = {}
            try:
                partitions = self.get_my_subscribed_partitions(group, topic)
            except NoNodeError:
                # A topic without partition offsets is skipped, not fatal.
                _log.warning(
                    "No partition offsets found for topic {topic}. "
                    "Continuing to next one...".format(topic=topic), )
                continue
            # Fetch offsets for each partition
            for partition in partitions:
                path = "/consumers/{group_id}/offsets/{topic}/{partition}".format(
                    group_id=group,
                    topic=topic,
                    partition=partition,
                )
                try:
                    # Get current offset
                    offset_json, _ = self.get(path)
                    group_offsets[topic][partition] = load_json(offset_json)
                except NoNodeError:
                    # Partition was listed under subscriptions but its offset
                    # node is missing: treat as inconsistency and propagate.
                    _log.error("Path {path} not found".format(path=path))
                    raise
        return group_offsets
Exemplo n.º 15
0
    def get_multiple_topics(
        self,
        topic_names=None,
        names_only=False,
        fetch_partition_state=True,
    ):
        """Get information on all the available topics.

        :param topic_names: optional list of topics; when falsy, all topics
            under /brokers/topics are fetched
        :param names_only: when True, return just the list of topic names
        :param fetch_partition_state: when True, also read each partition's
            state znode (second zookeeper round-trip per partition)

        Topic-data format with fetch_partition_state as False :-
        topic_data = {
            'version': 1,
            'partitions': {
                <p_id>: {
                    replicas: <broker-ids>
                }
            }
        }

        Topic-data format with fetch_partition_state as True:-
        topic_data = {
            'version': 1,
            'ctime': <timestamp>,
            'partitions': {
                <p_id>:{
                    replicas: [<broker_id>, <broker_id>, ...],
                    isr: [<broker_id>, <broker_id>, ...],
                    controller_epoch: <val>,
                    leader_epoch: <val>,
                    version: 1,
                    leader: <broker-id>,
                    ctime: <timestamp>,
                }
            }
        }
        Note: By default we also fetch partition-state which results in
        accessing the zookeeper twice. If just partition-replica information is
        required fetch_partition_state should be set to False.
        """
        try:
            if not topic_names:
                topic_names = self.get_children("/brokers/topics")
        except NoNodeError:
            _log.error("Cluster is empty.")
            return {}

        if names_only:
            return topic_names
        topics_data = {}
        for topic_id in topic_names:
            try:
                topic_info = self.get(
                    "/brokers/topics/{id}".format(id=topic_id))
                topic_data = load_json(topic_info[0])
                # znode ctime is in milliseconds; convert to seconds.
                topic_ctime = topic_info[1].ctime / 1000.0
                topic_data['ctime'] = topic_ctime
            except NoNodeError:
                # NOTE(review): a single missing topic discards everything
                # collected so far and returns {} — confirm callers rely on
                # this all-or-nothing behavior before changing it.
                _log.info(
                    "topic '{topic}' not found.".format(topic=topic_id), )
                return {}
            # Prepare data for each partition
            partitions_data = {}
            for p_id, replicas in six.iteritems(topic_data['partitions']):
                partitions_data[p_id] = {}
                if fetch_partition_state:
                    # Fetch partition-state from zookeeper
                    # NOTE(review): assumes _fetch_partition_state returns the
                    # raw (data, stat) pair here — verify against the helper.
                    partition_state = self._fetch_partition_state(
                        topic_id, p_id)
                    partitions_data[p_id] = load_json(partition_state[0])
                    partitions_data[p_id][
                        'ctime'] = partition_state[1].ctime / 1000.0
                else:
                    # Fetch partition-info from zookeeper
                    partition_info = self._fetch_partition_info(topic_id, p_id)
                    partitions_data[p_id][
                        'ctime'] = partition_info.ctime / 1000.0
                partitions_data[p_id]['replicas'] = replicas
            topic_data['partitions'] = partitions_data
            topics_data[topic_id] = topic_data
        return topics_data
Exemplo n.º 16
0
    def get_topics(
        self,
        topic_name=None,
        names_only=False,
        fetch_partition_state=True,
    ):
        """Get information on all the available topics.

        :param topic_name: optional single topic; when falsy, all topics
            under /brokers/topics are fetched
        :param names_only: when True, return just the list of topic names
        :param fetch_partition_state: when True, also read each partition's
            state znode (second zookeeper round-trip per partition)

        Topic-data format with fetch_partition_state as False :-
        topic_data = {
            'version': 1,
            'partitions': {
                <p_id>: {
                    replicas: <broker-ids>
                }
            }
        }

        Topic-data format with fetch_partition_state as True:-
        topic_data = {
            'version': 1,
            'ctime': <timestamp>,
            'partitions': {
                <p_id>:{
                    replicas: [<broker_id>, <broker_id>, ...],
                    isr: [<broker_id>, <broker_id>, ...],
                    controller_epoch: <val>,
                    leader_epoch: <val>,
                    version: 1,
                    leader: <broker-id>,
                    ctime: <timestamp>,
                }
            }
        }
        Note: By default we also fetch partition-state which results in
        accessing the zookeeper twice. If just partition-replica information is
        required fetch_partition_state should be set to False.
        """
        try:
            topic_ids = [topic_name] if topic_name else self.get_children(
                "/brokers/topics",
            )
        except NoNodeError:
            _log.error(
                "Cluster is empty."
            )
            return {}

        if names_only:
            return topic_ids
        topics_data = {}
        for topic_id in topic_ids:
            try:
                topic_info = self.get("/brokers/topics/{id}".format(id=topic_id))
                topic_data = load_json(topic_info[0])
                # znode ctime is in milliseconds; convert to seconds.
                topic_ctime = topic_info[1].ctime / 1000.0
                topic_data['ctime'] = topic_ctime
            except NoNodeError:
                # NOTE(review): one missing topic discards all data gathered
                # so far and returns {} — confirm this all-or-nothing result
                # is what callers expect.
                _log.info(
                    "topic '{topic}' not found.".format(topic=topic_id),
                )
                return {}
            # Prepare data for each partition
            partitions_data = {}
            for p_id, replicas in six.iteritems(topic_data['partitions']):
                partitions_data[p_id] = {}
                if fetch_partition_state:
                    # Fetch partition-state from zookeeper
                    # NOTE(review): assumes _fetch_partition_state returns the
                    # raw (data, stat) pair here — verify against the helper.
                    partition_state = self._fetch_partition_state(topic_id, p_id)
                    partitions_data[p_id] = load_json(partition_state[0])
                    partitions_data[p_id]['ctime'] = partition_state[1].ctime / 1000.0
                else:
                    # Fetch partition-info from zookeeper
                    partition_info = self._fetch_partition_info(topic_id, p_id)
                    partitions_data[p_id]['ctime'] = partition_info.ctime / 1000.0
                partitions_data[p_id]['replicas'] = replicas
            topic_data['partitions'] = partitions_data
            topics_data[topic_id] = topic_data
        return topics_data
Exemplo n.º 17
0
    def get_group_offsets(self, group, topic=None):
        """Fetch committed offsets of a consumer group.

        Returns a mapping {topic: {partition: offset}} covering either the
        single requested topic or every topic the group subscribes to.
        """
        group_offsets = {}
        try:
            all_topics = self.get_my_subscribed_topics(group)
        except NoNodeError:
            # No offset information of given consumer-group
            _log.warning(
                "No topics subscribed to consumer-group {group}.".format(
                    group=group,
                ),
            )
            return group_offsets
        if not topic:
            topics = all_topics
        elif topic in all_topics:
            topics = [topic]
        else:
            _log.error(
                "Topic {topic} not found in topic list {topics} for consumer"
                "-group {consumer_group}.".format(
                    topic=topic,
                    topics=', '.join(topic for topic in all_topics),
                    consumer_group=group,
                ),
            )
            return group_offsets
        for topic in topics:
            group_offsets[topic] = {}
            try:
                partitions = self.get_my_subscribed_partitions(group, topic)
            except NoNodeError:
                _log.warning(
                    "No partition offsets found for topic {topic}. "
                    "Continuing to next one...".format(topic=topic),
                )
                continue
            # Fetch offsets for each partition
            for partition in partitions:
                offset_path = (
                    "/consumers/{group_id}/offsets/{topic}/{partition}".format(
                        group_id=group,
                        topic=topic,
                        partition=partition,
                    )
                )
                try:
                    # Get current offset
                    offset_json, _ = self.get(offset_path)
                    group_offsets[topic][partition] = load_json(offset_json)
                except NoNodeError:
                    _log.error("Path {path} not found".format(path=offset_path))
                    raise
        return group_offsets
Exemplo n.º 18
0
 def get_json(self, path, watch=None):
     """Read the specified node and deserialize its payload as json.

     Returns None when the node carries no data.
     """
     payload = self.get(path, watch)[0]
     return load_json(payload) if payload else None