Python ConsumerStats示例，brod.base.ConsumerStats Python示例

示例#1

0

显示文件

文件： f389b82b611152c237400726a4298a11d7c6c981simple.py 项目： isabella232/brod

    def __init__(self, topic, broker_partitions, end_broker_partitions=None):
        """Create a consumer for `topic` over a fixed set of BrokerPartitions.

        If broker_partitions is a list of BrokerPartitions, we assume that
        we'll start at the latest offset. If broker_partitions is a mapping of
        BrokerPartitions to offsets, we'll start at those offsets.

        end_broker_partitions, if given, maps BrokerPartitions to the end
        offset that fetch() compares each partition's next offset against;
        it defaults to an empty dict (no end offsets).
        """
        self._topic = topic
        # sorted() over a mapping iterates its keys, so either input form
        # yields a sorted list of BrokerPartitions here.
        self._broker_partitions = sorted(broker_partitions)
        self._stats = defaultdict(
            lambda: ConsumerStats(fetches=0, bytes=0, messages=0, max_fetch=0))

        # This will collapse duplicates so we only have one conn per host/port
        broker_conn_info = frozenset(
            (bp.broker_id, bp.host, bp.port) for bp in self._broker_partitions)
        self._connections = dict((broker_id, Kafka(host, port))
                                 for broker_id, host, port in broker_conn_info)

        # Figure out where we're going to start from...
        if isinstance(broker_partitions, Mapping):
            self._bps_to_next_offsets = broker_partitions
        else:
            # Connections are keyed by broker id (see above), not by the
            # BrokerPartition itself. We iterate the already-sorted list so
            # that a one-shot iterable consumed by sorted() still works.
            self._bps_to_next_offsets = dict(
                (bp,
                 self._connections[bp.broker_id].latest_offset(bp.topic,
                                                               bp.partition))
                for bp in self._broker_partitions)

        self._end_broker_partitions = end_broker_partitions or {}

示例#2

0

显示文件

    def __init__(self, zk_conn, consumer_group, topic, autocommit=True):
        """Initialise consumer state, register with ZooKeeper and rebalance.

        FIXME: switch arg order and default zk_conn to localhost?
        """
        # Plain attributes that are exposed through properties.
        self._id = self._create_consumer_id(consumer_group)
        self._consumer_group = consumer_group
        self._topic = topic
        self._autocommit = autocommit

        # Internal bookkeeping.
        self._zk_util = ZKUtil(zk_conn)
        self._rebalance_enabled = True  # Debugging aid only
        self._needs_rebalance = True
        self._bps_to_next_offsets = {}  # Filled in after a successful fetch
        self._broker_partitions = []  # Filled in while rebalancing

        # Handles for our ZooKeeper notification subscriptions; none yet.
        self._topic_watch = self._topics_watch = None
        self._consumers_watch = self._brokers_watch = None

        # Announce ourselves in ZK so that other Consumers see us as active,
        # then rebalance right away to learn which broker-partitions are ours.
        self._register()
        self.rebalance()

        self._stats = ConsumerStats(
            fetches=0, bytes=0, messages=0, max_fetch=0)

示例#3

0

显示文件

文件： f389b82b611152c237400726a4298a11d7c6c981simple.py 项目： isabella232/brod

    def stats(self):
        """Return one ConsumerStats aggregating every per-partition entry.

        fetches, bytes and messages are summed across all entries of
        self._stats; max_fetch is the largest max_fetch seen, with a floor
        of 0 (which is also the result when there are no entries).
        """
        per_partition = list(self._stats.values())

        total_fetches = sum(entry.fetches for entry in per_partition)
        total_bytes = sum(entry.bytes for entry in per_partition)
        total_messages = sum(entry.messages for entry in per_partition)
        # Seed with 0 so an empty stats table still yields a value.
        largest_fetch = max([0] + [entry.max_fetch for entry in per_partition])

        return ConsumerStats(total_fetches, total_bytes, total_messages,
                             largest_fetch)

示例#4

0

显示文件

文件： f389b82b611152c237400726a4298a11d7c6c981simple.py 项目： isabella232/brod

    def fetch(self, max_size=None, min_size=None, fetch_step=None):
        """Fetch once from each broker partition we currently hold an offset
        for and return the sorted MessageSets wrapped in a FetchResult.

        max_size, min_size and fetch_step are handed straight to the
        underlying Kafka fetch call. Per-partition stats are updated for each
        fetch. Afterwards the offset table is rebuilt from the result,
        keeping only partitions that have no end offset or whose next offset
        is still <= their end offset.
        """
        log.debug("Fetch called on SimpleConsumer {0}".format(self.id))
        starting_offsets = self._bps_to_next_offsets

        # One fetch per partition (this should get replaced with multifetch
        # or performance is going to suck wind later). Only partitions we
        # have an offset for are visited.
        fetched = []
        for bp, start in starting_offsets.items():
            conn = self._connections[bp.broker_id]
            raw_msgs = conn.fetch(bp.topic,
                                  start,
                                  partition=bp.partition,
                                  min_size=min_size,
                                  max_size=max_size,
                                  fetch_step=fetch_step)
            msg_set = MessageSet(bp, start, raw_msgs)

            # Fold this fetch into the running stats for the partition.
            previous = self._stats[bp]
            self._stats[bp] = ConsumerStats(
                fetches=previous.fetches + 1,
                bytes=previous.bytes + msg_set.size,
                messages=previous.messages + len(msg_set),
                max_fetch=max(previous.max_fetch, msg_set.size))

            fetched.append(msg_set)

        # sorted() of an empty list is just an empty list, so no special case.
        result = FetchResult(sorted(fetched))

        # Rebuild the offset table, dropping partitions that ran past their
        # configured end offset.
        remaining = self._bps_to_next_offsets = {}
        for fetched_set in result:
            partition_key = fetched_set.broker_partition
            upcoming = fetched_set.next_offset
            limit = self._end_broker_partitions.get(partition_key, None)

            still_in_range = limit is None or upcoming <= limit
            if still_in_range:
                remaining[partition_key] = upcoming

        return result

示例#5

0

显示文件

    def fetch(self, max_size=None, retry_limit=3, ignore_failures=False):
        """Return a FetchResult, which can be iterated over as a list of
        MessageSets. A MessageSet is returned for every broker partition that
        is successfully queried, even if that MessageSet is empty.

        max_size is passed to each Kafka fetch. retry_limit is accepted but
        not used in this method. With ignore_failures set, a KafkaError from
        a partition's fetch is logged and that partition is skipped;
        otherwise the error propagates.

        FIXME: This is where the adjustment needs to happen. Regardless of
        whether a rebalance has occurred or not, we can very easily see if we
        are still responsible for the same partitions as we were the last time
        we ran, and set self._bps_to_next_offsets --> we just need to check if
        it's not None and if we still have the same offsets, and adjust
        accordingly.
        """
        log.debug("Fetch called on ZKConsumer {0}".format(self.id))
        if self._needs_rebalance:
            self.rebalance()

        # Work out our starting offsets. With no internal offsets yet this is
        # our first fetch and ZooKeeper has to tell us where to begin (an
        # out-of-range ZK value later falls back to the partition's latest
        # offset). Otherwise we trust our own bookkeeping, which is also the
        # only option when autocommit is off, since ZK would be out of date.
        offsets_pulled_from_zk = not self._bps_to_next_offsets
        if offsets_pulled_from_zk:
            start_offsets = self._zk_util.offsets_for(self.consumer_group,
                                                      self._id,
                                                      self.broker_partitions)
        else:
            start_offsets = self._bps_to_next_offsets

        # One fetch per partition (this should get replaced with multifetch
        # or performance is going to suck wind later). Only partitions we
        # have an entry for are visited.
        collected = []
        for bp in start_offsets:
            start = start_offsets[bp]
            conn = self._connections[bp.broker_id]
            partition = conn.partition(bp.topic, bp.partition)

            if start is None:
                start = partition.latest_offset()

            try:
                fetched_msgs = conn.fetch(bp.topic,
                                          start,
                                          partition=bp.partition,
                                          max_size=max_size)
            except OffsetOutOfRange:
                # Only a ZK-supplied offset is assumed to be stale; an
                # out-of-range offset from our own incrementing is an error.
                if not offsets_pulled_from_zk:
                    raise
                log.error(
                    "Offset {0} from ZooKeeper is out of range for {1}".
                    format(start, bp))
                start = partition.latest_offset()
                log.error("Retrying with offset {0} for {1}".format(
                    start, bp))
                fetched_msgs = conn.fetch(bp.topic,
                                          start,
                                          partition=bp.partition,
                                          max_size=max_size)
            except KafkaError as err:
                if not ignore_failures:
                    raise
                log.error("Ignoring failed fetch on {0}".format(bp))
                log.exception(err)
                continue

            collected.append(MessageSet(bp, start, fetched_msgs))

        result = FetchResult(sorted(collected))

        # Remember where the next fetch should resume for each partition.
        for fetched_set in result:
            self._bps_to_next_offsets[
                fetched_set.broker_partition] = fetched_set.next_offset

        if self._autocommit:
            self.commit_offsets()

        # Fold this call into the running totals:
        # fetches / bytes / messages / max_fetch.
        previous = self._stats
        self._stats = ConsumerStats(
            fetches=previous.fetches + 1,
            bytes=previous.bytes + result.num_bytes,
            messages=previous.messages + result.num_messages,
            max_fetch=max(previous.max_fetch, result.num_bytes))
        return result

示例#6

0

显示文件

文件： zk.py 项目： isabella232/brod

    def fetch(self, max_size=None, retry_limit=3, ignore_failures=False):
        """Return a FetchResult, which can be iterated over as a list of
        MessageSets. A MessageSet is returned for every broker partition that
        is successfully queried, even if that MessageSet is empty.

        Starting offsets come from our internal bookkeeping where we have
        them. If we have none at all, or some are unknown (None), the missing
        ones are looked up in ZooKeeper; an offset that is still None after
        that falls back to the partition's latest offset.

        max_size is passed to each Kafka fetch. retry_limit is accepted but
        not used in this method. With ignore_failures set, a KafkaError from
        a partition's initial fetch is logged and that partition is skipped;
        otherwise it propagates. A KafkaError raised by the out-of-range
        retry is not covered by ignore_failures and always propagates.

        An OffsetOutOfRange is recovered from (by retrying at the latest
        offset) only for partitions whose offset ZooKeeper supplied during
        this call. An out-of-range offset that came from our own tracking is
        re-raised, even if other partitions needed a ZooKeeper lookup.

        FIXME: This is where the adjustment needs to happen. Regardless of
        whether a rebalance has occurred or not, we can very easily see if we
        are still responsible for the same partitions as we were the last time
        we ran, and set self._bps_to_next_offsets --> we just need to check if
        it's not None and if we still have the same offsets, and adjust
        accordingly.
        """
        def needs_offset_values_from_zk(bps_to_offsets):
            """We need to pull offset values from ZK if we have no
            BrokerPartitions in our BPs -> Offsets mapping, or if some of those
            Offsets are unknown (None)"""
            return (not bps_to_offsets) or (None in bps_to_offsets.values())

        log.debug("Fetch called on ZKConsumer {0}".format(self.id))
        if self._needs_rebalance:
            self.rebalance()

        # Find where we're starting from. If we've already done a fetch, we use
        # our internal value. This is also all we can do in the case where
        # autocommit is off, since any value in ZK will be out of date.
        bps_to_offsets = dict(self._bps_to_next_offsets)
        # BrokerPartitions whose starting offset was supplied by ZooKeeper in
        # this call. Tracked per partition so that the stale-ZK recovery below
        # never masks an out-of-range offset from our own bookkeeping.
        bps_from_zk = frozenset()

        if needs_offset_values_from_zk(bps_to_offsets):
            # We have some offsets, but we've been made responsible for new
            # BrokerPartitions that we need to lookup.
            if bps_to_offsets:
                bps_needing_offsets = [
                    bp for bp, offset in bps_to_offsets.items()
                    if offset is None
                ]
            # Otherwise, it's our first fetch, so we need everything
            else:
                bps_needing_offsets = self.broker_partitions

            zk_offsets = dict(
                self._zk_util.offsets_for(self.consumer_group, self._id,
                                          bps_needing_offsets))
            bps_to_offsets.update(zk_offsets)
            bps_from_zk = frozenset(zk_offsets)

        # Do all the fetches we need to (this should get replaced with
        # multifetch or performance is going to suck wind later)...
        message_sets = []
        # We only iterate over those broker partitions for which we have offsets
        for bp in bps_to_offsets:
            offset = bps_to_offsets[bp]
            kafka = self._connections[bp.broker_id]
            partition = kafka.partition(bp.topic, bp.partition)

            if offset is None:
                offset = partition.latest_offset()

            try:
                offsets_msgs = kafka.fetch(bp.topic,
                                           offset,
                                           partition=bp.partition,
                                           max_size=max_size)

            # If our fetch fails because it's out of range, and the value for
            # this partition came from ZK (not our internal incrementing), we
            # assume ZK is somehow stale, so we just grab the latest and march
            # on.
            except OffsetOutOfRange:
                if bp in bps_from_zk:
                    log.error(
                        "Offset {0} from ZooKeeper is out of range for {1}".
                        format(offset, bp))
                    offset = partition.latest_offset()
                    log.error("Retrying with offset {0} for {1}".format(
                        offset, bp))
                    offsets_msgs = kafka.fetch(bp.topic,
                                               offset,
                                               partition=bp.partition,
                                               max_size=max_size)
                else:
                    raise
            except KafkaError as k_err:
                if ignore_failures:
                    log.error("Ignoring failed fetch on {0}".format(bp))
                    log.exception(k_err)
                    continue
                else:
                    raise

            msg_set = MessageSet(bp, offset, offsets_msgs)

            # Fold this fetch into the per-partition stats:
            # fetches / bytes / messages / max_fetch.
            old_stats = self._stats[bp]
            self._stats[bp] = ConsumerStats(
                fetches=old_stats.fetches + 1,
                bytes=old_stats.bytes + msg_set.size,
                messages=old_stats.messages + len(msg_set),
                max_fetch=max(old_stats.max_fetch, msg_set.size))

            message_sets.append(msg_set)

        result = FetchResult(sorted(message_sets))

        # Now persist our new offsets
        for msg_set in result:
            self._bps_to_next_offsets[
                msg_set.broker_partition] = msg_set.next_offset

        if self._autocommit:
            self.commit_offsets()

        return result