Example #1
    def update_metadata(self, metadata):
        # In the common case where we ask for a single topic and get back an
        # error, we should fail the future
        if len(metadata.topics) == 1 and metadata.topics[0][0] != 0:
            error_code, topic, _ = metadata.topics[0]
            error = Errors.for_code(error_code)(topic)
            return self.failed_update(error)

        if not metadata.brokers:
            log.warning("No broker metadata found in MetadataResponse")

        for node_id, host, port in metadata.brokers:
            self._brokers.update({
                node_id: BrokerMetadata(node_id, host, port)
            })

        # Drop any UnknownTopic, InvalidTopic, and TopicAuthorizationFailed
        # but retain LeaderNotAvailable because it means topic is initializing
        self._partitions.clear()
        self._broker_partitions.clear()

        for error_code, topic, partitions in metadata.topics:
            error_type = Errors.for_code(error_code)
            if error_type is Errors.NoError:
                self._partitions[topic] = {}
                for _, partition, leader, _, _ in partitions:
                    self._partitions[topic][partition] = leader
                    if leader != -1:
                        self._broker_partitions[leader].add(TopicPartition(topic, partition))
            elif error_type is Errors.LeaderNotAvailableError:
                log.error("Topic %s is not available during auto-create"
                          " initialization", topic)
            elif error_type is Errors.UnknownTopicOrPartitionError:
                log.error("Topic %s not found in cluster metadata", topic)
            elif error_type is Errors.TopicAuthorizationFailedError:
                log.error("Topic %s is not authorized for this client", topic)
            elif error_type is Errors.InvalidTopicError:
                log.error("'%s' is not a valid topic name", topic)
            else:
                log.error("Error fetching metadata for topic %s: %s",
                          topic, error_type)

        if self._future:
            self._future.success(self)
        self._future = None
        self._need_update = False
        self._version += 1
        now = time.time() * 1000
        self._last_refresh_ms = now
        self._last_successful_refresh_ms = now
        log.debug("Updated cluster metadata version %d to %s",
                  self._version, self)

        for listener in self._listeners:
            listener(self)
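
All of the handlers in these examples share one dispatch idiom: the broker returns a numeric error code, `Errors.for_code` maps it to an exception class, and handlers compare classes with `is` before instantiating. A minimal, self-contained sketch of that registry (illustrative names only; the real one ships with the kafka client library):

class KafkaError(Exception):
    retriable = False

class NoError(KafkaError):
    errno = 0

class UnknownTopicOrPartitionError(KafkaError):
    errno = 3
    retriable = True

# Map wire-level error codes to exception classes
_CODE_TO_ERROR = {
    cls.errno: cls for cls in (NoError, UnknownTopicOrPartitionError)}

def for_code(error_code):
    # Unknown codes fall back to the generic error class
    return _CODE_TO_ERROR.get(error_code, KafkaError)

error_type = for_code(3)
assert error_type is UnknownTopicOrPartitionError
error = error_type('demo-topic')  # instantiate only when failing a future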
Example #2
 def _handle_heartbeat_response(self, future, response):
     #self.sensors.heartbeat_latency.record(response.requestLatencyMs())
     error_type = Errors.for_code(response.error_code)
     if error_type is Errors.NoError:
         log.debug("Received successful heartbeat response.")
         future.success(None)
     elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                         Errors.NotCoordinatorForGroupError):
         log.info("Heartbeat failed: coordinator is either not started or"
                  " not valid; will refresh metadata and retry")
         self.coordinator_dead()
         future.failure(error_type())
     elif error_type is Errors.RebalanceInProgressError:
         log.info("Heartbeat failed: group is rebalancing; re-joining group")
         self.rejoin_needed = True
         future.failure(error_type())
     elif error_type is Errors.IllegalGenerationError:
         log.info("Heartbeat failed: local generation id is not current;"
                  " re-joining group")
         self.rejoin_needed = True
         future.failure(error_type())
     elif error_type is Errors.UnknownMemberIdError:
         log.info("Heartbeat failed: local member_id was not recognized;"
                  " resetting and re-joining group")
         self.member_id = JoinGroupRequest.UNKNOWN_MEMBER_ID
         self.rejoin_needed = True
         future.failure(error_type())
     elif error_type is Errors.GroupAuthorizationFailedError:
         error = error_type(self.group_id)
         log.error("Heartbeat failed: authorization error: %s", error)
         future.failure(error)
     else:
         error = error_type()
         log.error("Heartbeat failed: Unhandled error: %s", error)
         future.failure(error)
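
The `future` these handlers resolve is settled exactly once with either `success()` or `failure()`, and `chain()` (used by the join-group example further down) forwards one future's outcome to another. A minimal stand-in illustrating that contract (the real Future ships with the kafka client library):

class Future(object):
    def __init__(self):
        self.is_done = False
        self.value = None
        self.exception = None
        self._chained = []

    def success(self, value):
        assert not self.is_done, 'Future may only be resolved once'
        self.is_done, self.value = True, value
        for other in self._chained:
            other.success(value)

    def failure(self, exception):
        assert not self.is_done, 'Future may only be resolved once'
        self.is_done, self.exception = True, exception
        for other in self._chained:
            other.failure(exception)

    def chain(self, other):
        # Forward this future's eventual outcome to `other`
        self._chained.append(other)
        return self

inner, outer = Future(), Future()
inner.chain(outer)
inner.failure(RuntimeError('heartbeat failed'))
assert outer.is_done and outer.exception is not None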
Example #3
 def _handle_offset_fetch_response(self, future, response):
     offsets = {}
     for topic, partitions in response.topics:
         for partition, offset, metadata, error_code in partitions:
             tp = TopicPartition(topic, partition)
             error_type = Errors.for_code(error_code)
             if error_type is not Errors.NoError:
                 error = error_type()
                 log.debug("Error fetching offset for %s: %s", tp, error_type())
                 if error_type is Errors.GroupLoadInProgressError:
                     # just retry
                     future.failure(error)
                 elif error_type is Errors.NotCoordinatorForGroupError:
                     # re-discover the coordinator and retry
                     self.coordinator_dead()
                     future.failure(error)
                 elif error_type in (Errors.UnknownMemberIdError, Errors.IllegalGenerationError):
                     # need to re-join group
                     self._subscription.mark_for_reassignment()
                     future.failure(error)
                 elif error_type is Errors.UnknownTopicOrPartitionError:
                     log.warning("OffsetFetchRequest -- unknown topic %s", topic)
                     continue
                 else:
                     log.error("Unknown error fetching offsets for %s: %s", tp, error)
                     future.failure(error)
                 return
             elif offset >= 0:
                 # record the position with the offset (-1 indicates no committed offset to fetch)
                 offsets[tp] = OffsetAndMetadata(offset, metadata)
             else:
                 log.debug("No committed offset for partition %s", tp)
     future.success(offsets)
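
The response walked above is plain nested tuples: `response.topics` is a list of `(topic, [(partition, offset, metadata, error_code), ...])` pairs. A self-contained sketch of the unpacking with stand-in data:

from collections import namedtuple

TopicPartition = namedtuple('TopicPartition', ['topic', 'partition'])
OffsetAndMetadata = namedtuple('OffsetAndMetadata', ['offset', 'metadata'])

# Stand-in payload; offset -1 signals "no committed offset"
# (error_code 0 means NoError)
response_topics = [('my-topic', [(0, 42, '', 0), (1, -1, '', 0)])]

offsets = {}
for topic, partitions in response_topics:
    for partition, offset, metadata, error_code in partitions:
        if error_code == 0 and offset >= 0:
            offsets[TopicPartition(topic, partition)] = \
                OffsetAndMetadata(offset, metadata)

assert offsets == {TopicPartition('my-topic', 0): OffsetAndMetadata(42, '')}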
Example #4
    def _handle_sync_group_response(self, future, response):
        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            log.debug("Received successful sync group response for group %s: %s",
                      self.group_id, response)
            #self.sensors.syncLatency.record(response.requestLatencyMs())
            future.success(response.member_assignment)
            return

        # Always rejoin on error
        self.rejoin_needed = True
        if error_type is Errors.GroupAuthorizationFailedError:
            future.failure(error_type(self.group_id))
        elif error_type is Errors.RebalanceInProgressError:
            log.info("SyncGroup for group %s failed due to coordinator"
                     " rebalance, rejoining the group", self.group_id)
            future.failure(error_type(self.group_id))
        elif error_type in (Errors.UnknownMemberIdError,
                            Errors.IllegalGenerationError):
            error = error_type()
            log.info("SyncGroup for group %s failed due to %s,"
                     " rejoining the group", self.group_id, error)
            self.member_id = JoinGroupRequest.UNKNOWN_MEMBER_ID
            future.failure(error)
        elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                            Errors.NotCoordinatorForGroupError):
            error = error_type()
            log.info("SyncGroup for group %s failed due to %s, will find new"
                     " coordinator and rejoin", self.group_id, error)
            self.coordinator_dead()
            future.failure(error)
        else:
            error = error_type()
            log.error("Unexpected error from SyncGroup: %s", error)
            future.failure(error)
Example #5
    def _handle_offset_response(self, partition, future, response):
        """Callback for the response of the list offset call above.

        Arguments:
            partition (TopicPartition): The partition that was fetched
            future (Future): the future to update based on response
            response (OffsetResponse): response from the server

        Raises:
            AssertionError: if response does not match partition
        """
        topic, partition_info = response.topics[0]
        assert len(response.topics) == 1 and len(partition_info) == 1, (
            'OffsetResponse should only be for a single topic-partition')

        part, error_code, offsets = partition_info[0]
        assert topic == partition.topic and part == partition.partition, (
            'OffsetResponse partition does not match OffsetRequest partition')

        error_type = Errors.for_code(error_code)
        if error_type is Errors.NoError:
            assert len(offsets) == 1, 'Expected OffsetResponse with one offset'
            offset = offsets[0]
            log.debug("Fetched offset %d for partition %s", offset, partition)
            future.success(offset)
        elif error_type in (Errors.NotLeaderForPartitionError,
                       Errors.UnknownTopicOrPartitionError):
            log.warning("Attempt to fetch offsets for partition %s failed due"
                        " to obsolete leadership information, retrying.",
                        partition)
            future.failure(error_type(partition))
        else:
            log.error("Attempt to fetch offsets for partition %s failed due to:"
                      " %s", partition, error_type)
            future.failure(error_type(partition))
Example #6
    def _proc_offset_request(self, partition, timestamp):
        """Fetch a single offset before the given timestamp for the partition.

        Arguments:
            partition (TopicPartition): partition that needs fetching offset
            timestamp (int): timestamp for fetching offset

        Returns:
            Future: resolves to the corresponding offset
        """
        node_id = self._client.cluster.leader_for_partition(partition)
        if node_id is None:
            log.debug("Partition %s is unknown for fetching offset,"
                      " wait for metadata refresh", partition)
            raise Errors.StaleMetadata(partition)
        elif node_id == -1:
            log.debug(
                "Leader for partition %s unavailable for fetching offset,"
                " wait for metadata refresh", partition)
            raise Errors.LeaderNotAvailableError(partition)

        request = OffsetRequest(
            -1, [(partition.topic, [(partition.partition, timestamp, 1)])]
        )

        if not (yield from self._client.ready(node_id)):
            raise Errors.NodeNotReadyError(node_id)

        response = yield from self._client.send(node_id, request)

        topic, partition_info = response.topics[0]
        assert len(response.topics) == 1 and len(partition_info) == 1, (
            'OffsetResponse should only be for a single topic-partition')

        part, error_code, offsets = partition_info[0]
        assert topic == partition.topic and part == partition.partition, (
            'OffsetResponse partition does not match OffsetRequest partition')

        error_type = Errors.for_code(error_code)
        if error_type is Errors.NoError:
            if not offsets:
                return -1
            assert len(offsets) == 1, 'Expected OffsetResponse with one offset'
            offset = offsets[0]
            log.debug("Fetched offset %d for partition %s", offset, partition)
            return offset
        elif error_type in (Errors.NotLeaderForPartitionError,
                            Errors.UnknownTopicOrPartitionError):
            log.warning("Attempt to fetch offsets for partition %s failed due"
                        " to obsolete leadership information, retrying.",
                        partition)
            raise error_type(partition)
        else:
            log.error(
                "Attempt to fetch offsets for partition %s failed due to:"
                " %s", partition, error_type)
            raise error_type(partition)
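
The `timestamp` in the OffsetRequest tuple above is a sentinel from the legacy Offset API, not a wall-clock value: -1 requests the latest offset, -2 the earliest, and the trailing 1 is max_num_offsets. A hedged usage sketch; `fetcher` and `tp` are hypothetical names:

LATEST = -1    # the offset of the next message to be written
EARLIEST = -2  # the first offset still retained by the broker

# Driven from a pre-`await` coroutine, as in the example above:
#     earliest = yield from fetcher._proc_offset_request(tp, EARLIEST)
#     latest = yield from fetcher._proc_offset_request(tp, LATEST)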
Example #7
    def _handle_join_group_response(self, future, response):
        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            self.member_id = response.member_id
            self.generation = response.generation_id
            self.rejoin_needed = False
            self.protocol = response.group_protocol
            log.info("Joined group '%s' (generation %s) with member_id %s",
                     self.group_id, self.generation, self.member_id)
            #self.sensors.join_latency.record(response.requestLatencyMs())
            if response.leader_id == response.member_id:
                log.info("Elected group leader -- performing partition"
                         " assignments using %s", self.protocol)
                self._on_join_leader(response).chain(future)
            else:
                self._on_join_follower().chain(future)

        elif error_type is Errors.GroupLoadInProgressError:
            log.debug("Attempt to join group %s rejected since coordinator is"
                      " loading the group.", self.group_id)
            # backoff and retry
            future.failure(error_type(response))
        elif error_type is Errors.UnknownMemberIdError:
            # reset the member id and retry immediately
            error = error_type(self.member_id)
            self.member_id = JoinGroupRequest.UNKNOWN_MEMBER_ID
            log.info("Attempt to join group %s failed due to unknown member id,"
                     " resetting and retrying.", self.group_id)
            future.failure(error)
        elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                            Errors.NotCoordinatorForGroupError):
            # re-discover the coordinator and retry with backoff
            self.coordinator_dead()
            log.info("Attempt to join group %s failed due to obsolete "
                     "coordinator information, retrying.", self.group_id)
            future.failure(error_type())
        elif error_type in (Errors.InconsistentGroupProtocolError,
                            Errors.InvalidSessionTimeoutError,
                            Errors.InvalidGroupIdError):
            # log the error and re-throw the exception
            error = error_type(response)
            log.error("Attempt to join group %s failed due to: %s",
                      self.group_id, error)
            future.failure(error)
        elif error_type is Errors.GroupAuthorizationFailedError:
            future.failure(error_type(self.group_id))
        else:
            # unexpected error, throw the exception
            error = error_type()
            log.error("Unexpected error in join group response: %s", error)
            future.failure(error)
Example #8
    def _handle_produce_response(self, batches, response):
        """Handle a produce response."""
        # if we have a response, parse it
        log.debug('Parsing produce response: %r', response)
        if response:
            batches_by_partition = {
                batch.topic_partition: batch for batch in batches}

            for topic, partitions in response.topics:
                for partition, error_code, offset in partitions:
                    tp = TopicPartition(topic, partition)
                    error = Errors.for_code(error_code)
                    batch = batches_by_partition[tp]
                    self._complete_batch(batch, error, offset)

        else:
            # this is the acks = 0 case, just complete all requests
            for batch in batches:
                self._complete_batch(batch, None, -1)
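
With `required_acks=0` the broker sends nothing back, which is why the `else` branch completes every batch with an unknown offset of -1. A self-contained miniature of that path; `FakeBatch` is a stand-in for the real message batch:

class FakeBatch(object):
    def __init__(self, topic_partition):
        self.topic_partition = topic_partition
        self.base_offset = None

    def done(self, base_offset=None):
        self.base_offset = base_offset

batches = [FakeBatch(('demo-topic', 0)), FakeBatch(('demo-topic', 1))]
response = None  # acks=0: no broker response to parse
if not response:
    for batch in batches:
        batch.done(-1)  # completed, but the real offset is unknown
assert all(batch.base_offset == -1 for batch in batches)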
Example #9
 def _send_req(self, node_id, request):
     """send request to Kafka node and mark coordinator as `dead`
     in error case
     """
     try:
         resp = yield from self._client.send(node_id, request)
     except Errors.KafkaError as err:
         log.error(
             'Error sending %s to node %s [%s] -- marking coordinator dead',
             request.__class__.__name__, node_id, err)
         self.coordinator_dead()
         raise err
     else:
         if not hasattr(resp, 'error_code'):
             return resp
         error_type = Errors.for_code(resp.error_code)
         if error_type is Errors.NoError:
             return resp
         else:
             raise error_type()
Example #10
    def add_group_coordinator(self, group, response):
        """Update with metadata for a group coordinator

        Arguments:
            group (str): name of group from GroupCoordinatorRequest
            response (GroupCoordinatorResponse): broker response

        Returns:
            bool: True if metadata is updated, False on error
        """
        log.debug("Updating coordinator for %s: %s", group, response)
        error_type = Errors.for_code(response.error_code)
        if error_type is not Errors.NoError:
            log.error("GroupCoordinatorResponse error: %s", error_type)
            self._groups[group] = -1
            return False

        node_id = response.coordinator_id
        coordinator = BrokerMetadata(
            response.coordinator_id,
            response.host,
            response.port)

        # Assume that group coordinators are just brokers
        # (this is true now, but could diverge in future)
        if node_id not in self._brokers:
            self._brokers[node_id] = coordinator

        # If this happens, either brokers have moved without
        # changing IDs, or our assumption above is wrong
        elif coordinator != self._brokers[node_id]:
            log.error("GroupCoordinator metadata conflicts with existing"
                      " broker metadata. Coordinator: %s, Broker: %s",
                      coordinator, self._brokers[node_id])
            self._groups[group] = node_id
            return False

        log.info("Group coordinator for %s is %s", group, coordinator)
        self._groups[group] = node_id
        return True
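
The conflict branch above fires when a coordinator response maps an existing node_id to different host/port metadata, which would mean a broker moved without changing its id. A self-contained sketch of that comparison; `BrokerMetadata` is the same namedtuple-style record used above:

from collections import namedtuple

BrokerMetadata = namedtuple('BrokerMetadata', ['nodeId', 'host', 'port'])

brokers = {0: BrokerMetadata(0, 'kafka-0.example.com', 9092)}
coordinator = BrokerMetadata(0, 'kafka-9.example.com', 9092)

# Same node_id, different endpoint: this is the "metadata conflicts" branch
assert coordinator != brokers[coordinator.nodeId]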
Example #11
    def _handle_group_coordinator_response(self, future, response):
        log.debug("Group metadata response %s", response)
        if not self.coordinator_unknown():
            # We already found the coordinator, so ignore the response
            log.debug("Coordinator already known -- ignoring metadata response")
            future.success(self.coordinator_id)
            return

        error_type = Errors.for_code(response.error_code)
        if error_type is Errors.NoError:
            ok = self._client.cluster.add_group_coordinator(self.group_id, response)
            if not ok:
                # This could happen if coordinator metadata is different
                # than broker metadata
                future.failure(Errors.IllegalStateError())
                return

            self.coordinator_id = response.coordinator_id
            self._client.ready(self.coordinator_id)

            # start sending heartbeats only if we have a valid generation
            if self.generation > 0:
                self.heartbeat_task.reset()
            future.success(self.coordinator_id)
        elif error_type is Errors.GroupCoordinatorNotAvailableError:
            log.debug("Group Coordinator Not Available; retry")
            future.failure(error_type())
        elif error_type is Errors.GroupAuthorizationFailedError:
            error = error_type(self.group_id)
            log.error("Group Coordinator Request failed: %s", error)
            future.failure(error)
        else:
            error = error_type()
            log.error("Unrecognized failure in Group Coordinator Request: %s",
                      error)
            future.failure(error)
Example #12
    def commit_offsets(self, offsets):
        """Commit specific offsets asynchronously.

        Arguments:
            offsets (dict {TopicPartition: OffsetAndMetadata}): what to commit

        Raises a KafkaError subclass on failure.
        """
        self._subscription.needs_fetch_committed_offsets = True
        if not offsets:
            log.debug('No offsets to commit')
            return True

        if (yield from self.coordinator_unknown()):
            raise Errors.GroupCoordinatorNotAvailableError()
        node_id = self.coordinator_id

        # create the offset commit request
        offset_data = collections.defaultdict(list)
        for tp, offset in offsets.items():
            offset_data[tp.topic].append(
                (tp.partition,
                 offset.offset,
                 offset.metadata))

        request = OffsetCommitRequest(
            self.group_id,
            self.generation,
            self.member_id,
            OffsetCommitRequest.DEFAULT_RETENTION_TIME,
            [(topic, tp_offsets) for topic, tp_offsets in offset_data.items()]
        )

        log.debug(
            "Sending offset-commit request with %s to %s", offsets, node_id)

        response = yield from self._send_req(node_id, request)

        unauthorized_topics = set()
        for topic, partitions in response.topics:
            for partition, error_code in partitions:
                tp = TopicPartition(topic, partition)
                offset = offsets[tp]

                error_type = Errors.for_code(error_code)
                if error_type is Errors.NoError:
                    log.debug(
                        "Committed offset %s for partition %s", offset, tp)
                    if self._subscription.is_assigned(tp):
                        partition = self._subscription.assignment[tp]
                        partition.committed = offset.offset
                elif error_type is Errors.GroupAuthorizationFailedError:
                    log.error("OffsetCommit failed for group %s - %s",
                              self.group_id, error_type.__name__)
                    raise error_type()
                elif error_type is Errors.TopicAuthorizationFailedError:
                    unauthorized_topics.add(topic)
                elif error_type in (Errors.OffsetMetadataTooLargeError,
                                    Errors.InvalidCommitOffsetSizeError):
                    # raise the error to the user
                    log.info(
                        "OffsetCommit failed for group %s on partition %s"
                        " due to %s, will retry", self.group_id, tp,
                        error_type.__name__)
                    raise error_type()
                elif error_type is Errors.GroupLoadInProgressError:
                    # just retry
                    log.info(
                        "OffsetCommit failed for group %s because group is"
                        " initializing (%s), will retry", self.group_id,
                        error_type.__name__)
                    raise error_type()
                elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                                    Errors.NotCoordinatorForGroupError,
                                    Errors.RequestTimedOutError):
                    log.info(
                        "OffsetCommit failed for group %s due to a"
                        " coordinator error (%s), will find new coordinator"
                        " and retry", self.group_id, error_type.__name__)
                    self.coordinator_dead()
                    raise error_type()
                elif error_type in (Errors.UnknownMemberIdError,
                                    Errors.IllegalGenerationError,
                                    Errors.RebalanceInProgressError):
                    # need to re-join group
                    error = error_type(self.group_id)
                    log.error(
                        "OffsetCommit failed for group %s due to group"
                        " error (%s), will rejoin", self.group_id, error)
                    self._subscription.mark_for_reassignment()
                    raise error
                else:
                    log.error(
                        "OffsetCommit failed for group %s on partition %s"
                        " with offset %s: %s", self.group_id, tp, offset,
                        error_type.__name__)
                    raise error_type()

        if unauthorized_topics:
            log.error("OffsetCommit failed for unauthorized topics %s",
                      unauthorized_topics)
            raise Errors.TopicAuthorizationFailedError(unauthorized_topics)
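
The request body above is built by grouping per-partition `(partition, offset, metadata)` triples under their topic with a `defaultdict`. A self-contained sketch of just that grouping step:

import collections

TopicPartition = collections.namedtuple(
    'TopicPartition', ['topic', 'partition'])
OffsetAndMetadata = collections.namedtuple(
    'OffsetAndMetadata', ['offset', 'metadata'])

offsets = {
    TopicPartition('demo-topic', 0): OffsetAndMetadata(5, ''),
    TopicPartition('demo-topic', 1): OffsetAndMetadata(7, ''),
}

offset_data = collections.defaultdict(list)
for tp, offset in offsets.items():
    offset_data[tp.topic].append(
        (tp.partition, offset.offset, offset.metadata))

assert sorted(offset_data['demo-topic']) == [(0, 5, ''), (1, 7, '')]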
Example #13
    def _proc_fetch_request(self, node_id, request):
        needs_wakeup = False
        try:
            response = yield from self._client.send(node_id, request)
        except Errors.KafkaError as err:
            log.error("Failed fetch messages from %s: %s", node_id, err)
            return False
        finally:
            self._in_flight.remove(node_id)

        fetch_offsets = {}
        for topic, partitions in request.topics:
            for partition, offset, _ in partitions:
                fetch_offsets[TopicPartition(topic, partition)] = offset

        for topic, partitions in response.topics:
            for partition, error_code, highwater, messages in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                if not self._subscriptions.is_fetchable(tp):
                    # this can happen when a rebalance has occurred
                    log.debug(
                        "Ignoring fetched records for partition %s"
                        " since it is no longer fetchable", tp)

                elif error_type is Errors.NoError:
                    self._subscriptions.assignment[tp].highwater = highwater

                    # we are interested in this fetch only if the beginning
                    # offset matches the current consumed position
                    fetch_offset = fetch_offsets[tp]
                    partial = None
                    if messages and \
                            isinstance(messages[-1][-1], PartialMessage):
                        partial = messages.pop()

                    if messages:
                        log.debug(
                            "Adding fetched record for partition %s with"
                            " offset %d to buffered record list", tp,
                            fetch_offset)
                        try:
                            messages = collections.deque(
                                self._unpack_message_set(tp, messages))
                        except Errors.InvalidMessageError as err:
                            self._set_error(tp, err)
                            continue

                        self._records[tp] = FetchResult(
                            tp,
                            messages=messages,
                            subscriptions=self._subscriptions,
                            backoff=self._prefetch_backoff,
                            loop=self._loop)
                        # We added at least 1 successful record
                        needs_wakeup = True
                    elif partial:
                        # we did not read a single message from a non-empty
                        # buffer because the first message is larger than the
                        # fetch size; record this as an error for the partition
                        err = RecordTooLargeError(
                            "There are some messages at [Partition=Offset]: "
                            "%s=%s whose size is larger than the fetch size %s"
                            " and hence cannot be ever returned. "
                            "Increase the fetch size, or decrease the maximum "
                            "message size the broker will allow.", tp,
                            fetch_offset, self._max_partition_fetch_bytes)
                        self._set_error(tp, err)
                        needs_wakeup = True
                        self._subscriptions.assignment[tp].position += 1

                elif error_type in (Errors.NotLeaderForPartitionError,
                                    Errors.UnknownTopicOrPartitionError):
                    self._client.force_metadata_update()
                elif error_type is Errors.OffsetOutOfRangeError:
                    fetch_offset = fetch_offsets[tp]
                    if self._subscriptions.has_default_offset_reset_policy():
                        self._subscriptions.need_offset_reset(tp)
                    else:
                        err = Errors.OffsetOutOfRangeError({tp: fetch_offset})
                        self._set_error(tp, err)
                        needs_wakeup = True
                    log.info(
                        "Fetch offset %s is out of range, resetting offset",
                        fetch_offset)
                elif error_type is Errors.TopicAuthorizationFailedError:
                    log.warn("Not authorized to read from topic %s.", tp.topic)
                    err = Errors.TopicAuthorizationFailedError(tp.topic)
                    self._set_error(tp, err)
                    needs_wakeup = True
                else:
                    log.warning('Unexpected error while fetching data: %s',
                                error_type.__name__)
        return needs_wakeup
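
A truncated tail record shows up as a `PartialMessage` in the last position of the message list, and the code above pops it before buffering; when the partial is the only entry, the single message at the fetch offset is larger than the fetch size. A self-contained miniature with a stand-in type:

from collections import namedtuple

PartialMessage = namedtuple('PartialMessage', [])

# Stand-in (offset, size, payload) entries; the tail entry was cut off
# by the per-partition fetch size
messages = [(0, 7, b'value-0'), (1, 7, b'value-1'), (2, 7, PartialMessage())]

partial = None
if messages and isinstance(messages[-1][-1], PartialMessage):
    partial = messages.pop()

assert partial is not None and len(messages) == 2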
Example #14
    def _handle_fetch_response(self, request, response):
        """The callback for fetch completion"""
        #total_bytes = 0
        #total_count = 0

        fetch_offsets = {}
        for topic, partitions in request.topics:
            for partition, offset, _ in partitions:
                fetch_offsets[TopicPartition(topic, partition)] = offset

        for topic, partitions in response.topics:
            for partition, error_code, highwater, messages in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                if not self._subscriptions.is_fetchable(tp):
                    # this can happen when a rebalance has occurred or partition
                    # consumption was paused while a fetch was still in flight
                    log.debug("Ignoring fetched records for partition %s"
                              " since it is no longer fetchable", tp)
                elif error_type is Errors.NoError:
                    fetch_offset = fetch_offsets[tp]

                    # we are interested in this fetch only if the beginning
                    # offset matches the current consumed position
                    position = self._subscriptions.assignment[tp].position
                    if position is None or position != fetch_offset:
                        log.debug("Discarding fetch response for partition %s"
                                  " since its offset %d does not match the"
                                  " expected offset %d", tp, fetch_offset,
                                  position)
                        continue

                    partial = None
                    if messages and isinstance(messages[-1][-1], PartialMessage):
                        partial = messages.pop()

                    if messages:
                        log.debug("Adding fetched record for partition %s with"
                                  " offset %d to buffered record list", tp,
                                  position)
                        self._records.append((fetch_offset, tp, messages))
                        #last_offset, _, _ = messages[-1]
                        #self.sensors.records_fetch_lag.record(highwater - last_offset)
                    elif partial:
                        # we did not read a single message from a non-empty
                        # buffer because the first message is larger than the
                        # fetch size; record this as an error for the partition
                        self._record_too_large_partitions[tp] = fetch_offset

                    # TODO: bytes metrics
                    #self.sensors.record_topic_fetch_metrics(tp.topic, num_bytes, parsed.size());
                    #totalBytes += num_bytes;
                    #totalCount += parsed.size();
                elif error_type in (Errors.NotLeaderForPartitionError,
                                    Errors.UnknownTopicOrPartitionError):
                    self._client.cluster.request_update()
                elif error_type is Errors.OffsetOutOfRangeError:
                    fetch_offset = fetch_offsets[tp]
                    if self._subscriptions.has_default_offset_reset_policy():
                        self._subscriptions.need_offset_reset(tp)
                    else:
                        self._offset_out_of_range_partitions[tp] = fetch_offset
                    log.info("Fetch offset %s is out of range, resetting offset",
                             fetch_offset)
                elif error_type is Errors.TopicAuthorizationFailedError:
                    log.warn("Not authorized to read from topic %s.", tp.topic)
                    self._unauthorized_topics.add(tp.topic)
                elif error_type is Errors.UnknownError:
                    log.warn("Unknown error fetching data for topic-partition %s", tp)
                else:
                    raise error_type('Unexpected error while fetching data')

        """TOOD - metrics
Example #15
 def _handle_leave_group_response(self, response):
     error_type = Errors.for_code(response.error_code)
     if error_type is Errors.NoError:
         log.info("LeaveGroup request succeeded")
     else:
         log.error("LeaveGroup request failed: %s", error_type())
Example #16
    def update_metadata(self, metadata):
        """Update cluster state given a MetadataResponse.

        Arguments:
            metadata (MetadataResponse): broker response to a metadata request

        Returns: None
        """
        # In the common case where we ask for a single topic and get back an
        # error, we should fail the future
        if len(metadata.topics) == 1 and metadata.topics[0][0] != 0:
            error_code, topic, _ = metadata.topics[0]
            error = Errors.for_code(error_code)(topic)
            return self.failed_update(error)

        if not metadata.brokers:
            log.warning("No broker metadata found in MetadataResponse")

        for node_id, host, port in metadata.brokers:
            self._brokers.update(
                {node_id: BrokerMetadata(node_id, host, port)})

        _new_partitions = {}
        _new_broker_partitions = collections.defaultdict(set)
        _new_unauthorized_topics = set()

        for error_code, topic, partitions in metadata.topics:
            error_type = Errors.for_code(error_code)
            if error_type is Errors.NoError:
                _new_partitions[topic] = {}
                for p_error, partition, leader, replicas, isr in partitions:
                    _new_partitions[topic][partition] = PartitionMetadata(
                        topic=topic,
                        partition=partition,
                        leader=leader,
                        replicas=replicas,
                        isr=isr,
                        error=p_error)
                    if leader != -1:
                        _new_broker_partitions[leader].add(
                            TopicPartition(topic, partition))

            elif error_type is Errors.LeaderNotAvailableError:
                log.warning(
                    "Topic %s is not available during auto-create"
                    " initialization", topic)
            elif error_type is Errors.UnknownTopicOrPartitionError:
                log.error("Topic %s not found in cluster metadata", topic)
            elif error_type is Errors.TopicAuthorizationFailedError:
                log.error("Topic %s is not authorized for this client", topic)
                _new_unauthorized_topics.add(topic)
            elif error_type is Errors.InvalidTopicError:
                log.error("'%s' is not a valid topic name", topic)
            else:
                log.error("Error fetching metadata for topic %s: %s", topic,
                          error_type)

        with self._lock:
            self._partitions = _new_partitions
            self._broker_partitions = _new_broker_partitions
            self.unauthorized_topics = _new_unauthorized_topics
            f = None
            if self._future:
                f = self._future
            self._future = None
            self._need_update = False

        now = time.time() * 1000
        self._last_refresh_ms = now
        self._last_successful_refresh_ms = now

        if f:
            f.success(self)
        log.debug("Updated cluster metadata to %s", self)

        for listener in self._listeners:
            listener(self)
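
Unlike Example #1, this variant builds the replacement maps off to the side and swaps them in under `self._lock`, so a concurrent reader never observes a half-rebuilt view. A self-contained miniature of that copy-then-swap idiom:

import threading

class MetadataCache(object):
    def __init__(self):
        self._lock = threading.Lock()
        self._partitions = {}

    def update(self, fresh_items):
        new_partitions = {}
        for topic, leaders in fresh_items:  # slow work stays outside the lock
            new_partitions[topic] = dict(leaders)
        with self._lock:  # publish atomically: readers see old or new, never a mix
            self._partitions = new_partitions

cache = MetadataCache()
cache.update([('topic-a', {0: 1}), ('topic-b', {0: 2})])
assert cache._partitions['topic-a'] == {0: 1}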
Example #17
    def _send_produce_req(self, node_id, batches):
        """Create produce request to node
        If producer configured with `retries`>0 and produce response contain
        "failed" partitions produce request for this partition will try
        resend to broker `retries` times with `retry_timeout_ms` timeouts.

        Arguments:
            node_id (int): kafka broker identifier
            batches (dict): dictionary of {TopicPartition: MessageBatch}
        """
        self._in_flight.add(node_id)
        t0 = self._loop.time()
        while True:
            topics = collections.defaultdict(list)
            for tp, batch in batches.items():
                topics[tp.topic].append((tp.partition, batch.data()))

            request = ProduceRequest(
                required_acks=self._acks,
                timeout=self._request_timeout_ms,
                topics=list(topics.items()))

            try:
                response = yield from self.client.send(node_id, request)
            except KafkaError as err:
                for batch in batches.values():
                    if not err.retriable or batch.expired():
                        batch.done(exception=err)
                log.warning(
                    "Got error produce response: %s", err)
                if not err.retriable:
                    break
            else:
                if response is None:
                    # noacks, just "done" batches
                    for batch in batches.values():
                        batch.done()
                    break

                for topic, partitions in response.topics:
                    for partition, error_code, offset in partitions:
                        tp = TopicPartition(topic, partition)
                        error = Errors.for_code(error_code)
                        batch = batches.pop(tp, None)
                        if batch is None:
                            continue

                        if error is Errors.NoError:
                            batch.done(offset)
                        elif not getattr(error, 'retriable', False) or \
                                batch.expired():
                            batch.done(exception=error())
                        else:
                            # Ok, we can retry this batch
                            batches[tp] = batch
                            log.warning(
                                "Got error produce response on topic-partition"
                                " %s, retrying. Error: %s", tp, error)

            if batches:
                yield from asyncio.sleep(
                    self._retry_backoff, loop=self._loop)
            else:
                break

        # if the batches for this node were processed in less than the
        # linger time, wait out the remaining time
        sleep_time = self._linger_time - (self._loop.time() - t0)
        if sleep_time > 0:
            yield from asyncio.sleep(sleep_time, loop=self._loop)

        self._in_flight.remove(node_id)
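
The loop above retries only retriable, unexpired batches and sleeps `_retry_backoff` between rounds. A self-contained miniature of that retry shape in the same pre-`await` coroutine style (names like `send_with_retries` and `flaky_send` are illustrative):

import asyncio

@asyncio.coroutine
def send_with_retries(batches, send, retry_backoff=0.01):
    while batches:
        remaining = []
        for batch in batches:
            try:
                yield from send(batch)
            except ConnectionError:  # stand-in for a retriable KafkaError
                remaining.append(batch)
        batches = remaining
        if batches:
            yield from asyncio.sleep(retry_backoff)

attempts = []

@asyncio.coroutine
def flaky_send(batch):
    attempts.append(batch)
    if len(attempts) < 2:
        raise ConnectionError

loop = asyncio.get_event_loop()
loop.run_until_complete(send_with_retries(['batch-0'], flaky_send))
assert len(attempts) == 2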
Example #18
    def update_metadata(self, metadata):
        # In the common case where we ask for a single topic and get back an
        # error, we should fail the future
        if len(metadata.topics) == 1 and metadata.topics[0][0] != 0:
            error_code, topic, _ = metadata.topics[0]
            error = Errors.for_code(error_code)(topic)
            return self.failed_update(error)

        if not metadata.brokers:
            log.warning("No broker metadata found in MetadataResponse")

        for node_id, host, port in metadata.brokers:
            self._brokers.update({
                node_id: BrokerMetadata(node_id, host, port)
            })

        _new_partitions = {}
        _new_broker_partitions = collections.defaultdict(set)
        _new_unauthorized_topics = set()

        for error_code, topic, partitions in metadata.topics:
            error_type = Errors.for_code(error_code)
            if error_type is Errors.NoError:
                _new_partitions[topic] = {}
                for p_error, partition, leader, replicas, isr in partitions:
                    _new_partitions[topic][partition] = PartitionMetadata(
                        topic=topic, partition=partition, leader=leader,
                        replicas=replicas, isr=isr, error=p_error)
                    if leader != -1:
                        _new_broker_partitions[leader].add(
                            TopicPartition(topic, partition))

            elif error_type is Errors.LeaderNotAvailableError:
                log.warning("Topic %s is not available during auto-create"
                            " initialization", topic)
            elif error_type is Errors.UnknownTopicOrPartitionError:
                log.error("Topic %s not found in cluster metadata", topic)
            elif error_type is Errors.TopicAuthorizationFailedError:
                log.error("Topic %s is not authorized for this client", topic)
                _new_unauthorized_topics.add(topic)
            elif error_type is Errors.InvalidTopicError:
                log.error("'%s' is not a valid topic name", topic)
            else:
                log.error("Error fetching metadata for topic %s: %s",
                          topic, error_type)

        with self._lock:
            self._partitions = _new_partitions
            self._broker_partitions = _new_broker_partitions
            self.unauthorized_topics = _new_unauthorized_topics
            f = None
            if self._future:
                f = self._future
            self._future = None
            self._need_update = False

        now = time.time() * 1000
        self._last_refresh_ms = now
        self._last_successful_refresh_ms = now

        if f:
            f.success(self)
        log.debug("Updated cluster metadata to %s", self)

        for listener in self._listeners:
            listener(self)
Example #19
    def _handle_fetch_response(self, request, response):
        """The callback for fetch completion"""
        #total_bytes = 0
        #total_count = 0

        fetch_offsets = {}
        for topic, partitions in request.topics:
            for partition, offset, _ in partitions:
                fetch_offsets[TopicPartition(topic, partition)] = offset

        for topic, partitions in response.topics:
            for partition, error_code, highwater, messages in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                if not self._subscriptions.is_fetchable(tp):
                    # this can happen when a rebalance has occurred or partition
                    # consumption was paused while a fetch was still in flight
                    log.debug(
                        "Ignoring fetched records for partition %s"
                        " since it is no longer fetchable", tp)

                elif error_type is Errors.NoError:
                    self._subscriptions.assignment[tp].highwater = highwater

                    # we are interested in this fetch only if the beginning
                    # offset matches the current consumed position
                    fetch_offset = fetch_offsets[tp]
                    position = self._subscriptions.assignment[tp].position
                    if position is None or position != fetch_offset:
                        log.debug(
                            "Discarding fetch response for partition %s"
                            " since its offset %d does not match the"
                            " expected offset %d", tp, fetch_offset, position)
                        continue

                    partial = None
                    if messages and isinstance(messages[-1][-1],
                                               PartialMessage):
                        partial = messages.pop()

                    if messages:
                        log.debug(
                            "Adding fetched record for partition %s with"
                            " offset %d to buffered record list", tp, position)
                        self._records.append((fetch_offset, tp, messages))
                        #last_offset, _, _ = messages[-1]
                        #self.sensors.records_fetch_lag.record(highwater - last_offset)
                    elif partial:
                        # we did not read a single message from a non-empty
                        # buffer because the first message is larger than the
                        # fetch size; record this as an error for the partition
                        self._record_too_large_partitions[tp] = fetch_offset

                    # TODO: bytes metrics
                    #self.sensors.record_topic_fetch_metrics(tp.topic, num_bytes, parsed.size());
                    #totalBytes += num_bytes;
                    #totalCount += parsed.size();
                elif error_type in (Errors.NotLeaderForPartitionError,
                                    Errors.UnknownTopicOrPartitionError):
                    self._client.cluster.request_update()
                elif error_type is Errors.OffsetOutOfRangeError:
                    fetch_offset = fetch_offsets[tp]
                    if self._subscriptions.has_default_offset_reset_policy():
                        self._subscriptions.need_offset_reset(tp)
                    else:
                        self._offset_out_of_range_partitions[tp] = fetch_offset
                    log.info(
                        "Fetch offset %s is out of range, resetting offset",
                        fetch_offset)
                elif error_type is Errors.TopicAuthorizationFailedError:
                    log.warn("Not authorized to read from topic %s.", tp.topic)
                    self._unauthorized_topics.add(tp.topic)
                elif error_type is Errors.UnknownError:
                    log.warning(
                        "Unknown error fetching data for topic-partition %s",
                        tp)
                else:
                    raise error_type('Unexpected error while fetching data')
        """TOOD - metrics
Example #20
    def _handle_offset_commit_response(self, offsets, future, response):
        #self.sensors.commit_latency.record(response.requestLatencyMs())
        unauthorized_topics = set()

        for topic, partitions in response.topics:
            for partition, error_code in partitions:
                tp = TopicPartition(topic, partition)
                offset = offsets[tp]

                error_type = Errors.for_code(error_code)
                if error_type is Errors.NoError:
                    log.debug("Committed offset %s for partition %s", offset, tp)
                    if self._subscription.is_assigned(tp):
                        self._subscription.assignment[tp].committed = offset.offset
                elif error_type is Errors.GroupAuthorizationFailedError:
                    log.error("OffsetCommit failed for group %s - %s",
                              self.group_id, error_type.__name__)
                    future.failure(error_type(self.group_id))
                    return
                elif error_type is Errors.TopicAuthorizationFailedError:
                    unauthorized_topics.add(topic)
                elif error_type in (Errors.OffsetMetadataTooLargeError,
                                    Errors.InvalidCommitOffsetSizeError):
                    # raise the error to the user
                    log.info("OffsetCommit failed for group %s on partition %s"
                             " due to %s, will retry", self.group_id, tp,
                             error_type.__name__)
                    future.failure(error_type())
                    return
                elif error_type is Errors.GroupLoadInProgressError:
                    # just retry
                    log.info("OffsetCommit failed for group %s because group is"
                             " initializing (%s), will retry", self.group_id,
                             error_type.__name__)
                    future.failure(error_type(self.group_id))
                    return
                elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                                    Errors.NotCoordinatorForGroupError,
                                    Errors.RequestTimedOutError):
                    log.info("OffsetCommit failed for group %s due to a"
                             " coordinator error (%s), will find new coordinator"
                             " and retry", self.group_id, error_type.__name__)
                    self.coordinator_dead()
                    future.failure(error_type(self.group_id))
                    return
                elif error_type in (Errors.UnknownMemberIdError,
                                    Errors.IllegalGenerationError,
                                    Errors.RebalanceInProgressError):
                    # need to re-join group
                    error = error_type(self.group_id)
                    log.error("OffsetCommit failed for group %s due to group"
                              " error (%s), will rejoin", self.group_id, error)
                    self._subscription.mark_for_reassignment()
                    # Errors.CommitFailedError("Commit cannot be completed due to group rebalance"))
                    future.failure(error)
                    return
                else:
                    log.error("OffsetCommit failed for group % on partition %s"
                              " with offset %s: %s", self.group_id, tp, offset,
                              error_type.__name__)
                    future.failure(error_type())
                    return

        if unauthorized_topics:
            log.error("OffsetCommit failed for unauthorized topics %s",
                      unauthorized_topics)
            future.failure(Errors.TopicAuthorizationFailedError(unauthorized_topics))
        else:
            future.success(True)
Example #21
    def commit_offsets(self, offsets):
        """Commit specific offsets asynchronously.

        Arguments:
            offsets (dict {TopicPartition: OffsetAndMetadata}): what to commit

        Raises a KafkaError subclass on failure.
        """
        self._subscription.needs_fetch_committed_offsets = True
        if not offsets:
            log.debug('No offsets to commit')
            return True

        if (yield from self.coordinator_unknown()):
            raise Errors.GroupCoordinatorNotAvailableError()
        node_id = self.coordinator_id

        # create the offset commit request
        offset_data = collections.defaultdict(list)
        for tp, offset in offsets.items():
            offset_data[tp.topic].append(
                (tp.partition, offset.offset, offset.metadata))

        request = OffsetCommitRequest(
            self.group_id, self.generation, self.member_id,
            OffsetCommitRequest.DEFAULT_RETENTION_TIME,
            [(topic, tp_offsets) for topic, tp_offsets in offset_data.items()])

        log.debug("Sending offset-commit request with %s for group %s to %s",
                  offsets, self.group_id, node_id)

        response = yield from self._send_req(node_id, request)

        unauthorized_topics = set()
        for topic, partitions in response.topics:
            for partition, error_code in partitions:
                tp = TopicPartition(topic, partition)
                offset = offsets[tp]

                error_type = Errors.for_code(error_code)
                if error_type is Errors.NoError:
                    log.debug("Committed offset %s for partition %s", offset,
                              tp)
                    if self._subscription.is_assigned(tp):
                        partition = self._subscription.assignment[tp]
                        partition.committed = offset.offset
                elif error_type is Errors.GroupAuthorizationFailedError:
                    log.error("OffsetCommit failed for group %s - %s",
                              self.group_id, error_type.__name__)
                    raise error_type()
                elif error_type is Errors.TopicAuthorizationFailedError:
                    unauthorized_topics.add(topic)
                elif error_type in (Errors.OffsetMetadataTooLargeError,
                                    Errors.InvalidCommitOffsetSizeError):
                    # raise the error to the user
                    log.info(
                        "OffsetCommit failed for group %s on partition %s"
                        " due to %s, will retry", self.group_id, tp,
                        error_type.__name__)
                    raise error_type()
                elif error_type is Errors.GroupLoadInProgressError:
                    # just retry
                    log.info(
                        "OffsetCommit failed for group %s because group is"
                        " initializing (%s), will retry", self.group_id,
                        error_type.__name__)
                    raise error_type()
                elif error_type in (Errors.GroupCoordinatorNotAvailableError,
                                    Errors.NotCoordinatorForGroupError,
                                    Errors.RequestTimedOutError):
                    log.info(
                        "OffsetCommit failed for group %s due to a"
                        " coordinator error (%s), will find new coordinator"
                        " and retry", self.group_id, error_type.__name__)
                    self.coordinator_dead()
                    raise error_type()
                elif error_type in (Errors.UnknownMemberIdError,
                                    Errors.IllegalGenerationError,
                                    Errors.RebalanceInProgressError):
                    # need to re-join group
                    error = error_type(self.group_id)
                    log.error(
                        "OffsetCommit failed for group %s due to group"
                        " error (%s), will rejoin", self.group_id, error)
                    self._subscription.mark_for_reassignment()
                    raise error
                else:
                    log.error(
                        "OffsetCommit failed for group %s on partition %s"
                        " with offset %s: %s", self.group_id, tp, offset,
                        error_type.__name__)
                    raise error_type()

        if unauthorized_topics:
            log.error("OffsetCommit failed for unauthorized topics %s",
                      unauthorized_topics)
            raise Errors.TopicAuthorizationFailedError(unauthorized_topics)
Example #22
    def _proc_fetch_request(self, node_id, request):
        needs_wakeup = False
        try:
            response = yield from self._client.send(node_id, request)
        except Errors.KafkaError as err:
            log.error("Failed fetch messages from %s: %s", node_id, err)
            return False
        finally:
            self._in_flight.remove(node_id)

        fetch_offsets = {}
        for topic, partitions in request.topics:
            for partition, offset, _ in partitions:
                fetch_offsets[TopicPartition(topic, partition)] = offset

        for topic, partitions in response.topics:
            for partition, error_code, highwater, messages in partitions:
                tp = TopicPartition(topic, partition)
                error_type = Errors.for_code(error_code)
                if not self._subscriptions.is_fetchable(tp):
                    # this can happen when a rebalance has occurred
                    log.debug("Ignoring fetched records for partition %s"
                              " since it is no longer fetchable", tp)

                elif error_type is Errors.NoError:
                    self._subscriptions.assignment[tp].highwater = highwater

                    # we are interested in this fetch only if the beginning
                    # offset matches the current consumed position
                    fetch_offset = fetch_offsets[tp]
                    partial = None
                    if messages and \
                            isinstance(messages[-1][-1], PartialMessage):
                        partial = messages.pop()

                    if messages:
                        log.debug(
                            "Adding fetched record for partition %s with"
                            " offset %d to buffered record list",
                            tp, fetch_offset)
                        try:
                            messages = collections.deque(
                                self._unpack_message_set(tp, messages))
                        except Errors.InvalidMessageError as err:
                            self._set_error(tp, err)
                            continue

                        self._records[tp] = FetchResult(
                            tp, messages=messages,
                            subscriptions=self._subscriptions,
                            backoff=self._prefetch_backoff,
                            loop=self._loop)
                        # We added at least 1 successful record
                        needs_wakeup = True
                    elif partial:
                        # we did not read a single message from a non-empty
                        # buffer because the first message is larger than the
                        # fetch size; record this as an error for the partition
                        err = RecordTooLargeError(
                            "There are some messages at [Partition=Offset]: "
                            "%s=%s whose size is larger than the fetch size %s"
                            " and hence cannot be ever returned. "
                            "Increase the fetch size, or decrease the maximum "
                            "message size the broker will allow.",
                            tp, fetch_offset, self._max_partition_fetch_bytes)
                        self._set_error(tp, err)
                        needs_wakeup = True
                        self._subscriptions.assignment[tp].position += 1

                elif error_type in (Errors.NotLeaderForPartitionError,
                                    Errors.UnknownTopicOrPartitionError):
                    self._client.force_metadata_update()
                elif error_type is Errors.OffsetOutOfRangeError:
                    fetch_offset = fetch_offsets[tp]
                    if self._subscriptions.has_default_offset_reset_policy():
                        self._subscriptions.need_offset_reset(tp)
                    else:
                        err = Errors.OffsetOutOfRangeError({tp: fetch_offset})
                        self._set_error(tp, err)
                        needs_wakeup = True
                    log.info(
                        "Fetch offset %s is out of range, resetting offset",
                        fetch_offset)
                elif error_type is Errors.TopicAuthorizationFailedError:
                    log.warn("Not authorized to read from topic %s.", tp.topic)
                    err = Errors.TopicAuthorizationFailedError(tp.topic)
                    self._set_error(tp, err)
                    needs_wakeup = True
                else:
                    log.warning('Unexpected error while fetching data: %s',
                                error_type.__name__)
        return needs_wakeup