Example #1
    def _unpack_records(self):
        # NOTE: if the batch is not compressed it's equal to 1 record in
        #       v0 and v1.
        tp = self._tp
        records = self._records
        while records.has_next():
            next_batch = records.next_batch()
            if self._check_crcs and not next_batch.validate_crc():
                # This iterator will be closed after the exception, so we don't
                # try to drain other batches here. They will be refetched.
                raise Errors.CorruptRecordException(f"Invalid CRC - {tp}")

            if self._isolation_level == READ_COMMITTED and \
                    next_batch.producer_id is not None:
                self._consume_aborted_up_to(next_batch.base_offset)

                if next_batch.is_control_batch:
                    if self._contains_abort_marker(next_batch):
                        # Using `discard` instead of `remove`, because Kafka
                        # may return an abort marker for an otherwise empty
                        # topic-partition.
                        self._aborted_producers.discard(next_batch.producer_id)

                if next_batch.is_transactional and \
                        next_batch.producer_id in self._aborted_producers:
                    log.debug(
                        "Skipping aborted record batch from partition %s with"
                        " producer_id %s and offsets %s to %s", tp,
                        next_batch.producer_id, next_batch.base_offset,
                        next_batch.next_offset - 1)
                    self.next_fetch_offset = next_batch.next_offset
                    continue

            # We skip control batches no matter the isolation level
            if next_batch.is_control_batch:
                self.next_fetch_offset = next_batch.next_offset
                continue

            for record in next_batch:
                # It's OK for a record's offset to be larger than the
                # current fetch position; that happens in compacted topics,
                # where intermediate offsets may have been removed.
                if record.offset < self.next_fetch_offset:
                    # Probably just a compressed messageset, it's ok to skip.
                    continue
                consumer_record = self._consumer_record(tp, record)
                self.next_fetch_offset = record.offset + 1
                yield consumer_record

            # Message format v2 preserves the last offset in a batch even if
            # the last record is removed through compaction. By using the next
            # offset computed from the last offset in the batch, we ensure that
            # the offset of the next fetch will point to the next batch, which
            # avoids unnecessary re-fetching of the same batch (in the worst
            # case, the consumer could get stuck fetching the same batch
            # repeatedly).
            self.next_fetch_offset = next_batch.next_offset
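
To make the READ_COMMITTED skipping above concrete, here is a minimal, self-contained sketch that replays the same control flow against hypothetical in-memory batches. The Batch class, the sample data, and unpack() are invented for this illustration and are not the client's API; only the decision order (abort marker first, then the transactional skip, then the unconditional control-batch skip, then the per-record offset check) mirrors the method above.

from dataclasses import dataclass, field
from typing import List, Optional, Tuple

@dataclass
class Batch:
    # Hypothetical stand-in for a fetched record batch (not the client's API).
    base_offset: int
    next_offset: int
    producer_id: Optional[int] = None
    is_control_batch: bool = False
    is_transactional: bool = False
    is_abort_marker: bool = False
    records: List[Tuple[int, str]] = field(default_factory=list)

def unpack(batches, aborted_producers, next_fetch_offset=0):
    """Yield (offset, value) pairs, skipping data from aborted transactions."""
    for batch in batches:
        if batch.producer_id is not None:
            if batch.is_control_batch and batch.is_abort_marker:
                # The abort marker closes the transaction for this producer.
                aborted_producers.discard(batch.producer_id)
            if batch.is_transactional and batch.producer_id in aborted_producers:
                next_fetch_offset = batch.next_offset
                continue
        if batch.is_control_batch:
            # Control batches never carry user data, whatever the isolation level.
            next_fetch_offset = batch.next_offset
            continue
        for offset, value in batch.records:
            if offset < next_fetch_offset:
                continue
            next_fetch_offset = offset + 1
            yield offset, value
        # Advance to the start of the next batch even if compaction removed
        # the trailing records of this one.
        next_fetch_offset = batch.next_offset

# Producer 7's transaction was aborted: its data batch is skipped, the abort
# marker is consumed, and only the non-transactional batch is delivered.
aborted = {7}
batches = [
    Batch(0, 2, producer_id=7, is_transactional=True,
          records=[(0, "txn-a"), (1, "txn-b")]),
    Batch(2, 3, producer_id=7, is_control_batch=True, is_abort_marker=True),
    Batch(3, 5, records=[(3, "plain-c"), (4, "plain-d")]),
]
print(list(unpack(batches, aborted)))  # [(3, 'plain-c'), (4, 'plain-d')]

In the real method the aborted-producer set is not pre-populated as it is here: _consume_aborted_up_to fills it from the aborted-transactions metadata that accompanies the fetch response, and next_fetch_offset is the position the next fetch request is issued from.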
Example #2
    def _unpack_records(self, tp, records):
        # NOTE: if the batch is not compressed it's equal to 1 record in
        #       v0 and v1.
        deserialize = self._deserialize
        check_crcs = self._check_crcs
        while records.has_next():
            next_batch = records.next_batch()
            if check_crcs and not next_batch.validate_crc():
                # This iterator will be closed after the exception, so we don't
                # try to drain other batches here. They will be refetched.
                raise Errors.CorruptRecordException("Invalid CRC")
            for record in next_batch:
                # Save encoded sizes
                key_size = len(record.key) if record.key is not None else -1
                value_size = \
                    len(record.value) if record.value is not None else -1
                key, value = deserialize(record)
                yield ConsumerRecord(tp.topic, tp.partition, record.offset,
                                     record.timestamp, record.timestamp_type,
                                     key, value, record.checksum, key_size,
                                     value_size)
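
The older variant above captures the encoded key and value sizes before handing the raw bytes to the configured deserializers. Below is a rough sketch of that step in isolation; RawRecord, PlainRecord, and the deserialize() helper are hypothetical names used only for this illustration (not the client's ConsumerRecord or deserializer plumbing), and the size bookkeeping is folded into the helper rather than computed inline as in the method above.

from collections import namedtuple

RawRecord = namedtuple("RawRecord", "offset key value")    # hypothetical wire-level record
PlainRecord = namedtuple("PlainRecord", "offset key value key_size value_size")

def deserialize(record, key_deserializer=None, value_deserializer=None):
    # Sizes come from the raw bytes, before deserialization, so they reflect
    # what was actually transferred; -1 marks a null key or value.
    key_size = len(record.key) if record.key is not None else -1
    value_size = len(record.value) if record.value is not None else -1
    key = record.key
    if key_deserializer is not None and key is not None:
        key = key_deserializer(key)
    value = record.value
    if value_deserializer is not None and value is not None:
        value = value_deserializer(value)
    return PlainRecord(record.offset, key, value, key_size, value_size)

raw = RawRecord(offset=42, key=b"user-1", value=b'{"clicks": 3}')
print(deserialize(raw, value_deserializer=lambda v: v.decode("utf-8")))
# PlainRecord(offset=42, key=b'user-1', value='{"clicks": 3}', key_size=6, value_size=13)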