示例#1
0
 def _verify_txn_started(self, transactional_id):
     """Check that a transactional producer has an open transaction.

     Nothing is checked when no transaction manager is set, or when the
     manager carries no ``transactional_id``.

     Arguments:
         transactional_id: accepted for the caller's convenience; this
             check does not consult it.

     Raises:
         IllegalOperation: if the manager has a ``transactional_id`` but
             reports that no transaction is in progress.
     """
     manager = self._txn_manager
     # Non-transactional producers have nothing to verify.
     if manager is None or manager.transactional_id is None:
         return
     if not manager.is_in_transaction():
         raise IllegalOperation(
             "Can't send messages while not in transaction")
示例#2
0
    async def send_batch(self, batch, topic, *, partition):
        """Hand a prepared BatchBuilder to the accumulator for one partition.

        Arguments:
            batch (BatchBuilder): the batch to publish.
            topic (str): destination topic.
            partition (int): destination partition (keyword-only).

        Returns:
            asyncio.Future: set once the batch has been delivered.

        Raises:
            IllegalOperation: if the producer is transactional and no
                transaction is currently in progress.
        """
        # Topic metadata has to be available before anything else.
        await self.client._wait_on_metadata(topic)
        # No key/value is passed: this call only validates that the
        # partition exists in the metadata.
        partition = self._partition(topic, partition, None, None, None, None)

        # Transactional producers may only send inside an open transaction.
        manager = self._txn_manager
        transactional = (
            manager is not None and manager.transactional_id is not None)
        if transactional and not manager.is_in_transaction():
            raise IllegalOperation(
                "Can't send messages while not in transaction")

        tp = TopicPartition(topic, partition)
        log.debug("Sending batch to %s", tp)
        return await self._message_accumulator.add_batch(
            batch, tp, self._request_timeout_ms / 1000)
示例#3
0
    def committed(self, partition):
        """ Return the last committed offset of ``partition``.

        The commit may have been made by this process or by another one.
        The returned offset is the position the consumer would resume from
        after a failure.

        Committed offsets are not cached by the consumer (a Transactional
        Producer can change them without the Consumer knowing, as of
        Kafka 0.11.0), so every call performs a remote fetch through the
        coordinator.

        Arguments:
            partition (TopicPartition): the partition to check

        Returns:
            The last committed offset, or None if there was no prior commit.

        Raises:
            IllegalOperation: If used with ``group_id == None``
        """
        if self._group_id is None:
            raise IllegalOperation("Requires group_id")

        fetched = yield from self._coordinator.fetch_committed_offsets(
            [partition])
        if partition not in fetched:
            return None

        offset = fetched[partition].offset
        # -1 is treated as "no commit yet".
        return None if offset == -1 else offset
示例#4
0
 def _verify_txn_started(self, transactional_id):
     """Check that the given transactional id has an open transaction.

     Ids with no entry in ``self._transactions`` are accepted silently.

     Arguments:
         transactional_id: key used to look up the transaction manager.

     Raises:
         IllegalOperation: if a manager is registered for the id but it
             reports that no transaction is in progress.
     """
     try:
         manager = self._transactions[transactional_id]
     except KeyError:
         # Unknown id: nothing to verify.
         return

     # Sanity check: the manager must be registered under its own id.
     assert manager.transactional_id == transactional_id
     if manager.is_in_transaction():
         return
     raise IllegalOperation(
         "Can't send messages while not in transaction")
示例#5
0
    async def send_offsets_to_transaction(self, offsets, group_id):
        """Add consumer group offsets to the transaction in progress.

        Arguments:
            offsets: offsets to commit as part of the transaction; they are
                normalised with ``commit_structure_validate`` before use.
            group_id (str): non-empty name of the consumer group whose
                offsets are being committed.

        Raises:
            IllegalOperation: if no transaction is currently in progress.
                ``_ensure_transactional()`` is called first and may raise
                on its own.
            ValueError: if ``group_id`` is empty or not a ``str``.
        """
        self._ensure_transactional()

        if not self._txn_manager.is_in_transaction():
            raise IllegalOperation("Not in the middle of a transaction")

        if not group_id or not isinstance(group_id, str):
            raise ValueError(group_id)

        # validate `offsets` structure
        formatted_offsets = commit_structure_validate(offsets)

        log.debug(
            "Begin adding offsets %s for consumer group %s to transaction",
            formatted_offsets, group_id)
        fut = self._txn_manager.add_offsets_to_txn(formatted_offsets, group_id)
        # Shield so that cancelling this coroutine does not cancel `fut`.
        # No explicit ``loop`` is passed: asyncio.shield() dropped that
        # parameter in Python 3.10 and passing it raises TypeError there.
        await asyncio.shield(fut)
示例#6
0
    def send_offsets_to_transaction(self, transactional_id, offsets, group_id):
        """Add consumer group offsets to the transaction of one producer id.

        This is a generator-based coroutine (it uses ``yield from``).

        Arguments:
            transactional_id: key of the transaction manager in
                ``self._transactions``; an unknown key propagates the
                lookup error.
            offsets: offsets to commit as part of the transaction; they are
                normalised with ``commit_structure_validate`` before use.
            group_id (str): non-empty name of the consumer group whose
                offsets are being committed.

        Raises:
            IllegalOperation: if that manager has no transaction in
                progress.
            ValueError: if ``group_id`` is empty or not a ``str``.
        """
        txn_manager = self._transactions[transactional_id]
        if not txn_manager.is_in_transaction():
            raise IllegalOperation("Not in the middle of a transaction")

        if not group_id or not isinstance(group_id, str):
            raise ValueError(group_id)

        # validate `offsets` structure
        formatted_offsets = commit_structure_validate(offsets)

        log.debug(
            "Begin adding offsets %s for consumer group %s to transaction",
            formatted_offsets, group_id)
        fut = txn_manager.add_offsets_to_txn(formatted_offsets, group_id)
        # Logging arguments are passed lazily so repr(fut) is only built
        # when debug logging is enabled.
        log.debug('+WAIT FOR RESPONSE OR ERROR %r', fut)
        # Shield so that cancelling this coroutine does not cancel `fut`.
        # No explicit ``loop`` is passed: asyncio.shield() dropped that
        # parameter in Python 3.10 and passing it raises TypeError there.
        yield from asyncio.shield(fut)
        log.debug('-WAIT FOR RESPONSE OR ERROR %r', fut)
示例#7
0
    def committed(self, partition):
        """ Return the last committed offset of ``partition``.

        The commit may have been made by this process or by another one.
        The returned offset is the position the consumer would resume from
        after a failure.

        For a partition assigned to this consumer the cached committed
        value is used, waiting for it first if it has not been initialised
        yet. For any other partition the offset is fetched remotely through
        the coordinator.

        Arguments:
            partition (TopicPartition): the partition to check

        Returns:
            The last committed offset, or None if there was no prior commit.

        Raises:
            IllegalOperation: If used with ``group_id == None``
        """
        if self._group_id is None:
            raise IllegalOperation("Requires group_id")

        if not self._subscription.is_assigned(partition):
            # Not ours: ask the coordinator.
            fetched = yield from self._coordinator.fetch_committed_offsets(
                [partition])
            offset = fetched[partition].offset if partition in fetched else None
        else:
            state = self._subscription.subscription.assignment.state_value(
                partition)
            if state.committed is None:
                # Cache not populated yet; wait until it is.
                yield from state.wait_for_committed()
            offset = state.committed.offset

        # -1 is treated as "no commit yet".
        return None if offset == -1 else offset
示例#8
0
 def _ensure_transactional(self):
     """Require that this producer was set up for transactions.

     Raises:
         IllegalOperation: if there is no transaction manager, or the
             manager has no ``transactional_id``.
     """
     manager = self._txn_manager
     if manager is not None and manager.transactional_id is not None:
         return
     raise IllegalOperation(
         "You need to configure transaction_id to use transactions")
示例#9
0
    async def send(self,
                   topic,
                   value=None,
                   key=None,
                   partition=None,
                   timestamp_ms=None,
                   headers=None):
        """Publish a single message to a topic.

        Arguments:
            topic (str): topic where the message will be published
            value (optional): message value. Must be type bytes, or be
                serializable to bytes via configured value_serializer. If
                value is None, key is required and the message acts as a
                'delete'. See kafka compaction documentation for details:
                http://kafka.apache.org/documentation.html#compaction
                (compaction requires kafka >= 0.8.1)
            key (optional): a key to associate with the message. Can be
                used to determine which partition to send the message to.
                If partition is None (and producer's partitioner config is
                left as default), messages with the same key are delivered
                to the same partition (but if key is None, partition is
                chosen randomly). Must be type bytes, or be serializable to
                bytes via configured key_serializer.
            partition (int, optional): explicit destination partition. If
                not set, the configured 'partitioner' selects one.
            timestamp_ms (int, optional): epoch milliseconds (from Jan 1
                1970 UTC) to use as the message timestamp. Defaults to
                current time.
            headers (optional): message headers, forwarded unchanged to the
                accumulator. ``None`` is replaced by an empty list.

        Returns:
            asyncio.Future: object that will be set when message is
            processed

        Raises:
            kafka.KafkaTimeoutError: if we can't schedule this record (
                pending buffer is full) in up to `request_timeout_ms`
                milliseconds.
            IllegalOperation: if the producer is transactional and no
                transaction is currently in progress.
            UnsupportedVersionError: if ``headers`` are given while the
                client's api_version is below (0, 11).

        Note:
            The returned future will wait based on `request_timeout_ms`
            setting. Cancelling the returned future **will not** stop event
            from being sent, but cancelling the ``send`` coroutine itself
            **will**.
        """
        if value is None:
            assert self.client.api_version >= (0, 8, 1), (
                'Null messages require kafka >= 0.8.1')
            assert key is not None, 'Need at least one: key or value'

        # Topic metadata has to be available before anything else.
        await self.client._wait_on_metadata(topic)

        # Transactional producers may only send inside an open transaction.
        manager = self._txn_manager
        transactional = (
            manager is not None and manager.transactional_id is not None)
        if transactional and not manager.is_in_transaction():
            raise IllegalOperation(
                "Can't send messages while not in transaction")

        if headers is None:
            # Record parser/builder only accepts a list, never None.
            headers = []
        elif self.client.api_version < (0, 11):
            raise UnsupportedVersionError(
                "Headers not supported before Kafka 0.11")

        key_bytes, value_bytes = self._serialize(topic, key, value)
        partition = self._partition(
            topic, partition, key, value, key_bytes, value_bytes)

        tp = TopicPartition(topic, partition)
        log.debug("Sending (key=%s value=%s) to %s", key, value, tp)

        return await self._message_accumulator.add_message(
            tp,
            key_bytes,
            value_bytes,
            self._request_timeout_ms / 1000,
            timestamp_ms=timestamp_ms,
            headers=headers)
示例#10
0
    def commit(self, offsets=None):
        """ Commit offsets to Kafka.

        Offsets are committed to Kafka only. They are the ones used on the
        first fetch after every rebalance and on startup, so do not use
        this API if offsets have to be stored anywhere other than Kafka.

        Currently only supports kafka-topic offset storage (not zookeeper)

        When ``offsets`` is passed explicitly, each value is either the
        offset of the next record, or a tuple of offset and metadata::

            tp = TopicPartition(msg.topic, msg.partition)
            metadata = "Some utf-8 metadata"
            # Either
            await consumer.commit({tp: msg.offset + 1})
            # Or position directly
            await consumer.commit({tp: (msg.offset + 1, metadata)})

        .. note:: For a `fire and forget` commit, like ``commit_async()``
            in *kafka-python*, just run it in a task. Something like::

                fut = loop.create_task(consumer.commit())
                fut.add_done_callback(on_commit_done)

        Arguments:
            offsets (dict, optional): {TopicPartition: (offset, metadata)} dict
                to commit with the configured ``group_id``. Defaults to current
                consumed offsets for all subscribed partitions.
        Raises:
            IllegalOperation: If used with ``group_id == None``
            ValueError: If offsets is of wrong format
            KafkaError: If commit failed on broker side. This could be due to
                invalid offset, too long metadata, authorization failure, etc.
        """
        if self._group_id is None:
            raise IllegalOperation("Requires group_id")

        if offsets is not None:
            # Caller-supplied offsets must be a non-empty dict.
            if not offsets or not isinstance(offsets, dict):
                raise ValueError(offsets)

            normalized = {}
            for tp, entry in offsets.items():
                if not isinstance(tp, TopicPartition):
                    raise ValueError("Key should be TopicPartition instance")

                if not isinstance(entry, int):
                    # Anything that is not a bare offset must unpack into
                    # an (offset, metadata) pair.
                    try:
                        offset, metadata = entry
                    except Exception:
                        raise ValueError(offsets)

                    if not isinstance(metadata, str):
                        raise ValueError("Metadata should be a string")
                else:
                    # Bare offset: commit it with empty metadata.
                    offset, metadata = entry, ""

                normalized[tp] = OffsetAndMetadata(offset, metadata)

            offsets = normalized
        else:
            offsets = self._subscription.all_consumed_offsets()

        yield from self._coordinator.commit_offsets(offsets)
示例#11
0
    def commit(self, offsets=None):
        """ Commit offsets to Kafka.

        Offsets are committed to Kafka only. They are the ones used on the
        first fetch after every rebalance and on startup, so do not use
        this API if offsets have to be stored anywhere other than Kafka.

        Currently only supports kafka-topic offset storage (not zookeeper)

        When ``offsets`` is passed explicitly, each value is either the
        offset of the next record, or a tuple of offset and metadata::

            tp = TopicPartition(msg.topic, msg.partition)
            metadata = "Some utf-8 metadata"
            # Either
            await consumer.commit({tp: msg.offset + 1})
            # Or position directly
            await consumer.commit({tp: (msg.offset + 1, metadata)})

        .. note:: For a `fire and forget` commit, like ``commit_async()``
            in *kafka-python*, just run it in a task. Something like::

                fut = loop.create_task(consumer.commit())
                fut.add_done_callback(on_commit_done)

        Arguments:
            offsets (dict, optional): {TopicPartition: (offset, metadata)} dict
                to commit with the configured ``group_id``. Defaults to current
                consumed offsets for all subscribed partitions.
        Raises:
            IllegalOperation: If used with ``group_id == None``.
            IllegalStateError: If partitions not assigned.
            ValueError: If offsets is of wrong format.
            CommitFailedError: If membership already changed on broker.
            KafkaError: If commit failed on broker side. This could be due to
                invalid offset, too long metadata, authorization failure, etc.

        .. versionchanged:: 0.4.0

            Changed ``AssertionError`` to ``IllegalStateError`` in case of
            unassigned partition.

        .. versionchanged:: 0.4.0

            Will now raise ``CommitFailedError`` in case membership changed,
            as (possibly) this partition is handled by another consumer.
        """
        if self._group_id is None:
            raise IllegalOperation("Requires group_id")

        current = self._subscription.subscription
        if current is None:
            raise IllegalStateError("Not subscribed to any topics")
        assignment = current.assignment
        if assignment is None:
            raise IllegalStateError("No partitions assigned")

        if offsets is not None:
            offsets = commit_structure_validate(offsets)
            # Only partitions in the current assignment may be committed.
            for tp in offsets:
                if tp not in assignment.tps:
                    raise IllegalStateError(
                        "Partition {} is not assigned".format(tp))
        else:
            offsets = assignment.all_consumed_offsets()

        yield from self._coordinator.commit_offsets(assignment, offsets)