def test_overflow_no_error_on_ignore():
    """Adding past the configured limits must not raise with IGNORE behavior."""
    flow_controller = FlowController(
        types.PublishFlowControl(
            message_limit=1,
            byte_limit=2,
            limit_exceeded_behavior=types.LimitExceededBehavior.IGNORE,
        )
    )

    # Two messages against a one-message limit: neither add() may raise.
    for payload in (b"foo", b"bar"):
        flow_controller.add(grpc_types.PubsubMessage(data=payload))
def test_no_overflow_no_error():
    """A handful of small messages under generous limits is accepted silently."""
    generous_limits = types.PublishFlowControl(
        message_limit=100,
        byte_limit=10000,
        limit_exceeded_behavior=types.LimitExceededBehavior.ERROR,
    )
    flow_controller = FlowController(generous_limits)

    # None of these add() calls is expected to raise.
    payloads = (b"foo", b"bar", b"baz")
    for payload in payloads:
        flow_controller.add(grpc_types.PubsubMessage(data=payload))
def test_incorrectly_releasing_too_many_messages():
    """Releasing a message that was never added warns and leaves the limits usable."""
    flow_controller = FlowController(
        types.PublishFlowControl(
            message_limit=1,
            byte_limit=150,
            limit_exceeded_behavior=types.LimitExceededBehavior.ERROR,
        )
    )

    never_added = grpc_types.PubsubMessage(data=b"x" * 100)
    accepted = grpc_types.PubsubMessage(data=b"y" * 100)
    rejected = grpc_types.PubsubMessage(data=b"z" * 100)

    # A release that would drive the load negative is expected to emit a warning.
    with warnings.catch_warnings(record=True) as caught:
        flow_controller.release(never_added)

    assert len(caught) == 1
    assert issubclass(caught[0].category, RuntimeWarning)
    assert "never added or already released" in str(caught[0].message)

    # The bogus release must not have created extra capacity: the flow still
    # takes exactly one message at a time.
    flow_controller.add(accepted)

    with pytest.raises(exceptions.FlowControlLimitError) as exc_info:
        flow_controller.add(rejected)

    error_text = str(exc_info.value)
    assert "messages: 2 / 1" in error_text
    combined_bytes = accepted._pb.ByteSize() + rejected._pb.ByteSize()
    assert f"bytes: {combined_bytes} / 150" in error_text
def test_blocked_messages_are_accepted_in_fifo_order():
    """Check that messages blocked by the flow controller are admitted one by one.

    Ten ``add()`` calls are started on daemon threads against a controller that
    admits a single message at a time. The test then alternates between waiting
    for the i-th ``add()`` to finish and releasing a message, failing if the
    i-th add has not completed by the time its turn comes.
    """
    settings = types.PublishFlowControl(
        message_limit=1,
        byte_limit=1_000_000,  # Unlimited for practical purposes in the test.
        limit_exceeded_behavior=types.LimitExceededBehavior.BLOCK,
    )
    flow_controller = FlowController(settings)

    # It's OK if the message instance is shared, as flow controller is only concerned
    # with byte sizes and counts, and not with particular message instances.
    message = grpc_types.PubsubMessage(data=b"x")

    # One "add finished" and one "release finished" event per message.
    adding_done_events = [threading.Event() for _ in range(10)]
    releasing_done_events = [threading.Event() for _ in adding_done_events]

    # Add messages. The first one will be accepted, and the rest should queue behind.
    for adding_done in adding_done_events:
        _run_in_daemon(flow_controller.add, [message], adding_done)
        # Pause between thread starts - presumably so that the threads reach add()
        # in the same order in which they were started (helper not shown here).
        time.sleep(0.1)

    if not adding_done_events[0].wait(timeout=0.1):  # pragma: NO COVER
        pytest.fail("The first message unexpectedly got blocked on adding.")

    # For each message, check that it has indeed been added to the flow controller.
    # Then release it to make room for the next message in line, and repeat the check.
    enumeration = enumerate(zip(adding_done_events, releasing_done_events))
    for i, (adding_done, releasing_done) in enumeration:
        if not adding_done.wait(timeout=0.1):  # pragma: NO COVER
            pytest.fail(f"Queued message still blocked on adding (i={i}).")

        _run_in_daemon(flow_controller.release, [message], releasing_done)
        if not releasing_done.wait(timeout=0.1):  # pragma: NO COVER
            pytest.fail(f"Queued message was not released in time (i={i}).")
def test_threads_posting_large_messages_do_not_starve():
    """Check that a large blocked message is not starved by later small ones.

    The byte limit (110) leaves room for the 100-byte-payload message only once
    most of the small messages have been released. The test keeps adding and
    releasing small messages concurrently and expects the large message to be
    admitted before the small messages that were queued after it.

    NOTE(review): this test calls ``_run_in_daemon(controller, "add", ...)`` and
    uses ``types.PubsubMessage``, unlike the other tests in this file, which pass
    a bound method and use ``grpc_types.PubsubMessage`` - confirm which helper
    signature is actually in scope.
    """
    settings = types.PublishFlowControl(
        message_limit=100,
        byte_limit=110,
        limit_exceeded_behavior=types.LimitExceededBehavior.BLOCK,
    )
    flow_controller = FlowController(settings)

    large_msg = types.PubsubMessage(data=b"x" * 100)  # close to entire byte limit

    adding_initial_done = threading.Event()
    adding_large_done = threading.Event()
    adding_busy_done = threading.Event()
    releasing_busy_done = threading.Event()
    releasing_large_done = threading.Event()

    # Occupy some of the flow capacity, then try to add a large message. Releasing
    # enough messages should eventually allow the large message to come through, even
    # if more messages are added after it (those should wait for the large message).
    # The list holds five references to one and the same message instance.
    initial_messages = [types.PubsubMessage(data=b"x" * 10)] * 5
    _run_in_daemon(flow_controller, "add", initial_messages, adding_initial_done)
    assert adding_initial_done.wait(timeout=0.1)

    _run_in_daemon(flow_controller, "add", [large_msg], adding_large_done)

    # Continuously keep adding more messages after the large one.
    # (Again a list of ten references to a single message instance.)
    messages = [types.PubsubMessage(data=b"x" * 10)] * 10
    _run_in_daemon(flow_controller, "add", messages, adding_busy_done, action_pause=0.1)

    # At the same time, gradually keep releasing the messages - the freed up
    # capacity should be consumed by the large message, not the other small messages
    # being added after it.
    _run_in_daemon(
        flow_controller, "release", messages, releasing_busy_done, action_pause=0.1
    )

    # Sanity check - releasing should have completed by now.
    # (Ten releases with a 0.1 pause each, hence the 1.1 second timeout.)
    if not releasing_busy_done.wait(timeout=1.1):
        pytest.fail("Releasing messages blocked or errored.")

    # Enough messages released, the large message should have come through in
    # the meantime.
    if not adding_large_done.wait(timeout=0.1):
        pytest.fail("A thread adding a large message starved.")

    # The small messages queued behind the large one must still be waiting.
    if adding_busy_done.wait(timeout=0.1):
        pytest.fail("Adding multiple small messages did not block.")

    # Releasing the large message should unblock adding the remaining "busy" messages
    # that have not been added yet.
    _run_in_daemon(flow_controller, "release", [large_msg], releasing_large_done)
    if not releasing_large_done.wait(timeout=0.1):
        pytest.fail("Releasing a message blocked or errored.")

    if not adding_busy_done.wait(timeout=1.0):
        pytest.fail("Adding messages blocked or errored.")
    def __init__(self, batch_settings=(), publisher_options=(), **kwargs):
        """Build the publisher client state and the underlying GAPIC client.

        Args:
            batch_settings: A ``types.BatchSettings`` instance, or an empty
                tuple to use the defaults.
            publisher_options: A ``types.PublisherOptions`` instance, or an
                empty tuple to use the defaults.
            kwargs: Forwarded to ``publisher_client.PublisherClient``. When the
                ``PUBSUB_EMULATOR_HOST`` environment variable is set,
                ``client_options`` and ``credentials`` are overwritten.
        """
        assert (
            type(batch_settings) is types.BatchSettings
            or len(batch_settings) == 0
        ), "batch_settings must be of type BatchSettings or an empty tuple."
        assert (
            type(publisher_options) is types.PublisherOptions
            or len(publisher_options) == 0
        ), "publisher_options must be of type PublisherOptions or an empty tuple."

        # When an emulator host is configured, point the client at it and use
        # anonymous credentials instead of whatever the caller supplied.
        emulator_host = os.environ.get("PUBSUB_EMULATOR_HOST")
        if emulator_host:
            kwargs["client_options"] = {"api_endpoint": emulator_host}
            kwargs["credentials"] = AnonymousCredentials()

        # Normalize the options; the first field is the message ordering flag.
        options = types.PublisherOptions(*publisher_options)
        self.publisher_options = options
        self._enable_message_ordering = options[0]

        # Instantiate the underlying GAPIC client and remember its host.
        gapic_client = publisher_client.PublisherClient(**kwargs)
        self.api = gapic_client
        self._target = gapic_client._transport._host
        self._batch_class = thread.Batch
        self.batch_settings = types.BatchSettings(*batch_settings)

        # Lock guarding the sequencer map and the stop/commit-thread state.
        self._batch_lock = self._batch_class.make_lock()
        # (topic, ordering_key) => sequencer object
        self._sequencers = {}
        self._is_stopped = False
        # Thread that commits all sequencers after a timeout, created lazily.
        self._commit_thread = None

        # The object controlling the message publishing flow.
        self._flow_controller = FlowController(options.flow_control)
def test_no_error_on_moderate_message_flow():
    """Interleaved add/release calls that never exceed two in-flight messages pass."""
    flow_controller = FlowController(
        types.PublishFlowControl(
            message_limit=2,
            byte_limit=250,
            limit_exceeded_behavior=types.LimitExceededBehavior.ERROR,
        )
    )

    first, second, third = [
        grpc_types.PubsubMessage(data=fill * 100) for fill in (b"x", b"y", b"z")
    ]

    # Two in-flight messages fit, three do not. As long as release() frees
    # capacity correctly, nothing in this sequence raises.
    flow_controller.add(first)
    flow_controller.add(second)
    flow_controller.release(first)
    flow_controller.add(third)
    flow_controller.release(second)
    flow_controller.release(third)
def test_rejected_messages_do_not_increase_total_load():
    """Messages rejected with an error must not count towards the load."""
    flow_controller = FlowController(
        types.PublishFlowControl(
            message_limit=1,
            byte_limit=150,
            limit_exceeded_behavior=types.LimitExceededBehavior.ERROR,
        )
    )

    occupying = grpc_types.PubsubMessage(data=b"x" * 100)
    follow_up = grpc_types.PubsubMessage(data=b"y" * 100)

    flow_controller.add(occupying)

    # Every further add() is rejected while the single slot is taken.
    attempts = 5
    while attempts:
        with pytest.raises(exceptions.FlowControlLimitError):
            flow_controller.add(grpc_types.PubsubMessage(data=b"z" * 100))
        attempts -= 1

    # Once the slot is freed, a new message fits again - the rejected attempts
    # above must not have left anything behind.
    flow_controller.release(occupying)
    flow_controller.add(follow_up)
def test_error_if_mesage_would_block_indefinitely():
    """With BLOCK behavior, a message that can never fit raises instead of blocking."""
    impossible_limits = types.PublishFlowControl(
        message_limit=0,  # simulate non-sane settings
        byte_limit=1,
        limit_exceeded_behavior=types.LimitExceededBehavior.BLOCK,
    )
    oversized = grpc_types.PubsubMessage(data=b"xyz")

    # First run add() on a daemon thread so that a regression (blocking forever)
    # cannot hang the test run itself.
    add_finished = threading.Event()
    add_failed = threading.Event()
    _run_in_daemon(
        FlowController(impossible_limits).add,
        [oversized],
        add_finished,
        error_event=add_failed,
    )

    assert add_failed.wait(timeout=0.1), "No error on adding too large a message."

    # An error is known to occur, so it is now safe to call add() directly on a
    # fresh controller and inspect the exception.
    fresh_controller = FlowController(impossible_limits)
    with pytest.raises(exceptions.FlowControlLimitError) as exc_info:
        fresh_controller.add(oversized)

    error_text = str(exc_info.value)
    assert "would block forever" in error_text
    assert "messages: 1 / 0" in error_text
    assert f"bytes: {oversized._pb.ByteSize()} / 1" in error_text
def test_warning_on_internal_reservation_stats_error_when_unblocking():
    """Check that release() warns when a waiting reservation has inconsistent stats.

    A second message is left blocked on ``add()``, its reservation record is
    tampered with so that more bytes appear reserved than needed, and the
    subsequent ``release()`` is expected to emit exactly one ``RuntimeWarning``
    mentioning "too many bytes reserved".
    """
    settings = types.PublishFlowControl(
        message_limit=1,
        byte_limit=150,
        limit_exceeded_behavior=types.LimitExceededBehavior.BLOCK,
    )
    flow_controller = FlowController(settings)

    msg1 = grpc_types.PubsubMessage(data=b"x" * 100)
    msg2 = grpc_types.PubsubMessage(data=b"y" * 100)

    # If there is a concurrency bug in FlowController, we do not want to block
    # the main thread running the tests, thus we delegate all add/release
    # operations to daemon threads and check the outcome (blocked/not blocked)
    # through Events.
    adding_1_done = threading.Event()
    adding_2_done = threading.Event()
    releasing_1_done = threading.Event()

    # Adding a message with free capacity should not block.
    _run_in_daemon(flow_controller.add, [msg1], adding_1_done)
    if not adding_1_done.wait(timeout=0.1):
        pytest.fail(  # pragma: NO COVER
            "Adding a message with enough flow capacity blocked or errored.")

    # Adding messages when there is not enough capacity should block, even if
    # added through multiple threads.
    _run_in_daemon(flow_controller.add, [msg2], adding_2_done)
    if adding_2_done.wait(timeout=0.1):
        pytest.fail(
            "Adding a message on overflow did not block.")  # pragma: NO COVER

    # Intentionally corrupt internal stats
    # (takes the first entry of the controller's private ``_waiting`` mapping and
    # makes it claim one more reserved byte than it needs).
    reservation = next(iter(flow_controller._waiting.values()), None)
    assert reservation is not None, "No messages blocked by flow controller."
    reservation.bytes_reserved = reservation.bytes_needed + 1

    # Record warnings while the release runs on its daemon thread.
    with warnings.catch_warnings(record=True) as warned:
        _run_in_daemon(flow_controller.release, [msg1], releasing_1_done)
        if not releasing_1_done.wait(timeout=0.1):
            pytest.fail(
                "Releasing a message blocked or errored.")  # pragma: NO COVER

    # Only RuntimeWarning entries are of interest; other categories are ignored.
    matches = [
        warning for warning in warned if warning.category is RuntimeWarning
    ]
    assert len(matches) == 1
    assert "too many bytes reserved" in str(matches[0].message).lower()
def test_message_count_overflow_error():
    """Exceeding the message count limit raises and reports the counts."""
    flow_controller = FlowController(
        types.PublishFlowControl(
            message_limit=1,
            byte_limit=10000,
            limit_exceeded_behavior=types.LimitExceededBehavior.ERROR,
        )
    )

    # The first message takes the only slot; the second one must be rejected.
    flow_controller.add(grpc_types.PubsubMessage(data=b"foo"))
    with pytest.raises(exceptions.FlowControlLimitError) as exc_info:
        flow_controller.add(grpc_types.PubsubMessage(data=b"bar"))

    error_text = str(exc_info.value)
    assert "messages: 2 / 1" in error_text
def test_byte_size_overflow_error():
    """Exceeding the byte limit raises and reports the combined size."""
    flow_controller = FlowController(
        types.PublishFlowControl(
            message_limit=10000,
            byte_limit=199,
            limit_exceeded_behavior=types.LimitExceededBehavior.ERROR,
        )
    )

    # Each payload alone is 100 bytes, so one message fits under the 199 byte
    # limit but two together do not (the per-message overhead is small compared
    # to the payload).
    fitting = grpc_types.PubsubMessage(data=b"x" * 100)
    overflowing = grpc_types.PubsubMessage(data=b"y" * 100)

    flow_controller.add(fitting)
    with pytest.raises(exceptions.FlowControlLimitError) as exc_info:
        flow_controller.add(overflowing)

    combined_bytes = fitting._pb.ByteSize() + overflowing._pb.ByteSize()
    assert f"bytes: {combined_bytes} / 199" in str(exc_info.value)
示例#13
0
class Client(object):
    """A publisher client for Google Cloud Pub/Sub.

    This creates an object that is capable of publishing messages.
    Generally, you can instantiate this client with no arguments, and you
    get sensible defaults.

    Args:
        batch_settings (~google.cloud.pubsub_v1.types.BatchSettings): The
            settings for batch publishing.
        publisher_options (~google.cloud.pubsub_v1.types.PublisherOptions): The
            options for the publisher client. Note that enabling message ordering will
            override the publish retry timeout to be infinite.
        kwargs (dict): Any additional arguments provided are sent as keyword
            arguments to the underlying
            :class:`~google.cloud.pubsub_v1.gapic.publisher_client.PublisherClient`.
            Generally you should not need to set additional keyword
            arguments. Regional endpoints can be set via ``client_options`` that
            takes a single key-value pair that defines the endpoint.

    Example:

    .. code-block:: python

        from google.cloud import pubsub_v1

        publisher_client = pubsub_v1.PublisherClient(
            # Optional
            batch_settings = pubsub_v1.types.BatchSettings(
                max_bytes=1024,  # One kilobyte
                max_latency=1,   # One second
            ),

            # Optional
            publisher_options = pubsub_v1.types.PublisherOptions(
                enable_message_ordering=False,
                flow_control=pubsub_v1.types.PublishFlowControl(
                    message_limit=2000,
                    limit_exceeded_behavior=pubsub_v1.types.LimitExceededBehavior.BLOCK,
                ),
            ),

            # Optional
            client_options = {
                "api_endpoint": REGIONAL_ENDPOINT
            }
        )
    """
    def __init__(self, batch_settings=(), publisher_options=(), **kwargs):
        """Build the publisher client state and the underlying GAPIC client.

        Args:
            batch_settings: A ``types.BatchSettings`` instance, or an empty
                tuple to use the defaults.
            publisher_options: A ``types.PublisherOptions`` instance, or an
                empty tuple to use the defaults.
            kwargs: Forwarded to ``publisher_client.PublisherClient``. When the
                ``PUBSUB_EMULATOR_HOST`` environment variable is set,
                ``client_options`` and ``credentials`` are overwritten.
        """
        assert (
            type(batch_settings) is types.BatchSettings
            or len(batch_settings) == 0
        ), "batch_settings must be of type BatchSettings or an empty tuple."
        assert (
            type(publisher_options) is types.PublisherOptions
            or len(publisher_options) == 0
        ), "publisher_options must be of type PublisherOptions or an empty tuple."

        # When an emulator host is configured, point the client at it and use
        # anonymous credentials instead of whatever the caller supplied.
        emulator_host = os.environ.get("PUBSUB_EMULATOR_HOST")
        if emulator_host:
            kwargs["client_options"] = {"api_endpoint": emulator_host}
            kwargs["credentials"] = AnonymousCredentials()

        # Normalize the options; the first field is the message ordering flag.
        options = types.PublisherOptions(*publisher_options)
        self.publisher_options = options
        self._enable_message_ordering = options[0]

        # Instantiate the underlying GAPIC client and remember its host.
        gapic_client = publisher_client.PublisherClient(**kwargs)
        self.api = gapic_client
        self._target = gapic_client._transport._host
        self._batch_class = thread.Batch
        self.batch_settings = types.BatchSettings(*batch_settings)

        # Lock guarding the sequencer map and the stop/commit-thread state.
        self._batch_lock = self._batch_class.make_lock()
        # (topic, ordering_key) => sequencer object
        self._sequencers = {}
        self._is_stopped = False
        # Thread that commits all sequencers after a timeout, created lazily.
        self._commit_thread = None

        # The object controlling the message publishing flow.
        self._flow_controller = FlowController(options.flow_control)

    @classmethod
    def from_service_account_file(cls, filename, batch_settings=(), **kwargs):
        """Construct a client authenticated with a service account key file.

        Args:
            filename (str): Path to the service account private key JSON file.
            batch_settings (~google.cloud.pubsub_v1.types.BatchSettings): The
                settings for batch publishing.
            kwargs: Additional arguments passed to the constructor. Any
                ``credentials`` entry is replaced by the credentials loaded
                from ``filename``.

        Returns:
            A Publisher :class:`~google.cloud.pubsub_v1.publisher.client.Client`
            instance that is the constructed client.
        """
        loader = service_account.Credentials.from_service_account_file
        kwargs["credentials"] = loader(filename)
        return cls(batch_settings, **kwargs)

    from_service_account_json = from_service_account_file

    @property
    def target(self):
        """Return the target (where the API is).

        This is the value stored in ``self._target`` by the constructor.

        Returns:
            str: The location of the API.
        """
        return self._target

    def _get_or_create_sequencer(self, topic, ordering_key):
        """Return the sequencer for ``(topic, ordering_key)``, creating it if needed.

        An empty ordering key yields an ``UnorderedSequencer``; any other key
        yields an ``OrderedSequencer``. Newly created sequencers are cached in
        ``self._sequencers``.
        """
        key = (topic, ordering_key)
        existing = self._sequencers.get(key)
        if existing is not None:
            return existing

        if ordering_key == "":
            created = unordered_sequencer.UnorderedSequencer(self, topic)
        else:
            created = ordered_sequencer.OrderedSequencer(
                self, topic, ordering_key)

        self._sequencers[key] = created
        return created

    def resume_publish(self, topic, ordering_key):
        """Resume publishing on an ordering key that has had unrecoverable errors.

        If the topic/ordering key combination has not been seen by this client,
        nothing is resumed and a debug message is logged.

        Args:
            topic (str): The topic to publish messages to.
            ordering_key: A string that identifies related messages for which
                publish order should be respected.

        Raises:
            RuntimeError:
                If called after publisher has been stopped by a `stop()` method
                call.
            ValueError:
                If message ordering is not enabled for this client.
        """
        with self._batch_lock:
            if self._is_stopped:
                raise RuntimeError(
                    "Cannot resume publish on a stopped publisher.")

            if not self._enable_message_ordering:
                raise ValueError(
                    "Cannot resume publish on a topic/ordering key if ordering "
                    "is not enabled.")

            known_sequencer = self._sequencers.get((topic, ordering_key))
            if known_sequencer is not None:
                known_sequencer.unpause()
                return

            _LOGGER.debug(
                "Error: The topic/ordering key combination has not "
                "been seen before.")

    def publish(self,
                topic,
                data,
                ordering_key="",
                retry=gapic_v1.method.DEFAULT,
                **attrs):
        """Publish a single message.

        .. note::
            Messages in Pub/Sub are blobs of bytes. They are *binary* data,
            not text. You must send data as a bytestring (``bytes``), and this
            library will raise an exception if you send a text string.

            The reason that this is so important (and why we do not try to
            coerce for you) is because Pub/Sub is also platform independent
            and there is no way to know how to decode messages properly on
            the other side; therefore, encoding and decoding is a required
            exercise for the developer.

        Add the given message to this object; this will cause it to be
        published once the batch either has enough messages or a sufficient
        period of time has elapsed.
        This method may block if LimitExceededBehavior.BLOCK is used in the
        flow control settings.

        Example:
            >>> from google.cloud import pubsub_v1
            >>> client = pubsub_v1.PublisherClient()
            >>> topic = client.topic_path('[PROJECT]', '[TOPIC]')
            >>> data = b'The rain in Wales falls mainly on the snails.'
            >>> response = client.publish(topic, data, username='******')

        Args:
            topic (str): The topic to publish messages to.
            data (bytes): A bytestring representing the message body. This
                must be a bytestring.
            ordering_key: A string that identifies related messages for which
                publish order should be respected. Message ordering must be
                enabled for this client to use this feature.
            retry (Optional[google.api_core.retry.Retry]): Designation of what
                errors, if any, should be retried. If message ordering is
                enabled for this client, the total retry deadline will be
                changed to "infinity".
            attrs (Mapping[str, str]): A dictionary of attributes to be
                sent as metadata. (These may be text strings or byte strings.)

        Returns:
            A :class:`~google.cloud.pubsub_v1.publisher.futures.Future`
            instance that conforms to Python Standard library's
            :class:`~concurrent.futures.Future` interface (but not an
            instance of that class). If the flow controller rejects the
            message, the returned future already carries the
            ``FlowControlLimitError``.

        Raises:
            TypeError:
                If ``data`` is not ``bytes``, or if an attribute value is
                neither ``str`` nor ``bytes``.

            ValueError:
                If an ordering key is given but message ordering is not
                enabled for this client.

            RuntimeError:
                If called after publisher has been stopped by a `stop()` method
                call.

            pubsub_v1.publisher.exceptions.MessageTooLargeError: If publishing
                the ``message`` would exceed the max size limit on the backend.
        """
        # Sanity check: Is the data being sent as a bytestring?
        # If it is literally anything else, complain loudly about it.
        if not isinstance(data, bytes):
            raise TypeError(
                "Data being published to Pub/Sub must be sent as a bytestring."
            )

        if not self._enable_message_ordering and ordering_key != "":
            raise ValueError(
                "Cannot publish a message with an ordering key when message "
                "ordering is not enabled.")

        # Coerce all attributes to text strings. Iterate over a copy because
        # ``attrs`` is modified inside the loop.
        for k, v in copy.copy(attrs).items():
            if isinstance(v, str):
                continue
            if isinstance(v, bytes):
                attrs[k] = v.decode("utf-8")
                continue
            raise TypeError("All attributes being published to Pub/Sub must "
                            "be sent as text strings.")

        # Create the Pub/Sub message object. For performance reasons, the message
        # should be constructed by directly using the raw protobuf class, and only
        # then wrapping it into the higher-level PubsubMessage class.
        vanilla_pb = _raw_proto_pubbsub_message(data=data,
                                                ordering_key=ordering_key,
                                                attributes=attrs)
        message = gapic_types.PubsubMessage.wrap(vanilla_pb)

        # Messages should go through flow control to prevent excessive
        # queuing on the client side (depending on the settings).
        try:
            self._flow_controller.add(message)
        except exceptions.FlowControlLimitError as exc:
            future = futures.Future()
            future.set_exception(exc)
            return future

        def on_publish_done(future):
            self._flow_controller.release(message)

        with self._batch_lock:
            try:
                if self._is_stopped:
                    raise RuntimeError(
                        "Cannot publish on a stopped publisher.")

                # Set retry timeout to "infinite" when message ordering is
                # enabled. Note that this then also impacts messages added
                # with an empty ordering key.
                if self._enable_message_ordering:
                    if retry is gapic_v1.method.DEFAULT:
                        # use the default retry for the publish GRPC method as
                        # a base
                        transport = self.api._transport
                        retry = transport._wrapped_methods[
                            transport.publish]._retry
                    retry = retry.with_deadline(2.0**32)

                # Delegate the publishing to the sequencer.
                sequencer = self._get_or_create_sequencer(topic, ordering_key)
                future = sequencer.publish(message, retry=retry)
            except Exception:
                # The message was already admitted by the flow controller, but
                # no future exists to trigger on_publish_done(). Give the
                # reserved capacity back here, otherwise it would stay occupied
                # forever and later publish() calls could be rejected or block.
                self._flow_controller.release(message)
                raise

            future.add_done_callback(on_publish_done)

            # Create a timer thread if necessary to enforce the batching
            # timeout.
            self._ensure_commit_timer_runs_no_lock()

            return future

    def ensure_cleanup_and_commit_timer_runs(self):
        """ Ensure a cleanup/commit timer thread is running.

            If a cleanup/commit timer thread is already running, this does nothing.

            Acquires ``_batch_lock`` and delegates to
            ``_ensure_commit_timer_runs_no_lock()``.
        """
        with self._batch_lock:
            self._ensure_commit_timer_runs_no_lock()

    def _ensure_commit_timer_runs_no_lock(self):
        """ Start the commit timer thread unless one exists or latency is infinite.

            The caller must already hold _batch_lock; this method does not
            acquire it.
        """
        if self._commit_thread:
            return

        # With an unbounded max latency there is never a timeout to enforce.
        latency_is_finite = self.batch_settings.max_latency < float("inf")
        if latency_is_finite:
            self._start_commit_thread()

    def _start_commit_thread(self):
        """Create, remember and start the thread that commits the sequencers."""
        committer = threading.Thread(
            name="Thread-PubSubBatchCommitter",
            target=self._wait_and_commit_sequencers,
        )
        self._commit_thread = committer
        committer.start()

    def _wait_and_commit_sequencers(self):
        """ Sleep for the batching timeout, then commit all sequencers.

            Nothing is committed (and the commit thread reference is kept) if
            the publisher has been stopped in the meantime.
        """
        max_latency = self.batch_settings.max_latency
        time.sleep(max_latency)
        _LOGGER.debug("Commit thread is waking up")

        with self._batch_lock:
            if not self._is_stopped:
                self._commit_sequencers()
                # Allow a new commit thread to be started on the next publish.
                self._commit_thread = None

    def _commit_sequencers(self):
        """ Drop sequencers that report being finished, then commit the others. """
        # Collect the keys first; the dict must not change size while iterated.
        stale_keys = []
        for key, sequencer in self._sequencers.items():
            if sequencer.is_finished():
                stale_keys.append(key)

        for key in stale_keys:
            del self._sequencers[key]

        for remaining in self._sequencers.values():
            remaining.commit()

    def stop(self):
        """Immediately publish all outstanding messages.

        Marks the publisher as stopped, which makes later `publish()` calls
        fail, and stops every known sequencer. Call this before discarding
        the `Client()` object so that no pending messages are lost.

        .. note::

            This method is non-blocking. Use `Future()` objects
            returned by `publish()` to make sure all publish
            requests completed, either in success or error.

        Raises:
            RuntimeError:
                If called after publisher has been stopped by a `stop()` method
                call.
        """
        with self._batch_lock:
            already_stopped = self._is_stopped
            if already_stopped:
                raise RuntimeError("Cannot stop a publisher already stopped.")

            self._is_stopped = True

            active_sequencers = self._sequencers.values()
            for active in active_sequencers:
                active.stop()

    # Used only for testing.
    def _set_batch(self, topic, batch, ordering_key=""):
        """Install ``batch`` on the sequencer for ``(topic, ordering_key)``."""
        self._get_or_create_sequencer(topic, ordering_key)._set_batch(batch)

    # Used only for testing.
    def _set_batch_class(self, batch_class):
        """Replace the batch class stored in ``self._batch_class``."""
        self._batch_class = batch_class

    # Used only for testing.
    def _set_sequencer(self, topic, sequencer, ordering_key=""):
        """Register ``sequencer`` under the ``(topic, ordering_key)`` key."""
        self._sequencers[(topic, ordering_key)] = sequencer
示例#14
0
    def __init__(self, batch_settings=(), publisher_options=(), **kwargs):
        """Build the publisher client, using a custom gRPC channel by default.

        Args:
            batch_settings: A ``types.BatchSettings`` instance, or an empty
                tuple to use the defaults.
            publisher_options: A ``types.PublisherOptions`` instance, or an
                empty tuple to use the defaults.
            kwargs: Forwarded to ``publisher_client.PublisherClient``. Unless a
                ``transport`` is supplied, ``channel`` and ``credentials`` are
                consumed here and replaced by a ``transport`` entry.
        """
        assert (
            type(batch_settings) is types.BatchSettings
            or len(batch_settings) == 0
        ), "batch_settings must be of type BatchSettings or an empty tuple."
        assert (
            type(publisher_options) is types.PublisherOptions
            or len(publisher_options) == 0
        ), "publisher_options must be of type PublisherOptions or an empty tuple."

        # Sanity check: Is our goal to use the emulator?
        # If so, create a grpc insecure channel with the emulator host
        # as the target.
        if os.environ.get("PUBSUB_EMULATOR_HOST"):
            kwargs["channel"] = grpc.insecure_channel(
                target=os.environ.get("PUBSUB_EMULATOR_HOST"))

        # The GAPIC client has mTLS logic to determine the api endpoint and the
        # ssl credentials to use. Here we create a GAPIC client to help compute the
        # api endpoint and ssl credentials. The api endpoint will be used to set
        # `self._target`, and ssl credentials will be passed to
        # `grpc_helpers.create_channel` to establish a mTLS channel (if ssl
        # credentials is not None).
        client_options = kwargs.get("client_options", None)
        credentials = kwargs.get("credentials", None)
        client_for_mtls_info = publisher_client.PublisherClient(
            credentials=credentials, client_options=client_options)

        self._target = client_for_mtls_info._transport._host

        # Use a custom channel.
        # We need this in order to set appropriate default message size and
        # keepalive options.
        if "transport" not in kwargs:
            # A caller-supplied (or emulator) channel takes precedence over
            # creating a new one.
            channel = kwargs.pop("channel", None)
            if channel is None:
                channel = grpc_helpers.create_channel(
                    credentials=kwargs.pop("credentials", None),
                    target=self.target,
                    ssl_credentials=client_for_mtls_info._transport.
                    _ssl_channel_credentials,
                    scopes=publisher_client.PublisherClient._DEFAULT_SCOPES,
                    options={
                        "grpc.max_send_message_length": -1,
                        "grpc.max_receive_message_length": -1,
                    }.items(),
                )
            # cannot pass both 'channel' and 'credentials'
            kwargs.pop("credentials", None)
            transport = publisher_grpc_transport.PublisherGrpcTransport(
                channel=channel)
            kwargs["transport"] = transport

        # Normalize the publisher options; the first field is stored as the
        # message ordering flag.
        self.publisher_options = types.PublisherOptions(*publisher_options)
        self._enable_message_ordering = self.publisher_options[0]

        # Add the metrics headers, and instantiate the underlying GAPIC
        # client.
        self.api = publisher_client.PublisherClient(**kwargs)
        self._batch_class = thread.Batch
        self.batch_settings = types.BatchSettings(*batch_settings)

        # The batches on the publisher client are responsible for holding
        # messages. One batch exists for each topic.
        self._batch_lock = self._batch_class.make_lock()
        # (topic, ordering_key) => sequencers object
        self._sequencers = {}
        self._is_stopped = False
        # Thread created to commit all sequencers after a timeout.
        self._commit_thread = None

        # The object controlling the message publishing flow
        self._flow_controller = FlowController(
            self.publisher_options.flow_control)
def test_blocking_on_overflow_until_free_capacity():
    """Verify that ``add()`` blocks on overflow until capacity is released.

    The flow controller is configured with ``message_limit=1`` and the
    ``BLOCK`` limit-exceeded behavior. One message is added, then three more
    ``add()`` calls are started and each is expected not to finish within its
    timeout. Every subsequent ``release()`` is expected to let exactly one
    more of the pending ``add()`` calls complete.
    """
    flow_controller = FlowController(
        types.PublishFlowControl(
            message_limit=1,
            byte_limit=150,
            limit_exceeded_behavior=types.LimitExceededBehavior.BLOCK,
        )
    )

    first_msg = grpc_types.PubsubMessage(data=b"x" * 100)
    # The messages whose add() calls are expected to block at first.
    queued_msgs = [
        grpc_types.PubsubMessage(data=fill * 100) for fill in (b"y", b"z", b"w")
    ]

    # A concurrency bug in FlowController must not be able to hang the main
    # test thread, so every add/release call is handed off to a daemon thread
    # and its outcome (blocked / not blocked) is observed through an Event.
    first_added = threading.Event()
    queued_added = [threading.Event() for _ in queued_msgs]
    first_released = threading.Event()
    second_released = threading.Event()

    # With free capacity available, adding must complete promptly.
    _run_in_daemon(flow_controller.add, [first_msg], first_added)
    if not first_added.wait(timeout=0.1):
        pytest.fail(  # pragma: NO COVER
            "Adding a message with enough flow capacity blocked or errored.")

    # Without capacity, every further add() must block, even when the calls
    # are issued from several threads.
    for msg, added in zip(queued_msgs, queued_added):
        _run_in_daemon(flow_controller.add, [msg], added)
        if added.wait(timeout=0.1):
            pytest.fail(
                "Adding a message on overflow did not block.")  # pragma: NO COVER

    # Releasing the first message frees room for one message, which should
    # unblock one of the waiting threads.
    _run_in_daemon(flow_controller.release, [first_msg], first_released)
    if not first_released.wait(timeout=0.1):
        pytest.fail(
            "Releasing a message blocked or errored.")  # pragma: NO COVER

    # Event.wait() returns False only on timeout (flag never set), and
    # True == 1 / False == 0, so summing the results counts finished adds.
    done_status = [added.wait(timeout=0.1) for added in queued_added]
    done_count = sum(done_status)
    assert done_count == 1, "Exactly one thread should have been unblocked."

    # Release the message that just got in and verify that yet another
    # waiting thread gets unblocked.
    added_msg = queued_msgs[done_status.index(True)]
    _run_in_daemon(flow_controller.release, [added_msg], second_released)

    if not second_released.wait(timeout=0.1):
        pytest.fail(
            "Releasing messages blocked or errored.")  # pragma: NO COVER

    released_count = sum(added.wait(timeout=0.1) for added in queued_added)
    assert released_count == 2, "Exactly two threads should have been unblocked."
    def __init__(self, batch_settings=(), publisher_options=(), **kwargs):
        """Set up the publisher client and the underlying GAPIC client.

        Args:
            batch_settings: Either a ``types.BatchSettings`` instance or an
                empty tuple; it is unpacked into ``types.BatchSettings``.
            publisher_options: Either a ``types.PublisherOptions`` instance
                or an empty tuple; it is unpacked into
                ``types.PublisherOptions``.
            kwargs: Forwarded to ``publisher_client.PublisherClient``. The
                keys ``client_options``, ``channel``, ``credentials``,
                ``transport`` and ``client_config`` are inspected (and some
                of them consumed or replaced) here first.

        Raises:
            AssertionError: If a non-empty ``batch_settings`` or
                ``publisher_options`` is not of the expected type.
        """
        assert (
            type(batch_settings) is types.BatchSettings
            or len(batch_settings) == 0
        ), "batch_settings must be of type BatchSettings or an empty tuple."
        assert (
            type(publisher_options) is types.PublisherOptions
            or len(publisher_options) == 0
        ), "publisher_options must be of type PublisherOptions or an empty tuple."

        # When an emulator host is configured, talk to it over an insecure
        # gRPC channel that targets that host.
        emulator_host = os.environ.get("PUBSUB_EMULATOR_HOST")
        if emulator_host:
            kwargs["channel"] = grpc.insecure_channel(target=emulator_host)

        # A string "api_endpoint" in the client options overrides the default
        # service address; anything else falls back to the default.
        client_options = kwargs.pop("client_options", None)
        api_endpoint = None
        if client_options and "api_endpoint" in client_options:
            api_endpoint = client_options["api_endpoint"]
        if isinstance(api_endpoint, six.string_types):
            self._target = api_endpoint
        else:
            self._target = publisher_client.PublisherClient.SERVICE_ADDRESS

        # Unless the caller supplied a transport, build one on top of a custom
        # channel so that the message size limits can be lifted.
        if "transport" not in kwargs:
            channel = kwargs.pop("channel", None)
            # 'credentials' is always removed from kwargs here: it cannot be
            # passed along together with a channel-based transport.
            credentials = kwargs.pop("credentials", None)
            if channel is None:
                channel_options = {
                    "grpc.max_send_message_length": -1,
                    "grpc.max_receive_message_length": -1,
                }
                channel = grpc_helpers.create_channel(
                    credentials=credentials,
                    target=self.target,
                    scopes=publisher_client.PublisherClient._DEFAULT_SCOPES,
                    options=channel_options.items(),
                )
            kwargs["transport"] = publisher_grpc_transport.PublisherGrpcTransport(
                channel=channel)

        self.publisher_options = types.PublisherOptions(*publisher_options)
        self._enable_message_ordering = self.publisher_options[0]
        if self._enable_message_ordering:
            # With message ordering enabled the retry timeout is made
            # "infinite". Note that this also affects messages published with
            # an empty ordering key.
            total_timeout_path = [
                "interfaces",
                "google.pubsub.v1.Publisher",
                "retry_params",
                "messaging",
                "total_timeout_millis",
            ]
            kwargs["client_config"] = _set_nested_value(
                kwargs.pop("client_config", {}), 2**32, total_timeout_path)

        # Instantiate the underlying GAPIC client with the prepared kwargs.
        self.api = publisher_client.PublisherClient(**kwargs)
        self._batch_class = thread.Batch
        self.batch_settings = types.BatchSettings(*batch_settings)

        # Lock created by the batch class, kept alongside the batching state
        # below.
        self._batch_lock = self._batch_class.make_lock()
        # Maps (topic, ordering_key) => sequencer object.
        self._sequencers = {}
        self._is_stopped = False
        # Thread created to commit all sequencers after a timeout.
        self._commit_thread = None

        # The object controlling the message publishing flow.
        self._flow_controller = FlowController(
            self.publisher_options.flow_control)