def test_from_klio_message_raises(klio_message):
    """A dict payload makes ``from_klio_message`` raise a payload exception."""
    bad_payload = {"no": "bytes casting"}
    expected_error = exceptions.KlioMessagePayloadException

    with pytest.raises(expected_error, match="Returned payload"):
        serializer.from_klio_message(klio_message, bad_payload)
def update_kmsg_metadata(self, raw_kmsg):
    """Rewrite a KlioMessage's recipients for partial bottom-up execution.

    The upstream job and the current job are both added to the message's
    limited intended recipients, and the current job is recorded as
    ``trigger_children_of``.

    Args:
        raw_kmsg (bytes): Serialized KlioMessage.
    Returns:
        bytes: KlioMessage serialized to ``bytes`` with updated intended
            recipients metadata.
    """
    klio_ctx = self._klio
    # `serializer.to_klio_message` is used rather than @handle_klio because
    # the whole KlioMessage object is needed here, not only its data.
    message = serializer.to_klio_message(
        raw_kmsg, kconfig=klio_ctx.config, logger=klio_ctx.logger
    )

    # Listing the upstream job as a recipient keeps it from skipping the
    # message.
    upstream = self._generate_upstream_job_object()
    limited = message.metadata.intended_recipients.limited

    # The current job is also recorded as `trigger_children_of` so that
    # top-down execution resumes once this job has finished.
    current = self._generate_current_job_object()
    limited.recipients.extend([upstream, current])
    limited.trigger_children_of.CopyFrom(current)

    return serializer.from_klio_message(message)
def test_from_klio_message(klio_message, payload, exp_payload):
    """Serialized output equals the reference message's serialized bytes."""
    reference = _get_klio_message()
    # Only override the reference payload when an expected payload is given.
    if exp_payload:
        reference.data.payload = exp_payload
    reference_bytes = reference.SerializeToString()

    result = serializer.from_klio_message(klio_message, payload)

    assert reference_bytes == result
def test_from_klio_message_v1():
    """A V1 message serializes to the same bytes as its own serialization."""
    payload = b"some-payload"

    v1_message = klio_pb2.KlioMessage(version=klio_pb2.Version.V1)
    v1_message.data.payload = payload
    reference_bytes = v1_message.SerializeToString()

    result = serializer.from_klio_message(v1_message, payload)

    assert reference_bytes == result
def test_from_klio_message_tagged_output(klio_message):
    """A tagged payload comes back tagged, wrapping the serialized message."""
    tag = "a-tag"
    payload = b"some payload"

    reference = _get_klio_message()
    reference.data.payload = payload
    expected = pvalue.TaggedOutput(tag, reference.SerializeToString())

    result = serializer.from_klio_message(
        klio_message, pvalue.TaggedOutput(tag, payload)
    )

    # pvalue.TaggedOutput does not implement comparison operators, so the
    # tag and the value are checked individually.
    assert expected.tag == result.tag
    assert expected.value == result.value
def __from_klio_message_generator(self, kmsg, payload, orig_item):
    """Yield ``kmsg`` serialized with ``payload``, or a tagged drop on error.

    If anything raises inside the ``try`` (serialization itself, or an
    exception surfacing at the ``yield``), the error is logged and
    ``orig_item`` is yielded under the ``"drop"`` tag instead.
    """
    try:
        yield serializer.from_klio_message(kmsg, payload)
    except Exception as exc:
        log = self._klio.logger
        log.error(_ERROR_MSG_KMSG_TO_BYTES.format(kmsg, exc), exc_info=True)
        # The value yielded in the `try` clause may not be tagged, so it is
        # what the caller of this function uses by default; tagged output
        # such as this dropped item is simply ignored, which is fine for
        # dropped values. A caller that wants it can still reach it via
        # pcoll.drop.
        # No attempt is made to serialize kmsg to bytes here because
        # something has already gone wrong.
        yield pvalue.TaggedOutput("drop", orig_item)
        # Return explicitly so that Beam does not call `next` and run on
        # to another `yield`.
        return
def __serialize_klio_message(metrics, ctx, func, incoming_item, *args, **kwargs): metrics.received.inc() # manipulate `ctx` to handle both methods and functions depending on # what we're wrapping. Functions just have `ctx` object, but methods # have `self._klio` as its context, and we also need access to `self` # in order to call the method _self = ctx if not isinstance(ctx, core.KlioContext): ctx = _self._klio with metrics.timer: try: kmsg = serializer.to_klio_message(incoming_item, ctx.config, ctx.logger) except Exception as err: ctx.logger.error( _ERROR_MSG_KMSG_FROM_BYTES.format(incoming_item, err), exc_info=True, ) metrics.error.inc() __ack_pubsub_if_direct_gke(incoming_item, ctx) # Since the returned value in the `try` clause is not tagged, that # one will be used by default by whatever executed this function, # and anything that has a tagged output value (like this dropped # one) will just be ignored, which is fine for dropped values. # But if the caller function wanted to, they could access this via # pcoll.drop. return pvalue.TaggedOutput("drop", incoming_item) try: ret = func(_self, kmsg.data, *args, **kwargs) if isinstance(ret, types.GeneratorType): raise TypeError("can't pickle generator object: '{}'".format( func.__name__)) except TypeError: metrics.error.inc() # If we get here, we threw a type error because we found a generator # and those can't be pickled. But there's no need to do any special # error handling - this will contain enough info for the user so # we just re-raise raise except Exception as err: log_msg, exc_info = __get_user_error_message( err, func.__name__, kmsg) ctx.logger.error(log_msg, exc_info=exc_info) metrics.error.inc() __ack_pubsub_if_direct_gke(kmsg, ctx) # Since the returned value in the `try` clause is not tagged, that # one will be used by default by whatever executed this function, # and anything that has a tagged output value (like this dropped # one) will just be ignored, which is fine for dropped values. 
# But if the caller function wanted to, they could access this via # pcoll.drop. # We won't try to serialize kmsg to bytes since something already # went wrong. return pvalue.TaggedOutput("drop", incoming_item) try: to_ret = serializer.from_klio_message(kmsg, ret) metrics.success.inc() return to_ret except Exception as err: ctx.logger.error(_ERROR_MSG_KMSG_TO_BYTES.format(kmsg, err), exc_info=True) metrics.error.inc() __ack_pubsub_if_direct_gke(kmsg, ctx) # Since the returned value in the `try` clause is not tagged, that # one will be used by default by whatever executed this function, # and anything that has a tagged output value (like this dropped # one) will just be ignored, which is fine for dropped values. # But if the caller function wanted to, they could access this via # pcoll.drop. # We won't try to serialize kmsg to bytes since something already # went wrong. return pvalue.TaggedOutput("drop", incoming_item)