Пример #1
0
def test_to_klio_message_raises(klio_config, logger, monkeypatch):
    """Undecodable bytes raise ``DecodeError`` and log exactly one error."""
    not_a_kmsg = b"Not a klio message"

    with pytest.raises(gproto_message.DecodeError):
        serializer.to_klio_message(not_a_kmsg, klio_config, logger)

    # Only the number of error logs is checked; matching on the message text
    # itself would make the test brittle.
    assert logger.error.call_count == 1
Пример #2
0
    def update_kmsg_metadata(self, raw_kmsg):
        """Add the upstream and current jobs as limited recipients.

        This enables partial bottom-up execution for the message.

        Args:
            raw_kmsg (bytes): KlioMessage as raw ``bytes``.
        Returns:
            bytes: the KlioMessage converted back to ``bytes`` with updated
                intended-recipients metadata.
        """
        # `serializer.to_klio_message` is called directly (rather than going
        # through @handle_klio) because the full KlioMessage object is needed
        # here, not just its data.
        message = serializer.to_klio_message(
            raw_kmsg, kconfig=self._klio.config, logger=self._klio.logger
        )

        # Listing the upstream job as a recipient keeps it from skipping the
        # message.
        parent_job = self._generate_upstream_job_object()
        limited = message.metadata.intended_recipients.limited
        limited.recipients.extend([parent_job])

        # The current job is both a recipient and the `trigger_children_of`
        # target, so that top-down execution resumes after this job is done.
        this_job = self._generate_current_job_object()
        limited.recipients.extend([this_job])
        limited.trigger_children_of.CopyFrom(this_job)

        return serializer.from_klio_message(message)
Пример #3
0
 def print_debug(self, raw_message):
     """Log the decoded KlioMessage and hand back the input untouched."""
     decoded = serializer.to_klio_message(
         raw_message, self._klio.config, self._klio.logger
     )
     # The message is logged at the configured level, prefixed with
     # `self.prefix`.
     self._klio.logger.log(
         self.log_level, "{}{}".format(self.prefix, decoded)
     )
     return raw_message
Пример #4
0
def test_to_klio_message(klio_message, klio_message_str, klio_config, logger):
    """A serialized KlioMessage decodes to an equal message, logging no error."""
    decoded = serializer.to_klio_message(klio_message_str, klio_config, logger)

    assert klio_message == decoded
    logger.error.assert_not_called()
Пример #5
0
 def process(self, raw_message):
     """Tag the raw message PROCESS or DROP according to `_should_process`."""
     decoded = serializer.to_klio_message(raw_message)
     # Either way the bytes exactly as received are emitted; only the tag
     # differs.
     tag = (
         _helpers.TaggedStates.PROCESS.value
         if self._should_process(decoded)
         else _helpers.TaggedStates.DROP.value
     )
     yield pvalue.TaggedOutput(tag, raw_message)
Пример #6
0
 def process(self, raw_message):
     """Tag the message PROCESS (re-serialized) or DROP (as received)."""
     decoded = serializer.to_klio_message(raw_message)

     if not self._should_process(decoded):
         yield pvalue.TaggedOutput(
             _helpers.TaggedStates.DROP.value, raw_message
         )
         return

     # The message could have been updated, so a freshly serialized copy is
     # emitted instead of the bytes that were received.
     reserialized = decoded.SerializeToString()
     yield pvalue.TaggedOutput(
         _helpers.TaggedStates.PROCESS.value, reserialized
     )
Пример #7
0
    def process(self, klio_message):
        """Emit the serialized message on the "v2" or "v1" tagged output."""
        # Batch reads already hand over a KlioMessage, while streaming reads
        # still deliver bytes. Decoding happens here because, for some
        # reason, it isn't pickleable when done in its own transform.
        # TODO: maybe create a read/write klio pub/sub transform to do
        # this for us.
        if not isinstance(klio_message, klio_pb2.KlioMessage):
            klio_message = serializer.to_klio_message(klio_message)

        is_v2 = klio_message.version == klio_pb2.Version.V2
        tag = "v2" if is_v2 else "v1"
        yield pvalue.TaggedOutput(tag, klio_message.SerializeToString())
Пример #8
0
    def wrapper(self, incoming_item, *args, **kwargs):
        """Decode ``incoming_item`` and delegate to ``meth``; log on failure.

        Everything yielded by ``meth(self, kmsg, *args, **kwargs)`` is
        re-yielded. If an ``Exception`` is raised, it is logged and the
        generator simply ends without yielding anything further.
        """
        try:
            decoded = serializer.to_klio_message(
                incoming_item, self._klio.config, self._klio.logger
            )
            yield from meth(self, decoded, *args, **kwargs)
        except Exception as err:
            # NOTE(review): this handler also covers the `yield from` above,
            # so an error raised by `meth` is reported with the same
            # "when serializing" wording as a decoding failure.
            message = (
                "Dropping KlioMessage - exception occurred when serializing "
                "'%s' to a KlioMessage.\nError: %s" % (incoming_item, err)
            )
            self._klio.logger.error(message, exc_info=True)
Пример #9
0
def test_to_klio_message_allow_non_kmsg(klio_config, logger, monkeypatch):
    """With ``allow_non_klio_messages`` on, raw bytes are wrapped as V2."""
    monkeypatch.setattr(
        klio_config.job_config, "allow_non_klio_messages", True
    )
    raw_bytes = b"Not a klio message"

    # The expected result carries the raw bytes as the data element.
    wrapped = klio_pb2.KlioMessage()
    wrapped.version = klio_pb2.Version.V2
    wrapped.data.element = raw_bytes

    decoded = serializer.to_klio_message(raw_bytes, klio_config, logger)

    assert wrapped == decoded
    logger.error.assert_not_called()
Пример #10
0
def __serialize_klio_message_generator(self, meth, incoming_item, *args,
                                       **kwargs):
    """Decode ``incoming_item``, run ``meth`` on its data, yield the results.

    On the happy path, everything produced by
    ``__from_klio_message_generator`` is yielded, once per item when ``meth``
    returns a generator and once for the whole payload otherwise.

    If decoding fails or calling ``meth`` raises, the error is logged and the
    original ``incoming_item`` is yielded on the "drop" tagged output. The
    values yielded on the happy path are not tagged here, so whatever
    executes this function uses those by default and simply ignores tagged
    ones, which is fine for dropped values. A caller that wants them can
    still reach them via ``pcoll.drop``.
    """
    try:
        decoded = serializer.to_klio_message(
            incoming_item, self._klio.config, self._klio.logger
        )
    except Exception as err:
        self._klio.logger.error(
            _ERROR_MSG_KMSG_FROM_BYTES.format(incoming_item, err),
            exc_info=True,
        )
        yield pvalue.TaggedOutput("drop", incoming_item)
        # Return explicitly so that a later `next` call from Beam cannot
        # reach any of the yields below.
        return

    try:
        payload = meth(self, decoded.data, *args, **kwargs)
    except Exception as err:
        method_path = "{}.{}".format(self.__class__.__name__, meth.__name__)
        log_msg, exc_info = __get_user_error_message(
            err, method_path, decoded
        )
        self._klio.logger.error(log_msg, exc_info=exc_info)
        # The message is not serialized back to bytes since something
        # already went wrong; the item exactly as received is dropped.
        yield pvalue.TaggedOutput("drop", incoming_item)
        # Same reason for the explicit return as above.
        return

    # A generator payload is expanded item by item; anything else is handled
    # as a single item.
    if isinstance(payload, types.GeneratorType):
        items = payload
    else:
        items = (payload,)

    for item in items:
        yield from __from_klio_message_generator(
            self, decoded, item, incoming_item
        )
Пример #11
0
    def process(self, raw_message):
        """Record this job in the audit log, log the path, re-emit as bytes."""
        kmsg = serializer.to_klio_message(
            raw_message, self._klio.config, self._klio.logger
        )
        new_entry = self._create_audit_item()
        kmsg.metadata.job_audit_log.extend([new_entry])

        # One "<gcp_project>::<job_name>" segment per audit entry, in log
        # order; the entry just appended is the last one.
        segments = [
            "{}::{}".format(
                str(entry.klio_job.gcp_project), str(entry.klio_job.job_name)
            )
            for entry in kmsg.metadata.job_audit_log
        ]
        traversed_dag = "{} (current job)".format(" -> ".join(segments))

        base_log_msg = "KlioMessage full audit log"
        log_msg = "{} - Entity ID: {} - Path: {}".format(
            base_log_msg, kmsg.data.entity_id, traversed_dag
        )
        self._klio.logger.debug(log_msg)
        yield kmsg.SerializeToString()
Пример #12
0
def __serialize_klio_message(metrics, ctx, func, incoming_item, *args,
                             **kwargs):
    """Decode ``incoming_item``, call ``func`` on its data, encode the result.

    Args:
        metrics: object whose ``received``, ``error`` and ``success``
            attributes are incremented via ``inc()`` and whose ``timer`` is
            used as a context manager around all of the work.
        ctx: either a ``core.KlioContext`` or an object exposing one as
            ``_klio``. The object as passed in is also the first argument
            given to ``func``.
        func: callable invoked as ``func(ctx, kmsg.data, *args, **kwargs)``.
        incoming_item: the item handed to ``serializer.to_klio_message``.
        *args: extra positional arguments forwarded to ``func``.
        **kwargs: extra keyword arguments forwarded to ``func``.

    Returns:
        The value of ``serializer.from_klio_message(kmsg, ret)`` on success.
        When decoding, ``func`` (other than with a ``TypeError``), or
        encoding fails, ``pvalue.TaggedOutput("drop", incoming_item)``.

    Raises:
        TypeError: if ``func`` returns a generator, or if ``func`` itself
            raises a ``TypeError``.
    """
    metrics.received.inc()
    # manipulate `ctx` to handle both methods and functions depending on
    # what we're wrapping. Functions just have `ctx` object, but methods
    # have `self._klio` as its context, and we also need access to `self`
    # in order to call the method
    _self = ctx
    if not isinstance(ctx, core.KlioContext):
        ctx = _self._klio

    with metrics.timer:
        # Step 1: turn the incoming item into a KlioMessage.
        try:
            kmsg = serializer.to_klio_message(incoming_item, ctx.config,
                                              ctx.logger)
        except Exception as err:
            ctx.logger.error(
                _ERROR_MSG_KMSG_FROM_BYTES.format(incoming_item, err),
                exc_info=True,
            )
            metrics.error.inc()
            __ack_pubsub_if_direct_gke(incoming_item, ctx)
            # Since the returned value in the `try` clause is not tagged, that
            # one will be used by default by whatever executed this function,
            # and anything that has a tagged output value (like this dropped
            # one) will just be ignored, which is fine for dropped values.
            # But if the caller function wanted to, they could access this via
            # pcoll.drop.
            return pvalue.TaggedOutput("drop", incoming_item)

        # Step 2: run the wrapped callable on the message's data.
        try:
            ret = func(_self, kmsg.data, *args, **kwargs)
            if isinstance(ret, types.GeneratorType):
                raise TypeError("can't pickle generator object: '{}'".format(
                    func.__name__))
        except TypeError:
            metrics.error.inc()
            # If we get here, we threw a type error because we found a generator
            # and those can't be pickled. But there's no need to do any special
            # error handling - this will contain enough info for the user so
            # we just re-raise
            # NOTE(review): `func` is called inside this same `try`, so a
            # TypeError raised by `func` itself also lands here and is
            # re-raised instead of being dropped like other exceptions.
            raise

        except Exception as err:
            log_msg, exc_info = __get_user_error_message(
                err, func.__name__, kmsg)
            ctx.logger.error(log_msg, exc_info=exc_info)
            metrics.error.inc()
            __ack_pubsub_if_direct_gke(kmsg, ctx)
            # Since the returned value in the `try` clause is not tagged, that
            # one will be used by default by whatever executed this function,
            # and anything that has a tagged output value (like this dropped
            # one) will just be ignored, which is fine for dropped values.
            # But if the caller function wanted to, they could access this via
            # pcoll.drop.
            # We won't try to serialize kmsg to bytes since something already
            # went wrong.
            return pvalue.TaggedOutput("drop", incoming_item)

        # Step 3: turn the message and the callable's result back into the
        # outgoing form.
        try:
            to_ret = serializer.from_klio_message(kmsg, ret)
            metrics.success.inc()
            return to_ret

        except Exception as err:
            ctx.logger.error(_ERROR_MSG_KMSG_TO_BYTES.format(kmsg, err),
                             exc_info=True)
            metrics.error.inc()
            __ack_pubsub_if_direct_gke(kmsg, ctx)
            # Since the returned value in the `try` clause is not tagged, that
            # one will be used by default by whatever executed this function,
            # and anything that has a tagged output value (like this dropped
            # one) will just be ignored, which is fine for dropped values.
            # But if the caller function wanted to, they could access this via
            # pcoll.drop.
            # We won't try to serialize kmsg to bytes since something already
            # went wrong.
            return pvalue.TaggedOutput("drop", incoming_item)
Пример #13
0
def __serialize_klio_message_generator(metrics, self, meth, incoming_item,
                                       *args, **kwargs):
    """Decode ``incoming_item``, run ``meth`` on its data, yield the results.

    Args:
        metrics: object whose ``received`` and ``error`` attributes are
            incremented via ``inc()`` and whose ``timer`` is used as a
            context manager. It is also forwarded to
            ``__from_klio_message_generator``.
        self: the instance ``meth`` is called on. Its ``_klio`` attribute
            supplies the config and logger.
        meth: callable invoked as ``meth(self, kmsg.data, *args, **kwargs)``.
        incoming_item: the item handed to ``serializer.to_klio_message``.
        *args: extra positional arguments forwarded to ``meth``.
        **kwargs: extra keyword arguments forwarded to ``meth``.

    Yields:
        Whatever ``__from_klio_message_generator`` yields, once per item when
        ``meth`` returns a generator and once for the whole payload
        otherwise. When decoding fails, ``meth`` raises, or consuming a
        generator payload raises,
        ``pvalue.TaggedOutput("drop", incoming_item)`` is yielded and the
        generator ends.
    """
    metrics.received.inc()
    # NOTE(review): every `yield` below sits inside this `with`, so the timer
    # context stays open while the generator is suspended - confirm that this
    # is the intended measurement.
    with metrics.timer:
        try:
            kmsg = serializer.to_klio_message(incoming_item, self._klio.config,
                                              self._klio.logger)
        except Exception as err:
            self._klio.logger.error(
                _ERROR_MSG_KMSG_FROM_BYTES.format(incoming_item, err),
                exc_info=True,
            )
            metrics.error.inc()
            __ack_pubsub_if_direct_gke(incoming_item, self._klio)
            # Since the yielded value in the `try` clause is not tagged, that
            # one will be used by default by whatever executed this function,
            # and anything that has a tagged output value (like this dropped
            # one) will just be ignored, which is fine for dropped values.
            # But if the caller function wanted to, they could access this via
            # pcoll.drop.
            yield pvalue.TaggedOutput("drop", incoming_item)
            # explicitly return so that Beam doesn't call `next` and
            # executes the next `yield`
            return

        try:
            payload = meth(self, kmsg.data, *args, **kwargs)

        except Exception as err:
            func_path = self.__class__.__name__ + "." + meth.__name__
            log_msg, exc_info = __get_user_error_message(err, func_path, kmsg)
            self._klio.logger.error(log_msg, exc_info=exc_info)
            metrics.error.inc()
            __ack_pubsub_if_direct_gke(kmsg, self._klio)
            # Since the yielded value in the `try` clause is not tagged, that
            # one will be used by default by whatever executed this function,
            # and anything that has a tagged output value (like this dropped
            # one) will just be ignored, which is fine for dropped values.
            # But if the caller function wanted to, they could access this via
            # pcoll.drop.
            # We won't try to serialize kmsg to bytes since something already
            # went wrong.
            yield pvalue.TaggedOutput("drop", incoming_item)
            # explicitly return so that Beam doesn't call `next` and
            # executes the next `yield`
            return

        else:
            if isinstance(payload, types.GeneratorType):
                try:
                    for pl in payload:
                        yield from __from_klio_message_generator(
                            metrics, self, kmsg, pl, incoming_item)
                # This exception block executes when an exception is raised
                # while iterating over `payload` or while yielding from
                # `__from_klio_message_generator` for one of its items.
                except Exception as err:
                    func_path = self.__class__.__name__ + "." + meth.__name__
                    log_msg, exc_info = __get_user_error_message(
                        err, func_path, kmsg)
                    self._klio.logger.error(log_msg, exc_info=exc_info)
                    metrics.error.inc()
                    __ack_pubsub_if_direct_gke(kmsg, self._klio)
                    # This will catch an exception present in the generator
                    # containing items yielded by a function/method
                    # decorated by @handle_klio.
                    # Following items in the generator will be ignored
                    # since an exception has already been detected.
                    # We won't try to serialize kmsg to bytes since
                    # something already went wrong.
                    yield pvalue.TaggedOutput("drop", incoming_item)
                    # explicitly return so that Beam doesn't call `next` and
                    # executes the next `yield`
                    return
            else:
                # Non-generator payloads are handled as a single item; this
                # call is outside the `try` above, so its exceptions
                # propagate to the caller.
                yield from __from_klio_message_generator(
                    metrics, self, kmsg, payload, incoming_item)