Example #1
    def __delivery_callback(
        self,
        future: Future[Message[KafkaPayload]],
        payload: KafkaPayload,
        error: KafkaError,
        message: ConfluentMessage,
    ) -> None:
        if error is not None:
            future.set_exception(TransportError(error))
        else:
            try:
                timestamp_type, timestamp_value = message.timestamp()
                if timestamp_type is TIMESTAMP_NOT_AVAILABLE:
                    raise ValueError("timestamp not available")

                future.set_result(
                    Message(
                        Partition(Topic(message.topic()), message.partition()),
                        message.offset(),
                        payload,
                        datetime.utcfromtimestamp(timestamp_value / 1000.0),
                    )
                )
            except Exception as error:
                future.set_exception(error)
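A callback with this shape is normally registered at produce time: confluent_kafka's Producer.produce accepts an on_delivery callable invoked with (KafkaError or None, Message) once the broker acknowledges or rejects the record, and the extra future/payload arguments above would be pre-bound with functools.partial. A minimal sketch of the plain two-argument form, with the broker address and topic name as placeholder assumptions:

from confluent_kafka import Producer

producer = Producer({"bootstrap.servers": "localhost:9092"})  # placeholder broker

def on_delivery(error, message):
    # Called from poll()/flush() with (KafkaError or None, Message).
    if error is not None:
        print(f"delivery failed: {error}")
    else:
        print(f"delivered to {message.topic()} [{message.partition()}] @ {message.offset()}")

producer.produce("example-topic", value=b"payload", on_delivery=on_delivery)
producer.flush()  # serve outstanding delivery callbacks before exiting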
Example #2
def msg_to_topic_partition(msg: Message) -> TopicPartition:
    """
    Takes a :class:`confluent_kafka.Message` and reads its attributes in order to create a
    :class:`confluent_kafka.TopicPartition`.

    :param msg: Message to read partition and offset information from.
    :return: The extracted partition and offset.
    """
    return TopicPartition(msg.topic(), msg.partition(), msg.offset())
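The TopicPartition returned here carries the offset of the message itself; when it is used to commit progress explicitly, the Kafka convention is to store the offset of the next message to read. A minimal sketch, assuming msg came from a confluent_kafka Consumer bound to the name consumer:

from confluent_kafka import TopicPartition

tp = msg_to_topic_partition(msg)
# Commit msg.offset() + 1 so the consumer resumes *after* this message.
consumer.commit(
    offsets=[TopicPartition(tp.topic, tp.partition, tp.offset + 1)],
    asynchronous=False,
)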
Example #3
def from_message(cls, msg: confluent_kafka.Message) -> 'Metadata':
    return cls(
        topic=msg.topic(),
        partition=msg.partition(),
        offset=msg.offset(),
        timestamp=msg.timestamp()[1],
        key=msg.key(),
        _raw=msg,
    )
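The timestamp=msg.timestamp()[1] line relies on Message.timestamp() returning a (timestamp_type, timestamp) pair; the type is worth checking, since it can be TIMESTAMP_NOT_AVAILABLE depending on broker and producer configuration. A minimal guard, with msg assumed to be a consumed Message:

from confluent_kafka import TIMESTAMP_NOT_AVAILABLE

timestamp_type, timestamp_ms = msg.timestamp()
if timestamp_type == TIMESTAMP_NOT_AVAILABLE:
    timestamp_ms = None  # no create/log-append time was attached to the record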
Example #4
def __on_delivery(self, error: kafka.KafkaError, message: kafka.Message):
    if error is not None:
        logging.error(
            "KafkaStorage:__on_delivery():error:{}".format(error))
    else:
        if globals.__DEBUG__:
            logging.debug(
                "KafkaStorage:__on_delivery():topic:{}:partition:{}:offset:{}"
                .format(message.topic(), message.partition(),
                        message.offset()))
Example #5
def commit_offset(context: RequestContext, data: Any,
                  message: confluent_kafka.Message) -> None:
    logger.debug(
        "committing topic %s partition %s offset %s",
        message.topic(),
        message.partition(),
        message.offset(),
    )
    with context.span.make_child("kafka.commit"):
        self.consumer.commit(message=message, asynchronous=False)
Example #6
def _confluent_to_binary_message(
        self, consumed_message: Message) -> BinaryMessage:
    binary_message = BinaryMessage(
        key=consumed_message.key(),
        value=consumed_message.value(),
        partition=consumed_message.partition(),
        offset=consumed_message.offset(),
        timestamp=self._confluent_to_io_timestamp(consumed_message),
        headers=self._confluent_to_io_headers(consumed_message.headers()),
    )
    return binary_message
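Message.headers() returns None when no headers were set, otherwise a list of (key, value) tuples whose values are bytes, so the _confluent_to_io_headers helper assumed above typically normalizes that into a friendlier structure. A rough sketch of such a converter (the name and return shape are illustrative, not part of confluent_kafka):

from typing import Dict, List, Optional, Tuple

def headers_to_dict(headers: Optional[List[Tuple[str, bytes]]]) -> Dict[str, bytes]:
    # Kafka allows duplicate header keys; dict() keeps only the last value for each key.
    return dict(headers or [])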
Example #7
    def _on_eof_received(self, msg: Message):
        key = (msg.topic(), msg.partition())
        self.__eof_reached[key] = True

        # when the sink has been restarted but is already at the end of the topic, this is how we
        # get the current offset. We need to keep committing this so the offsets in kafka won't get deleted.
        topic_partition = msg_to_topic_partition(msg)
        key = (topic_partition.topic, topic_partition.partition)
        logger.debug(f"Updating offset: {topic_partition}")
        self.__offsets[key] = topic_partition

        self._unsafe_call_and_set_status(self.on_eof_received, msg)
Example #8
def check_msg_equality(msgnum, expected, given: Message, expectFail=False):
    errs = []
    if expected['value'] != given.value():
        errs.append(f"Values do not match! Expected {expected['value']} got {given.value()}")
    if expected['key'] != given.key():
        errs.append(f"Keys do not match! Expected {expected['key']} got {given.key()}")
    if 'headers' in expected and expected['headers'] != given.headers():
        errs.append(f"Headers do not match! Expected {expected['headers']} got {given.headers()}")
    if expectFail:
        if len(errs) == 0:
            print(f"{msgnum}: expected ({expected}) matched given. Did not expect that")
            exit(1)
    elif len(errs) > 0:
        for err in errs:
            print(f"{msgnum}" + err)
        exit(255)
Example #9
def notify(slack_client: WebClient, msg: confluent_kafka.Message) -> None:
    prior_plans_count, prior_times_sum, prior_reads_sum, prior_execs_sum, prior_last_execution = 0, 0, 0, 0, 0
    msg_val = dict(msg.value())
    for pp in msg_val['prior_plans']:
        prior_plans_count += 1
        prior_times_sum += pp['total_elapsed_time']
        prior_reads_sum += pp['total_logical_reads']
        prior_execs_sum += pp['execution_count']
        prior_last_execution = max(prior_last_execution, pp['last_execution_time'])

    avg_prior_time_ms = prior_times_sum / prior_execs_sum / 1000
    avg_prior_reads = prior_reads_sum / prior_execs_sum
    avg_time_ms = msg_val['total_elapsed_time'] / msg_val['execution_count'] / 1000
    avg_reads = msg_val['total_logical_reads'] / msg_val['execution_count']
    time_increase_factor = avg_time_ms / avg_prior_time_ms
    read_increase_factor = (avg_reads / avg_prior_reads) if avg_prior_reads else 0
    eviction_latency_seconds = int((msg_val['eviction_time'] - msg_val['creation_time']) / 1000)

    template = Template(config.SLACK_MESSAGE_TEMPLATE)
    template.globals['format_ts'] = common.format_ts
    rendered = template.render(msg=msg, prior_plans_count=prior_plans_count, prior_times_sum=prior_times_sum,
                               prior_reads_sum=prior_reads_sum, prior_execs_sum=prior_execs_sum,
                               prior_last_execution=prior_last_execution, avg_prior_time_ms=avg_prior_time_ms,
                               avg_prior_reads=avg_prior_reads, avg_time_ms=avg_time_ms, avg_reads=avg_reads,
                               time_increase_factor=time_increase_factor, read_increase_factor=read_increase_factor,
                               eviction_latency_seconds=eviction_latency_seconds, hostname=socket.getfqdn())

    if slack_client:
        try:
            if config.SLACK_POST_AS_BLOCKS:
                slack_client.chat_postMessage(channel=config.SLACK_NOTIFY_CHANNEL, blocks=json.loads(rendered))
            else:
                slack_client.chat_postMessage(channel=config.SLACK_NOTIFY_CHANNEL, text=rendered)
        except SlackApiError as e:
            logger.warning(f"Error sending message to Slack: {e.response.get('error', '<none>')}")
Example #10
def process_message(
        self, message: Message) -> Optional[MutableMapping[str, Any]]:
    message = msgpack.unpackb(message.value(), use_list=False)
    profile = cast(Dict[str, Any], json.loads(message["payload"]))
    profile.update({
        "organization_id": message["organization_id"],
        "project_id": message["project_id"],
        "received": message["received"],
    })
    return profile
Example #11
    def __decode(self, message: Message):
        """
        Decode a message coming from Kafka.
        It will become a Python Dict
        """
        value = message.value()  # can be None, str, bytes

        if value:
            value = json.loads(value)

        return value
Example #12
    def __handle_msg(self, message: Message) -> CommitDecision:
        value = json.loads(message.value())
        parsed_message = parse_control_message(value)

        if isinstance(parsed_message, SnapshotInit):
            commit_decision = self.__recovery_state.process_init(parsed_message)
        elif isinstance(parsed_message, SnapshotAbort):
            commit_decision = self.__recovery_state.process_abort(parsed_message)
        elif isinstance(parsed_message, SnapshotLoaded):
            commit_decision = self.__recovery_state.process_snapshot_loaded(
                parsed_message,
            )
        else:
            logger.warning("Received an unrecognized message: %r", parsed_message)
            commit_decision = CommitDecision.DO_NOT_COMMIT

        return commit_decision
Example #13
def create_incident(message: Message, topic: str) -> dict:
    """Create incident from kafka's message.

    Args:
        message: Kafka message to create incident from
        topic: Message's topic

    Returns the created incident.
    """
    message_value = message.value()
    raw = {
        'Topic': topic,
        'Partition': message.partition(),
        'Offset': message.offset(),
        'Message': message_value.decode('utf-8')
    }
    incident = {
        'name':
        'Kafka {} partition:{} offset:{}'.format(topic, message.partition(),
                                                 message.offset()),
        'details':
        message_value.decode('utf-8'),
        'rawJSON':
        json.dumps(raw)
    }

    timestamp = message.timestamp()  # returns a (timestamp_type, timestamp) tuple
    if timestamp and len(timestamp) == 2 and timestamp[0] != TIMESTAMP_NOT_AVAILABLE:
        incident['occurred'] = timestamp_to_datestring(timestamp[1])

    demisto.debug(
        f"Creating incident from topic {topic} partition {message.partition()} offset {message.offset()}"
    )
    return incident
Example #14
def message_to_metadata(self, message: _Message) -> RecordMetadata:
    topic, partition = tp = TP(message.topic(), message.partition())
    return RecordMetadata(topic, partition, tp, message.offset())
Example #15
    def process_message(self, message: confluent_kafka.Message) -> None:
        self.total_count += 1

        if message.partition() not in self._last_processed_offset_by_partition:
            self._last_processed_offset_by_partition[message.partition()] = -1

        self.missing_offsets += (
            message.offset() -
            self._last_processed_offset_by_partition[message.partition()] - 1)
        self._last_processed_offset_by_partition[
            message.partition()] = message.offset()

        if message.value() is None:
            self.tombstone_count += 1
            return

        if constants.UNIFIED_TOPIC_MSG_DATA_WRAPPER_NAME in message.value():
            message_body = dict(
                message.value()[constants.UNIFIED_TOPIC_MSG_DATA_WRAPPER_NAME])
        else:
            message_body = dict(message.value())

        key = extract_key_tuple(self.table, message_body)
        operation_name = message_body[constants.OPERATION_NAME]

        if operation_name == constants.SNAPSHOT_OPERATION_NAME:
            self.snapshot_count += 1
            self.keys_seen_in_snapshots.add(key)
            if self.min_snapshot_key_seen is None or key < self.min_snapshot_key_seen:
                self.min_snapshot_key_seen = key
            if self.max_snapshot_key_seen is None or key > self.max_snapshot_key_seen:
                self.max_snapshot_key_seen = key
            if message.partition() in self._last_snapshot_key_seen_for_partition and \
                    self._last_snapshot_key_seen_for_partition[message.partition()] < key:
                self.snapshot_key_order_regressions_count += 1
                print(message.offset())
            self._last_snapshot_key_seen_for_partition[
                message.partition()] = key
            return

        if operation_name == constants.DELETE_OPERATION_NAME:
            self.all_deletes_in_topic += 1
            self.deleted_keys.add(key)

        msg_change_index = change_index.ChangeIndex.from_avro_ready_dict(
            message_body)
        if msg_change_index.lsn < self.table.min_lsn:
            # the live change table has been truncated and no longer has this entry
            return

        if operation_name == constants.DELETE_OPERATION_NAME:
            self.delete_count += 1
        elif operation_name == constants.INSERT_OPERATION_NAME:
            self.insert_count += 1
        elif operation_name == constants.POST_UPDATE_OPERATION_NAME:
            self.update_count += 1
        else:
            self.unknown_operation_count += 1
            return

        change_idx = change_index.ChangeIndex.from_avro_ready_dict(
            message_body)
        self.keys_seen_in_changes.add(key)
        if self.min_change_index_seen is None or change_idx < self.min_change_index_seen:
            self.min_change_index_seen = change_idx
        if self.max_change_index_seen is None or change_idx > self.max_change_index_seen:
            self.max_change_index_seen = change_idx
        if message.partition() in self._last_change_index_seen_for_partition and \
                self._last_change_index_seen_for_partition[message.partition()] > change_idx:
            self.change_index_order_regressions_count += 1
        self._last_change_index_seen_for_partition[
            message.partition()] = change_idx
        event_time = datetime.datetime.fromisoformat(
            message_body[constants.EVENT_TIME_NAME])
        if self.latest_change_seen is None or event_time > self.latest_change_seen:
            self.latest_change_seen = event_time
        return
Example #16
def raise_for_message(message: Message):
    if message is None:
        raise MessageEmptyException
    elif message.error() is not None:
        raise_for_kafka_error(message.error())
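A guard like this usually sits directly after the poll call, before the value is deserialized. A minimal sketch of that loop, assuming a configured confluent_kafka Consumer and a hypothetical handle() function; MessageEmptyException and raise_for_kafka_error come from the same module as raise_for_message:

while True:
    msg = consumer.poll(timeout=1.0)
    try:
        raise_for_message(msg)  # rejects None results and messages carrying an error
    except MessageEmptyException:
        continue  # poll timed out without data
    handle(msg)  # hypothetical processing step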
Example #17
def _on_message_received(self, msg: Message):
    self.__eof_reached[(msg.topic(), msg.partition())] = False
    self._unsafe_call_and_set_status(self.on_message_received, msg)
    self._update_offset_from_message(msg)
Example #18
def delivery_callback(error: KafkaError, message: Message) -> None:
    if error is not None:
        raise error
    else:
        logger.info("Message sent %r", message.value())
Example #19
def _confluent_to_io_timestamp(
        consumed_message: Message) -> datetime.datetime:
    return datetime.datetime.fromtimestamp(
        consumed_message.timestamp()[1] / 1000, tz=datetime.timezone.utc)
Example #20
    def handle(self, message: confluent_kafka.Message) -> None:
        context = self.baseplate.make_context_object()
        try:
            # We place the call to ``baseplate.make_server_span`` inside the
            # try/except block because we still want Baseplate to see and
            # handle the error (publish it to error reporting)
            with self.baseplate.make_server_span(
                    context, f"{self.name}.handler") as span:
                error = message.error()
                if error:
                    # this isn't a real message, but is an error from Kafka
                    raise ValueError(f"KafkaError: {error.str()}")

                topic = message.topic()
                offset = message.offset()
                partition = message.partition()

                span.set_tag("kind", "consumer")
                span.set_tag("kafka.topic", topic)
                span.set_tag("kafka.key", message.key())
                span.set_tag("kafka.partition", partition)
                span.set_tag("kafka.offset", offset)
                span.set_tag("kafka.timestamp", message.timestamp())

                blob: bytes = message.value()

                try:
                    data = self.message_unpack_fn(blob)
                except Exception:
                    logger.error("skipping invalid message")
                    context.span.incr_tag(
                        f"{self.name}.{topic}.invalid_message")
                    return

                try:
                    ingest_timestamp_ms = data["endpoint_timestamp"]
                    now_ms = int(time.time() * 1000)
                    message_latency = (now_ms - ingest_timestamp_ms) / 1000
                except (KeyError, TypeError):
                    # we can't guarantee that all publishers populate this field
                    # v2 events publishers (event collectors) do, but future
                    # kafka publishers may not
                    message_latency = None

                self.handler_fn(context, data, message)

                if self.on_success_fn:
                    self.on_success_fn(context, data, message)

                if message_latency is not None:
                    context.metrics.timer(f"{self.name}.{topic}.latency").send(
                        message_latency)

                context.metrics.gauge(
                    f"{self.name}.{topic}.offset.{partition}").replace(offset)
        except Exception:
            # let this exception crash the server so we'll stop processing messages
            # and won't commit offsets. when the server restarts it will get
            # this message again and try to process it.
            logger.exception(
                "Unhandled error while trying to process a message, terminating the server"
            )
            raise
Example #21
def raise_delivery_errors(kafka_error: confluent_kafka.KafkaError,
                          msg: confluent_kafka.Message) -> None:
    if kafka_error is not None:
        raise KafkaException.from_kafka_error(kafka_error)
    elif msg.error() is not None:
        raise KafkaException.from_kafka_error(msg.error())
Example #22
def kafka_producer_delivery_cb(err: confluent_kafka.KafkaError, msg: confluent_kafka.Message) -> None:
    if err is not None:
        logger.error("Delivery failed for record %s: %s", msg.key(), err)
Example #23
def decode_standard_message(msg: Message) -> dict:
    return json.loads(msg.value().decode('utf-8'))
Example #24
    def handle_message(self, message: Message) -> None:
        """
        Parses the value from Kafka, and if valid passes the payload to the callback defined by the
        subscription. If the subscription has been removed, or no longer has a valid callback then
        just log metrics/errors and continue.
        :param message:
        :return:
        """
        with sentry_sdk.push_scope() as scope:
            try:
                with metrics.timer(
                        "snuba_query_subscriber.parse_message_value"):
                    contents = self.parse_message_value(message.value())
            except InvalidMessageError:
                # If the message is in an invalid format, just log the error
                # and continue
                logger.exception(
                    "Subscription update could not be parsed",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return
            scope.set_tag("query_subscription_id", contents["subscription_id"])

            try:
                with metrics.timer(
                        "snuba_query_subscriber.fetch_subscription"):
                    subscription: QuerySubscription = QuerySubscription.objects.get_from_cache(
                        subscription_id=contents["subscription_id"])
                    if subscription.status != QuerySubscription.Status.ACTIVE.value:
                        metrics.incr(
                            "snuba_query_subscriber.subscription_inactive")
                        return
            except QuerySubscription.DoesNotExist:
                metrics.incr(
                    "snuba_query_subscriber.subscription_doesnt_exist")
                logger.error(
                    "Received subscription update, but subscription does not exist",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                try:
                    _delete_from_snuba(self.topic_to_dataset[message.topic()],
                                       contents["subscription_id"])
                except Exception:
                    logger.exception(
                        "Failed to delete unused subscription from snuba.")
                return

            if subscription.type not in subscriber_registry:
                metrics.incr(
                    "snuba_query_subscriber.subscription_type_not_registered")
                logger.error(
                    "Received subscription update, but no subscription handler registered",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return

            sentry_sdk.set_tag("project_id", subscription.project_id)
            sentry_sdk.set_tag("query_subscription_id",
                               contents["subscription_id"])

            callback = subscriber_registry[subscription.type]
            with sentry_sdk.start_span(
                    op="process_message") as span, metrics.timer(
                        "snuba_query_subscriber.callback.duration",
                        instance=subscription.type):
                span.set_data("payload", contents)
                span.set_data("subscription_dataset",
                              subscription.snuba_query.dataset)
                span.set_data("subscription_query",
                              subscription.snuba_query.query)
                span.set_data("subscription_aggregation",
                              subscription.snuba_query.aggregate)
                span.set_data("subscription_time_window",
                              subscription.snuba_query.time_window)
                span.set_data("subscription_resolution",
                              subscription.snuba_query.resolution)
                span.set_data("message_offset", message.offset())
                span.set_data("message_partition", message.partition())
                span.set_data("message_value", message.value())

                callback(contents, subscription)
Example #25
def decode_image_message(msg: Message) -> np.ndarray:
    img_mat = np.frombuffer(msg.value(), dtype=np.uint8)
    return cv2.imdecode(img_mat, cv2.IMREAD_UNCHANGED)
Example #26
    def handle_message(self, message: Message) -> None:
        """
        Parses the value from Kafka, and if valid passes the payload to the callback defined by the
        subscription. If the subscription has been removed, or no longer has a valid callback then
        just log metrics/errors and continue.
        :param message:
        :return:
        """
        # set a commit time deadline only after the first message for this batch is seen
        if not self.__batch_deadline:
            self.__batch_deadline = self.commit_batch_timeout_ms / 1000.0 + time.time()

        with sentry_sdk.push_scope() as scope:
            try:
                with metrics.timer("snuba_query_subscriber.parse_message_value"):
                    contents = self.parse_message_value(message.value())
            except InvalidMessageError:
                # If the message is in an invalid format, just log the error
                # and continue
                logger.exception(
                    "Subscription update could not be parsed",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return
            scope.set_tag("query_subscription_id", contents["subscription_id"])

            try:
                with metrics.timer("snuba_query_subscriber.fetch_subscription"):
                    subscription: QuerySubscription = QuerySubscription.objects.get_from_cache(
                        subscription_id=contents["subscription_id"]
                    )
                    if subscription.status != QuerySubscription.Status.ACTIVE.value:
                        metrics.incr("snuba_query_subscriber.subscription_inactive")
                        return
            except QuerySubscription.DoesNotExist:
                metrics.incr("snuba_query_subscriber.subscription_doesnt_exist")
                logger.error(
                    "Received subscription update, but subscription does not exist",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                try:
                    if "entity" in contents:
                        entity_key = contents["entity"]
                    else:
                        # XXX(ahmed): Remove this logic. This was kept here as backwards compat
                        # for subscription updates with schema version `2`. However schema version 3
                        # sends the "entity" in the payload
                        entity_regex = r"^(MATCH|match)[ ]*\(([^)]+)\)"
                        entity_match = re.match(entity_regex, contents["request"]["query"])
                        if not entity_match:
                            raise InvalidMessageError(
                                "Unable to fetch entity from query in message"
                            )
                        entity_key = entity_match.group(2)
                    _delete_from_snuba(
                        self.topic_to_dataset[message.topic()],
                        contents["subscription_id"],
                        EntityKey(entity_key),
                    )
                except InvalidMessageError as e:
                    logger.exception(e)
                except Exception:
                    logger.exception("Failed to delete unused subscription from snuba.")
                return

            if subscription.type not in subscriber_registry:
                metrics.incr("snuba_query_subscriber.subscription_type_not_registered")
                logger.error(
                    "Received subscription update, but no subscription handler registered",
                    extra={
                        "offset": message.offset(),
                        "partition": message.partition(),
                        "value": message.value(),
                    },
                )
                return

            sentry_sdk.set_tag("project_id", subscription.project_id)
            sentry_sdk.set_tag("query_subscription_id", contents["subscription_id"])

            callback = subscriber_registry[subscription.type]
            with sentry_sdk.start_span(op="process_message") as span, metrics.timer(
                "snuba_query_subscriber.callback.duration", instance=subscription.type
            ):
                span.set_data("payload", contents)
                span.set_data("subscription_dataset", subscription.snuba_query.dataset)
                span.set_data("subscription_query", subscription.snuba_query.query)
                span.set_data("subscription_aggregation", subscription.snuba_query.aggregate)
                span.set_data("subscription_time_window", subscription.snuba_query.time_window)
                span.set_data("subscription_resolution", subscription.snuba_query.resolution)
                span.set_data("message_offset", message.offset())
                span.set_data("message_partition", message.partition())
                span.set_data("message_value", message.value())

                callback(contents, subscription)