示例#1
0
def extract_http(output: MutableMapping[str, Any],
                 request: Mapping[str, Any]) -> None:
    """Copy the promoted HTTP fields from a request interface into *output*.

    Populates ``http_method``, ``http_referer`` and ``http_url``; any value
    missing from *request* (or its headers) comes through as ``None`` via
    ``_unicodify``.
    """
    headers: Mapping[str, Any] = _as_dict_safe(request.get("headers", None))
    output["http_method"] = _unicodify(request.get("method", None))
    output["http_referer"] = _unicodify(headers.get("Referer", None))
    output["http_url"] = _unicodify(request.get("url", None))
示例#2
0
    def extract_custom(
        self,
        output: MutableMapping[str, Any],
        event: Mapping[str, Any],
        metadata: Optional[KafkaMessageMetadata] = None,
    ) -> None:
        """Fill the dataset-specific columns of *output* from *event*.

        Extracts the user fields, the client IP address, the geo and
        request contexts, the message and the organization id.  As a side
        effect the (possibly augmented) contexts mapping is written back to
        ``event["data"]["contexts"]``.
        """
        data = event.get("data", {})
        # "sentry.interfaces.User" is the legacy spelling of the user
        # interface key.
        user_dict = data.get("user", data.get("sentry.interfaces.User",
                                              None)) or {}

        user_data: MutableMapping[str, Any] = {}
        extract_user(user_data, user_dict)
        output["user_name"] = user_data["username"]
        output["user_id"] = user_data["user_id"]
        output["user_email"] = user_data["email"]

        # NOTE(review): _ensure_valid_ip presumably returns None for an
        # unparsable address — confirm; a falsy result skips both columns.
        ip_address = _ensure_valid_ip(user_data["ip_address"])
        if ip_address:
            if ip_address.version == 4:
                output["ip_address_v4"] = str(ip_address)
            elif ip_address.version == 6:
                output["ip_address_v6"] = str(ip_address)

        contexts = _as_dict_safe(data.get("contexts", None))
        # Promote the user's geo data into the contexts, unless a geo
        # context already exists.
        geo = user_dict.get("geo", {})
        if "geo" not in contexts and isinstance(geo, dict):
            contexts["geo"] = geo

        # "sentry.interfaces.Http" is the legacy spelling of the request
        # interface key.
        request = data.get("request", data.get("sentry.interfaces.Http",
                                               None)) or {}
        # Promote the HTTP method and Referer header into a synthetic
        # "request" context, unless one already exists.
        if "request" not in contexts and isinstance(request, dict):
            http = {}
            http["http_method"] = _unicodify(request.get("method", None))
            http_headers = _as_dict_safe(request.get("headers", None))
            http["http_referer"] = _unicodify(http_headers.get(
                "Referer", None))
            contexts["request"] = http

        # _as_dict_safe may not return a reference to the entry in the data
        # dictionary in some cases.
        data["contexts"] = contexts

        output["message"] = _unicodify(event["message"])
        output["org_id"] = event["organization_id"]
示例#3
0
    def process_insert(
        self,
        event: Mapping[str, Any],
        metadata: Optional[KafkaMessageMetadata] = None
    ) -> Optional[Mapping[str, Any]]:
        """Process an insert *event* into a single processed row mapping.

        Returns ``None`` when the event is filtered out by
        ``_should_process`` or when it carries no ``data`` payload.
        """
        if not self._should_process(event):
            return None

        processed = {"deleted": 0}
        extract_project_id(processed, event)
        self._extract_event_id(processed, event)
        processed["retention_days"] = enforce_retention(
            event,
            datetime.strptime(event["datetime"],
                              settings.PAYLOAD_DATETIME_FORMAT),
        )

        self.extract_required(processed, event)

        data = event.get("data", {})
        # HACK: https://sentry.io/sentry/snuba/issues/802102397/
        if not data:
            logger.error("No data for event: %s", event, exc_info=True)
            return None
        self.extract_common(processed, event, metadata)
        self.extract_custom(processed, event, metadata)

        sdk = data.get("sdk", None) or {}
        self.extract_sdk(processed, sdk)

        tags = _as_dict_safe(data.get("tags", None))
        self.extract_promoted_tags(processed, tags)
        self.extract_tags_custom(processed, event, tags, metadata)

        contexts = data.get("contexts", None) or {}
        self.extract_promoted_contexts(processed, contexts, tags)
        self.extract_contexts_custom(processed, event, contexts, metadata)

        # Flatten the remaining tags/contexts into parallel key/value arrays.
        processed["contexts.key"], processed[
            "contexts.value"] = extract_extra_contexts(contexts)
        processed["tags.key"], processed["tags.value"] = extract_extra_tags(
            tags)
        processed["_tags_flattened"] = flatten_nested_field(
            processed["tags.key"], processed["tags.value"])

        # "sentry.interfaces.Exception" is the legacy exception interface key.
        exception = (data.get("exception",
                              data.get("sentry.interfaces.Exception", None))
                     or {})
        stacks = exception.get("values", None) or []
        self.extract_stacktraces(processed, stacks)

        # Record the Kafka coordinates of the message, when available.
        if metadata is not None:
            processed["offset"] = metadata.offset
            processed["partition"] = metadata.partition

        return processed
示例#4
0
    def extract_custom(
        self,
        output: MutableMapping[str, Any],
        event: InsertEvent,
        metadata: KafkaMessageMetadata,
    ) -> None:
        """Fill the dataset-specific columns of *output* from *event*.

        Extracts the user fields, the client IP, the HTTP method/referer,
        the message, the primary and hierarchical hashes, and the
        culprit/type/title columns.  Also writes the (possibly augmented)
        contexts mapping back to ``event["data"]["contexts"]``.
        """
        data = event.get("data", {})
        # "sentry.interfaces.User" is the legacy user interface key.
        user_dict = data.get("user", data.get("sentry.interfaces.User",
                                              None)) or {}

        user_data: MutableMapping[str, Any] = {}
        extract_user(user_data, user_dict)
        output["user_name"] = user_data["username"]
        output["user_id"] = user_data["user_id"]
        output["user_email"] = user_data["email"]

        # NOTE(review): _ensure_valid_ip presumably returns None for an
        # unparsable address — confirm; a falsy result skips both columns.
        ip_address = _ensure_valid_ip(user_data["ip_address"])
        if ip_address:
            if ip_address.version == 4:
                output["ip_address_v4"] = str(ip_address)
            elif ip_address.version == 6:
                output["ip_address_v6"] = str(ip_address)

        contexts: MutableMapping[str, Any] = _as_dict_safe(
            data.get("contexts", None))
        # Promote the user's geo data into the contexts, unless a geo
        # context already exists.
        geo = user_dict.get("geo", {})
        if "geo" not in contexts and isinstance(geo, dict):
            contexts["geo"] = geo

        # "sentry.interfaces.Http" is the legacy request interface key.
        request = data.get("request", data.get("sentry.interfaces.Http",
                                               None)) or {}
        http_data: MutableMapping[str, Any] = {}
        extract_http(http_data, request)
        output["http_method"] = http_data["http_method"]
        output["http_referer"] = http_data["http_referer"]

        # _as_dict_safe may not return a reference to the entry in the data
        # dictionary in some cases.
        data["contexts"] = contexts

        output["message"] = _unicodify(event["message"])

        # Normalize hashes to canonical UUID strings.
        output["primary_hash"] = str(uuid.UUID(_hashify(
            event["primary_hash"])))
        output["hierarchical_hashes"] = list(
            str(uuid.UUID(_hashify(x)))
            for x in data.get("hierarchical_hashes") or ())

        output["culprit"] = _unicodify(data.get("culprit", ""))
        output["type"] = _unicodify(data.get("type", ""))
        output["title"] = _unicodify(data.get("title", ""))
示例#5
0
    def process_insert(self, message, metadata=None):
        """Process an insert *message* into a single processed row dict.

        Returns ``None`` when the event carries no ``data`` payload,
        otherwise the processed row.
        """
        processed = {'deleted': 0}
        extract_base(processed, message)
        processed["retention_days"] = enforce_retention(
            message,
            datetime.strptime(message['datetime'],
                              settings.PAYLOAD_DATETIME_FORMAT),
        )

        self.extract_required(processed, message)

        data = message.get('data', {})
        # HACK: https://sentry.io/sentry/snuba/issues/802102397/
        if not data:
            logger.error('No data for event: %s', message, exc_info=True)
            return None
        self.extract_common(processed, message, data)

        sdk = data.get('sdk', None) or {}
        self.extract_sdk(processed, sdk)

        tags = _as_dict_safe(data.get('tags', None))
        self.extract_promoted_tags(processed, tags)

        contexts = data.get('contexts', None) or {}
        self.extract_promoted_contexts(processed, contexts, tags)

        # 'sentry.interfaces.User' is the legacy user interface key.
        user = data.get('user', data.get('sentry.interfaces.User', None)) or {}
        extract_user(processed, user)

        geo = user.get('geo', None) or {}
        self.extract_geo(processed, geo)

        # 'sentry.interfaces.Http' is the legacy request interface key.
        http = data.get('request', data.get('sentry.interfaces.Http',
                                            None)) or {}
        self.extract_http(processed, http)

        # Remaining non-promoted contexts and tags.
        extract_extra_contexts(processed, contexts)
        extract_extra_tags(processed, tags)

        exception = data.get(
            'exception', data.get('sentry.interfaces.Exception', None)) or {}
        stacks = exception.get('values', None) or []
        self.extract_stacktraces(processed, stacks)

        # Record the Kafka coordinates of the message, when available.
        if metadata is not None:
            processed['offset'] = metadata.offset
            processed['partition'] = metadata.partition

        return processed
示例#6
0
    def __fill_common(self, span: MutableMapping[str, Any],
                      data: Mapping[str, Any]) -> None:
        """
        Populate the timestamp, duration and tag fields that transactions
        and spans share; *data* is whichever dictionary the caller sourced
        them from.
        """
        start = self.__extract_timestamp(data["start_timestamp"], )
        span["start_ts"], span["start_ns"] = start
        finish = self.__extract_timestamp(data["timestamp"], )
        span["finish_ts"], span["finish_ns"] = finish

        # Duration in milliseconds, clamped so it can never go negative.
        elapsed = (span["finish_ts"] - span["start_ts"]).total_seconds()
        span["duration_ms"] = max(int(elapsed * 1000), 0)

        span["tags.key"], span["tags.value"] = extract_extra_tags(
            _as_dict_safe(data.get("tags", None)))
示例#7
0
    def _sanitize_contexts(
        self,
        processed: MutableMapping[str, Any],
        event_dict: EventDict,
    ) -> MutableMapping[str, Any]:
        """
        Contexts can store a lot of data. We don't want to store all the data in
        the database. This api removes elements from contexts which are not required
        to be stored. It does that by creating a deepcopy of the contexts and working
        with the deepcopy so that the contexts in the original message are not
        mutated. It returns the new modified context object which can be used to fill
        in the processed object.
        """
        contexts: MutableMapping[str,
                                 Any] = _as_dict_safe(event_dict["data"].get(
                                     "contexts", None))
        if not contexts:
            return {}

        sanitized_context = copy.deepcopy(contexts)
        # We store trace_id and span_id as promoted columns and on the query level
        # we make sure that all queries on contexts[trace.trace_id/span_id] use those promoted
        # columns instead. So we don't need to store them in the contexts array as well
        # (when no trace context exists, the pops below are harmless no-ops
        # on the default empty dict).
        transaction_ctx = sanitized_context.get("trace", {})
        transaction_ctx.pop("trace_id", None)
        transaction_ctx.pop("span_id", None)

        # The hash and exclusive_time is being stored in the spans columns
        # so there is no need to store it again in the context array.
        transaction_ctx.pop("hash", None)
        transaction_ctx.pop("exclusive_time", None)

        # Per-project denylist of contexts that must not be stored at all.
        skipped_contexts = settings.TRANSACT_SKIP_CONTEXT_STORE.get(
            processed["project_id"], set())
        for context in skipped_contexts:
            if context in sanitized_context:
                del sanitized_context[context]

        return sanitized_context
示例#8
0
    def _process_tags(
        self,
        processed: MutableMapping[str, Any],
        event_dict: EventDict,
    ) -> None:
        """Extract the tag arrays and the promoted tag columns.

        Fills ``tags.key``/``tags.value`` plus the promoted columns
        ``release``, ``environment``, ``user`` and ``dist`` on *processed*.
        """
        tags: Mapping[str, Any] = _as_dict_safe(
            event_dict["data"].get("tags", None))
        keys_and_values = extract_extra_tags(tags)
        processed["tags.key"], processed["tags.value"] = keys_and_values

        # Only the tags that are promoted to dedicated columns.
        promoted = {name: tags[name]
                    for name in self.PROMOTED_TAGS if name in tags}
        processed["release"] = promoted.get("sentry:release",
                                            event_dict.get("release"))
        processed["environment"] = promoted.get("environment")
        processed["user"] = promoted.get("sentry:user", "")
        processed["dist"] = _unicodify(
            promoted.get("sentry:dist", event_dict["data"].get("dist")))
示例#9
0
    def _process_contexts_and_user(
        self,
        processed: MutableMapping[str, Any],
        event_dict: EventDict,
    ) -> None:
        """Fill the context arrays and the user/IP columns of *processed*."""
        contexts: MutableMapping[str,
                                 Any] = _as_dict_safe(event_dict["data"].get(
                                     "contexts", None))
        # "sentry.interfaces.User" is the legacy user interface key.
        user_dict = (event_dict["data"].get(
            "user", event_dict["data"].get("sentry.interfaces.User", None))
                     or {})
        geo = user_dict.get("geo", None) or {}

        # Promote the user's geo data into the contexts, unless a geo
        # context already exists.
        if "geo" not in contexts and isinstance(geo, dict):
            contexts["geo"] = geo

        # Per-project denylist of contexts that must not be stored.
        skipped_contexts = settings.TRANSACT_SKIP_CONTEXT_STORE.get(
            processed["project_id"], set())
        for context in skipped_contexts:
            if context in contexts:
                del contexts[context]

        # NOTE(review): the geo injection and deletions above mutate the
        # local `contexts` mapping, but the stored arrays below come from
        # _sanitize_contexts, which re-reads the contexts from event_dict.
        # They only take effect if _as_dict_safe returned the live entry of
        # event_dict["data"] — confirm this is intended.
        sanitized_contexts = self._sanitize_contexts(processed, event_dict)
        processed["contexts.key"], processed[
            "contexts.value"] = extract_extra_contexts(sanitized_contexts)

        user_data: MutableMapping[str, Any] = {}
        extract_user(user_data, user_dict)
        processed["user_name"] = user_data["username"]
        processed["user_id"] = user_data["user_id"]
        processed["user_email"] = user_data["email"]
        # Store the IP in the column matching its version; a falsy result
        # from _ensure_valid_ip skips both columns.
        ip_address = _ensure_valid_ip(user_data["ip_address"])
        if ip_address:
            if ip_address.version == 4:
                processed["ip_address_v4"] = str(ip_address)
            elif ip_address.version == 6:
                processed["ip_address_v6"] = str(ip_address)
示例#10
0
    def process_message(self, message, metadata) -> Optional[ProcessedMessage]:
        """Process a Kafka message into an ``InsertBatch`` of one row.

        Only version 0/1/2 "insert" messages whose payload is a transaction
        event with a trace context are handled; anything else returns
        ``None``.
        """
        processed = {"deleted": 0}
        # Messages are (version, type, event, ...) sequences.
        if not (isinstance(message, (list, tuple)) and len(message) >= 2):
            return None
        version = message[0]
        if version not in (0, 1, 2):
            return None
        type_, event = message[1:3]
        if type_ != "insert":
            return None

        data = event["data"]
        event_type = data.get("type")
        if event_type != "transaction":
            return None
        extract_base(processed, event)
        processed["retention_days"] = enforce_retention(
            event,
            datetime.fromtimestamp(data["timestamp"]),
        )
        # A transaction without a trace context cannot be stored.
        if not data.get("contexts", {}).get("trace"):
            return None

        transaction_ctx = data["contexts"]["trace"]
        trace_id = transaction_ctx["trace_id"]
        try:
            processed["event_id"] = str(uuid.UUID(processed["event_id"]))
            processed["trace_id"] = str(uuid.UUID(trace_id))
            # span_id is a hex string in the payload.
            processed["span_id"] = int(transaction_ctx["span_id"], 16)
            processed["transaction_op"] = _unicodify(
                transaction_ctx.get("op") or "")
            processed["transaction_name"] = _unicodify(
                data.get("transaction") or "")
            processed[
                "start_ts"], processed["start_ms"] = self.__extract_timestamp(
                    data["start_timestamp"], )

            status = transaction_ctx.get("status", None)
            if status:
                int_status = SPAN_STATUS_NAME_TO_CODE.get(
                    status, UNKNOWN_SPAN_STATUS)
            else:
                int_status = UNKNOWN_SPAN_STATUS

            processed["transaction_status"] = int_status

            if data["timestamp"] - data["start_timestamp"] < 0:
                # Seems we have some negative durations in the DB
                metrics.increment("negative_duration")
        except Exception:
            # all these fields are required but we saw some events go through here
            # in the past.  For now bail.
            return
        processed["finish_ts"], processed[
            "finish_ms"] = self.__extract_timestamp(data["timestamp"], )

        # Duration in milliseconds, clamped to zero.
        duration_secs = (processed["finish_ts"] -
                         processed["start_ts"]).total_seconds()
        processed["duration"] = max(int(duration_secs * 1000), 0)

        processed["platform"] = _unicodify(event["platform"])

        tags = _as_dict_safe(data.get("tags", None))
        processed["tags.key"], processed["tags.value"] = extract_extra_tags(
            tags)
        processed["_tags_flattened"] = flatten_nested_field(
            processed["tags.key"], processed["tags.value"])

        # Tags promoted to dedicated columns.
        promoted_tags = {
            col: tags[col]
            for col in self.PROMOTED_TAGS if col in tags
        }
        processed["release"] = promoted_tags.get(
            "sentry:release",
            event.get("release"),
        )
        processed["environment"] = promoted_tags.get("environment")

        contexts = _as_dict_safe(data.get("contexts", None))

        # "sentry.interfaces.User" is the legacy user interface key.
        user_dict = data.get("user", data.get("sentry.interfaces.User",
                                              None)) or {}
        geo = user_dict.get("geo", None) or {}
        if "geo" not in contexts and isinstance(geo, dict):
            contexts["geo"] = geo

        measurements = data.get("measurements")
        if measurements is not None:
            try:
                (
                    processed["measurements.key"],
                    processed["measurements.value"],
                ) = extract_nested(measurements,
                                   lambda value: float(value["value"]))
            except Exception:
                # Not failing the event in this case just yet, because we are still
                # developing this feature.
                logger.error(
                    "Invalid measurements field.",
                    extra={"measurements": measurements},
                    exc_info=True,
                )
        # "sentry.interfaces.Http" is the legacy request interface key.
        request = data.get("request", data.get("sentry.interfaces.Http",
                                               None)) or {}
        http_data: MutableMapping[str, Any] = {}
        extract_http(http_data, request)
        processed["http_method"] = http_data["http_method"]
        processed["http_referer"] = http_data["http_referer"]

        processed["contexts.key"], processed[
            "contexts.value"] = extract_extra_contexts(contexts)
        processed["_contexts_flattened"] = flatten_nested_field(
            processed["contexts.key"], processed["contexts.value"])

        processed["dist"] = _unicodify(
            promoted_tags.get("sentry:dist", data.get("dist")), )

        user_data = {}
        extract_user(user_data, user_dict)
        processed["user"] = promoted_tags.get("sentry:user", "")
        processed["user_name"] = user_data["username"]
        processed["user_id"] = user_data["user_id"]
        processed["user_email"] = user_data["email"]
        # Store the IP in the column matching its version.
        ip_address = _ensure_valid_ip(user_data["ip_address"])

        if ip_address:
            if ip_address.version == 4:
                processed["ip_address_v4"] = str(ip_address)
            elif ip_address.version == 6:
                processed["ip_address_v6"] = str(ip_address)

        # Kafka coordinates of the message.
        processed["partition"] = metadata.partition
        processed["offset"] = metadata.offset

        sdk = data.get("sdk", None) or {}
        processed["sdk_name"] = _unicodify(sdk.get("name") or "")
        processed["sdk_version"] = _unicodify(sdk.get("version") or "")

        if processed["sdk_name"] == "":
            metrics.increment("missing_sdk_name")
        if processed["sdk_version"] == "":
            metrics.increment("missing_sdk_version")

        return InsertBatch([processed])
示例#11
0
 def extract_http(self, output, http):
     """Promote the HTTP method and Referer header onto *output*."""
     headers = _as_dict_safe(http.get('headers', None))
     output['http_method'] = _unicodify(http.get('method', None))
     output['http_referer'] = _unicodify(headers.get('Referer', None))
示例#12
0
    def process_message(
            self, message: Tuple[int, str, Any],
            metadata: KafkaMessageMetadata) -> Optional[ProcessedMessage]:
        """Process a Kafka message into an ``InsertBatch`` of one row.

        Only version 0/1/2 "insert" messages whose payload is a transaction
        event, within retention, with a trace context are handled; anything
        else returns ``None``.
        """
        processed: MutableMapping[str, Any] = {"deleted": 0}
        # Messages are (version, type, event, ...) sequences.
        if not (isinstance(message, (list, tuple)) and len(message) >= 2):
            return None
        version = message[0]
        if version not in (0, 1, 2):
            return None
        type_, event = message[1:3]
        if type_ != "insert":
            return None

        data = event["data"]
        event_type = data.get("type")
        if event_type != "transaction":
            return None
        extract_base(processed, event)

        try:
            # We are purposely using a naive datetime here to work with the
            # rest of the codebase. We can be confident that clients are only
            # sending UTC dates.
            processed["retention_days"] = enforce_retention(
                event,
                datetime.utcfromtimestamp(data["timestamp"]),
            )
        except EventTooOld:
            return None

        # A transaction without a trace context cannot be stored.
        if not data.get("contexts", {}).get("trace"):
            return None

        transaction_ctx = data["contexts"]["trace"]
        trace_id = transaction_ctx["trace_id"]
        processed["event_id"] = str(uuid.UUID(processed["event_id"]))
        processed["trace_id"] = str(uuid.UUID(trace_id))
        # span_id is a hex string in the payload.
        processed["span_id"] = int(transaction_ctx["span_id"], 16)
        processed["transaction_op"] = _unicodify(
            transaction_ctx.get("op") or "")
        processed["transaction_name"] = _unicodify(
            data.get("transaction") or "")
        processed["start_ts"], processed[
            "start_ms"] = self.__extract_timestamp(data["start_timestamp"], )
        status = transaction_ctx.get("status", None)
        if status:
            int_status = SPAN_STATUS_NAME_TO_CODE.get(status,
                                                      UNKNOWN_SPAN_STATUS)
        else:
            int_status = UNKNOWN_SPAN_STATUS

        processed["transaction_status"] = int_status
        if data["timestamp"] - data["start_timestamp"] < 0:
            # Seems we have some negative durations in the DB
            metrics.increment("negative_duration")

        processed["finish_ts"], processed[
            "finish_ms"] = self.__extract_timestamp(data["timestamp"], )

        # Duration in milliseconds, clamped to zero.
        duration_secs = (processed["finish_ts"] -
                         processed["start_ts"]).total_seconds()
        processed["duration"] = max(int(duration_secs * 1000), 0)

        processed["platform"] = _unicodify(event["platform"])

        tags: Mapping[str, Any] = _as_dict_safe(data.get("tags", None))
        processed["tags.key"], processed["tags.value"] = extract_extra_tags(
            tags)

        # Tags promoted to dedicated columns.
        promoted_tags = {
            col: tags[col]
            for col in self.PROMOTED_TAGS if col in tags
        }
        processed["release"] = promoted_tags.get(
            "sentry:release",
            event.get("release"),
        )
        processed["environment"] = promoted_tags.get("environment")

        contexts: MutableMapping[str, Any] = _as_dict_safe(
            data.get("contexts", None))

        # "sentry.interfaces.User" is the legacy user interface key.
        user_dict = data.get("user", data.get("sentry.interfaces.User",
                                              None)) or {}
        geo = user_dict.get("geo", None) or {}
        if "geo" not in contexts and isinstance(geo, dict):
            contexts["geo"] = geo

        measurements = data.get("measurements")
        if measurements is not None:
            try:
                (
                    processed["measurements.key"],
                    processed["measurements.value"],
                ) = extract_nested(
                    measurements,
                    lambda value: float(value["value"])
                    if (value is not None and isinstance(
                        value.get("value"), numbers.Number)) else None,
                )
            except Exception:
                # Not failing the event in this case just yet, because we are still
                # developing this feature.
                logger.error(
                    "Invalid measurements field.",
                    extra={"measurements": measurements},
                    exc_info=True,
                )

        breakdowns = data.get("breakdowns")
        if breakdowns is not None:
            span_op_breakdowns = breakdowns.get("span_ops")
            if span_op_breakdowns is not None:
                try:
                    (
                        processed["span_op_breakdowns.key"],
                        processed["span_op_breakdowns.value"],
                    ) = extract_nested(
                        span_op_breakdowns,
                        lambda value: float(value["value"])
                        if (value is not None and isinstance(
                            value.get("value"), numbers.Number)) else None,
                    )
                except Exception:
                    # Not failing the event in this case just yet, because we are still
                    # developing this feature.
                    logger.error(
                        "Invalid breakdowns.span_ops field.",
                        extra={"span_op_breakdowns": span_op_breakdowns},
                        exc_info=True,
                    )

        # "sentry.interfaces.Http" is the legacy request interface key.
        request = data.get("request", data.get("sentry.interfaces.Http",
                                               None)) or {}
        http_data: MutableMapping[str, Any] = {}
        extract_http(http_data, request)
        processed["http_method"] = http_data["http_method"]
        processed["http_referer"] = http_data["http_referer"]

        # Per-project denylist of contexts that must not be stored.
        skipped_contexts = settings.TRANSACT_SKIP_CONTEXT_STORE.get(
            processed["project_id"], set())
        for context in skipped_contexts:
            if context in contexts:
                del contexts[context]

        processed["contexts.key"], processed[
            "contexts.value"] = extract_extra_contexts(contexts)

        processed["dist"] = _unicodify(
            promoted_tags.get("sentry:dist", data.get("dist")), )

        user_data: MutableMapping[str, Any] = {}
        extract_user(user_data, user_dict)
        processed["user"] = promoted_tags.get("sentry:user", "")
        processed["user_name"] = user_data["username"]
        processed["user_id"] = user_data["user_id"]
        processed["user_email"] = user_data["email"]
        # Store the IP in the column matching its version.
        ip_address = _ensure_valid_ip(user_data["ip_address"])

        if ip_address:
            if ip_address.version == 4:
                processed["ip_address_v4"] = str(ip_address)
            elif ip_address.version == 6:
                processed["ip_address_v6"] = str(ip_address)

        # Kafka coordinates of the message.
        processed["partition"] = metadata.partition
        processed["offset"] = metadata.offset

        sdk = data.get("sdk", None) or {}
        processed["sdk_name"] = _unicodify(sdk.get("name") or "")
        processed["sdk_version"] = _unicodify(sdk.get("version") or "")

        if processed["sdk_name"] == "":
            metrics.increment("missing_sdk_name")
        if processed["sdk_version"] == "":
            metrics.increment("missing_sdk_version")

        return InsertBatch([processed], None)
示例#13
0
    def process_message(self,
                        message,
                        metadata=None) -> Optional[ProcessedMessage]:
        """Process a Kafka message into a single-row ``ProcessedMessage``.

        Only version 0/1/2 "insert" messages whose payload is a transaction
        event are handled; anything else returns ``None``.
        """
        action_type = ProcessorAction.INSERT
        processed = {'deleted': 0}
        # Messages are (version, type, event, ...) sequences.
        if not (isinstance(message, (list, tuple)) and len(message) >= 2):
            return None
        version = message[0]
        if version not in (0, 1, 2):
            return None
        type_, event = message[1:3]
        if type_ != 'insert':
            return None

        data = event["data"]
        event_type = data.get("type")
        if event_type != "transaction":
            return None
        extract_base(processed, event)
        processed["retention_days"] = enforce_retention(
            event,
            datetime.fromtimestamp(data['timestamp']),
        )

        # NOTE(review): unlike the other processors there is no guard for a
        # missing trace context here — a KeyError would propagate; confirm.
        transaction_ctx = data["contexts"]["trace"]
        trace_id = transaction_ctx["trace_id"]
        processed["event_id"] = str(uuid.UUID(processed["event_id"]))
        processed["trace_id"] = str(uuid.UUID(trace_id))
        # span_id is a hex string in the payload.
        processed["span_id"] = int(transaction_ctx["span_id"], 16)
        processed["transaction_op"] = _unicodify(transaction_ctx.get("op", ""))
        processed["transaction_name"] = _unicodify(data["transaction"])

        processed["start_ts"], processed[
            "start_ms"] = self.__extract_timestamp(data["start_timestamp"], )
        processed["finish_ts"], processed[
            "finish_ms"] = self.__extract_timestamp(data["timestamp"], )

        processed['platform'] = _unicodify(event['platform'])

        tags = _as_dict_safe(data.get('tags', None))
        extract_extra_tags(processed, tags)

        # Tags promoted to dedicated columns.
        promoted_tags = {
            col: tags[col]
            for col in self.PROMOTED_TAGS if col in tags
        }
        processed["release"] = promoted_tags.get(
            "sentry:release",
            event.get("release"),
        )
        processed["environment"] = promoted_tags.get("environment")

        contexts = _as_dict_safe(data.get('contexts', None))
        extract_extra_contexts(processed, contexts)

        processed["dist"] = _unicodify(
            promoted_tags.get("sentry:dist", data.get("dist")), )

        user_data = {}
        extract_user(user_data, data.get("user", {}))
        processed["user"] = promoted_tags.get("sentry:user", "")
        processed["user_name"] = user_data["username"]
        processed["user_id"] = user_data["user_id"]
        processed["user_email"] = user_data["email"]
        # Store the IP in the column matching its version.
        ip_address = _ensure_valid_ip(user_data["ip_address"])

        if ip_address:
            if ip_address.version == 4:
                processed["ip_address_v4"] = str(ip_address)
            elif ip_address.version == 6:
                processed["ip_address_v6"] = str(ip_address)

        # Kafka coordinates of the message, when available.
        if metadata is not None:
            processed['partition'] = metadata.partition
            processed['offset'] = metadata.offset

        return ProcessedMessage(
            action=action_type,
            data=[processed],
        )
示例#14
0
 def extract_http(self, output, http):
     """Promote the HTTP method and Referer header onto *output*."""
     headers = _as_dict_safe(http.get("headers", None))
     output["http_method"] = _unicodify(http.get("method", None))
     output["http_referer"] = _unicodify(headers.get("Referer", None))
示例#15
0
    def process_message(self, message, metadata) -> Optional[ProcessedMessage]:
        """Convert a transaction "insert" message into one row per span.

        Emits a row for the transaction's root span plus one row for each
        entry in ``data["spans"]``.  Returns ``None`` for non-insert /
        non-transaction messages, for a missing trace context, or when a
        span id cannot be extracted.
        """
        # Messages are (version, type, event, ...) sequences.
        if not (isinstance(message, (list, tuple)) and len(message) >= 2):
            return None
        version = message[0]
        if version not in (0, 1, 2):
            return None
        type_, event = message[1:3]
        if type_ != "insert":
            return None

        data = event["data"]
        event_type = data.get("type")
        if event_type != "transaction":
            return None

        ret: List[MutableMapping[str, Any]] = []

        # Add the transaction span
        transaction_ctx = data["contexts"].get("trace")
        if not transaction_ctx:
            metrics.increment("missing_trace_ctx")
            return None

        # Add the transaction root span
        processed = self.__init_span(event)
        processed["span_id"] = self.__safe_extract_int(
            "transaction:span_id", transaction_ctx["span_id"], False)
        if processed["span_id"] is None:
            return None
        processed["transaction_name"] = _unicodify(
            data.get("transaction") or "")
        # parent_span_id is optional for the root span.
        processed["parent_span_id"] = self.__safe_extract_int(
            "transaction:parent_span_id",
            transaction_ctx.get("parent_span_id"), True)

        # The root span's description mirrors the transaction name.
        processed["description"] = _unicodify(data.get("transaction") or "")
        processed["op"] = _unicodify(transaction_ctx.get("op") or "")
        status = transaction_ctx.get("status", None)
        self.__fill_status(processed, status)
        self.__fill_common(processed, event["data"])
        ret.append(processed)

        spans = data.get("spans", [])
        for span in spans:
            processed = self.__init_span(event)
            processed["span_id"] = self.__safe_extract_int(
                "span:span_id", span["span_id"], False)
            if processed["span_id"] is None:
                return None
            processed["parent_span_id"] = self.__safe_extract_int(
                "span:parent_span_id", span.get("parent_span_id"), True)

            processed["description"] = span.get("description", "") or ""
            processed["op"] = span["op"]
            tags = _as_dict_safe(span.get("tags", None))
            processed["tags.key"], processed[
                "tags.value"] = extract_extra_tags(tags)

            status = span.get("status", None)
            self.__fill_status(processed, status)
            self.__fill_common(processed, span)
            ret.append(processed)

        if ret:
            return InsertBatch(ret)
        else:
            return None
示例#16
0
    def process_message(self,
                        message,
                        metadata=None) -> Optional[ProcessedMessage]:
        """Build a single transaction row from a raw event message.

        Extracts identifiers, timestamps, duration, tags, contexts and
        user data into a flat mapping and wraps it in a
        ``ProcessedMessage`` insert action.

        Returns ``None`` for malformed, non-insert, or non-transaction
        messages, and for events missing required fields.
        """
        action_type = ProcessorAction.INSERT
        processed = {'deleted': 0}
        # Messages are sequences of (version, type, event, ...).  Three
        # elements are unpacked below, so require at least three here:
        # the previous ``>= 2`` guard allowed a two-element sequence,
        # and ``message[1:3]`` would then raise ValueError on unpacking
        # instead of returning None like every other malformed case.
        if not (isinstance(message, (list, tuple)) and len(message) >= 3):
            return None
        version = message[0]
        if version not in (0, 1, 2):
            return None
        type_, event = message[1:3]
        if type_ != 'insert':
            return None

        data = event["data"]
        event_type = data.get("type")
        if event_type != "transaction":
            return None
        extract_base(processed, event)
        processed["retention_days"] = enforce_retention(
            event,
            datetime.fromtimestamp(data['timestamp']),
        )
        # A transaction without a trace context cannot be stored.
        if not data.get('contexts', {}).get('trace'):
            return None

        transaction_ctx = data["contexts"]["trace"]
        trace_id = transaction_ctx["trace_id"]
        try:
            processed["event_id"] = str(uuid.UUID(processed["event_id"]))
            processed["trace_id"] = str(uuid.UUID(trace_id))
            # span_id arrives as a hex string.
            processed["span_id"] = int(transaction_ctx["span_id"], 16)
            processed["transaction_op"] = _unicodify(
                transaction_ctx.get("op", ""))
            processed["transaction_name"] = _unicodify(data["transaction"])
            processed[
                "start_ts"], processed["start_ms"] = self.__extract_timestamp(
                    data["start_timestamp"], )
            if data["timestamp"] - data["start_timestamp"] < 0:
                # Seems we have some negative durations in the DB
                metrics.increment('negative_duration')
        except Exception:
            # All the fields above are required, but malformed events
            # have been observed in the past.  Deliberately best-effort:
            # drop the event rather than crash the consumer.
            return None
        processed["finish_ts"], processed[
            "finish_ms"] = self.__extract_timestamp(data["timestamp"], )

        # Duration in milliseconds, clamped to zero to absorb the
        # negative-duration events counted above.
        duration_secs = (processed["finish_ts"] -
                         processed["start_ts"]).total_seconds()
        processed['duration'] = max(int(duration_secs * 1000), 0)

        processed['platform'] = _unicodify(event['platform'])

        tags = _as_dict_safe(data.get('tags', None))
        extract_extra_tags(processed, tags)

        # Promoted tags get dedicated columns; sentry:release falls back
        # to the top-level release field when the tag is absent.
        promoted_tags = {
            col: tags[col]
            for col in self.PROMOTED_TAGS if col in tags
        }
        processed["release"] = promoted_tags.get(
            "sentry:release",
            event.get("release"),
        )
        processed["environment"] = promoted_tags.get("environment")

        contexts = _as_dict_safe(data.get('contexts', None))
        extract_extra_contexts(processed, contexts)

        processed["dist"] = _unicodify(
            promoted_tags.get("sentry:dist", data.get("dist")), )

        user_data = {}
        extract_user(user_data, data.get("user", {}))
        processed["user"] = promoted_tags.get("sentry:user", "")
        processed["user_name"] = user_data["username"]
        processed["user_id"] = user_data["user_id"]
        processed["user_email"] = user_data["email"]
        ip_address = _ensure_valid_ip(user_data["ip_address"])

        # Store the IP in the column matching its version.
        if ip_address:
            if ip_address.version == 4:
                processed["ip_address_v4"] = str(ip_address)
            elif ip_address.version == 6:
                processed["ip_address_v6"] = str(ip_address)

        if metadata is not None:
            processed['partition'] = metadata.partition
            processed['offset'] = metadata.offset

        return ProcessedMessage(
            action=action_type,
            data=[processed],
        )