def __fill_status(self, span: MutableMapping[str, Any], status: Optional[str]) -> None:
    """Write the numeric status code for ``status`` into ``span["status"]``.

    A falsy ``status`` (``None`` or empty string) and a status name missing
    from ``SPAN_STATUS_NAME_TO_CODE`` both resolve to ``UNKNOWN_SPAN_STATUS``.
    """
    span["status"] = (
        SPAN_STATUS_NAME_TO_CODE.get(status, UNKNOWN_SPAN_STATUS)
        if status
        else UNKNOWN_SPAN_STATUS
    )
def _process_base_event_values(
    self, processed: MutableMapping[str, Any], event_dict: EventDict
) -> MutableMapping[str, Any]:
    """Fill ``processed`` with the core transaction columns of ``event_dict``.

    Populates the identifiers (event, trace, span), the transaction op/name,
    the start/finish timestamps with their millisecond parts, the numeric
    transaction status, the duration in milliseconds and the platform.

    Returns the same ``processed`` mapping that was passed in.
    """
    extract_base(processed, event_dict)

    data = event_dict["data"]
    trace_ctx = data["contexts"]["trace"]
    raw_trace_id = trace_ctx["trace_id"]

    # Normalise both ids to the canonical dashed UUID string form.
    processed["event_id"] = str(uuid.UUID(processed["event_id"]))
    processed["trace_id"] = str(uuid.UUID(raw_trace_id))
    # The span id is parsed as a hexadecimal string.
    processed["span_id"] = int(trace_ctx["span_id"], 16)
    processed["transaction_op"] = _unicodify(trace_ctx.get("op") or "")
    processed["transaction_name"] = _unicodify(data.get("transaction") or "")

    start_ts, start_ms = self.__extract_timestamp(data["start_timestamp"])
    processed["start_ts"] = start_ts
    processed["start_ms"] = start_ms

    status_name = trace_ctx.get("status", None)
    # Missing and unrecognised status names both map to the unknown code.
    processed["transaction_status"] = (
        SPAN_STATUS_NAME_TO_CODE.get(status_name, UNKNOWN_SPAN_STATUS)
        if status_name
        else UNKNOWN_SPAN_STATUS
    )

    if data["timestamp"] - data["start_timestamp"] < 0:
        # Seems we have some negative durations in the DB
        metrics.increment("negative_duration")

    finish_ts, finish_ms = self.__extract_timestamp(data["timestamp"])
    processed["finish_ts"] = finish_ts
    processed["finish_ms"] = finish_ms

    elapsed_ms = int((finish_ts - start_ts).total_seconds() * 1000)
    # Negative durations are clamped to zero.
    processed["duration"] = max(elapsed_ms, 0)

    processed["platform"] = _unicodify(event_dict["platform"])
    return processed
def convert_search_filter_to_snuba_query(search_filter):
    """Translate one parsed search filter into a snuba condition.

    Returns ``None`` for keys listed in ``no_conversion``, a list of
    conditions (which are OR'd together) for ``environment`` filters and for
    non-tag ``!=`` comparisons, and a single ``[lhs, operator, rhs]``
    condition otherwise.

    Raises ``InvalidSearchQuery`` for a wildcard on ``id`` and for an
    unrecognised ``transaction.status`` value.
    """
    name = search_filter.key.name
    value = search_filter.value.value

    if name in no_conversion:
        return None

    if name == "id" and search_filter.value.is_wildcard():
        raise InvalidSearchQuery("Wildcard conditions are not permitted on `id` field.")

    if name == "environment":
        is_equality = search_filter.operator == "="
        env_values = set(value) if isinstance(value, (list, tuple)) else {value}

        # conditions collected here are OR'd
        conditions = []

        # the "no environment" environment is null in snuba
        if "" in env_values:
            env_values.discard("")
            conditions.append(
                ["environment", "IS NULL" if is_equality else "IS NOT NULL", None]
            )

        if len(env_values) == 1:
            conditions.append(
                ["environment", "=" if is_equality else "!=", env_values.pop()]
            )
        elif env_values:
            conditions.append(
                ["environment", "IN" if is_equality else "NOT IN", env_values]
            )

        return conditions

    if name == "message":
        if search_filter.value.is_wildcard():
            # XXX: We don't want the '^$' values at the beginning and end of
            # the regex since we want to find the pattern anywhere in the
            # message. Strip off here
            pattern = search_filter.value.value[1:-1]
            return [
                ["match", ["message", u"'(?i){}'".format(pattern)]],
                search_filter.operator,
                1,
            ]

        # https://clickhouse.yandex/docs/en/query_language/functions/string_search_functions/#position-haystack-needle
        # positionCaseInsensitive returns 0 if not found and an index of 1 or more if found
        # so we should flip the operator here
        flipped = "=" if search_filter.operator == "!=" else "!="
        # make message search case insensitive
        return [
            ["positionCaseInsensitive", ["message", u"'{}'".format(value)]],
            flipped,
            0,
        ]

    if name.startswith(("stack.", "error.")) and search_filter.value.is_wildcard():
        # Escape and convert meta characters for LIKE expressions.
        like_value = search_filter.value.raw_value
        like_value = like_value.replace("%", "\\%")
        like_value = like_value.replace("_", "\\_")
        like_value = like_value.replace("*", "%")
        like_operator = "LIKE" if search_filter.operator == "=" else "NOT LIKE"
        return [name, like_operator, like_value]

    if name == "transaction.status":
        status_code = SPAN_STATUS_NAME_TO_CODE.get(search_filter.value.raw_value)
        if status_code is None:
            accepted = ", ".join(SPAN_STATUS_NAME_TO_CODE.keys())
            raise InvalidSearchQuery(
                u"Invalid value for transaction.status condition. Accepted values are {}".format(
                    accepted
                )
            )
        return [name, search_filter.operator, status_code]

    # Generic column / tag comparison.
    # Datetimes become millisecond timestamps, except on `timestamp` itself.
    if isinstance(value, datetime) and name != "timestamp":
        value = int(to_timestamp(value)) * 1000

    # Tags are never null, but promoted tags are columns and so can be null.
    # To handle both cases, use `ifNull` to convert to an empty string and
    # compare so we need to check for empty values.
    if search_filter.key.is_tag:
        name = ["ifNull", [name, "''"]]

    # Handle checks for existence
    if search_filter.operator in ("=", "!=") and search_filter.value.value == "":
        if search_filter.key.is_tag:
            return [name, search_filter.operator, value]
        # If not a tag, we can just check that the column is null.
        return [["isNull", [name]], search_filter.operator, 1]

    # Handle null columns on inequality comparisons. Any comparison
    # between a value and a null will result to null, so we need to
    # explicitly check for whether the condition is null, and OR it
    # together with the inequality check.
    # We don't need to apply this for tags, since if they don't exist
    # they'll always be an empty string.
    needs_null_check = search_filter.operator == "!=" and not search_filter.key.is_tag
    null_condition = [["isNull", [name]], "=", 1] if needs_null_check else None

    if search_filter.value.is_wildcard():
        condition = [
            ["match", [name, u"'(?i){}'".format(value)]],
            search_filter.operator,
            1,
        ]
    else:
        condition = [name, search_filter.operator, value]

    # We only want to return as a list if we have the check for null
    # present. Returning as a list causes these conditions to be ORed
    # together. Otherwise just return the raw condition, so that it can be
    # used correctly in aggregates.
    if null_condition is None:
        return condition
    return [null_condition, condition]
def process_message(self, message, metadata) -> "Optional[ProcessedMessage]":
    """Turn a raw transaction event message into an ``InsertBatch``.

    ``message`` is a ``(version, type, event, ...)`` list or tuple and
    ``metadata`` supplies the ``partition`` and ``offset`` stored on the row.

    Returns ``None`` (the message is skipped) when:
      * ``message`` is not a list/tuple with at least three elements,
      * the version is not 0, 1 or 2,
      * the type is not ``"insert"``,
      * the event type is not ``"transaction"``,
      * the event has no trace context, or
      * any required id/timestamp field fails to parse.

    Otherwise returns an ``InsertBatch`` holding the single processed row.
    """
    # The payload is unpacked below as (version, type, event), so three
    # elements are required. Shorter messages are rejected here instead of
    # failing with a ValueError on the unpack.
    if not (isinstance(message, (list, tuple)) and len(message) >= 3):
        return None
    version = message[0]
    if version not in (0, 1, 2):
        return None
    type_, event = message[1:3]
    if type_ != "insert":
        return None

    data = event["data"]
    event_type = data.get("type")
    if event_type != "transaction":
        return None

    processed = {"deleted": 0}
    extract_base(processed, event)
    # Use a naive UTC datetime so the retention check does not depend on the
    # server's local timezone (the event timestamp is an epoch value).
    processed["retention_days"] = enforce_retention(
        event, datetime.utcfromtimestamp(data["timestamp"]),
    )
    if not data.get("contexts", {}).get("trace"):
        return None

    transaction_ctx = data["contexts"]["trace"]
    trace_id = transaction_ctx["trace_id"]
    try:
        processed["event_id"] = str(uuid.UUID(processed["event_id"]))
        processed["trace_id"] = str(uuid.UUID(trace_id))
        processed["span_id"] = int(transaction_ctx["span_id"], 16)
        processed["transaction_op"] = _unicodify(transaction_ctx.get("op") or "")
        processed["transaction_name"] = _unicodify(data.get("transaction") or "")
        processed["start_ts"], processed["start_ms"] = self.__extract_timestamp(
            data["start_timestamp"],
        )

        status = transaction_ctx.get("status", None)
        if status:
            int_status = SPAN_STATUS_NAME_TO_CODE.get(status, UNKNOWN_SPAN_STATUS)
        else:
            int_status = UNKNOWN_SPAN_STATUS
        processed["transaction_status"] = int_status

        if data["timestamp"] - data["start_timestamp"] < 0:
            # Seems we have some negative durations in the DB
            metrics.increment("negative_duration")
    except Exception:
        # all these fields are required but we saw some events go through here
        # in the past. For now bail.
        return None

    processed["finish_ts"], processed["finish_ms"] = self.__extract_timestamp(
        data["timestamp"],
    )

    duration_secs = (processed["finish_ts"] - processed["start_ts"]).total_seconds()
    # Negative durations are clamped to zero.
    processed["duration"] = max(int(duration_secs * 1000), 0)

    processed["platform"] = _unicodify(event["platform"])

    tags = _as_dict_safe(data.get("tags", None))
    processed["tags.key"], processed["tags.value"] = extract_extra_tags(tags)
    processed["_tags_flattened"] = flatten_nested_field(
        processed["tags.key"], processed["tags.value"]
    )

    promoted_tags = {col: tags[col] for col in self.PROMOTED_TAGS if col in tags}
    # A promoted tag takes precedence over the top-level event field.
    processed["release"] = promoted_tags.get(
        "sentry:release", event.get("release"),
    )
    processed["environment"] = promoted_tags.get("environment")

    contexts = _as_dict_safe(data.get("contexts", None))

    user_dict = data.get("user", data.get("sentry.interfaces.User", None)) or {}
    geo = user_dict.get("geo", None) or {}
    # Expose the user's geo data as a context unless one is already present.
    if "geo" not in contexts and isinstance(geo, dict):
        contexts["geo"] = geo

    measurements = data.get("measurements")
    if measurements is not None:
        try:
            (
                processed["measurements.key"],
                processed["measurements.value"],
            ) = extract_nested(measurements, lambda value: float(value["value"]))
        except Exception:
            # Not failing the event in this case just yet, because we are still
            # developing this feature.
            logger.error(
                "Invalid measurements field.",
                extra={"measurements": measurements},
                exc_info=True,
            )

    request = data.get("request", data.get("sentry.interfaces.Http", None)) or {}
    http_data: MutableMapping[str, Any] = {}
    extract_http(http_data, request)
    processed["http_method"] = http_data["http_method"]
    processed["http_referer"] = http_data["http_referer"]

    processed["contexts.key"], processed["contexts.value"] = extract_extra_contexts(
        contexts
    )
    processed["_contexts_flattened"] = flatten_nested_field(
        processed["contexts.key"], processed["contexts.value"]
    )

    processed["dist"] = _unicodify(
        promoted_tags.get("sentry:dist", data.get("dist")),
    )

    user_data = {}
    extract_user(user_data, user_dict)
    processed["user"] = promoted_tags.get("sentry:user", "")
    processed["user_name"] = user_data["username"]
    processed["user_id"] = user_data["user_id"]
    processed["user_email"] = user_data["email"]
    ip_address = _ensure_valid_ip(user_data["ip_address"])

    if ip_address:
        if ip_address.version == 4:
            processed["ip_address_v4"] = str(ip_address)
        elif ip_address.version == 6:
            processed["ip_address_v6"] = str(ip_address)

    processed["partition"] = metadata.partition
    processed["offset"] = metadata.offset

    sdk = data.get("sdk", None) or {}
    processed["sdk_name"] = _unicodify(sdk.get("name") or "")
    processed["sdk_version"] = _unicodify(sdk.get("version") or "")

    if processed["sdk_name"] == "":
        metrics.increment("missing_sdk_name")
    if processed["sdk_version"] == "":
        metrics.increment("missing_sdk_version")

    return InsertBatch([processed])
def process_message(
    self, message: Tuple[int, str, Any], metadata: "KafkaMessageMetadata"
) -> "Optional[ProcessedMessage]":
    """Turn a raw transaction event message into an ``InsertBatch``.

    ``message`` is a ``(version, type, event, ...)`` list or tuple and
    ``metadata`` supplies the ``partition`` and ``offset`` stored on the row.

    Returns ``None`` (the message is skipped) when:
      * ``message`` is not a list/tuple with at least three elements,
      * the version is not 0, 1 or 2,
      * the type is not ``"insert"``,
      * the event type is not ``"transaction"``,
      * ``enforce_retention`` raises ``EventTooOld``, or
      * the event has no trace context.

    Otherwise returns an ``InsertBatch`` holding the single processed row.
    """

    def _numeric_value(value):
        # Entries that are None or whose "value" is not a number yield None.
        if value is not None and isinstance(value.get("value"), numbers.Number):
            return float(value["value"])
        return None

    # The payload is unpacked below as (version, type, event), so three
    # elements are required. Shorter messages are rejected here instead of
    # failing with a ValueError on the unpack.
    if not (isinstance(message, (list, tuple)) and len(message) >= 3):
        return None
    version = message[0]
    if version not in (0, 1, 2):
        return None
    type_, event = message[1:3]
    if type_ != "insert":
        return None

    data = event["data"]
    event_type = data.get("type")
    if event_type != "transaction":
        return None

    processed: MutableMapping[str, Any] = {"deleted": 0}
    extract_base(processed, event)

    try:
        # We are purposely using a naive datetime here to work with the
        # rest of the codebase. We can be confident that clients are only
        # sending UTC dates.
        processed["retention_days"] = enforce_retention(
            event, datetime.utcfromtimestamp(data["timestamp"]),
        )
    except EventTooOld:
        return None

    if not data.get("contexts", {}).get("trace"):
        return None

    transaction_ctx = data["contexts"]["trace"]
    trace_id = transaction_ctx["trace_id"]
    processed["event_id"] = str(uuid.UUID(processed["event_id"]))
    processed["trace_id"] = str(uuid.UUID(trace_id))
    # The span id is parsed as a hexadecimal string.
    processed["span_id"] = int(transaction_ctx["span_id"], 16)
    processed["transaction_op"] = _unicodify(transaction_ctx.get("op") or "")
    processed["transaction_name"] = _unicodify(data.get("transaction") or "")
    processed["start_ts"], processed["start_ms"] = self.__extract_timestamp(
        data["start_timestamp"],
    )

    status = transaction_ctx.get("status", None)
    if status:
        int_status = SPAN_STATUS_NAME_TO_CODE.get(status, UNKNOWN_SPAN_STATUS)
    else:
        int_status = UNKNOWN_SPAN_STATUS
    processed["transaction_status"] = int_status

    if data["timestamp"] - data["start_timestamp"] < 0:
        # Seems we have some negative durations in the DB
        metrics.increment("negative_duration")

    processed["finish_ts"], processed["finish_ms"] = self.__extract_timestamp(
        data["timestamp"],
    )

    duration_secs = (processed["finish_ts"] - processed["start_ts"]).total_seconds()
    # Negative durations are clamped to zero.
    processed["duration"] = max(int(duration_secs * 1000), 0)

    processed["platform"] = _unicodify(event["platform"])

    tags: Mapping[str, Any] = _as_dict_safe(data.get("tags", None))
    processed["tags.key"], processed["tags.value"] = extract_extra_tags(tags)

    promoted_tags = {col: tags[col] for col in self.PROMOTED_TAGS if col in tags}
    # A promoted tag takes precedence over the top-level event field.
    processed["release"] = promoted_tags.get(
        "sentry:release", event.get("release"),
    )
    processed["environment"] = promoted_tags.get("environment")

    contexts: MutableMapping[str, Any] = _as_dict_safe(data.get("contexts", None))

    user_dict = data.get("user", data.get("sentry.interfaces.User", None)) or {}
    geo = user_dict.get("geo", None) or {}
    # Expose the user's geo data as a context unless one is already present.
    if "geo" not in contexts and isinstance(geo, dict):
        contexts["geo"] = geo

    measurements = data.get("measurements")
    if measurements is not None:
        try:
            (
                processed["measurements.key"],
                processed["measurements.value"],
            ) = extract_nested(measurements, _numeric_value)
        except Exception:
            # Not failing the event in this case just yet, because we are still
            # developing this feature.
            logger.error(
                "Invalid measurements field.",
                extra={"measurements": measurements},
                exc_info=True,
            )

    breakdowns = data.get("breakdowns")
    if breakdowns is not None:
        span_op_breakdowns = breakdowns.get("span_ops")
        if span_op_breakdowns is not None:
            try:
                (
                    processed["span_op_breakdowns.key"],
                    processed["span_op_breakdowns.value"],
                ) = extract_nested(span_op_breakdowns, _numeric_value)
            except Exception:
                # Not failing the event in this case just yet, because we are still
                # developing this feature.
                logger.error(
                    "Invalid breakdowns.span_ops field.",
                    extra={"span_op_breakdowns": span_op_breakdowns},
                    exc_info=True,
                )

    request = data.get("request", data.get("sentry.interfaces.Http", None)) or {}
    http_data: MutableMapping[str, Any] = {}
    extract_http(http_data, request)
    processed["http_method"] = http_data["http_method"]
    processed["http_referer"] = http_data["http_referer"]

    # Drop the contexts configured to be skipped for this project.
    skipped_contexts = settings.TRANSACT_SKIP_CONTEXT_STORE.get(
        processed["project_id"], set()
    )
    for context in skipped_contexts:
        contexts.pop(context, None)

    processed["contexts.key"], processed["contexts.value"] = extract_extra_contexts(
        contexts
    )

    processed["dist"] = _unicodify(
        promoted_tags.get("sentry:dist", data.get("dist")),
    )

    user_data: MutableMapping[str, Any] = {}
    extract_user(user_data, user_dict)
    processed["user"] = promoted_tags.get("sentry:user", "")
    processed["user_name"] = user_data["username"]
    processed["user_id"] = user_data["user_id"]
    processed["user_email"] = user_data["email"]
    ip_address = _ensure_valid_ip(user_data["ip_address"])
    if ip_address:
        if ip_address.version == 4:
            processed["ip_address_v4"] = str(ip_address)
        elif ip_address.version == 6:
            processed["ip_address_v6"] = str(ip_address)

    processed["partition"] = metadata.partition
    processed["offset"] = metadata.offset

    sdk = data.get("sdk", None) or {}
    processed["sdk_name"] = _unicodify(sdk.get("name") or "")
    processed["sdk_version"] = _unicodify(sdk.get("version") or "")

    if processed["sdk_name"] == "":
        metrics.increment("missing_sdk_name")
    if processed["sdk_version"] == "":
        metrics.increment("missing_sdk_version")

    return InsertBatch([processed], None)