def get_single_metric_info(projects: Sequence[Project], metric_name: str) -> MetricMetaWithTagKeys:
    """Return metadata (type, available operations, tag keys, unit) for one raw metric.

    Probes each metric entity (counter, set, distribution) in turn and returns
    info from the first entity that has data for the metric.

    Raises:
        InvalidParams: if the metric name is unknown to the indexer, or no
            entity contains data for it in the given projects.
    """
    assert projects

    metric_id = indexer.resolve(metric_name)
    if metric_id is None:
        # Fix: previously raised a bare InvalidParams with no message.
        raise InvalidParams(f"Raw metric {metric_name} does not exist")

    for metric_type in ("counter", "set", "distribution"):
        # TODO: What if metric_id exists for multiple types / units?
        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        data = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id"), Column("tags.key")],
            where=[Condition(Column("metric_id"), Op.EQ, metric_id)],
            groupby=[Column("metric_id"), Column("tags.key")],
            referrer="snuba.metrics.meta.get_single_metric",
            projects=projects,
            org_id=projects[0].organization_id,
        )
        if data:
            # tags.key is an array column; flatten all rows into a set of tag ids.
            tag_ids = {tag_id for row in data for tag_id in row["tags.key"]}
            return {
                "name": metric_name,
                "type": metric_type,
                "operations": AVAILABLE_OPERATIONS[entity_key.value],
                "tags": sorted(
                    ({"key": reverse_resolve(tag_id)} for tag_id in tag_ids),
                    key=itemgetter("key"),
                ),
                "unit": None,
            }

    # Fix: error message typo — was "does not exit".
    raise InvalidParams(f"Raw metric {metric_name} does not exist")
def get_metrics(projects: Sequence[Project]) -> Sequence[MetricMeta]:
    """Return metadata for every raw and derived metric available to *projects*,
    sorted by metric name."""
    assert projects

    org_id = projects[0].organization_id
    meta: List[MetricMeta] = []
    ids_per_type = {}

    for entity_type in ("counter", "set", "distribution"):
        ids_per_type.setdefault(entity_type, set())
        entity_key = METRIC_TYPE_TO_ENTITY[entity_type]
        for record in _get_metrics_for_entity(
            entity_key=entity_key,
            projects=projects,
            org_id=org_id,
        ):
            meta.append(
                MetricMeta(
                    name=reverse_resolve(record["metric_id"]),
                    type=entity_type,
                    operations=AVAILABLE_OPERATIONS[entity_key.value],
                    unit=None,  # snuba does not know the unit
                )
            )
            ids_per_type[entity_type].add(record["metric_id"])

    # The loop above collected every available metric id per entity (with the
    # projects filter applied). A derived metric is supported for these projects
    # iff the ids of its constituent metrics are a subset of the ids found in a
    # single entity, i.e. it is a SingularEntityDerivedMetric.
    # ToDo(ahmed): When CompositeEntityDerivedMetrics are introduced we need to do these checks
    #  not on the instance of the CompositeEntityDerivedMetric but rather on its
    #  SingularEntityDerivedMetric constituents
    for derived_name in get_available_derived_metrics(ids_per_type):
        derived = DERIVED_METRICS[derived_name]
        meta.append(
            MetricMeta(
                name=derived.metric_name,
                type=derived.result_type,
                operations=derived.generate_available_operations(),
                unit=derived.unit,
            )
        )

    return sorted(meta, key=itemgetter("name"))
def translate_sessions_tag_keys_and_values(
    data: List[Dict[str, Any]], org_id: int, alias: Optional[str] = None
) -> Tuple[int, int]:
    """Map indexed `session.status` rows back to readable values and return the
    (total session count, crashed session count) pair.

    Falls back to (0, 0) — after incrementing a metric — when the tag key or a
    tag value is missing from the indexer.
    """
    column = alias if alias else "value"
    try:
        status_key = resolve_tag_key("session.status")
        # One value per status: e.g. {"init": 10, "crashed": 2}
        by_status: Dict[str, Any] = {
            reverse_resolve(row[status_key]): row[column] for row in data
        }
    except MetricIndexNotFound:
        metrics.incr("incidents.entity_subscription.metric_index_not_found")
        return 0, 0
    return by_status.get("init", 0), by_status.get("crashed", 0)
def run_sessions_query(
    org_id: int,
    query: QueryDefinition,
    span_op: str,
) -> SessionsQueryResult:
    """Convert a QueryDefinition to multiple snuba queries and reformat the results.

    Returns a sessions-API-shaped payload: start/end/query/intervals plus a list
    of groups, each with "by", "totals" and "series" entries.
    NOTE(review): `span_op` is not used anywhere in this body — presumably kept
    for interface compatibility with callers; confirm before removing.
    """
    # This is necessary so that we do not mutate the query object shared between different
    # backend runs
    query_clone = deepcopy(query)

    data, metric_to_output_field = _fetch_data(org_id, query_clone)

    data_points = _flatten_data(org_id, data)

    intervals = list(get_intervals(query_clone))
    # Lookup from ISO timestamp string to its position in the series arrays.
    timestamp_index = {timestamp.isoformat(): index for index, timestamp in enumerate(intervals)}

    def default_for(field: SessionsQueryFunction) -> SessionsQueryValue:
        # Counts default to 0; everything else (e.g. percentiles) defaults to null.
        return 0 if field in ("sum(session)", "count_unique(user)") else None

    GroupKey = Tuple[Tuple[GroupByFieldName, Union[str, int]], ...]

    class Group(TypedDict):
        series: MutableMapping[SessionsQueryFunction, List[SessionsQueryValue]]
        totals: MutableMapping[SessionsQueryFunction, SessionsQueryValue]

    # defaultdict: merely accessing groups[key] creates a fully-populated empty
    # group (all fields at their defaults, one series slot per interval).
    groups: MutableMapping[GroupKey, Group] = defaultdict(
        lambda: {
            "totals": {field: default_for(field) for field in query_clone.raw_fields},
            "series": {
                field: len(intervals) * [default_for(field)] for field in query_clone.raw_fields
            },
        }
    )

    if len(data_points) == 0:
        # We're only interested in `session.status` group-byes. The rest of the
        # conditions require work (e.g. getting all environments) that we can't
        # get without querying the DB, including group-byes consisting of
        # multiple parameters (even if `session.status` is one of them).
        if query_clone.raw_groupby == ["session.status"]:
            for status in get_args(_SessionStatus):
                gkey: GroupKey = (("session.status", status),)
                # Bare access intentionally materializes an empty default group.
                groups[gkey]
    else:
        for key in data_points.keys():
            try:
                output_field = metric_to_output_field[key.metric_key, key.column]
            except KeyError:
                continue  # secondary metric, like session.error

            # Build the "by" dict for this data point from the resolved tag ids.
            by: MutableMapping[GroupByFieldName, Union[str, int]] = {}
            if key.release is not None:
                # Every session has a release, so this should not throw
                by["release"] = reverse_resolve(key.release)
            if key.environment is not None:
                # To match behavior of the old sessions backend, session data
                # without environment is grouped under the empty string.
                by["environment"] = reverse_resolve_weak(key.environment) or ""
            if key.project_id is not None:
                by["project"] = key.project_id

            for status_value in output_field.get_values(data_points, key):
                if status_value.session_status is not None:
                    # NOTE: mutates `by` across loop iterations; once a status is
                    # set it persists for subsequent status_values of this key.
                    by["session.status"] = status_value.session_status
                # Sort so that the same group-by combination always yields the
                # same (hashable) key regardless of insertion order.
                group_key: GroupKey = tuple(sorted(by.items()))
                group: Group = groups[group_key]

                value = status_value.value
                if value is not None:
                    # Replace inf/nan with JSON-safe values.
                    value = finite_or_none(value)

                if key.bucketed_time is None:
                    group["totals"][output_field.get_name()] = value
                else:
                    index = timestamp_index[key.bucketed_time]
                    group["series"][output_field.get_name()][index] = value

    groups_as_list: List[SessionsQueryGroup] = [
        {
            "by": dict(by),
            "totals": group["totals"],
            "series": group["series"],
        }
        for by, group in groups.items()
    ]

    def format_datetime(dt: datetime) -> str:
        # Sessions API convention: "Z" suffix instead of "+00:00".
        return dt.isoformat().replace("+00:00", "Z")

    return {
        "start": format_datetime(query_clone.start),
        "end": format_datetime(query_clone.end),
        "query": query_clone.query,
        "intervals": [format_datetime(dt) for dt in intervals],
        "groups": groups_as_list,
    }
def _fetch_tags_or_values_per_ids(
    projects: Sequence[Project],
    metric_names: Optional[Sequence[str]],
    referrer: str,
    column: str,
) -> Tuple[Union[Sequence[Tag], Sequence[TagValue]], Optional[str]]:
    """
    Function that takes as input projects, metric_names, and a column, and based on the column
    selection, either returns tags or tag values for the combination of projects and
    metric_names selected or in the case of no metric_names passed, returns basically all the
    tags or the tag values available for those projects. In addition, when exactly one metric
    name is passed in metric_names, then the type (i.e. mapping to the entity) is also returned

    When ``column`` is of the form ``tags[<id>]`` the result is tag *values* for
    that tag key; otherwise (``tags.key``) it is tag keys.

    Raises:
        InvalidParams: if a metric name is unknown to the indexer, if no data
            exists at all, or if some requested metrics have no data.
    """
    try:
        metric_ids = _get_metrics_filter_ids(metric_names)
    except MetricDoesNotExistInIndexer:
        raise InvalidParams(
            f"Some or all of the metric names in {metric_names} do not exist in the indexer"
        )
    else:
        # No metric_names means no filter: query all metrics of each entity.
        where = [Condition(Column("metric_id"), Op.IN, list(metric_ids))] if metric_ids else []

    tag_or_value_ids_per_metric_id = defaultdict(list)
    # This dictionary is required as a mapping from an entity to the ids available in it to
    # validate that constituent metrics of a SingleEntityDerivedMetric actually span a single
    # entity by validating that the ids of the constituent metrics all lie in the same entity
    supported_metric_ids_in_entities = {}

    for metric_type in ("counter", "set", "distribution"):
        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        rows = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id"), Column(column)],
            where=where,
            groupby=[Column("metric_id"), Column(column)],
            referrer=referrer,
            projects=projects,
            org_id=projects[0].organization_id,
        )
        for row in rows:
            metric_id = row["metric_id"]
            if column.startswith("tags["):
                # Scalar tag-value column; ids <= 0 mean "tag not present".
                value_id = row[column]
                if value_id > 0:
                    tag_or_value_ids_per_metric_id[metric_id].append(value_id)
            else:
                # Array column (tags.key): one list of tag-key ids per row.
                tag_or_value_ids_per_metric_id[metric_id].extend(row[column])
            supported_metric_ids_in_entities.setdefault(metric_type, []).append(row["metric_id"])

    # If we get no results back from snuba, then raise an InvalidParams with an appropriate
    # error message
    if not tag_or_value_ids_per_metric_id:
        if metric_names:
            error_str = f"The following metrics {metric_names} do not exist in the dataset"
        else:
            error_str = "Dataset contains no metric data for your project selection"
        raise InvalidParams(error_str)

    tag_or_value_id_lists = tag_or_value_ids_per_metric_id.values()
    if metric_names:
        # If there are metric_ids that map to the metric_names provided as an arg that were not
        # found in the dataset, then we raise an instance of InvalidParams exception
        if metric_ids != set(tag_or_value_ids_per_metric_id.keys()):
            # This can occur for metric names that don't have an equivalent in the dataset.
            raise InvalidParams(
                f"Not all the requested metrics or the constituent metrics in {metric_names} have "
                f"data in the dataset"
            )

        # At this point, we are sure that every metric_name/metric_id that was requested is
        # present in the dataset, and now we need to check that for all derived metrics requested
        # (if any are requested) are setup correctly i.e. constituent of
        # SingularEntityDerivedMetric actually span a single entity
        _validate_requested_derived_metrics_in_input_metrics(
            metric_names=metric_names,
            supported_metric_ids_in_entities=supported_metric_ids_in_entities,
        )

        # Only return tags/tag values that occur in all metrics
        tag_or_value_ids = set.intersection(*map(set, tag_or_value_id_lists))
    else:
        tag_or_value_ids = {tag_id for ids in tag_or_value_id_lists for tag_id in ids}

    if column.startswith("tags["):
        # Extract the numeric tag-key id from "tags[<id>]" and resolve both
        # the key and each of its value ids back to strings.
        tag_id = column.split("tags[")[1].split("]")[0]
        tags_or_values = [
            {"key": reverse_resolve(int(tag_id)), "value": reverse_resolve(value_id)}
            for value_id in tag_or_value_ids
        ]
        tags_or_values.sort(key=lambda tag: (tag["key"], tag["value"]))
    else:
        tags_or_values = [{"key": reverse_resolve(tag_id)} for tag_id in tag_or_value_ids]
        tags_or_values.sort(key=itemgetter("key"))

    if metric_names and len(metric_names) == 1:
        # NOTE(review): when a single metric has data in more than one entity this
        # picks the first entity encountered — confirm that is the intended tiebreak.
        metric_type = list(supported_metric_ids_in_entities.keys())[0]
        return tags_or_values, metric_type
    return tags_or_values, None
def get_tag_values(
    projects: Sequence[Project], tag_name: str, metric_names: Optional[Sequence[str]]
) -> Sequence[TagValue]:
    """Get all known values for a specific tag

    Returns a list of {"key": tag_name, "value": <resolved value>} dicts sorted
    by (key, value). Returns [] when the metric names are unknown to the indexer
    or when some requested metric has no data.

    Raises:
        InvalidParams: if the tag name itself is unknown to the indexer.
    """
    assert projects

    tag_id = indexer.resolve(tag_name)
    if tag_id is None:
        raise InvalidParams

    try:
        metric_ids = _get_metrics_filter_ids(metric_names)
    except MetricDoesNotExistInIndexer:
        return []
    else:
        # No metric_names means no filter: query all metrics of each entity.
        where = [Condition(Column("metric_id"), Op.IN, list(metric_ids))] if metric_ids else []

    tag_values = defaultdict(list)
    # This dictionary is required as a mapping from an entity to the ids available in it to
    # validate that constituent metrics of a SingleEntityDerivedMetric actually span a single
    # entity by validating that the ids of the constituent metrics all lie in the same entity
    supported_metric_ids_in_entities = {}

    column_name = f"tags[{tag_id}]"
    for metric_type in ("counter", "set", "distribution"):
        supported_metric_ids_in_entities.setdefault(metric_type, [])

        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        rows = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id"), Column(column_name)],
            where=where,
            groupby=[Column("metric_id"), Column(column_name)],
            referrer="snuba.metrics.meta.get_tag_values",
            projects=projects,
            org_id=projects[0].organization_id,
        )
        for row in rows:
            value_id = row[column_name]
            supported_metric_ids_in_entities[metric_type].append(row["metric_id"])
            # Ids <= 0 mean the tag is not set for this metric; skip those.
            if value_id > 0:
                metric_id = row["metric_id"]
                tag_values[metric_id].append(value_id)

        # If we are trying to find the tag values for only one metric name, then no need to query
        # other entities once we find data for that metric_name in one of the entities
        if metric_names and len(metric_names) == 1 and rows:
            break

    value_id_lists = tag_values.values()
    if metric_names is not None:
        # Some requested metric had no rows at all — treat as "no values".
        if metric_ids != set(tag_values.keys()):
            return []
        # At this point, we are sure that every metric_name/metric_id that was requested is
        # present in the dataset, and now we need to check that all derived metrics requested are
        # setup correctly
        # NOTE(review): this calls `_validate_requested_derived_metrics`, while the
        # tags/values helper calls `_validate_requested_derived_metrics_in_input_metrics`
        # — confirm both names exist and are the intended validators.
        _validate_requested_derived_metrics(
            metric_names=metric_names,
            supported_metric_ids_in_entities=supported_metric_ids_in_entities,
        )
        # Only return tags that occur in all metrics
        value_ids = set.intersection(*[set(ids) for ids in value_id_lists])
    else:
        value_ids = {value_id for ids in value_id_lists for value_id in ids}

    tags = [{"key": tag_name, "value": reverse_resolve(value_id)} for value_id in value_ids]
    tags.sort(key=lambda tag: (tag["key"], tag["value"]))

    return tags
def get_tags(projects: Sequence[Project], metric_names: Optional[Sequence[str]]) -> Sequence[Tag]:
    """Get all metric tags for the given projects and metric_names

    Returns a list of {"key": <tag key>} dicts sorted by key. When metric_names
    is given, only tags shared by *all* of those metrics are returned; returns
    [] when any requested metric is unknown or has no data.
    """
    assert projects

    try:
        metric_ids = _get_metrics_filter_ids(metric_names)
    except MetricDoesNotExistInIndexer:
        return []
    else:
        # No metric_names means no filter: query all metrics of each entity.
        where = [Condition(Column("metric_id"), Op.IN, list(metric_ids))] if metric_ids else []

    tag_ids_per_metric_id = defaultdict(list)
    # This dictionary is required as a mapping from an entity to the ids available in it to
    # validate that constituent metrics of a SingleEntityDerivedMetric actually span a single
    # entity by validating that the ids of the constituent metrics all lie in the same entity
    supported_metric_ids_in_entities = {}

    for metric_type in ("counter", "set", "distribution"):
        supported_metric_ids_in_entities.setdefault(metric_type, [])

        entity_key = METRIC_TYPE_TO_ENTITY[metric_type]
        rows = run_metrics_query(
            entity_key=entity_key,
            select=[Column("metric_id"), Column("tags.key")],
            where=where,
            groupby=[Column("metric_id"), Column("tags.key")],
            referrer="snuba.metrics.meta.get_tags",
            projects=projects,
            org_id=projects[0].organization_id,
        )
        for row in rows:
            # tags.key is an array column: one list of tag-key ids per row.
            tag_ids_per_metric_id[row["metric_id"]].extend(row["tags.key"])
            supported_metric_ids_in_entities[metric_type].append(row["metric_id"])

        # If we are trying to find the tags for only one metric name, then no need to query other
        # entities once we find data for that metric_name in one of the entity
        if metric_names and len(metric_names) == 1 and rows:
            break

    # If we get no results back from snuba, then just return an empty set
    if not tag_ids_per_metric_id:
        return []

    tag_id_lists = tag_ids_per_metric_id.values()
    if metric_names:
        # If there are metric_ids that were not found in the dataset, then just return an []
        if metric_ids != set(tag_ids_per_metric_id.keys()):
            # This can occur for metric names that don't have an equivalent in the dataset.
            return []

        # At this point, we are sure that every metric_name/metric_id that was requested is
        # present in the dataset, and now we need to check that all derived metrics requested are
        # setup correctly
        _validate_requested_derived_metrics(
            metric_names=metric_names,
            supported_metric_ids_in_entities=supported_metric_ids_in_entities,
        )

        # Only return tags that occur in all metrics
        tag_ids = set.intersection(*map(set, tag_id_lists))
    else:
        tag_ids = {tag_id for ids in tag_id_lists for tag_id in ids}

    tags = [{"key": reverse_resolve(tag_id)} for tag_id in tag_ids]
    tags.sort(key=itemgetter("key"))

    return tags
def _parse_tag(self, tag_string: str) -> str:
    """Resolve a snuba tag column reference like ``tags[123]`` back to its
    human-readable tag key via the indexer."""
    numeric_part = tag_string.replace("tags[", "").replace("]", "")
    return reverse_resolve(int(numeric_part))