def translate_results(self):
    groups = {}

    for entity, subresults in self._results.items():
        totals = subresults["totals"]["data"]
        for data in totals:
            self._extract_data(data, groups)

        if "series" in subresults:
            series = subresults["series"]["data"]
            for data in series:
                self._extract_data(data, groups)

    groups = [
        dict(
            by=dict(
                (self._parse_tag(key), reverse_resolve_weak(value))
                if key not in ALLOWED_GROUPBY_COLUMNS
                else (key, value)
                for key, value in tags
            ),
            **data,
        )
        for tags, data in groups.items()
    ]

    # Apply post-query operations to totals and series.
    for group in groups:
        totals, series = group["totals"], group["series"]
        for op, metric_name in self._bottom_up_dependency_tree:
            metric_obj = metric_object_factory(op=op, metric_name=metric_name)
            # Totals
            totals[metric_name] = metric_obj.run_post_query_function(totals)
            # Series
            for idx in range(0, len(self._intervals)):
                series.setdefault(
                    metric_name,
                    [metric_obj.generate_default_null_values()] * len(self._intervals),
                )
                series[metric_name][idx] = metric_obj.run_post_query_function(series, idx)

    # Remove the extra fields that were added for the constituent metrics pulled in from
    # the generated dependency tree. These metrics were only queried so that fields
    # requiring further post-query processing could be computed; they are neither required
    # nor expected in the response.
    for group in groups:
        totals, series = group["totals"], group["series"]
        # Snapshot the keys first, since we delete from `totals` while iterating.
        for key in list(totals.keys()):
            matches = FIELD_REGEX.match(key)
            if matches:
                operation = matches[1]
                metric_name = matches[2]
            else:
                operation = None
                metric_name = key
            if (operation, metric_name) not in self._query_definition_fields_set:
                del totals[key], series[key]

    return groups
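# Illustrative sketch of the structure `translate_results` returns; the tag values and the
# metric field below are hypothetical, not taken from a real response:
#
# [
#     {
#         "by": {"release": "v1.0.0", "environment": "production"},
#         "totals": {"sum(sentry.sessions.session)": 400},
#         "series": {"sum(sentry.sessions.session)": [100, 150, 150]},
#     },
# ]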
def _build_orderby(self, query_definition: QueryDefinition) -> Optional[List[OrderBy]]:
    if query_definition.orderby is None:
        return None
    (op, metric_name), direction = query_definition.orderby
    metric_field_obj = metric_object_factory(op, metric_name)

    return metric_field_obj.generate_orderby_clause(
        projects=self._projects, direction=direction
    )
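# A minimal sketch of the orderby structure `_build_orderby` unpacks; the field and
# direction value are hypothetical. `query_definition.orderby` is expected to be of the
# form ((op, metric_name), direction), e.g.
#     (("sum", "sentry.sessions.session"), Direction.DESC)
# which the metric field object translates into a list of Snuba OrderBy clauses.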
def _extract_data(self, data, groups):
    tags = tuple(
        (key, data[key])
        for key in sorted(data.keys())
        if (key.startswith("tags[") or key in ALLOWED_GROUPBY_COLUMNS)
    )

    tag_data = groups.setdefault(tags, {"totals": {}, "series": {}})

    bucketed_time = data.pop(TS_COL_GROUP, None)
    if bucketed_time is not None:
        bucketed_time = parse_snuba_datetime(bucketed_time)

    # We query the union of the query_definition fields and the fields_in_entities from
    # the QueryBuilder. This is necessary because fields_in_entities contains the
    # constituent instances of SingularEntityDerivedMetric that make up each
    # CompositeEntityDerivedMetric.
    for op, metric_name in self._set_of_constituent_queries:
        key = f"{op}({metric_name})" if op else metric_name
        default_null_value = metric_object_factory(
            op, metric_name
        ).generate_default_null_values()

        try:
            value = data[key]
        except KeyError:
            # This can happen for derived metrics that are generated from post-query
            # operations, and so have no direct mapping to the query results, or for raw
            # metrics that do not exist in ClickHouse yet.
            cleaned_value = default_null_value
        else:
            if op in OPERATIONS_PERCENTILES:
                value = value[0]
            cleaned_value = finite_or_none(value)

        if bucketed_time is None:
            # Only update the value when the key does not exist yet or still holds the
            # default.
            if key not in tag_data["totals"] or tag_data["totals"][key] == default_null_value:
                tag_data["totals"][key] = cleaned_value

        if bucketed_time is not None or tag_data["totals"][key] == default_null_value:
            empty_values = len(self._intervals) * [default_null_value]
            series = tag_data["series"].setdefault(key, empty_values)

            if bucketed_time is not None:
                series_index = self._timestamp_index[bucketed_time]
                if series[series_index] == default_null_value:
                    series[series_index] = cleaned_value
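# Illustrative sketch of how `_extract_data` groups a row; the indexed tag id, value, and
# field are hypothetical, and TS_COL_GROUP is assumed to map to a column like
# "bucketed_time". A row such as
#     {"tags[8]": 42, "sum(sentry.sessions.session)": 100.0,
#      "bucketed_time": "2021-08-24T00:00:00"}
# yields the grouping key (("tags[8]", 42),), and 100.0 is written into that group's
# series at self._timestamp_index[parse_snuba_datetime("2021-08-24T00:00:00")].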
def __update_query_dicts_with_component_entities(
    self, component_entities, metric_name_to_obj_dict
):
    # At this point in time, we only support raw metrics in the `metrics` attribute of
    # any instance of DerivedMetric, so in this case the op will always be None.
    # ToDo(ahmed): In a future PR, we might want to allow dependency metrics to also carry
    # an aggregate, in which case we would need to parse the op here.
    op = None

    for entity, metric_names in component_entities.items():
        for metric_name in metric_names:
            metric_key = (op, metric_name)
            if metric_key not in metric_name_to_obj_dict:
                metric_name_to_obj_dict[metric_key] = metric_object_factory(op, metric_name)
            self._fields_in_entities.setdefault(entity, []).append(metric_key)
    return metric_name_to_obj_dict
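# A minimal sketch of the `component_entities` input assumed above; the entity and metric
# names are hypothetical. An input of {"metrics_counters": ["sentry.sessions.session"]}
# registers the key (None, "sentry.sessions.session") in `metric_name_to_obj_dict` and
# appends it to self._fields_in_entities["metrics_counters"].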
def _build_queries(self, query_definition):
    metric_name_to_obj_dict = {}

    queries_by_entity = OrderedDict()
    for op, metric_name in query_definition.fields.values():
        metric_field_obj = metric_object_factory(op, metric_name)
        # `get_entity` is called first to fetch the entities of the constituent metrics,
        # and to validate, especially for SingularEntityDerivedMetric, that the derived
        # metric is actually composed of metrics that belong to the same entity.
        try:
            entity = metric_field_obj.get_entity(projects=self._projects)
        except MetricDoesNotExistException:
            # If we get here, one or more of the constituent metrics of a derived metric
            # does not exist. No further attempt is made to query that derived metric,
            # and its field value is set to the default value in the response.
            continue

        if not entity:
            # ToDo(ahmed): When we get to an instance of MetricFieldBase whose entity is
            # None, we know it comes from a CompositeEntityDerivedMetric. We then need to
            # traverse down its constituent metrics dependency tree until we reach
            # instances of SingularEntityDerivedMetric, and add those to our queries so
            # that the original CompositeEntityDerivedMetric can be reconstructed later
            # through a post-query operation on the results of its constituents.
            continue

        if entity not in self._implemented_datasets:
            raise NotImplementedError(f"Dataset not yet implemented: {entity}")

        metric_name_to_obj_dict[(op, metric_name)] = metric_field_obj
        queries_by_entity.setdefault(entity, []).append((op, metric_name))

    where = self._build_where(query_definition)
    groupby = self._build_groupby(query_definition)
    # The orderby clause only depends on the query definition, so build it once rather
    # than once per entity.
    orderby = self._build_orderby(query_definition)

    queries_dict = {}
    for entity, fields in queries_by_entity.items():
        select = []
        metric_ids_set = set()
        for op, name in fields:
            metric_field_obj = metric_name_to_obj_dict[(op, name)]
            select += metric_field_obj.generate_select_statements(projects=self._projects)
            metric_ids_set |= metric_field_obj.generate_metric_ids()

        where_for_entity = [
            Condition(
                Column("metric_id"),
                Op.IN,
                list(metric_ids_set),
            ),
        ]
        queries_dict[entity] = self._build_totals_and_series_queries(
            entity=entity,
            select=select,
            where=where + where_for_entity,
            groupby=groupby,
            orderby=orderby,
            limit=query_definition.limit,
            offset=query_definition.offset,
            rollup=query_definition.rollup,
            intervals_len=len(list(get_intervals(query_definition))),
        )

    return queries_dict
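# Illustrative sketch of the structure `_build_queries` returns; the entity key is
# hypothetical, and `_build_totals_and_series_queries` is assumed to return one query per
# result kind, matching the "totals"/"series" subresults consumed in `translate_results`:
#     {"metrics_counters": {"totals": <snuba Query>, "series": <snuba Query>}}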