def record_query(
    request: Request, timer: Timer, query_metadata: SnubaQueryMetadata
) -> None:
    """
    Record a request once it has been parsed and validated, regardless of
    whether a query was actually run for it.

    Nothing happens unless ``settings.RECORD_QUERIES`` is truthy. When it is,
    the query metadata is passed to ``state.record_query``, the timer metrics
    are sent to ``metrics`` and ``_add_tags`` is called for the request.
    """
    if not settings.RECORD_QUERIES:
        return

    # Send to redis
    # ``state`` receives a plain dict rather than the metadata object itself:
    # this avoids a circular dependency where state would have to depend on the
    # higher level QueryMetadata class.
    state.record_query(query_metadata.to_dict())

    final_tag = str(request.query.get_final())
    referrer_tag = request.referrer or "none"

    timer_tags = {
        "status": query_metadata.status.value,
        "referrer": referrer_tag,
        "final": final_tag,
    }
    per_mark_tags = {"final": final_tag, "referrer": referrer_tag}
    timer.send_metrics_to(metrics, tags=timer_tags, mark_tags=per_mark_tags)

    _add_tags(timer, request)
def execute_query_with_rate_limits(
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    stats: MutableMapping[str, Any],
    query_settings: MutableMapping[str, Any],
) -> Result:
    """
    Run ``execute_query`` inside a ``RateLimitAggregator`` built from the rate
    limit parameters of ``request_settings``.

    The aggregator stats are merged into ``stats``. If ``max_threads`` is
    present in ``query_settings`` and the project rate limit reports more than
    one concurrent query, ``max_threads`` is reduced by ``concurrent - 1``,
    with a floor of 1.
    """
    # XXX: We should consider moving this that it applies to the logical query,
    # not the physical query.
    rate_limit_params = request_settings.get_rate_limit_params()
    with RateLimitAggregator(rate_limit_params) as limiter_stats:
        stats.update(limiter_stats.to_dict())
        timer.mark("rate_limit")

        project_stats = limiter_stats.get_stats(PROJECT_RATE_LIMIT_NAME)
        if "max_threads" in query_settings and project_stats is not None:
            concurrent = project_stats.concurrent
            if concurrent > 1:
                requested_threads = query_settings["max_threads"]
                query_settings["max_threads"] = max(
                    1, requested_threads - concurrent + 1
                )

        # The query runs while the aggregator context is still open.
        return execute_query(
            clickhouse_query,
            request_settings,
            formatted_query,
            reader,
            timer,
            stats,
            query_settings,
        )
def _format_storage_query_and_run(
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    referrer: str,
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    reader: Reader,
    robust: bool,
    concurrent_queries_gauge: Optional[Gauge] = None,
) -> QueryResult:
    """
    Format the storage query and hand it over to the DB specific code
    (``raw_query``) for execution.

    When ``concurrent_queries_gauge`` is provided, the execution happens
    inside that gauge's context; otherwise the query is executed directly.
    """
    tables = TablesCollector()
    tables.visit(clickhouse_query.get_from_clause())
    table_names = ",".join(sorted(tables.get_tables()))

    with sentry_sdk.start_span(description="create_query", op="db") as create_span:
        _apply_turbo_sampling_if_needed(clickhouse_query, request_settings)

        formatted_query = format_query(clickhouse_query)
        create_span.set_data("query", formatted_query.structured())
        create_span.set_data(
            "query_size_bytes", _string_size_in_bytes(formatted_query.get_sql())
        )
        sentry_sdk.set_tag(
            "query_size_group", get_query_size_group(formatted_query.get_sql())
        )
        metrics.increment("execute")

    timer.mark("prepare_query")

    stats = {
        "clickhouse_table": table_names,
        "final": tables.any_final(),
        "referrer": referrer,
        "sample": tables.get_sample_rate(),
    }

    with sentry_sdk.start_span(
        description=formatted_query.get_sql(), op="db"
    ) as run_span:
        run_span.set_tag("table", table_names)

        def run_raw_query() -> QueryResult:
            return raw_query(
                clickhouse_query,
                request_settings,
                formatted_query,
                reader,
                timer,
                query_metadata,
                stats,
                run_span.trace_id,
                robust=robust,
            )

        if concurrent_queries_gauge is None:
            return run_raw_query()

        with concurrent_queries_gauge:
            return run_raw_query()
def execute_query_with_rate_limits(
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    query_settings: QuerySettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    stats: MutableMapping[str, Any],
    clickhouse_query_settings: MutableMapping[str, Any],
    robust: bool,
) -> Result:
    """
    Run ``execute_query`` inside a ``RateLimitAggregator`` built from the rate
    limits of ``query_settings`` (with the global rate limit appended).

    The aggregator stats are merged into ``stats``. When a thread budget is
    known (either a resource quota or a ``max_threads`` entry in
    ``clickhouse_query_settings``) and the project rate limit reports more
    than one concurrent query, ``max_threads`` is set to that budget minus
    ``concurrent - 1``, with a floor of 1.
    """
    # Global rate limiter is added at the end of the chain to be
    # the last for evaluation.
    # This allows us not to borrow capacity from the global quota
    # during the evaluation if one of the more specific limiters
    # (like the project rate limiter) rejects the query first.
    query_settings.add_rate_limit(get_global_rate_limit_params())

    # XXX: We should consider moving this that it applies to the logical query,
    # not the physical query.
    with RateLimitAggregator(query_settings.get_rate_limit_params()) as limiter_stats:
        stats.update(limiter_stats.to_dict())
        timer.mark("rate_limit")

        project_stats = limiter_stats.get_stats(PROJECT_RATE_LIMIT_NAME)
        thread_quota = query_settings.get_resource_quota()

        has_thread_budget = (
            "max_threads" in clickhouse_query_settings or thread_quota is not None
        )
        if (
            has_thread_budget
            and project_stats is not None
            and project_stats.concurrent > 1
        ):
            # The resource quota, when present, wins over the explicit setting.
            if thread_quota is None:
                thread_budget = clickhouse_query_settings["max_threads"]
            else:
                thread_budget = thread_quota.max_threads
            clickhouse_query_settings["max_threads"] = max(
                1, thread_budget - project_stats.concurrent + 1
            )

        _record_rate_limit_metrics(limiter_stats, reader, stats)

        return execute_query(
            clickhouse_query,
            query_settings,
            formatted_query,
            reader,
            timer,
            stats,
            clickhouse_query_settings,
            robust=robust,
        )
def _record_failure_building_request(
    status: QueryStatus, timer: Timer, referrer: Optional[str]
) -> None:
    """
    Send the timer metrics for a request that could not be built, tagged with
    ``status`` and ``referrer`` (``"none"`` when the referrer is falsy).

    Nothing is recorded unless ``settings.RECORD_QUERIES`` is truthy.
    """
    # TODO: Revisit if recording some data for these queries in the querylog
    # table would be useful.
    if not settings.RECORD_QUERIES:
        return

    failure_tags = {"status": status.value, "referrer": referrer or "none"}
    timer.send_metrics_to(metrics, tags=failure_tags)
    _add_tags(timer)
def execute_query_with_caching(
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    query_settings: QuerySettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    stats: MutableMapping[str, Any],
    clickhouse_query_settings: MutableMapping[str, Any],
    robust: bool,
) -> Result:
    """
    Execute the query through ``execute_query_with_rate_limits``, reading the
    result from (and writing it to) the reader's cache partition when the
    cache is enabled.

    The cache is skipped when the ``use_cache`` config is falsy or when the
    query references more columns than ``uncompressed_cache_max_cols``. The
    cache key is always stored as ``query_id`` in
    ``clickhouse_query_settings``.
    """
    # XXX: ``uncompressed_cache_max_cols`` is used to control both the result
    # cache, as well as the uncompressed cache. These should be independent.
    use_cache, uc_max = state.get_configs(
        [("use_cache", settings.USE_RESULT_CACHE), ("uncompressed_cache_max_cols", 5)]
    )

    referenced_columns = ReferencedColumnsCounter()
    referenced_columns.visit(clickhouse_query.get_from_clause())
    assert isinstance(uc_max, int)
    if referenced_columns.count_columns() > uc_max:
        use_cache = False

    def run_uncached() -> Result:
        return execute_query_with_rate_limits(
            clickhouse_query,
            query_settings,
            formatted_query,
            reader,
            timer,
            stats,
            clickhouse_query_settings,
            robust=robust,
        )

    with sentry_sdk.start_span(description="execute", op="db") as span:
        cache_key = get_query_cache_key(formatted_query)
        clickhouse_query_settings["query_id"] = cache_key

        if not use_cache:
            return run_uncached()

        partition = _get_cache_partition(reader)
        cached = partition.get(cache_key)
        timer.mark("cache_get")
        stats["cache_hit"] = cached is not None
        if cached is not None:
            span.set_tag("cache", "hit")
            return cached

        span.set_tag("cache", "miss")
        fresh = run_uncached()
        partition.set(cache_key, fresh)
        timer.mark("cache_set")
        return fresh
def validate_request_content(
    body,
    schema: RequestSchema,
    timer: Timer,
    dataset: Dataset,
    referrer: str,
) -> Request:
    """
    Validate ``body`` against ``schema`` and return the resulting request.

    Raises ``BadRequest`` (chained to the original error) when the schema
    validation fails with a ``jsonschema.ValidationError``.
    """
    with sentry_sdk.start_span(
        description="validate_request_content", op="validate"
    ) as validation_span:
        try:
            validated = schema.validate(body, dataset, referrer)
            validation_span.set_data("snuba_query", validated.body)
        except jsonschema.ValidationError as validation_error:
            raise BadRequest(str(validation_error)) from validation_error

        timer.mark("validate_schema")

    return validated
def dataset_query(dataset: Dataset, body, timer: Timer) -> Response:
    """
    Build a request from ``body``, run it against ``dataset`` and wrap the
    outcome in a JSON ``Response``.

    A ``QueryException`` is turned into an error response whose content
    depends on the exception's ``__cause__``: 429 for ``RateLimitExceeded``,
    500 for anything else. A ``QueryException`` without an ``Exception`` cause
    is re-raised.
    """
    assert http_request.method == "POST"

    json_headers = {"Content-Type": "application/json"}

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_extensions(), HTTPRequestSettings
        )

    request = build_request(body, schema, timer, dataset, http_request.referrer)

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        cause = exception.__cause__
        status = 500
        details: Mapping[str, Any]
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {"type": "rate-limited", "message": "rate limit exceeded"}
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {"type": "unknown", "message": str(cause)}
        else:
            raise  # exception should have been chained

        error_body = {
            "error": details,
            "timing": timer.for_json(),
            **exception.extra,
        }
        return Response(json.dumps(error_body), status, json_headers)

    payload: MutableMapping[str, Any] = dict(result.result)
    payload["timing"] = timer.for_json()

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, json_headers)
def execute_query_with_caching(
    clickhouse_query: Query,
    request_settings: RequestSettings,
    formatted_query: SqlQuery,
    reader: Reader[SqlQuery],
    timer: Timer,
    stats: MutableMapping[str, Any],
    query_settings: MutableMapping[str, Any],
) -> Result:
    """
    Execute the query through ``execute_query_with_rate_limits``, reading the
    result from (and writing it to) ``cache`` when the cache is enabled.

    The cache is skipped when the ``use_cache`` config is falsy or when the
    query references more distinct columns than
    ``uncompressed_cache_max_cols``.
    """
    # XXX: ``uncompressed_cache_max_cols`` is used to control both the result
    # cache, as well as the uncompressed cache. These should be independent.
    use_cache, uc_max = state.get_configs(
        [("use_cache", settings.USE_RESULT_CACHE), ("uncompressed_cache_max_cols", 5)]
    )

    # Skip aliases when counting columns
    distinct_columns = {
        (column.table_name, column.column_name)
        for column in clickhouse_query.get_all_ast_referenced_columns()
    }
    if len(distinct_columns) > uc_max:
        use_cache = False

    run_uncached = partial(
        execute_query_with_rate_limits,
        clickhouse_query,
        request_settings,
        formatted_query,
        reader,
        timer,
        stats,
        query_settings,
    )

    with sentry_sdk.start_span(description="execute", op="db") as span:
        if not use_cache:
            return run_uncached()

        cache_key = get_query_cache_key(formatted_query)
        cached = cache.get(cache_key)
        timer.mark("cache_get")
        stats["cache_hit"] = cached is not None
        if cached is not None:
            span.set_tag("cache", "hit")
            return cached

        span.set_tag("cache", "miss")
        fresh = run_uncached()
        cache.set(cache_key, fresh)
        timer.mark("cache_set")
        return fresh
def build_request(
    body: MutableMapping[str, Any],
    parser: Parser,
    settings_class: Union[Type[HTTPRequestSettings], Type[SubscriptionRequestSettings]],
    schema: RequestSchema,
    dataset: Dataset,
    timer: Timer,
    referrer: str,
) -> Request:
    """
    Validate ``body`` with ``schema``, build the settings object and the
    query, and assemble them into a ``Request`` with a fresh request id.

    Failures are recorded before being re-raised: invalid JSON / invalid
    query exceptions through ``record_invalid_request``, any other exception
    through ``record_error_building_request``.
    """
    with sentry_sdk.start_span(description="build_request", op="validate") as span:
        try:
            request_parts = schema.validate(body)

            if settings_class == HTTPRequestSettings:
                http_settings = dict(request_parts.settings)
                http_settings["consistent"] = _consistent_override(
                    request_parts.settings.get("consistent", False), referrer
                )
                settings_obj: Union[
                    HTTPRequestSettings, SubscriptionRequestSettings
                ] = settings_class(**http_settings)
            elif settings_class == SubscriptionRequestSettings:
                settings_obj = settings_class(
                    consistent=_consistent_override(True, referrer)
                )

            query = parser(request_parts, settings_obj, dataset)

            request = Request(
                uuid.uuid4().hex,
                # TODO: Replace this with the actual query raw body.
                # this can have an impact on subscriptions so we need
                # to be careful with the change.
                ChainMap(request_parts.query, *request_parts.extensions.values()),
                query,
                settings_obj,
                referrer,
            )
        except (InvalidJsonRequestException, InvalidQueryException):
            record_invalid_request(timer, referrer)
            raise
        except Exception:
            record_error_building_request(timer, referrer)
            raise

        span.set_data("snuba_query", request.body)

        timer.mark("validate_schema")
        return request
def test_timer_send_metrics() -> None:
    """
    Check the timings a ``Timer`` sends: one for the whole timer carrying the
    timer tags plus ``tags``, and one per mark carrying the timer tags
    overridden by ``mark_tags``.
    """
    metrics_backend = TestingMetricsBackend()
    clock = TestingClock()
    timer_tags = {"foo": "bar", "blue": "car"}

    timer = Timer("timer", clock=clock, tags=timer_tags)
    for mark_name in ("thing1", "thing2"):
        clock.sleep(10)
        timer.mark(mark_name)

    timer.send_metrics_to(
        metrics_backend,
        tags={"key": "value"},
        mark_tags={"mark-key": "mark-value", "blue": "dog"},
    )

    # "blue" from ``mark_tags`` takes precedence over the timer level tag.
    expected_mark_tags = {"mark-key": "mark-value", "foo": "bar", "blue": "dog"}
    expected_calls = [
        Timing("timer", (10.0 + 10.0) * 1000, {"key": "value", **timer_tags}),
        Timing("timer.thing1", 10.0 * 1000, dict(expected_mark_tags)),
        Timing("timer.thing2", 10.0 * 1000, dict(expected_mark_tags)),
    ]
    assert metrics_backend.calls == expected_calls
def _format_storage_query_and_run(
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    from_date: datetime,
    to_date: datetime,
    referrer: str,
    clickhouse_query: Query,
    request_settings: RequestSettings,
    reader: Reader[SqlQuery],
) -> QueryResult:
    """
    Format the storage query as an ``AstSqlQuery`` and pass it to the DB
    specific code (``raw_query``) for execution, together with the stats
    describing the query.
    """
    # TODO: This function (well, it will be a wrapper of this function)
    # where we will transform the result according to the SelectedExpression
    # object in the query to ensure the fields in the QueryResult have
    # the same name the user expects.

    table = clickhouse_query.get_data_source().format_from()

    with sentry_sdk.start_span(description="create_query", op="db") as create_span:
        formatted_query = AstSqlQuery(clickhouse_query, request_settings)
        create_span.set_data("query", formatted_query.sql_data())
        metrics.increment("execute")

    timer.mark("prepare_query")

    query_stats = {
        "clickhouse_table": table,
        "final": clickhouse_query.get_final(),
        "referrer": referrer,
        "num_days": (to_date - from_date).days,
        "sample": clickhouse_query.get_sample(),
    }

    sql = formatted_query.format_sql()
    with sentry_sdk.start_span(description=sql, op="db") as run_span:
        run_span.set_tag("table", table)

        return raw_query(
            clickhouse_query,
            request_settings,
            formatted_query,
            reader,
            timer,
            query_metadata,
            query_stats,
            run_span.trace_id,
        )
def execute_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    formatted_query: FormattedQuery,
    reader: Reader,
    timer: Timer,
    stats: MutableMapping[str, Any],
    query_settings: MutableMapping[str, Any],
    robust: bool,
) -> Result:
    """
    Run ``formatted_query`` through ``reader`` and return its result.

    ``query_settings`` is adjusted in place before the execution, and
    ``stats`` receives the consistency flag plus the number of rows and
    columns of the result.
    """
    # Experiment, if we are going to grab more than X columns worth of data,
    # don't use uncompressed_cache in ClickHouse.
    uc_max = state.get_config("uncompressed_cache_max_cols", 5)
    assert isinstance(uc_max, int)
    referenced_columns = ReferencedColumnsCounter()
    referenced_columns.visit(clickhouse_query.get_from_clause())
    if referenced_columns.count_columns() > uc_max:
        query_settings["use_uncompressed_cache"] = 0

    # Force query to use the first shard replica, which
    # should have synchronously received any cluster writes
    # before this query is run.
    is_consistent = request_settings.get_consistent()
    stats["consistent"] = is_consistent
    if is_consistent:
        query_settings.update(load_balancing="in_order", max_threads=1)

    query_result = reader.execute(
        formatted_query,
        query_settings,
        with_totals=clickhouse_query.has_totals(),
        robust=robust,
    )

    timer.mark("execute")
    stats.update(
        result_rows=len(query_result["data"]),
        result_cols=len(query_result["meta"]),
    )

    return query_result
def execute_query(
    # TODO: Passing the whole clickhouse query here is needed as long
    # as the execute method depends on it. Otherwise we can make this
    # file rely either entirely on clickhouse query or entirely on
    # the formatter.
    clickhouse_query: Query,
    request_settings: RequestSettings,
    formatted_query: SqlQuery,
    reader: Reader[SqlQuery],
    timer: Timer,
    stats: MutableMapping[str, Any],
    query_settings: MutableMapping[str, Any],
) -> Result:
    """
    Run ``formatted_query`` through ``reader`` and return its result.

    ``query_settings`` is adjusted in place before the execution, and
    ``stats`` receives the consistency flag plus the number of rows and
    columns of the result.
    """
    # Experiment, if we are going to grab more than X columns worth of data,
    # don't use uncompressed_cache in ClickHouse.
    uc_max = state.get_config("uncompressed_cache_max_cols", 5)
    # Skip aliases when counting columns
    distinct_columns = {
        (column.table_name, column.column_name)
        for column in clickhouse_query.get_all_ast_referenced_columns()
    }
    if len(distinct_columns) > uc_max:
        query_settings["use_uncompressed_cache"] = 0

    # Force query to use the first shard replica, which
    # should have synchronously received any cluster writes
    # before this query is run.
    is_consistent = request_settings.get_consistent()
    stats["consistent"] = is_consistent
    if is_consistent:
        query_settings.update(load_balancing="in_order", max_threads=1)

    query_result = reader.execute(
        formatted_query,
        query_settings,
        with_totals=clickhouse_query.has_totals(),
    )

    timer.mark("execute")
    stats.update(
        result_rows=len(query_result["data"]),
        result_cols=len(query_result["meta"]),
    )

    return query_result
def test(self) -> None:
    """
    Create a subscription, check that it is found in the Redis store of its
    partition, delete it and check that the store is empty afterwards.
    """
    subscription = SubscriptionData(
        project_id=1,
        query="MATCH (events) SELECT count() AS count",
        time_window_sec=10 * 60,
        resolution_sec=60,
        entity_subscription=create_entity_subscription(),
    )
    identifier = SubscriptionCreator(self.dataset, EntityKey.EVENTS).create(
        subscription, Timer("test")
    )

    def stored_subscriptions():
        # A new store object is built for every read.
        return RedisSubscriptionDataStore(
            redis_client,
            self.entity_key,
            identifier.partition,
        ).all()

    stored = cast(List[Tuple[UUID, SubscriptionData]], stored_subscriptions())
    assert stored[0][1] == subscription

    deleter = SubscriptionDeleter(self.entity_key, identifier.partition)
    deleter.delete(identifier.uuid)

    assert stored_subscriptions() == []
class TestMetricsCountersSubscriptionCreator:
    """Subscription creation tests run against the ``metrics`` dataset."""

    # A single timer instance is defined on the class and used by every test.
    timer = Timer("test")

    def setup_method(self) -> None:
        self.dataset = get_dataset("metrics")

    @pytest.mark.parametrize("subscription, entity_key", TESTS_CREATE_METRICS)
    def test(self, subscription: SubscriptionData, entity_key: EntityKey) -> None:
        """A created subscription is found in the store of its partition."""
        identifier = SubscriptionCreator(self.dataset, entity_key).create(
            subscription, self.timer
        )
        store = RedisSubscriptionDataStore(
            redis_client,
            entity_key,
            identifier.partition,
        )
        stored = cast(List[Tuple[UUID, SubscriptionData]], store.all())
        assert stored[0][1] == subscription

    @pytest.mark.parametrize("subscription", TESTS_INVALID_METRICS)
    def test_missing_conditions_for_groupby_clause(
        self, subscription: SubscriptionData
    ) -> None:
        """Creating each of the invalid subscriptions raises ``InvalidQueryException``."""
        subscription_creator = SubscriptionCreator(
            self.dataset, EntityKey.METRICS_COUNTERS
        )
        with raises(InvalidQueryException):
            subscription_creator.create(subscription, self.timer)
def test(self) -> None:
    """
    Create a legacy subscription, check that it is found in the Redis store
    of its partition, delete it and check that the store is empty afterwards.
    """
    subscription = LegacySubscriptionData(
        project_id=1,
        conditions=[],
        aggregations=[["count()", "", "count"]],
        time_window=timedelta(minutes=10),
        resolution=timedelta(minutes=1),
    )
    identifier = SubscriptionCreator(self.dataset).create(
        subscription, Timer("test")
    )

    def stored_subscriptions():
        # A new store object is built for every read.
        return RedisSubscriptionDataStore(
            redis_client,
            self.dataset,
            identifier.partition,
        ).all()

    stored = cast(List[Tuple[UUID, SubscriptionData]], stored_subscriptions())
    assert stored[0][1] == subscription

    deleter = SubscriptionDeleter(self.dataset, identifier.partition)
    deleter.delete(identifier.uuid)

    assert stored_subscriptions() == []
def __execute_query(
    self, task: ScheduledSubscriptionTask, tick_upper_offset: int
) -> Tuple[Request, Result]:
    """
    Build the request for the scheduled subscription task, run it and return
    the ``(request, result)`` pair.

    Both the request building and the query execution happen inside the
    concurrency gauge of this executor.
    """
    # Measure the amount of time that took between the task's scheduled
    # time and it beginning to execute.
    latency_ms = (time.time() - task.timestamp.timestamp()) * 1000
    self.__metrics.timing("executor.latency", latency_ms)

    timer = Timer("query")

    with self.__concurrent_gauge:
        subscription_data = task.task.subscription.data
        request = subscription_data.build_request(
            self.__dataset,
            task.timestamp,
            tick_upper_offset,
            timer,
            self.__metrics,
            "subscriptions_executor",
        )

        query_result = parse_and_run_query(
            self.__dataset,
            request,
            timer,
            robust=True,
            concurrent_queries_gauge=self.__concurrent_clickhouse_gauge,
        )

        return (request, query_result.result)
def compare_conditions(
    self,
    subscription: SubscriptionData,
    exception: Optional[Type[Exception]],
    aggregate: str,
    value: Union[int, float],
) -> None:
    """
    Build and run the query of ``subscription``.

    When ``exception`` is given, building/running is expected to raise it.
    Otherwise the ``aggregate`` column of the first result row must be equal
    to ``value``.
    """
    timer = Timer("test")

    def build_and_run():
        request = subscription.build_request(
            self.dataset,
            datetime.utcnow(),
            100,
            timer,
        )
        return parse_and_run_query(self.dataset, request, timer)

    if exception is not None:
        with pytest.raises(exception):
            build_and_run()
        return

    query_result = build_and_run()
    first_row = query_result.result["data"][0]
    assert first_row[aggregate] == value
def __execute(
    self, task: ScheduledTask[Subscription], tick: Tick
) -> Tuple[Request, Result]:
    """
    Build the request for the scheduled subscription, run it inside the
    concurrency gauge and return the ``(request, result)`` pair.
    """
    # Measure the amount of time that took between this task being
    # scheduled and it beginning to execute.
    latency_ms = (time.time() - task.timestamp.timestamp()) * 1000
    self.__metrics.timing("executor.latency", latency_ms)

    # XXX: The ``query`` name is taken from the web views so that all query
    # performance metrics are reported to the same spot, regardless of
    # execution environment.
    timer = Timer("query")

    request = task.task.data.build_request(
        self.__dataset,
        task.timestamp,
        tick.offsets.upper,
        timer,
    )

    with self.__concurrent_gauge:
        # XXX: The ``request`` instance is copied when passed to
        # ``parse_and_run_query`` since it can/will be mutated during
        # processing.
        query_result = parse_and_run_query(
            self.__dataset, copy.deepcopy(request), timer
        )
        # XXX: The ``extra`` is discarded from ``QueryResult`` since it is
        # not particularly useful in this context and duplicates data that
        # is already being published to the query log.
        return (request, query_result.result)
def run_query(dataset: Dataset, request: Request, timer: Timer) -> WebQueryResult:
    """
    Run the request and wrap the outcome in a ``WebQueryResult``.

    A successful run yields status 200 with the result and the timing. A
    ``RawQueryException`` yields an error payload with status 429 when its
    error type is ``"rate-limited"`` and 500 otherwise.
    """
    try:
        query_result = parse_and_run_query(dataset, request, timer)

        payload = dict(query_result.result)
        payload["timing"] = timer.for_json()
        if settings.STATS_IN_RESPONSE or request.settings.get_debug():
            payload.update(query_result.extra)

        return WebQueryResult(payload, 200)
    except RawQueryException as error:
        error_payload = {
            "error": {"type": error.err_type, "message": error.message, **error.meta},
            "sql": error.sql,
            "stats": error.stats,
            "timing": timer.for_json(),
        }
        if error.err_type == "rate-limited":
            status = 429
        else:
            status = 500
        return WebQueryResult(error_payload, status)
def _format_storage_query_and_run(
    # TODO: remove dependency on Dataset. This is only for formatting the legacy ClickhouseQuery
    # with the AST this won't be needed.
    dataset: Dataset,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    from_date: datetime,
    to_date: datetime,
    request: Request,
) -> RawQueryResult:
    """
    Format the Storage Query as a ``DictClickhouseQuery`` and pass it to the
    DB specific code (``raw_query``) for execution.

    The AST based formatting of the same query is attached to the span as the
    ``ast_query`` tag; a failure to produce it is logged and otherwise ignored.

    TODO: When we will have the AST in production and we will have the
    StorageQuery abstraction, this function is probably going to collapse
    and disappear.
    """
    table = request.query.get_data_source().format_from()

    with sentry_sdk.start_span(description="create_query", op="db"):
        # TODO: Move the performance logic and the pre_where generation into
        # ClickhouseQuery since they are Clickhouse specific
        clickhouse_query = DictClickhouseQuery(
            dataset, request.query, request.settings
        )

    timer.mark("prepare_query")

    query_stats = {
        "clickhouse_table": table,
        "final": request.query.get_final(),
        "referrer": request.referrer,
        "num_days": (to_date - from_date).days,
        "sample": request.query.get_sample(),
    }

    sql = clickhouse_query.format_sql()
    with sentry_sdk.start_span(description=sql, op="db") as run_span:
        run_span.set_tag("table", table)
        try:
            ast_sql = AstClickhouseQuery(
                request.query, request.settings
            ).format_sql()
            run_span.set_tag("ast_query", ast_sql)
        except Exception:
            # Best effort only: the query still runs without the tag.
            logger.warning("Failed to format ast query", exc_info=True)

        return raw_query(
            request,
            clickhouse_query,
            timer,
            query_metadata,
            query_stats,
            run_span.trace_id,
        )
def build_request(
    body,
    schema: RequestSchema,
    timer: Timer,
    dataset: Dataset,
    referrer: str,
) -> Request:
    """
    Validate ``body`` with ``schema`` and return the resulting request.

    Failures are recorded before being re-raised: invalid JSON / invalid
    query exceptions through ``record_invalid_request``, any other exception
    through ``record_error_building_request``.
    """
    with sentry_sdk.start_span(
        description="build_request", op="validate"
    ) as build_span:
        try:
            built_request = schema.validate(body, dataset, referrer)
        except (InvalidJsonRequestException, InvalidQueryException):
            record_invalid_request(timer, referrer)
            raise
        except Exception:
            record_error_building_request(timer, referrer)
            raise

        build_span.set_data("snuba_query", built_request.body)

        timer.mark("validate_schema")
        return built_request
def _format_storage_query_and_run(
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    referrer: str,
    clickhouse_query: Union[Query, CompositeQuery[Table]],
    request_settings: RequestSettings,
    reader: Reader,
) -> QueryResult:
    """
    Format the storage query and pass it to the DB specific code
    (``raw_query``) for execution, together with the stats describing the
    tables the query reads from.
    """
    tables = TablesCollector()
    tables.visit(clickhouse_query.get_from_clause())
    table_names = ",".join(sorted(tables.get_tables()))

    with sentry_sdk.start_span(description="create_query", op="db") as create_span:
        formatted_query = format_query(clickhouse_query, request_settings)
        create_span.set_data("query", formatted_query.structured())
        metrics.increment("execute")

    timer.mark("prepare_query")

    query_stats = {
        "clickhouse_table": table_names,
        "final": tables.any_final(),
        "referrer": referrer,
        "sample": tables.get_sample_rate(),
    }

    sql = formatted_query.get_sql()
    with sentry_sdk.start_span(description=sql, op="db") as run_span:
        run_span.set_tag("table", table_names)

        return raw_query(
            clickhouse_query,
            request_settings,
            formatted_query,
            reader,
            timer,
            query_metadata,
            query_stats,
            run_span.trace_id,
        )
def _format_storage_query_and_run(
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    from_date: datetime,
    to_date: datetime,
    referrer: str,
    clickhouse_query: Query,
    request_settings: RequestSettings,
    reader: Reader[SqlQuery],
) -> QueryResult:
    """
    Format the storage query as an ``AstSqlQuery`` and pass it to the DB
    specific code (``raw_query``) for execution, together with the stats
    describing the query.
    """
    table = clickhouse_query.get_from_clause().format_from()

    with sentry_sdk.start_span(description="create_query", op="db") as create_span:
        formatted_query = AstSqlQuery(clickhouse_query, request_settings)
        create_span.set_data("query", formatted_query.sql_data())
        metrics.increment("execute")

    timer.mark("prepare_query")

    query_stats = {
        "clickhouse_table": table,
        "final": clickhouse_query.get_final(),
        "referrer": referrer,
        "num_days": (to_date - from_date).days,
        "sample": clickhouse_query.get_sample(),
    }

    sql = formatted_query.format_sql()
    with sentry_sdk.start_span(description=sql, op="db") as run_span:
        run_span.set_tag("table", table)

        return raw_query(
            clickhouse_query,
            request_settings,
            formatted_query,
            reader,
            timer,
            query_metadata,
            query_stats,
            run_span.trace_id,
        )
def __execute(
    self, task: ScheduledTask[Subscription], tick: Tick
) -> Tuple[Request, Result]:
    """
    Build the request for the scheduled subscription and execute it.

    For ``DelegateSubscriptionData`` the request is first built from the
    delegate itself; if building or executing it raises, the failure is
    counted and logged and the legacy form of the subscription
    (``to_legacy()``) is built and executed instead.
    """
    # Measure the amount of time that took between this task being
    # scheduled and it beginning to execute.
    latency_ms = (time.time() - task.timestamp.timestamp()) * 1000
    self.__metrics.timing("executor.latency", latency_ms)

    # XXX: The ``query`` name is taken from the web views so that all query
    # performance metrics are reported to the same spot, regardless of
    # execution environment.
    timer = Timer("query")

    subscription_data = task.task.data
    is_delegate = isinstance(subscription_data, DelegateSubscriptionData)

    if is_delegate:
        data_type = "delegate"
    elif isinstance(subscription_data, SnQLSubscriptionData):
        data_type = "snql"
    else:
        data_type = "legacy"
    self.__metrics.increment("incoming.task", tags={"type": data_type})

    def build_and_execute(data):
        request = data.build_request(
            self.__dataset,
            task.timestamp,
            tick.offsets.upper,
            timer,
            self.__metrics,
        )
        return self.__execute_query(request, timer, task)

    if not is_delegate:
        return build_and_execute(subscription_data)

    try:
        return build_and_execute(subscription_data)
    except Exception as e:
        self.__metrics.increment("snql.subscription.delegate.error.execution")
        logger.warning(
            f"failed snql subscription query: {e}",
            exc_info=e,
            extra={"error": str(e), "data": subscription_data.to_dict()},
        )
        # Fall back to the legacy form of the same subscription.
        return build_and_execute(subscription_data.to_legacy())
def record_query(
    request: Request, timer: Timer, query_metadata: SnubaQueryMetadata
) -> None:
    """
    Pass the query metadata to ``state.record_query`` and send the timer
    metrics for the request.

    Nothing happens unless ``settings.RECORD_QUERIES`` is truthy.
    """
    if not settings.RECORD_QUERIES:
        return

    # Send to redis
    # ``state`` receives a plain dict rather than the metadata object itself:
    # this avoids a circular dependency where state would have to depend on the
    # higher level QueryMetadata class.
    state.record_query(query_metadata.to_dict())

    final_tag = str(request.query.get_final())
    referrer_tag = request.referrer or "none"

    timer_tags = {
        "status": query_metadata.status,
        "referrer": referrer_tag,
        "final": final_tag,
    }
    timer.send_metrics_to(metrics, tags=timer_tags, mark_tags={"final": final_tag})
def _add_tags(timer: Timer, request: Optional[Request] = None) -> None:
    """
    Set Sentry tags derived from the timer and, when a request is given, from
    the experiments of its query.

    Nothing is tagged when the current hub scope has no span.
    """
    if not Hub.current.scope.span:
        return

    duration_group = timer.get_duration_group()
    sentry_sdk.set_tag("duration_group", duration_group)
    if duration_group == ">30s":
        sentry_sdk.set_tag("timeout", "too_long")

    if request is None:
        return

    experiments: MutableMapping[str, Any] = request.query.get_experiments()
    for experiment_name, experiment_value in experiments.items():
        sentry_sdk.set_tag(f"exp-{experiment_name}", str(experiment_value))
def test_subscription_task_result_encoder() -> None:
    """
    Encode a ``SubscriptionTaskResult`` and check the version and every
    payload field of the decoded JSON message.
    """
    codec = SubscriptionTaskResultEncoder()

    timestamp = datetime.now()

    subscription_data = SubscriptionData(
        project_id=1,
        query="MATCH (events) SELECT count() AS count",
        time_window_sec=60,
        resolution_sec=60,
        entity_subscription=EventsSubscription(data_dict={}),
    )

    # XXX: This seems way too coupled to the dataset.
    request = subscription_data.build_request(
        get_dataset("events"), timestamp, None, Timer("timer")
    )
    result: Result = {
        "meta": [{"type": "UInt64", "name": "count"}],
        "data": [{"count": 1}],
    }

    subscription = Subscription(
        SubscriptionIdentifier(PartitionId(1), uuid.uuid1()),
        subscription_data,
    )
    scheduled_task = ScheduledSubscriptionTask(
        timestamp,
        SubscriptionWithMetadata(EntityKey.EVENTS, subscription, 5),
    )
    task_result = SubscriptionTaskResult(scheduled_task, (request, result))

    message = codec.encode(task_result)
    decoded = json.loads(message.value.decode("utf-8"))
    assert decoded["version"] == 3

    payload = decoded["payload"]
    expected_subscription_id = str(task_result.task.task.subscription.identifier)
    assert payload["subscription_id"] == expected_subscription_id
    assert payload["request"] == request.original_body
    assert payload["result"] == result
    assert payload["timestamp"] == task_result.task.timestamp.isoformat()
    assert payload["entity"] == EntityKey.EVENTS.value
def _record_timer_metrics(
    request: Request,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
) -> None:
    """
    Send the timer metrics for the request, tagged with the query status,
    the referrer (``"none"`` when falsy), the ``final`` flag of the query and
    the dataset taken from ``query_metadata``.
    """
    final_tag = str(request.query.get_final())
    referrer_tag = request.referrer or "none"
    dataset_tag = query_metadata.dataset

    timer_tags = {
        "status": query_metadata.status.value,
        "referrer": referrer_tag,
        "final": final_tag,
        "dataset": dataset_tag,
    }
    # Marks carry the same tags as the timer itself, except for the status.
    per_mark_tags = {
        "final": final_tag,
        "referrer": referrer_tag,
        "dataset": dataset_tag,
    }
    timer.send_metrics_to(metrics, tags=timer_tags, mark_tags=per_mark_tags)