def build_request(
    self, dataset: Dataset, timestamp: datetime, offset: Optional[int], timer: Timer
) -> Request:
    """
    Returns a Request that can be used to run a query via `parse_and_run_query`.

    :param dataset: The Dataset to build the request for
    :param timestamp: Date that the query should run up until
    :param offset: Maximum offset we should query for
    :param timer: Timer used to track execution of the built request
    """
    schema = RequestSchema.build_with_extensions(
        dataset.get_default_entity().get_extensions(),
        SubscriptionRequestSettings,
        Language.LEGACY,
    )
    extra_conditions: Sequence[Condition] = []
    if offset is not None:
        # Only consider rows at or below the given offset; `ifnull` maps a
        # missing offset column value to 0 so such rows are always included.
        extra_conditions = [[["ifnull", ["offset", 0]], "<=", offset]]
    return build_request(
        {
            "project": self.project_id,
            "conditions": [*self.conditions, *extra_conditions],
            "aggregations": self.aggregations,
            # Query window is [timestamp - time_window, timestamp].
            "from_date": (timestamp - self.time_window).isoformat(),
            "to_date": timestamp.isoformat(),
        },
        schema,
        timer,
        dataset,
        SUBSCRIPTION_REFERRER,
    )
def enforce_table_writer(dataset: Dataset) -> TableWriter:
    """
    Return the TableWriter of the dataset's default entity's writable storage,
    asserting that such a storage exists.

    :param dataset: Dataset whose default entity must have a writable storage.
    :raises AssertionError: if the default entity has no writable storage.
    """
    writable_storage = dataset.get_default_entity().get_writable_storage()
    # Fixed message: previously rendered as "Dataset<name> ..." with no space.
    assert (
        writable_storage is not None
    ), f"Dataset {dataset} does not have a writable storage."
    return writable_storage.get_table_writer()
def eventstream(*, dataset: Dataset) -> RespTuple:
    """
    Debug endpoint that pushes a single eventstream record through either the
    consumer pipeline (insert messages) or the replacer (everything else),
    targeting the writable storage of the dataset's default entity.

    :param dataset: Dataset to write into.
    :raises RuntimeError: if the record does not use protocol version 2.
    """
    record = json.loads(http_request.data)

    version = record[0]
    if version != 2:
        raise RuntimeError("Unsupported protocol version: %s" % record)

    # Wrap the raw HTTP payload in a synthetic Kafka message so it can flow
    # through the same machinery as real stream traffic.
    message: Message[KafkaPayload] = Message(
        Partition(Topic("topic"), 0),
        0,
        KafkaPayload(None, http_request.data, []),
        datetime.now(),
    )

    type_ = record[1]

    storage = dataset.get_default_entity().get_writable_storage()
    assert storage is not None

    if type_ == "insert":
        from arroyo.processing.strategies.streaming import (
            KafkaConsumerStrategyFactory,
        )

        from snuba.consumers.consumer import build_batch_writer, process_message

        table_writer = storage.get_table_writer()
        stream_loader = table_writer.get_stream_loader()
        strategy = KafkaConsumerStrategyFactory(
            stream_loader.get_pre_filter(),
            functools.partial(
                # Fixed typo: was "consumer_grouup"; use the same group name
                # as the replacer branch below.
                process_message,
                stream_loader.get_processor(),
                "consumer_group",
            ),
            build_batch_writer(table_writer, metrics=metrics),
            # Batch of one with a tiny timeout: flush immediately since this
            # endpoint handles a single message per request.
            max_batch_size=1,
            max_batch_time=1.0,
            processes=None,
            input_block_size=None,
            output_block_size=None,
        ).create(lambda offsets: None)
        strategy.submit(message)
        strategy.close()
        strategy.join()
    else:
        from snuba.replacer import ReplacerWorker

        worker = ReplacerWorker(storage, "consumer_group", metrics=metrics)
        processed = worker.process_message(message)
        if processed is not None:
            batch = [processed]
            worker.flush_batch(batch)

    return ("ok", 200, {"Content-Type": "text/plain"})
def parse_query(body: MutableMapping[str, Any], dataset: Dataset) -> Query:
    """
    Parses the query body generating the AST. This only takes into
    account the initial query body. Extensions are parsed by extension
    processors and are supposed to update the AST.

    Parsing includes two phases. The first transforms the json body into
    a minimal query Object resolving expressions, conditions, etc.
    The second phase performs some query processing to provide a sane
    query to the dataset specific section.
    - It prevents alias shadowing.
    - It transforms columns from the tags[asd] form into
      SubscriptableReference.
    - Applies aliases to all columns that do not have one and that do not
      represent a reference to an existing alias.
      During query processing a column can be transformed into a different
      expression. It is essential to preserve the original column name so
      that the result set still has a column with the name provided by the
      user no matter on which transformation we applied.
      By applying aliases at this stage every processor just needs to
      preserve them to guarantee the correctness of the query.
    - Expands all the references to aliases by inlining the expression
      to make aliasing transparent to all query processing phases.
      References to aliases are reintroduced at the end of the query
      processing.

    Alias references are packaged back at the end of processing.
    """
    # TODO: Parse the entity out of the query body and select the correct one from the dataset
    entity = dataset.get_default_entity()

    query = _parse_query_impl(body, entity)
    # TODO: These should support composite queries.
    _validate_empty_table_names(query)
    _validate_aliases(query)
    _parse_subscriptables(query)
    _apply_column_aliases(query)
    _expand_aliases(query)
    # WARNING: These steps above assume table resolution did not happen
    # yet. If it is put earlier than here (unlikely), we need to adapt them.
    _deescape_aliases(query)
    _mangle_aliases(query)
    _validate_arrayjoin(query)

    # XXX: Select the entity to be used for the query. This step is
    # temporary. Eventually entity selection will be moved to Sentry
    # and specified for all SnQL queries.
    selected_entity = dataset.select_entity(query)
    query_entity = QueryEntity(
        selected_entity, get_entity(selected_entity).get_data_model()
    )
    query.set_from_clause(query_entity)

    validate_query(query)
    return query
def dataset_query(dataset: Dataset, body, timer: Timer) -> Response:
    """
    Execute a legacy-format query body against the given dataset, returning
    the query result as an HTTP response, or a JSON error payload when the
    query fails.
    """
    assert http_request.method == "POST"

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_default_entity().get_extensions(), HTTPRequestSettings
        )

    request = build_request(body, schema, timer, dataset, http_request.referrer)

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        root = exception.__cause__
        error: Mapping[str, Any]
        http_status = 500
        if isinstance(root, RateLimitExceeded):
            http_status = 429
            error = {"type": "rate-limited", "message": "rate limit exceeded"}
        elif isinstance(root, ClickhouseError):
            error = {
                "type": "clickhouse",
                "message": str(root),
                "code": root.code,
            }
        elif isinstance(root, Exception):
            error = {"type": "unknown", "message": str(root)}
        else:
            # A QueryException must always be chained to its cause.
            raise
        error_body = json.dumps(
            {"error": error, "timing": timer.for_json(), **exception.extra}
        )
        return Response(error_body, http_status, {"Content-Type": "application/json"})

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}
    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
def dataset_query_view(*, dataset: Dataset, timer: Timer):
    """
    Serve the interactive query template on GET; run a query on POST.
    Any other HTTP method is a routing error.
    """
    method = http_request.method
    if method == "POST":
        body = parse_request_body(http_request)
        _trace_transaction(dataset)
        return dataset_query(dataset, body, timer)
    if method == "GET":
        schema = RequestSchema.build_with_extensions(
            dataset.get_default_entity().get_extensions(), HTTPRequestSettings
        )
        template = json.dumps(schema.generate_template(), indent=4)
        return render_template("query.html", query_template=template)
    assert False, "unexpected fallthrough"
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer, language: Language
) -> Response:
    """
    Execute a query body (legacy or SnQL) against the given dataset, returning
    the query result as an HTTP response, or a JSON error payload when the
    query fails.

    :param dataset: Dataset the query runs against.
    :param body: Parsed JSON request body.
    :param timer: Timer tracking the phases of this request.
    :param language: Query language of the body (legacy or SnQL); also
        drives SnQL adoption metrics.
    """
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    if language == Language.SNQL:
        metrics.increment("snql.query.incoming", tags={"referrer": referrer})
        parser: Callable[
            [RequestParts, RequestSettings, Dataset],
            Union[Query, CompositeQuery[Entity]],
        ] = partial(parse_snql_query, [])
    else:
        parser = parse_legacy_query

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_default_entity().get_extensions(), HTTPRequestSettings, language
        )

    request = build_request(
        body, parser, HTTPRequestSettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)

        # Some metrics to track the adoption of SnQL
        query_type = "simple"
        if language == Language.SNQL:
            if isinstance(request.query, CompositeQuery):
                if isinstance(request.query.get_from_clause(), JoinClause):
                    query_type = "join"
                else:
                    query_type = "subquery"
            metrics.increment(
                "snql.query.success", tags={"referrer": referrer, "type": query_type}
            )
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]
        cause = exception.__cause__
        # Branch order matters: specific causes first, generic Exception last.
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        if language == Language.SNQL:
            metrics.increment(
                "snql.query.failed",
                tags={"referrer": referrer, "status": f"{status}"},
            )

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
def write(*, dataset: Dataset) -> RespTuple:
    """Write the request payload into the dataset's default entity."""
    entity = dataset.get_default_entity()
    return _write_to_entity(entity=entity)