Example #1
def dataset_query(dataset: Dataset, body, timer: Timer) -> Response:
    assert http_request.method == "POST"

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_extensions(), HTTPRequestSettings
        )

    request = build_request(body, schema, timer, dataset, http_request.referrer)

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
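For reference, a hedged sketch of the JSON body the rate-limited branch above produces; the timing values and the extra keys are illustrative assumptions, not real output:

# Illustrative only: shape of the 429 response body built by dataset_query above.
# The timing values and the contents of exception.extra are made up here.
rate_limited_body = {
    "error": {"type": "rate-limited", "message": "rate limit exceeded"},
    "timing": {"timestamp": 1600000000, "duration_ms": 12},  # assumed keys
    # ...plus whatever exception.extra carried, if anything
}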
Example #2
File: views.py  Project: ruezetle/snuba
def run_query(dataset: Dataset, request: Request, timer: Timer) -> WebQueryResult:
    try:
        result = parse_and_run_query(dataset, request, timer)
        payload = {**result.result, "timing": timer.for_json()}
        if settings.STATS_IN_RESPONSE or request.settings.get_debug():
            payload.update(result.extra)
        return WebQueryResult(payload, 200)
    except RawQueryException as e:
        return WebQueryResult(
            {
                "error": {"type": e.err_type, "message": e.message, **e.meta},
                "sql": e.sql,
                "stats": e.stats,
                "timing": timer.for_json(),
            },
            429 if e.err_type == "rate-limited" else 500,
        )
Example #3
def run_query(dataset: Dataset, request: Request, timer: Timer) -> QueryResult:
    try:
        return QueryResult(
            {
                **parse_and_run_query(dataset, request, timer),
                "timing":
                timer.for_json(),
            },
            200,
        )
    except RawQueryException as e:
        return QueryResult(
            {
                "error": {
                    "type": e.err_type,
                    "message": e.message,
                    **e.meta
                },
                "sql": e.sql,
                "stats": e.stats,
                "timing": timer.for_json(),
            },
            429 if e.err_type == "rate-limited" else 500,
        )
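As a usage note, a minimal sketch of how a caller might serialize the QueryResult returned above into an HTTP response; the .payload and .status attribute names are assumptions inferred from the constructor arguments, not confirmed API:

def query_result_to_response(result) -> Response:
    # Hypothetical: assumes QueryResult exposes its two constructor
    # arguments as .payload (the dict) and .status (the HTTP status code).
    return Response(
        json.dumps(result.payload), result.status, {"Content-Type": "application/json"}
    )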
Example #4
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer, language: Language
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    if language == Language.SNQL:
        metrics.increment("snql.query.incoming", tags={"referrer": referrer})
        parser: Callable[
            [RequestParts, RequestSettings, Dataset],
            Union[Query, CompositeQuery[Entity]],
        ] = partial(parse_snql_query, [])
    else:
        parser = parse_legacy_query

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build_with_extensions(
            dataset.get_default_entity().get_extensions(), HTTPRequestSettings, language
        )

    request = build_request(
        body, parser, HTTPRequestSettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)

        # Some metrics to track the adoption of SnQL
        query_type = "simple"
        if language == Language.SNQL:
            if isinstance(request.query, CompositeQuery):
                if isinstance(request.query.get_from_clause(), JoinClause):
                    query_type = "join"
                else:
                    query_type = "subquery"

            metrics.increment(
                "snql.query.success", tags={"referrer": referrer, "type": query_type}
            )

    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": "rate limit exceeded",
            }
        elif isinstance(cause, ClickhouseError):
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        if language == Language.SNQL:
            metrics.increment(
                "snql.query.failed", tags={"referrer": referrer, "status": f"{status}"},
            )

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
Example #5
def test_simple():
    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    query = Query(
        request_body,
        get_storage("events").get_schemas().get_read_schema().get_data_source(),
    )

    request = Request(
        uuid.UUID("a" * 32).hex, query, HTTPRequestSettings(), {}, "search"
    )

    time = TestingClock()

    timer = Timer("test", clock=time)
    time.sleep(0.01)

    message = SnubaQueryMetadata(
        request=request,
        dataset=get_dataset("events"),
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql="select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100",
                stats={"sample": 10},
                status="success",
                trace_id="b" * 32,
            )
        ],
    ).to_dict()

    processor = (
        enforce_table_writer(get_dataset("querylog"))
        .get_stream_loader()
        .get_processor()
    )

    assert processor.process_message(message) == ProcessedMessage(
        ProcessorAction.INSERT,
        [
            {
                "request_id": str(uuid.UUID("a" * 32)),
                "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
                "referrer": "search",
                "dataset": get_dataset("events"),
                "projects": [1],
                "organization": None,
                "timestamp": timer.for_json()["timestamp"],
                "duration_ms": 10,
                "status": "success",
                "clickhouse_queries.sql": [
                    "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
                ],
                "clickhouse_queries.status": ["success"],
                "clickhouse_queries.trace_id": [str(uuid.UUID("b" * 32))],
                "clickhouse_queries.duration_ms": [0],
                "clickhouse_queries.stats": ['{"sample": 10}'],
                "clickhouse_queries.final": [0],
                "clickhouse_queries.cache_hit": [0],
                "clickhouse_queries.sample": [10.0],
                "clickhouse_queries.max_threads": [0],
                "clickhouse_queries.num_days": [0],
                "clickhouse_queries.clickhouse_table": [""],
                "clickhouse_queries.query_id": [""],
                "clickhouse_queries.is_duplicate": [0],
                "clickhouse_queries.consistent": [0],
            }
        ],
    )
Example #6
File: views.py  Project: getsentry/snuba
def dataset_query(
    dataset: Dataset, body: MutableMapping[str, Any], timer: Timer
) -> Response:
    assert http_request.method == "POST"
    referrer = http_request.referrer or "<unknown>"  # mypy

    # Try to detect if new requests are being sent to the api
    # after the shutdown command has been issued, and if so
    # how long after. I don't want to do a disk check for
    # every query, so randomly sample until the shutdown file
    # is detected, and then log everything
    if IS_SHUTTING_DOWN or random.random() < 0.05:
        if IS_SHUTTING_DOWN or check_down_file_exists():
            tags = {"dataset": get_dataset_name(dataset)}
            metrics.increment("post.shutdown.query", tags=tags)
            diff = time.time() - (shutdown_time() or 0.0)  # this should never be None
            metrics.timing("post.shutdown.query.delay", diff, tags=tags)

    with sentry_sdk.start_span(description="build_schema", op="validate"):
        schema = RequestSchema.build(HTTPQuerySettings)

    request = build_request(
        body, parse_snql_query, HTTPQuerySettings, schema, dataset, timer, referrer
    )

    try:
        result = parse_and_run_query(dataset, request, timer)
    except QueryException as exception:
        status = 500
        details: Mapping[str, Any]

        cause = exception.__cause__
        if isinstance(cause, RateLimitExceeded):
            status = 429
            details = {
                "type": "rate-limited",
                "message": str(cause),
            }
            logger.warning(
                str(cause),
                exc_info=True,
            )
        elif isinstance(cause, ClickhouseError):
            status = get_http_status_for_clickhouse_error(cause)
            details = {
                "type": "clickhouse",
                "message": str(cause),
                "code": cause.code,
            }
        elif isinstance(cause, QueryTooLongException):
            status = 400
            details = {"type": "query-too-long", "message": str(cause)}
        elif isinstance(cause, Exception):
            details = {
                "type": "unknown",
                "message": str(cause),
            }
        else:
            raise  # exception should have been chained

        return Response(
            json.dumps(
                {"error": details, "timing": timer.for_json(), **exception.extra}
            ),
            status,
            {"Content-Type": "application/json"},
        )

    payload: MutableMapping[str, Any] = {**result.result, "timing": timer.for_json()}

    if settings.STATS_IN_RESPONSE or request.query_settings.get_debug():
        payload.update(result.extra)

    return Response(json.dumps(payload), 200, {"Content-Type": "application/json"})
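To summarize the branching above, a hedged sketch of the cause-to-status mapping as a standalone helper; status_for_cause is a hypothetical name, while get_http_status_for_clickhouse_error is the helper already used in the snippet:

def status_for_cause(cause: Exception) -> int:
    # Hypothetical helper mirroring the except-branches in dataset_query above.
    if isinstance(cause, RateLimitExceeded):
        return 429
    if isinstance(cause, ClickhouseError):
        return get_http_status_for_clickhouse_error(cause)
    if isinstance(cause, QueryTooLongException):
        return 400
    return 500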
Example #7
def test_simple() -> None:
    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    query = Query(
        Entity(EntityKey.EVENTS, get_entity(EntityKey.EVENTS).get_data_model())
    )

    request = Request(
        id=uuid.UUID("a" * 32).hex,
        original_body=request_body,
        query=query,
        snql_anonymized="",
        query_settings=HTTPQuerySettings(referrer="search"),
        attribution_info=AttributionInfo(
            get_app_id("default"), "search", None, None, None
        ),
    )

    time = TestingClock()

    timer = Timer("test", clock=time)
    time.sleep(0.01)

    message = SnubaQueryMetadata(
        request=request,
        start_timestamp=datetime.utcnow() - timedelta(days=3),
        end_timestamp=datetime.utcnow(),
        dataset="events",
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql="select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100",
                sql_anonymized="select event_id from sentry_dist sample 0.1 prewhere project_id in ($I) limit 50, 100",
                start_timestamp=datetime.utcnow() - timedelta(days=3),
                end_timestamp=datetime.utcnow(),
                stats={"sample": 10, "error_code": 386},
                status=QueryStatus.SUCCESS,
                profile=ClickhouseQueryProfile(
                    time_range=10,
                    table="events",
                    all_columns={"timestamp", "tags"},
                    multi_level_condition=False,
                    where_profile=FilterProfile(
                        columns={"timestamp"},
                        mapping_cols={"tags"},
                    ),
                    groupby_cols=set(),
                    array_join_cols=set(),
                ),
                trace_id="b" * 32,
            )
        ],
        projects={2},
        snql_anonymized=request.snql_anonymized,
        entity=EntityKey.EVENTS.value,
    ).to_dict()

    processor = (
        get_writable_storage(StorageKey.QUERYLOG)
        .get_table_writer()
        .get_stream_loader()
        .get_processor()
    )

    assert processor.process_message(
        message, KafkaMessageMetadata(0, 0, datetime.now())
    ) == InsertBatch(
        [
            {
                "request_id": str(uuid.UUID("a" * 32)),
                "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
                "referrer": "search",
                "dataset": "events",
                "projects": [2],
                "organization": None,
                "timestamp": timer.for_json()["timestamp"],
                "duration_ms": 10,
                "status": "success",
                "clickhouse_queries.sql": [
                    "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
                ],
                "clickhouse_queries.status": ["success"],
                "clickhouse_queries.trace_id": [str(uuid.UUID("b" * 32))],
                "clickhouse_queries.duration_ms": [0],
                "clickhouse_queries.stats": ['{"error_code": 386, "sample": 10}'],
                "clickhouse_queries.final": [0],
                "clickhouse_queries.cache_hit": [0],
                "clickhouse_queries.sample": [10.0],
                "clickhouse_queries.max_threads": [0],
                "clickhouse_queries.num_days": [10],
                "clickhouse_queries.clickhouse_table": [""],
                "clickhouse_queries.query_id": [""],
                "clickhouse_queries.is_duplicate": [0],
                "clickhouse_queries.consistent": [0],
                "clickhouse_queries.all_columns": [["tags", "timestamp"]],
                "clickhouse_queries.or_conditions": [False],
                "clickhouse_queries.where_columns": [["timestamp"]],
                "clickhouse_queries.where_mapping_columns": [["tags"]],
                "clickhouse_queries.groupby_columns": [[]],
                "clickhouse_queries.array_join_columns": [[]],
            }
        ],
        None,
    )
Example #8
def test_simple() -> None:
    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    query = Query(get_storage(StorageKey.EVENTS).get_schema().get_data_source())

    request = Request(
        uuid.UUID("a" * 32).hex, request_body, query, HTTPRequestSettings(), "search",
    )

    time = TestingClock()

    timer = Timer("test", clock=time)
    time.sleep(0.01)

    message = SnubaQueryMetadata(
        request=request,
        dataset="events",
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql="select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100",
                stats={"sample": 10},
                status=QueryStatus.SUCCESS,
                profile=ClickhouseQueryProfile(
                    time_range=10,
                    table="events",
                    all_columns={"timestamp", "tags"},
                    multi_level_condition=False,
                    where_profile=FilterProfile(
                        columns={"timestamp"}, mapping_cols={"tags"},
                    ),
                    groupby_cols=set(),
                    array_join_cols=set(),
                ),
                trace_id="b" * 32,
            )
        ],
    ).to_dict()

    processor = (
        get_writable_storage(StorageKey.QUERYLOG)
        .get_table_writer()
        .get_stream_loader()
        .get_processor()
    )

    assert processor.process_message(
        message, KafkaMessageMetadata(0, 0, datetime.now())
    ) == InsertBatch(
        [
            {
                "request_id": str(uuid.UUID("a" * 32)),
                "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
                "referrer": "search",
                "dataset": "events",
                "projects": [1],
                "organization": None,
                "timestamp": timer.for_json()["timestamp"],
                "duration_ms": 10,
                "status": "success",
                "clickhouse_queries.sql": [
                    "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
                ],
                "clickhouse_queries.status": ["success"],
                "clickhouse_queries.trace_id": [str(uuid.UUID("b" * 32))],
                "clickhouse_queries.duration_ms": [0],
                "clickhouse_queries.stats": ['{"sample": 10}'],
                "clickhouse_queries.final": [0],
                "clickhouse_queries.cache_hit": [0],
                "clickhouse_queries.sample": [10.0],
                "clickhouse_queries.max_threads": [0],
                "clickhouse_queries.num_days": [10],
                "clickhouse_queries.clickhouse_table": [""],
                "clickhouse_queries.query_id": [""],
                "clickhouse_queries.is_duplicate": [0],
                "clickhouse_queries.consistent": [0],
                "clickhouse_queries.all_columns": [["tags", "timestamp"]],
                "clickhouse_queries.or_conditions": [False],
                "clickhouse_queries.where_columns": [["timestamp"]],
                "clickhouse_queries.where_mapping_columns": [["tags"]],
                "clickhouse_queries.groupby_columns": [[]],
                "clickhouse_queries.array_join_columns": [[]],
            }
        ],
    )