Пример #1
0
def record_query(request: Request, timer: Timer,
                 query_metadata: SnubaQueryMetadata) -> None:
    """
    Record a request that has already been parsed and validated, regardless
    of whether a query was actually executed for it.

    Nothing happens unless ``settings.RECORD_QUERIES`` is truthy. When it is,
    the metadata is passed to ``state.record_query``, the timer's metrics are
    sent tagged with the query status, the referrer and the query's ``final``
    flag, and finally ``_add_tags`` is invoked with the timer and request.
    """
    if not settings.RECORD_QUERIES:
        return

    # Send to redis. The metadata is flattened to a plain dict first so that
    # state does not need to know about the higher level QueryMetadata class,
    # which would otherwise create a circular dependency.
    state.record_query(query_metadata.to_dict())

    final_tag = str(request.query.get_final())
    # A missing/empty referrer is reported as the literal string "none".
    referrer_tag = request.referrer or "none"
    timer.send_metrics_to(
        metrics,
        tags={
            "status": query_metadata.status.value,
            "referrer": referrer_tag,
            "final": final_tag,
        },
        mark_tags={"final": final_tag, "referrer": referrer_tag},
    )

    _add_tags(timer, request)
Пример #2
0
def parse_and_run_query(
    dataset: Dataset,
    request: Request,
    timer: Timer,
    robust: bool = False,
    concurrent_queries_gauge: Optional[Gauge] = None,
) -> QueryResult:
    """
    Run a Snuba query through the pipeline and record its metadata.

    A ``SnubaQueryMetadata`` with an empty query list is created and handed
    to ``_run_query_pipeline`` together with the other arguments. On success
    the query is recorded unless the request settings report a dry run. On
    ``QueryException`` the query is always recorded, using the exception's
    ``extra``, and the exception is re-raised to the caller.
    """
    dataset_name = get_dataset_name(dataset)
    metadata = SnubaQueryMetadata(
        request=request,
        dataset=dataset_name,
        timer=timer,
        query_list=[],
    )

    try:
        result = _run_query_pipeline(
            dataset=dataset,
            request=request,
            timer=timer,
            query_metadata=metadata,
            robust=robust,
            concurrent_queries_gauge=concurrent_queries_gauge,
        )
        is_dry_run = request.settings.get_dry_run()
        if not is_dry_run:
            record_query(request, timer, metadata, result.extra)
    except QueryException as error:
        # Failed queries are recorded as well, with the extra data carried
        # by the exception instead of by a result.
        record_query(request, timer, metadata, error.extra)
        raise error

    return result
Пример #3
0
def parse_and_run_query(
    dataset: Dataset,
    request: Request,
    timer: Timer,
    robust: bool = False,
    concurrent_queries_gauge: Optional[Gauge] = None,
) -> QueryResult:
    """
    Run a Snuba query through the pipeline and record its metadata.

    For a ``LogicalQuery`` the entity is looked up from the query's FROM
    clause; its name and, when the entity declares a required time column,
    the query's time range are stored in the metadata. For any other query
    the entity name is "unknown" and both timestamps are ``None``.

    After the pipeline returns, ``_set_query_final`` is called with the
    result's ``extra`` and the query is recorded unless the query settings
    report a dry run. On ``QueryException`` the same two steps run with the
    exception's ``extra`` (recording unconditionally) before it is re-raised.
    """
    start = None
    end = None
    entity_name = "unknown"

    if isinstance(request.query, LogicalQuery):
        entity_key = request.query.get_from_clause().key
        entity = get_entity(entity_key)
        entity_name = entity_key.value
        time_column = entity.required_time_column
        if time_column is not None:
            start, end = get_time_range(request.query, time_column)

    dataset_name = get_dataset_name(dataset)
    project_ids = ProjectsFinder().visit(request.query)
    metadata = SnubaQueryMetadata(
        request=request,
        start_timestamp=start,
        end_timestamp=end,
        dataset=dataset_name,
        entity=entity_name,
        timer=timer,
        query_list=[],
        projects=project_ids,
        snql_anonymized=request.snql_anonymized,
    )

    try:
        result = _run_query_pipeline(
            dataset=dataset,
            request=request,
            timer=timer,
            query_metadata=metadata,
            robust=robust,
            concurrent_queries_gauge=concurrent_queries_gauge,
        )
        _set_query_final(request, result.extra)
        is_dry_run = request.query_settings.get_dry_run()
        if not is_dry_run:
            record_query(request, timer, metadata, result.extra)
    except QueryException as error:
        # Failures are recorded too, with the exception's extra data.
        _set_query_final(request, error.extra)
        record_query(request, timer, metadata, error.extra)
        raise error

    return result
Пример #4
0
def record_query(
    request: Request,
    timer: Timer,
    query_metadata: SnubaQueryMetadata,
    extra_data: Mapping[str, Any],
) -> None:
    """
    Record a request that has already been parsed and validated, regardless
    of whether a query was actually executed for it.

    Nothing happens unless ``settings.RECORD_QUERIES`` is truthy. When it is,
    the metadata is passed to ``state.record_query``, then timer metrics and
    attribution metrics are recorded, and ``_add_tags`` is called with the
    ``"experiments"`` entry of ``extra_data`` (``None`` when absent).
    """
    if not settings.RECORD_QUERIES:
        return

    # Send to redis. The metadata is flattened to a plain dict first so that
    # state does not need to know about the higher level QueryMetadata class,
    # which would otherwise create a circular dependency.
    state.record_query(query_metadata.to_dict())

    _record_timer_metrics(request, timer, query_metadata)
    _record_attribution_metrics(request, query_metadata, extra_data)

    experiments = extra_data.get("experiments")
    _add_tags(timer, experiments, query_metadata)
Пример #5
0
def parse_and_run_query(dataset: Dataset, request: Request,
                        timer: Timer) -> QueryResult:
    """
    Run a Snuba query through the pipeline and record its metadata.

    A deep copy of the request is taken before anything else. The metadata
    and ``record_query`` use that copy, while ``_run_query_pipeline``
    receives the original request.
    NOTE(review): presumably the copy shields the recorded request from
    changes the pipeline makes to the original - confirm.

    The query is recorded both on success and on ``QueryException``; in the
    latter case the exception is re-raised after recording.
    """
    recorded_request = copy.deepcopy(request)
    dataset_name = get_dataset_name(dataset)
    metadata = SnubaQueryMetadata(
        request=recorded_request,
        dataset=dataset_name,
        timer=timer,
        query_list=[],
    )

    try:
        result = _run_query_pipeline(
            request=request,
            timer=timer,
            query_metadata=metadata,
        )
        record_query(recorded_request, timer, metadata)
    except QueryException as error:
        # Failed queries are recorded as well before propagating the error.
        record_query(recorded_request, timer, metadata)
        raise error

    return result
Пример #6
0
def test_simple() -> None:
    """
    Push one ``SnubaQueryMetadata`` dict through the querylog storage's
    stream processor and compare the resulting ``InsertBatch`` row with the
    expected column values.
    """
    request_uuid = uuid.UUID("a" * 32)
    trace_id = "b" * 32
    sql = "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"
    sql_anonymized = "select event_id from sentry_dist sample 0.1 prewhere project_id in ($I) limit 50, 100"

    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    events_entity = Entity(
        EntityKey.EVENTS,
        get_entity(EntityKey.EVENTS).get_data_model(),
    )
    request = Request(
        id=request_uuid.hex,
        original_body=request_body,
        query=Query(events_entity),
        snql_anonymized="",
        query_settings=HTTPQuerySettings(referrer="search"),
        attribution_info=AttributionInfo(
            get_app_id("default"), "search", None, None, None
        ),
    )

    clock = TestingClock()
    timer = Timer("test", clock=clock)
    # Presumably advances the testing clock by 10ms, which is what the
    # expected "duration_ms" of 10 below relies on - confirm.
    clock.sleep(0.01)

    profile = ClickhouseQueryProfile(
        time_range=10,
        table="events",
        all_columns={"timestamp", "tags"},
        multi_level_condition=False,
        where_profile=FilterProfile(
            columns={"timestamp"},
            mapping_cols={"tags"},
        ),
        groupby_cols=set(),
        array_join_cols=set(),
    )

    metadata = SnubaQueryMetadata(
        request=request,
        start_timestamp=datetime.utcnow() - timedelta(days=3),
        end_timestamp=datetime.utcnow(),
        dataset="events",
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql=sql,
                sql_anonymized=sql_anonymized,
                start_timestamp=datetime.utcnow() - timedelta(days=3),
                end_timestamp=datetime.utcnow(),
                stats={"sample": 10, "error_code": 386},
                status=QueryStatus.SUCCESS,
                profile=profile,
                trace_id=trace_id,
            )
        ],
        projects={2},
        snql_anonymized=request.snql_anonymized,
        entity=EntityKey.EVENTS.value,
    )
    message = metadata.to_dict()

    table_writer = get_writable_storage(StorageKey.QUERYLOG).get_table_writer()
    processor = table_writer.get_stream_loader().get_processor()

    # The processor runs before the expected batch is built, matching the
    # left-to-right evaluation of a single comparison expression.
    actual = processor.process_message(
        message, KafkaMessageMetadata(0, 0, datetime.now())
    )

    expected_row = {
        "request_id": str(uuid.UUID("a" * 32)),
        "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
        "referrer": "search",
        "dataset": "events",
        "projects": [2],
        "organization": None,
        "timestamp": timer.for_json()["timestamp"],
        "duration_ms": 10,
        "status": "success",
        "clickhouse_queries.sql": [sql],
        "clickhouse_queries.status": ["success"],
        "clickhouse_queries.trace_id": [str(uuid.UUID(trace_id))],
        "clickhouse_queries.duration_ms": [0],
        "clickhouse_queries.stats": ['{"error_code": 386, "sample": 10}'],
        "clickhouse_queries.final": [0],
        "clickhouse_queries.cache_hit": [0],
        "clickhouse_queries.sample": [10.0],
        "clickhouse_queries.max_threads": [0],
        "clickhouse_queries.num_days": [10],
        "clickhouse_queries.clickhouse_table": [""],
        "clickhouse_queries.query_id": [""],
        "clickhouse_queries.is_duplicate": [0],
        "clickhouse_queries.consistent": [0],
        "clickhouse_queries.all_columns": [["tags", "timestamp"]],
        "clickhouse_queries.or_conditions": [False],
        "clickhouse_queries.where_columns": [["timestamp"]],
        "clickhouse_queries.where_mapping_columns": [["tags"]],
        "clickhouse_queries.groupby_columns": [[]],
        "clickhouse_queries.array_join_columns": [[]],
    }
    expected = InsertBatch([expected_row], None)

    assert actual == expected
Пример #7
0
def test_simple() -> None:
    """
    Push one ``SnubaQueryMetadata`` dict through the querylog storage's
    stream processor and compare the resulting ``InsertBatch`` row with the
    expected column values.
    """
    request_uuid = uuid.UUID("a" * 32)
    trace_id = "b" * 32
    sql = "select event_id from sentry_dist sample 0.1 prewhere project_id in (1) limit 50, 100"

    request_body = {
        "selected_columns": ["event_id"],
        "orderby": "event_id",
        "sample": 0.1,
        "limit": 100,
        "offset": 50,
        "project": 1,
    }

    events_source = get_storage(StorageKey.EVENTS).get_schema().get_data_source()
    query = Query(events_source)

    request = Request(
        request_uuid.hex,
        request_body,
        query,
        HTTPRequestSettings(),
        "search",
    )

    clock = TestingClock()
    timer = Timer("test", clock=clock)
    # Presumably advances the testing clock by 10ms, which is what the
    # expected "duration_ms" of 10 below relies on - confirm.
    clock.sleep(0.01)

    profile = ClickhouseQueryProfile(
        time_range=10,
        table="events",
        all_columns={"timestamp", "tags"},
        multi_level_condition=False,
        where_profile=FilterProfile(
            columns={"timestamp"},
            mapping_cols={"tags"},
        ),
        groupby_cols=set(),
        array_join_cols=set(),
    )

    metadata = SnubaQueryMetadata(
        request=request,
        dataset="events",
        timer=timer,
        query_list=[
            ClickhouseQueryMetadata(
                sql=sql,
                stats={"sample": 10},
                status=QueryStatus.SUCCESS,
                profile=profile,
                trace_id=trace_id,
            )
        ],
    )
    message = metadata.to_dict()

    table_writer = get_writable_storage(StorageKey.QUERYLOG).get_table_writer()
    processor = table_writer.get_stream_loader().get_processor()

    # The processor runs before the expected batch is built, matching the
    # left-to-right evaluation of a single comparison expression.
    actual = processor.process_message(
        message, KafkaMessageMetadata(0, 0, datetime.now())
    )

    expected_row = {
        "request_id": str(uuid.UUID("a" * 32)),
        "request_body": '{"limit": 100, "offset": 50, "orderby": "event_id", "project": 1, "sample": 0.1, "selected_columns": ["event_id"]}',
        "referrer": "search",
        "dataset": "events",
        "projects": [1],
        "organization": None,
        "timestamp": timer.for_json()["timestamp"],
        "duration_ms": 10,
        "status": "success",
        "clickhouse_queries.sql": [sql],
        "clickhouse_queries.status": ["success"],
        "clickhouse_queries.trace_id": [str(uuid.UUID(trace_id))],
        "clickhouse_queries.duration_ms": [0],
        "clickhouse_queries.stats": ['{"sample": 10}'],
        "clickhouse_queries.final": [0],
        "clickhouse_queries.cache_hit": [0],
        "clickhouse_queries.sample": [10.0],
        "clickhouse_queries.max_threads": [0],
        "clickhouse_queries.num_days": [10],
        "clickhouse_queries.clickhouse_table": [""],
        "clickhouse_queries.query_id": [""],
        "clickhouse_queries.is_duplicate": [0],
        "clickhouse_queries.consistent": [0],
        "clickhouse_queries.all_columns": [["tags", "timestamp"]],
        "clickhouse_queries.or_conditions": [False],
        "clickhouse_queries.where_columns": [["timestamp"]],
        "clickhouse_queries.where_mapping_columns": [["tags"]],
        "clickhouse_queries.groupby_columns": [[]],
        "clickhouse_queries.array_join_columns": [[]],
    }
    expected = InsertBatch([expected_row])

    assert actual == expected