Exemplo n.º 1
0
def test_without_turbo_without_projects_needing_final(query: ClickhouseQuery) -> None:
    """
    Process the query with default request settings and no replacer state
    name. Afterwards the condition must equal the project_id IN [2] filter
    and the from clause must not be final.
    """
    enforcer = PostReplacementConsistencyEnforcer("project_id", None)
    enforcer.process_query(query, HTTPRequestSettings())

    condition = query.get_condition_from_ast()
    assert condition == build_in("project_id", [2])

    from_clause = query.get_from_clause()
    assert not from_clause.final
Exemplo n.º 2
0
def test_not_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    """
    Three groups are excluded for project 2 while max_group_ids_exclude is 5.
    The processed condition must be the AND of a notIn on group_id and the
    project filter, and the from clause must not be final.
    """
    state.set_config("max_group_ids_exclude", 5)
    set_project_exclude_groups(2, [100, 101, 102], ReplacerState.EVENTS)

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.EVENTS)
    enforcer.process_query(query, HTTPRequestSettings())

    condition = query.get_condition_from_ast()

    # Build the expected AST bottom-up instead of as one nested literal.
    group_id = FunctionCall(None, "assumeNotNull", (Column(None, None, "group_id"),))
    excluded = FunctionCall(
        None, "tuple", tuple(Literal(None, group) for group in (100, 101, 102))
    )
    not_excluded = FunctionCall(None, "notIn", (group_id, excluded))
    expected = FunctionCall(
        None, BooleanFunctions.AND, (not_excluded, build_in("project_id", [2]))
    )

    assert condition == expected
    assert not query.get_from_clause().final
Exemplo n.º 3
0
def test_not_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    """
    Three groups are excluded for project 2 while max_group_ids_exclude is 5.
    The processed condition must equal build_and(notIn on group_id, project
    filter), and the from clause must not be final.
    """
    state.set_config("max_group_ids_exclude", 5)
    set_project_exclude_groups(
        2,
        [100, 101, 102],
        ReplacerState.ERRORS,
        # Arbitrary replacement type, no impact on tests
        ReplacementType.EXCLUDE_GROUPS,
    )

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)
    enforcer.process_query(query, HTTPQuerySettings())

    condition = query.get_condition()

    # Build the expected AST bottom-up instead of as one nested literal.
    group_id = FunctionCall(None, "assumeNotNull", (Column(None, None, "group_id"),))
    excluded = FunctionCall(
        None, "tuple", tuple(Literal(None, group) for group in (100, 101, 102))
    )
    not_excluded = FunctionCall(None, "notIn", (group_id, excluded))

    assert condition == build_and(not_excluded, build_in("project_id", [2]))
    assert not query.get_from_clause().final
Exemplo n.º 4
0
def test_too_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    """
    Three groups are excluded for project 2 while max_group_ids_exclude is 2.
    The condition must equal the bare project filter and the from clause
    must be final.
    """
    state.set_config("max_group_ids_exclude", 2)
    set_project_exclude_groups(2, [100, 101, 102], ReplacerState.EVENTS)

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.EVENTS)
    enforcer.process_query(query, HTTPRequestSettings())

    condition = query.get_condition_from_ast()
    assert condition == build_in("project_id", [2])

    from_clause = query.get_from_clause()
    assert from_clause.final
Exemplo n.º 5
0
def test_without_turbo_with_projects_needing_final(query: ClickhouseQuery) -> None:
    """
    Project 2 is flagged as needing final for the EVENTS replacer state.
    After processing with default request settings the condition must equal
    the project filter and query.get_final() must be truthy.
    """
    set_project_needs_final(2, ReplacerState.EVENTS)

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.EVENTS)
    enforcer.process_query(query, HTTPRequestSettings())

    condition = query.get_condition_from_ast()
    assert condition == build_in("project_id", [2])

    is_final = query.get_final()
    assert is_final
Exemplo n.º 6
0
def test_multiple_not_too_many_excludes(
    query_with_multiple_group_ids: ClickhouseQuery,
) -> None:
    """
    The query targets several groups. The number of replaced groups is within
    the exclusion limit, yet it exceeds the number of groups the query asks
    for.
    """
    query = query_with_multiple_group_ids
    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)

    set_project_exclude_groups(
        2,
        [100, 101, 102],
        ReplacerState.ERRORS,
        # Arbitrary replacement type, no impact on tests
        ReplacementType.EXCLUDE_GROUPS,
    )

    # Start from a final query so the assertion below shows it gets unset.
    enforcer._set_query_final(query, True)
    state.set_config("max_group_ids_exclude", 5)

    enforcer.process_query(query, HTTPQuerySettings())

    condition = query.get_condition()
    exclusion = build_not_in("group_id", [101, 102])
    project_and_groups = build_and(
        build_in("project_id", [2]), build_in("group_id", [101, 102])
    )
    assert condition == build_and(exclusion, project_and_groups)
    assert not query.get_from_clause().final
Exemplo n.º 7
0
def test_single_too_many_exclude(query_with_single_group_id: ClickhouseQuery) -> None:
    """
    The query targets one group that has been replaced, and the project has
    more replaced groups than the exclusion limit allows.
    """
    query = query_with_single_group_id
    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)

    set_project_exclude_groups(
        2,
        [100, 101, 102],
        ReplacerState.ERRORS,
        # Arbitrary replacement type, no impact on tests
        ReplacementType.EXCLUDE_GROUPS,
    )

    # Start from a final query so the assertion below shows it gets unset.
    enforcer._set_query_final(query, True)
    state.set_config("max_group_ids_exclude", 2)

    enforcer.process_query(query, HTTPQuerySettings())

    condition = query.get_condition()
    exclusion = build_not_in("group_id", [101])
    project_and_group = build_and(
        build_in("project_id", [2]), build_in("group_id", [101])
    )
    assert condition == build_and(exclusion, project_and_group)
    assert not query.get_from_clause().final
Exemplo n.º 8
0
def test_without_turbo_with_projects_needing_final(query: ClickhouseQuery) -> None:
    """
    Project 2 is flagged as needing final for the ERRORS replacer state.
    After processing with default query settings the condition must equal
    the project filter and the from clause must be final.
    """
    set_project_needs_final(
        2,
        ReplacerState.ERRORS,
        # Arbitrary replacement type, no impact on tests
        ReplacementType.EXCLUDE_GROUPS,
    )

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)
    enforcer.process_query(query, HTTPQuerySettings())

    condition = query.get_condition()
    assert condition == build_in("project_id", [2])

    from_clause = query.get_from_clause()
    assert from_clause.final
Exemplo n.º 9
0
def test_too_many_groups_to_exclude(query: ClickhouseQuery) -> None:
    """
    Three groups are excluded for project 2 while max_group_ids_exclude is 2.
    The condition must equal the bare project filter and the from clause
    must be final.
    """
    state.set_config("max_group_ids_exclude", 2)
    set_project_exclude_groups(
        2,
        [100, 101, 102],
        ReplacerState.ERRORS,
        # Arbitrary replacement type, no impact on tests
        ReplacementType.EXCLUDE_GROUPS,
    )

    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)
    enforcer.process_query(query, HTTPQuerySettings())

    condition = query.get_condition()
    assert condition == build_in("project_id", [2])

    from_clause = query.get_from_clause()
    assert from_clause.final
Exemplo n.º 10
0
def test_no_groups_too_many_excludes(query: ClickhouseQuery) -> None:
    """
    The query names no groups, and the project has more replaced groups than
    the exclusion limit allows.
    """
    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)

    set_project_exclude_groups(
        2,
        [100, 101, 102],
        ReplacerState.ERRORS,
        # Arbitrary replacement type, no impact on tests
        ReplacementType.EXCLUDE_GROUPS,
    )

    enforcer._set_query_final(query, True)
    state.set_config("max_group_ids_exclude", 1)

    enforcer.process_query(query, HTTPQuerySettings())

    condition = query.get_condition()
    assert condition == build_in("project_id", [2])

    from_clause = query.get_from_clause()
    assert from_clause.final
Exemplo n.º 11
0
def test_multiple_disjoint_replaced(
    query_with_multiple_group_ids: ClickhouseQuery,
) -> None:
    """
    The query targets several groups and the project has replaced groups,
    but the two sets of group ids do not intersect: none of the queried
    groups has been replaced.
    """
    query = query_with_multiple_group_ids
    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)

    set_project_exclude_groups(
        2,
        [110, 120, 130],
        ReplacerState.ERRORS,
        # Arbitrary replacement type, no impact on tests
        ReplacementType.EXCLUDE_GROUPS,
    )

    # Start from a final query so the assertion below shows it gets unset.
    enforcer._set_query_final(query, True)
    state.set_config("max_group_ids_exclude", 5)

    enforcer.process_query(query, HTTPQuerySettings())

    condition = query.get_condition()
    project_filter = build_in("project_id", [2])
    group_filter = build_in("group_id", [101, 102])
    assert condition == build_and(project_filter, group_filter)
    assert not query.get_from_clause().final
Exemplo n.º 12
0
def test_with_turbo(query: ClickhouseQuery) -> None:
    """
    Process the query with turbo=True request settings. The resulting
    condition must equal the project_id IN [2] filter.
    """
    enforcer = PostReplacementConsistencyEnforcer("project_id", None)
    turbo_settings = HTTPRequestSettings(turbo=True)
    enforcer.process_query(query, turbo_settings)

    condition = query.get_condition_from_ast()
    assert condition == build_in("project_id", [2])
Exemplo n.º 13
0
# Column names handed to ErrorsReplacer below through its `required_columns`
# keyword argument.
required_columns = [
    "event_id",
    "project_id",
    "group_id",
    "timestamp",
    "deleted",
    "retention_days",
]

storage = WritableTableStorage(
    storage_key=StorageKey.ERRORS,
    storage_set_key=StorageSetKey.EVENTS,
    schema=schema,
    query_processors=[
        PostReplacementConsistencyEnforcer(
            project_column="project_id", replacer_state_name=ReplacerState.ERRORS,
        ),
        MappingColumnPromoter(mapping_specs={"tags": promoted_tag_columns}),
        ArrayJoinKeyValueOptimizer("tags"),
        PrewhereProcessor(),
    ],
    stream_loader=KafkaStreamLoader(
        processor=ErrorsProcessor(promoted_tag_columns),
        default_topic="events",
        replacement_topic="errors-replacements",
    ),
    replacer_processor=ErrorsReplacer(
        write_schema=schema,
        read_schema=schema,
        required_columns=required_columns,
        tag_column_map={"tags": promoted_tag_columns, "contexts": {}},
Exemplo n.º 14
0
def test_query_overlaps_replacements_processor(
    query: ClickhouseQuery,
    query_with_timestamp: ClickhouseQuery,
    query_with_future_timestamp: ClickhouseQuery,
) -> None:
    """
    Drive one enforcer through four consecutive scenarios and check the final
    flag of the from clause after each. The exclusions registered in the
    second scenario stay in place for the later ones, so the order matters.
    """
    enforcer = PostReplacementConsistencyEnforcer("project_id", ReplacerState.ERRORS)

    # 1. Replacement time is unknown, so the query defaults to "overlaps";
    #    with no groups to exclude it still must not end up final.
    enforcer._set_query_final(query_with_timestamp, True)
    enforcer.process_query(query_with_timestamp, HTTPQuerySettings())
    from_clause = query_with_timestamp.get_from_clause()
    assert not from_clause.final

    # 2. The query overlaps the replacement and there are too many groups to
    #    exclude, so it must be final.
    state.set_config("max_group_ids_exclude", 2)
    set_project_exclude_groups(
        2,
        [100, 101, 102],
        ReplacerState.ERRORS,
        # Arbitrary replacement type, no impact on tests
        ReplacementType.EXCLUDE_GROUPS,
    )
    enforcer._set_query_final(query_with_timestamp, False)
    enforcer.process_query(query_with_timestamp, HTTPQuerySettings())
    from_clause = query_with_timestamp.get_from_clause()
    assert from_clause.final

    # 3. The query time range is unknown and there are too many groups to
    #    exclude, so it must be final.
    enforcer._set_query_final(query, False)
    enforcer.process_query(query, HTTPQuerySettings())
    from_clause = query.get_from_clause()
    assert from_clause.final

    # 4. The query does not overlap the replacements, so it must not be final.
    enforcer._set_query_final(query_with_future_timestamp, True)
    enforcer.process_query(query_with_future_timestamp, HTTPQuerySettings())
    from_clause = query_with_future_timestamp.get_from_clause()
    assert not from_clause.final
Exemplo n.º 15
0
# Column and tag-expression names treated as prewhere candidates, going by
# the variable name.
# NOTE(review): the consumer of this list is not visible in this excerpt --
# confirm how (and in what order) it is used.
prewhere_candidates = [
    "event_id",
    "group_id",
    "tags[sentry:release]",
    "sentry:release",
    "message",
    "title",
    "environment",
    "project_id",
]

query_processors = [
    PostReplacementConsistencyEnforcer(
        project_column="project_id",
        # Key migration is ongoing. As soon as all the keys we are interested
        # in are stored in Redis with "EVENTS" in the name, we can change this.
        replacer_state_name=None,
    ),
    EventsColumnProcessor(),
    MappingColumnPromoter(
        mapping_specs={
            "tags": ChainMap(
                {col.flattened: col.flattened for col in promoted_tag_columns},
                get_promoted_context_tag_col_mapping(),
            ),
            "contexts": get_promoted_context_col_mapping(),
        },
    ),
    # This processor must not be ported to the errors dataset. We should
    # not support promoting tags/contexts with boolean values. There is
    # no way to convert them back consistently to the value provided by