Exemplo n.º 1
0
def detect_table(query: Query, events_only_columns: ColumnSet,
                 transactions_only_columns: ColumnSet) -> str:
    """
    Guess whether a query should be served from the "events" or the
    "transactions" storage.

    The checks below run in priority order and the first one that matches
    decides the result:

    1. a top level ``type = error`` or ``type = transaction`` condition;
    2. a column referenced in the conditions that one of the two column
       sets knows about;
    3. any other referenced column that one of the two column sets knows
       about.

    At steps 2 and 3 the events column set is consulted before the
    transactions one, and events is the answer when nothing matches. This is
    a heuristic, so it will pick the wrong storage for some queries.

    :param query: the query to inspect.
    :param events_only_columns: columns looked up to recognise events.
    :param transactions_only_columns: columns looked up to recognise
        transactions.
    :return: ``EVENTS`` or ``TRANSACTIONS``.
    """

    def mentions_any(column_set, column_names):
        # True as soon as one name resolves to something truthy in the set.
        return any(column_set.get(name) for name in column_names)

    # 1. An explicit top level condition on the event type settles it.
    for condition in query.get_conditions() or ():
        if not is_condition(condition):
            continue
        as_tuple = tuple(condition)
        if as_tuple == ("type", "=", "error"):
            return EVENTS
        if as_tuple == ("type", "=", "transaction"):
            return TRANSACTIONS

    # 2. Table specific columns used inside the conditions.
    in_conditions = query.get_columns_referenced_in_conditions()
    if mentions_any(events_only_columns, in_conditions):
        return EVENTS
    if mentions_any(transactions_only_columns, in_conditions):
        return TRANSACTIONS

    # 3. Table specific columns used anywhere else in the query.
    everywhere = query.get_all_referenced_columns()
    if mentions_any(events_only_columns, everywhere):
        return EVENTS
    if mentions_any(transactions_only_columns, everywhere):
        return TRANSACTIONS

    # Nothing pointed at a specific table: fall back to events.
    return EVENTS
Exemplo n.º 2
0
def test_referenced_columns():
    """
    Check the column sets a Query reports as referenced: overall, in its
    conditions and in its having clause, for bodies of increasing nesting.
    """
    schemas = get_dataset("events").get_dataset_schemas()
    data_source = schemas.get_read_schema().get_data_source()

    # Flat conjunction: a = 1 AND b = 1
    flat = Query(
        {"conditions": [["a", "=", "1"], ["b", "=", "1"]]},
        data_source,
    )
    assert flat.get_all_referenced_columns() == {"a", "b"}
    assert flat.get_columns_referenced_in_conditions() == {"a", "b"}
    assert flat.get_columns_referenced_in_having() == set()

    # Nested disjunction: a = 1 AND (b = 1 OR c = 1)
    with_or = Query(
        {"conditions": [["a", "=", "1"], [["b", "=", "1"], ["c", "=", "1"]]]},
        data_source,
    )
    assert with_or.get_all_referenced_columns() == {"a", "b", "c"}
    assert with_or.get_columns_referenced_in_conditions() == {"a", "b", "c"}
    assert with_or.get_columns_referenced_in_having() == set()

    # Function call on the left hand side: a = 1 AND (b = 1 OR foo(c) = 1)
    with_function = Query(
        {
            "conditions": [
                ["a", "=", "1"],
                [["b", "=", "1"], [["foo", ["c"]], "=", "1"]],
            ]
        },
        data_source,
    )
    assert with_function.get_all_referenced_columns() == {"a", "b", "c"}
    assert with_function.get_columns_referenced_in_conditions() == {
        "a", "b", "c"}
    assert with_function.get_columns_referenced_in_having() == set()

    # Nested function calls: a = 1 AND (b = 1 OR foo(c, bar(d)) = 1)
    with_nested_function = Query(
        {
            "conditions": [
                ["a", "=", "1"],
                [["b", "=", "1"], [["foo", ["c", ["bar", ["d"]]]], "=", "1"]],
            ]
        },
        data_source,
    )
    assert with_nested_function.get_all_referenced_columns() == {
        "a", "b", "c", "d"}
    assert with_nested_function.get_columns_referenced_in_conditions() == {
        "a", "b", "c", "d"}
    assert with_nested_function.get_columns_referenced_in_having() == set()

    # No conditions at all: columns come from arrayjoin, groupby, orderby,
    # aggregations and the expressions in selected_columns.
    without_conditions = Query(
        {
            "arrayjoin": "tags_key",
            "groupby": ["time", "group_id"],
            "orderby": "-time",
            "selected_columns": [
                "group_id",
                "time",
                ["foo", ["c", ["bar", ["d"]]]],  # foo(c, bar(d))
            ],
            "aggregations": [["uniq", "tags_value", "values_seen"]],
        },
        data_source,
    )
    assert without_conditions.get_all_referenced_columns() == {
        "tags_key", "tags_value", "time", "group_id", "c", "d"}
    assert without_conditions.get_columns_referenced_in_conditions() == set()
    assert without_conditions.get_columns_referenced_in_having() == set()

    # Conditions plus having plus a prewhere set after construction: the
    # prewhere column is expected among all referenced columns.
    with_having = Query(
        {
            "conditions": [["a", "=", "1"]],
            "having": [
                ["b", "=", "1"],
                [["c", "=", "1"], [["foo", ["d", ["bar", ["e"]]]], "=", "1"]],
            ],
        },
        data_source,
    )
    with_having.set_prewhere([["pc6", "=", "10"]])
    assert with_having.get_all_referenced_columns() == {
        "a", "b", "c", "d", "e", "pc6"}
    assert with_having.get_columns_referenced_in_having() == {
        "b", "c", "d", "e"}