Пример #1
0
 def visitColumn(self, exp: Column) -> str:
     """
     Render a column reference as SQL text.

     The table name (when set) and the column name are each passed through
     escape_identifier and joined with a dot. The result is handed to the
     alias helper together with the column's alias.
     """
     qualified_name = ""
     if exp.table_name:
         # Prefix with "<table>." only when a table name is present.
         qualified_name = (escape_identifier(exp.table_name) or "") + "."
     qualified_name += escape_identifier(exp.column_name) or ""
     return self.__alias(qualified_name, exp.alias)
Пример #2
0
    def __string_col(self, col: str) -> str:
        """
        Return an expression that yields column ``col`` as a string.

        When the textual form of the column's registered type contains
        "String" but not "FixedString", the escaped identifier is returned
        as is. In every other case (including an unknown column) the escaped
        identifier is wrapped in ``toString(...)``.
        """
        registered_type = self.__columns.get(col, None)
        type_text = str(registered_type) if registered_type else None

        is_plain_string = bool(
            type_text and "String" in type_text and "FixedString" not in type_text
        )
        escaped = escape_identifier(col)
        if is_plain_string:
            return escaped
        return "toString({})".format(escaped)
Пример #3
0
def function_expr(fn: str, args_expr: str = "") -> str:
    """
    DEPRECATED. Do not extend this function. To manipulate the query, write a
    QueryProcessor and register it on the dataset instead.

    Build the SQL expression for a function name and an argument expression
    that has already been evaluated. A handful of convenience
    pseudo-functions are expanded here into more complex expressions.

    Raises ValueError when a name starting with ``apdex(`` or ``impact(``
    does not match its expected pattern.
    """

    def apdex_sql(parsed) -> str:
        # Used by both apdex() and impact(): the tolerated threshold is four
        # times the satisfied threshold.
        return "(countIf({col} <= {satisfied}) + (countIf(({col} > {satisfied}) AND ({col} <= {tolerated})) / 2)) / count()".format(
            col=escape_identifier(parsed.group(1)),
            satisfied=parsed.group(2),
            tolerated=int(parsed.group(2)) * 4,
        )

    if fn.startswith("apdex("):
        apdex_match = APDEX_FUNCTION_RE.match(fn)
        if not apdex_match:
            raise ValueError("Invalid format for apdex()")
        return apdex_sql(apdex_match)

    if fn.startswith("impact("):
        impact_match = IMPACT_FUNCTION_RE.match(fn)
        if not impact_match:
            raise ValueError("Invalid format for impact()")
        return "(1 - {apdex}) + ((1 - (1 / sqrt(uniq({user_col})))) * 3)".format(
            apdex=apdex_sql(impact_match),
            user_col=escape_identifier(impact_match.group(3)),
        )

    # A name that already carries its (possibly static) argument list, such
    # as "count()" or "sleep(1)", is passed through untouched.
    if fn.endswith(")") and not args_expr:
        return fn

    # "top10", "top3", ... are shorthand for the parametric topK function.
    topk_match = TOPK_FUNCTION_RE.match(fn)
    if topk_match:
        return "topK({})({})".format(topk_match.group(1), args_expr)

    # Wrap uniq() in ifNull(..., 0) so a number is returned where one is
    # expected instead of null.
    if fn == "uniq":
        return "ifNull({}({}), 0)".format(fn, args_expr)

    # emptyIfNull(col) is a pseudo function that expands to the ClickHouse
    # call ifNull(col, ''). It exists until column names can be told apart
    # from string literals inside complex functions.
    if fn == "emptyIfNull" and args_expr:
        return "ifNull({}, '')".format(args_expr)

    # Anything else is emitted as a plain call.
    return "{}({})".format(fn, args_expr)
Пример #4
0
    def __init__(self, base_name: Optional[str], name: str,
                 type: ColumnType) -> None:
        """
        Store the column's parts and derive its flattened and escaped names.

        ``flattened`` is "<base_name>.<name>" when a base name is given and
        just ``name`` otherwise. ``escaped`` is the result of running the
        flattened name through escape_identifier.
        """
        self.base_name = base_name
        self.name = name
        self.type = type

        if self.base_name:
            self.flattened = "{}.{}".format(self.base_name, self.name)
        else:
            self.flattened = self.name
        self.escaped = escape_identifier(self.flattened)
Пример #5
0
 def visit_column(self, exp: Column) -> str:
     """
     Render a column reference as SQL text, aliasing it only when needed.

     Two names are built side by side: the escaped one that goes into the
     query and the unescaped one used to compare against the alias.
     """
     escaped_name = ""
     plain_name = ""
     if exp.table_name:
         escaped_name = (escape_identifier(exp.table_name) or "") + "."
         plain_name = exp.table_name + "."
     escaped_name += escape_identifier(exp.column_name) or ""
     plain_name += exp.column_name

     # Column aliases are applied during parsing so names survive query
     # processing, which means the alias frequently equals the column name.
     # Skipping the alias in that case keeps the generated query readable.
     if exp.alias != plain_name:
         return self.__alias(escaped_name, exp.alias)
     return escaped_name
Пример #6
0
    def __init__(self, base_name: Optional[str], name: str,
                 type: ColumnType[TModifiers]) -> None:
        """
        Store the column's parts and derive its flattened and escaped names.

        ``flattened`` is "<base_name>.<name>" when a base name is given and
        just ``name`` otherwise. ``escaped`` is the flattened name run through
        escape_identifier and is asserted to be non-None.
        """
        self.base_name = base_name
        self.name = name
        self.type = type

        if self.base_name:
            self.flattened = "{}.{}".format(self.base_name, self.name)
        else:
            self.flattened = self.name

        escaped_name = escape_identifier(self.flattened)
        # Narrows the optional result so the attribute can be typed as str.
        assert escaped_name is not None
        self.escaped: str = escaped_name
Пример #7
0
 def column_expr(
     self,
     column_name: str,
     query: Query,
     parsing_context: ParsingContext,
     table_alias: str = "",
 ) -> Union[None, Any]:
     """
     Build the SQL expression for ``column_name``.

     The name is qualified with ``table_alias`` through qualified_column and
     the result is escaped with escape_identifier. ``query`` and
     ``parsing_context`` are not used by this implementation; presumably
     overriding implementations use them to resolve special column aliases
     that evaluate to something else.
     """
     qualified_name = qualified_column(column_name, table_alias)
     return escape_identifier(qualified_name)
Пример #8
0
    def test_escape_identifier(self):
        """Check escape_identifier pass-through, quoting and backtick escaping."""
        assert escape_identifier(None) is None

        expectations = [
            ("", ""),
            ("foo", "foo"),
            ("foo.bar", "foo.bar"),
            ("foo:bar", "`foo:bar`"),
            # Backticks in column names should already be rejected by the
            # query schema; make sure they still cannot be used for
            # injection.
            ("`", r"`\``"),
            ("production`; --", r"`production\`; --`"),
        ]
        for raw, expected in expectations:
            assert escape_identifier(raw) == expected
Пример #9
0
 def visit_column(self, exp: Column) -> str:
     """
     Render a column reference as SQL text, aliasing it only when needed.

     Two names are built side by side: the escaped one that goes into the
     query and the unescaped one used to compare against the alias.
     """
     plain_name = ""
     if exp.table_name:
         plain_name = exp.table_name + "."
         # With a table qualifier the column name is escaped with the alias
         # rules rather than the identifier rules, so that a column name
         # containing "." stays clearly separated from the table name.
         escaped_name = (
             (escape_identifier(exp.table_name) or "")
             + "."
             + (escape_alias(exp.column_name) or "")
         )
     else:
         escaped_name = escape_identifier(exp.column_name) or ""
     plain_name += exp.column_name

     # Column aliases are applied during parsing so names survive query
     # processing, which means the alias frequently equals the column name.
     # Skipping the alias in that case keeps the generated query readable.
     if exp.alias != plain_name:
         return self._alias(escaped_name, exp.alias)
     return escaped_name
Пример #10
0
 def for_schema(self) -> str:
     """
     Return this column's schema fragment: the escaped column name, a
     single space, then whatever the column type's own for_schema() returns.
     """
     escaped_name = escape_identifier(self.name)
     type_schema = self.type.for_schema()
     return "{} {}".format(escaped_name, type_schema)
Пример #11
0
def process_delete_tag(
    message: Mapping[str, Any],
    schema: TableSchema,
    tag_column_map: Mapping[str, Mapping[str, str]],
    promoted_tags: Mapping[str, Sequence[str]],
) -> Optional[Replacement]:
    """
    Build the replacement that removes one tag from a project's rows.

    Returns None when the message carries no tag. Otherwise returns a
    Replacement made of a count query template, an insert query template,
    the arguments to interpolate into them, and the query time flags.
    The table name placeholders in the templates are not among the returned
    arguments (presumably they are filled in by the caller).
    """
    tag = message["tag"]
    if not tag:
        return None

    assert isinstance(tag, str)
    timestamp = datetime.strptime(message["datetime"], settings.PAYLOAD_DATETIME_FORMAT)
    tag_column_name = tag_column_map["tags"].get(tag, tag)
    is_promoted = tag in promoted_tags["tags"]

    # A promoted tag is matched through its own column; any other tag is
    # looked up in the tags.key array.
    prewhere = (
        " PREWHERE %(tag_column)s IS NOT NULL "
        if is_promoted
        else " PREWHERE has(`tags.key`, %(tag_str)s) "
    )
    where = """\
        WHERE project_id = %(project_id)s
        AND received <= CAST('%(timestamp)s' AS DateTime)
        AND NOT deleted
    """
    # Both templates end with the same PREWHERE/WHERE clauses.
    filters = prewhere + where

    insert_query_template = (
        """\
        INSERT INTO %(dist_write_table_name)s (%(all_columns)s)
        SELECT %(select_columns)s
        FROM %(dist_read_table_name)s FINAL
    """
        + filters
    )
    count_query_template = (
        """\
        SELECT count()
        FROM %(dist_read_table_name)s FINAL
    """
        + filters
    )

    # Materialized columns are left out of the INSERT column list.
    writable_columns = [
        column
        for column in schema.get_columns()
        if Materialized not in column.type.get_all_modifiers()
    ]

    def select_expression(column) -> str:
        # Expression written for one column: tag-bearing columns get the
        # tag stripped, every other column is copied through unchanged.
        name = column.flattened
        if is_promoted and name == tag_column_name:
            return "NULL"
        if name == "tags.key":
            return (
                "arrayFilter(x -> (indexOf(`tags.key`, x) != indexOf(`tags.key`, %s)), `tags.key`)"
                % escape_string(tag)
            )
        if name == "tags.value":
            return (
                "arrayMap(x -> arrayElement(`tags.value`, x), arrayFilter(x -> x != indexOf(`tags.key`, %s), arrayEnumerate(`tags.value`)))"
                % escape_string(tag)
            )
        if name == "_tags_flattened":
            return FLATTENED_COLUMN_TEMPLATE % escape_string(tag)
        return column.escaped

    select_columns = [select_expression(column) for column in writable_columns]
    insert_column_names = [column.escaped for column in writable_columns]

    query_args = {
        "all_columns": ", ".join(insert_column_names),
        "select_columns": ", ".join(select_columns),
        "project_id": message["project_id"],
        "tag_str": escape_string(tag),
        "tag_column": escape_identifier(tag_column_name),
        "timestamp": timestamp.strftime(DATETIME_FORMAT),
    }
    query_time_flags = (NEEDS_FINAL, message["project_id"])

    return Replacement(
        count_query_template, insert_query_template, query_args, query_time_flags
    )
Пример #12
0
def function_expr(fn: str, args_expr: str = "") -> str:
    """
    DEPRECATED. Do not extend this function. To manipulate the query, write a
    QueryProcessor and register it on the dataset instead.

    Build the SQL expression for a function name and an argument expression
    that has already been evaluated. A handful of convenience
    pseudo-functions are expanded here into more complex expressions.

    Raises ValueError when a name starting with ``apdex(``, ``impact(`` or
    ``failure_rate(`` does not match its expected pattern.
    """

    def apdex_sql(parsed) -> str:
        # Used by both apdex() and impact(): the tolerated threshold is four
        # times the satisfied threshold.
        return "(countIf({col} <= {satisfied}) + (countIf(({col} > {satisfied}) AND ({col} <= {tolerated})) / 2)) / count()".format(
            col=escape_identifier(parsed.group(1)),
            satisfied=parsed.group(2),
            tolerated=int(parsed.group(2)) * 4,
        )

    if fn.startswith("apdex("):
        apdex_match = APDEX_FUNCTION_RE.match(fn)
        if not apdex_match:
            raise ValueError("Invalid format for apdex()")
        return apdex_sql(apdex_match)

    if fn.startswith("impact("):
        impact_match = IMPACT_FUNCTION_RE.match(fn)
        if not impact_match:
            raise ValueError("Invalid format for impact()")
        return "(1 - {apdex}) + ((1 - (1 / sqrt(uniq({user_col})))) * 3)".format(
            apdex=apdex_sql(impact_match),
            user_col=escape_identifier(impact_match.group(3)),
        )

    if fn.startswith("failure_rate("):
        if not FAILURE_RATE_FUNCTION_RE.match(fn):
            raise ValueError("Invalid format for failure_rate()")
        # Share of rows whose status is none of ok, cancelled or unknown.
        return "countIf(notIn(transaction_status, tuple({ok}, {cancelled}, {unknown}))) / count()".format(
            ok=SPAN_STATUS_NAME_TO_CODE["ok"],
            cancelled=SPAN_STATUS_NAME_TO_CODE["cancelled"],
            unknown=SPAN_STATUS_NAME_TO_CODE["unknown"],
        )

    # A name that already carries its (possibly static) argument list, such
    # as "count()" or "sleep(1)", is passed through untouched.
    if fn.endswith(")") and not args_expr:
        return fn

    # "top10", "top3", ... are shorthand for the parametric topK function.
    topk_match = TOPK_FUNCTION_RE.match(fn)
    if topk_match:
        return "topK({})({})".format(topk_match.group(1), args_expr)

    # Wrap uniq() in ifNull(..., 0) so a number is returned where one is
    # expected instead of null.
    if fn == "uniq":
        return "ifNull({}({}), 0)".format(fn, args_expr)

    # emptyIfNull(col) is a pseudo function that expands to the ClickHouse
    # call ifNull(col, ''). It exists until column names can be told apart
    # from string literals inside complex functions.
    if fn == "emptyIfNull" and args_expr:
        return "ifNull({}, '')".format(args_expr)

    # Workaround for https://github.com/ClickHouse/ClickHouse/issues/11622
    # Some distributed queries fail with array(1,2,3) but succeed with the
    # literal form [1, 2, 3]. The config flag is consulted first, as before.
    if get_config("format_clickhouse_arrays", 1) and fn == "array":
        return f"[{args_expr}]"

    # Anything else is emitted as a plain call.
    return "{}({})".format(fn, args_expr)
Пример #13
0
def process_delete_tag(
    message: ReplacementMessage,
    all_columns: Sequence[FlattenedColumn],
    tag_column_map: Mapping[str, Mapping[str, str]],
    promoted_tags: Mapping[str, Sequence[str]],
    use_promoted_prewhere: bool,
    schema: WritableTableSchema,
) -> Optional[Replacement]:
    """
    Build the replacement that removes one tag from a project's rows.

    Returns None when the message carries no tag. Otherwise returns a
    LegacyReplacement made of a count query template, an insert query
    template, the arguments to interpolate into them, the query time flags,
    and the message's action type and metadata. The table name placeholder
    in the templates is not among the returned arguments (presumably it is
    filled in by the caller).
    """
    tag = message.data["tag"]
    if not tag:
        return None

    assert isinstance(tag, str)
    timestamp = datetime.strptime(
        message.data["datetime"], settings.PAYLOAD_DATETIME_FORMAT
    )
    tag_column_name = tag_column_map["tags"].get(tag, tag)
    is_promoted = tag in promoted_tags["tags"]

    # The promoted column is used for filtering only when the caller allows
    # it; otherwise the tag is looked up in the tags.key array.
    prewhere = (
        " PREWHERE %(tag_column)s IS NOT NULL "
        if is_promoted and use_promoted_prewhere
        else " PREWHERE has(`tags.key`, %(tag_str)s) "
    )
    where = """\
        WHERE project_id = %(project_id)s
        AND received <= CAST('%(timestamp)s' AS DateTime)
        AND NOT deleted
    """
    # Both templates end with the same PREWHERE/WHERE clauses.
    filters = prewhere + where

    insert_query_template = (
        """\
        INSERT INTO %(table_name)s (%(all_columns)s)
        SELECT %(select_columns)s
        FROM %(table_name)s FINAL
    """
        + filters
    )
    count_query_template = (
        """\
        SELECT count()
        FROM %(table_name)s FINAL
    """
        + filters
    )

    def cleared_promoted_value() -> str:
        # Whether the promoted column is Nullable is read from the schema:
        # a Nullable column is reset to NULL, any other to an empty string.
        column_type = schema.get_data_source().get_columns().get(tag_column_name)
        assert column_type is not None
        if column_type.type.has_modifier(Nullable):
            return "NULL"
        return "''"

    def select_expression(column) -> str:
        # Expression written for one column: tag-bearing columns get the
        # tag stripped, every other column is copied through unchanged.
        name = column.flattened
        if is_promoted and name == tag_column_name:
            return cleared_promoted_value()
        if name == "tags.key":
            return (
                "arrayFilter(x -> (indexOf(`tags.key`, x) != indexOf(`tags.key`, %s)), `tags.key`)"
                % escape_string(tag)
            )
        if name == "tags.value":
            return (
                "arrayMap(x -> arrayElement(`tags.value`, x), arrayFilter(x -> x != indexOf(`tags.key`, %s), arrayEnumerate(`tags.value`)))"
                % escape_string(tag)
            )
        return column.escaped

    select_columns = [select_expression(column) for column in all_columns]
    insert_column_names = [column.escaped for column in all_columns]

    query_args = {
        "all_columns": ", ".join(insert_column_names),
        "select_columns": ", ".join(select_columns),
        "project_id": message.data["project_id"],
        "tag_str": escape_string(tag),
        "tag_column": escape_identifier(tag_column_name),
        "timestamp": timestamp.strftime(DATETIME_FORMAT),
    }
    query_time_flags = (NEEDS_FINAL, message.data["project_id"])

    return LegacyReplacement(
        count_query_template,
        insert_query_template,
        query_args,
        query_time_flags,
        replacement_type=message.action_type,
        replacement_message_metadata=message.metadata,
    )
Пример #14
0
 def __escape_identifier_enforce(self, expr: str) -> str:
     """
     Escape ``expr`` with escape_identifier and return the result as a
     ``str``.
     """
     escaped = escape_identifier(expr)
     # escape_identifier may return None, which it does for a None input.
     # The input here is a str, so the assertion only narrows the type for
     # the type checker.
     assert escaped is not None
     return escaped