Пример #1
0
    def __tag_expr(
        self,
        parsed_col: ParsedNestedColumn,
        table_alias: str = "",
    ) -> str:
        """
        Build the expression that yields the value of one named tag.

        A tag that has been "promoted" to a top level column resolves
        directly to that column; any other tag is looked up in the nested
        tags/contexts key/value arrays.
        """
        assert parsed_col.tag_name
        tag_name = parsed_col.tag_name
        column = parsed_col.col_name

        # Promoted tags: translate through the tag->column map and return
        # the (stringified) top level column.
        if column in self.__promoted_columns:
            mapped_tag = self.__get_tag_column_map()[column].get(tag_name, tag_name)
            if mapped_tag in self.__promoted_columns[column]:
                return qualified_column(self.__string_col(mapped_tag), table_alias)

        # Everything else: index into the nested key/value arrays.
        nested = qualified_column(column, table_alias)
        return "{col}.value[indexOf({col}.key, {tag})]".format(
            col=nested,
            tag=escape_literal(tag_name),
        )
Пример #2
0
    def __tag_expr(
        self,
        column_name: str,
        table_alias: str = "",
    ) -> str:
        """
        Return an expression for the value of a single named tag.

        For tags/contexts, we expand the expression depending on whether the tag is
        "promoted" to a top level column, or whether we have to look in the tags map.

        column_name must match NESTED_COL_EXPR_RE (e.g. a "tags[...]" style
        reference); table_alias, when non-empty, qualifies the columns.
        """
        # NOTE(review): match() returns None for a non-matching column_name,
        # which would raise AttributeError here. Callers appear to validate
        # the name against NESTED_COL_EXPR_RE first — confirm before relying
        # on this.
        col, tag = NESTED_COL_EXPR_RE.match(column_name).group(1, 2)
        # For promoted tags, return the column name.
        if col in self.__promoted_columns:
            actual_tag = self.__get_tag_column_map()[col].get(tag, tag)
            if actual_tag in self.__promoted_columns[col]:
                return qualified_column(self.__string_col(actual_tag),
                                        table_alias)

        # For the rest, return an expression that looks it up in the nested tags.
        # (u'' prefixes dropped: they are a Python 2 relic; keyword arguments
        # replace the needless **{...} dict unpacking.)
        return "{col}.value[indexOf({col}.key, {tag})]".format(
            col=qualified_column(col, table_alias),
            tag=escape_literal(tag),
        )
Пример #3
0
    def __tags_expr(
        self,
        column_name: str,
        query: Query,
        parsing_context: ParsingContext,
        table_alias: str = "",
    ) -> str:
        """
        Return an expression that array-joins on tags to produce an output with one
        row per tag.

        column_name is either "tags_key" or "tags_value"; query is inspected to
        see which of the two the query actually references; parsing_context
        caches the shared "all_tags" alias; table_alias optionally qualifies
        the columns.
        """
        # u'' prefixes and set([...]) below replaced with their Python 3
        # idiomatic forms; behavior is unchanged.
        assert column_name in ("tags_key", "tags_value")
        col, k_or_v = column_name.split("_", 1)
        nested_tags_only = state.get_config("nested_tags_only", 1)

        qualified_col = qualified_column(col, table_alias)
        # Generate parallel lists of keys and values to arrayJoin on
        if nested_tags_only:
            key_list = "{}.key".format(qualified_col)
            val_list = "{}.value".format(qualified_col)
        else:
            # Prepend the promoted tags (stored as top level columns) so they
            # appear in the output alongside the nested ones.
            promoted = self.__promoted_columns[col]
            col_map = self.__column_tag_map[col]
            key_list = "arrayConcat([{}], {}.key)".format(
                ", ".join("'{}'".format(col_map.get(p, p)) for p in promoted),
                qualified_col,
            )
            val_list = "arrayConcat([{}], {}.value)".format(
                ", ".join(self.__string_col(p) for p in promoted),
                qualified_col,
            )

        qualified_key = qualified_column("tags_key", table_alias)
        qualified_value = qualified_column("tags_value", table_alias)
        cols_used = query.get_all_referenced_columns() & {qualified_key, qualified_value}
        if len(cols_used) == 2:
            # If we use both tags_key and tags_value in this query, arrayjoin
            # on (key, value) tag tuples.
            expr = "arrayJoin(arrayMap((x,y) -> [x,y], {}, {}))".format(
                key_list,
                val_list,
            )

            # put the all_tags expression in the alias cache so we can use the alias
            # to refer to it next time (eg. 'all_tags[1] AS tags_key'). instead of
            # expanding the whole tags expression again.
            expr = alias_expr(expr, "all_tags", parsing_context)
            return "({})[{}]".format(expr, 1 if k_or_v == "key" else 2)
        else:
            # If we are only ever going to use one of tags_key or tags_value, don't
            # bother creating the k/v tuples to arrayJoin on, or the all_tags alias
            # to re-use as we won't need it.
            return "arrayJoin({})".format(key_list if k_or_v == "key" else val_list)
Пример #4
0
 def column_expr(self, column_name, query: Query, parsing_context: ParsingContext, table_alias: str=""):
     """
     Resolve special event column names: tag/context expressions go through
     the tags processor; 'issue'/'group_id' become a NULL-when-zero group id;
     'message' coalesces with search_message; anything else is delegated to
     the parent implementation.
     """
     # The processor returns a falsy value when column_name is not a
     # tag/context expression; in that case fall through to the other cases.
     tag_expr = self.__tags_processor.process_column_expression(column_name, query, parsing_context, table_alias)
     if tag_expr:
         return tag_expr
     if column_name in ('issue', 'group_id'):
         return f"nullIf({qualified_column('group_id', table_alias)}, 0)"
     if column_name == 'message':
         # Because of the rename from message->search_message without backfill,
         # records will have one or the other of these fields.
         # TODO this can be removed once all data has search_message filled in.
         search_message = qualified_column('search_message', table_alias)
         message = qualified_column('message', table_alias)
         return f"coalesce({search_message}, {message})"
     return super().column_expr(column_name, query, parsing_context, table_alias)
Пример #5
0
    def column_expr(
        self,
        column_name: str,
        query: Query,
        parsing_context: ParsingContext,
        table_alias: str = "",
    ) -> Union[None, Any]:
        """
        Resolve special column names: tag/context expressions (with a
        ''/'True'/'False' normalization for a few boolean device contexts),
        a NULL-when-zero group_id, the message/search_message coalesce, and
        otherwise the parent implementation.
        """
        tag_expr = self.__tags_processor.process_column_expression(
            column_name, query, parsing_context, table_alias)
        if tag_expr:
            # A falsy result means column_name was not a tag/context expression.

            # This conversion must not be ported to the errors dataset. We should
            # not support promoting tags/contexts with boolean values. There is
            # no way to convert them back consistently to the value provided by
            # the client when the event is ingested, in all ways to access
            # tags/contexts. Once the errors dataset is in use, we will not have
            # boolean promoted tags/contexts so this constraint will be easy to enforce.
            boolean_contexts = {
                "contexts[device.simulator]",
                "contexts[device.online]",
                "contexts[device.charging]",
            }
            if column_name not in boolean_contexts:
                return tag_expr
            boolean_context_template = (
                "multiIf(equals(%(processed_column)s, ''), '', "
                "in(%(processed_column)s, tuple('1', 'True')), 'True', 'False')"
            )
            return boolean_context_template % {"processed_column": tag_expr}

        if column_name == "group_id":
            return f"nullIf({qualified_column('group_id', table_alias)}, 0)"

        if column_name == "message":
            # Because of the rename from message->search_message without backfill,
            # records will have one or the other of these fields.
            # TODO this can be removed once all data has search_message filled in.
            search_message = qualified_column("search_message", table_alias)
            message = qualified_column("message", table_alias)
            return f"coalesce({search_message}, {message})"

        return super().column_expr(column_name, query, parsing_context, table_alias)
Пример #6
0
 def __time_expr(self, column_name: str, granularity: int, table_alias: str="") -> str:
     """
     Translate a time-group alias into a bucketing expression over its
     backing column (via self.__time_group_columns) at the requested
     granularity in seconds.
     """
     # Common granularities use the dedicated ClickHouse helpers; anything
     # else falls back to integer-division bucketing.
     templates = {
         86400: 'toDate({column})',
         3600: 'toStartOfHour({column})',
         60: 'toStartOfMinute({column})',
     }
     template = templates.get(
         granularity,
         'toDateTime(intDiv(toUInt32({column}), {granularity}) * {granularity})',
     )
     source = qualified_column(self.__time_group_columns[column_name], table_alias)
     return template.format(column=source, granularity=granularity)
Пример #7
0
 def column_expr(self,
                 column_name,
                 query: Query,
                 parsing_context: ParsingContext,
                 table_alias: str = ""):
     """
     Render column_name as an escaped, optionally table-qualified column
     expression. Subclasses override this to handle special aliases.
     """
     qualified = qualified_column(column_name, table_alias)
     return escape_col(qualified)
Пример #8
0
 def time_expr(self, column_name: str, granularity: int,
               table_alias: str) -> str:
     """
     Bucket the given timestamp column by granularity (in seconds), using
     the dedicated ClickHouse functions for minute/hour/day buckets and
     integer-division bucketing otherwise.
     """
     qualified = qualified_column(column_name, table_alias)
     fallback = "toDateTime(intDiv(toUInt32({column}), {granularity}) * {granularity})"
     per_granularity = {
         60: "toStartOfMinute({column})",
         3600: "toStartOfHour({column})",
         86400: "toDate({column})",
     }
     chosen = per_granularity.get(granularity, fallback)
     return chosen.format(column=qualified, granularity=granularity)
Пример #9
0
 def attempt_map(
     self, expression: Column, children_translator: SnubaClickhouseStrictTranslator,
 ) -> Optional[Literal]:
     """
     Map a column listed in self.columns onto a NULL literal, keeping the
     original alias (or the qualified column name when there is none).
     Returns None when this rule does not apply.
     """
     if expression.column_name not in self.columns:
         return None
     alias = expression.alias or qualified_column(
         expression.column_name, expression.table_name or ""
     )
     return Literal(alias=alias, value=None)
Пример #10
0
 def attempt_map(
     self,
     expression: Column,
     children_translator: SnubaClickhouseStrictTranslator,
 ) -> Optional[FunctionCall]:
     """
     Map a column listed in self.columns onto identity(NULL), keeping the
     original alias (or the qualified column name when there is none).
     Returns None when this rule does not apply.
     """
     if expression.column_name not in self.columns:
         return None
     alias = expression.alias or qualified_column(
         expression.column_name, expression.table_name or ""
     )
     return identity(Literal(None, None), alias)
Пример #11
0
    def __init__(self) -> None:
        """
        Wire up the joined dataset: a LEFT join from the groupedmessage
        table to the events table on (project_id, group id).
        """
        # Only the read schemas are needed: this joined dataset has no
        # write schema (see DatasetSchemas below).
        self.__grouped_message = get_dataset("groupedmessage")
        groupedmessage_source = (self.__grouped_message.get_dataset_schemas().
                                 get_read_schema().get_data_source())

        self.__events = get_dataset("events")
        events_source = (self.__events.get_dataset_schemas().get_read_schema().
                         get_data_source())

        join_structure = JoinClause(
            # Left side: groupedmessage, restricted to record_deleted = 0.
            left_node=TableJoinNode(
                table_name=groupedmessage_source.format_from(),
                columns=groupedmessage_source.get_columns(),
                mandatory_conditions=[
                    # TODO: This will be replaced as soon as expressions won't be strings
                    # thus we will be able to easily add an alias to a column in an
                    # expression.
                    (qualified_column("record_deleted",
                                      self.GROUPS_ALIAS), "=", 0)
                ],
                # Re-qualify each source prewhere candidate with this node's alias.
                prewhere_candidates=[
                    qualified_column(col, self.GROUPS_ALIAS)
                    for col in groupedmessage_source.get_prewhere_candidates()
                ],
                alias=self.GROUPS_ALIAS,
            ),
            # Right side: events, restricted to deleted = 0.
            right_node=TableJoinNode(
                table_name=events_source.format_from(),
                columns=events_source.get_columns(),
                mandatory_conditions=[
                    (qualified_column("deleted", self.EVENTS_ALIAS), "=", 0)
                ],
                prewhere_candidates=[
                    qualified_column(col, self.EVENTS_ALIAS)
                    for col in events_source.get_prewhere_candidates()
                ],
                alias=self.EVENTS_ALIAS,
            ),
            # Join keys: same project_id on both sides, and the events side
            # group_id pointing at the groups side id.
            mapping=[
                JoinCondition(
                    left=JoinConditionExpression(table_alias=self.GROUPS_ALIAS,
                                                 column="project_id"),
                    right=JoinConditionExpression(
                        table_alias=self.EVENTS_ALIAS, column="project_id"),
                ),
                JoinCondition(
                    left=JoinConditionExpression(table_alias=self.GROUPS_ALIAS,
                                                 column="id"),
                    right=JoinConditionExpression(
                        table_alias=self.EVENTS_ALIAS, column="group_id"),
                ),
            ],
            join_type=JoinType.LEFT,
        )

        schema = JoinedSchema(join_structure)
        # Read-only dataset: no write schema.
        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=None,
        )
        super().__init__(
            dataset_schemas=dataset_schemas,
            # "events.time" is a time-group alias over "events.timestamp".
            time_group_columns={"events.time": "events.timestamp"},
            time_parse_columns=[
                "events.timestamp",
                "events.received",
                "groups.last_seen",
                "groups.first_seen",
                "groups.active_at",
            ],
        )
Пример #12
0
    def __init__(self) -> None:
        """
        Wire up the joined entity: a LEFT join from the groupedmessages
        storage to the events storage on (project_id, group id).
        """
        self.__grouped_message = get_entity(EntityKey.GROUPEDMESSAGES)
        groupedmessage_source = (get_storage(
            StorageKey.GROUPEDMESSAGES).get_schema().get_data_source())

        self.__events = get_entity(EntityKey.EVENTS)
        events_source = get_storage(
            StorageKey.EVENTS).get_schema().get_data_source()

        join_structure = JoinClause(
            # Left side: groupedmessage, restricted to record_deleted = 0
            # (expressed as an AST condition rather than a string).
            left_node=TableJoinNode(
                table_name=groupedmessage_source.format_from(),
                columns=groupedmessage_source.get_columns(),
                mandatory_conditions=[
                    binary_condition(
                        None,
                        ConditionFunctions.EQ,
                        Column(None, self.GROUPS_ALIAS, "record_deleted"),
                        Literal(None, 0),
                    ),
                ],
                # Re-qualify each source prewhere candidate with this node's alias.
                prewhere_candidates=[
                    qualified_column(col, self.GROUPS_ALIAS)
                    for col in groupedmessage_source.get_prewhere_candidates()
                ],
                alias=self.GROUPS_ALIAS,
            ),
            # Right side: events, restricted to deleted = 0.
            right_node=TableJoinNode(
                table_name=events_source.format_from(),
                columns=events_source.get_columns(),
                mandatory_conditions=[
                    binary_condition(
                        None,
                        ConditionFunctions.EQ,
                        Column(None, self.EVENTS_ALIAS, "deleted"),
                        Literal(None, 0),
                    ),
                ],
                prewhere_candidates=[
                    qualified_column(col, self.EVENTS_ALIAS)
                    for col in events_source.get_prewhere_candidates()
                ],
                alias=self.EVENTS_ALIAS,
            ),
            # Join keys: same project_id on both sides, and the events side
            # group_id pointing at the groups side id.
            mapping=[
                JoinCondition(
                    left=JoinConditionExpression(table_alias=self.GROUPS_ALIAS,
                                                 column="project_id"),
                    right=JoinConditionExpression(
                        table_alias=self.EVENTS_ALIAS, column="project_id"),
                ),
                JoinCondition(
                    left=JoinConditionExpression(table_alias=self.GROUPS_ALIAS,
                                                 column="id"),
                    right=JoinConditionExpression(
                        table_alias=self.EVENTS_ALIAS, column="group_id"),
                ),
            ],
            join_type=JoinType.LEFT,
        )

        schema = JoinedSchema(join_structure)
        storage = JoinedStorage(StorageSetKey.EVENTS, join_structure)
        # Read-only entity: a single joined storage, no writable storage.
        super().__init__(
            storages=[storage],
            query_plan_builder=SingleStorageQueryPlanBuilder(storage=storage),
            abstract_column_set=schema.get_columns(),
            writable_storage=None,
        )
Пример #13
0
    def __tags_expr(
        self,
        parsed_col: ParsedNestedColumn,
        query: Query,
        parsing_context: ParsingContext,
        table_alias: str = "",
    ) -> str:
        """
        Return an expression that array-joins on tags to produce an output with one
        row per tag.

        It can also apply an arrayFilter in the arrayJoin if an equivalent condition
        is found in the query in order to reduce the size of the arrayJoin.
        """
        # col_name is expected to be "tags_key" or "tags_value", so this
        # split yields ("tags", "key") or ("tags", "value").
        col, k_or_v = parsed_col.col_name.split("_", 1)
        nested_tags_only = state.get_config("nested_tags_only", 1)

        qualified_col = qualified_column(col, table_alias)
        # Generate parallel lists of keys and values to arrayJoin on
        if nested_tags_only:
            key_list = "{}.key".format(qualified_col)
            val_list = "{}.value".format(qualified_col)
        else:
            # Prepend the promoted tags (stored as top level columns) so they
            # appear in the output alongside the nested ones.
            promoted = self.__promoted_columns[col]
            col_map = self.__column_tag_map[col]
            key_list = "arrayConcat([{}], {}.key)".format(
                ", ".join("'{}'".format(col_map.get(p, p)) for p in promoted),
                qualified_col,
            )
            val_list = "arrayConcat([{}], {}.value)".format(
                ", ".join(self.__string_col(p) for p in promoted),
                qualified_col)

        qualified_key = qualified_column("tags_key", table_alias)
        qualified_value = qualified_column("tags_value", table_alias)
        # Which of tags_key / tags_value does the query actually reference?
        cols_used = query.get_all_referenced_columns() & set(
            [qualified_key, qualified_value])

        # Tag keys the query filters on, rendered as quoted SQL literals;
        # used below to shrink the arrayJoin with an arrayFilter.
        filter_tags = ",".join(
            [f"'{tag}'" for tag in self.__get_filter_tags(query)])
        if len(cols_used) == 2:
            # If we use both tags_key and tags_value in this query, arrayjoin
            # on (key, value) tag tuples.
            mapping = f"arrayMap((x,y) -> [x,y], {key_list}, {val_list})"
            if filter_tags:
                # Keep only the pairs whose key (pair[1]) is a filtered tag.
                filtering = (
                    f"arrayFilter(pair -> pair[1] IN ({filter_tags}), {mapping})"
                )
            else:
                filtering = mapping

            expr = f"arrayJoin({filtering})"

            # put the all_tags expression in the alias cache so we can use the alias
            # to refer to it next time (eg. 'all_tags[1] AS tags_key'). instead of
            # expanding the whole tags expression again.
            expr = alias_expr(expr, "all_tags", parsing_context)
            return "({})[{}]".format(expr, 1 if k_or_v == "key" else 2)
        else:
            # If we are only ever going to use one of tags_key or tags_value, don't
            # bother creating the k/v tuples to arrayJoin on, or the all_tags alias
            # to re-use as we won't need it.
            if filter_tags:
                # NOTE(review): key_list is used unconditionally here; this is
                # presumably sound because filter tags derive from conditions on
                # tags_key, implying tags_key is the referenced column — confirm
                # against __get_filter_tags.
                return (
                    f"arrayJoin(arrayFilter(tag -> tag IN ({filter_tags}), {key_list}))"
                )
            else:
                return f"arrayJoin({key_list if k_or_v == 'key' else val_list})"
Пример #14
0
    def __init__(self) -> None:
        """
        Wire up the joined dataset: a LEFT join from the groupedmessage
        table to the events table on (project_id, group id).
        """
        # Only the read schemas are needed: this joined dataset has no
        # write schema (see DatasetSchemas below).
        self.__grouped_message = get_dataset("groupedmessage")
        groupedmessage_source = self.__grouped_message \
            .get_dataset_schemas() \
            .get_read_schema() \
            .get_data_source()

        self.__events = get_dataset("events")
        events_source = self.__events \
            .get_dataset_schemas() \
            .get_read_schema() \
            .get_data_source()

        join_structure = JoinClause(
            # Left side: groupedmessage, restricted to record_deleted = 0.
            left_node=TableJoinNode(
                table_name=groupedmessage_source.format_from(),
                columns=groupedmessage_source.get_columns(),
                mandatory_conditions=[
                    # TODO: This will be replaced as soon as expressions won't be strings
                    # thus we will be able to easily add an alias to a column in an
                    # expression.
                    (qualified_column('record_deleted',
                                      self.GROUPS_ALIAS), '=', 0)
                ],
                alias=self.GROUPS_ALIAS,
            ),
            # Right side: events, restricted to deleted = 0.
            right_node=TableJoinNode(
                table_name=events_source.format_from(),
                columns=events_source.get_columns(),
                mandatory_conditions=[
                    (qualified_column('deleted', self.EVENTS_ALIAS), '=', 0)
                ],
                alias=self.EVENTS_ALIAS,
            ),
            # Join keys: same project_id on both sides, and the events side
            # group_id pointing at the groups side id.
            mapping=[
                JoinCondition(
                    left=JoinConditionExpression(table_alias=self.GROUPS_ALIAS,
                                                 column="project_id"),
                    right=JoinConditionExpression(
                        table_alias=self.EVENTS_ALIAS, column="project_id"),
                ),
                JoinCondition(
                    left=JoinConditionExpression(table_alias=self.GROUPS_ALIAS,
                                                 column="id"),
                    right=JoinConditionExpression(
                        table_alias=self.EVENTS_ALIAS, column="group_id"),
                ),
            ],
            join_type=JoinType.LEFT,
        )

        schema = JoinedSchema(join_structure)
        # Read-only dataset: no write schema.
        dataset_schemas = DatasetSchemas(
            read_schema=schema,
            write_schema=None,
        )
        super().__init__(
            dataset_schemas=dataset_schemas,
            # 'events.time' is a time-group alias over 'events.timestamp'.
            time_group_columns={
                'events.time': 'events.timestamp',
            },
            time_parse_columns=['events.timestamp'],
        )
Пример #15
0
 def default_conditions(self, table_alias: str = "") -> Sequence[Condition]:
     """
     Conditions added to every query on this dataset: restrict to rows
     with deleted = 0.
     """
     deleted_col = qualified_column('deleted', table_alias)
     return [(deleted_col, '=', 0)]