Example #1
def build_snuba_filter(dataset, query, aggregate, environment, params=None):
    resolve_func = (
        resolve_column(Dataset.Events)
        if dataset == QueryDatasets.EVENTS
        else resolve_column(Dataset.Transactions)
    )
    query = apply_dataset_query_conditions(dataset, query)
    snuba_filter = get_filter(query, params=params)
    snuba_filter.update_with(
        resolve_field_list([aggregate], snuba_filter, auto_fields=False))
    snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    return snuba_filter
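For context, a hypothetical call site for the helper above; the query string, aggregate, and params keys are illustrative assumptions and are not part of the example itself:

# Hypothetical invocation (sketch only): QueryDatasets.EVENTS and the
# aggregate/params shapes are assumed from how get_filter is used in these
# snippets, not taken from the example.
snuba_filter = build_snuba_filter(
    QueryDatasets.EVENTS,
    query="level:error",
    aggregate="count()",
    environment=None,            # pass an Environment instance to add the condition
    params={"project_id": [1]},  # assumed params shape accepted by get_filter
)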
Example #2
    def build_snuba_filter(
        self,
        query: str,
        environment: Optional[Environment],
        params: Optional[Mapping[str, Any]] = None,
    ) -> Filter:
        resolve_func = resolve_column(Dataset(self.dataset.value))
        aggregations = [self.aggregate]
        # This aggregation is added to return the total number of sessions in crash
        # rate alerts, which is used to check whether we are below the general minimum
        # alert threshold
        count_col = re.search(r"(sessions|users)", self.aggregate)
        if not count_col:
            raise UnsupportedQuerySubscription(
                "Only crash free percentage queries are supported for subscriptions"
                "over the sessions dataset"
            )
        count_col_matched = count_col.group()

        aggregations += [f"identity({count_col_matched}) AS {CRASH_RATE_ALERT_SESSION_COUNT_ALIAS}"]
        functions_acl = ["identity"]
        snuba_filter = get_filter(query, params=params)
        snuba_filter.update_with(
            resolve_field_list(
                aggregations, snuba_filter, auto_fields=False, functions_acl=functions_acl
            )
        )
        snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
        if environment:
            snuba_filter.conditions.append(["environment", "=", environment.name])
        return snuba_filter
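The sessions variant above derives its extra identity() aggregation from the alert aggregate with a regex; a small pure-Python sketch of that step, using an invented aggregate string and alias:

import re

# Invented aggregate string for illustration; the real code aliases the result
# with CRASH_RATE_ALERT_SESSION_COUNT_ALIAS as shown in the example above.
aggregate = "percentage(sessions_crashed, sessions) AS _crash_rate_alert_aggregate"
match = re.search(r"(sessions|users)", aggregate)
if match is None:
    raise ValueError("not a crash free percentage aggregate")
count_col = match.group()                            # -> "sessions"
extra = f"identity({count_col}) AS sessions_total"   # illustrative alias name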
Example #3
def build_snuba_filter(dataset, query, aggregate, environment, params=None):
    snuba_filter = get_filter(query, params=params)
    snuba_filter.update_with(resolve_field_list([aggregate], snuba_filter, auto_fields=False))
    snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_column(Dataset.Discover))[0]
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    snuba_filter.conditions = apply_dataset_conditions(dataset, snuba_filter.conditions)
    return snuba_filter
Example #4
def build_snuba_filter(dataset,
                       query,
                       aggregate,
                       environment,
                       event_types,
                       params=None):
    resolve_func = {
        QueryDatasets.EVENTS: resolve_column(Dataset.Events),
        QueryDatasets.SESSIONS: resolve_column(Dataset.Sessions),
        QueryDatasets.TRANSACTIONS: resolve_column(Dataset.Transactions),
    }[dataset]

    functions_acl = None

    aggregations = [aggregate]
    if dataset == QueryDatasets.SESSIONS:
        # This aggregation is added to return the total number of sessions in crash
        # rate alerts, which is used to check whether we are below the general minimum
        # alert threshold
        count_col = re.search(r"(sessions|users)", aggregate)
        count_col_matched = count_col.group()

        aggregations += [
            f"identity({count_col_matched}) AS {CRASH_RATE_ALERT_SESSION_COUNT_ALIAS}"
        ]
        functions_acl = ["identity"]

    query = apply_dataset_query_conditions(dataset, query, event_types)
    snuba_filter = get_filter(query, params=params)
    snuba_filter.update_with(
        resolve_field_list(aggregations,
                           snuba_filter,
                           auto_fields=False,
                           functions_acl=functions_acl))
    snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
    if snuba_filter.group_ids:
        snuba_filter.conditions.append(
            ["group_id", "IN",
             list(map(int, snuba_filter.group_ids))])
    if environment:
        snuba_filter.conditions.append(["environment", "=", environment.name])
    return snuba_filter
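Compared with example #1, this variant picks the column resolver with a dict lookup instead of a ternary, so an unsupported dataset fails immediately; a minimal stand-in sketch of that difference (plain strings replace the real resolve_column results):

# Stand-in values only; the real code maps QueryDatasets members to resolvers.
resolvers = {
    "events": "events_resolver",
    "sessions": "sessions_resolver",
    "transactions": "transactions_resolver",
}
resolvers["transactions"]   # -> "transactions_resolver"
# resolvers["metrics"]      # would raise KeyError, rather than silently falling
#                           # back to Transactions as the ternary in example #1 does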
Example #5
    def __init__(self, dataset: Dataset, params: ParamsType):
        self.params = params
        self.dataset = dataset

        # Function is a subclass of CurriedFunction
        self.where: List[WhereType] = []
        self.aggregates: List[CurriedFunction] = []
        self.columns: List[SelectType] = []
        self.orderby: List[OrderBy] = []

        self.projects_to_filter: Set[int] = set()

        self.resolve_column_name = resolve_column(self.dataset)
Example #6
File: base.py Project: code-watch/sentry
    def __init__(
        self,
        dataset: Dataset,
        params: ParamsType,
        orderby: Optional[List[str]] = None,
    ):
        # Function is a subclass of CurriedFunction
        self.aggregates: List[CurriedFunction] = []
        self.columns: List[SelectType] = []
        self.where: List[WhereType] = []

        self.params = params
        self.dataset = dataset
        self.orderby_columns: List[str] = orderby if orderby else []

        self.resolve_column_name = resolve_column(self.dataset)
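In each of these constructors, resolve_column(dataset) is evaluated once and the returned callable is kept for later column lookups (examples #10 and #11 use it the same way at module level and at the call site). A toy closure showing that pattern; the mapping and column names are invented and this is not Sentry's implementation:

def resolve_column_sketch(dataset):
    # Invented per-dataset translation table, for illustration only.
    translations = {"discover": {"user.email": "email"}}
    mapping = translations.get(dataset, {})

    def resolve(col):
        # Fall back to the name itself when the dataset has no translation.
        return mapping.get(col, col)

    return resolve

resolve_discover = resolve_column_sketch("discover")
resolve_discover("user.email")   # -> "email"
resolve_discover("timestamp")    # -> "timestamp"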
Example #7
    def __init__(self,
                 dataset: Dataset,
                 params: ParamsType,
                 functions_acl: Optional[List[str]] = None):
        self.dataset = dataset
        self.params = params
        self.functions_acl = set() if functions_acl is None else functions_acl

        # Function is a subclass of CurriedFunction
        self.where: List[WhereType] = []
        self.aggregates: List[CurriedFunction] = []
        self.columns: List[SelectType] = []
        self.orderby: List[OrderBy] = []

        self.projects_to_filter: Set[int] = set()

        self.resolve_column_name = resolve_column(self.dataset)
Example #8
    def build_snuba_filter(
        self,
        query: str,
        environment: Optional[Environment],
        params: Optional[Mapping[str, Any]] = None,
    ) -> Filter:
        resolve_func = resolve_column(Dataset(self.dataset.value))

        query = apply_dataset_query_conditions(QueryDatasets(self.dataset), query, self.event_types)
        snuba_filter = get_filter(query, params=params)
        snuba_filter.update_with(
            resolve_field_list([self.aggregate], snuba_filter, auto_fields=False)
        )
        snuba_filter = resolve_snuba_aliases(snuba_filter, resolve_func)[0]
        if snuba_filter.group_ids:
            snuba_filter.conditions.append(
                ["group_id", "IN", list(map(int, snuba_filter.group_ids))]
            )
        if environment:
            snuba_filter.conditions.append(["environment", "=", environment.name])
        return snuba_filter
Example #9
    def __init__(
        self,
        dataset: Dataset,
        params: ParamsType,
        auto_fields: bool = False,
        functions_acl: Optional[List[str]] = None,
        equation_config: Optional[Dict[str, bool]] = None,
    ):
        self.dataset = dataset
        self.params = params
        self.auto_fields = auto_fields
        self.functions_acl = set() if functions_acl is None else functions_acl
        self.equation_config = equation_config if equation_config is not None else {}

        # Function is a subclass of CurriedFunction
        self.where: List[WhereType] = []
        # The list of aggregates to be selected
        self.aggregates: List[CurriedFunction] = []
        self.columns: List[SelectType] = []
        self.orderby: List[OrderBy] = []

        self.projects_to_filter: Set[int] = set()

        self.resolve_column_name = resolve_column(self.dataset)
Example #10
    "get_facets",
    "transform_data",
    "zerofill",
    "histogram_query",
    "check_multihistogram_fields",
)

logger = logging.getLogger(__name__)

PaginationResult = namedtuple("PaginationResult",
                              ["next", "previous", "oldest", "latest"])
FacetResult = namedtuple("FacetResult", ["key", "value", "count"])
PerformanceFacetResult = namedtuple("PerformanceFacetResult",
                                    ["key", "value", "performance", "count"])

resolve_discover_column = resolve_column(Dataset.Discover)


def is_real_column(col):
    """
    Return true if col corresponds to an actual column to be fetched
    (not an aggregate function or field alias)
    """
    if is_function(col):
        return False

    if col in FIELD_ALIASES:
        return False

    return True
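A few illustrative calls for is_real_column; the concrete results assume is_function matches aggregate syntax such as count() and that FIELD_ALIASES contains pseudo columns like issue, neither of which is shown in this excerpt:

is_real_column("count()")        # False - matched by is_function()
is_real_column("issue")          # False if "issue" is registered in FIELD_ALIASES
is_real_column("transaction")    # True - an actual column to fetch from snuba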
Example #11
    def get_tag_value_paginator_for_projects(
        self,
        projects,
        environments,
        key,
        start=None,
        end=None,
        query=None,
        order_by="-last_seen",
        include_transactions=False,
    ):
        from sentry.api.paginator import SequencePaginator

        if not order_by == "-last_seen":
            raise ValueError("Unsupported order_by: %s" % order_by)

        dataset = Dataset.Events
        snuba_key = snuba.get_snuba_column_name(key)
        if include_transactions and snuba_key.startswith("tags["):
            snuba_key = snuba.get_snuba_column_name(key, dataset=Dataset.Discover)
            if not snuba_key.startswith("tags["):
                dataset = Dataset.Discover

        # We cannot search the values of these columns like we do other columns because they are
        # a different type, and as such, LIKE and != do not work on them. Furthermore, because the
        # use case for these values in autosuggestion is minimal, we choose to disable them here.
        #
        # event_id:     This is a FixedString which disallows us to use LIKE on it when searching,
        #               but does work with !=. However, for consistency's sake we disallow it
        #               entirely; furthermore, suggesting an event_id is not a very useful feature
        #               as they are not human readable.
        # timestamp:    This is a DateTime which disallows us to use both LIKE and != on it when
        #               searching. Suggesting a timestamp can potentially be useful, but as it does
        #               not work at all, we opt to disable it here. A potential solution could be to
        #               generate a time range to bound where they are searching. e.g. if a user
        #               enters 2020-07 we can generate the following conditions:
        #               >= 2020-07-01T00:00:00 AND <= 2020-07-31T23:59:59
        # time:         This is a column computed from timestamp so it suffers the same issues
        if snuba_key in {"event_id", "timestamp", "time"}:
            return SequencePaginator([])

        # These columns have fixed values and we don't need to emit queries to find out the
        # potential options.
        if key in {"error.handled", "error.unhandled"}:
            return SequencePaginator(
                [
                    (
                        1,
                        TagValue(
                            key=key, value="true", times_seen=None, first_seen=None, last_seen=None
                        ),
                    ),
                    (
                        2,
                        TagValue(
                            key=key, value="false", times_seen=None, first_seen=None, last_seen=None
                        ),
                    ),
                ]
            )

        conditions = []
        # transaction status needs a special case so that the user interacts with the names and not codes
        transaction_status = snuba_key == "transaction_status"
        if include_transactions and transaction_status:
            # Here we want to use the status codes during filtering,
            # but want to do this with names that include our query
            status_codes = [
                span_key
                for span_key, value in six.iteritems(SPAN_STATUS_CODE_TO_NAME)
                if (query and query in value) or (not query)
            ]
            if status_codes:
                conditions.append([snuba_key, "IN", status_codes])
            else:
                return SequencePaginator([])
        elif key in FUZZY_NUMERIC_KEYS:
            converted_query = int(query) if query is not None and query.isdigit() else None
            if converted_query is not None:
                conditions.append([snuba_key, ">=", converted_query - FUZZY_NUMERIC_DISTANCE])
                conditions.append([snuba_key, "<=", converted_query + FUZZY_NUMERIC_DISTANCE])
        elif include_transactions and key == PROJECT_ALIAS:
            project_filters = {
                "id__in": projects,
            }
            if query:
                project_filters["slug__icontains"] = query
            project_queryset = Project.objects.filter(**project_filters).values("id", "slug")

            if not project_queryset.exists():
                return SequencePaginator([])

            project_slugs = {project["id"]: project["slug"] for project in project_queryset}
            projects = [project["id"] for project in project_queryset]
            snuba_key = "project_id"
            dataset = Dataset.Discover
        else:
            snuba_name = snuba_key

            is_user_alias = include_transactions and key == USER_DISPLAY_ALIAS
            if is_user_alias:
                # user.alias is a pseudo column in discover. It is computed by coalescing
                # together multiple user attributes. Here we get the coalesce function used,
                # and resolve it to the corresponding snuba query
                dataset = Dataset.Discover
                resolver = snuba.resolve_column(dataset)
                snuba_name = FIELD_ALIASES[USER_DISPLAY_ALIAS].get_field()
                snuba.resolve_complex_column(snuba_name, resolver)
            elif snuba_name in BLACKLISTED_COLUMNS:
                snuba_name = "tags[%s]" % (key,)

            if query:
                conditions.append([snuba_name, "LIKE", u"%{}%".format(query)])
            else:
                conditions.append([snuba_name, "!=", ""])

        filters = {"project_id": projects}
        if environments:
            filters["environment"] = environments

        results = snuba.query(
            dataset=dataset,
            start=start,
            end=end,
            groupby=[snuba_key],
            filter_keys=filters,
            aggregations=[
                ["count()", "", "times_seen"],
                ["min", "timestamp", "first_seen"],
                ["max", "timestamp", "last_seen"],
            ],
            conditions=conditions,
            orderby=order_by,
            # TODO: This means they can't actually paginate all TagValues.
            limit=1000,
            arrayjoin=snuba.get_arrayjoin(snuba_key),
            referrer="tagstore.get_tag_value_paginator_for_projects",
        )

        if include_transactions:
            # With transaction_status we need to map the ids back to their names
            if transaction_status:
                results = OrderedDict(
                    [
                        (SPAN_STATUS_CODE_TO_NAME[result_key], data)
                        for result_key, data in six.iteritems(results)
                    ]
                )
            # With project names we map the ids back to the project slugs
            elif key == PROJECT_ALIAS:
                results = OrderedDict(
                    [
                        (project_slugs[value], data)
                        for value, data in six.iteritems(results)
                        if value in project_slugs
                    ]
                )

        tag_values = [
            TagValue(key=key, value=six.text_type(value), **fix_tag_value_data(data))
            for value, data in six.iteritems(results)
        ]

        desc = order_by.startswith("-")
        score_field = order_by.lstrip("-")
        return SequencePaginator(
            [(int(to_timestamp(getattr(tv, score_field)) * 1000), tv) for tv in tag_values],
            reverse=desc,
        )
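The tail of this method converts each tag value's score field (last_seen by default) into an integer millisecond score for SequencePaginator; a sketch of that scoring, with datetime.timestamp() standing in for the to_timestamp helper used above:

from datetime import datetime, timezone

order_by = "-last_seen"
desc = order_by.startswith("-")       # True -> newest values first
score_field = order_by.lstrip("-")    # "last_seen"

last_seen = datetime(2021, 7, 1, tzinfo=timezone.utc)
score = int(last_seen.timestamp() * 1000)   # millisecond integer used as the paginator score
# SequencePaginator([(score, tag_value), ...], reverse=desc) then pages newest-first.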