Пример #1
0
    def _get_actor_subquery(self) -> Tuple[str, Dict[str, Any]]:
        if self.is_aggregating_by_groups:
            actor_join_subquery, actor_join_subquery_params = GroupsJoinQuery(
                self._filter, self._team.pk,
                join_key="funnel_actors.actor_id").get_join_query()
        else:
            person_query, actor_join_subquery_params = ClickhousePersonQuery(
                self._filter,
                self._team.pk,
                entity=Entity({
                    "id":
                    "person",
                    "type":
                    "events",
                    "properties":
                    self._filter.correlation_property_values
                }),
            ).get_query()

            actor_join_subquery = f"""
                JOIN ({person_query}) person
                ON person.id = funnel_actors.actor_id
            """

        return actor_join_subquery, actor_join_subquery_params
Пример #2
0
def test_groups_join_query_filtering(snapshot):
    filter = Filter(
        data={
            "properties": [{
                "key": "industry",
                "value": "finance",
                "type": "group",
                "group_type_index": 0
            }]
        })

    assert GroupsJoinQuery(filter, 2).get_join_query() == snapshot
Пример #3
0
    def _get_aggregation_join_query(self):
        if self._filter.aggregation_group_type_index is None:
            person_query, person_query_params = ClickhousePersonQuery(
                self._filter, self._team.pk, ColumnOptimizer(self._filter, self._team.pk)
            ).get_query()

            return (
                f"""
                JOIN ({person_query}) person
                    ON person.id = funnel_actors.actor_id
            """,
                person_query_params,
            )
        else:
            return GroupsJoinQuery(self._filter, self._team.pk, join_key="funnel_actors.actor_id").get_join_query()
Пример #4
0
def test_groups_join_query_filtering_with_custom_key_names(snapshot):
    filter = Filter(
        data={
            "properties": [
                {
                    "key": "industry",
                    "value": "finance",
                    "type": "group",
                    "group_type_index": 0
                },
                {
                    "key": "company",
                    "value": "crashed",
                    "type": "group",
                    "group_type_index": 2
                },
            ]
        })

    assert GroupsJoinQuery(
        filter, 2, join_key="call_me_industry").get_join_query() == snapshot
Пример #5
0
    def _get_aggregation_join_query(self):
        if self._team.actor_on_events_querying_enabled:
            return "", {}

        if self._filter.aggregation_group_type_index is None:
            person_query, person_query_params = PersonQuery(
                self._filter, self._team.pk,
                EnterpriseColumnOptimizer(self._filter,
                                          self._team.pk)).get_query()

            return (
                f"""
                JOIN ({person_query}) person
                    ON person.id = funnel_actors.actor_id
            """,
                person_query_params,
            )
        else:
            return GroupsJoinQuery(
                self._filter, self._team.pk,
                join_key="funnel_actors.actor_id").get_join_query()
Пример #6
0
    def get_query(self) -> Tuple[str, Dict, Callable]:
        interval_annotation = get_trunc_func_ch(self.filter.interval)
        num_intervals, seconds_in_interval, round_interval = get_time_diff(
            self.filter.interval, self.filter.date_from, self.filter.date_to,
            self.team_id)
        _, parsed_date_to, date_params = parse_timestamps(filter=self.filter,
                                                          team_id=self.team_id)

        props_to_filter = self.filter.property_groups.combine_property_group(
            PropertyOperatorType.AND, self.entity.property_groups)

        outer_properties = self.column_optimizer.property_optimizer.parse_property_groups(
            props_to_filter).outer
        prop_filters, prop_filter_params = parse_prop_grouped_clauses(
            team_id=self.team_id,
            property_group=outer_properties,
            table_name="e",
            person_properties_mode=PersonPropertiesMode.
            USING_PERSON_PROPERTIES_COLUMN,
        )
        aggregate_operation, _, math_params = process_math(self.entity)

        action_query = ""
        action_params: Dict = {}
        if self.entity.type == TREND_FILTER_TYPE_ACTIONS:
            action = self.entity.get_action()
            action_query, action_params = format_action_filter(
                team_id=self.team_id, action=action, table_name="e")

        self.params = {
            **self.params,
            **math_params,
            **prop_filter_params,
            **action_params,
            "event": self.entity.id,
            "key": self.filter.breakdown,
            **date_params,
        }

        breakdown_filter_params = {
            "parsed_date_from":
            date_from_clause(interval_annotation, round_interval),
            "parsed_date_to":
            parsed_date_to,
            "actions_query":
            "AND {}".format(action_query) if action_query else "",
            "event_filter":
            "AND event = %(event)s" if not action_query else "",
            "filters":
            prop_filters if props_to_filter.values else "",
        }

        _params, _breakdown_filter_params = {}, {}

        if self.filter.breakdown_type == "cohort":
            _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_cohort_params(
            )
        else:
            _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_prop_params(
                "count(*)"
                if self.entity.math == "dau" else aggregate_operation,
                math_params,
            )

        if len(_params["values"]) == 0:
            # If there are no breakdown values, we are sure that there's no relevant events, so instead of adjusting
            # a "real" SELECT for this, we only include the below dummy SELECT.
            # It's a drop-in replacement for a "real" one, simply always returning 0 rows.
            # See https://github.com/PostHog/posthog/pull/5674 for context.
            return (
                "SELECT [now()] AS date, [0] AS data, '' AS breakdown_value LIMIT 0",
                {},
                lambda _: [],
            )

        person_join_condition, person_join_params = self._person_join_condition(
        )
        groups_join_condition, groups_join_params = GroupsJoinQuery(
            self.filter, self.team_id, self.column_optimizer).get_join_query()
        self.params = {
            **self.params,
            **_params,
            **person_join_params,
            **groups_join_params
        }
        breakdown_filter_params = {
            **breakdown_filter_params,
            **_breakdown_filter_params
        }

        if self.filter.display in TRENDS_DISPLAY_BY_VALUE:
            breakdown_filter = breakdown_filter.format(
                **breakdown_filter_params)
            content_sql = BREAKDOWN_AGGREGATE_QUERY_SQL.format(
                breakdown_filter=breakdown_filter,
                person_join=person_join_condition,
                groups_join=groups_join_condition,
                aggregate_operation=aggregate_operation,
                breakdown_value=breakdown_value,
            )
            time_range = enumerate_time_range(self.filter, seconds_in_interval)

            return (
                content_sql,
                self.params,
                self._parse_single_aggregate_result(self.filter, self.entity,
                                                    {"days": time_range}),
            )

        else:

            breakdown_filter = breakdown_filter.format(
                **breakdown_filter_params)

            if self.entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
                active_user_params = get_active_user_params(
                    self.filter, self.entity, self.team_id)
                conditions = BREAKDOWN_ACTIVE_USER_CONDITIONS_SQL.format(
                    **breakdown_filter_params, **active_user_params)
                inner_sql = BREAKDOWN_ACTIVE_USER_INNER_SQL.format(
                    breakdown_filter=breakdown_filter,
                    person_join=person_join_condition,
                    groups_join=groups_join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                    breakdown_value=breakdown_value,
                    conditions=conditions,
                    GET_TEAM_PERSON_DISTINCT_IDS=get_team_distinct_ids_query(
                        self.team_id),
                    **active_user_params,
                    **breakdown_filter_params,
                )
            elif self.filter.display == TRENDS_CUMULATIVE and self.entity.math == "dau":
                inner_sql = BREAKDOWN_CUMULATIVE_INNER_SQL.format(
                    breakdown_filter=breakdown_filter,
                    person_join=person_join_condition,
                    groups_join=groups_join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                    breakdown_value=breakdown_value,
                    **breakdown_filter_params,
                )
            else:
                inner_sql = BREAKDOWN_INNER_SQL.format(
                    breakdown_filter=breakdown_filter,
                    person_join=person_join_condition,
                    groups_join=groups_join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                    breakdown_value=breakdown_value,
                )

            breakdown_query = BREAKDOWN_QUERY_SQL.format(
                interval=interval_annotation,
                num_intervals=num_intervals,
                inner_sql=inner_sql,
            )
            self.params.update({
                "seconds_in_interval": seconds_in_interval,
                "num_intervals": num_intervals,
            })

            return breakdown_query, self.params, self._parse_trend_result(
                self.filter, self.entity)
Пример #7
0
def test_groups_join_query_blank():
    filter = Filter(data={"properties": []})

    assert GroupsJoinQuery(filter, 2).get_join_query() == ("", {})
Пример #8
0
def get_breakdown_prop_values(
    filter: Filter,
    entity: Entity,
    aggregate_operation: str,
    team_id: int,
    limit: int = BREAKDOWN_VALUES_LIMIT,
    extra_params={},
    column_optimizer: Optional[EnterpriseColumnOptimizer] = None,
):
    """
    Returns the top N breakdown prop values for event/person breakdown

    e.g. for Browser with limit 3 might return ['Chrome', 'Safari', 'Firefox', 'Other']
    """
    column_optimizer = column_optimizer or EnterpriseColumnOptimizer(
        filter, team_id)
    parsed_date_from, parsed_date_to, date_params = parse_timestamps(
        filter=filter, team_id=team_id)

    props_to_filter = filter.property_groups.combine_property_group(
        PropertyOperatorType.AND, entity.property_groups)
    outer_properties = column_optimizer.property_optimizer.parse_property_groups(
        props_to_filter).outer

    prop_filters, prop_filter_params = parse_prop_grouped_clauses(
        team_id=team_id,
        property_group=outer_properties,
        table_name="e",
        prepend="e_brkdwn",
        person_properties_mode=PersonPropertiesMode.
        USING_PERSON_PROPERTIES_COLUMN,
        allow_denormalized_props=True,
    )

    entity_params, entity_format_params = get_entity_filtering_params(
        entity=entity, team_id=team_id, table_name="e")

    value_expression = _to_value_expression(filter.breakdown_type,
                                            filter.breakdown,
                                            filter.breakdown_group_type_index)

    person_join_clauses = ""
    person_join_params: Dict = {}
    person_query = PersonQuery(filter,
                               team_id,
                               column_optimizer=column_optimizer,
                               entity=entity)
    if person_query.is_used:
        person_subquery, person_join_params = person_query.get_query()
        person_join_clauses = f"""
            INNER JOIN ({get_team_distinct_ids_query(team_id)}) AS pdi ON e.distinct_id = pdi.distinct_id
            INNER JOIN ({person_subquery}) person ON pdi.person_id = person.id
        """

    groups_join_condition, groups_join_params = GroupsJoinQuery(
        filter, team_id, column_optimizer).get_join_query()

    elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format(
        value_expression=value_expression,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        prop_filters=prop_filters,
        aggregate_operation=aggregate_operation,
        person_join_clauses=person_join_clauses,
        groups_join_clauses=groups_join_condition,
        **entity_format_params,
    )

    return sync_execute(
        elements_query,
        {
            "key": filter.breakdown,
            "limit": limit,
            "team_id": team_id,
            "offset": filter.offset,
            **prop_filter_params,
            **entity_params,
            **person_join_params,
            **groups_join_params,
            **extra_params,
            **date_params,
        },
    )[0][0]
Пример #9
0
 def _get_groups_query(self) -> Tuple[str, Dict]:
     return GroupsJoinQuery(self._filter, self._team_id,
                            self._column_optimizer).get_join_query()
Пример #10
0
 def _get_groups_query(self) -> Tuple[str, Dict]:
     return GroupsJoinQuery(
         self._filter, self._team_id, self._column_optimizer, using_person_on_events=self._using_person_on_events
     ).get_join_query()
Пример #11
0
def get_breakdown_prop_values(
    filter: Filter,
    entity: Entity,
    aggregate_operation: str,
    team_id: int,
    limit: int = 25,
    extra_params={},
    column_optimizer: Optional[ColumnOptimizer] = None,
):
    "Returns the top N breakdown prop values for event/person breakdown"

    parsed_date_from, parsed_date_to, date_params = parse_timestamps(
        filter=filter, team_id=team_id)
    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties + entity.properties,
        team_id,
        table_name="e",
        prepend="e_brkdwn",
        person_properties_mode=PersonPropertiesMode.EXCLUDE,
        allow_denormalized_props=True,
    )

    entity_params, entity_format_params = get_entity_filtering_params(
        entity, team_id, table_name="e")

    if filter.breakdown_type == "person":
        value_expression, _ = get_property_string_expr(
            "person", cast(str, filter.breakdown), "%(key)s", "person_props")
    elif filter.breakdown_type == "group":
        value_expression, _ = get_property_string_expr(
            "groups", cast(str, filter.breakdown), "%(key)s",
            f"group_properties_{filter.breakdown_group_type_index}")
    else:
        value_expression, _ = get_property_string_expr(
            "events", cast(str, filter.breakdown), "%(key)s", "properties")

    person_join_clauses = ""
    person_join_params: Dict = {}
    person_query = ClickhousePersonQuery(filter,
                                         team_id,
                                         column_optimizer=column_optimizer,
                                         entity=entity)
    if person_query.is_used:
        person_subquery, person_join_params = person_query.get_query()
        person_join_clauses = f"""
            INNER JOIN ({GET_TEAM_PERSON_DISTINCT_IDS}) AS pdi ON e.distinct_id = pdi.distinct_id
            INNER JOIN ({person_subquery}) person ON pdi.person_id = person.id
        """

    groups_join_condition, groups_join_params = GroupsJoinQuery(
        filter, team_id, column_optimizer).get_join_query()

    elements_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format(
        value_expression=value_expression,
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        prop_filters=prop_filters,
        aggregate_operation=aggregate_operation,
        person_join_clauses=person_join_clauses,
        groups_join_clauses=groups_join_condition,
        **entity_format_params,
    )

    return sync_execute(
        elements_query,
        {
            "key": filter.breakdown,
            "limit": limit,
            "team_id": team_id,
            "offset": filter.offset,
            **prop_filter_params,
            **entity_params,
            **person_join_params,
            **groups_join_params,
            **extra_params,
            **date_params,
        },
    )[0][0]