Пример #1
0
    def _get_date_filter(self) -> Tuple[str, Dict]:
        date_filter = ""
        date_params: Dict[str, Any] = {}
        interval_annotation = get_trunc_func_ch(self._filter.interval)
        _, _, round_interval = get_time_diff(self._filter.interval or "day",
                                             self._filter.date_from,
                                             self._filter.date_to,
                                             team_id=self._team_id)
        _, parsed_date_to, date_params = parse_timestamps(
            filter=self._filter, team_id=self._team_id)
        parsed_date_from = date_from_clause(interval_annotation,
                                            round_interval)

        self.parsed_date_from = parsed_date_from
        self.parsed_date_to = parsed_date_to

        if self._entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
            date_filter = "{parsed_date_from_prev_range} {parsed_date_to}"
            format_params = get_active_user_params(self._filter, self._entity,
                                                   self._team_id)
            self.active_user_params = format_params

            date_filter = date_filter.format(**format_params,
                                             parsed_date_to=parsed_date_to)
        else:
            date_filter = "{parsed_date_from} {parsed_date_to}".format(
                parsed_date_from=parsed_date_from,
                parsed_date_to=parsed_date_to)

        return date_filter, date_params
Пример #2
0
def _process_content_sql(team: Team, entity: Entity, filter: Filter):

    filter = _handle_date_interval(filter)

    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter,
                                                           team_id=team.pk)
    entity_sql, entity_params = format_entity_filter(entity=entity)
    person_filter = ""
    person_filter_params: Dict[str, Any] = {}

    if filter.breakdown_type == "cohort" and filter.breakdown_value != "all":
        cohort = Cohort.objects.get(pk=filter.breakdown_value)
        person_filter, person_filter_params = format_filter_query(cohort)
        person_filter = "AND distinct_id IN ({})".format(person_filter)
    elif (filter.breakdown_type == "person"
          and isinstance(filter.breakdown, str)
          and isinstance(filter.breakdown_value, str)):
        person_prop = Property(
            **{
                "key": filter.breakdown,
                "value": filter.breakdown_value,
                "type": "person"
            })
        filter.properties.append(person_prop)

    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties,
        team.pk,
        filter_test_accounts=filter.filter_test_accounts)
    params: Dict = {
        "team_id": team.pk,
        **prop_filter_params,
        **entity_params, "offset": filter.offset
    }

    if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
        active_user_params = get_active_user_params(filter, entity, team.pk)
        content_sql = PERSONS_ACTIVE_USER_SQL.format(
            entity_query=f"AND {entity_sql}",
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
            breakdown_filter="",
            person_filter=person_filter,
            **active_user_params,
        )
    else:
        content_sql = PERSON_TREND_SQL.format(
            entity_filter=f"AND {entity_sql}",
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
            breakdown_filter="",
            person_filter=person_filter,
        )
    return content_sql, {**params, **person_filter_params}
Пример #3
0
    def get_query(self) -> Tuple[str, Dict, Callable]:
        interval_annotation = get_trunc_func_ch(self.filter.interval)
        num_intervals, seconds_in_interval, round_interval = get_time_diff(
            self.filter.interval, self.filter.date_from, self.filter.date_to,
            self.team_id)
        _, parsed_date_to, date_params = parse_timestamps(filter=self.filter,
                                                          team_id=self.team_id)

        props_to_filter = self.filter.property_groups.combine_property_group(
            PropertyOperatorType.AND, self.entity.property_groups)

        outer_properties = self.column_optimizer.property_optimizer.parse_property_groups(
            props_to_filter).outer
        prop_filters, prop_filter_params = parse_prop_grouped_clauses(
            team_id=self.team_id,
            property_group=outer_properties,
            table_name="e",
            person_properties_mode=PersonPropertiesMode.
            USING_PERSON_PROPERTIES_COLUMN,
        )
        aggregate_operation, _, math_params = process_math(self.entity)

        action_query = ""
        action_params: Dict = {}
        if self.entity.type == TREND_FILTER_TYPE_ACTIONS:
            action = self.entity.get_action()
            action_query, action_params = format_action_filter(
                team_id=self.team_id, action=action, table_name="e")

        self.params = {
            **self.params,
            **math_params,
            **prop_filter_params,
            **action_params,
            "event": self.entity.id,
            "key": self.filter.breakdown,
            **date_params,
        }

        breakdown_filter_params = {
            "parsed_date_from":
            date_from_clause(interval_annotation, round_interval),
            "parsed_date_to":
            parsed_date_to,
            "actions_query":
            "AND {}".format(action_query) if action_query else "",
            "event_filter":
            "AND event = %(event)s" if not action_query else "",
            "filters":
            prop_filters if props_to_filter.values else "",
        }

        _params, _breakdown_filter_params = {}, {}

        if self.filter.breakdown_type == "cohort":
            _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_cohort_params(
            )
        else:
            _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_prop_params(
                "count(*)"
                if self.entity.math == "dau" else aggregate_operation,
                math_params,
            )

        if len(_params["values"]) == 0:
            # If there are no breakdown values, we are sure that there's no relevant events, so instead of adjusting
            # a "real" SELECT for this, we only include the below dummy SELECT.
            # It's a drop-in replacement for a "real" one, simply always returning 0 rows.
            # See https://github.com/PostHog/posthog/pull/5674 for context.
            return (
                "SELECT [now()] AS date, [0] AS data, '' AS breakdown_value LIMIT 0",
                {},
                lambda _: [],
            )

        person_join_condition, person_join_params = self._person_join_condition(
        )
        groups_join_condition, groups_join_params = GroupsJoinQuery(
            self.filter, self.team_id, self.column_optimizer).get_join_query()
        self.params = {
            **self.params,
            **_params,
            **person_join_params,
            **groups_join_params
        }
        breakdown_filter_params = {
            **breakdown_filter_params,
            **_breakdown_filter_params
        }

        if self.filter.display in TRENDS_DISPLAY_BY_VALUE:
            breakdown_filter = breakdown_filter.format(
                **breakdown_filter_params)
            content_sql = BREAKDOWN_AGGREGATE_QUERY_SQL.format(
                breakdown_filter=breakdown_filter,
                person_join=person_join_condition,
                groups_join=groups_join_condition,
                aggregate_operation=aggregate_operation,
                breakdown_value=breakdown_value,
            )
            time_range = enumerate_time_range(self.filter, seconds_in_interval)

            return (
                content_sql,
                self.params,
                self._parse_single_aggregate_result(self.filter, self.entity,
                                                    {"days": time_range}),
            )

        else:

            breakdown_filter = breakdown_filter.format(
                **breakdown_filter_params)

            if self.entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
                active_user_params = get_active_user_params(
                    self.filter, self.entity, self.team_id)
                conditions = BREAKDOWN_ACTIVE_USER_CONDITIONS_SQL.format(
                    **breakdown_filter_params, **active_user_params)
                inner_sql = BREAKDOWN_ACTIVE_USER_INNER_SQL.format(
                    breakdown_filter=breakdown_filter,
                    person_join=person_join_condition,
                    groups_join=groups_join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                    breakdown_value=breakdown_value,
                    conditions=conditions,
                    GET_TEAM_PERSON_DISTINCT_IDS=get_team_distinct_ids_query(
                        self.team_id),
                    **active_user_params,
                    **breakdown_filter_params,
                )
            elif self.filter.display == TRENDS_CUMULATIVE and self.entity.math == "dau":
                inner_sql = BREAKDOWN_CUMULATIVE_INNER_SQL.format(
                    breakdown_filter=breakdown_filter,
                    person_join=person_join_condition,
                    groups_join=groups_join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                    breakdown_value=breakdown_value,
                    **breakdown_filter_params,
                )
            else:
                inner_sql = BREAKDOWN_INNER_SQL.format(
                    breakdown_filter=breakdown_filter,
                    person_join=person_join_condition,
                    groups_join=groups_join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                    breakdown_value=breakdown_value,
                )

            breakdown_query = BREAKDOWN_QUERY_SQL.format(
                interval=interval_annotation,
                num_intervals=num_intervals,
                inner_sql=inner_sql,
            )
            self.params.update({
                "seconds_in_interval": seconds_in_interval,
                "num_intervals": num_intervals,
            })

            return breakdown_query, self.params, self._parse_trend_result(
                self.filter, self.entity)
Пример #4
0
    def _format_breakdown_query(self, entity: Entity, filter: Filter,
                                team_id: int) -> Tuple[str, Dict, Callable]:
        # process params
        params: Dict[str, Any] = {"team_id": team_id}
        interval_annotation = get_trunc_func_ch(filter.interval)
        num_intervals, seconds_in_interval, round_interval = get_time_diff(
            filter.interval or "day", filter.date_from, filter.date_to,
            team_id)
        _, parsed_date_to, date_params = parse_timestamps(filter=filter,
                                                          team_id=team_id)

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter,
            team_id,
            table_name="e",
            filter_test_accounts=filter.filter_test_accounts)
        aggregate_operation, _, math_params = process_math(entity)

        if entity.math == "dau" or filter.breakdown_type == "person":
            join_condition = EVENT_JOIN_PERSON_SQL
        else:
            join_condition = ""

        action_query = ""
        action_params: Dict = {}
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            action = entity.get_action()
            action_query, action_params = format_action_filter(action,
                                                               table_name="e")

        params = {
            **params,
            **math_params,
            **prop_filter_params,
            **action_params,
            "event": entity.id,
            "key": filter.breakdown,
            **date_params,
        }

        breakdown_filter_params = {
            "parsed_date_from":
            date_from_clause(interval_annotation, round_interval),
            "parsed_date_to":
            parsed_date_to,
            "actions_query":
            "AND {}".format(action_query) if action_query else "",
            "event_filter":
            "AND event = %(event)s" if not action_query else "",
            "filters":
            prop_filters if props_to_filter else "",
        }

        _params, _breakdown_filter_params = {}, {}

        if filter.breakdown_type == "cohort":
            _params, breakdown_filter, _breakdown_filter_params, breakdown_value = self._breakdown_cohort_params(
                team_id, filter, entity)
        elif filter.breakdown_type == "person":
            (
                _params,
                breakdown_filter,
                _breakdown_filter_params,
                breakdown_value,
            ) = self._breakdown_person_params(
                "count(*)" if entity.math == "dau" else aggregate_operation,
                entity, filter, team_id)
        else:
            (
                _params,
                breakdown_filter,
                _breakdown_filter_params,
                breakdown_value,
            ) = self._breakdown_prop_params(
                "count(*)" if entity.math == "dau" else aggregate_operation,
                entity, filter, team_id)

        if len(_params["values"]) == 0:
            return "SELECT 1", {}, lambda _: []

        params = {**params, **_params}
        breakdown_filter_params = {
            **breakdown_filter_params,
            **_breakdown_filter_params
        }

        if filter.display in TRENDS_DISPLAY_BY_VALUE:
            breakdown_filter = breakdown_filter.format(
                **breakdown_filter_params)
            content_sql = BREAKDOWN_AGGREGATE_QUERY_SQL.format(
                breakdown_filter=breakdown_filter,
                event_join=join_condition,
                aggregate_operation=aggregate_operation,
                breakdown_value=breakdown_value,
            )
            time_range = enumerate_time_range(filter, seconds_in_interval)

            return content_sql, params, self._parse_single_aggregate_result(
                filter, entity, {"days": time_range})

        else:

            breakdown_filter = breakdown_filter.format(
                **breakdown_filter_params)

            if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
                active_user_params = get_active_user_params(
                    filter, entity, team_id)
                conditions = BREAKDOWN_ACTIVE_USER_CONDITIONS_SQL.format(
                    **breakdown_filter_params, **active_user_params)
                inner_sql = BREAKDOWN_ACTIVE_USER_INNER_SQL.format(
                    breakdown_filter=breakdown_filter,
                    event_join=join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                    breakdown_value=breakdown_value,
                    conditions=conditions,
                    GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
                    **active_user_params,
                    **breakdown_filter_params)
            else:
                inner_sql = BREAKDOWN_INNER_SQL.format(
                    breakdown_filter=breakdown_filter,
                    event_join=join_condition,
                    aggregate_operation=aggregate_operation,
                    interval_annotation=interval_annotation,
                    breakdown_value=breakdown_value,
                )

            breakdown_query = BREAKDOWN_QUERY_SQL.format(
                interval=interval_annotation,
                num_intervals=num_intervals,
                inner_sql=inner_sql,
            )
            params.update({
                "date_to":
                filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
                "seconds_in_interval":
                seconds_in_interval,
                "num_intervals":
                num_intervals,
            })

            return breakdown_query, params, self._parse_trend_result(
                filter, entity)
Пример #5
0
    def _normal_query(self, entity: Entity, filter: Filter,
                      team_id: int) -> Tuple[str, Dict, Callable]:

        interval_annotation = get_trunc_func_ch(filter.interval)
        num_intervals, seconds_in_interval, round_interval = get_time_diff(
            filter.interval or "day",
            filter.date_from,
            filter.date_to,
            team_id=team_id)
        _, parsed_date_to, date_params = parse_timestamps(filter=filter,
                                                          team_id=team_id)

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter,
            team_id,
            filter_test_accounts=filter.filter_test_accounts)

        aggregate_operation, join_condition, math_params = process_math(entity)

        params: Dict = {"team_id": team_id}
        params = {**params, **prop_filter_params, **math_params, **date_params}
        content_sql_params = {
            "interval":
            interval_annotation,
            "parsed_date_from":
            date_from_clause(interval_annotation, round_interval),
            "parsed_date_to":
            parsed_date_to,
            "timestamp":
            "timestamp",
            "filters":
            prop_filters,
            "event_join":
            join_condition,
            "aggregate_operation":
            aggregate_operation,
            "entity_query":
            "AND {actions_query}" if entity.type == TREND_FILTER_TYPE_ACTIONS
            else "AND event = %(event)s",
        }

        entity_params, entity_format_params = self._populate_entity_params(
            entity)
        params = {**params, **entity_params}

        if filter.display in TRENDS_DISPLAY_BY_VALUE:
            content_sql = VOLUME_TOTAL_AGGREGATE_SQL.format(
                **content_sql_params).format(**entity_format_params)
            time_range = self._enumerate_time_range(filter,
                                                    seconds_in_interval)

            return (
                content_sql,
                params,
                lambda result: [{
                    "aggregated_value":
                    result[0][0] if result and len(result) else 0,
                    "days":
                    time_range
                }],
            )
        else:

            if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
                sql_params = get_active_user_params(filter, entity, team_id)
                content_sql = ACTIVE_USER_SQL.format(
                    **content_sql_params,
                    **sql_params).format(**entity_format_params)
            else:
                # entity_format_params depends on format clause from content_sql_params
                content_sql = VOLUME_SQL.format(**content_sql_params).format(
                    **entity_format_params)

            null_sql = NULL_SQL.format(
                interval=interval_annotation,
                seconds_in_interval=seconds_in_interval,
                num_intervals=num_intervals,
                date_to=filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
            )
            final_query = AGGREGATE_SQL.format(null_sql=null_sql,
                                               content_sql=content_sql)
            return final_query, params, self._parse_normal_result(filter)
Пример #6
0
    def _total_volume_query(self, entity: Entity, filter: Filter,
                            team_id: int) -> Tuple[str, Dict, Callable]:

        interval_annotation = get_trunc_func_ch(filter.interval)
        num_intervals, seconds_in_interval, round_interval = get_time_diff(
            filter.interval or "day",
            filter.date_from,
            filter.date_to,
            team_id=team_id)
        _, parsed_date_to, date_params = parse_timestamps(filter=filter,
                                                          team_id=team_id)

        aggregate_operation, join_condition, math_params = process_math(entity)

        content_sql_params = {
            "aggregate_operation":
            aggregate_operation,
            "timestamp":
            "e.timestamp",
            "interval":
            interval_annotation,
            "parsed_date_from":
            date_from_clause(interval_annotation, round_interval),
            "parsed_date_to":
            parsed_date_to,
        }
        params: Dict = {"team_id": team_id}
        params = {**params, **math_params, **date_params}

        if filter.display in TRENDS_DISPLAY_BY_VALUE:
            event_query, event_query_params = ClickhouseEventQuery(
                filter,
                entity,
                team_id,
                date_filter="{parsed_date_from} {parsed_date_to}",
                should_join_distinct_ids=True
                if join_condition != "" else False,
            ).get_query()
            event_query = event_query.format(**content_sql_params)
            params = {**params, **event_query_params}
            content_sql = VOLUME_TOTAL_AGGREGATE_SQL.format(
                event_query=event_query, **content_sql_params)
            time_range = enumerate_time_range(filter, seconds_in_interval)

            return (
                content_sql,
                params,
                lambda result: [{
                    "aggregated_value":
                    result[0][0] if result and len(result) else 0,
                    "days":
                    time_range
                }],
            )
        else:

            if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
                event_query, event_query_params = ClickhouseEventQuery(
                    filter,
                    entity,
                    team_id,
                    date_filter=
                    "{parsed_date_from_prev_range} {parsed_date_to}",
                    should_join_distinct_ids=True,
                ).get_query()
                sql_params = get_active_user_params(filter, entity, team_id)
                params = {**params, **event_query_params}
                event_query = event_query.format(**sql_params,
                                                 parsed_date_to=parsed_date_to)
                content_sql = ACTIVE_USER_SQL.format(event_query=event_query,
                                                     **content_sql_params,
                                                     **sql_params)
            else:
                event_query, event_query_params = ClickhouseEventQuery(
                    filter,
                    entity,
                    team_id,
                    date_filter="{parsed_date_from} {parsed_date_to}",
                    should_join_distinct_ids=True
                    if join_condition != "" else False,
                ).get_query()
                event_query = event_query.format(**content_sql_params)
                params = {**params, **event_query_params}
                content_sql = VOLUME_SQL.format(event_query=event_query,
                                                **content_sql_params)

            null_sql = NULL_SQL.format(
                interval=interval_annotation,
                seconds_in_interval=seconds_in_interval,
                num_intervals=num_intervals,
                date_to=filter.date_to.strftime("%Y-%m-%d %H:%M:%S"),
            )
            final_query = AGGREGATE_SQL.format(null_sql=null_sql,
                                               content_sql=content_sql)
            return final_query, params, self._parse_total_volume_result(filter)