示例#1
0
    def _breakdown_person_params(self, aggregate_operation: str,
                                 filter: Filter, team_id: int):
        """Assemble the pieces needed to break a query down by a person property.

        Formats ``TOP_PERSON_PROPS_ARRAY_OF_KEY_SQL`` with the filter's date
        clauses and property clauses, hands the result to
        ``self._get_top_elements`` and packages what comes back.

        Returns:
            A 4-tuple ``(params, breakdown_filter, breakdown_filter_params,
            breakdown_value)``: ``params`` stores the top elements under
            ``"values"``, ``breakdown_filter`` is
            ``BREAKDOWN_PERSON_PROP_JOIN_SQL``, ``breakdown_filter_params``
            carries the formatted latest-person SQL and the last item is the
            literal string ``"value"``.
        """
        date_from_sql, date_to_sql, _ = parse_timestamps(filter=filter,
                                                         team_id=team_id)
        property_sql, property_params = parse_prop_clauses(
            filter.properties,
            team_id,
            table_name="e",
            filter_test_accounts=filter.filter_test_accounts)

        top_values = self._get_top_elements(
            TOP_PERSON_PROPS_ARRAY_OF_KEY_SQL.format(
                parsed_date_from=date_from_sql,
                parsed_date_to=date_to_sql,
                latest_person_sql=GET_LATEST_PERSON_SQL.format(query=""),
                prop_filters=property_sql,
                aggregate_operation=aggregate_operation,
            ),
            filter,
            team_id,
            params=property_params)

        join_params = {
            "latest_person_sql": GET_LATEST_PERSON_SQL.format(query=""),
        }
        return ({"values": top_values}, BREAKDOWN_PERSON_PROP_JOIN_SQL,
                join_params, "value")
    def _retrieve_people(self, target_entity: Entity, filter: StickinessFilter,
                         team: Team) -> ReturnDict:
        """Run the stickiness people query for ``target_entity`` and serialize it.

        ``STICKINESS_PEOPLE_SQL`` is formatted with the entity clause, the
        date clauses, the property clause and the truncation function for
        ``filter.interval``; it is then embedded in ``PEOPLE_SQL`` and
        executed with ``sync_execute``.

        Returns:
            The ``.data`` of a ``ClickhousePersonSerializer`` built from the
            returned rows with ``many=True``.
        """
        date_from_sql, date_to_sql, _ = parse_timestamps(filter=filter,
                                                         team_id=team.pk)
        property_sql, property_params = parse_prop_clauses(
            filter.properties, team.pk)
        entity_sql, entity_params = self._format_entity_filter(
            entity=target_entity)
        trunc_func = get_trunc_func_ch(filter.interval)

        # Later entries override earlier ones, mirroring a dict-literal merge.
        query_params: Dict = {"team_id": team.pk}
        query_params.update(property_params)
        query_params["stickiness_day"] = filter.selected_interval
        query_params.update(entity_params)
        query_params["offset"] = filter.offset

        inner_sql = STICKINESS_PEOPLE_SQL.format(
            entity_filter=entity_sql,
            parsed_date_from=date_from_sql,
            parsed_date_to=date_to_sql,
            filters=property_sql,
            trunc_func=trunc_func,
        )
        people_query = PEOPLE_SQL.format(
            content_sql=inner_sql,
            query="",
            latest_person_sql=GET_LATEST_PERSON_SQL.format(query=""))

        rows = sync_execute(people_query, query_params)
        return ClickhousePersonSerializer(rows, many=True).data
    def _exec_query(self) -> List[Tuple]:
        """Build the funnel query from ``self._filter`` and execute it.

        Missing ``_date_from`` / ``_date_to`` values on the filter are filled
        in place (``"-7d"`` and the current time respectively) before the date
        clauses are parsed. The query parameters are stored on ``self.params``
        before the per-step queries are built.

        Returns:
            Whatever ``sync_execute`` returns for the formatted ``FUNNEL_SQL``.
        """
        property_sql, property_params = parse_prop_clauses(
            "uuid", self._filter.properties, self._team, prepend="global")

        # Default the date window when the filter does not specify one.
        if not self._filter._date_from:
            self._filter._date_from = relative_date_parse("-7d")
        if not self._filter._date_to:
            self._filter._date_to = timezone.now()

        date_from_sql, date_to_sql = parse_timestamps(filter=self._filter)
        self.params: Dict = {"team_id": self._team.pk, **property_params}

        step_queries = []
        for position, step_entity in enumerate(self._filter.entities):
            step_queries.append(self._build_steps_query(step_entity, position))

        # One "step_N" column name per entity, in entity order.
        step_columns = ",".join(
            f"step_{position}"
            for position, _ in enumerate(self._filter.entities))

        funnel_query = FUNNEL_SQL.format(
            select_steps=step_columns,
            team_id=self._team.id,
            steps=", ".join(step_queries),
            # Only the first "uuid IN" occurrence is qualified with the table name.
            filters=property_sql.replace("uuid IN", "events.uuid IN", 1),
            parsed_date_from=date_from_sql,
            parsed_date_to=date_to_sql,
        )
        return sync_execute(funnel_query, self.params)
示例#4
0
    def _calculate_entity_people(self, team: Team, entity: Entity,
                                 filter: Filter):
        """Query and serialize the people matching ``entity`` under ``filter``.

        Formats ``PERSON_TREND_SQL`` with the entity clause, the date clauses
        and (only when the filter has properties) the property clause, wraps
        it in ``PEOPLE_THROUGH_DISTINCT_SQL`` and runs it via ``sync_execute``.

        Returns:
            The ``.data`` of a ``ClickhousePersonSerializer`` built from the
            returned rows with ``many=True``.
        """
        date_from_sql, date_to_sql = parse_timestamps(filter=filter)
        property_sql, property_params = parse_prop_clauses(
            "uuid", filter.properties, team)
        entity_sql, entity_params = self._format_entity_filter(entity=entity)

        # Later entries override earlier ones, mirroring a dict-literal merge.
        query_params: Dict = {"team_id": team.pk}
        query_params.update(property_params)
        query_params.update(entity_params)
        query_params["offset"] = filter.offset

        # No properties on the filter means no property clause at all.
        filters_sql = f"{property_sql}" if filter.properties else ""

        inner_sql = PERSON_TREND_SQL.format(
            entity_filter=entity_sql,
            parsed_date_from=date_from_sql or "",
            parsed_date_to=date_to_sql or "",
            filters=filters_sql,
            breakdown_filter="",
        )
        rows = sync_execute(
            PEOPLE_THROUGH_DISTINCT_SQL.format(content_sql=inner_sql),
            query_params)
        return ClickhousePersonSerializer(rows, many=True).data
示例#5
0
    def _breakdown_prop_params(self, aggregate_operation: str, filter: Filter,
                               team_id: int):
        """Assemble the pieces needed to break a query down by an event property.

        Formats ``TOP_ELEMENTS_ARRAY_OF_KEY_SQL`` with the filter's date
        clauses and property clauses and passes it to
        ``self._get_top_elements``.

        Returns:
            A 4-tuple ``(params, breakdown_filter, breakdown_filter_params,
            breakdown_value)``: ``params`` stores the top elements under
            ``"values"``, ``breakdown_filter`` is ``BREAKDOWN_PROP_JOIN_SQL``,
            ``breakdown_filter_params`` is an empty dict and the last item is
            the SQL expression string extracting ``%(key)s`` from
            ``properties``.
        """
        date_from_sql, date_to_sql, _ = parse_timestamps(filter=filter,
                                                         team_id=team_id)
        property_sql, property_params = parse_prop_clauses(
            filter.properties,
            team_id,
            table_name="e",
            filter_test_accounts=filter.filter_test_accounts)

        top_values = self._get_top_elements(
            TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format(
                parsed_date_from=date_from_sql,
                parsed_date_to=date_to_sql,
                prop_filters=property_sql,
                aggregate_operation=aggregate_operation,
            ),
            filter,
            team_id,
            params=property_params)

        return ({"values": top_values}, BREAKDOWN_PROP_JOIN_SQL, {},
                "JSONExtractRaw(properties, %(key)s)")
示例#6
0
    def stats(self, request: request.Request, **kwargs) -> response.Response:
        """Return element statistics for the filter described by ``request``.

        Builds a ``Filter`` from the request and ``self.team``, formats
        ``GET_ELEMENTS`` with the date and grouped-property clauses and
        executes it. Each result row becomes a dict with ``"count"`` (row
        column 1), ``"hash"`` (always ``None``) and ``"elements"`` (the
        serialized elements produced by ``chain_to_elements`` from row
        column 0).
        """
        stats_filter = Filter(request=request, team=self.team)

        date_from_sql, date_to_sql, date_params = parse_timestamps(
            stats_filter, team_id=self.team.pk)
        property_sql, property_params = parse_prop_grouped_clauses(
            team_id=self.team.pk, property_group=stats_filter.property_groups)

        elements_query = GET_ELEMENTS.format(date_from=date_from_sql,
                                             date_to=date_to_sql,
                                             query=property_sql)
        # Later entries override earlier ones, mirroring a dict-literal merge.
        query_params = {"team_id": self.team.pk}
        query_params.update(property_params)
        query_params.update(date_params)

        payload = []
        for row in sync_execute(elements_query, query_params):
            payload.append({
                "count": row[1],
                "hash": None,
                "elements": [
                    ElementSerializer(element).data
                    for element in chain_to_elements(row[0])
                ],
            })
        return response.Response(payload)
示例#7
0
    def calculate_dist(self, filter: Filter, team: Team):
        """Return the session-length distribution as labelled counts.

        Formats ``DIST_SQL`` with the team id, the date clauses and (only when
        the filter has properties) the property clause, executes it and pairs
        each column of the first result row with a fixed bucket label.

        Returns:
            A list of ``{"label": ..., "count": ...}`` dicts, one per bucket
            label, in label order.
        """
        date_from_sql, date_to_sql = parse_timestamps(filter)
        property_sql, property_params = parse_prop_clauses(
            "uuid", filter.properties, team)

        dist_query = DIST_SQL.format(
            team_id=team.pk,
            date_from=date_from_sql,
            date_to=date_to_sql,
            filters=f"{property_sql}" if filter.properties else "",
            sessions_limit="",
        )
        rows = sync_execute(dist_query, {**property_params, "team_id": team.pk})

        bucket_labels = (
            "0 seconds (1 event)",
            "0-3 seconds",
            "3-10 seconds",
            "10-30 seconds",
            "30-60 seconds",
            "1-3 minutes",
            "3-10 minutes",
            "10-30 minutes",
            "30-60 minutes",
            "1+ hours",
        )

        # Column i of the first row is the count for bucket i.
        return [
            {"label": label, "count": rows[0][position]}
            for position, label in enumerate(bucket_labels)
        ]
示例#8
0
    def _exec_query(self) -> List[Tuple]:
        """Build the funnel query from ``self._filter`` and execute it.

        When the filter lacks ``_date_from`` / ``_date_to`` a new ``Filter``
        is created from the existing filter data plus defaults (``"-7d"`` and
        the current time) and stored back on ``self._filter``. The query
        parameters are stored on ``self.params`` before the per-step queries
        are built.

        Returns:
            Whatever ``sync_execute`` returns for the formatted ``FUNNEL_SQL``.
        """
        property_sql, property_params = parse_prop_clauses(
            self._filter.properties, self._team.pk, prepend="global")

        # Collect defaults only for the dates the filter is missing.
        date_defaults = {}
        if not self._filter._date_from:
            date_defaults["date_from"] = relative_date_parse("-7d")
        if not self._filter._date_to:
            date_defaults["date_to"] = timezone.now()
        self._filter = Filter(data={**self._filter._data, **date_defaults})

        date_from_sql, date_to_sql, _ = parse_timestamps(
            filter=self._filter, table="events.", team_id=self._team.pk)

        self.params: Dict = {
            "team_id": self._team.pk,
            # purely a speed optimization, don't need this for filtering
            "events": [],
            **property_params,
        }

        step_queries = []
        for position, step_entity in enumerate(self._filter.entities):
            step_queries.append(self._build_steps_query(step_entity, position))

        funnel_query = FUNNEL_SQL.format(
            team_id=self._team.id,
            steps=", ".join(step_queries),
            # Only the first "uuid IN" occurrence is qualified with the table name.
            filters=property_sql.replace("uuid IN", "events.uuid IN", 1),
            parsed_date_from=date_from_sql,
            parsed_date_to=date_to_sql,
        )
        return sync_execute(funnel_query, self.params)
    def stickiness(self, entity: Entity, filter: StickinessFilter, team_id: int) -> Dict[str, Any]:
        """Run the stickiness query for ``entity`` and process the counts.

        Action entities are looked up by primary key and use
        ``STICKINESS_ACTIONS_SQL``; every other entity uses ``STICKINESS_SQL``
        with ``entity.id`` as the event.

        Returns:
            ``self.process_result(counts, filter)``, or an empty dict when the
            action filter formats to an empty query.
        """
        date_from_sql, date_to_sql, _ = parse_timestamps(filter=filter, team_id=team_id)
        property_sql, property_params = parse_prop_clauses(filter.properties, team_id)
        trunc_func = get_trunc_func_ch(filter.interval)

        query_params: Dict = {
            "team_id": team_id,
            **property_params,
            "num_intervals": filter.num_intervals,
        }
        # Template arguments shared by both query variants.
        shared_format_args = dict(
            team_id=team_id,
            parsed_date_from=date_from_sql,
            parsed_date_to=date_to_sql,
            filters=property_sql,
            trunc_func=trunc_func,
        )

        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            action = Action.objects.get(pk=entity.id)
            action_query, action_params = format_action_filter(action)
            if action_query == "":
                return {}

            query_params.update(action_params)
            stickiness_query = STICKINESS_ACTIONS_SQL.format(
                actions_query=action_query, **shared_format_args)
        else:
            stickiness_query = STICKINESS_SQL.format(
                event=entity.id, **shared_format_args)

        return self.process_result(sync_execute(stickiness_query, query_params), filter)
示例#10
0
    def _format_lifecycle_query(self, entity: Entity, filter: Filter,
                                team_id: int) -> Tuple[str, Dict, Callable]:
        """Build the lifecycle SQL, its parameters and the result parser.

        Args:
            entity: The event or action the lifecycle is computed for. Its
                properties are combined with the filter's properties.
            filter: Supplies the date range, interval, properties and the
                test-account flag.
            team_id: Team the query is scoped to.

        Returns:
            A tuple ``(query, params, parse_result)`` where ``parse_result``
            is ``self._parse_result(filter, entity)``. If ``entity`` is an
            action and resolving or formatting it raises an ``Exception``,
            ``query`` is ``""`` and ``params`` is an empty dict.
        """
        # Fall back to the team's earliest timestamp when no start is given.
        date_from = filter.date_from or get_earliest_timestamp(team_id)

        interval = filter.interval or "day"
        # NOTE(review): get_time_diff receives filter.date_from rather than the
        # defaulted date_from above - confirm it handles a missing start itself.
        num_intervals, seconds_in_interval, _ = get_time_diff(
            interval, filter.date_from, filter.date_to, team_id)
        interval_increment, interval_string, sub_interval_string = self.get_interval(
            interval)
        trunc_func = get_trunc_func_ch(interval)

        props_to_filter = [*filter.properties, *entity.properties]
        prop_filters, prop_filter_params = parse_prop_clauses(
            props_to_filter,
            team_id,
            filter_test_accounts=filter.filter_test_accounts)

        _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

        event_query: str
        event_params: Dict[str, Any]
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            try:
                action = entity.get_action()
                event_query, event_params = format_action_filter(action)
            except Exception:
                # Best effort: an unresolvable action produces an empty query.
                # Catch Exception rather than using a bare except so that
                # KeyboardInterrupt and SystemExit still propagate.
                return "", {}, self._parse_result(filter, entity)
        else:
            event_query = "event = %(event)s"
            event_params = {"event": entity.id}

        # Keep the time of day only for sub-day intervals; otherwise use midnight.
        if filter.interval in ("hour", "minute"):
            time_suffix = " %H:%M:%S"
        else:
            time_suffix = " 00:00:00"
        prev_date_from = (date_from - interval_increment).strftime(
            "%Y-%m-%d{}".format(time_suffix))

        query = LIFECYCLE_SQL.format(
            interval=interval_string,
            trunc_func=trunc_func,
            event_query=event_query,
            filters=prop_filters,
            sub_interval=sub_interval_string,
            GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
        )
        params = {
            "team_id": team_id,
            "prev_date_from": prev_date_from,
            "num_intervals": num_intervals,
            "seconds_in_interval": seconds_in_interval,
            **event_params,
            **date_params,
            **prop_filter_params,
        }
        return query, params, self._parse_result(filter, entity)
示例#11
0
def _process_content_sql(team: Team, entity: Entity, filter: Filter):
    """Build the person-trend SQL and parameters for ``entity`` under ``filter``.

    The filter is first passed through ``_handle_date_interval``. A cohort
    breakdown (with a value other than ``"all"``) adds a ``distinct_id IN``
    clause built from the cohort; a person breakdown with string key and
    value appends a matching person ``Property`` to ``filter.properties``.

    Returns:
        A tuple ``(content_sql, params)`` where ``params`` merges the team id,
        property, entity, offset and cohort parameters.
    """
    filter = _handle_date_interval(filter)

    date_from_sql, date_to_sql, _ = parse_timestamps(filter=filter, team_id=team.pk)
    entity_sql, entity_params = format_entity_filter(entity=entity)

    person_filter = ""
    person_filter_params: Dict[str, Any] = {}

    if filter.breakdown_type == "cohort" and filter.breakdown_value != "all":
        cohort = Cohort.objects.get(pk=filter.breakdown_value)
        cohort_sql, person_filter_params = format_filter_query(cohort)
        person_filter = f"AND distinct_id IN ({cohort_sql})"
    elif (
        filter.breakdown_type == "person"
        and isinstance(filter.breakdown, str)
        and isinstance(filter.breakdown_value, str)
    ):
        # Restrict to people whose breakdown property equals the chosen value.
        filter.properties.append(
            Property(key=filter.breakdown, value=filter.breakdown_value, type="person")
        )

    property_sql, property_params = parse_prop_clauses(filter.properties, team.pk)

    # Later entries override earlier ones, mirroring a dict-literal merge.
    query_params: Dict = {"team_id": team.pk}
    query_params.update(property_params)
    query_params.update(entity_params)
    query_params["offset"] = filter.offset

    content_sql = PERSON_TREND_SQL.format(
        entity_filter=f"AND {entity_sql}",
        parsed_date_from=date_from_sql,
        parsed_date_to=date_to_sql,
        filters=property_sql,
        breakdown_filter="",
        person_filter=person_filter,
    )
    return content_sql, {**query_params, **person_filter_params}
示例#12
0
def get_breakdown_event_prop_values(filter: Filter,
                                    entity: Entity,
                                    aggregate_operation: str,
                                    team_id: int,
                                    limit: int = 25):
    """Return the top event-property values to break ``entity`` down by.

    Formats ``TOP_ELEMENTS_ARRAY_OF_KEY_SQL`` with the filter's date clauses,
    property clauses, the aggregate operation and the entity format
    parameters from ``populate_entity_params``, then delegates to
    ``_get_top_elements`` with the merged query parameters and ``limit``.
    """
    date_from_sql, date_to_sql, _ = parse_timestamps(filter=filter,
                                                     team_id=team_id)
    property_sql, property_params = parse_prop_clauses(
        filter.properties,
        team_id,
        table_name="e",
        filter_test_accounts=filter.filter_test_accounts,
    )
    entity_params, entity_format_params = populate_entity_params(entity)

    top_values_query = TOP_ELEMENTS_ARRAY_OF_KEY_SQL.format(
        parsed_date_from=date_from_sql,
        parsed_date_to=date_to_sql,
        prop_filters=property_sql,
        aggregate_operation=aggregate_operation,
        **entity_format_params,
    )

    # Entity parameters take precedence over property parameters on clashes.
    query_params = dict(property_params)
    query_params.update(entity_params)

    return _get_top_elements(
        filter=filter,
        team_id=team_id,
        query=top_values_query,
        params=query_params,
        limit=limit,
    )
示例#13
0
    def _exec_query(self) -> List[Tuple]:
        """Build the funnel query from ``self._filter`` and execute it.

        When the filter lacks ``_date_from`` / ``_date_to`` the defaults
        (``"-7d"`` and the current time) are applied through
        ``self._filter.with_data`` and the result is stored back on
        ``self._filter``. The property parameters are merged into the
        existing ``self.params`` before the per-step queries are built.

        Returns:
            Whatever ``sync_execute`` returns for the formatted ``FUNNEL_SQL``.
        """
        property_sql, property_params = parse_prop_clauses(
            self._filter.properties,
            self._team.pk,
            prepend="global",
            allow_denormalized_props=True)

        # Collect defaults only for the dates the filter is missing.
        date_defaults = {}
        if not self._filter._date_from:
            date_defaults["date_from"] = relative_date_parse("-7d")
        if not self._filter._date_to:
            date_defaults["date_to"] = timezone.now()
        self._filter = self._filter.with_data(date_defaults)

        date_from_sql, date_to_sql, _ = parse_timestamps(
            filter=self._filter, table="events.", team_id=self._team.pk)
        self.params.update(property_params)

        step_queries = []
        for position, step_entity in enumerate(self._filter.entities):
            step_queries.append(self._build_steps_query(step_entity, position))

        funnel_query = FUNNEL_SQL.format(
            team_id=self._team.id,
            steps=", ".join(step_queries),
            # Only the first "uuid IN" occurrence is qualified with the table name.
            filters=property_sql.replace("uuid IN", "events.uuid IN", 1),
            parsed_date_from=date_from_sql,
            parsed_date_to=date_to_sql,
            top_level_groupby="",
            extra_select="",
            extra_groupby="",
            within_time="6048000000000000",
        )
        return sync_execute(funnel_query, self.params)
示例#14
0
文件: list.py 项目: yianz/posthog
    def run(self, filter: SessionsFilter, team: Team, *args,
            **kwargs) -> List[Dict[str, Any]]:
        """List sessions for ``team`` that match ``filter``.

        ``limit`` and ``offset`` are read from ``kwargs`` (defaulting to
        ``SESSIONS_LIST_DEFAULT_LIMIT`` and ``0``). The filter is passed
        through ``set_default_dates`` before ``SESSION_SQL`` is formatted
        and executed. Parsed results get person properties attached via
        ``self._add_person_properties``.

        Returns:
            The result of ``filter_sessions_by_recordings`` for the parsed
            sessions.
        """
        limit = kwargs.get("limit", SESSIONS_LIST_DEFAULT_LIMIT)
        offset = kwargs.get("offset", 0)
        filter = set_default_dates(filter)

        property_sql, property_params = parse_prop_clauses(filter.properties, team.pk)
        action_timestamp_sql, action_params = format_action_filter_aggregate(
            filter, team.pk)
        date_from_sql, date_to_sql, _ = parse_timestamps(filter, team.pk)

        # Later entries override earlier ones, mirroring a dict-literal merge.
        query_params = dict(property_params)
        query_params.update(action_params)
        query_params.update({
            "team_id": team.pk,
            "limit": limit,
            "offset": offset,
            "distinct_id_limit": limit + offset,
        })

        session_query = SESSION_SQL.format(
            date_from=date_from_sql,
            date_to=date_to_sql,
            filters=property_sql,
            action_filter_timestamp=action_timestamp_sql,
            sessions_limit="LIMIT %(offset)s, %(limit)s",
        )
        sessions = self._parse_list_results(
            sync_execute(session_query, query_params))

        self._add_person_properties(team, sessions)

        return filter_sessions_by_recordings(team, sessions, filter)
示例#15
0
def _format_all_query(team_id: int, filter: Filter,
                      **kwargs) -> Tuple[str, Dict]:
    """Build the query selecting every distinct id that matches ``filter``.

    The optional ``entity`` keyword argument, when it is a truthy ``Entity``,
    contributes its properties in addition to the filter's. Property
    parameters are prefixed with ``"all_cohort_"`` and the clauses target the
    ``all_events`` table alias.

    Returns:
        A tuple ``(query, params)`` where ``params`` merges the date and
        property parameters.
    """
    entity = kwargs.pop("entity", None)
    parsed_date_from, parsed_date_to, date_params = parse_timestamps(
        filter=filter, team_id=team_id, table="all_events.")

    combined_props = list(filter.properties)
    if entity and isinstance(entity, Entity):
        combined_props = combined_props + list(entity.properties)

    prop_filters, prop_filter_params = parse_prop_clauses(
        combined_props,
        team_id,
        prepend="all_cohort_",
        table_name="all_events")

    # The SQL text below is kept exactly as-is, whitespace included.
    query = f"""
            SELECT DISTINCT distinct_id, 0 as value
            FROM events all_events
            WHERE team_id = {team_id} 
            {parsed_date_from} 
            {parsed_date_to} 
            {prop_filters}
            """
    merged_params = dict(date_params)
    merged_params.update(prop_filter_params)
    return query, merged_params
示例#16
0
    def _get_date_filter(self) -> Tuple[str, Dict]:
        """Build the date clause for the query and its parameters.

        Stores the parsed "from" and "to" clauses on ``self.parsed_date_from``
        and ``self.parsed_date_to``. When the entity's math is
        ``WEEKLY_ACTIVE`` or ``MONTHLY_ACTIVE`` the active-user format
        parameters are also stored on ``self.active_user_params`` and supply
        the "from" part of the clause.

        Returns:
            A tuple ``(date_filter, date_params)``.
        """
        trunc_func = get_trunc_func_ch(self._filter.interval)
        _, _, round_interval = get_time_diff(self._filter.interval or "day",
                                             self._filter.date_from,
                                             self._filter.date_to,
                                             team_id=self._team_id)
        _, date_to_sql, timestamp_params = parse_timestamps(
            filter=self._filter, team_id=self._team_id)
        date_from_sql = date_from_clause(trunc_func, round_interval)

        self.parsed_date_from = date_from_sql
        self.parsed_date_to = date_to_sql

        if self._entity.math not in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
            return f"{date_from_sql} {date_to_sql}", timestamp_params

        # Active-user math uses the previous-range "from" clause instead.
        active_params = get_active_user_params(self._filter, self._entity,
                                               self._team_id)
        self.active_user_params = active_params
        active_clause = "{parsed_date_from_prev_range} {parsed_date_to}".format(
            **active_params, parsed_date_to=date_to_sql)
        return active_clause, timestamp_params
示例#17
0
    def stats(self, request: request.Request) -> response.Response:
        """Return element statistics for the filter described by ``request``.

        Builds a ``Filter`` from the request, formats ``GET_ELEMENTS`` with
        the date and property clauses for the requesting user's team and
        executes it. Each result row becomes a dict with ``"count"`` (row
        column 1), ``"hash"`` (always ``None``) and ``"elements"`` (the
        serialized elements produced by ``chain_to_elements`` from row
        column 0).
        """
        stats_filter = Filter(request=request)
        team = request.user.team
        assert team is not None

        date_from_sql, date_to_sql = parse_timestamps(stats_filter)
        property_sql, property_params = parse_prop_clauses(
            stats_filter.properties, team.pk)

        elements_query = GET_ELEMENTS.format(date_from=date_from_sql,
                                             date_to=date_to_sql,
                                             query=property_sql)
        query_params = {"team_id": team.id}
        query_params.update(property_params)

        payload = []
        for row in sync_execute(elements_query, query_params):
            payload.append({
                "count": row[1],
                "hash": None,
                "elements": [
                    ElementSerializer(element).data
                    for element in chain_to_elements(row[0])
                ],
            })
        return response.Response(payload)
def _process_content_sql(target_entity: Entity, filter: StickinessFilter,
                         team: Team) -> Tuple[str, Dict[str, Any]]:
    """Build the stickiness people SQL and parameters for ``target_entity``.

    Formats ``STICKINESS_PEOPLE_SQL`` with the entity clause, the date
    clauses, the property clause (honouring ``filter.filter_test_accounts``),
    the truncation function for ``filter.interval`` and
    ``GET_TEAM_PERSON_DISTINCT_IDS``.

    Returns:
        A tuple ``(content_sql, params)``.
    """
    date_from_sql, date_to_sql, _ = parse_timestamps(filter=filter,
                                                     team_id=team.pk)
    property_sql, property_params = parse_prop_clauses(
        filter.properties,
        team.pk,
        filter_test_accounts=filter.filter_test_accounts)
    entity_sql, entity_params = _format_entity_filter(entity=target_entity)
    trunc_func = get_trunc_func_ch(filter.interval)

    # Later entries override earlier ones, mirroring a dict-literal merge.
    query_params: Dict = {"team_id": team.pk}
    query_params.update(property_params)
    query_params["stickiness_day"] = filter.selected_interval
    query_params.update(entity_params)
    query_params["offset"] = filter.offset

    return STICKINESS_PEOPLE_SQL.format(
        entity_filter=entity_sql,
        parsed_date_from=date_from_sql,
        parsed_date_to=date_to_sql,
        filters=property_sql,
        trunc_func=trunc_func,
        GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
    ), query_params
示例#19
0
    def stickiness(self, entity: Entity, filter: Filter, team_id: int) -> Dict[str, Any]:
        """Run the stickiness query for ``entity`` and process the counts.

        Action entities are looked up by primary key and use
        ``STICKINESS_ACTIONS_SQL``; every other entity uses ``STICKINESS_SQL``
        with ``entity.id`` as the event.

        Returns:
            ``self.process_result(counts, range_days)`` where ``range_days``
            is the number of days between the filter dates plus two, or an
            empty dict when the action filter formats to an empty query.

        Raises:
            ValueError: If the filter lacks ``date_to`` or ``date_from``.
        """
        if not (filter.date_to and filter.date_from):
            raise ValueError("_stickiness needs date_to and date_from set")
        range_days = (filter.date_to - filter.date_from).days + 2

        date_from_sql, date_to_sql = parse_timestamps(filter=filter)
        property_sql, property_params = parse_prop_clauses(filter.properties, team_id)

        query_params: Dict = {"team_id": team_id, **property_params}
        # Template arguments shared by both query variants.
        shared_format_args = dict(
            team_id=team_id,
            parsed_date_from=date_from_sql,
            parsed_date_to=date_to_sql,
            filters=property_sql,
        )

        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            action = Action.objects.get(pk=entity.id)
            action_query, action_params = format_action_filter(action)
            if action_query == "":
                return {}

            query_params.update(action_params)
            stickiness_query = STICKINESS_ACTIONS_SQL.format(
                actions_query=action_query, **shared_format_args)
        else:
            stickiness_query = STICKINESS_SQL.format(
                event=entity.id, **shared_format_args)

        return self.process_result(
            sync_execute(stickiness_query, query_params), range_days)
示例#20
0
def _format_all_query(team_id: int, filter: Filter,
                      **kwargs) -> Tuple[str, Dict]:
    """Build the query selecting every distinct id that matches ``filter``.

    The optional ``entity`` keyword argument, when it is a truthy ``Entity``,
    has its property groups AND-combined with the filter's. Property
    parameters are prefixed with ``"all_cohort_"`` and the clauses target the
    ``all_events`` table alias. Each selected row carries
    ``ALL_USERS_COHORT_ID`` as its ``value``.

    Returns:
        A tuple ``(query, params)`` where ``params`` merges the date and
        property parameters.
    """
    entity = kwargs.pop("entity", None)
    parsed_date_from, parsed_date_to, date_params = parse_timestamps(
        filter=filter, team_id=team_id, table="all_events.")

    property_group = filter.property_groups
    if entity and isinstance(entity, Entity):
        property_group = property_group.combine_property_group(
            PropertyOperatorType.AND, entity.property_groups)

    prop_filters, prop_filter_params = parse_prop_grouped_clauses(
        team_id=team_id,
        property_group=property_group,
        prepend="all_cohort_",
        table_name="all_events",
    )

    # The SQL text below is kept exactly as-is, whitespace included.
    query = f"""
            SELECT DISTINCT distinct_id, {ALL_USERS_COHORT_ID} as value
            FROM events all_events
            WHERE team_id = {team_id}
            {parsed_date_from}
            {parsed_date_to}
            {prop_filters}
            """
    merged_params = dict(date_params)
    merged_params.update(prop_filter_params)
    return query, merged_params
示例#21
0
    def _calculate_stickiness_entity_people(self, team: Team, entity: Entity,
                                            filter: Filter,
                                            stickiness_day: int):
        """Query and serialize the people for ``entity`` on ``stickiness_day``.

        Formats ``STICKINESS_PEOPLE_SQL`` with the entity clause, the date
        clauses and (only when the filter has properties) the property
        clause, embeds it in ``PEOPLE_SQL`` and runs it via ``sync_execute``.

        Returns:
            The ``.data`` of a ``ClickhousePersonSerializer`` built from the
            returned rows with ``many=True``.
        """
        date_from_sql, date_to_sql = parse_timestamps(filter=filter)
        property_sql, property_params = parse_prop_clauses(
            filter.properties, team.pk)
        entity_sql, entity_params = self._format_entity_filter(entity=entity)

        # Later entries override earlier ones, mirroring a dict-literal merge.
        query_params: Dict = {"team_id": team.pk}
        query_params.update(property_params)
        query_params["stickiness_day"] = stickiness_day
        query_params.update(entity_params)
        query_params["offset"] = filter.offset

        # No properties on the filter means no property clause at all.
        filters_sql = f"{property_sql}" if filter.properties else ""

        inner_sql = STICKINESS_PEOPLE_SQL.format(
            entity_filter=entity_sql,
            parsed_date_from=date_from_sql or "",
            parsed_date_to=date_to_sql or "",
            filters=filters_sql,
        )
        people_query = PEOPLE_SQL.format(
            content_sql=inner_sql,
            query="",
            latest_person_sql=GET_LATEST_PERSON_SQL.format(query=""))

        rows = sync_execute(people_query, query_params)
        return ClickhousePersonSerializer(rows, many=True).data
示例#22
0
    def calculate_list(self, filter: Filter, team: Team, limit: int,
                       offset: int):
        """List sessions for ``team`` matching ``filter``, paginated.

        A filter without ``_date_from`` is defaulted in place to the start of
        the current day; one without ``_date_to`` (but with a ``date_from``)
        gets ``date_from`` plus one day. ``SESSION_SQL`` is then formatted and
        executed, and the parsed sessions get person properties attached via
        ``self._add_person_properties``.

        Returns:
            The parsed session list.
        """
        property_sql, property_params = parse_prop_clauses(
            "uuid", filter.properties, team)

        if not filter._date_from:
            start_of_today = timezone.now().replace(hour=0,
                                                    minute=0,
                                                    second=0,
                                                    microsecond=0)
            filter._date_from = start_of_today
        if not filter._date_to and filter.date_from:
            filter._date_to = filter.date_from + relativedelta(days=1)

        date_from_sql, date_to_sql = parse_timestamps(filter)

        query_params = dict(property_params)
        query_params.update({
            "team_id": team.pk,
            "limit": limit,
            "offset": offset,
        })

        session_query = SESSION_SQL.format(
            date_from=date_from_sql,
            date_to=date_to_sql,
            # No properties on the filter means no property clause at all.
            filters=f"{property_sql}" if filter.properties else "",
            sessions_limit="LIMIT %(offset)s, %(limit)s",
        )
        sessions = self._parse_list_results(
            sync_execute(session_query, query_params))

        self._add_person_properties(team, sessions)

        return sessions
示例#23
0
    def calculate_paths(self, filter: Filter, team: Team):
        """Compute path transitions for ``team`` under ``filter``.

        A filter without ``_date_from`` / ``_date_to`` is defaulted in place
        (``"-7d"`` and the current time). ``PATHS_QUERY_FINAL`` is formatted
        from the path type determined by ``self._determine_path_type``, the
        date and property clauses, and an optional start point.

        Returns:
            A list of dicts with ``source``, ``source_id``, ``target``,
            ``target_id`` and ``value`` keys (row columns 0-4), sorted by
            ``value`` in descending order.
        """
        # Default to the last seven days when the filter has no explicit range.
        if not filter._date_from:
            filter._date_from = relative_date_parse("-7d")
        if not filter._date_to:
            filter._date_to = timezone.now()

        date_from_sql, date_to_sql = parse_timestamps(filter=filter)
        requested_path_type = filter.path_type if filter else None
        event, path_type, start_comparator = self._determine_path_type(requested_path_type)

        property_sql, property_params = parse_prop_clauses("uuid", filter.properties, team)

        has_start_point = bool(filter and filter.start_point)

        # Step 0. Event culling subexpression for step 1.
        # Build an expression that removes events in a session that are definitely unused.
        # For example the 4th, 5th, etc row after a "new_session = 1" or
        # "marked_session_start = 1" row gets removed.
        row_conditions = []
        for lookback in range(4):
            condition = "neighbor(new_session, {}, 0) = 1".format(-lookback)
            if has_start_point:
                condition += " or neighbor(marked_session_start, {}, 0) = 1".format(-lookback)
            row_conditions.append(condition)
        excess_row_filter = "(" + " or ".join(row_conditions) + ")"

        if event:
            event_query = "event = %(event)s"
        else:
            event_query = "event NOT IN ('$autocapture', '$pageview', '$identify', '$pageleave', '$screen')"

        if has_start_point:
            marked_session_start = "{} = %(start_point)s".format(start_comparator)
        else:
            marked_session_start = "new_session"

        # The elements chain is only selected and grouped for autocapture paths.
        is_autocapture = event == AUTOCAPTURE_EVENT

        paths_query = PATHS_QUERY_FINAL.format(
            event_query=event_query,
            path_type=path_type,
            parsed_date_from=date_from_sql,
            parsed_date_to=date_to_sql,
            filters=property_sql if filter.properties else "",
            marked_session_start=marked_session_start,
            excess_row_filter=excess_row_filter,
            select_elements_chain=", events.elements_chain as elements_chain" if is_autocapture else "",
            group_by_elements_chain=", events.elements_chain" if is_autocapture else "",
        )

        query_params: Dict = {
            "team_id": team.pk,
            "property": "$current_url",
            "event": event,
            "start_point": filter.start_point,
            **property_params,
        }

        edges: List[Dict[str, str]] = [
            {"source": row[0], "source_id": row[1], "target": row[2], "target_id": row[3], "value": row[4]}
            for row in sync_execute(paths_query, query_params)
        ]
        edges.sort(key=lambda edge: edge["value"], reverse=True)
        return edges
示例#24
0
    def calculate_avg(self, filter: Filter, team: Team):
        """Compute the average-session time series for ``team`` under ``filter``.

        Nests ``SESSIONS_NO_EVENTS_SQL`` inside ``AVERAGE_PER_PERIOD_SQL`` and
        ``AVERAGE_SQL`` (together with a ``NULL_SQL`` fragment), executes the
        result and turns the cleaned values into a scaled time series.

        Returns:
            A one-element list holding the time-series dict updated with the
            output of ``self._format_avg`` for the overall average, or an
            empty list when the values sum to zero.
        """
        date_from_sql, date_to_sql, _ = parse_timestamps(filter, team.pk)

        property_sql, property_params = parse_prop_clauses(
            filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts
        )

        interval_notation = get_trunc_func_ch(filter.interval)
        num_intervals, seconds_in_interval, _ = get_time_diff(
            filter.interval or "day", filter.date_from, filter.date_to, team.pk
        )

        entity_conditions, entity_params = entity_query_conditions(filter, team)
        if not entity_conditions:
            entity_conditions = ["event != '$feature_flag_called'"]  # default condition
        entity_query = " OR ".join(entity_conditions)

        sessions_sql = SESSIONS_NO_EVENTS_SQL.format(
            team_id=team.pk,
            date_from=date_from_sql,
            date_to=date_to_sql,
            filters=property_sql,
            sessions_limit="",
            entity_filter=f"AND ({entity_query})",
        )
        per_period_sql = AVERAGE_PER_PERIOD_SQL.format(sessions=sessions_sql, interval=interval_notation)

        null_sql = NULL_SQL.format(
            date_to=filter.date_to.strftime("%Y-%m-%d 00:00:00"),
            interval=interval_notation,
            num_intervals=num_intervals,
            seconds_in_interval=seconds_in_interval,
        )

        final_query = AVERAGE_SQL.format(sessions=per_period_sql, null_sql=null_sql)

        # Entity parameters override property parameters; team_id overrides both.
        query_params = {**property_params, **entity_params, "team_id": team.pk}
        rows = sync_execute(final_query, query_params)

        values = self.clean_values(filter, rows)
        time_series_data = append_data(values, interval=filter.interval, math=None)
        scaled_data, _ = scale_time_series(time_series_data["data"])
        time_series_data.update({"data": scaled_data})

        # Overall average is taken over periods with a non-zero value only.
        total = sum(val[1] for val in values)
        if total == 0:
            return []

        valid_days = sum(1 for val in values if val[1])
        overall_average = (total / valid_days) if valid_days else 0

        time_series_data.update(self._format_avg(overall_average))

        return [time_series_data]
示例#25
0
    def _format_stickiness_query(self, entity: Entity, filter: Filter,
                                 team: Team) -> Optional[Dict[str, Any]]:
        """Execute the stickiness query for ``entity`` and shape the per-day counts.

        Uses STICKINESS_ACTIONS_SQL when the entity is an action and
        STICKINESS_SQL otherwise.

        Returns:
            ``None`` when the entity is an action whose formatted filter query
            is empty; otherwise a dict with ``labels`` ("1 day", "2 days", ...),
            ``days``, ``data`` (count per day, 0 where the query returned no
            row for that day) and ``count`` (sum of ``data``).

        Raises:
            ValueError: if the filter is missing ``date_to`` or ``date_from``.
        """
        if not (filter.date_to and filter.date_from):
            raise ValueError("_stickiness needs date_to and date_from set")
        range_days = (filter.date_to - filter.date_from).days + 2

        date_from_clause, date_to_clause = parse_timestamps(filter=filter)
        prop_clause, prop_params = parse_prop_clauses(filter.properties, team)

        query_params: Dict = {"team_id": team.pk, **prop_params}

        # Pick the SQL template and the single keyword that differs between the two.
        if entity.type == TREND_FILTER_TYPE_ACTIONS:
            action = Action.objects.get(pk=entity.id)
            action_query, action_params = format_action_filter(action)
            if action_query == "":
                return None
            query_params.update(action_params)
            template = STICKINESS_ACTIONS_SQL
            entity_kwargs = {"actions_query": action_query}
        else:
            template = STICKINESS_SQL
            entity_kwargs = {"event": entity.id}

        content_sql = template.format(
            team_id=team.pk,
            parsed_date_from=(date_from_clause or ""),
            parsed_date_to=(date_to_clause or ""),
            filters=f"{prop_clause}" if filter.properties else "",
            **entity_kwargs,
        )

        # Each row is read as (count, day); index the counts by day.
        counts_by_day: Dict[int, int] = {
            row[1]: row[0] for row in sync_execute(content_sql, query_params)
        }

        days = list(range(1, range_days))
        labels = ["{} day{}".format(day, "s" if day > 1 else "") for day in days]
        data = [counts_by_day.get(day, 0) for day in days]

        return {
            "labels": labels,
            "days": days,
            "data": data,
            "count": sum(data),
        }
示例#26
0
文件: events.py 项目: copyit/posthog
    def run(self, filter: SessionEventsFilter, team: Team, *args, **kwargs) -> List[Dict[str, Any]]:
        """Execute the SESSION_EVENTS query for ``filter.distinct_id`` and serialize the rows.

        The date clauses produced by ``parse_timestamps`` are formatted into
        the query text; ``team_id`` and ``distinct_id`` are passed as query
        parameters. The raw rows are handed to ``_serialize``.
        """
        date_from, date_to, _ = parse_timestamps(filter, team.pk)

        query = SESSION_EVENTS.format(date_from=date_from, date_to=date_to)
        query_params = {"team_id": team.pk, "distinct_id": filter.distinct_id}
        rows = sync_execute(query, query_params)

        return self._serialize(rows, filter.distinct_id, team.pk)
示例#27
0
def _process_content_sql(team: Team, entity: Entity, filter: Filter):
    """Build the persons SQL and its parameters for one entity.

    The filter is first passed through ``_handle_date_interval``. A cohort
    breakdown (other than "all") becomes a ``distinct_id IN (...)`` person
    filter; any other string breakdown key/value is appended to the filter's
    properties as an extra ``Property``. PERSONS_ACTIVE_USER_SQL is used when
    the entity math is weekly/monthly active, PERSON_TREND_SQL otherwise.

    Returns:
        A ``(content_sql, params)`` tuple, where ``params`` merges team id,
        property params, entity params, offset and any cohort params.
    """
    filter = _handle_date_interval(filter)

    date_from_clause, date_to_clause, _ = parse_timestamps(filter=filter, team_id=team.pk)
    entity_sql, entity_params = format_entity_filter(entity=entity)
    person_filter = ""
    person_filter_params: Dict[str, Any] = {}

    is_cohort_breakdown = filter.breakdown_type == "cohort" and filter.breakdown_value != "all"
    if is_cohort_breakdown:
        cohort = Cohort.objects.get(pk=filter.breakdown_value)
        cohort_query, person_filter_params = format_filter_query(cohort)
        person_filter = f"AND distinct_id IN ({cohort_query})"
    elif filter.breakdown_type and isinstance(filter.breakdown, str) and isinstance(filter.breakdown_value, str):
        # Restrict to the selected breakdown value by treating it as one more property filter.
        filter.properties.append(
            Property(key=filter.breakdown, value=filter.breakdown_value, type=filter.breakdown_type)
        )

    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts
    )
    params: Dict = {
        "team_id": team.pk,
        **prop_filter_params,
        **entity_params,
        "offset": filter.offset,
    }

    # Keyword arguments common to both SQL templates.
    shared_kwargs = {
        "parsed_date_from": date_from_clause,
        "parsed_date_to": date_to_clause,
        "filters": prop_filters,
        "breakdown_filter": "",
        "person_filter": person_filter,
    }

    if entity.math in (WEEKLY_ACTIVE, MONTHLY_ACTIVE):
        active_user_params = get_active_user_params(filter, entity, team.pk)
        content_sql = PERSONS_ACTIVE_USER_SQL.format(
            entity_query=f"AND {entity_sql}",
            GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
            **shared_kwargs,
            **active_user_params,
        )
    else:
        content_sql = PERSON_TREND_SQL.format(entity_filter=f"AND {entity_sql}", **shared_kwargs)

    return content_sql, {**params, **person_filter_params}
示例#28
0
    def calculate_avg(self, filter: Filter, team: Team):
        """Run the averaged sessions query for ``team`` and return it as one series.

        Missing ``_date_from`` / ``_date_to`` on the filter are filled in
        (``relative_date_parse("-7d")`` and ``timezone.now()`` respectively)
        before the query is built from SESSIONS_NO_EVENTS_SQL,
        AVERAGE_PER_PERIOD_SQL, NULL_SQL and AVERAGE_SQL.

        Returns:
            ``[]`` when the second elements of the cleaned values sum to zero;
            otherwise a one-item list holding the time-series dict updated
            with the output of ``_format_avg`` for the overall average, i.e.
            the total divided by the number of truthy values.
        """
        # Apply default dates directly on the filter when none were supplied.
        if not filter._date_from:
            filter._date_from = relative_date_parse("-7d")
        if not filter._date_to:
            filter._date_to = timezone.now()

        date_from_clause, date_to_clause = parse_timestamps(filter)
        prop_clause, prop_params = parse_prop_clauses("uuid", filter.properties, team)

        interval_annotation = get_interval_annotation_ch(filter.interval)
        num_intervals, seconds_in_interval = get_time_diff(
            filter.interval or "day", filter.date_from, filter.date_to
        )

        sessions_sql = SESSIONS_NO_EVENTS_SQL.format(
            team_id=team.pk,
            date_from=date_from_clause,
            date_to=date_to_clause,
            filters=f"{prop_clause}" if filter.properties else "",
            sessions_limit="",
        )
        per_period_sql = AVERAGE_PER_PERIOD_SQL.format(sessions=sessions_sql, interval=interval_annotation)

        null_sql_date_to = filter.date_to or timezone.now()
        null_sql = NULL_SQL.format(
            date_to=null_sql_date_to.strftime("%Y-%m-%d 00:00:00"),
            interval=interval_annotation,
            num_intervals=num_intervals,
            seconds_in_interval=seconds_in_interval,
        )

        final_sql = AVERAGE_SQL.format(sessions=per_period_sql, null_sql=null_sql)

        query_params = {**prop_params, "team_id": team.pk}
        rows = sync_execute(final_sql, query_params)
        values = self.clean_values(filter, rows)
        series = append_data(values, interval=filter.interval, math=None)

        # Overall average is taken only over the periods that have a value.
        total = sum(point[1] for point in values)
        if total == 0:
            return []

        valid_days = len([point for point in values if point[1]])
        overall_average = (total / valid_days) if valid_days else 0

        series.update(self._format_avg(overall_average))
        return [series]
示例#29
0
    def _get_date_filter(self) -> Tuple[str, Dict]:
        """Return the date-range SQL fragment and its query parameters.

        The fragment is the two clauses produced by ``parse_timestamps`` for
        this object's filter and team, each on its own indented line.
        """
        from_clause, to_clause, date_params = parse_timestamps(filter=self._filter, team_id=self._team_id)

        # Same layout as a triple-quoted block: newline + 8 spaces before each
        # clause and after the last one.
        pad = "\n" + " " * 8
        query = f"{pad}{from_clause}{pad}{to_clause}{pad}"

        return query, date_params
示例#30
0
 def _get_date_filter(self):
     """Return the timestamp-range SQL fragment and its query parameters.

     The parameters are the date params from ``parse_timestamps`` plus the
     filter's ``interval``. The bounds are truncated to the interval and
     widened by one interval on each side.
     """
     _, _, date_params = parse_timestamps(filter=self._filter,
                                          team_id=self._team_id)
     interval = self._filter.interval
     query_params = {**date_params, "interval": interval}
     # :TRICKY: We fetch all data even for the period before the graph starts up until the end of the last period
     # NOTE(review): the interval unit is interpolated into the SQL text itself;
     # presumably it is validated upstream - confirm.
     date_filter_sql = f"""
         AND timestamp >= toDateTime(dateTrunc(%(interval)s, toDateTime(%(date_from)s))) - INTERVAL 1 {interval}
         AND timestamp < toDateTime(dateTrunc(%(interval)s, toDateTime(%(date_to)s))) + INTERVAL 1 {interval}
     """
     return date_filter_sql, query_params