Пример #1
0
    def _parse_properties(self, properties: Optional[Any]) -> List[Property]:
        """Normalise raw property filters into a list of ``Property`` objects.

        Two input shapes are handled:

        * a list whose items are either ``Property`` instances (kept as-is)
          or mappings of keyword arguments for ``Property``;
        * an old-style dict whose keys are ``"<key>"`` or
          ``"<key>__<operator>"``; each entry becomes an ``"event"`` property.

        Any other falsy input yields an empty list. List items that cannot be
        turned into a ``Property`` are skipped (best-effort parsing).
        """
        if isinstance(properties, list):
            _properties = []
            for prop_params in properties:
                if isinstance(prop_params, Property):
                    _properties.append(prop_params)
                    continue
                try:
                    _properties.append(Property(**prop_params))
                except Exception:
                    # Best-effort: drop entries that cannot be parsed. Unlike a
                    # bare ``except:`` this still lets KeyboardInterrupt and
                    # SystemExit propagate.
                    continue
            return _properties
        if not properties:
            return []

        # old style dict properties
        ret = []
        for key, value in properties.items():
            key_split = key.split("__")
            ret.append(
                Property(
                    key=key_split[0],
                    value=value,
                    # Only the first "__" suffix is used as the operator.
                    operator=key_split[1] if len(key_split) > 1 else None,
                    type="event",
                ))
        return ret
Пример #2
0
def _process_content_sql(team: Team, entity: Entity, filter: Filter):
    """Build the person-trend SQL for ``entity`` and the parameters it needs.

    Returns a ``(sql, params)`` tuple. A cohort breakdown (other than
    ``"all"``) is turned into a ``distinct_id IN (...)`` clause; a person
    breakdown with string key and value is appended to ``filter.properties``
    as an extra person property before the property clauses are generated.
    """
    filter = _handle_date_interval(filter)

    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team.pk)
    entity_sql, entity_params = format_entity_filter(entity=entity)

    person_filter = ""
    person_filter_params: Dict[str, Any] = {}

    if filter.breakdown_type == "cohort" and filter.breakdown_value != "all":
        breakdown_cohort = Cohort.objects.get(pk=filter.breakdown_value)
        cohort_query, person_filter_params = format_filter_query(breakdown_cohort)
        person_filter = f"AND distinct_id IN ({cohort_query})"
    elif (
        filter.breakdown_type == "person"
        and isinstance(filter.breakdown, str)
        and isinstance(filter.breakdown_value, str)
    ):
        breakdown_property = Property(key=filter.breakdown, value=filter.breakdown_value, type="person")
        filter.properties.append(breakdown_property)

    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)

    # Later entries win on key clashes, same as the original dict literal.
    query_params: Dict = {"team_id": team.pk}
    query_params.update(prop_filter_params)
    query_params.update(entity_params)
    query_params["offset"] = filter.offset

    content_sql = PERSON_TREND_SQL.format(
        entity_filter=f"AND {entity_sql}",
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        filters=prop_filters,
        breakdown_filter="",
        person_filter=person_filter,
    )
    return content_sql, {**query_params, **person_filter_params}
Пример #3
0
def uses_elements_chain(action: Action) -> bool:
    """Tell whether any step of ``action`` matches on element data.

    A step qualifies when one of its properties has type ``"element"`` or
    when any of its ``selector``, ``tag_name``, ``href`` or ``text``
    attributes is set (not ``None``).
    """
    element_attributes = ["selector", "tag_name", "href", "text"]

    for step in action.steps.all():
        # Property-based element filters are checked first.
        for raw_property in step.properties or []:
            if Property(**raw_property).type == "element":
                return True

        for attribute in element_attributes:
            if getattr(step, attribute) is not None:
                return True

    return False
Пример #4
0
    def _get_search_clause(self) -> Tuple[str, Dict]:
        """Build the SQL clause (and params) for the filter's search term.

        Returns ``("", {})`` when ``self._filter`` is not a ``Filter`` or has
        no search term. Otherwise the clause matches persons whose ``email``
        property ``icontains`` the term, or whose id is linked to a
        ``distinct_id`` equal to the term.
        """
        if not isinstance(self._filter, Filter) or not self._filter.search:
            return "", {}

        email_search_group = PropertyGroup(
            type=PropertyOperatorType.AND,
            values=[Property(key="email", operator="icontains", value=self._filter.search, type="person")],
        )
        email_clause, query_params = parse_prop_grouped_clauses(
            self._team_id,
            email_search_group,
            prepend="search",
            has_person_id_joined=False,
            group_properties_joined=False,
            person_properties_mode=PersonPropertiesMode.DIRECT,
            _top_level=False,
        )

        distinct_id_clause = """
            id IN (
                SELECT person_id FROM person_distinct_id where distinct_id = %(distinct_id)s
            )
            """

        # The same search term doubles as the exact distinct_id to look up.
        query_params["distinct_id"] = self._filter.search

        return f"AND (({email_clause}) OR ({distinct_id_clause}))", query_params
Пример #5
0
    def correlation_property_values(self) -> Optional[List[Property]]:
        """Return the funnel-correlation property values as ``Property`` objects.

        Reads ``FUNNEL_CORRELATION_PROPERTY_VALUES`` from ``self._data``. The
        value may be a JSON string or an already-decoded object.

        Returns:
            A list of ``Property`` objects when the (decoded) value is a
            list; entries that cannot be converted are skipped. ``None`` when
            the value is missing/empty or is not a list.

        Raises:
            ValidationError: if the value is a string that is not valid JSON.
        """
        # Used for property correlations persons

        _props = self._data.get(FUNNEL_CORRELATION_PROPERTY_VALUES)

        if not _props:
            return None

        if isinstance(_props, str):
            try:
                loaded_props = json.loads(_props)
            except json.decoder.JSONDecodeError as err:
                # Keep the decode error attached as the cause for debugging.
                raise ValidationError("Properties are unparsable!") from err
        else:
            loaded_props = _props

        if not isinstance(loaded_props, list):
            return None

        _properties = []
        for prop_params in loaded_props:
            if isinstance(prop_params, Property):
                _properties.append(prop_params)
                continue
            try:
                _properties.append(Property(**prop_params))
            except Exception:
                # Best-effort: drop entries that cannot be parsed. Unlike a
                # bare ``except:`` this still lets KeyboardInterrupt and
                # SystemExit propagate.
                continue
        return _properties
Пример #6
0
 def duration_filter_property(self) -> Optional[Property]:
     """Return the recording duration filter as a ``Property``, if present.

     Scans ``self._all_filters`` in order and converts the first entry whose
     ``type`` equals ``SESSIONS_FILTER_RECORDING_TYPE`` and whose ``key`` is
     ``"duration"``. Returns ``None`` when no entry matches.
     """
     for raw_filter in self._all_filters:
         if raw_filter["type"] == SESSIONS_FILTER_RECORDING_TYPE and raw_filter["key"] == "duration":
             return Property(**raw_filter)
     return None
Пример #7
0
def parse_prop_clauses(
    filters: List[Property],
    team_id: Optional[int],
    prepend: str = "global",
    table_name: str = "",
    allow_denormalized_props: bool = False,
    filter_test_accounts=False,
) -> Tuple[str, Dict]:
    """Translate property filters into a SQL fragment plus query parameters.

    Args:
        filters: Properties to translate. The list is not modified.
        team_id: Team the query is scoped to; added to the params as
            ``team_id`` when not ``None``.
        prepend: Prefix passed to ``prop_filter_json_extract`` for the
            generated parameter names.
        table_name: Optional table alias; when non-empty it is used
            (followed by ``.``) to qualify column names.
        allow_denormalized_props: Forwarded to ``prop_filter_json_extract``.
        filter_test_accounts: When true, the team's ``test_account_filters``
            are applied in addition to ``filters``.

    Returns:
        ``(sql, params)`` where ``sql`` is the generated clauses joined by a
        single space and ``params`` holds the values they reference.
    """
    final = []
    params: Dict[str, Any] = {}
    if team_id is not None:
        params["team_id"] = team_id
    if table_name != "":
        table_name += "."

    if filter_test_accounts:
        test_account_filters = Team.objects.only("test_account_filters").get(
            id=team_id).test_account_filters
        # Build a new list instead of extending ``filters`` in place, so the
        # caller's list does not accumulate test-account filters across calls.
        filters = [
            *filters,
            *(Property(**prop) for prop in test_account_filters),
        ]

    for idx, prop in enumerate(filters):
        if prop.type == "cohort":
            cohort = Cohort.objects.get(pk=prop.value, team_id=team_id)
            person_id_query, cohort_filter_params = format_filter_query(cohort)
            params = {**params, **cohort_filter_params}
            final.append("AND {table_name}distinct_id IN ({clause})".format(
                table_name=table_name, clause=person_id_query))
        elif prop.type == "person":
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                "{}person".format(prepend),
                allow_denormalized_props=allow_denormalized_props)
            final.append(
                "AND {table_name}distinct_id IN ({filter_query})".format(
                    filter_query=GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(
                        filters=filter_query),
                    table_name=table_name))
            params.update(filter_params)
        elif prop.type == "element":
            query, filter_params = filter_element({prop.key: prop.value},
                                                  prepend="{}_".format(idx))
            final.append("AND {}".format(query[0]))
            params.update(filter_params)
        else:
            # Any other property type is matched against the properties column.
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                prepend,
                prop_var="{}properties".format(table_name),
                allow_denormalized_props=allow_denormalized_props,
            )

            final.append(
                f"{filter_query} AND {table_name}team_id = %(team_id)s"
                if team_id else filter_query)
            params.update(filter_params)
    return " ".join(final), params
Пример #8
0
    def _parse_properties(self, properties: Optional[Any]) -> List[Property]:
        """Convert raw property filters into ``Property`` objects.

        A list is treated as a sequence of keyword-argument mappings for
        ``Property``. Any other falsy value gives an empty list. Otherwise
        the input is treated as an old-style dict whose keys are ``"<key>"``
        or ``"<key>__<operator>"``; each entry becomes an ``"event"``
        property.
        """
        if isinstance(properties, list):
            return [Property(**raw_property) for raw_property in properties]
        if not properties:
            return []

        # old style dict properties
        parsed: List[Property] = []
        for raw_key, value in properties.items():
            # Only the first "__" suffix (if any) is used as the operator.
            name, *suffixes = raw_key.split("__")
            parsed.append(
                Property(
                    key=name,
                    value=value,
                    operator=suffixes[0] if suffixes else None,
                    type="event",
                ))
        return parsed
Пример #9
0
    def person_filter_properties(self) -> List[Property]:
        """Return the properties used to filter persons.

        ``self.properties`` wins when it is non-empty. Otherwise every entry
        of ``self._all_filters`` whose ``type`` is the cohort or person
        session-filter type is converted to a ``Property``.
        """
        if len(self.properties) > 0:  # type: ignore
            return self.properties  # type: ignore

        person_properties: List[Property] = []
        for raw_filter in self._all_filters:
            if raw_filter["type"] in [SESSIONS_FILTER_COHORT_TYPE, SESSIONS_FILTER_PERSON_TYPE]:
                person_properties.append(Property(**raw_filter))
        return person_properties
Пример #10
0
def _process_content_sql(team: Team, entity: Entity, filter: Filter):
    """Build the persons SQL for ``entity`` and the parameters it needs.

    Returns a ``(sql, params)`` tuple. A cohort breakdown (other than
    ``"all"``) becomes a ``distinct_id IN (...)`` clause; any other breakdown
    with string key and value is appended to ``filter.properties`` as an
    extra property. Entities whose ``math`` is weekly or monthly active use
    ``PERSONS_ACTIVE_USER_SQL``; everything else uses ``PERSON_TREND_SQL``.
    """
    filter = _handle_date_interval(filter)

    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter,
                                                           team_id=team.pk)
    entity_sql, entity_params = format_entity_filter(entity=entity)

    person_filter = ""
    person_filter_params: Dict[str, Any] = {}

    if filter.breakdown_type == "cohort" and filter.breakdown_value != "all":
        breakdown_cohort = Cohort.objects.get(pk=filter.breakdown_value)
        cohort_query, person_filter_params = format_filter_query(breakdown_cohort)
        person_filter = f"AND distinct_id IN ({cohort_query})"
    elif (filter.breakdown_type
          and isinstance(filter.breakdown, str)
          and isinstance(filter.breakdown_value, str)):
        breakdown_property = Property(key=filter.breakdown,
                                      value=filter.breakdown_value,
                                      type=filter.breakdown_type)
        filter.properties.append(breakdown_property)

    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties,
        team.pk,
        filter_test_accounts=filter.filter_test_accounts)

    # Later entries win on key clashes, same as the original dict literal.
    query_params: Dict = {"team_id": team.pk}
    query_params.update(prop_filter_params)
    query_params.update(entity_params)
    query_params["offset"] = filter.offset

    # Placeholders shared by both SQL templates.
    shared_format_args = {
        "parsed_date_from": parsed_date_from,
        "parsed_date_to": parsed_date_to,
        "filters": prop_filters,
        "breakdown_filter": "",
        "person_filter": person_filter,
    }
    entity_clause = f"AND {entity_sql}"

    if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
        active_user_params = get_active_user_params(filter, entity, team.pk)
        content_sql = PERSONS_ACTIVE_USER_SQL.format(
            entity_query=entity_clause,
            GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
            **shared_format_args,
            **active_user_params,
        )
    else:
        content_sql = PERSON_TREND_SQL.format(
            entity_filter=entity_clause,
            **shared_format_args,
        )
    return content_sql, {**query_params, **person_filter_params}
Пример #11
0
def simplified_cohort_filter_properties(cohort: Cohort,
                                        team: Team) -> List[Property]:
    """
    Reduce a cohort to the simplest list of properties that represents it.

    Static and precalculated cohorts are represented by a single
    ``static-cohort`` / ``precalculated-cohort`` property. A cohort with
    exactly one property-only group is replaced by that group's flattened
    properties. Cohorts with an action/event group, or with more than one
    property group, fall back to a plain ``cohort`` property. A cohort with
    no usable groups yields an empty list.
    """
    from ee.clickhouse.models.cohort import is_precalculated_query

    if cohort.is_static:
        return [Property(type="static-cohort", key="id", value=cohort.pk)]

    # Cohort has been precalculated
    if is_precalculated_query(cohort):
        return [
            Property(type="precalculated-cohort", key="id", value=cohort.pk)
        ]

    # A cohort is made of match groups; a user matching _any_ group matches
    # the cohort. Each group is either
    # 1. "user has done X in time range Y at least N times" or
    # 2. "user has properties XYZ", including belonging to another cohort
    flattened_groups: List[List[Property]] = []
    for match_group in cohort.groups:
        if match_group.get("action_id") or match_group.get("event_id"):
            # :TODO: Support hasdone as separate property type
            return [Property(type="cohort", key="id", value=cohort.pk)]
        if match_group.get("properties"):
            # :TRICKY: This will recursively simplify all the properties
            # :TRICKY: cohort groups only contain 1 level deep properties, so the flat view is enough
            # TODO: Update this when cohort groups use property_groups
            group_filter = Filter(data=match_group, team=team)
            flattened_groups.append(group_filter.property_groups.flat)

    if not flattened_groups:
        return []
    if len(flattened_groups) == 1:
        return flattened_groups[0]
    # :TODO: Support or properties
    return [Property(type="cohort", key="id", value=cohort.pk)]
Пример #12
0
    def _calculate_entity_people(self, team: Team, entity: Entity,
                                 filter: Filter):
        """Query and serialize the people behind ``entity`` for ``filter``.

        A cohort breakdown (other than ``"all"``) becomes a
        ``distinct_id IN (...)`` clause; a person breakdown with string key
        and value is appended to ``filter.properties``. The resulting
        person-trend SQL is wrapped in ``PEOPLE_THROUGH_DISTINCT_SQL``, run
        with ``sync_execute`` and serialized with
        ``ClickhousePersonSerializer``.
        """
        parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter,
                                                               team_id=team.pk)
        entity_sql, entity_params = format_entity_filter(entity=entity)

        person_filter = ""
        person_filter_params: Dict[str, Any] = {}

        if filter.breakdown_type == "cohort" and filter.breakdown_value != "all":
            breakdown_cohort = Cohort.objects.get(pk=filter.breakdown_value)
            cohort_query, person_filter_params = format_filter_query(breakdown_cohort)
            person_filter = f"AND distinct_id IN ({cohort_query})"
        elif (filter.breakdown_type == "person"
              and isinstance(filter.breakdown, str)
              and isinstance(filter.breakdown_value, str)):
            breakdown_property = Property(key=filter.breakdown,
                                          value=filter.breakdown_value,
                                          type="person")
            filter.properties.append(breakdown_property)

        prop_filters, prop_filter_params = parse_prop_clauses(
            filter.properties, team.pk)

        # Later entries win on key clashes, same as the original dict literal.
        query_params: Dict = {"team_id": team.pk}
        query_params.update(prop_filter_params)
        query_params.update(entity_params)
        query_params["offset"] = filter.offset

        content_sql = PERSON_TREND_SQL.format(
            entity_filter=f"AND {entity_sql}",
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
            breakdown_filter="",
            person_filter=person_filter,
        )

        people_sql = PEOPLE_THROUGH_DISTINCT_SQL.format(
            content_sql=content_sql,
            latest_person_sql=GET_LATEST_PERSON_SQL.format(query=""),
        )
        people = sync_execute(people_sql, {**query_params, **person_filter_params})

        return ClickhousePersonSerializer(people, many=True).data
Пример #13
0
def properties_to_Q(
    properties: List[Property], team_id: int, is_person_query: bool = False, filter_test_accounts: bool = False
) -> Q:
    """
    Converts a filter to Q, for use in Django ORM .filter()
    If you're filtering a Person QuerySet, use is_person_query to avoid doing an unnecessary nested loop

    Args:
        properties: Properties to convert. The list is not modified.
        team_id: Team used to load test-account filters and to scope element filtering.
        is_person_query: When true, every property is AND-ed directly via ``property_to_Q()``.
        filter_test_accounts: When true, the team's ``test_account_filters`` are applied as well.

    Returns:
        A ``Q`` combining all conditions with AND; an empty ``Q()`` when there is nothing to filter on.
    """
    filters = Q()

    if filter_test_accounts:
        test_account_filters = Team.objects.only("test_account_filters").get(id=team_id).test_account_filters
        # Build a new list instead of extending ``properties`` in place, so the
        # caller's list does not accumulate test-account filters across calls.
        properties = [*properties, *(Property(**prop) for prop in test_account_filters)]

    if len(properties) == 0:
        return filters

    if is_person_query:
        for property in properties:
            filters &= property.property_to_Q()
        return filters

    person_properties = [prop for prop in properties if prop.type == "person"]
    if len(person_properties) > 0:
        person_Q = Q()
        for property in person_properties:
            person_Q &= property.property_to_Q()
        filters &= Q(Exists(Person.objects.filter(person_Q, id=OuterRef("person_id"),).only("pk")))

    for property in [prop for prop in properties if prop.type == "event"]:
        filters &= property.property_to_Q()

    # importing from .event and .cohort below to avoid importing from partially initialized modules

    element_properties = [prop for prop in properties if prop.type == "element"]
    if len(element_properties) > 0:
        from posthog.models.event import Event

        filters &= Q(
            Exists(
                Event.objects.filter(pk=OuterRef("id"))
                .filter(
                    **Event.objects.filter_by_element(
                        {item.key: item.value for item in element_properties}, team_id=team_id,
                    )
                )
                .only("id")
            )
        )

    cohort_properties = [prop for prop in properties if prop.type == "cohort"]
    if len(cohort_properties) > 0:
        from posthog.models.cohort import CohortPeople

        for item in cohort_properties:
            # Cohort properties with any key other than "id" are ignored.
            if item.key == "id":
                cohort_id = int(cast(Union[str, int], item.value))
                filters &= Q(
                    Exists(
                        CohortPeople.objects.filter(cohort_id=cohort_id, person_id=OuterRef("person_id"),).only("id")
                    )
                )

    return filters
Пример #14
0
            distinct_id="whatever",
            properties={"attr": "50"},
        ),
        _create_event(
            event="$pageview",
            team=team,
            distinct_id="whatever",
            properties={"attr": 5},
        ),
    ]


@pytest.mark.parametrize(
    "property,expected_event_indexes",
    [
        (Property(key="email", value="*****@*****.**"), [0]),
        (Property(key="email", value="*****@*****.**",
                  operator="exact"), [0]),
        (Property(key="email",
                  value=["*****@*****.**", "*****@*****.**"],
                  operator="exact"), [1]),
        (Property(key="attr", value="5"), [4]),
        (Property(key="email", value="*****@*****.**",
                  operator="is_not"), range(1, 5)),
        (Property(key="email",
                  value=["*****@*****.**", "*****@*****.**"],
                  operator="is_not"), range(2, 5)),
        (Property(key="email", value=r".*est@.*", operator="regex"), [0]),
        (Property(key="email", value=r"?.", operator="regex"), []),
    ],
)
Пример #15
0
 def properties_all_match(predicate):
     """Return True when ``predicate`` holds for every property of every group.

     Walks ``filters["groups"]`` (``filters`` comes from the enclosing scope),
     wraps each raw entry of a group's ``"properties"`` list in a
     ``Property`` and stops at the first one the predicate rejects. Groups
     without ``"properties"`` contribute nothing.
     """
     for condition in filters["groups"]:
         for raw_property in condition.get("properties", []):
             if not predicate(Property(**raw_property)):
                 return False
     return True
Пример #16
0
 def recording_duration_filter(self) -> Optional[Property]:
     """Return the recording duration filter as a ``Property``, if one is set.

     The raw value is read from ``self._data`` under
     ``SESSION_RECORDINGS_FILTER_TYPE_DURATION`` and decoded as JSON; the
     decoded mapping supplies the keyword arguments for ``Property``.
     Returns ``None`` when the value is missing or empty.
     """
     raw_duration_filter = self._data.get(SESSION_RECORDINGS_FILTER_TYPE_DURATION, None)
     if not raw_duration_filter:
         return None
     return Property(**json.loads(raw_duration_filter))
Пример #17
0
    def actor_query(self,
                    limit_actors: Optional[bool] = True) -> Tuple[str, Dict]:
        """Build the SQL and params that list the actors behind this trend.

        When a breakdown value is selected, ``self._filter`` is first replaced
        by a copy whose properties are AND-combined with a property for that
        breakdown (a ``cohort`` property for cohort breakdowns other than
        ``"all"``; otherwise a property of the breakdown's own type, carrying
        the group type index for ``"group"`` breakdowns).

        Returns a ``(sql, params)`` tuple. ``LIMIT``/``OFFSET`` placeholders
        are only added to the SQL when ``limit_actors`` is truthy; the params
        always carry ``offset`` and a ``limit`` of 200.
        """
        breakdown_prop: Optional[Property] = None

        if self._filter.breakdown_type == "cohort" and self._filter.breakdown_value != "all":
            cohort = Cohort.objects.get(pk=self._filter.breakdown_value,
                                        team_id=self._team.pk)
            breakdown_prop = Property(key="id", value=cohort.pk, type="cohort")
        elif (self._filter.breakdown_type
              and isinstance(self._filter.breakdown, str)
              and isinstance(self._filter.breakdown_value, str)):
            prop_kwargs: Dict[str, Any] = {
                "key": self._filter.breakdown,
                "value": self._filter.breakdown_value,
                "type": self._filter.breakdown_type,
            }
            # Only group breakdowns carry a group type index.
            if self._filter.breakdown_type == "group":
                prop_kwargs["group_type_index"] = self._filter.breakdown_group_type_index
            breakdown_prop = Property(**prop_kwargs)

        if breakdown_prop is not None:
            combined_properties = self._filter.property_groups.combine_properties(
                PropertyOperatorType.AND, [breakdown_prop])
            self._filter = self._filter.with_data(
                {"properties": combined_properties.to_dict()})

        include_recordings = self._filter.include_recordings
        joins_persons = not self.is_aggregating_by_groups

        extra_fields: List[str] = []
        if joins_persons:
            extra_fields = ["distinct_id", "team_id"]
        if include_recordings:
            extra_fields += ["uuid"]

        extra_event_properties = []
        if include_recordings:
            extra_event_properties = ["$window_id", "$session_id"]

        events_query, params = TrendsEventQuery(
            filter=self._filter,
            team=self._team,
            entity=self.entity,
            should_join_distinct_ids=joins_persons,
            should_join_persons=joins_persons,
            extra_event_properties=extra_event_properties,
            extra_fields=extra_fields,
        ).get_query()

        matching_events_select_statement = ""
        if include_recordings:
            matching_events_select_statement = (
                ", groupUniqArray(10)((timestamp, uuid, $session_id, $window_id)) as matching_events"
            )

        actors_sql = GET_ACTORS_FROM_EVENT_QUERY.format(
            id_field=self._aggregation_actor_field,
            matching_events_select_statement=matching_events_select_statement,
            events_query=events_query,
            limit="LIMIT %(limit)s" if limit_actors else "",
            offset="OFFSET %(offset)s" if limit_actors else "",
        )
        actors_params = {**params, "offset": self._filter.offset, "limit": 200}

        return actors_sql, actors_params