def _parse_properties(self, properties: Optional[Any]) -> List[Property]:
    if isinstance(properties, list):
        _properties = []
        for prop_params in properties:
            if isinstance(prop_params, Property):
                _properties.append(prop_params)
            else:
                try:
                    new_prop = Property(**prop_params)
                    _properties.append(new_prop)
                except:
                    continue
        return _properties
    if not properties:
        return []

    # old style dict properties
    ret = []
    for key, value in properties.items():
        key_split = key.split("__")
        ret.append(
            Property(
                key=key_split[0],
                value=value,
                operator=key_split[1] if len(key_split) > 1 else None,
                type="event",
            )
        )
    return ret

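# Illustrative sketch only: exercising the old-style dict branch above. It assumes this
# method is mixed into Filter, that the parsed list is exposed as filter.properties, and
# that Property exposes key/operator/value attributes (as the constructor calls above
# suggest); none of these names are confirmed beyond what the snippet itself shows.
filter = Filter(data={"properties": {"email__icontains": "@example.com", "$browser": "Chrome"}})
email_prop, browser_prop = filter.properties
assert email_prop.key == "email" and email_prop.operator == "icontains"
assert browser_prop.key == "$browser" and browser_prop.operator is None
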
def _process_content_sql(team: Team, entity: Entity, filter: Filter):
    filter = _handle_date_interval(filter)

    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team.pk)
    entity_sql, entity_params = format_entity_filter(entity=entity)

    person_filter = ""
    person_filter_params: Dict[str, Any] = {}

    if filter.breakdown_type == "cohort" and filter.breakdown_value != "all":
        cohort = Cohort.objects.get(pk=filter.breakdown_value)
        person_filter, person_filter_params = format_filter_query(cohort)
        person_filter = "AND distinct_id IN ({})".format(person_filter)
    elif (
        filter.breakdown_type == "person"
        and isinstance(filter.breakdown, str)
        and isinstance(filter.breakdown_value, str)
    ):
        person_prop = Property(**{"key": filter.breakdown, "value": filter.breakdown_value, "type": "person"})
        filter.properties.append(person_prop)

    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)
    params: Dict = {"team_id": team.pk, **prop_filter_params, **entity_params, "offset": filter.offset}

    content_sql = PERSON_TREND_SQL.format(
        entity_filter=f"AND {entity_sql}",
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        filters=prop_filters,
        breakdown_filter="",
        person_filter=person_filter,
    )

    return content_sql, {**params, **person_filter_params}

def uses_elements_chain(action: Action) -> bool:
    for action_step in action.steps.all():
        if any(Property(**prop).type == "element" for prop in (action_step.properties or [])):
            return True
        if any(getattr(action_step, attribute) is not None for attribute in ["selector", "tag_name", "href", "text"]):
            return True
    return False

def _get_search_clause(self) -> Tuple[str, Dict]:
    if not isinstance(self._filter, Filter):
        return "", {}

    if self._filter.search:
        prop_group = PropertyGroup(
            type=PropertyOperatorType.AND,
            values=[Property(key="email", operator="icontains", value=self._filter.search, type="person")],
        )
        search_clause, params = parse_prop_grouped_clauses(
            self._team_id,
            prop_group,
            prepend="search",
            has_person_id_joined=False,
            group_properties_joined=False,
            person_properties_mode=PersonPropertiesMode.DIRECT,
            _top_level=False,
        )

        distinct_id_clause = """
        id IN (
            SELECT person_id FROM person_distinct_id where distinct_id = %(distinct_id)s
        )
        """
        params.update({"distinct_id": self._filter.search})

        return f"AND (({search_clause}) OR ({distinct_id_clause}))", params

    return "", {}

def correlation_property_values(self) -> Optional[List[Property]]:
    # Used when fetching persons for property correlations
    _props = self._data.get(FUNNEL_CORRELATION_PROPERTY_VALUES)

    if not _props:
        return None

    if isinstance(_props, str):
        try:
            loaded_props = json.loads(_props)
        except json.decoder.JSONDecodeError:
            raise ValidationError("Properties are unparsable!")
    else:
        loaded_props = _props

    if isinstance(loaded_props, list):
        _properties = []
        for prop_params in loaded_props:
            if isinstance(prop_params, Property):
                _properties.append(prop_params)
            else:
                try:
                    new_prop = Property(**prop_params)
                    _properties.append(new_prop)
                except:
                    continue
        return _properties

    return None

def duration_filter_property(self) -> Optional[Property]:
    return next(
        (
            Property(**filter)
            for filter in self._all_filters
            if filter["type"] == SESSIONS_FILTER_RECORDING_TYPE and filter["key"] == "duration"
        ),
        None,
    )

def parse_prop_clauses(
    filters: List[Property],
    team_id: Optional[int],
    prepend: str = "global",
    table_name: str = "",
    allow_denormalized_props: bool = False,
    filter_test_accounts=False,
) -> Tuple[str, Dict]:
    final = []
    params: Dict[str, Any] = {}
    if team_id is not None:
        params["team_id"] = team_id
    if table_name != "":
        table_name += "."
    if filter_test_accounts:
        test_account_filters = Team.objects.only("test_account_filters").get(id=team_id).test_account_filters
        filters.extend([Property(**prop) for prop in test_account_filters])

    for idx, prop in enumerate(filters):
        if prop.type == "cohort":
            cohort = Cohort.objects.get(pk=prop.value, team_id=team_id)
            person_id_query, cohort_filter_params = format_filter_query(cohort)
            params = {**params, **cohort_filter_params}
            final.append(
                "AND {table_name}distinct_id IN ({clause})".format(table_name=table_name, clause=person_id_query)
            )
        elif prop.type == "person":
            filter_query, filter_params = prop_filter_json_extract(
                prop, idx, "{}person".format(prepend), allow_denormalized_props=allow_denormalized_props
            )
            final.append(
                "AND {table_name}distinct_id IN ({filter_query})".format(
                    filter_query=GET_DISTINCT_IDS_BY_PROPERTY_SQL.format(filters=filter_query),
                    table_name=table_name,
                )
            )
            params.update(filter_params)
        elif prop.type == "element":
            query, filter_params = filter_element({prop.key: prop.value}, prepend="{}_".format(idx))
            final.append("AND {}".format(query[0]))
            params.update(filter_params)
        else:
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                prepend,
                prop_var="{}properties".format(table_name),
                allow_denormalized_props=allow_denormalized_props,
            )
            final.append(f"{filter_query} AND {table_name}team_id = %(team_id)s" if team_id else filter_query)
            params.update(filter_params)
    return " ".join(final), params

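# Illustrative sketch only: how a caller might combine the clause and params returned by
# parse_prop_clauses above, in the style of the _process_content_sql helpers in this
# section. SOME_EVENTS_SQL is a made-up template name (the real templates, e.g.
# PERSON_TREND_SQL, live elsewhere), and `team` is assumed to be a Team instance in scope.
props = [
    Property(key="$browser", value="Chrome", operator="exact", type="event"),
    Property(key="email", value="@example.com", operator="icontains", type="person"),
]
prop_clause, prop_params = parse_prop_clauses(props, team.pk, filter_test_accounts=False)

content_sql = SOME_EVENTS_SQL.format(filters=prop_clause)  # hypothetical template with a {filters} slot
# sync_execute(content_sql, {"team_id": team.pk, **prop_params})
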
def _parse_properties(self, properties: Optional[Any]) -> List[Property]:
    if isinstance(properties, list):
        return [Property(**property) for property in properties]
    if not properties:
        return []

    # old style dict properties
    ret = []
    for key, value in properties.items():
        key_split = key.split("__")
        ret.append(
            Property(
                key=key_split[0],
                value=value,
                operator=key_split[1] if len(key_split) > 1 else None,
                type="event",
            )
        )
    return ret

def person_filter_properties(self) -> List[Property]:
    if len(self.properties) > 0:  # type: ignore
        return self.properties  # type: ignore
    return [
        Property(**filter)
        for filter in self._all_filters
        if filter["type"] in [SESSIONS_FILTER_COHORT_TYPE, SESSIONS_FILTER_PERSON_TYPE]
    ]

def _process_content_sql(team: Team, entity: Entity, filter: Filter):
    filter = _handle_date_interval(filter)

    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team.pk)
    entity_sql, entity_params = format_entity_filter(entity=entity)

    person_filter = ""
    person_filter_params: Dict[str, Any] = {}

    if filter.breakdown_type == "cohort" and filter.breakdown_value != "all":
        cohort = Cohort.objects.get(pk=filter.breakdown_value)
        person_filter, person_filter_params = format_filter_query(cohort)
        person_filter = "AND distinct_id IN ({})".format(person_filter)
    elif filter.breakdown_type and isinstance(filter.breakdown, str) and isinstance(filter.breakdown_value, str):
        breakdown_prop = Property(
            **{"key": filter.breakdown, "value": filter.breakdown_value, "type": filter.breakdown_type}
        )
        filter.properties.append(breakdown_prop)

    prop_filters, prop_filter_params = parse_prop_clauses(
        filter.properties, team.pk, filter_test_accounts=filter.filter_test_accounts
    )
    params: Dict = {"team_id": team.pk, **prop_filter_params, **entity_params, "offset": filter.offset}

    if entity.math in [WEEKLY_ACTIVE, MONTHLY_ACTIVE]:
        active_user_params = get_active_user_params(filter, entity, team.pk)
        content_sql = PERSONS_ACTIVE_USER_SQL.format(
            entity_query=f"AND {entity_sql}",
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
            breakdown_filter="",
            person_filter=person_filter,
            GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
            **active_user_params,
        )
    else:
        content_sql = PERSON_TREND_SQL.format(
            entity_filter=f"AND {entity_sql}",
            parsed_date_from=parsed_date_from,
            parsed_date_to=parsed_date_to,
            filters=prop_filters,
            breakdown_filter="",
            person_filter=person_filter,
        )
    return content_sql, {**params, **person_filter_params}

def simplified_cohort_filter_properties(cohort: Cohort, team: Team) -> List[Property]:
    """
    'Simplifies' cohort property filters, removing team-specific context from properties.
    """
    from ee.clickhouse.models.cohort import is_precalculated_query

    if cohort.is_static:
        return [Property(type="static-cohort", key="id", value=cohort.pk)]

    # Cohort has been precalculated
    if is_precalculated_query(cohort):
        return [Property(type="precalculated-cohort", key="id", value=cohort.pk)]

    # Cohort can have multiple match groups.
    # Each group is either
    # 1. "user has done X in time range Y at least N times" or
    # 2. "user has properties XYZ", including belonging to another cohort
    #
    # Users who match _any_ of the groups are considered to match the cohort.
    group_filters: List[List[Property]] = []
    for group in cohort.groups:
        if group.get("action_id") or group.get("event_id"):
            # :TODO: Support hasdone as separate property type
            return [Property(type="cohort", key="id", value=cohort.pk)]
        elif group.get("properties"):
            # :TRICKY: This will recursively simplify all the properties
            # :TRICKY: Cohort groups only contain properties one level deep, so property_groups.flat is sufficient here
            # TODO: Update this when cohort groups use property_groups
            filter = Filter(data=group, team=team)
            group_filters.append(filter.property_groups.flat)

    if len(group_filters) > 1:
        # :TODO: Support or properties
        return [Property(type="cohort", key="id", value=cohort.pk)]
    elif len(group_filters) == 1:
        return group_filters[0]
    else:
        return []

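# Illustrative sketch only: the possible outcomes of simplified_cohort_filter_properties
# above, shown with hypothetical cohort objects (static_cohort, behavioural_cohort,
# property_cohort). The mappings restate the branches of the function, nothing more.
simplified_cohort_filter_properties(static_cohort, team)
# -> [Property(type="static-cohort", key="id", value=static_cohort.pk)]

simplified_cohort_filter_properties(behavioural_cohort, team)
# -> [Property(type="cohort", key="id", value=behavioural_cohort.pk)]
#    (action/event-based groups and multi-group cohorts fall back to the full cohort query)

simplified_cohort_filter_properties(property_cohort, team)
# -> the single match group's own property filters, flattened via Filter(data=group, team=team)
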
def _calculate_entity_people(self, team: Team, entity: Entity, filter: Filter):
    parsed_date_from, parsed_date_to, _ = parse_timestamps(filter=filter, team_id=team.pk)
    entity_sql, entity_params = format_entity_filter(entity=entity)

    person_filter = ""
    person_filter_params: Dict[str, Any] = {}

    if filter.breakdown_type == "cohort" and filter.breakdown_value != "all":
        cohort = Cohort.objects.get(pk=filter.breakdown_value)
        person_filter, person_filter_params = format_filter_query(cohort)
        person_filter = "AND distinct_id IN ({})".format(person_filter)
    elif (
        filter.breakdown_type == "person"
        and isinstance(filter.breakdown, str)
        and isinstance(filter.breakdown_value, str)
    ):
        person_prop = Property(**{"key": filter.breakdown, "value": filter.breakdown_value, "type": "person"})
        filter.properties.append(person_prop)

    prop_filters, prop_filter_params = parse_prop_clauses(filter.properties, team.pk)
    params: Dict = {"team_id": team.pk, **prop_filter_params, **entity_params, "offset": filter.offset}

    content_sql = PERSON_TREND_SQL.format(
        entity_filter=f"AND {entity_sql}",
        parsed_date_from=parsed_date_from,
        parsed_date_to=parsed_date_to,
        filters=prop_filters,
        breakdown_filter="",
        person_filter=person_filter,
    )

    people = sync_execute(
        PEOPLE_THROUGH_DISTINCT_SQL.format(
            content_sql=content_sql, latest_person_sql=GET_LATEST_PERSON_SQL.format(query="")
        ),
        {**params, **person_filter_params},
    )
    serialized_people = ClickhousePersonSerializer(people, many=True).data

    return serialized_people

def properties_to_Q(
    properties: List[Property],
    team_id: int,
    is_person_query: bool = False,
    filter_test_accounts: bool = False,
) -> Q:
    """
    Converts a list of Property filters to a Q object, for use in Django ORM .filter().
    If you're filtering a Person QuerySet, use is_person_query to avoid doing an unnecessary nested loop.
    """
    filters = Q()

    if filter_test_accounts:
        test_account_filters = Team.objects.only("test_account_filters").get(id=team_id).test_account_filters
        properties.extend([Property(**prop) for prop in test_account_filters])

    if len(properties) == 0:
        return filters

    if is_person_query:
        for property in properties:
            filters &= property.property_to_Q()
        return filters

    person_properties = [prop for prop in properties if prop.type == "person"]
    if len(person_properties) > 0:
        person_Q = Q()
        for property in person_properties:
            person_Q &= property.property_to_Q()
        filters &= Q(Exists(Person.objects.filter(person_Q, id=OuterRef("person_id")).only("pk")))

    for property in [prop for prop in properties if prop.type == "event"]:
        filters &= property.property_to_Q()

    # Importing from .event and .cohort below to avoid importing from partially initialized modules
    element_properties = [prop for prop in properties if prop.type == "element"]
    if len(element_properties) > 0:
        from posthog.models.event import Event

        filters &= Q(
            Exists(
                Event.objects.filter(pk=OuterRef("id"))
                .filter(
                    **Event.objects.filter_by_element(
                        {item.key: item.value for item in element_properties},
                        team_id=team_id,
                    )
                )
                .only("id")
            )
        )

    cohort_properties = [prop for prop in properties if prop.type == "cohort"]
    if len(cohort_properties) > 0:
        from posthog.models.cohort import CohortPeople

        for item in cohort_properties:
            if item.key == "id":
                cohort_id = int(cast(Union[str, int], item.value))
                filters &= Q(
                    Exists(
                        CohortPeople.objects.filter(cohort_id=cohort_id, person_id=OuterRef("person_id")).only("id")
                    )
                )
    return filters

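# Illustrative sketch only: applying the Q object built by properties_to_Q above. Event and
# Person are the Django models already referenced inside the function; the exact call sites
# and the `team` variable are assumptions made for the example.
browser_prop = Property(key="$browser", value="Chrome", type="event")
matching_events = Event.objects.filter(team_id=team.pk).filter(
    properties_to_Q([browser_prop], team_id=team.pk)
)

# When filtering a Person queryset directly, pass is_person_query=True so person properties
# are applied in place instead of through the nested Exists(Person.objects...) subquery.
email_prop = Property(key="email", value="@example.com", operator="icontains", type="person")
matching_people = Person.objects.filter(team_id=team.pk).filter(
    properties_to_Q([email_prop], team_id=team.pk, is_person_query=True)
)
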
distinct_id="whatever", properties={"attr": "50"}, ), _create_event( event="$pageview", team=team, distinct_id="whatever", properties={"attr": 5}, ), ] @pytest.mark.parametrize( "property,expected_event_indexes", [ (Property(key="email", value="*****@*****.**"), [0]), (Property(key="email", value="*****@*****.**", operator="exact"), [0]), (Property(key="email", value=["*****@*****.**", "*****@*****.**"], operator="exact"), [1]), (Property(key="attr", value="5"), [4]), (Property(key="email", value="*****@*****.**", operator="is_not"), range(1, 5)), (Property(key="email", value=["*****@*****.**", "*****@*****.**"], operator="is_not"), range(2, 5)), (Property(key="email", value=r".*est@.*", operator="regex"), [0]), (Property(key="email", value=r"?.", operator="regex"), []), ], )
def properties_all_match(predicate):
    return all(
        predicate(Property(**property))
        for condition in filters["groups"]
        for property in condition.get("properties", [])
    )

def recording_duration_filter(self) -> Optional[Property]:
    duration_filter_data_str = self._data.get(SESSION_RECORDINGS_FILTER_TYPE_DURATION, None)
    if duration_filter_data_str:
        filter_data = json.loads(duration_filter_data_str)
        return Property(**filter_data)
    return None

def actor_query(self, limit_actors: Optional[bool] = True) -> Tuple[str, Dict]:
    if self._filter.breakdown_type == "cohort" and self._filter.breakdown_value != "all":
        cohort = Cohort.objects.get(pk=self._filter.breakdown_value, team_id=self._team.pk)
        self._filter = self._filter.with_data(
            {
                "properties": self._filter.property_groups.combine_properties(
                    PropertyOperatorType.AND, [Property(key="id", value=cohort.pk, type="cohort")]
                ).to_dict()
            }
        )
    elif (
        self._filter.breakdown_type
        and isinstance(self._filter.breakdown, str)
        and isinstance(self._filter.breakdown_value, str)
    ):
        if self._filter.breakdown_type == "group":
            breakdown_prop = Property(
                key=self._filter.breakdown,
                value=self._filter.breakdown_value,
                type=self._filter.breakdown_type,
                group_type_index=self._filter.breakdown_group_type_index,
            )
        else:
            breakdown_prop = Property(
                key=self._filter.breakdown, value=self._filter.breakdown_value, type=self._filter.breakdown_type
            )

        self._filter = self._filter.with_data(
            {
                "properties": self._filter.property_groups.combine_properties(
                    PropertyOperatorType.AND, [breakdown_prop]
                ).to_dict()
            }
        )

    extra_fields: List[str] = ["distinct_id", "team_id"] if not self.is_aggregating_by_groups else []
    if self._filter.include_recordings:
        extra_fields += ["uuid"]

    events_query, params = TrendsEventQuery(
        filter=self._filter,
        team=self._team,
        entity=self.entity,
        should_join_distinct_ids=not self.is_aggregating_by_groups,
        should_join_persons=not self.is_aggregating_by_groups,
        extra_event_properties=["$window_id", "$session_id"] if self._filter.include_recordings else [],
        extra_fields=extra_fields,
    ).get_query()

    matching_events_select_statement = (
        ", groupUniqArray(10)((timestamp, uuid, $session_id, $window_id)) as matching_events"
        if self._filter.include_recordings
        else ""
    )

    return (
        GET_ACTORS_FROM_EVENT_QUERY.format(
            id_field=self._aggregation_actor_field,
            matching_events_select_statement=matching_events_select_statement,
            events_query=events_query,
            limit="LIMIT %(limit)s" if limit_actors else "",
            offset="OFFSET %(offset)s" if limit_actors else "",
        ),
        {**params, "offset": self._filter.offset, "limit": 200},
    )