def _get_props(self, filters: List[Property], allow_denormalized_props: bool = False) -> Tuple[str, Dict]:
    """Render ``filters`` into a SQL fragment plus its query parameters.

    When the filter has ``filter_test_accounts`` set, the team's
    ``test_account_filters`` are rendered after the supplied properties.

    Args:
        filters: Properties to render. The list is left untouched.
        allow_denormalized_props: Forwarded to ``prop_filter_json_extract``
            for person properties and for properties that fall through to
            the default branch.

    Returns:
        A tuple of the space-joined SQL fragments and the merged parameters.
    """
    prepend = "global"

    # Work on a copy: extending the caller's list in place would leak the
    # test-account properties into whatever object owns that list and add
    # them again on every subsequent call.
    props = list(filters)
    if self._filter.filter_test_accounts:
        test_account_filters = (
            Team.objects.only("test_account_filters").get(id=self._team_id).test_account_filters
        )
        props.extend(Property(**prop) for prop in test_account_filters)

    final = []
    params: Dict[str, Any] = {}

    for idx, prop in enumerate(props):
        if prop.type == "cohort":
            person_id_query, cohort_filter_params = self._get_cohort_subquery(prop)
            params.update(cohort_filter_params)
            final.append(f"AND {person_id_query}")
        elif prop.type == "person":
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                "{}person".format(prepend),
                allow_denormalized_props=allow_denormalized_props,
                prop_var=self._PERSON_PROPERTIES_ALIAS,
            )
            final.append(filter_query)
            params.update(filter_params)
        elif prop.type == "element":
            query, filter_params = filter_element({prop.key: prop.value}, prepend="{}_".format(idx))
            # Only the first entry of the returned query is used here.
            final.append("AND {}".format(query[0]))
            params.update(filter_params)
        else:
            filter_query, filter_params = prop_filter_json_extract(
                prop,
                idx,
                prepend,
                prop_var="properties",
                allow_denormalized_props=allow_denormalized_props,
            )
            final.append(filter_query)
            params.update(filter_params)

    return " ".join(final), params
def format_person_query(cohort: Cohort) -> Tuple[str, Dict[str, Any]]:
    """Build the person sub-queries for every group of ``cohort``.

    Action groups produce a ``distinct_id`` lookup over matching events;
    property groups are rendered through ``GET_LATEST_PERSON_ID_SQL``.

    Returns:
        The sub-queries joined with ``" OR person_id IN "`` and the merged
        query parameters.
    """
    subqueries = []
    merged_params: Dict[str, Any] = {}

    for group_idx, group in enumerate(cohort.groups):
        if group.get("action_id"):
            action = Action.objects.get(pk=group["action_id"], team_id=cohort.team.pk)
            action_sql, action_params = format_action_filter(action)
            person_lookup = "SELECT distinct_id FROM events WHERE uuid IN ({query})".format(query=action_sql)
            merged_params.update(action_params)
            subqueries.append("(" + person_lookup + ")")
            continue

        if not group.get("properties"):
            continue

        # Imported locally, as in the original implementation.
        from ee.clickhouse.models.property import prop_filter_json_extract

        group_filter = Filter(data=group)
        fragments = []
        for idx, prop in enumerate(group_filter.properties):
            fragment, fragment_params = prop_filter_json_extract(
                prop=prop,
                idx=idx,
                prepend="{}_{}_{}_person".format(cohort.pk, group_idx, idx),
            )
            merged_params.update(fragment_params)
            fragments.append(" {}".format(fragment))
        subqueries.append(GET_LATEST_PERSON_ID_SQL.format(query="".join(fragments)))

    return " OR person_id IN ".join(subqueries), merged_params
def _get_group_filters(self): if self.is_aggregating_by_groups: conditions, params = [""], {} properties = self._filter.correlation_property_values if properties: for index, property in enumerate(properties): if property.type != "group": continue expr, prop_params = prop_filter_json_extract( property, index, prepend=f"group_type_{property.group_type_index}", prop_var= f"group_properties_{property.group_type_index}", allow_denormalized_props=True, ) conditions.append(expr) params.update(prop_params) return " ".join(conditions), params else: return "", {}
def format_person_query(cohort: Cohort) -> Tuple[str, Dict[str, Any]]:
    """Build the SQL condition selecting the persons that belong to ``cohort``.

    Static cohorts are resolved through ``PERSON_STATIC_COHORT_TABLE``.
    Otherwise every action group becomes a ``distinct_id IN (...)`` clause,
    and all property groups are OR-ed together inside a single
    ``person_id IN`` clause built from ``GET_LATEST_PERSON_ID_SQL``.

    Returns:
        The clauses joined with ``" OR "`` and the merged query parameters.
    """
    if cohort.is_static:
        static_sql = (
            "person_id IN (SELECT person_id FROM {} WHERE cohort_id = %(cohort_id)s AND team_id = %(team_id)s)"
            .format(PERSON_STATIC_COHORT_TABLE)
        )
        return static_sql, {"cohort_id": cohort.pk, "team_id": cohort.team_id}

    clauses = []
    merged_params: Dict[str, Any] = {}
    property_alternatives = []

    for group_idx, group in enumerate(cohort.groups):
        if group.get("action_id"):
            action = Action.objects.get(pk=group["action_id"], team_id=cohort.team.pk)
            action_sql, action_params = format_action_filter(
                action, prepend="_{}_action".format(group_idx)
            )

            # The date restriction is only present when the group sets "days".
            if group.get("days"):
                date_sql, date_params = parse_action_timestamps(int(group.get("days")))
            else:
                date_sql, date_params = "", {}

            person_lookup = (
                "SELECT distinct_id FROM events WHERE team_id = %(team_id)s {date_query} AND {query}".format(
                    query=action_sql, date_query=date_sql
                )
            )
            merged_params.update(action_params)
            merged_params.update(date_params)
            clauses.append("distinct_id IN (" + person_lookup + ")")
        elif group.get("properties"):
            # Imported locally, as in the original implementation.
            from ee.clickhouse.models.property import prop_filter_json_extract

            group_filter = Filter(data=group)
            group_sql = ""
            for idx, prop in enumerate(group_filter.properties):
                fragment, fragment_params = prop_filter_json_extract(
                    prop=prop,
                    idx=idx,
                    prepend="{}_{}_{}_person".format(cohort.pk, group_idx, idx),
                )
                merged_params.update(fragment_params)
                group_sql += fragment
            # Drop the first "AND " so the group can be OR-ed with the others.
            property_alternatives.append(group_sql.replace("AND ", "", 1))

    if property_alternatives:
        combined = "AND ({})".format(" OR ".join(property_alternatives))
        clauses.append("person_id IN {}".format(GET_LATEST_PERSON_ID_SQL.format(query=combined)))

    return " OR ".join(clauses), merged_params
def test_prop_filter_json_extract(test_events, property, expected_event_indexes):
    """Check that the generated property filter selects the expected events."""
    query, params = prop_filter_json_extract(property, 0)

    rows = sync_execute(f"SELECT uuid FROM events WHERE 1 = 1 {query}", params)
    # Each row is a one-element tuple holding the event uuid.
    actual_uuids = sorted(event_uuid for (event_uuid,) in rows)
    expected_uuids = sorted(test_events[position] for position in expected_event_indexes)

    assert actual_uuids == expected_uuids
def get_properties_cohort_subquery(cohort: Cohort, cohort_group: Dict, group_idx: int):
    """Render the property filters of one cohort group as a SQL condition.

    Returns:
        The concatenated filter fragments with the first ``"AND "`` removed,
        and the merged query parameters.
    """
    # Imported locally, as in the original implementation.
    from ee.clickhouse.models.property import prop_filter_json_extract

    group_filter = Filter(data=cohort_group)
    merged_params: Dict[str, Any] = {}
    combined_sql = ""

    for idx, prop in enumerate(group_filter.properties):
        fragment, fragment_params = prop_filter_json_extract(
            prop=prop,
            idx=idx,
            prepend="{}_{}_{}_person".format(cohort.pk, group_idx, idx),
        )
        merged_params.update(fragment_params)
        combined_sql += fragment

    return combined_sql.replace("AND ", "", 1), merged_params
def test_prop_filter_json_extract(test_events, property, expected_event_indexes, team):
    """Check the non-denormalized property filter against the team's events."""
    query, params = prop_filter_json_extract(property, 0, allow_denormalized_props=False)

    rows = sync_execute(
        f"SELECT uuid FROM events WHERE team_id = %(team_id)s {query}",
        {"team_id": team.pk, **params},
    )
    # Each row is a one-element tuple holding the event uuid.
    actual_uuids = sorted(event_uuid for (event_uuid,) in rows)
    expected_uuids = sorted(test_events[position] for position in expected_event_indexes)

    assert actual_uuids == expected_uuids
def _get_person_filters(self) -> Tuple[str, Dict]: conditions, params = [""], {} properties = self._filter.properties + (self._entity.properties if self._entity else []) for index, property in enumerate(properties): if property.type != "person": continue expr, prop_params = prop_filter_json_extract( property, index, prepend="personquery", allow_denormalized_props=True, transform_expression=lambda column_name: f"argMax(person.{column_name}, _timestamp)", ) conditions.append(expr) params.update(prop_params) return " ".join(conditions), params
def get_properties_cohort_subquery(
    cohort: Cohort, cohort_group: Dict, group_idx: int
) -> Tuple[str, Dict[str, Any]]:
    """Render the property filters of one cohort group as a SQL condition.

    Cohort-typed properties are expanded through ``format_filter_query``;
    every other property goes through ``prop_filter_json_extract``.

    Returns:
        The newline-joined fragments with the first ``"AND "`` removed, and
        the merged query parameters. If a referenced cohort does not exist,
        ``("0 = 14", {})`` is returned immediately.
    """
    # Imported locally, as in the original implementation.
    from ee.clickhouse.models.property import prop_filter_json_extract

    group_filter = Filter(data=cohort_group)
    merged_params: Dict[str, Any] = {}
    fragments = []

    # Cohorts don't yet support OR filters
    for idx, prop in enumerate(group_filter.property_groups.flat):
        if prop.type != "cohort":
            fragment, fragment_params = prop_filter_json_extract(
                prop=prop,
                idx=idx,
                prepend="{}_{}_{}_person".format(cohort.pk, group_idx, idx),
                allow_denormalized_props=False,
            )
            merged_params.update(fragment_params)
            fragments.append(fragment)
            continue

        try:
            referenced: Cohort = Cohort.objects.get(pk=prop.value, team_id=cohort.team_id)
        except Cohort.DoesNotExist:
            # A missing cohort short-circuits the whole group with a false condition.
            return "0 = 14", {}

        if referenced.pk == cohort.pk:
            # If we've encountered a cyclic dependency (meaning this cohort depends on this cohort),
            # we treat it as satisfied for all persons
            fragments.append("AND 11 = 11")
            continue

        person_id_query, cohort_filter_params = format_filter_query(referenced, idx, "person_id")
        merged_params.update(cohort_filter_params)
        fragments.append(f"AND person.id IN ({person_id_query})")

    return "\n".join(fragments).replace("AND ", "", 1), merged_params
def test_prop_filter_json_extract_materialized(test_events, property, expected_event_indexes, team):
    """Check the property filter once ``attr`` and ``email`` are materialized."""
    for column in ("attr", "email"):
        materialize("events", column)

    query, params = prop_filter_json_extract(property, 0, allow_denormalized_props=True)

    # With materialized columns the filter must not use JSON extraction.
    assert "JSONExtract" not in query

    rows = sync_execute(
        f"SELECT uuid FROM events WHERE team_id = %(team_id)s {query}",
        {"team_id": team.pk, **params},
    )
    # Each row is a one-element tuple holding the event uuid.
    actual_uuids = sorted(event_uuid for (event_uuid,) in rows)
    expected_uuids = sorted(test_events[position] for position in expected_event_indexes)

    assert actual_uuids == expected_uuids