def test_event_properties_filter(self):
    """Global and entity-level event-property filters must compile to the
    same TrendsEventQuery SQL, and both queries must execute cleanly."""
    prop = {"key": "some_key", "value": "test_val", "operator": "exact", "type": "event"}

    global_filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [{"id": "viewed", "order": 0}],
            "properties": [prop],
        }
    )
    plain_entity = Entity({"id": "viewed", "type": "events"})
    global_sql, global_params = TrendsEventQuery(
        filter=global_filter, entity=plain_entity, team_id=self.team.pk
    ).get_query()
    sync_execute(global_sql, global_params)

    entity_filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [{"id": "viewed", "order": 0}],
        }
    )
    entity_with_props = Entity({"id": "viewed", "type": "events", "properties": [prop]})
    entity_sql, entity_params = TrendsEventQuery(
        filter=entity_filter, entity=entity_with_props, team_id=self.team.pk
    ).get_query()

    # Global property queries and entity property queries should be the same.
    self.assertEqual(
        sqlparse.format(global_sql, reindent=True),
        sqlparse.format(entity_sql, reindent=True),
    )
    sync_execute(entity_sql, entity_params)
def test_person_properties_filter(self):
    """Person-type and untyped property filters should work both at the
    global filter level and attached directly to the entity."""
    props = [
        {"key": "email", "value": "@posthog.com", "operator": "not_icontains", "type": "person"},
        {"key": "key", "value": "val"},
    ]

    filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [{"id": "viewed", "order": 0}],
            "properties": props,
        }
    )
    self._run_query(filter, Entity({"id": "viewed", "type": "events"}))

    entity = Entity({"id": "viewed", "type": "events", "properties": props})
    filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [entity.to_dict()],
        }
    )
    self._run_query(filter, entity)
def entities(self) -> List[Entity]:
    """Build the list of Entity objects from the raw actions/events data.

    Actions and events may arrive either as already-parsed lists or as
    JSON-encoded strings (e.g. straight from query params), so both forms
    are accepted. The combined list is sorted by each entity's ``order``
    and then given sequential ``index`` values.
    """
    processed_entities: List[Entity] = []
    if self._data.get(ACTIONS):
        actions = self._data.get(ACTIONS, [])
        if isinstance(actions, str):
            actions = json.loads(actions)
        processed_entities.extend(
            [Entity({**entity, "type": TREND_FILTER_TYPE_ACTIONS}) for entity in actions]
        )
    if self._data.get(EVENTS):
        events = self._data.get(EVENTS, [])
        if isinstance(events, str):
            events = json.loads(events)
        processed_entities.extend(
            [Entity({**entity, "type": TREND_FILTER_TYPE_EVENTS}) for entity in events]
        )
    # Fix: compare against None explicitly — the previous truthiness check
    # treated an explicit order of 0 the same as "no order" (falsy-zero bug).
    processed_entities.sort(key=lambda entity: entity.order if entity.order is not None else -1)
    # Set sequential index values on entities.
    for index, entity in enumerate(processed_entities):
        entity.index = index
    return processed_entities
def preprocess_params(self, filter: Filter, total_intervals=11):
    """Normalise the filter's date range for a retention query.

    Mutates ``filter._date_from``/``filter._date_to`` in place and returns
    ``(filter, entity, returning_entity, first_time_retention, date_from,
    date_to)``. NOTE(review): the two assignments to ``filter._date_to``
    are order-sensitive — ``filter.date_to`` is re-read between them.
    """
    period = filter.period or "Day"
    tdelta, t1 = self.determineTimedelta(total_intervals, period)
    # Push date_to forward by one interval so the final bucket is included.
    filter._date_to = (filter.date_to + t1).isoformat()
    first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    if period == "Hour":
        date_to = filter.date_to
        date_from: datetime.datetime = date_to - tdelta
    elif period == "Week":
        date_to = filter.date_to.replace(hour=0, minute=0, second=0, microsecond=0)
        date_from = date_to - tdelta
        # Snap back to the start of the week (isoweekday % 7 -> days since Sunday).
        date_from = date_from - timedelta(days=date_from.isoweekday() % 7)
    else:
        # Day / Month: truncate to midnight before subtracting the range.
        date_to = filter.date_to.replace(hour=0, minute=0, second=0, microsecond=0)
        date_from = date_to - tdelta
    filter._date_from = date_from.isoformat()
    filter._date_to = date_to.isoformat()
    # Default both entities to the $pageview event when none are supplied.
    entity = (
        Entity({"id": "$pageview", "type": TREND_FILTER_TYPE_EVENTS})
        if not filter.target_entity
        else filter.target_entity
    )
    returning_entity = (
        Entity({"id": "$pageview", "type": TREND_FILTER_TYPE_EVENTS})
        if not len(filter.entities) > 0
        else filter.entities[0]
    )
    # need explicit handling of date_from so it's not optional but also need filter object for date_filter_Q
    return filter, entity, returning_entity, first_time_retention, date_from, date_to
def test_event_properties_filter(self):
    """Event property filters should work whether they live on the global
    filter or directly on the entity."""
    prop = {"key": "some_key", "value": "test_val", "operator": "exact", "type": "event"}

    filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [{"id": "viewed", "order": 0}],
            "properties": [prop],
        }
    )
    self._run_query(filter, Entity({"id": "viewed", "type": "events"}))

    filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [{"id": "viewed", "order": 0}],
        }
    )
    self._run_query(filter, Entity({"id": "viewed", "type": "events", "properties": [prop]}))
def __init__(self, data: Optional[Dict[str, Any]] = None, request: Optional[HttpRequest] = None, **kwargs) -> None:
    """Build a filter from a plain data dict and/or an HTTP request.

    When a request is given, its GET params are merged underneath any
    explicit ``data`` keys, with PROPERTIES/ACTIONS/EVENTS JSON-decoded.

    Raises:
        ValueError: when neither ``data`` nor ``request`` is supplied.
    """
    if request:
        data = {
            **request.GET.dict(),
            **(data if data else {}),
            **({PROPERTIES: json.loads(request.GET[PROPERTIES])} if request.GET.get(PROPERTIES) else {}),
            ACTIONS: json.loads(request.GET.get(ACTIONS, "[]")),
            EVENTS: json.loads(request.GET.get(EVENTS, "[]")),
        }
    elif not data:
        raise ValueError("You need to define either a data dict or a request")

    self._date_from = data.get(DATE_FROM)
    self._date_to = data.get(DATE_TO)
    self.entities = data.get(ENTITIES, [])
    self.properties = self._parse_properties(data.get(PROPERTIES))
    # Fix: selector was previously assigned twice — once with a [] default
    # and once without, so the default was silently lost. Keep the single
    # assignment with the list default.
    self.selector = data.get(SELECTOR, [])
    self.interval = data.get(INTERVAL)
    self.shown_as = data.get(SHOWN_AS)
    self.breakdown = self._parse_breakdown(data)
    self.breakdown_type = data.get(BREAKDOWN_TYPE)
    self.breakdown_value = data.get(BREAKDOWN_VALUE)
    self._compare = data.get(COMPARE, "false")
    self.insight = data.get(INSIGHT, INSIGHT_TRENDS)
    self.session = data.get(SESSION)
    self.path_type = data.get(PATH_TYPE)
    self.start_point = data.get(START_POINT)
    self._offset = data.get(OFFSET)
    # Fall back to the insight's default display mode when none is supplied.
    self.display = data[DISPLAY] if data.get(DISPLAY) else INSIGHT_TO_DISPLAY[self.insight]

    if data.get(ACTIONS):
        self.entities.extend(
            [Entity({**entity, "type": TREND_FILTER_TYPE_ACTIONS}) for entity in data.get(ACTIONS, [])]
        )
    if data.get(EVENTS):
        self.entities.extend(
            [Entity({**entity, "type": TREND_FILTER_TYPE_EVENTS}) for entity in data.get(EVENTS, [])]
        )
    # Fix: compare against None so an explicit order of 0 isn't treated as
    # "no order" (the previous truthiness check mapped 0 to -1).
    self.entities = sorted(self.entities, key=lambda entity: entity.order if entity.order is not None else -1)
def entities(self) -> List[Entity]:
    """Parse raw actions/events (lists or JSON strings) into a sorted Entity list."""
    _entities: List[Entity] = []
    if self._data.get(ACTIONS):
        actions = self._data.get(ACTIONS, [])
        if isinstance(actions, str):
            actions = json.loads(actions)
        _entities.extend([Entity({**entity, "type": TREND_FILTER_TYPE_ACTIONS}) for entity in actions])
    if self._data.get(EVENTS):
        events = self._data.get(EVENTS, [])
        if isinstance(events, str):
            events = json.loads(events)
        _entities.extend([Entity({**entity, "type": TREND_FILTER_TYPE_EVENTS}) for entity in events])
    # Fix: compare against None explicitly — the previous truthiness check
    # treated an explicit order of 0 the same as a missing order.
    return sorted(_entities, key=lambda entity: entity.order if entity.order is not None else -1)
def _get_actor_subquery(self) -> Tuple[str, Dict[str, Any]]:
    """Build the JOIN clause (and its params) that attaches group or person
    data to funnel actors, depending on the aggregation mode."""
    if self.is_aggregating_by_groups:
        return GroupsJoinQuery(
            self._filter, self._team.pk, join_key="funnel_actors.actor_id"
        ).get_join_query()

    # Person aggregation: join each funnel actor to a filtered person subquery.
    correlation_entity = Entity(
        {"id": "person", "type": "events", "properties": self._filter.correlation_property_values}
    )
    person_query, subquery_params = ClickhousePersonQuery(
        self._filter, self._team.pk, entity=correlation_entity
    ).get_query()
    join_sql = f"""
        JOIN ({person_query}) person
            ON person.id = funnel_actors.actor_id
    """
    return join_sql, subquery_params
def target_entity(self) -> Entity:
    """Pick the entity stickiness is computed for.

    Prefers the first filter entity; otherwise falls back to an explicit
    entityId/type pair. Raises ValueError when neither is available.
    """
    if self.entities:
        return self.entities[0]
    if self.entityId and self.type:
        return Entity({"id": self.entityId, "type": self.type})
    raise ValueError("An entity must be provided for stickiness target entity to be determined")
def test_person_query_does_not_include_recording_events_if_flag_not_set(self):
    """Without the session-recordings flag, actors carry no matched_recordings."""
    _create_person(team_id=self.team.pk, distinct_ids=["u1"], properties={"email": "bla"})
    _create_event(event="pageview", distinct_id="u1", team=self.team, timestamp=timezone.now())

    event = {"id": "pageview", "name": "pageview", "type": "events", "order": 0}
    filter = Filter(
        data={
            "date_from": "2021-01-21T00:00:00Z",
            "date_to": "2021-01-22T00:00:00Z",
            "events": [event],
        }
    )
    _, serialized_actors = ClickhouseTrendsActors(self.team, Entity(event), filter).get_actors()
    self.assertEqual(serialized_actors[0].get("matched_recordings"), None)
def test_basic_event_filter(self):
    """A bare event filter should produce the minimal SELECT with date bounds.

    Both sides of the comparison go through sqlparse reindenting, so only
    the token stream of the expected SQL matters, not its whitespace.
    """
    filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [{"id": "viewed", "order": 0}],
        }
    )
    entity = Entity({"id": "viewed", "type": "events"})
    query, params = ClickhouseEventQuery(filter, entity, self.team.pk).get_query()

    expected = """
        SELECT e.timestamp as timestamp,
               e.properties as properties
        FROM events e
        WHERE team_id = %(team_id)s
          AND event = %(event)s
          AND timestamp >= '2021-05-01 00:00:00'
          AND timestamp <= '2021-05-07 23:59:59'
    """
    self.assertEqual(sqlparse.format(query, reindent=True), sqlparse.format(expected, reindent=True))
    sync_execute(query, params)
def insert_cohort_from_query(cohort_id: int, insight_type: str, filter_data: Dict[str, Any], entity_data: Dict[str, Any]) -> None:
    """Populate a cohort from the people matched by an insight query.

    No-ops unless ClickHouse is enabled; imports are deferred so the
    ee/clickhouse modules are only loaded in that case.
    """
    if not is_clickhouse_enabled():
        return

    from ee.clickhouse.queries.clickhouse_stickiness import insert_stickiness_people_into_cohort
    from ee.clickhouse.queries.util import get_earliest_timestamp
    from ee.clickhouse.views.actions import insert_entity_people_into_cohort
    from ee.clickhouse.views.cohort import insert_cohort_people_into_pg
    from posthog.models.entity import Entity
    from posthog.models.filters.filter import Filter
    from posthog.models.filters.stickiness_filter import StickinessFilter

    cohort = Cohort.objects.get(pk=cohort_id)
    entity = Entity(data=entity_data)

    if insight_type == INSIGHT_STICKINESS:
        stickiness_filter = StickinessFilter(
            data=filter_data, team=cohort.team, get_earliest_timestamp=get_earliest_timestamp
        )
        insert_stickiness_people_into_cohort(cohort, entity, stickiness_filter)
    else:
        insert_entity_people_into_cohort(cohort, entity, Filter(data=filter_data))

    # Mirror the ClickHouse cohort membership back into Postgres.
    insert_cohort_people_into_pg(cohort=cohort)
def _format_lifecycle_query(self, entity: Entity, filter: Filter, team_id: int) -> Tuple[str, Dict, Callable]:
    """Build the lifecycle SQL, its parameters, and a result parser for one entity.

    Returns ``("", {}, parser)`` when the entity's action cannot be
    resolved, so callers still receive a usable parser.
    """
    date_from = filter.date_from
    if not date_from:
        date_from = get_earliest_timestamp(team_id)
    interval = filter.interval or "day"
    num_intervals, seconds_in_interval, _ = get_time_diff(interval, filter.date_from, filter.date_to, team_id)
    interval_increment, interval_string, sub_interval_string = self.get_interval(interval)
    trunc_func = get_trunc_func_ch(interval)
    event_query = ""
    event_params: Dict[str, Any] = {}

    # Entity-level and global property filters are combined into one clause.
    props_to_filter = [*filter.properties, *entity.properties]
    prop_filters, prop_filter_params = parse_prop_clauses(
        props_to_filter, team_id, filter_test_accounts=filter.filter_test_accounts
    )
    _, _, date_params = parse_timestamps(filter=filter, team_id=team_id)

    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        try:
            action = entity.get_action()
            event_query, event_params = format_action_filter(action)
        # Fix: was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrow to Exception while keeping the fallback.
        except Exception:
            return "", {}, self._parse_result(filter, entity)
    else:
        event_query = "event = %(event)s"
        event_params = {"event": entity.id}

    return (
        LIFECYCLE_SQL.format(
            interval=interval_string,
            trunc_func=trunc_func,
            event_query=event_query,
            filters=prop_filters,
            sub_interval=sub_interval_string,
            GET_TEAM_PERSON_DISTINCT_IDS=GET_TEAM_PERSON_DISTINCT_IDS,
        ),
        {
            "team_id": team_id,
            # prev_date_from keeps hour/minute precision only for sub-day intervals.
            "prev_date_from": (date_from - interval_increment).strftime(
                "%Y-%m-%d{}".format(
                    " %H:%M:%S" if filter.interval == "hour" or filter.interval == "minute" else " 00:00:00"
                )
            ),
            "num_intervals": num_intervals,
            "seconds_in_interval": seconds_in_interval,
            **event_params,
            **date_params,
            **prop_filter_params,
        },
        self._parse_result(filter, entity),
    )
def test_static_cohort_filter(self):
    """Filtering on a static cohort property should produce executable SQL."""
    cohort = _create_cohort(team=self.team, name="cohort1", groups=[], is_static=True)
    filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [{"id": "viewed", "order": 0}],
            "properties": [{"key": "id", "value": cohort.pk, "type": "cohort"}],
        }
    )
    entity = Entity({"id": "viewed", "type": "events"})
    query, params = TrendsEventQuery(filter=filter, entity=entity, team_id=self.team.pk).get_query()
    sync_execute(query, params)
def test_stickiness_people_endpoint(self):
    """The people endpoint for a 1-day stickiness action filter returns
    exactly the persons who performed the action."""
    person1, _, _, person4 = self._create_multiple_people()
    watched_movie = action_factory(team=self.team, name="watch movie action", event_name="watched movie")

    filter = StickinessFilter(
        data={
            "shown_as": "Stickiness",
            "stickiness_days": 1,
            "date_from": "2020-01-01",
            "date_to": "2020-01-08",
        },
        team=self.team,
        get_earliest_timestamp=get_earliest_timestamp,
    )
    target_entity = Entity({"id": watched_movie.id, "type": "actions"})
    request = APIRequestFactory().get("/person/stickiness")

    people = stickiness().people(target_entity, filter, self.team, request)

    returned_ids = sorted(str(person["id"]) for person in people)
    self.assertListEqual(returned_ids, sorted([str(person1.pk), str(person4.pk)]))
def test_stickiness_people_with_entity_filter(self):
    """Entity-level property filters should narrow stickiness people results."""
    person1, _, _, _ = self._create_multiple_people()

    filter = StickinessFilter(
        data={
            "shown_as": "Stickiness",
            "stickiness_days": 1,
            "date_from": "2020-01-01",
            "date_to": "2020-01-08",
        },
        team=self.team,
        get_earliest_timestamp=get_earliest_timestamp,
    )
    target_entity = Entity(
        {"id": "watched movie", "type": "events", "properties": [{"key": "$browser", "value": "Chrome"}]}
    )
    request = APIRequestFactory().get("/person/stickiness")

    people = stickiness().people(target_entity, filter, self.team, request)

    self.assertEqual(len(people), 1)
    self.assertEqual(people[0]["id"], person1.id)
def people(self, request: Request, *args: Any, **kwargs: Any) -> Response:
    """Return up to 100 people matching the filter's first entity, with
    next/previous pagination URLs.

    Fixes: the reported ``count`` previously used ``[0:99]`` (99 items)
    while ``people`` returned 100; ``current_url`` was also computed twice.
    """
    team = request.user.team
    filter = Filter(request=request)
    shown_as = request.GET.get("shown_as")

    if len(filter.entities) >= 1:
        entity = filter.entities[0]
    else:
        entity = Entity({"id": request.GET["entityId"], "type": request.GET["type"]})

    # Ad-hoc date handling: dates are parsed differently with the Django ORM,
    # so clamp date_to to one interval after date_from.
    date_from = filter.date_from or timezone.now()
    if filter.interval == "month":
        filter._date_to = (date_from + relativedelta(months=1) - timedelta(days=1)).strftime("%Y-%m-%d %H:%M:%S")
    elif filter.interval == "week":
        filter._date_to = date_from + timedelta(weeks=1)
    elif filter.interval == "hour":
        filter._date_to = date_from + timedelta(hours=1)
    elif filter.interval == "minute":
        filter._date_to = date_from + timedelta(minutes=1)

    if shown_as is not None and shown_as == "Stickiness":
        stickiness_day = int(request.GET["stickiness_days"])
        serialized_people = self._calculate_stickiness_entity_people(team, entity, filter, stickiness_day)
    else:
        serialized_people = self._calculate_entity_people(team, entity, filter)

    current_url = request.get_full_path()
    next_url: Optional[str] = request.get_full_path()
    offset = filter.offset
    if len(serialized_people) > 100 and next_url:
        if "offset" in next_url:
            next_url = next_url[1:]
            next_url = next_url.replace("offset=" + str(offset), "offset=" + str(offset + 100))
        else:
            next_url = request.build_absolute_uri(
                "{}{}offset={}".format(next_url, "&" if "?" in next_url else "?", offset + 100)
            )
    else:
        next_url = None

    page = serialized_people[0:100]
    return Response(
        {
            # Fix: count now matches the number of people actually returned.
            "results": [{"people": page, "count": len(page)}],
            "next": next_url,
            "previous": current_url[1:],
        }
    )
def _parse_entity(self, entity_data) -> Optional[Entity]:
    """Coerce raw entity data (dict or JSON string) into an Entity.

    Only the id and type keys are kept; returns None for empty input.
    """
    if not entity_data:
        return None
    parsed = json.loads(entity_data) if isinstance(entity_data, str) else entity_data
    return Entity({"id": parsed["id"], "type": parsed["type"]})
def test_equality_with_type(self):
    """Entities with the same id are equal only when their types also match."""
    base = Entity({"id": "e1", "type": TREND_FILTER_TYPE_EVENTS})
    same_type = Entity({"id": "e1", "type": TREND_FILTER_TYPE_EVENTS})
    self.assertTrue(base.equals(same_type))

    action_variant = Entity({"id": "e1", "type": TREND_FILTER_TYPE_ACTIONS})
    self.assertFalse(base.equals(action_variant))
def action_filter(self) -> Optional[Entity]:
    """Return the optional "action_filter" entry as an Entity.

    Accepts either an already-parsed dict or a JSON-encoded string;
    returns None when the key is absent.
    """
    raw = self._data.get("action_filter")
    if raw is None:
        return None
    parsed = cast(Union[str, Dict], raw)
    if isinstance(parsed, str):
        parsed = json.loads(parsed)
    return Entity(parsed)
def test_person_properties_filter(self):
    """Global and entity-level person-property filters must compile to the
    same ClickhouseEventQuery SQL, and both queries must execute."""
    props = [
        {"key": "email", "value": "@posthog.com", "operator": "not_icontains", "type": "person"},
        {"key": "key", "value": "val"},
    ]

    filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [{"id": "viewed", "order": 0}],
            "properties": props,
        }
    )
    entity = Entity({"id": "viewed", "type": "events"})
    global_sql, global_params = ClickhouseEventQuery(filter, entity, self.team.pk).get_query()
    sync_execute(global_sql, global_params)

    filter = Filter(
        data={
            "date_from": "2021-05-01 00:00:00",
            "date_to": "2021-05-07 00:00:00",
            "events": [{"id": "viewed", "order": 0}],
        }
    )
    entity = Entity({"id": "viewed", "type": "events", "properties": props})
    entity_sql, entity_params = ClickhouseEventQuery(filter, entity, self.team.pk).get_query()

    # Global property queries and entity property queries should be the same.
    self.assertEqual(
        sqlparse.format(global_sql, reindent=True), sqlparse.format(entity_sql, reindent=True)
    )
    sync_execute(entity_sql, entity_params)
def correlation_person_entity(self) -> Optional["Entity"]:
    """Parse FUNNEL_CORRELATION_PERSON_ENTITY (dict or JSON string) into an
    Entity, or return None when it is absent/empty."""
    from posthog.models.entity import Entity

    raw_event = self._data.get(FUNNEL_CORRELATION_PERSON_ENTITY)
    event = json.loads(raw_event) if isinstance(raw_event, str) else raw_event
    if not event:
        return None
    return Entity(event)
def _format_entity_filter(entity: Entity) -> Tuple[str, Dict]:
    """Build the SQL fragment (and params) restricting rows to this entity."""
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        # Actions expand into their own (possibly multi-step) filter clause.
        action_clause, params = format_action_filter(entity.get_action())
        return "AND {}".format(action_clause), params
    # Plain events just match on the event name.
    return "AND event = %(event)s", {"event": entity.id}
def test_breakdown_by_group_props(self):
    """Trends broken down by a group property aggregate per industry, and
    drilling into one breakdown value returns the matching people."""
    self._create_groups()
    events_by_person = {
        "person1": [
            {
                "event": "sign up",
                "timestamp": datetime(2020, 1, 2, 12),
                "properties": {"$group_0": "org:5"},
                "group0_properties": {"industry": "finance"},
            },
            {
                "event": "sign up",
                "timestamp": datetime(2020, 1, 2, 13),
                "properties": {"$group_0": "org:6"},
                "group0_properties": {"industry": "technology"},
            },
            {
                "event": "sign up",
                "timestamp": datetime(2020, 1, 2, 15),
                "properties": {"$group_0": "org:7", "$group_1": "company:10"},
                "group0_properties": {"industry": "finance"},
                "group1_properties": {"industry": "finance"},
            },
        ],
    }
    journeys_for(events_by_person=events_by_person, team=self.team)

    filter = Filter(
        data={
            "date_from": "2020-01-01T00:00:00Z",
            "date_to": "2020-01-12",
            "breakdown": "industry",
            "breakdown_type": "group",
            "breakdown_group_type_index": 0,
            "events": [{"id": "sign up", "name": "sign up", "type": "events", "order": 0}],
        }
    )
    response = Trends().run(filter, self.team)

    self.assertEqual(len(response), 2)
    self.assertEqual(response[0]["breakdown_value"], "finance")
    self.assertEqual(response[0]["count"], 2)
    self.assertEqual(response[1]["breakdown_value"], "technology")
    self.assertEqual(response[1]["count"], 1)

    # Narrow to the "technology" breakdown and fetch the matching people.
    filter = filter.with_data(
        {"breakdown_value": "technology", "date_from": "2020-01-02T00:00:00Z", "date_to": "2020-01-03"}
    )
    entity = Entity({"id": "sign up", "name": "sign up", "type": "events", "order": 0})
    res = self._get_trend_people(filter, entity)
    self.assertEqual(res[0]["distinct_ids"], ["person1"])
def test_equality_with_old_style_and_new_style_properties(self):
    """A legacy {key: value} properties dict equals the equivalent
    list-of-property-dicts form."""
    old_style = Entity({"id": "e1", "type": TREND_FILTER_TYPE_EVENTS, "properties": {"key": "value"}})
    new_style = Entity(
        {"id": "e1", "type": TREND_FILTER_TYPE_EVENTS, "properties": [{"key": "key", "value": "value"}]}
    )
    self.assertTrue(old_style.equals(new_style))
def action_filters(self) -> List[Entity]:
    """Convert the action/event session filters into Entity objects,
    skipping filters of any other type."""
    type_mapping = {
        SESSIONS_FILTER_ACTION_TYPE: TREND_FILTER_TYPE_ACTIONS,
        SESSIONS_FILTER_EVENT_TYPE: TREND_FILTER_TYPE_EVENTS,
    }
    entities: List[Entity] = []
    for raw_filter in self._all_filters:
        if raw_filter["type"] not in type_mapping:
            continue
        entities.append(
            Entity({**raw_filter, "id": raw_filter["value"], "type": type_mapping[raw_filter["type"]]})
        )
    return entities
def test_inclusion(self):
    """An entity with fewer property constraints is a superset of the
    more-constrained entity, but not the other way around."""
    email_prop = {"key": "email", "value": "*****@*****.**", "type": "person"}
    url_prop = {"key": "current_url", "value": "*****@*****.**", "type": "element"}

    narrow = Entity({"id": "e1", "type": TREND_FILTER_TYPE_EVENTS, "properties": [email_prop, url_prop]})
    broad = Entity({"id": "e1", "type": TREND_FILTER_TYPE_EVENTS, "properties": [url_prop]})

    self.assertTrue(broad.is_superset(narrow))
    self.assertFalse(narrow.is_superset(broad))
def test_equality_with_simple_properties(self):
    """Property order does not affect equality, but a differing key does."""
    email_prop = {"key": "email", "value": "*****@*****.**", "type": "person"}
    url_prop = {"key": "current_url", "value": "*****@*****.**", "type": "element"}

    entity_a = Entity({"id": "e1", "type": TREND_FILTER_TYPE_EVENTS, "properties": [email_prop, url_prop]})
    entity_b = Entity({"id": "e1", "type": TREND_FILTER_TYPE_EVENTS, "properties": [url_prop, email_prop]})
    self.assertTrue(entity_a.equals(entity_b))

    changed_key = {"key": "current$url", "value": "*****@*****.**", "type": "element"}
    entity_c = Entity({"id": "e1", "type": TREND_FILTER_TYPE_EVENTS, "properties": [changed_key, email_prop]})
    self.assertFalse(entity_a.equals(entity_c))
def populate_entity_params(entity: Entity) -> Tuple[Dict, Dict]:
    """Return (query params, content_sql params) for filtering by an entity."""
    if entity.type == TREND_FILTER_TYPE_ACTIONS:
        # Actions expand into a dedicated filter clause with its own params.
        action_query, action_params = format_action_filter(entity.get_action())
        return dict(action_params), {"entity_query": "AND {action_query}".format(action_query=action_query)}
    # Plain events match on the event name only.
    return {"event": entity.id}, {"entity_query": "AND event = %(event)s"}
def _serialize_entity(self, entity: Entity, filter: StickinessFilter, team: Team) -> List[Dict[str, Any]]:
    """Run the stickiness calculation for one entity and wrap the result in
    the standard serialized insight shape (single-element list)."""
    base: Dict[str, Any] = {
        "action": entity.to_dict(),
        "label": entity.name,
        "count": 0,
        "data": [],
        "labels": [],
        "days": [],
    }
    result = copy.deepcopy(base)
    result.update(self.stickiness(entity=entity, filter=filter, team=team))
    return [result]