def wrapper(*args, **kwargs):
    # prepare caching params
    request: HttpRequest = args[1]
    team: Team = cast(User, request.user).team
    if cache_type == CacheType.TRENDS:
        filter = Filter(request=request)
        cache_key = generate_cache_key(filter.toJSON() + "_" + str(team.pk))
        payload = {"filter": filter.toJSON(), "team_id": team.pk}
    elif cache_type == CacheType.FUNNEL:
        pk = args[2]
        cache_key = generate_cache_key("funnel_{}_{}".format(pk, team.pk))
        payload = {"funnel_id": pk, "team_id": team.pk}
    else:
        raise ValueError("Invalid cache type!")

    # return cached result if possible
    if not request.GET.get("refresh", False):
        cached_result = cache.get(cache_key)
        if cached_result:
            return cached_result["result"]

    # call function being wrapped
    result = f(*args, **kwargs)

    # cache new data
    if result is not None:
        cache.set(
            cache_key,
            {"result": result, "details": payload, "type": cache_type},
            expiry_seconds,
        )
    return result
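# The wrapper above closes over `f`, `cache_type`, and `expiry_seconds` from an
# enclosing decorator factory that is not shown in this section. A minimal,
# self-contained sketch of that shape - the names `cached_function` and
# `toy_cache` are illustrative assumptions, not this codebase's API:
from functools import wraps

toy_cache: dict = {}

def cached_function(cache_type: str, expiry_seconds: int = 90):
    def decorator(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            # key on the cache type plus the call arguments
            cache_key = "{}_{}_{}".format(cache_type, args, sorted(kwargs.items()))
            if cache_key in toy_cache:
                return toy_cache[cache_key]["result"]
            result = f(*args, **kwargs)
            if result is not None:
                toy_cache[cache_key] = {"result": result, "type": cache_type}
            return result
        return wrapper
    return decorator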
def _test_refresh_dashboard_cache_types(
    self,
    filter: FilterType,
    cache_type: CacheType,
    patch_update_cache_item: MagicMock,
) -> None:
    self._create_dashboard(filter)

    update_cached_items()

    expected_args = [
        generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk)),
        cache_type,
        {"filter": filter.toJSON(), "team_id": self.team.pk},
    ]
    patch_update_cache_item.assert_any_call(*expected_args)

    update_cache_item(*expected_args)  # type: ignore

    item_key = generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk))
    self.assertIsNotNone(get_safe_cache(item_key))
def update_cached_items() -> None:
    tasks = []
    items = (
        DashboardItem.objects.filter(
            Q(
                Q(dashboard__is_shared=True)
                | Q(dashboard__last_accessed_at__gt=timezone.now() - relativedelta(days=7))
            )
        )
        .exclude(refreshing=True)
        .exclude(deleted=True)
    )

    for item in items.filter(filters__isnull=False).exclude(filters={}).distinct("filters"):
        filter = Filter(data=item.filters)
        cache_key = generate_cache_key("{}_{}".format(filter.toJSON(), item.team_id))
        payload = {"filter": filter.toJSON(), "team_id": item.team_id}
        tasks.append(update_cache_item_task.s(cache_key, TRENDS_ENDPOINT, payload))

    for item in items.filter(funnel_id__isnull=False).distinct("funnel_id"):
        cache_key = generate_cache_key("funnel_{}_{}".format(item.funnel_id, item.team_id))
        payload = {"funnel_id": item.funnel_id, "team_id": item.team_id}
        tasks.append(update_cache_item_task.s(cache_key, FUNNEL_ENDPOINT, payload))

    logger.info("Found {} items to refresh".format(len(tasks)))
    taskset = group(tasks)
    taskset.apply_async()
def test_stickiness_regression(self, patch_update_cache_item: MagicMock, patch_apply_async: MagicMock) -> None:
    # We moved Stickiness from being a "shown_as" item to its own insight.
    # This move caused issues, hence a regression test.
    filter_stickiness = StickinessFilter(
        data={
            "events": [{"id": "$pageview"}],
            "properties": [{"key": "$browser", "value": "Mac OS X"}],
            "date_from": "2012-01-10",
            "date_to": "2012-01-15",
            "insight": INSIGHT_STICKINESS,
            "shown_as": "Stickiness",
        },
        team=self.team,
        get_earliest_timestamp=Event.objects.earliest_timestamp,
    )
    filter = Filter(
        data={
            "events": [{"id": "$pageview"}],
            "properties": [{"key": "$browser", "value": "Mac OS X"}],
            "date_from": "2012-01-10",
            "date_to": "2012-01-15",
        }
    )
    shared_dashboard = Dashboard.objects.create(team=self.team, is_shared=True)
    DashboardItem.objects.create(dashboard=shared_dashboard, filters=filter_stickiness.to_dict(), team=self.team)
    DashboardItem.objects.create(dashboard=shared_dashboard, filters=filter.to_dict(), team=self.team)

    item_stickiness_key = generate_cache_key(filter_stickiness.toJSON() + "_" + str(self.team.pk))
    item_key = generate_cache_key(filter.toJSON() + "_" + str(self.team.pk))

    update_cached_items()

    for call_item in patch_update_cache_item.call_args_list:
        update_cache_item(*call_item[0])

    self.assertEqual(
        get_safe_cache(item_stickiness_key)["result"][0]["labels"],
        ["1 day", "2 days", "3 days", "4 days", "5 days", "6 days"],
    )
    self.assertEqual(
        get_safe_cache(item_key)["result"][0]["labels"],
        [
            "Tue. 10 January",
            "Wed. 11 January",
            "Thu. 12 January",
            "Fri. 13 January",
            "Sat. 14 January",
            "Sun. 15 January",
        ],
    )
def test_update_cache_item_calls_right_funnel_class(self, funnel_mock: MagicMock) -> None:
    # basic funnel
    filter = Filter(
        data={
            "insight": "FUNNELS",
            "events": [
                {"id": "$pageview", "order": 0, "type": "events"},
                {"id": "$pageview", "order": 1, "type": "events"},
            ],
        }
    )
    dashboard_item = self._create_dashboard(filter)

    funnel_mock.return_value.run.return_value = {}
    with self.settings(EE_AVAILABLE=False):
        update_cache_item(
            generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk)),
            CacheType.FUNNEL,
            {"filter": filter.toJSON(), "team_id": self.team.pk},
        )

    updated_dashboard_item = DashboardItem.objects.get(pk=dashboard_item.pk)
    self.assertEqual(updated_dashboard_item.refreshing, False)
    self.assertEqual(updated_dashboard_item.last_refresh, now())
    funnel_mock.assert_called_once()
def test_return_cached_results(self):
    dashboard = Dashboard.objects.create(team=self.team, name="dashboard")
    filter_dict = {
        "events": [{"id": "$pageview"}],
        "properties": [{"key": "$browser", "value": "Mac OS X"}],
    }
    filter = Filter(data=filter_dict)
    item = Insight.objects.create(dashboard=dashboard, filters=filter_dict, team=self.team)
    Insight.objects.create(dashboard=dashboard, filters=filter.to_dict(), team=self.team)
    response = self.client.get(f"/api/projects/{self.team.id}/dashboards/{dashboard.pk}/").json()
    self.assertEqual(response["items"][0]["result"], None)

    # cache results
    response = self.client.get(
        f"/api/projects/{self.team.id}/insights/trend/?events=%s&properties=%s"
        % (json.dumps(filter_dict["events"]), json.dumps(filter_dict["properties"]))
    )
    self.assertEqual(response.status_code, 200)

    item = Insight.objects.get(pk=item.pk)
    self.assertAlmostEqual(item.last_refresh, now(), delta=timezone.timedelta(seconds=5))
    self.assertEqual(item.filters_hash, generate_cache_key(f"{filter.toJSON()}_{self.team.pk}"))

    with self.assertNumQueries(13):
        # Django session, PostHog user, PostHog team, PostHog org membership, PostHog dashboard,
        # PostHog dashboard item, PostHog team, PostHog dashboard item UPDATE, PostHog team,
        # PostHog dashboard item UPDATE, PostHog dashboard UPDATE, PostHog dashboard item, PostHog org tags
        response = self.client.get(f"/api/projects/{self.team.id}/dashboards/{dashboard.pk}/").json()

    self.assertAlmostEqual(Dashboard.objects.get().last_accessed_at, now(), delta=timezone.timedelta(seconds=5))
    self.assertEqual(response["items"][0]["result"][0]["count"], 0)
def _test_refresh_dashboard_cache_types(
    self,
    filter: FilterType,
    patch_update_cache_item: MagicMock,
    patch_apply_async: MagicMock,
) -> None:
    dashboard_to_cache = Dashboard.objects.create(team=self.team, is_shared=True, last_accessed_at=now())
    DashboardItem.objects.create(
        dashboard=dashboard_to_cache,
        filters=filter.to_dict(),
        team=self.team,
        last_refresh=now() - timedelta(days=30),
    )

    update_cached_items()

    for call_item in patch_update_cache_item.call_args_list:
        update_cache_item(*call_item[0])

    item_key = generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk))
    self.assertIsNotNone(cache.get(item_key))
def calculate_funnel(self, request: request.Request) -> Dict[str, Any]:
    team = self.team
    refresh = should_refresh(request)
    filter = Filter(request=request, data={**request.data, "insight": INSIGHT_FUNNELS})
    cache_key = generate_cache_key("{}_{}".format(filter.toJSON(), team.pk))
    result = {"loading": True}

    if refresh:
        cache.delete(cache_key)
    else:
        cached_result = get_safe_cache(cache_key)
        if cached_result:
            task_id = cached_result.get("task_id", None)
            if not task_id:
                return {"result": cached_result["result"]}
            else:
                return {"result": result}

    payload = {"filter": filter.toJSON(), "team_id": team.pk}
    task = update_cache_item_task.delay(cache_key, CacheType.FUNNEL, payload)
    if not task.ready():
        task_id = task.id
        cache.set(cache_key, {"task_id": task_id}, 180)  # task will be live for 3 minutes
    self._refresh_dashboard(request=request)
    return {"result": result}
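# The cache entry written above encodes a small state machine: missing (never
# computed), {"task_id": ...} (a Celery task is still computing), or
# {"result": ...} (done). A hedged sketch of reading that contract - the helper
# name `read_cached_funnel` is hypothetical, not part of this codebase:
def read_cached_funnel(cache_key):
    cached = get_safe_cache(cache_key)
    if cached is None:
        return None  # nothing computed or scheduled yet
    if cached.get("task_id"):
        return {"loading": True}  # computation still in flight
    return {"result": cached["result"]}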
def dashboard_item_saved(sender, instance: DashboardItem, **kwargs):
    if instance.filters and instance.filters != {}:
        filter = get_filter(data=instance.filters, team=instance.team)
        instance.filters = filter.to_dict()
        instance.filters_hash = generate_cache_key("{}_{}".format(filter.toJSON(), instance.team_id))
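# dashboard_item_saved mutates `instance` in place, so it only takes effect if it
# runs before the row is written - i.e. connected to Django's pre_save signal. A
# sketch of the assumed wiring (the signal choice is an inference from the
# in-place mutation; the connection site is not shown in this section):
from django.db.models.signals import pre_save

pre_save.connect(dashboard_item_saved, sender=DashboardItem)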
def calculate_funnel(self, request: request.Request) -> Dict[str, Any]:
    team = request.user.team
    refresh = request.GET.get("refresh", None)
    filter = Filter(request=request)
    cache_key = generate_cache_key("{}_{}".format(filter.toJSON(), team.pk))
    result = {"loading": True}

    if refresh:
        cache.delete(cache_key)
    else:
        cached_result = cache.get(cache_key)
        if cached_result:
            task_id = cached_result.get("task_id", None)
            if not task_id:
                return cached_result["result"]
            else:
                return result

    payload = {"filter": filter.toJSON(), "team_id": team.pk}
    task = update_cache_item_task.delay(cache_key, FUNNEL_ENDPOINT, payload)
    task_id = task.id
    cache.set(cache_key, {"task_id": task_id}, 180)  # task will be live for 3 minutes
    self._refresh_dashboard(request=request)
    return result
def test_update_cache_item_calls_right_class(self, patch_import_from: MagicMock) -> None:
    filter = Filter(data={"insight": "TRENDS", "events": [{"id": "$pageview"}]})
    dashboard_item = self._create_dashboard(filter)

    with self.settings(EE_AVAILABLE=False):
        update_cache_item(
            generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk)),
            CacheType.TRENDS,
            {"filter": filter.toJSON(), "team_id": self.team.pk},
        )

    patch_import_from.assert_called_once_with("posthog.queries.trends", "Trends")
    updated_dashboard_item = DashboardItem.objects.get(pk=dashboard_item.pk)
    self.assertEqual(updated_dashboard_item.refreshing, False)
    self.assertEqual(updated_dashboard_item.last_refresh, now())
def funnel(self, request: request.Request, *args: Any, **kwargs: Any) -> Response:
    team = self.team
    refresh = request.GET.get("refresh", None)
    dashboard_id = request.GET.get("from_dashboard", None)
    filter = Filter(request=request)
    cache_key = generate_cache_key("{}_{}".format(filter.toJSON(), team.pk))
    result = {"loading": True}

    if refresh:
        cache.delete(cache_key)
    else:
        cached_result = get_safe_cache(cache_key)
        if cached_result:
            task_id = cached_result.get("task_id", None)
            if not task_id:
                return Response(cached_result["result"])
            else:
                return Response(result)

    payload = {"filter": filter.toJSON(), "team_id": team.pk}
    task = update_cache_item_task.delay(cache_key, CacheType.FUNNEL, payload)
    task_id = task.id
    cache.set(cache_key, {"task_id": task_id}, 180)  # task will be live for 3 minutes

    if dashboard_id:
        DashboardItem.objects.filter(pk=dashboard_id).update(last_refresh=now())
    return Response(result)
def test_return_cached_results(self):
    dashboard = Dashboard.objects.create(team=self.team, name="dashboard")
    filter_dict = {
        "events": [{"id": "$pageview"}],
        "properties": [{"key": "$browser", "value": "Mac OS X"}],
    }
    filter = Filter(data=filter_dict)
    item = DashboardItem.objects.create(dashboard=dashboard, filters=filter_dict, team=self.team)
    DashboardItem.objects.create(dashboard=dashboard, filters=filter.to_dict(), team=self.team)
    response = self.client.get("/api/dashboard/%s/" % dashboard.pk).json()
    self.assertEqual(response["items"][0]["result"], None)

    # cache results
    response = self.client.get(
        "/api/insight/trend/?events=%s&properties=%s"
        % (json.dumps(filter_dict["events"]), json.dumps(filter_dict["properties"]))
    )
    self.assertEqual(response.status_code, 200)

    item = DashboardItem.objects.get(pk=item.pk)
    self.assertAlmostEqual(item.last_refresh, now(), delta=timezone.timedelta(seconds=5))
    self.assertEqual(item.filters_hash, generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk)))

    with self.assertNumQueries(11):
        response = self.client.get("/api/dashboard/%s/" % dashboard.pk).json()

    self.assertAlmostEqual(Dashboard.objects.get().last_accessed_at, now(), delta=timezone.timedelta(seconds=5))
    self.assertEqual(response["items"][0]["result"][0]["count"], 0)
def update_cached_items() -> None:
    tasks = []
    items = (
        DashboardItem.objects.filter(
            Q(
                Q(dashboard__is_shared=True)
                | Q(dashboard__last_accessed_at__gt=timezone.now() - relativedelta(days=7))
            )
        )
        .exclude(dashboard__deleted=True)
        .exclude(refreshing=True)
        .exclude(deleted=True)
    )

    for item in items.filter(filters__isnull=False).exclude(filters={}).distinct("filters"):
        filter = Filter(data=item.filters)
        cache_key = generate_cache_key("{}_{}".format(filter.toJSON(), item.team_id))

        curr_data = cache.get(cache_key)
        # if a task is logged and still loading, leave it alone
        if curr_data and curr_data.get("task_id", None):
            continue

        cache_type = CacheType.FUNNEL if filter.insight == "FUNNELS" else CacheType.TRENDS
        payload = {"filter": filter.toJSON(), "team_id": item.team_id}
        tasks.append(update_cache_item_task.s(cache_key, cache_type, payload))

    logger.info("Found {} items to refresh".format(len(tasks)))
    taskset = group(tasks)
    taskset.apply_async()
def update_cached_items() -> None:
    tasks = []
    items = (
        DashboardItem.objects.filter(
            Q(
                Q(dashboard__is_shared=True)
                | Q(dashboard__last_accessed_at__gt=timezone.now() - relativedelta(days=7))
            )
        )
        .exclude(dashboard__deleted=True)
        .exclude(refreshing=True)
        .exclude(deleted=True)
        .distinct("filters_hash")
    )

    for item in DashboardItem.objects.filter(
        pk__in=Subquery(items.filter(filters__isnull=False).exclude(filters={}).distinct("filters").values("pk"))
    ).order_by(F("last_refresh").asc(nulls_first=True))[0:PARALLEL_DASHBOARD_ITEM_CACHE]:
        filter = get_filter(data=item.dashboard_filters(), team=item.team)
        cache_key = generate_cache_key("{}_{}".format(filter.toJSON(), item.team_id))
        cache_type = get_cache_type(filter)
        payload = {"filter": filter.toJSON(), "team_id": item.team_id}
        tasks.append(update_cache_item_task.s(cache_key, cache_type, payload))

    logger.info("Found {} items to refresh".format(len(tasks)))
    taskset = group(tasks)
    taskset.apply_async()
def test_refresh_dashboard_cache(self, patch_update_cache_item: MagicMock, patch_apply_async: MagicMock) -> None:
    # There are two things we want to refresh:
    # - any shared dashboard, as we only use cached items to show those
    # - any dashboard accessed in the last 7 days
    filter_dict = {
        "events": [{"id": "$pageview"}],
        "properties": [{"key": "$browser", "value": "Mac OS X"}],
    }
    filter = Filter(data=filter_dict)
    shared_dashboard = Dashboard.objects.create(team=self.team, is_shared=True)
    funnel_filter = Filter(data={"events": [{"id": "user signed up", "type": "events", "order": 0}]})

    item = DashboardItem.objects.create(dashboard=shared_dashboard, filters=filter.to_dict(), team=self.team)
    funnel_item = DashboardItem.objects.create(
        dashboard=shared_dashboard, filters=funnel_filter.to_dict(), team=self.team
    )

    dashboard_to_cache = Dashboard.objects.create(team=self.team, is_shared=True, last_accessed_at=now())
    item_to_cache = DashboardItem.objects.create(
        dashboard=dashboard_to_cache,
        filters=Filter(data={"events": [{"id": "cache this"}]}).to_dict(),
        team=self.team,
    )

    dashboard_do_not_cache = Dashboard.objects.create(
        team=self.team, is_shared=True, last_accessed_at="2020-01-01T12:00:00Z"
    )
    item_do_not_cache = DashboardItem.objects.create(
        dashboard=dashboard_do_not_cache,
        filters=Filter(data={"events": [{"id": "do not cache this"}]}).to_dict(),
        team=self.team,
    )

    item_key = generate_cache_key(filter.toJSON() + "_" + str(self.team.pk))
    funnel_key = generate_cache_key(funnel_filter.toJSON() + "_" + str(self.team.pk))

    update_cached_items()

    # pass the caught calls straight to the function - we do this to skip Redis
    for call_item in patch_update_cache_item.call_args_list:
        update_cache_item(*call_item[0])

    self.assertIsNotNone(DashboardItem.objects.get(pk=item.pk).last_refresh)
    self.assertIsNotNone(DashboardItem.objects.get(pk=item_to_cache.pk).last_refresh)
    self.assertIsNotNone(DashboardItem.objects.get(pk=item_do_not_cache.pk).last_refresh)
    self.assertEqual(get_safe_cache(item_key)["result"][0]["count"], 0)
    self.assertEqual(get_safe_cache(funnel_key)["result"][0]["count"], 0)
def dashboard_item_saved(sender, instance: Insight, dashboard=None, **kwargs):
    if instance.filters and instance.filters != {}:
        filter = get_filter(data=instance.dashboard_filters(dashboard=dashboard), team=instance.team)
        instance.filters_hash = generate_cache_key("{}_{}".format(filter.toJSON(), instance.team_id))
def wrapper(*args, **kw):
    from posthog.celery import update_cache_item_task

    cache_key = ""

    # prepare caching params
    request = args[1]
    team = request.user.team_set.get()
    payload = None
    dashboard_item_id = None
    refresh = request.GET.get("refresh", None)

    if cache_type == TRENDS_ENDPOINT:
        filter = Filter(request=request)
        cache_key = generate_cache_key(filter.toJSON() + "_" + str(team.pk))
        payload = {"filter": filter.toJSON(), "team_id": team.pk}
    elif cache_type == FUNNEL_ENDPOINT:
        pk = args[2]
        cache_key = generate_cache_key("funnel_{}_{}".format(pk, team.pk))
        payload = {"funnel_id": pk, "team_id": team.pk}

    if not refresh:
        # return result if cached
        cached_result = cache.get(cache_key)
        if cached_result:
            return cached_result["result"]

    # call wrapped function
    result = f(*args, **kw)

    # cache new data
    if result and payload:
        cache.set(
            cache_key,
            {"result": result, "details": payload, "type": cache_type},
            expiry,
        )
    return result
def get_result(self, dashboard_item: DashboardItem):
    if not dashboard_item.filters:
        return None
    filter = Filter(data=dashboard_item.filters)
    cache_key = generate_cache_key(filter.toJSON() + "_" + str(dashboard_item.team_id))
    result = cache.get(cache_key)
    # a "task_id" entry means the result is still being computed, so treat it as missing
    if not result or result.get("task_id", None):
        return None
    return result["result"]
def wrapper(*args, **kwargs):
    # prepare caching params
    request: HttpRequest = args[1]
    team = cast(User, request.user).team
    filter = None
    if not team:
        return f(*args, **kwargs)

    if cache_type == CacheType.TRENDS:
        filter = Filter(request=request)
        cache_key = generate_cache_key(filter.toJSON() + "_" + str(team.pk))
        payload = {"filter": filter.toJSON(), "team_id": team.pk}
    elif cache_type == CacheType.FUNNEL:
        pk = args[2]
        cache_key = generate_cache_key("funnel_{}_{}".format(pk, team.pk))
        payload = {"funnel_id": pk, "team_id": team.pk}
    else:
        raise ValueError("Invalid cache type!")

    # return cached result if possible
    if not request.GET.get("refresh", False):
        cached_result = cache.get(cache_key)
        if cached_result:
            return cached_result["result"]

    # call function being wrapped
    result = f(*args, **kwargs)

    # cache new data
    if result is not None:
        cache.set(
            cache_key,
            {"result": result, "details": payload, "type": cache_type},
            CACHED_RESULTS_TTL,
        )
        if filter:
            # the cache key doubles as DashboardItem.filters_hash (set by the
            # save receivers), which is what makes this lookup match
            dashboard_items = DashboardItem.objects.filter(team_id=team.pk, filters_hash=cache_key)
            dashboard_items.update(last_refresh=now())
    return result
def dashboard_item_update_task_params(
    item: DashboardItem, dashboard: Optional[Dashboard] = None
) -> Tuple[str, CacheType, Dict]:
    filter = get_filter(data=item.dashboard_filters(dashboard), team=item.team)
    cache_key = generate_cache_key("{}_{}".format(filter.toJSON(), item.team_id))
    cache_type = get_cache_type(filter)
    payload = {"filter": filter.toJSON(), "team_id": item.team_id}
    return cache_key, cache_type, payload
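# `get_cache_type` is called here and in update_cached_items but not defined in
# this section. Consistent with the inline check in the earlier update_cached_items
# variant (CacheType.FUNNEL when filter.insight is "FUNNELS", otherwise
# CacheType.TRENDS), a minimal sketch - the full helper may handle more insights:
def get_cache_type(filter: FilterType) -> CacheType:
    if filter.insight == "FUNNELS":
        return CacheType.FUNNEL
    return CacheType.TRENDS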
def test_update_cache_item_calls_right_class(self) -> None:
    filter = Filter(data={"insight": "TRENDS", "events": [{"id": "$pageview"}]})
    dashboard_item = self._create_dashboard(filter)

    update_cache_item(
        generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk)),
        CacheType.TRENDS,
        {"filter": filter.toJSON(), "team_id": self.team.pk},
    )

    updated_dashboard_item = Insight.objects.get(pk=dashboard_item.pk)
    self.assertEqual(updated_dashboard_item.refreshing, False)
    self.assertEqual(updated_dashboard_item.last_refresh, now())
def test_insights_old_filter(self) -> None:
    # Some filters_hash values are wrong (likely due to changes in our filters models);
    # previously we would not save changes to those insights and would retry them forever.
    dashboard = Dashboard.objects.create(team=self.team, is_shared=True)
    filter = {"events": [{"id": "$pageview"}]}
    item = Insight.objects.create(dashboard=dashboard, filters=filter, filters_hash="cache_thisiswrong", team=self.team)
    Insight.objects.all().update(filters_hash="cache_thisiswrong")
    self.assertEquals(Insight.objects.get().filters_hash, "cache_thisiswrong")

    update_cached_items()

    self.assertEquals(
        Insight.objects.get().filters_hash,
        generate_cache_key("{}_{}".format(Filter(data=filter).toJSON(), self.team.pk)),
    )
    self.assertEquals(Insight.objects.get().last_refresh.isoformat(), "2021-08-25T22:09:14.252000+00:00")
def wrapper(*args, **kwargs) -> Dict[str, Union[List, datetime, bool, str]]:
    # prepare caching params
    request: HttpRequest = args[1]
    team = cast(User, request.user).team
    filter = None
    if not team:
        return f(*args, **kwargs)

    filter = get_filter(request=request, team=team)
    cache_key = generate_cache_key("{}_{}".format(filter.toJSON(), team.pk))

    # return cached result if possible
    if not request.GET.get("refresh", False):
        cached_result = get_safe_cache(cache_key)
        if cached_result and cached_result.get("result"):
            return {**cached_result, "is_cached": True}

    # call function being wrapped
    result = f(*args, **kwargs)

    # cache new data
    if result is not None and not (isinstance(result.get("result"), dict) and result["result"].get("loading")):
        cache.set(
            cache_key,
            {"result": result["result"], "last_refresh": now()},
            TEMP_CACHE_RESULTS_TTL,
        )
        if filter:
            dashboard_items = DashboardItem.objects.filter(team_id=team.pk, filters_hash=cache_key)
            dashboard_items.update(last_refresh=now())
    return result
def wrapper(*args, **kwargs):
    # prepare caching params
    request: HttpRequest = args[1]
    team = cast(User, request.user).team
    filter = None
    if not team:
        return f(*args, **kwargs)

    filter = get_filter(request=request, team=team)
    cache_key = generate_cache_key("{}_{}".format(filter.toJSON(), team.pk))
    payload = {"filter": filter.toJSON(), "team_id": team.pk}

    # return cached result if possible
    if not request.GET.get("refresh", False):
        cached_result = cache.get(cache_key)
        if cached_result and cached_result.get("result"):
            return cached_result["result"]

    # call function being wrapped
    result = f(*args, **kwargs)

    # cache new data
    if result is not None and (not isinstance(result, dict) or not result.get("loading")):
        cache.set(
            cache_key,
            {"result": result, "details": payload},
            CACHED_RESULTS_TTL,
        )
        if filter:
            dashboard_items = DashboardItem.objects.filter(team_id=team.pk, filters_hash=cache_key)
            dashboard_items.update(last_refresh=now())
    return result
def _execute_sql(
    self,
    filter: RetentionFilter,
    team: Team,
) -> Dict[Tuple[int, int], Dict[str, Any]]:
    period = filter.period
    is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME
    events: QuerySet = QuerySet()
    entity_condition, entity_condition_stringified = self.get_entity_condition(
        filter.target_entity, "first_event_date"
    )
    returning_condition, returning_condition_stringified = self.get_entity_condition(
        filter.returning_entity, "events"
    )
    events = Event.objects.filter(team_id=team.pk).add_person_id(team.pk).annotate(event_date=F("timestamp"))
    trunc, fields = self._get_trunc_func("timestamp", period)

    if is_first_time_retention:
        filtered_events = events.filter(filter.properties_to_Q(team_id=team.pk))
        first_date = (
            filtered_events.filter(entity_condition)
            .values("person_id", "event", "action")
            .annotate(first_date=Min(trunc))
            .filter(filter.custom_date_filter_Q("first_date"))
            .distinct()
        )
        final_query = (
            filtered_events.filter(filter.date_filter_Q)
            .filter(returning_condition)
            .values_list("person_id", "event_date", "event", "action")
            .union(first_date.values_list("first_date", "person_id", "event", "action"))
        )
    else:
        filtered_events = events.filter(filter.date_filter_Q).filter(filter.properties_to_Q(team_id=team.pk))
        first_date = (
            filtered_events.filter(entity_condition)
            .annotate(first_date=trunc)
            .values("first_date", "person_id", "event", "action")
            .distinct()
        )
        final_query = (
            filtered_events.filter(returning_condition)
            .values_list("person_id", "event_date", "event", "action")
            .union(first_date.values_list("first_date", "person_id", "event", "action"))
        )

    event_query, events_query_params = final_query.query.sql_with_params()
    reference_event_query, first_date_params = first_date.query.sql_with_params()

    final_query = """
        SELECT
            {fields}
            COUNT(DISTINCT "events"."person_id"),
            array_agg(DISTINCT "events"."person_id") as people
        FROM ({event_query}) events
        LEFT JOIN ({reference_event_query}) first_event_date
          ON (events.person_id = first_event_date.person_id)
        WHERE event_date >= first_date
          AND {target_condition} AND {return_condition}
          OR ({target_condition} AND event_date = first_date)
        GROUP BY date, first_date
    """.format(
        event_query=event_query,
        reference_event_query=reference_event_query,
        fields=fields,
        return_condition=returning_condition_stringified,
        target_condition=entity_condition_stringified,
    )

    event_params = (filter.target_entity.id, filter.returning_entity.id, filter.target_entity.id)
    start_params = (
        (filter.date_from, filter.date_from) if period == "Month" or period == "Hour" else (filter.date_from,)
    )

    with connection.cursor() as cursor:
        cursor.execute(
            final_query,
            start_params + events_query_params + first_date_params + event_params,
        )
        data = namedtuplefetchall(cursor)

    scores: dict = {}
    for datum in data:
        key = round(datum.first_date, 1)
        if not scores.get(key, None):
            scores.update({key: {}})
        for person in datum.people:
            if not scores[key].get(person, None):
                scores[key].update({person: 1})
            else:
                scores[key][person] += 1

    by_dates = {}
    for row in data:
        people = sorted(
            row.people,
            key=lambda p: scores[round(row.first_date, 1)][int(p)],
            reverse=True,
        )
        random_key = "".join(
            random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(10)
        )
        cache_key = generate_cache_key("{}{}{}".format(random_key, str(round(row.first_date, 0)), str(team.pk)))
        cache.set(cache_key, people, 600)
        by_dates.update(
            {
                (int(row.first_date), int(row.date)): {
                    "count": row.count,
                    "people": people[0:100],
                    "offset": 100,
                    "next": cache_key if len(people) > 100 else None,
                }
            }
        )

    return by_dates
def query_retention(self, filters: Filter, team) -> dict:
    events: QuerySet = QuerySet()
    entity = (
        Entity({"id": "$pageview", "type": TREND_FILTER_TYPE_EVENTS})
        if not filters.target_entity
        else filters.target_entity
    )
    if entity.type == TREND_FILTER_TYPE_EVENTS:
        events = Event.objects.filter_by_event_with_people(event=entity.id, team_id=team.id)
    elif entity.type == TREND_FILTER_TYPE_ACTIONS:
        events = Event.objects.filter(action__pk=entity.id).add_person_id(team.id)

    filtered_events = events.filter(filters.date_filter_Q).filter(filters.properties_to_Q(team_id=team.pk))
    first_date = (
        filtered_events.annotate(first_date=TruncDay("timestamp")).values("first_date", "person_id").distinct()
    )
    events_query, events_query_params = filtered_events.query.sql_with_params()
    first_date_query, first_date_params = first_date.query.sql_with_params()

    full_query = """
        SELECT
            DATE_PART('days', first_date - %s) AS first_date,
            DATE_PART('days', timestamp - first_date) AS date,
            COUNT(DISTINCT "events"."person_id"),
            array_agg(DISTINCT "events"."person_id") as people
        FROM ({events_query}) events
        LEFT JOIN ({first_date_query}) first_event_date
          ON (events.person_id = first_event_date.person_id)
        WHERE timestamp > first_date
        GROUP BY date, first_date
    """
    full_query = full_query.format(
        events_query=events_query,
        first_date_query=first_date_query,
        event_date_query=TruncDay("timestamp"),
    )

    with connection.cursor() as cursor:
        cursor.execute(
            full_query,
            (filters.date_from,) + events_query_params + first_date_params,
        )
        data = namedtuplefetchall(cursor)

    scores: dict = {}
    for datum in data:
        key = round(datum.first_date, 1)
        if not scores.get(key, None):
            scores.update({key: {}})
        for person in datum.people:
            if not scores[key].get(person, None):
                scores[key].update({person: 1})
            else:
                scores[key][person] += 1

    by_dates = {}
    for row in data:
        people = sorted(
            row.people,
            key=lambda p: scores[round(row.first_date, 1)][int(p)],
            reverse=True,
        )
        random_key = "".join(
            random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(10)
        )
        cache_key = generate_cache_key("{}{}{}".format(random_key, str(round(row.first_date, 0)), str(team.pk)))
        cache.set(cache_key, people, 600)
        by_dates.update(
            {
                (int(row.first_date), int(row.date)): {
                    "count": row.count,
                    "people": people[0:100],
                    "offset": 100,
                    "next": cache_key if len(people) > 100 else None,
                }
            }
        )

    return by_dates
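# The scores/by_dates post-processing above ranks each cohort's people by how
# many result rows they appear in. The same counting, as a tiny standalone
# illustration with made-up rows:
from collections import Counter

rows = [(0.0, ["p1", "p2"]), (0.0, ["p1"]), (1.0, ["p2"])]  # (first_date, people)
scores: dict = {}
for first_date, people in rows:
    scores.setdefault(round(first_date, 1), Counter()).update(people)
assert scores[0.0]["p1"] == 2  # p1 appears in two rows of the 0.0 cohort
assert scores[1.0]["p2"] == 1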
def test_update_cache_item_calls_right_funnel_class_clickhouse(
    self,
    funnel_mock: MagicMock,
    funnel_trends_mock: MagicMock,
    funnel_time_to_convert_mock: MagicMock,
    funnel_strict_mock: MagicMock,
    funnel_unordered_mock: MagicMock,
) -> None:
    # basic funnel
    base_filter = Filter(
        data={
            "insight": "FUNNELS",
            "events": [
                {"id": "$pageview", "order": 0, "type": "events"},
                {"id": "$pageview", "order": 1, "type": "events"},
            ],
        }
    )

    with self.settings(EE_AVAILABLE=True, PRIMARY_DB="clickhouse"):
        filter = base_filter
        funnel_mock.return_value.run.return_value = {}
        update_cache_item(
            generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk)),
            CacheType.FUNNEL,
            {"filter": filter.toJSON(), "team_id": self.team.pk},
        )
        funnel_mock.assert_called_once()

        # trends funnel
        filter = base_filter.with_data({"funnel_viz_type": "trends"})
        funnel_trends_mock.return_value.run.return_value = {}
        update_cache_item(
            generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk)),
            CacheType.FUNNEL,
            {"filter": filter.toJSON(), "team_id": self.team.pk},
        )
        funnel_trends_mock.assert_called_once()
        self.assertEqual(funnel_trends_mock.call_args[1]["funnel_order_class"], funnel_mock)
        funnel_trends_mock.reset_mock()

        # trends unordered funnel
        filter = base_filter.with_data({"funnel_viz_type": "trends", "funnel_order_type": "unordered"})
        funnel_trends_mock.return_value.run.return_value = {}
        update_cache_item(
            generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk)),
            CacheType.FUNNEL,
            {"filter": filter.toJSON(), "team_id": self.team.pk},
        )
        funnel_trends_mock.assert_called_once()
        self.assertEqual(funnel_trends_mock.call_args[1]["funnel_order_class"], funnel_unordered_mock)
        funnel_trends_mock.reset_mock()

        # time to convert strict funnel
        filter = base_filter.with_data({"funnel_viz_type": "time_to_convert", "funnel_order_type": "strict"})
        funnel_time_to_convert_mock.return_value.run.return_value = {}
        update_cache_item(
            generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk)),
            CacheType.FUNNEL,
            {"filter": filter.toJSON(), "team_id": self.team.pk},
        )
        funnel_time_to_convert_mock.assert_called_once()
        self.assertEqual(funnel_time_to_convert_mock.call_args[1]["funnel_order_class"], funnel_strict_mock)
        funnel_time_to_convert_mock.reset_mock()

        # strict funnel
        filter = base_filter.with_data({"funnel_order_type": "strict"})
        funnel_strict_mock.return_value.run.return_value = {}
        update_cache_item(
            generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk)),
            CacheType.FUNNEL,
            {"filter": filter.toJSON(), "team_id": self.team.pk},
        )
        funnel_strict_mock.assert_called_once()
def test_cached_funnel(self):
    action_sign_up = Action.objects.create(team=self.team, name="signed up")
    ActionStep.objects.create(action=action_sign_up, tag_name="button", text="Sign up!")
    action_credit_card = Action.objects.create(team=self.team, name="paid")
    ActionStep.objects.create(action=action_credit_card, tag_name="button", text="Pay $10")
    action_play_movie = Action.objects.create(team=self.team, name="watched movie")
    ActionStep.objects.create(action=action_play_movie, tag_name="a", href="/movie")
    Action.objects.create(team=self.team, name="user logged out")
    [action.calculate_events() for action in Action.objects.all()]

    self.client.post(
        "/api/funnel/",
        data={
            "name": "Whatever",
            "filters": {
                "events": [{"id": "user signed up", "type": "events", "order": 0}],
                "actions": [{"id": action_sign_up.pk, "type": "actions", "order": 1}],
            },
        },
        content_type="application/json",
    ).json()
    funnel = Funnel.objects.get()
    funnel_key = generate_cache_key("funnel_{}_{}".format(funnel.pk, self.team.pk))

    # no refresh after getting
    self.client.get("/api/funnel/{}/".format(funnel.pk)).json()
    original_name = cache.get(funnel_key)["result"]["name"]

    self.client.patch(
        "/api/funnel/{}/".format(funnel.pk),
        data={"name": "Whatever2"},
        content_type="application/json",
    ).json()
    self.client.get("/api/funnel/{}/".format(funnel.pk)).json()
    refreshed_name = cache.get(funnel_key)["result"]["name"]
    self.assertEqual("Whatever", refreshed_name)

    self.client.get("/api/funnel/{}/?refresh=true".format(funnel.pk)).json()
    refreshed_name = cache.get(funnel_key)["result"]["name"]
    self.assertEqual("Whatever2", refreshed_name)
def query_retention(self, filter: Filter, team) -> dict:
    period = filter.period
    events: QuerySet = QuerySet()
    entity = (
        Entity({"id": "$pageview", "type": TREND_FILTER_TYPE_EVENTS})
        if not filter.target_entity
        else filter.target_entity
    )
    if entity.type == TREND_FILTER_TYPE_EVENTS:
        events = Event.objects.filter_by_event_with_people(event=entity.id, team_id=team.id)
    elif entity.type == TREND_FILTER_TYPE_ACTIONS:
        events = Event.objects.filter(action__pk=entity.id).add_person_id(team.id)

    filtered_events = events.filter(filter.date_filter_Q).filter(filter.properties_to_Q(team_id=team.pk))

    def _determineTrunc(subject: str, period: str) -> Tuple[Union[TruncHour, TruncDay, TruncWeek, TruncMonth], str]:
        if period == "Hour":
            fields = """
            FLOOR(DATE_PART('day', first_date - %s) * 24 + DATE_PART('hour', first_date - %s)) AS first_date,
            FLOOR(DATE_PART('day', timestamp - first_date) * 24 + DATE_PART('hour', timestamp - first_date)) AS date,
            """
            return TruncHour(subject), fields
        elif period == "Day":
            fields = """
            FLOOR(DATE_PART('day', first_date - %s)) AS first_date,
            FLOOR(DATE_PART('day', timestamp - first_date)) AS date,
            """
            return TruncDay(subject), fields
        elif period == "Week":
            fields = """
            FLOOR(DATE_PART('day', first_date - %s) / 7) AS first_date,
            FLOOR(DATE_PART('day', timestamp - first_date) / 7) AS date,
            """
            return TruncWeek(subject), fields
        elif period == "Month":
            fields = """
            FLOOR((DATE_PART('year', first_date) - DATE_PART('year', %s)) * 12 + DATE_PART('month', first_date) - DATE_PART('month', %s)) AS first_date,
            FLOOR((DATE_PART('year', timestamp) - DATE_PART('year', first_date)) * 12 + DATE_PART('month', timestamp) - DATE_PART('month', first_date)) AS date,
            """
            return TruncMonth(subject), fields
        else:
            raise ValueError(f"Period {period} is unsupported.")

    trunc, fields = _determineTrunc("timestamp", period)
    first_date = filtered_events.annotate(first_date=trunc).values("first_date", "person_id").distinct()
    events_query, events_query_params = filtered_events.query.sql_with_params()
    first_date_query, first_date_params = first_date.query.sql_with_params()

    full_query = """
        SELECT
            {fields}
            COUNT(DISTINCT "events"."person_id"),
            array_agg(DISTINCT "events"."person_id") as people
        FROM ({events_query}) events
        LEFT JOIN ({first_date_query}) first_event_date
          ON (events.person_id = first_event_date.person_id)
        WHERE timestamp >= first_date
        GROUP BY date, first_date
    """
    full_query = full_query.format(events_query=events_query, first_date_query=first_date_query, fields=fields)

    start_params = (
        (filter.date_from, filter.date_from) if period == "Month" or period == "Hour" else (filter.date_from,)
    )

    with connection.cursor() as cursor:
        cursor.execute(
            full_query,
            start_params + events_query_params + first_date_params,
        )
        data = namedtuplefetchall(cursor)

    scores: dict = {}
    for datum in data:
        key = round(datum.first_date, 1)
        if not scores.get(key, None):
            scores.update({key: {}})
        for person in datum.people:
            if not scores[key].get(person, None):
                scores[key].update({person: 1})
            else:
                scores[key][person] += 1

    by_dates = {}
    for row in data:
        people = sorted(row.people, key=lambda p: scores[round(row.first_date, 1)][int(p)], reverse=True)
        random_key = "".join(
            random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(10)
        )
        cache_key = generate_cache_key("{}{}{}".format(random_key, str(round(row.first_date, 0)), str(team.pk)))
        cache.set(cache_key, people, 600)
        by_dates.update(
            {
                (int(row.first_date), int(row.date)): {
                    "count": row.count,
                    "people": people[0:100],
                    "offset": 100,
                    "next": cache_key if len(people) > 100 else None,
                }
            }
        )

    return by_dates
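# A quick worked check of the Week bucketing produced by _determineTrunc above:
# the SQL computes FLOOR(days_between / 7), so a return visit 10 days after the
# cohort's first event lands in retention bucket 1 (the second week).
import math

def week_bucket(days_since_first: int) -> int:
    return math.floor(days_since_first / 7)

assert week_bucket(0) == 0    # same week as the first visit
assert week_bucket(10) == 1   # second week
assert week_bucket(21) == 3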