Example #1
def wrapper(*args, **kwargs):
    # prepare caching params
    request: HttpRequest = args[1]
    team: Team = cast(User, request.user).team
    if cache_type == CacheType.TRENDS:
        filter = Filter(request=request)
        cache_key = generate_cache_key(filter.toJSON() + "_" +
                                       str(team.pk))
        payload = {"filter": filter.toJSON(), "team_id": team.pk}
    elif cache_type == CacheType.FUNNEL:
        pk = args[2]
        cache_key = generate_cache_key("funnel_{}_{}".format(
            pk, team.pk))
        payload = {"funnel_id": pk, "team_id": team.pk}
    else:
        raise ValueError("Invalid cache type!")
    # return cached result if possible
    if not request.GET.get("refresh", False):
        cached_result = cache.get(cache_key)
        if cached_result:
            return cached_result["result"]
    # call function being wrapped
    result = f(*args, **kwargs)
    # cache new data
    if result is not None:
        cache.set(
            cache_key,
            {
                "result": result,
                "details": payload,
                "type": cache_type,
            },
            expiry_seconds,
        )
    return result
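Note: none of these excerpts define generate_cache_key itself. A minimal sketch of the helper, assuming the MD5-based implementation from the PostHog codebase (the "cache_" prefix matches the key asserted in Example #23):

import hashlib

def generate_cache_key(stringified: str) -> str:
    # Hash the stringified filter plus team id into a short, stable key;
    # the "cache_" prefix makes these keys easy to spot in the cache backend.
    return "cache_" + hashlib.md5(stringified.encode("utf-8")).hexdigest()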
Example #2
    def _test_refresh_dashboard_cache_types(
        self,
        filter: FilterType,
        cache_type: CacheType,
        patch_update_cache_item: MagicMock,
    ) -> None:
        self._create_dashboard(filter)

        update_cached_items()

        expected_args = [
            generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk)),
            cache_type,
            {
                "filter": filter.toJSON(),
                "team_id": self.team.pk,
            },
        ]

        patch_update_cache_item.assert_any_call(*expected_args)

        update_cache_item(*expected_args)  # type: ignore

        item_key = generate_cache_key("{}_{}".format(filter.toJSON(),
                                                     self.team.pk))
        self.assertIsNotNone(get_safe_cache(item_key))
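Note: get_safe_cache is used here and in several later examples in place of a bare cache.get. A plausible sketch, assuming it simply swallows cache-backend errors:

from django.core.cache import cache

def get_safe_cache(key: str):
    # Assumed behaviour: treat any backend error (e.g. a cached pickle that
    # no longer deserializes after a model change) as an ordinary cache miss.
    try:
        return cache.get(key)
    except Exception:
        return None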
Example #3
def update_cached_items() -> None:

    tasks = []
    items = (DashboardItem.objects.filter(
        Q(
            Q(dashboard__is_shared=True)
            | Q(dashboard__last_accessed_at__gt=timezone.now() -
                relativedelta(days=7)))).exclude(refreshing=True).exclude(
                    deleted=True))

    for item in items.filter(filters__isnull=False).exclude(
            filters={}).distinct("filters"):
        filter = Filter(data=item.filters)
        cache_key = generate_cache_key("{}_{}".format(filter.toJSON(),
                                                      item.team_id))
        payload = {"filter": filter.toJSON(), "team_id": item.team_id}
        tasks.append(
            update_cache_item_task.s(cache_key, TRENDS_ENDPOINT, payload))

    for item in items.filter(funnel_id__isnull=False).distinct("funnel_id"):
        cache_key = generate_cache_key("funnel_{}_{}".format(
            item.funnel_id, item.team_id))
        payload = {"funnel_id": item.funnel_id, "team_id": item.team_id}
        tasks.append(
            update_cache_item_task.s(cache_key, FUNNEL_ENDPOINT, payload))

    logger.info("Found {} items to refresh".format(len(tasks)))
    taskset = group(tasks)
    taskset.apply_async()
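Note: the refresh tasks here are queued by collecting Celery signatures and dispatching them as one group. A self-contained sketch of that fan-out pattern (the task body and arguments below are placeholders):

from celery import group, shared_task

@shared_task(ignore_result=True)
def update_cache_item_task(cache_key: str, cache_type: str, payload: dict) -> None:
    ...  # placeholder: recompute the insight and cache.set() the result

# .s(...) binds arguments into a signature without running anything;
# group(...).apply_async() then fans every signature out to the workers.
tasks = [update_cache_item_task.s("cache_abc", "Trends", {"team_id": 1})]
group(tasks).apply_async()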
Example #4
    def test_stickiness_regression(self, patch_update_cache_item: MagicMock, patch_apply_async: MagicMock) -> None:
        # We moved Stickiness from being a "shown_as" item to its own insight
        # This move caused issues, hence a regression test
        filter_stickiness = StickinessFilter(
            data={
                "events": [{"id": "$pageview"}],
                "properties": [{"key": "$browser", "value": "Mac OS X"}],
                "date_from": "2012-01-10",
                "date_to": "2012-01-15",
                "insight": INSIGHT_STICKINESS,
                "shown_as": "Stickiness",
            },
            team=self.team,
            get_earliest_timestamp=Event.objects.earliest_timestamp,
        )
        filter = Filter(
            data={
                "events": [{"id": "$pageview"}],
                "properties": [{"key": "$browser", "value": "Mac OS X"}],
                "date_from": "2012-01-10",
                "date_to": "2012-01-15",
            }
        )
        shared_dashboard = Dashboard.objects.create(team=self.team, is_shared=True)

        DashboardItem.objects.create(dashboard=shared_dashboard, filters=filter_stickiness.to_dict(), team=self.team)
        DashboardItem.objects.create(dashboard=shared_dashboard, filters=filter.to_dict(), team=self.team)

        item_stickiness_key = generate_cache_key(filter_stickiness.toJSON() + "_" + str(self.team.pk))
        item_key = generate_cache_key(filter.toJSON() + "_" + str(self.team.pk))

        update_cached_items()

        for call_item in patch_update_cache_item.call_args_list:
            update_cache_item(*call_item[0])

        self.assertEqual(
            get_safe_cache(item_stickiness_key)["result"][0]["labels"],
            ["1 day", "2 days", "3 days", "4 days", "5 days", "6 days"],
        )
        self.assertEqual(
            get_safe_cache(item_key)["result"][0]["labels"],
            [
                "Tue. 10 January",
                "Wed. 11 January",
                "Thu. 12 January",
                "Fri. 13 January",
                "Sat. 14 January",
                "Sun. 15 January",
            ],
        )
Example #5
    def test_update_cache_item_calls_right_funnel_class(self, funnel_mock: MagicMock) -> None:
        #  basic funnel
        filter = Filter(
            data={
                "insight": "FUNNELS",
                "events": [
                    {"id": "$pageview", "order": 0, "type": "events"},
                    {"id": "$pageview", "order": 1, "type": "events"},
                ],
            }
        )
        dashboard_item = self._create_dashboard(filter)

        funnel_mock.return_value.run.return_value = {}
        with self.settings(EE_AVAILABLE=False):
            update_cache_item(
                generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk)),
                CacheType.FUNNEL,
                {"filter": filter.toJSON(), "team_id": self.team.pk,},
            )

        updated_dashboard_item = DashboardItem.objects.get(pk=dashboard_item.pk)
        self.assertEqual(updated_dashboard_item.refreshing, False)
        self.assertEqual(updated_dashboard_item.last_refresh, now())
        funnel_mock.assert_called_once()
Example #6
    def test_return_cached_results(self):
        dashboard = Dashboard.objects.create(team=self.team, name="dashboard")
        filter_dict = {
            "events": [{"id": "$pageview"}],
            "properties": [{"key": "$browser", "value": "Mac OS X"}],
        }
        filter = Filter(data=filter_dict)

        item = Insight.objects.create(dashboard=dashboard, filters=filter_dict, team=self.team,)
        Insight.objects.create(
            dashboard=dashboard, filters=filter.to_dict(), team=self.team,
        )
        response = self.client.get(f"/api/projects/{self.team.id}/dashboards/%s/" % dashboard.pk).json()
        self.assertEqual(response["items"][0]["result"], None)

        # cache results
        response = self.client.get(
            f"/api/projects/{self.team.id}/insights/trend/?events=%s&properties=%s"
            % (json.dumps(filter_dict["events"]), json.dumps(filter_dict["properties"]))
        )
        self.assertEqual(response.status_code, 200)
        item = Insight.objects.get(pk=item.pk)
        self.assertAlmostEqual(item.last_refresh, now(), delta=timezone.timedelta(seconds=5))
        self.assertEqual(item.filters_hash, generate_cache_key(f"{filter.toJSON()}_{self.team.pk}"))

        with self.assertNumQueries(13):
            # Django session, PostHog user, PostHog team, PostHog org membership, PostHog dashboard,
            # PostHog dashboard item, PostHog team, PostHog dashboard item UPDATE, PostHog team,
            # PostHog dashboard item UPDATE, PostHog dashboard UPDATE, PostHog dashboard item, PostHog org tags
            response = self.client.get(f"/api/projects/{self.team.id}/dashboards/%s/" % dashboard.pk).json()

        self.assertAlmostEqual(Dashboard.objects.get().last_accessed_at, now(), delta=timezone.timedelta(seconds=5))
        self.assertEqual(response["items"][0]["result"][0]["count"], 0)
Example #7
    def _test_refresh_dashboard_cache_types(
        self,
        filter: FilterType,
        patch_update_cache_item: MagicMock,
        patch_apply_async: MagicMock,
    ) -> None:

        dashboard_to_cache = Dashboard.objects.create(team=self.team,
                                                      is_shared=True,
                                                      last_accessed_at=now())

        DashboardItem.objects.create(
            dashboard=dashboard_to_cache,
            filters=filter.to_dict(),
            team=self.team,
            last_refresh=now() - timedelta(days=30),
        )
        update_cached_items()

        for call_item in patch_update_cache_item.call_args_list:
            update_cache_item(*call_item[0])

        item_key = generate_cache_key("{}_{}".format(filter.toJSON(),
                                                     self.team.pk))
        self.assertIsNotNone(cache.get(item_key))
Example #8
    def calculate_funnel(self, request: request.Request) -> Dict[str, Any]:
        team = self.team
        refresh = should_refresh(request)

        filter = Filter(request=request,
                        data={
                            **request.data, "insight": INSIGHT_FUNNELS
                        })
        cache_key = generate_cache_key("{}_{}".format(filter.toJSON(),
                                                      team.pk))
        result = {"loading": True}

        if refresh:
            cache.delete(cache_key)
        else:
            cached_result = get_safe_cache(cache_key)
            if cached_result:
                task_id = cached_result.get("task_id", None)
                if not task_id:
                    return {"result": cached_result["result"]}
                else:
                    return {"result": result}

        payload = {"filter": filter.toJSON(), "team_id": team.pk}
        task = update_cache_item_task.delay(cache_key, CacheType.FUNNEL,
                                            payload)
        if not task.ready():
            task_id = task.id
            cache.set(cache_key, {"task_id": task_id},
                      180)  # task will be live for 3 minutes

        self._refresh_dashboard(request=request)
        return {"result": result}
Example #9
def dashboard_item_saved(sender, instance: DashboardItem, **kwargs):
    if instance.filters and instance.filters != {}:
        filter = get_filter(data=instance.filters, team=instance.team)

        instance.filters = filter.to_dict()
        instance.filters_hash = generate_cache_key("{}_{}".format(
            filter.toJSON(), instance.team_id))
Example #10
    def calculate_funnel(self, request: request.Request) -> Dict[str, Any]:
        team = request.user.team
        refresh = request.GET.get("refresh", None)

        filter = Filter(request=request)
        cache_key = generate_cache_key("{}_{}".format(filter.toJSON(),
                                                      team.pk))
        result = {"loading": True}

        if refresh:
            cache.delete(cache_key)
        else:
            cached_result = cache.get(cache_key)
            if cached_result:
                task_id = cached_result.get("task_id", None)
                if not task_id:
                    return cached_result["result"]
                else:
                    return result

        payload = {"filter": filter.toJSON(), "team_id": team.pk}

        task = update_cache_item_task.delay(cache_key, FUNNEL_ENDPOINT,
                                            payload)
        task_id = task.id
        cache.set(cache_key, {"task_id": task_id},
                  180)  # task will be live for 3 minutes

        self._refresh_dashboard(request=request)
        return result
Example #11
    def test_update_cache_item_calls_right_class(
            self, patch_import_from: MagicMock) -> None:
        filter = Filter(data={
            "insight": "TRENDS",
            "events": [{
                "id": "$pageview"
            }]
        })
        dashboard_item = self._create_dashboard(filter)

        with self.settings(EE_AVAILABLE=False):
            update_cache_item(
                generate_cache_key("{}_{}".format(filter.toJSON(),
                                                  self.team.pk)),
                CacheType.TRENDS,
                {
                    "filter": filter.toJSON(),
                    "team_id": self.team.pk,
                },
            )

        patch_import_from.assert_called_once_with("posthog.queries.trends",
                                                  "Trends")

        updated_dashboard_item = DashboardItem.objects.get(
            pk=dashboard_item.pk)
        self.assertEqual(updated_dashboard_item.refreshing, False)
        self.assertEqual(updated_dashboard_item.last_refresh, now())
Example #12
    def funnel(self, request: request.Request, *args: Any,
               **kwargs: Any) -> Response:
        team = self.team
        refresh = request.GET.get("refresh", None)
        dashboard_id = request.GET.get("from_dashboard", None)

        filter = Filter(request=request)
        cache_key = generate_cache_key("{}_{}".format(filter.toJSON(),
                                                      team.pk))
        result = {"loading": True}

        if refresh:
            cache.delete(cache_key)
        else:
            cached_result = get_safe_cache(cache_key)
            if cached_result:
                task_id = cached_result.get("task_id", None)
                if not task_id:
                    return Response(cached_result["result"])
                else:
                    return Response(result)

        payload = {"filter": filter.toJSON(), "team_id": team.pk}
        task = update_cache_item_task.delay(cache_key, CacheType.FUNNEL,
                                            payload)
        task_id = task.id
        cache.set(cache_key, {"task_id": task_id},
                  180)  # task will be live for 3 minutes

        if dashboard_id:
            DashboardItem.objects.filter(pk=dashboard_id).update(
                last_refresh=now())

        return Response(result)
Example #13
    def test_return_cached_results(self):
        dashboard = Dashboard.objects.create(team=self.team, name="dashboard")
        filter_dict = {
            "events": [{"id": "$pageview"}],
            "properties": [{"key": "$browser", "value": "Mac OS X"}],
        }
        filter = Filter(data=filter_dict)

        item = DashboardItem.objects.create(dashboard=dashboard, filters=filter_dict, team=self.team,)
        DashboardItem.objects.create(
            dashboard=dashboard, filters=filter.to_dict(), team=self.team,
        )
        response = self.client.get("/api/dashboard/%s/" % dashboard.pk).json()
        self.assertEqual(response["items"][0]["result"], None)

        # cache results
        response = self.client.get(
            "/api/insight/trend/?events=%s&properties=%s"
            % (json.dumps(filter_dict["events"]), json.dumps(filter_dict["properties"]))
        )
        self.assertEqual(response.status_code, 200)
        item = DashboardItem.objects.get(pk=item.pk)
        self.assertAlmostEqual(item.last_refresh, now(), delta=timezone.timedelta(seconds=5))
        self.assertEqual(item.filters_hash, generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk)))

        with self.assertNumQueries(11):
            response = self.client.get("/api/dashboard/%s/" % dashboard.pk).json()

        self.assertAlmostEqual(Dashboard.objects.get().last_accessed_at, now(), delta=timezone.timedelta(seconds=5))
        self.assertEqual(response["items"][0]["result"][0]["count"], 0)
Example #14
def update_cached_items() -> None:

    tasks = []
    items = (DashboardItem.objects.filter(
        Q(
            Q(dashboard__is_shared=True)
            | Q(dashboard__last_accessed_at__gt=timezone.now() -
                relativedelta(days=7)))).exclude(
                    dashboard__deleted=True).exclude(refreshing=True).exclude(
                        deleted=True))

    for item in items.filter(filters__isnull=False).exclude(
            filters={}).distinct("filters"):
        filter = Filter(data=item.filters)
        cache_key = generate_cache_key("{}_{}".format(filter.toJSON(),
                                                      item.team_id))
        curr_data = cache.get(cache_key)

        # if a task is logged and still loading, leave it alone
        if curr_data and curr_data.get("task_id", None):
            continue

        cache_type = CacheType.FUNNEL if filter.insight == "FUNNELS" else CacheType.TRENDS
        payload = {"filter": filter.toJSON(), "team_id": item.team_id}
        tasks.append(update_cache_item_task.s(cache_key, cache_type, payload))

    logger.info("Found {} items to refresh".format(len(tasks)))
    taskset = group(tasks)
    taskset.apply_async()
Example #15
def update_cached_items() -> None:

    tasks = []
    items = (
        DashboardItem.objects.filter(
            Q(Q(dashboard__is_shared=True) | Q(dashboard__last_accessed_at__gt=timezone.now() - relativedelta(days=7)))
        )
        .exclude(dashboard__deleted=True)
        .exclude(refreshing=True)
        .exclude(deleted=True)
        .distinct("filters_hash")
    )

    for item in DashboardItem.objects.filter(
        pk__in=Subquery(items.filter(filters__isnull=False).exclude(filters={}).distinct("filters").values("pk"))
    ).order_by(F("last_refresh").asc(nulls_first=True))[0:PARALLEL_DASHBOARD_ITEM_CACHE]:
        filter = get_filter(data=item.dashboard_filters(), team=item.team)
        cache_key = generate_cache_key("{}_{}".format(filter.toJSON(), item.team_id))

        cache_type = get_cache_type(filter)
        payload = {"filter": filter.toJSON(), "team_id": item.team_id}
        tasks.append(update_cache_item_task.s(cache_key, cache_type, payload))

    logger.info("Found {} items to refresh".format(len(tasks)))
    taskset = group(tasks)
    taskset.apply_async()
Example #16
    def test_refresh_dashboard_cache(self, patch_update_cache_item: MagicMock, patch_apply_async: MagicMock) -> None:
        # There are two things we want to refresh:
        # Any shared dashboard, as we only use cached items to show those
        # Any dashboard accessed in the last 7 days
        filter_dict = {
            "events": [{"id": "$pageview"}],
            "properties": [{"key": "$browser", "value": "Mac OS X"}],
        }
        filter = Filter(data=filter_dict)
        shared_dashboard = Dashboard.objects.create(team=self.team, is_shared=True)
        funnel_filter = Filter(data={"events": [{"id": "user signed up", "type": "events", "order": 0},],})

        item = DashboardItem.objects.create(dashboard=shared_dashboard, filters=filter.to_dict(), team=self.team)
        funnel_item = DashboardItem.objects.create(
            dashboard=shared_dashboard, filters=funnel_filter.to_dict(), team=self.team
        )

        dashboard_to_cache = Dashboard.objects.create(team=self.team, is_shared=True, last_accessed_at=now())
        item_to_cache = DashboardItem.objects.create(
            dashboard=dashboard_to_cache,
            filters=Filter(data={"events": [{"id": "cache this"}]}).to_dict(),
            team=self.team,
        )

        dashboard_do_not_cache = Dashboard.objects.create(
            team=self.team, is_shared=True, last_accessed_at="2020-01-01T12:00:00Z"
        )
        item_do_not_cache = DashboardItem.objects.create(
            dashboard=dashboard_do_not_cache,
            filters=Filter(data={"events": [{"id": "do not cache this"}]}).to_dict(),
            team=self.team,
        )

        item_key = generate_cache_key(filter.toJSON() + "_" + str(self.team.pk))
        funnel_key = generate_cache_key(funnel_filter.toJSON() + "_" + str(self.team.pk))
        update_cached_items()

        # pass the caught calls straight to the function
        # we do this to skip Redis
        for call_item in patch_update_cache_item.call_args_list:
            update_cache_item(*call_item[0])

        self.assertIsNotNone(DashboardItem.objects.get(pk=item.pk).last_refresh)
        self.assertIsNotNone(DashboardItem.objects.get(pk=item_to_cache.pk).last_refresh)
        self.assertIsNotNone(DashboardItem.objects.get(pk=item_do_not_cache.pk).last_refresh)
        self.assertEqual(get_safe_cache(item_key)["result"][0]["count"], 0)
        self.assertEqual(get_safe_cache(funnel_key)["result"][0]["count"], 0)
Example #17
def dashboard_item_saved(sender, instance: Insight, dashboard=None, **kwargs):
    if instance.filters and instance.filters != {}:
        filter = get_filter(
            data=instance.dashboard_filters(dashboard=dashboard),
            team=instance.team)

        instance.filters_hash = generate_cache_key("{}_{}".format(
            filter.toJSON(), instance.team_id))
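Note: the excerpt omits how this receiver is registered. Since it mutates instance.filters_hash before saving, the assumed wiring is a pre_save signal:

from django.db.models.signals import pre_save

# Assumed wiring (not shown above): the hash must be computed before the
# row is written, so the receiver hangs off pre_save rather than post_save.
pre_save.connect(dashboard_item_saved, sender=Insight)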
Example #18
        def wrapper(*args, **kw):
            from posthog.celery import update_cache_item_task

            cache_key = ""

            # prepare caching params
            request = args[1]
            team = request.user.team_set.get()
            payload = None
            dashboard_item_id = None
            refresh = request.GET.get("refresh", None)

            if cache_type == TRENDS_ENDPOINT:
                filter = Filter(request=request)
                cache_key = generate_cache_key(filter.toJSON() + "_" +
                                               str(team.pk))
                payload = {"filter": filter.toJSON(), "team_id": team.pk}
            elif cache_type == FUNNEL_ENDPOINT:
                pk = args[2]
                cache_key = generate_cache_key("funnel_{}_{}".format(
                    pk, team.pk))
                payload = {"funnel_id": pk, "team_id": team.pk}

            if not refresh:
                # return result if cached
                cached_result = cache.get(cache_key)
                if cached_result:
                    return cached_result["result"]

            # call wrapped function
            result = f(*args, **kw)

            # cache new data
            if result and payload:
                cache.set(
                    cache_key,
                    {
                        "result": result,
                        "details": payload,
                        "type": cache_type,
                    },
                    expiry,
                )

            return result
Example #19
def get_result(self, dashboard_item: DashboardItem):
    if not dashboard_item.filters:
        return None
    filter = Filter(data=dashboard_item.filters)
    cache_key = generate_cache_key(filter.toJSON() + "_" + str(dashboard_item.team_id))
    result = cache.get(cache_key)
    if not result or result.get("task_id", None):
        return None
    return result["result"]
Example #20
        def wrapper(*args, **kwargs):
            # prepare caching params
            request: HttpRequest = args[1]
            team = cast(User, request.user).team
            filter = None
            if not team:
                return f(*args, **kwargs)

            if cache_type == CacheType.TRENDS:
                filter = Filter(request=request)
                cache_key = generate_cache_key(filter.toJSON() + "_" +
                                               str(team.pk))
                payload = {"filter": filter.toJSON(), "team_id": team.pk}
            elif cache_type == CacheType.FUNNEL:
                pk = args[2]
                cache_key = generate_cache_key("funnel_{}_{}".format(
                    pk, team.pk))
                payload = {"funnel_id": pk, "team_id": team.pk}
            else:
                raise ValueError("Invalid cache type!")
            # return cached result if possible
            if not request.GET.get("refresh", False):
                cached_result = cache.get(cache_key)
                if cached_result:
                    return cached_result["result"]
            # call function being wrapped
            result = f(*args, **kwargs)

            # cache new data
            if result is not None:
                cache.set(
                    cache_key,
                    {
                        "result": result,
                        "details": payload,
                        "type": cache_type,
                    },
                    CACHED_RESULTS_TTL,
                )
                if filter:
                    dashboard_items = DashboardItem.objects.filter(
                        team_id=team.pk, filters_hash=cache_key)
                    dashboard_items.update(last_refresh=now())
            return result
Example #21
def dashboard_item_update_task_params(
    item: DashboardItem, dashboard: Optional[Dashboard] = None
) -> Tuple[str, CacheType, Dict]:
    filter = get_filter(data=item.dashboard_filters(dashboard), team=item.team)
    cache_key = generate_cache_key("{}_{}".format(filter.toJSON(), item.team_id))

    cache_type = get_cache_type(filter)
    payload = {"filter": filter.toJSON(), "team_id": item.team_id}

    return cache_key, cache_type, payload
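Note: get_cache_type is referenced but never defined on this page. A minimal reconstruction, inferred from the insight check in Example #14 (the real helper likely special-cases more insight types):

def get_cache_type(filter) -> CacheType:
    # Assumed mapping, inferred from Example #14.
    return CacheType.FUNNEL if filter.insight == "FUNNELS" else CacheType.TRENDS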
Example #22
    def test_update_cache_item_calls_right_class(self) -> None:
        filter = Filter(data={
            "insight": "TRENDS",
            "events": [{
                "id": "$pageview"
            }]
        })
        dashboard_item = self._create_dashboard(filter)

        update_cache_item(
            generate_cache_key("{}_{}".format(filter.toJSON(), self.team.pk)),
            CacheType.TRENDS,
            {
                "filter": filter.toJSON(),
                "team_id": self.team.pk,
            },
        )

        updated_dashboard_item = Insight.objects.get(pk=dashboard_item.pk)
        self.assertEqual(updated_dashboard_item.refreshing, False)
        self.assertEqual(updated_dashboard_item.last_refresh, now())
Example #23
    def test_insights_old_filter(self) -> None:
        # Some filters hashes are wrong (likely due to changes in our filter models); previously we would not save changes to those insights and would constantly retry them.
        dashboard = Dashboard.objects.create(team=self.team, is_shared=True)
        filter = {"events": [{"id": "$pageview"}]}
        item = Insight.objects.create(dashboard=dashboard,
                                      filters=filter,
                                      filters_hash="cache_thisiswrong",
                                      team=self.team)
        Insight.objects.all().update(filters_hash="cache_thisiswrong")
        self.assertEqual(Insight.objects.get().filters_hash,
                         "cache_thisiswrong")

        update_cached_items()

        self.assertEqual(
            Insight.objects.get().filters_hash,
            generate_cache_key("{}_{}".format(
                Filter(data=filter).toJSON(), self.team.pk)),
        )
        self.assertEqual(Insight.objects.get().last_refresh.isoformat(),
                         "2021-08-25T22:09:14.252000+00:00")
Example #24
        def wrapper(*args,
                    **kwargs) -> Dict[str, Union[List, datetime, bool, str]]:
            # prepare caching params
            request: HttpRequest = args[1]
            team = cast(User, request.user).team
            filter = None
            if not team:
                return f(*args, **kwargs)

            filter = get_filter(request=request, team=team)
            cache_key = generate_cache_key("{}_{}".format(
                filter.toJSON(), team.pk))
            # return cached result if possible
            if not request.GET.get("refresh", False):
                cached_result = get_safe_cache(cache_key)
                if cached_result and cached_result.get("result"):
                    return {**cached_result, "is_cached": True}
            # call function being wrapped
            result = f(*args, **kwargs)

            # cache new data
            if result is not None and not (
                    isinstance(result.get("result"), dict)
                    and result["result"].get("loading")):
                cache.set(
                    cache_key,
                    {
                        "result": result["result"],
                        "last_refresh": now()
                    },
                    TEMP_CACHE_RESULTS_TTL,
                )
                if filter:
                    dashboard_items = DashboardItem.objects.filter(
                        team_id=team.pk, filters_hash=cache_key)
                    dashboard_items.update(last_refresh=now())
            return result
Example #25
        def wrapper(*args, **kwargs):
            # prepare caching params
            request: HttpRequest = args[1]
            team = cast(User, request.user).team
            filter = None
            if not team:
                return f(*args, **kwargs)

            filter = get_filter(request=request, team=team)
            cache_key = generate_cache_key("{}_{}".format(
                filter.toJSON(), team.pk))
            payload = {"filter": filter.toJSON(), "team_id": team.pk}
            # return cached result if possible
            if not request.GET.get("refresh", False):
                cached_result = cache.get(cache_key)
                if cached_result and cached_result.get("result"):
                    return cached_result["result"]
            # call function being wrapped
            result = f(*args, **kwargs)

            # cache new data
            if result is not None and (not isinstance(result, dict)
                                       or not result.get("loading")):
                cache.set(
                    cache_key,
                    {
                        "result": result,
                        "details": payload,
                    },
                    CACHED_RESULTS_TTL,
                )
                if filter:
                    dashboard_items = DashboardItem.objects.filter(
                        team_id=team.pk, filters_hash=cache_key)
                    dashboard_items.update(last_refresh=now())
            return result
Example #26
    def _execute_sql(
        self,
        filter: RetentionFilter,
        team: Team,
    ) -> Dict[Tuple[int, int], Dict[str, Any]]:

        period = filter.period
        is_first_time_retention = filter.retention_type == RETENTION_FIRST_TIME

        events: QuerySet = QuerySet()
        entity_condition, entity_condition_stringified = self.get_entity_condition(
            filter.target_entity, "first_event_date")
        returning_condition, returning_condition_stringified = self.get_entity_condition(
            filter.returning_entity, "events")
        events = Event.objects.filter(team_id=team.pk).add_person_id(
            team.pk).annotate(event_date=F("timestamp"))

        trunc, fields = self._get_trunc_func("timestamp", period)

        if is_first_time_retention:
            filtered_events = events.filter(
                filter.properties_to_Q(team_id=team.pk))
            first_date = (filtered_events.filter(entity_condition).values(
                "person_id", "event",
                "action").annotate(first_date=Min(trunc)).filter(
                    filter.custom_date_filter_Q("first_date")).distinct())
            final_query = (filtered_events.filter(
                filter.date_filter_Q).filter(returning_condition).values_list(
                    "person_id", "event_date", "event", "action").union(
                        first_date.values_list("first_date", "person_id",
                                               "event", "action")))
        else:
            filtered_events = events.filter(filter.date_filter_Q).filter(
                filter.properties_to_Q(team_id=team.pk))
            first_date = (filtered_events.filter(entity_condition).annotate(
                first_date=trunc).values("first_date", "person_id", "event",
                                         "action").distinct())

            final_query = (
                filtered_events.filter(returning_condition).values_list(
                    "person_id", "event_date", "event", "action").union(
                        first_date.values_list("first_date", "person_id",
                                               "event", "action")))

        event_query, events_query_params = final_query.query.sql_with_params()
        reference_event_query, first_date_params = first_date.query.sql_with_params(
        )

        final_query = """
            SELECT
                {fields}
                COUNT(DISTINCT "events"."person_id"),
                array_agg(DISTINCT "events"."person_id") as people
            FROM ({event_query}) events
            LEFT JOIN ({reference_event_query}) first_event_date
              ON (events.person_id = first_event_date.person_id)
            WHERE event_date >= first_date
            AND {target_condition} AND {return_condition}
            OR ({target_condition} AND event_date = first_date)
            GROUP BY date, first_date
        """.format(
            event_query=event_query,
            reference_event_query=reference_event_query,
            fields=fields,
            return_condition=returning_condition_stringified,
            target_condition=entity_condition_stringified,
        )
        event_params = (filter.target_entity.id, filter.returning_entity.id,
                        filter.target_entity.id)

        start_params = ((filter.date_from, filter.date_from)
                        if period == "Month" or period == "Hour" else
                        (filter.date_from, ))

        with connection.cursor() as cursor:
            cursor.execute(
                final_query,
                start_params + events_query_params + first_date_params +
                event_params,
            )
            data = namedtuplefetchall(cursor)

            scores: dict = {}
            for datum in data:
                key = round(datum.first_date, 1)
                if not scores.get(key, None):
                    scores.update({key: {}})
                for person in datum.people:
                    if not scores[key].get(person, None):
                        scores[key].update({person: 1})
                    else:
                        scores[key][person] += 1

        by_dates = {}
        for row in data:
            people = sorted(
                row.people,
                key=lambda p: scores[round(row.first_date, 1)][int(p)],
                reverse=True,
            )

            random_key = "".join(
                random.SystemRandom().choice(string.ascii_uppercase +
                                             string.digits) for _ in range(10))
            cache_key = generate_cache_key("{}{}{}".format(
                random_key, str(round(row.first_date, 0)), str(team.pk)))
            cache.set(
                cache_key,
                people,
                600,
            )
            by_dates.update({
                (int(row.first_date), int(row.date)): {
                    "count": row.count,
                    "people": people[0:100],
                    "offset": 100,
                    "next": cache_key if len(people) > 100 else None,
                }
            })

        return by_dates
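Note: namedtuplefetchall is the standard helper from the Django documentation for reading raw cursor rows as named tuples; for reference:

from collections import namedtuple

def namedtuplefetchall(cursor):
    # Return all rows from a cursor as namedtuples keyed by column name
    # (the verbatim pattern from the Django docs).
    desc = cursor.description
    nt_result = namedtuple("Result", [col[0] for col in desc])
    return [nt_result(*row) for row in cursor.fetchall()]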
Example #27
    def query_retention(self, filters: Filter, team) -> dict:

        events: QuerySet = QuerySet()
        entity = (Entity({
            "id": "$pageview",
            "type": TREND_FILTER_TYPE_EVENTS
        }) if not filters.target_entity else filters.target_entity)
        if entity.type == TREND_FILTER_TYPE_EVENTS:
            events = Event.objects.filter_by_event_with_people(event=entity.id,
                                                               team_id=team.id)
        elif entity.type == TREND_FILTER_TYPE_ACTIONS:
            events = Event.objects.filter(action__pk=entity.id).add_person_id(
                team.id)

        filtered_events = events.filter(filters.date_filter_Q).filter(
            filters.properties_to_Q(team_id=team.pk))

        first_date = (filtered_events.annotate(
            first_date=TruncDay("timestamp")).values("first_date",
                                                     "person_id").distinct())

        events_query, events_query_params = filtered_events.query.sql_with_params(
        )
        first_date_query, first_date_params = first_date.query.sql_with_params(
        )

        full_query = """
            SELECT
                DATE_PART('days', first_date - %s) AS first_date,
                DATE_PART('days', timestamp - first_date) AS date,
                COUNT(DISTINCT "events"."person_id"),
                array_agg(DISTINCT "events"."person_id") as people
            FROM ({events_query}) events
            LEFT JOIN ({first_date_query}) first_event_date
              ON (events.person_id = first_event_date.person_id)
            WHERE timestamp > first_date
            GROUP BY date, first_date
        """

        full_query = full_query.format(
            events_query=events_query,
            first_date_query=first_date_query,
            event_date_query=TruncDay("timestamp"),
        )

        with connection.cursor() as cursor:
            cursor.execute(
                full_query,
                (filters.date_from, ) + events_query_params +
                first_date_params,
            )
            data = namedtuplefetchall(cursor)

            scores: dict = {}
            for datum in data:
                key = round(datum.first_date, 1)
                if not scores.get(key, None):
                    scores.update({key: {}})
                for person in datum.people:
                    if not scores[key].get(person, None):
                        scores[key].update({person: 1})
                    else:
                        scores[key][person] += 1

        by_dates = {}
        for row in data:
            people = sorted(
                row.people,
                key=lambda p: scores[round(row.first_date, 1)][int(p)],
                reverse=True,
            )

            random_key = "".join(
                random.SystemRandom().choice(string.ascii_uppercase +
                                             string.digits) for _ in range(10))
            cache_key = generate_cache_key("{}{}{}".format(
                random_key, str(round(row.first_date, 0)), str(team.pk)))
            cache.set(
                cache_key,
                people,
                600,
            )
            by_dates.update({
                (int(row.first_date), int(row.date)): {
                    "count": row.count,
                    "people": people[0:100],
                    "offset": 100,
                    "next": cache_key if len(people) > 100 else None,
                }
            })

        return by_dates
Example #28
    def test_update_cache_item_calls_right_funnel_class_clickhouse(
        self,
        funnel_mock: MagicMock,
        funnel_trends_mock: MagicMock,
        funnel_time_to_convert_mock: MagicMock,
        funnel_strict_mock: MagicMock,
        funnel_unordered_mock: MagicMock,
    ) -> None:
        #  basic funnel
        base_filter = Filter(
            data={
                "insight":
                "FUNNELS",
                "events": [
                    {
                        "id": "$pageview",
                        "order": 0,
                        "type": "events"
                    },
                    {
                        "id": "$pageview",
                        "order": 1,
                        "type": "events"
                    },
                ],
            })

        with self.settings(EE_AVAILABLE=True, PRIMARY_DB="clickhouse"):
            filter = base_filter
            funnel_mock.return_value.run.return_value = {}
            update_cache_item(
                generate_cache_key("{}_{}".format(filter.toJSON(),
                                                  self.team.pk)),
                CacheType.FUNNEL,
                {
                    "filter": filter.toJSON(),
                    "team_id": self.team.pk,
                },
            )
            funnel_mock.assert_called_once()

            # trends funnel
            filter = base_filter.with_data({"funnel_viz_type": "trends"})
            funnel_trends_mock.return_value.run.return_value = {}
            update_cache_item(
                generate_cache_key("{}_{}".format(filter.toJSON(),
                                                  self.team.pk)),
                CacheType.FUNNEL,
                {
                    "filter": filter.toJSON(),
                    "team_id": self.team.pk,
                },
            )

            funnel_trends_mock.assert_called_once()
            self.assertEqual(
                funnel_trends_mock.call_args[1]["funnel_order_class"],
                funnel_mock)
            funnel_trends_mock.reset_mock()

            # trends unordered funnel
            filter = base_filter.with_data({
                "funnel_viz_type": "trends",
                "funnel_order_type": "unordered"
            })
            funnel_trends_mock.return_value.run.return_value = {}
            update_cache_item(
                generate_cache_key("{}_{}".format(filter.toJSON(),
                                                  self.team.pk)),
                CacheType.FUNNEL,
                {
                    "filter": filter.toJSON(),
                    "team_id": self.team.pk,
                },
            )

            funnel_trends_mock.assert_called_once()
            self.assertEqual(
                funnel_trends_mock.call_args[1]["funnel_order_class"],
                funnel_unordered_mock)
            funnel_trends_mock.reset_mock()

            # time to convert strict funnel
            filter = base_filter.with_data({
                "funnel_viz_type": "time_to_convert",
                "funnel_order_type": "strict"
            })
            funnel_time_to_convert_mock.return_value.run.return_value = {}
            update_cache_item(
                generate_cache_key("{}_{}".format(filter.toJSON(),
                                                  self.team.pk)),
                CacheType.FUNNEL,
                {
                    "filter": filter.toJSON(),
                    "team_id": self.team.pk,
                },
            )

            funnel_time_to_convert_mock.assert_called_once()
            self.assertEqual(
                funnel_time_to_convert_mock.call_args[1]["funnel_order_class"],
                funnel_strict_mock)
            funnel_time_to_convert_mock.reset_mock()

            # strict funnel
            filter = base_filter.with_data({"funnel_order_type": "strict"})
            funnel_strict_mock.return_value.run.return_value = {}
            update_cache_item(
                generate_cache_key("{}_{}".format(filter.toJSON(),
                                                  self.team.pk)),
                CacheType.FUNNEL,
                {
                    "filter": filter.toJSON(),
                    "team_id": self.team.pk,
                },
            )

            funnel_strict_mock.assert_called_once()
Example #29
    def test_cached_funnel(self):
        action_sign_up = Action.objects.create(team=self.team,
                                               name="signed up")
        ActionStep.objects.create(action=action_sign_up,
                                  tag_name="button",
                                  text="Sign up!")
        action_credit_card = Action.objects.create(team=self.team, name="paid")
        ActionStep.objects.create(action=action_credit_card,
                                  tag_name="button",
                                  text="Pay $10")
        action_play_movie = Action.objects.create(team=self.team,
                                                  name="watched movie")
        ActionStep.objects.create(action=action_play_movie,
                                  tag_name="a",
                                  href="/movie")
        Action.objects.create(team=self.team, name="user logged out")

        [action.calculate_events() for action in Action.objects.all()]

        self.client.post(
            "/api/funnel/",
            data={
                "name": "Whatever",
                "filters": {
                    "events": [
                        {
                            "id": "user signed up",
                            "type": "events",
                            "order": 0
                        },
                    ],
                    "actions": [
                        {
                            "id": action_sign_up.pk,
                            "type": "actions",
                            "order": 1
                        },
                    ],
                },
            },
            content_type="application/json",
        ).json()
        funnel = Funnel.objects.get()

        funnel_key = generate_cache_key("funnel_{}_{}".format(
            funnel.pk, self.team.pk))

        # no refresh after getting
        self.client.get("/api/funnel/{}/".format(funnel.pk)).json()
        original_name = cache.get(funnel_key)["result"]["name"]

        self.client.patch("/api/funnel/{}/".format(funnel.pk),
                          data={
                              "name": "Whatever2"
                          },
                          content_type="application/json").json()

        self.client.get("/api/funnel/{}/".format(funnel.pk)).json()
        refreshed_name = cache.get(funnel_key)["result"]["name"]
        self.assertEqual("Whatever", refreshed_name)

        self.client.get("/api/funnel/{}/?refresh=true".format(
            funnel.pk)).json()
        refreshed_name = cache.get(funnel_key)["result"]["name"]
        self.assertEqual("Whatever2", refreshed_name)
Example #30
    def query_retention(self, filter: Filter, team) -> dict:

        period = filter.period
        events: QuerySet = QuerySet()
        entity = (
            Entity({"id": "$pageview", "type": TREND_FILTER_TYPE_EVENTS})
            if not filter.target_entity
            else filter.target_entity
        )
        if entity.type == TREND_FILTER_TYPE_EVENTS:
            events = Event.objects.filter_by_event_with_people(event=entity.id, team_id=team.id)
        elif entity.type == TREND_FILTER_TYPE_ACTIONS:
            events = Event.objects.filter(action__pk=entity.id).add_person_id(team.id)

        filtered_events = events.filter(filter.date_filter_Q).filter(filter.properties_to_Q(team_id=team.pk))

        def _determineTrunc(subject: str, period: str) -> Tuple[Union[TruncHour, TruncDay, TruncWeek, TruncMonth], str]:
            if period == "Hour":
                fields = """
                FLOOR(DATE_PART('day', first_date - %s) * 24 + DATE_PART('hour', first_date - %s)) AS first_date,
                FLOOR(DATE_PART('day', timestamp - first_date) * 24 + DATE_PART('hour', timestamp - first_date)) AS date,
                """
                return TruncHour(subject), fields
            elif period == "Day":
                fields = """
                FLOOR(DATE_PART('day', first_date - %s)) AS first_date,
                FLOOR(DATE_PART('day', timestamp - first_date)) AS date,
                """
                return TruncDay(subject), fields
            elif period == "Week":
                fields = """
                FLOOR(DATE_PART('day', first_date - %s) / 7) AS first_date,
                FLOOR(DATE_PART('day', timestamp - first_date) / 7) AS date,
                """
                return TruncWeek(subject), fields
            elif period == "Month":
                fields = """
                FLOOR((DATE_PART('year', first_date) - DATE_PART('year', %s)) * 12 + DATE_PART('month', first_date) - DATE_PART('month', %s)) AS first_date,
                FLOOR((DATE_PART('year', timestamp) - DATE_PART('year', first_date)) * 12 + DATE_PART('month', timestamp) - DATE_PART('month', first_date)) AS date,
                """
                return TruncMonth(subject), fields
            else:
                raise ValueError(f"Period {period} is unsupported.")

        trunc, fields = _determineTrunc("timestamp", period)
        first_date = filtered_events.annotate(first_date=trunc).values("first_date", "person_id").distinct()

        events_query, events_query_params = filtered_events.query.sql_with_params()
        first_date_query, first_date_params = first_date.query.sql_with_params()

        full_query = """
            SELECT
                {fields}
                COUNT(DISTINCT "events"."person_id"),
                array_agg(DISTINCT "events"."person_id") as people
            FROM ({events_query}) events
            LEFT JOIN ({first_date_query}) first_event_date
              ON (events.person_id = first_event_date.person_id)
            WHERE timestamp >= first_date
            GROUP BY date, first_date
        """

        full_query = full_query.format(events_query=events_query, first_date_query=first_date_query, fields=fields)

        start_params = (
            (filter.date_from, filter.date_from) if period == "Month" or period == "Hour" else (filter.date_from,)
        )

        with connection.cursor() as cursor:
            cursor.execute(
                full_query, start_params + events_query_params + first_date_params,
            )
            data = namedtuplefetchall(cursor)

            scores: dict = {}
            for datum in data:
                key = round(datum.first_date, 1)
                if not scores.get(key, None):
                    scores.update({key: {}})
                for person in datum.people:
                    if not scores[key].get(person, None):
                        scores[key].update({person: 1})
                    else:
                        scores[key][person] += 1

        by_dates = {}
        for row in data:
            people = sorted(row.people, key=lambda p: scores[round(row.first_date, 1)][int(p)], reverse=True,)

            random_key = "".join(
                random.SystemRandom().choice(string.ascii_uppercase + string.digits) for _ in range(10)
            )
            cache_key = generate_cache_key("{}{}{}".format(random_key, str(round(row.first_date, 0)), str(team.pk)))
            cache.set(
                cache_key, people, 600,
            )
            by_dates.update(
                {
                    (int(row.first_date), int(row.date)): {
                        "count": row.count,
                        "people": people[0:100],
                        "offset": 100,
                        "next": cache_key if len(people) > 100 else None,
                    }
                }
            )

        return by_dates