def test_pagination(self):
    """Events endpoint returns 150 events as a page of 100 plus a page of 50."""
    person_factory(team=self.team, distinct_ids=["1"])

    # One event per day, each also shifted by one extra second.
    first_timestamp = datetime(2019, 1, 1, 12, 0, 0)
    for offset in range(150):
        event_factory(
            team=self.team,
            event="some event",
            distinct_id="1",
            timestamp=first_timestamp + relativedelta(days=offset, seconds=offset),
        )

    first_page = self.client.get("/api/event/?distinct_id=1").json()
    self.assertEqual(len(first_page["results"]), 100)
    self.assertIn(
        "http://testserver/api/event/?distinct_id=1&before=",
        first_page["next"],
    )

    # Follow the "next" link exactly as returned by the API.
    second_page = self.client.get(first_page["next"]).json()

    from posthog.ee import check_ee_enabled

    if check_ee_enabled():
        from ee.clickhouse.client import sync_execute

        # When EE is enabled, also check the raw row count in ClickHouse.
        stored_rows = sync_execute("select count(*) from events")[0][0]
        self.assertEqual(stored_rows, 150)

    self.assertEqual(len(second_page["results"]), 50)
def demo(request):
    """Render the demo page, creating and seeding the demo team when it is missing.

    The demo team (looked up by ``TEAM_NAME`` in the user's organization) becomes
    the user's current team. When EE is enabled and the team has no ClickHouse
    events yet, demo users are generated there as well.
    """
    user = request.user
    organization = user.organization

    try:
        team = organization.teams.get(name=TEAM_NAME)
    except Team.DoesNotExist:
        # First visit for this organization: create the team and seed demo data.
        team = Team.objects.create_with_data(
            organization=organization,
            name=TEAM_NAME,
            ingested_event=True,
            completed_snippet_onboarding=True,
        )
        demo_url = request.build_absolute_uri("/demo")
        _create_anonymous_users(team=team, base_url=demo_url)
        _create_funnel(team=team, base_url=demo_url)
        _recalculate(team=team)

    user.current_team = team
    user.save()

    pageview_registered = "$pageview" in team.event_names
    if not pageview_registered:
        team.event_names.append("$pageview")
        team.save()

    if check_ee_enabled():
        from ee.clickhouse.demo import create_anonymous_users_ch
        from ee.clickhouse.models.event import get_events_by_team

        # Only seed ClickHouse when the team has no events there.
        if not get_events_by_team(team_id=team.pk):
            create_anonymous_users_ch(
                team=team,
                base_url=request.build_absolute_uri("/demo"),
            )

    template_context = {"api_token": team.api_token}
    return render_template("demo.html", request=request, context=template_context)
def calculate_actions_ch(action: Action) -> None:
    """Populate the ClickHouse action-event table for ``action`` (best effort).

    Does nothing unless EE is enabled. Failures are logged with their traceback
    and never propagated, so callers are not interrupted by ClickHouse problems.
    """
    if not check_ee_enabled():
        return

    try:
        # Imported lazily: the ee package is only needed when EE is enabled.
        from ee.clickhouse.models.action import populate_action_event_table

        populate_action_event_table(action)
    except Exception:
        # `Exception` (not a bare except) lets SystemExit/KeyboardInterrupt
        # propagate; `logger.exception` records the traceback with the message.
        logger.exception("Could not update clickhouse tables")
def calculate_cohorts_ch(cohort: Cohort) -> None:
    """Populate the ClickHouse cohort-person table for ``cohort`` (best effort).

    Does nothing unless EE is enabled. Failures are logged with their traceback
    and never propagated, so callers are not interrupted by ClickHouse problems.
    """
    if not check_ee_enabled():
        return

    try:
        # Imported lazily: the ee package is only needed when EE is enabled.
        from ee.clickhouse.models.cohort import populate_cohort_person_table

        populate_cohort_person_table(cohort)
    except Exception:
        # `Exception` (not a bare except) lets SystemExit/KeyboardInterrupt
        # propagate; `logger.exception` records the traceback with the message.
        logger.exception("Could not update clickhouse cohort tables")
def clickhouse_lag():
    """Send the ClickHouse ``events`` table lag to statsd as a gauge.

    The lag is ``now() - max(_timestamp)`` as computed by ClickHouse. Nothing is
    sent unless EE is both enabled and available.
    """
    if not (check_ee_enabled() and settings.EE_AVAILABLE):
        return

    from ee.clickhouse.client import sync_execute

    lag_query = """select max(_timestamp) observed_ts, now() now_ts, now() - max(_timestamp) as lag from events;"""
    first_row = sync_execute(lag_query)[0]
    lag_seconds = first_row[2]  # columns: observed_ts, now_ts, lag

    gauge = statsd.Gauge("%s_posthog_celery" % (settings.STATSD_PREFIX, ))
    # NOTE(review): metric name says "even", not "event"; kept as-is because
    # renaming it would change the emitted metric.
    gauge.send("clickhouse_even_table_lag_seconds", lag_seconds)
def setup_periodic_tasks(sender, **kwargs):
    """Register the periodic tasks on ``sender``.

    Which tasks are registered depends on ``settings.DEBUG``, the optional
    ``MULTI_TENANCY`` setting, the ``SESSION_RECORDING_RETENTION_CRONJOB``
    environment variable, whether EE is enabled, and
    ``settings.ASYNC_EVENT_ACTION_MAPPING``.
    """
    multi_tenancy = getattr(settings, "MULTI_TENANCY", False)

    if not settings.DEBUG:
        sender.add_periodic_task(
            1.0,
            redis_celery_queue_depth.s(),
            name="1 sec queue probe",
            priority=0,
        )

    # Heartbeat every 10 seconds to make sure the worker is alive.
    sender.add_periodic_task(
        10.0,
        redis_heartbeat.s(),
        name="10 sec heartbeat",
        priority=0,
    )

    # Update events table partitions twice a week (Monday and Friday, 00:00).
    partition_schedule = crontab(day_of_week="mon,fri", hour=0, minute=0)
    sender.add_periodic_task(partition_schedule, update_event_partitions.s())

    if multi_tenancy or os.environ.get("SESSION_RECORDING_RETENTION_CRONJOB", False):
        sender.add_periodic_task(
            crontab(minute=0, hour="*/12"),
            run_session_recording_retention.s(),
        )

    # Send the weekly status report on non-PostHog Cloud instances.
    if not multi_tenancy:
        sender.add_periodic_task(
            crontab(day_of_week="mon", hour=0, minute=0),
            status_report.s(),
        )

    # Send the weekly email report (~ 8:00 SF / 16:00 UK / 17:00 EU).
    sender.add_periodic_task(
        crontab(day_of_week="mon", hour=15, minute=0),
        send_weekly_email_report.s(),
    )

    sender.add_periodic_task(
        crontab(day_of_week="fri", hour=0, minute=0),
        clean_stale_partials.s(),
    )

    if check_ee_enabled():
        # EE-enabled scheduled tasks.
        sender.add_periodic_task(
            120,
            clickhouse_lag.s(),
            name="clickhouse event table lag",
        )
    else:
        sender.add_periodic_task(15 * 60, calculate_cohort.s(), name="debug")
        sender.add_periodic_task(
            600,
            check_cached_items.s(),
            name="check dashboard items",
        )

    if settings.ASYNC_EVENT_ACTION_MAPPING:
        # The run interval doubles as the expiry passed to the task.
        mapping_interval_seconds = 60 * ACTION_EVENT_MAPPING_INTERVAL_MINUTES
        sender.add_periodic_task(
            mapping_interval_seconds,
            calculate_event_action_mappings.s(),
            name="calculate event action mappings",
            expires=mapping_interval_seconds,
        )
def demo(request):
    """Render the demo page for the user's team, seeding demo data if needed.

    Demo data is created when the team has no ``Event`` rows. When EE is enabled
    and the team has no ClickHouse events, demo users are generated there too.
    """
    team = request.user.team

    has_events = Event.objects.filter(team=team).exists()
    if not has_events:
        demo_url = request.build_absolute_uri("/demo")
        _create_anonymous_users(team=team, base_url=demo_url)
        _create_funnel(team=team, base_url=demo_url)
        _recalculate(team=team)

    pageview_registered = "$pageview" in team.event_names
    if not pageview_registered:
        team.event_names.append("$pageview")
        team.save()

    if check_ee_enabled():
        from ee.clickhouse.demo import create_anonymous_users_ch
        from ee.clickhouse.models.event import get_events_by_team

        # Only seed ClickHouse when the team has no events there.
        if not get_events_by_team(team_id=team.pk):
            create_anonymous_users_ch(
                team=team,
                base_url=request.build_absolute_uri("/demo"),
            )

    template_context = {"api_token": team.api_token}
    return render_template("demo.html", request=request, context=template_context)
INSERT_PERSON_SQL, PERSON_DISTINCT_ID_EXISTS_SQL, UPDATE_PERSON_ATTACHED_DISTINCT_ID, UPDATE_PERSON_IS_IDENTIFIED, UPDATE_PERSON_PROPERTIES, ) from ee.kafka_client.client import ClickhouseProducer from ee.kafka_client.topics import KAFKA_PERSON, KAFKA_PERSON_UNIQUE_ID from posthog import settings from posthog.ee import check_ee_enabled from posthog.models.filter import Filter from posthog.models.person import Person, PersonDistinctId from posthog.models.team import Team from posthog.models.utils import UUIDT if settings.EE_AVAILABLE and check_ee_enabled(): @receiver(post_save, sender=Person) def person_created(sender, instance: Person, created, **kwargs): create_person( team_id=instance.team.pk, properties=instance.properties, uuid=str(instance.uuid), is_identified=instance.is_identified, ) @receiver(post_save, sender=PersonDistinctId) def person_distinct_id_created(sender, instance: PersonDistinctId, created, **kwargs): create_person_distinct_id(instance.pk, instance.team.pk, instance.distinct_id,
store_names_and_properties(team=team, event=event, properties=properties) # # determine create events create_event( event_uuid=event_uuid, event=event, properties=properties, timestamp=timestamp, team=team, distinct_id=distinct_id, elements=elements_list, ) if check_ee_enabled(): @shared_task def process_event_ee( distinct_id: str, ip: str, site_url: str, data: dict, team_id: int, now: str, sent_at: Optional[str], ) -> None: properties = data.get("properties", data.get("$set", {})) person_uuid = UUIDT() event_uuid = UUIDT() ts = handle_timestamp(data, now, sent_at) _capture_ee( event_uuid=event_uuid, person_uuid=person_uuid, ip=ip, site_url=site_url,
def get_event(request):
    """Validate a capture request and queue each of its events for processing.

    Returns a CORS-wrapped JSON response: ``{"status": 1}`` once every event has
    been handed off, or a 400 ``validation`` error as soon as a check fails.
    Events ahead of a failing event in the same batch are already queued by then.
    """

    def _reject(message, **extra):
        # Every validation failure shares the same 400 envelope.
        payload = {"code": "validation", "message": message}
        payload.update(extra)
        return cors_response(request, JsonResponse(payload, status=400))

    now = timezone.now()

    try:
        data_from_request = load_data_from_request(request)
        data = data_from_request["data"]
    except TypeError:
        return _reject("Malformed request data. Make sure you're sending valid JSON.")

    if not data:
        return _reject(
            "No data found. Make sure to use a POST request when sending the payload in the body of the request."
        )

    sent_at = _get_sent_at(data, request)

    token = _get_token(data, request)
    is_personal_api_key = False
    if not token:
        # No project token: fall back to a personal API key.
        dict_data = data if isinstance(data, dict) else None
        token = PersonalAPIKeyAuthentication.find_key(
            request, data_from_request["body"], dict_data
        )
        is_personal_api_key = True
    if not token:
        return _reject(
            "Neither api_key nor personal_api_key set. You can find your project API key in PostHog project settings."
        )

    team = Team.objects.get_team_from_token(token, is_personal_api_key)
    if team is None:
        return _reject(
            "Project or personal API key invalid. You can find your project API key in PostHog project settings."
        )

    if isinstance(data, dict):
        if data.get("batch"):
            # posthog-python and posthog-ruby send their events under "batch".
            data = data["batch"]
            assert data is not None
        elif "engage" in request.path_info:
            # JS identify call: make sure it has an event name.
            data["event"] = "$identify"

    events = data if isinstance(data, list) else [data]

    for event in events:
        try:
            distinct_id = _get_distinct_id(event)
        except KeyError:
            return _reject(
                "You need to set user distinct ID field `distinct_id`.",
                item=event,
            )

        if "event" not in event:
            return _reject("You need to set event name field `event`.", item=event)

        ee_enabled = check_ee_enabled()
        task_kwargs = {
            "distinct_id": distinct_id,
            "ip": get_ip_address(request),
            "site_url": request.build_absolute_uri("/")[:-1],
            "data": event,
            "team_id": team.id,
            "now": now,
            "sent_at": sent_at,
        }

        if ee_enabled:
            process_event_ee.delay(**task_kwargs)
            # Log the event to the kafka write ahead log for processing.
            log_event(**task_kwargs)
        else:
            process_event.delay(**task_kwargs)

    return cors_response(request, JsonResponse({"status": 1}))
class TestInsightApi(TransactionBaseTest):
    """API-level tests for the ``/api/insight`` endpoints.

    ``event_factory`` and ``person_factory`` are not defined in this class;
    presumably they come from the enclosing scope -- confirm.
    """

    TESTS_API = True

    def test_get_insight_items(self):
        """Listing with the ``user`` parameter returns one of the two items."""
        filter_dict = {
            "events": [{"id": "$pageview"}],
            "properties": [{"key": "$browser", "value": "Mac OS X"}],
        }

        # First item has a creator, second one is created without a user.
        for extra_fields in ({"created_by": self.user}, {}):
            DashboardItem.objects.create(
                filters=Filter(data=filter_dict).to_dict(),
                team=self.team,
                **extra_fields,
            )

        listing = self.client.get("/api/insight/", data={"user": "******"}).json()

        self.assertEqual(len(listing["results"]), 1)

    def test_get_saved_insight_items(self):
        """Listing with ``saved`` and ``user`` returns one of the three items."""
        filter_dict = {
            "events": [{"id": "$pageview"}],
            "properties": [{"key": "$browser", "value": "Mac OS X"}],
        }

        item_variants = (
            {"saved": True, "created_by": self.user},
            {"created_by": self.user},  # create without saved
            {},  # create without user
        )
        for extra_fields in item_variants:
            DashboardItem.objects.create(
                filters=Filter(data=filter_dict).to_dict(),
                team=self.team,
                **extra_fields,
            )

        query = {
            "saved": "true",
            "user": "******",
        }
        listing = self.client.get("/api/insight/", data=query).json()

        self.assertEqual(len(listing["results"]), 1)

    def test_create_insight_items(self):
        """POSTing to the endpoint without a trailing slash creates one item."""
        # Make sure the endpoint works with and without the trailing slash
        request_body = {
            "filters": {
                "events": [{"id": "$pageview"}],
                "properties": [{"key": "$browser", "value": "Mac OS X"}],
            },
        }
        self.client.post(
            "/api/insight",
            data=request_body,
            content_type="application/json",
        ).json()

        stored_items = DashboardItem.objects.all()
        self.assertEqual(len(stored_items), 1)
        self.assertListEqual(stored_items[0].filters["events"], [{"id": "$pageview"}])

    # BASIC TESTING OF ENDPOINTS. See /queries for in-depth testing of each insight.

    def test_insight_trends_basic(self):
        """Trend endpoint counts both ``$pageview`` events."""
        with freeze_time("2012-01-14T03:21:34.000Z"):
            for distinct_id in ("1", "2"):
                event_factory(team=self.team, event="$pageview", distinct_id=distinct_id)

        events_param = json.dumps([{"id": "$pageview"}])
        with freeze_time("2012-01-15T04:01:34.000Z"):
            trend = self.client.get(
                "/api/insight/trend/?events={}".format(events_param)
            ).json()

        first_series = trend[0]
        self.assertEqual(first_series["count"], 2)
        self.assertEqual(first_series["action"]["name"], "$pageview")

    def test_insight_session_basic(self):
        """Session endpoint result counts and the ``offset`` pagination field."""
        # (frozen time, event name, distinct ids) for the plain seed events.
        seed_events = (
            ("2012-01-14T03:21:34.000Z", "1st action", ("1", "2")),
            ("2012-01-14T03:25:34.000Z", "2nd action", ("1", "2")),
            ("2012-01-15T03:59:34.000Z", "3rd action", ("2",)),
            ("2012-01-15T03:59:35.000Z", "3rd action", ("1",)),
        )
        for frozen_at, event_name, distinct_ids in seed_events:
            with freeze_time(frozen_at):
                for distinct_id in distinct_ids:
                    event_factory(team=self.team, event=event_name, distinct_id=distinct_id)

        with freeze_time("2012-01-15T04:01:34.000Z"):
            event_factory(
                team=self.team,
                event="4th action",
                distinct_id="1",
                properties={"$os": "Mac OS X"},
            )
            event_factory(
                team=self.team,
                event="4th action",
                distinct_id="2",
                properties={"$os": "Windows 95"},
            )

        with freeze_time("2012-01-15T04:01:34.000Z"):
            sessions = self.client.get("/api/insight/session/").json()
        self.assertEqual(len(sessions["result"]), 2)

        sessions = self.client.get(
            "/api/insight/session/?date_from=2012-01-14&date_to=2012-01-15"
        ).json()
        self.assertEqual(len(sessions["result"]), 4)

        # 46 more events, one hour apart, each from a new distinct id.
        for hour in range(46):
            frozen_at = relative_date_parse("2012-01-15T04:01:34.000Z") + relativedelta(hours=hour)
            with freeze_time(frozen_at):
                event_factory(
                    team=self.team,
                    event="action {}".format(hour),
                    distinct_id=str(hour + 3),
                )

        sessions = self.client.get(
            "/api/insight/session/?date_from=2012-01-14&date_to=2012-01-17"
        ).json()
        self.assertEqual(len(sessions["result"]), 50)
        self.assertEqual(sessions.get("offset", None), None)

        # Two more events; the next request then reports an offset.
        for extra in range(2):
            frozen_at = relative_date_parse("2012-01-15T04:01:34.000Z") + relativedelta(hours=extra + 46)
            with freeze_time(frozen_at):
                event_factory(
                    team=self.team,
                    event="action {}".format(extra),
                    distinct_id=str(extra + 49),
                )

        sessions = self.client.get(
            "/api/insight/session/?date_from=2012-01-14&date_to=2012-01-17"
        ).json()
        self.assertEqual(len(sessions["result"]), 50)
        self.assertEqual(sessions["offset"], 50)

        sessions = self.client.get(
            "/api/insight/session/?date_from=2012-01-14&date_to=2012-01-17&offset=50"
        ).json()
        self.assertEqual(len(sessions["result"]), 2)
        self.assertEqual(sessions.get("offset", None), None)

    # TODO: remove this check
    if not check_ee_enabled():

        @override_settings(CELERY_TASK_ALWAYS_EAGER=True)
        def test_insight_funnels_basic(self):
            """Funnel endpoint reports ``loading`` for a one-step funnel."""
            event_factory(team=self.team, event="user signed up", distinct_id="1")

            funnel_steps = [
                {"id": "user signed up", "type": "events", "order": 0},
            ]
            funnel = self.client.get(
                "/api/insight/funnel/?events={}".format(json.dumps(funnel_steps))
            ).json()

            self.assertEqual(funnel["loading"], True)

    # TODO: remove this check
    def test_insight_retention_basic(self):
        """Retention endpoint returns 11 rows with a count of 1 in the first cell."""
        person_factory(
            team=self.team,
            distinct_ids=["person1"],
            properties={"email": "*****@*****.**"},
        )
        for days_ago in (11, 10):
            event_factory(
                team=self.team,
                event="$pageview",
                distinct_id="person1",
                timestamp=timezone.now() - timedelta(days=days_ago),
            )

        retention = self.client.get("/api/insight/retention/").json()

        self.assertEqual(len(retention["data"]), 11)
        self.assertEqual(retention["data"][0]["values"][0]["count"], 1)

    def test_insight_paths_basic(self):
        """Path endpoint returns a single entry for two consecutive pageviews."""
        person_factory(team=self.team, distinct_ids=["person_1"])
        for current_url in ("/", "/about"):
            event_factory(
                properties={"$current_url": current_url},
                distinct_id="person_1",
                event="$pageview",
                team=self.team,
            )

        paths = self.client.get("/api/insight/path").json()

        self.assertEqual(len(paths), 1)
def _match_distinct_id(self, distinct_id: str) -> bool:
    """Look up ``distinct_id`` in ClickHouse when EE is enabled, else in Postgres."""
    # Only the selected backend's query method is resolved and called.
    lookup = self._query_clickhouse if check_ee_enabled() else self._query_postgres
    return lookup(distinct_id)
def get_event(request):
    """Validate a capture request and hand each of its events off for processing.

    Returns a CORS-wrapped JSON response: ``{"status": 1}`` once every event has
    been handed off, or a 400 ``validation`` error as soon as a check fails.
    The statsd timer is only stopped (as ``event_endpoint``) on the success path.
    """

    def _reject(message, **extra):
        # Every validation failure shares the same 400 envelope.
        payload = {"code": "validation", "message": message}
        payload.update(extra)
        return cors_response(request, JsonResponse(payload, status=400))

    timer = statsd.Timer("%s_posthog_cloud" % (settings.STATSD_PREFIX, ))
    timer.start()
    now = timezone.now()

    try:
        data_from_request = load_data_from_request(request)
        data = data_from_request["data"]
    except TypeError:
        return _reject("Malformed request data. Make sure you're sending valid JSON.")

    if not data:
        return _reject(
            "No data found. Make sure to use a POST request when sending the payload in the body of the request."
        )

    sent_at = _get_sent_at(data, request)

    token = _get_token(data, request)
    is_personal_api_key = False
    if not token:
        # No project token: fall back to a personal API key.
        dict_data = data if isinstance(data, dict) else None
        token = PersonalAPIKeyAuthentication.find_key(
            request, data_from_request["body"], dict_data
        )
        is_personal_api_key = True
    if not token:
        return _reject(
            "Neither api_key nor personal_api_key set. You can find your project API key in PostHog project settings."
        )

    team = Team.objects.get_team_from_token(token, is_personal_api_key)
    if team is None:
        return _reject(
            "Project or personal API key invalid. You can find your project API key in PostHog project settings."
        )

    if isinstance(data, dict):
        if data.get("batch"):
            # posthog-python and posthog-ruby send their events under "batch".
            data = data["batch"]
            assert data is not None
        elif "engage" in request.path_info:
            # JS identify call: make sure it has an event name.
            data["event"] = "$identify"

    events = data if isinstance(data, list) else [data]

    for event in events:
        try:
            distinct_id = _get_distinct_id(event)
        except KeyError:
            return _reject(
                "You need to set user distinct ID field `distinct_id`.",
                item=event,
            )

        if "event" not in event:
            return _reject("You need to set event name field `event`.", item=event)

        ee_enabled = check_ee_enabled()
        ip_address = get_ip_address(request)
        site_url = request.build_absolute_uri("/")[:-1]

        if ee_enabled:
            # EE path: the event is processed by a direct call.
            process_event_ee(
                distinct_id=distinct_id,
                ip=ip_address,
                site_url=site_url,
                data=event,
                team_id=team.id,
                now=now,
                sent_at=sent_at,
            )
        else:
            # Non-EE path: dispatch by task name; teams opted in to plugins
            # use the plugin task and the plugin queue.
            target_task = "posthog.tasks.process_event.process_event"
            target_queue = settings.CELERY_DEFAULT_QUEUE
            if team.plugins_opt_in:
                target_task += "_with_plugins"
                target_queue = settings.PLUGINS_CELERY_QUEUE

            celery_app.send_task(
                name=target_task,
                queue=target_queue,
                args=[
                    distinct_id,
                    ip_address,
                    site_url,
                    event,
                    team.id,
                    now.isoformat(),
                    sent_at,
                ],
            )

        if ee_enabled and settings.LOG_TO_WAL:
            # Log the event to the kafka write ahead log for processing.
            log_event(
                distinct_id=distinct_id,
                ip=ip_address,
                site_url=site_url,
                data=event,
                team_id=team.id,
                now=now,
                sent_at=sent_at,
            )

    timer.stop("event_endpoint")
    return cors_response(request, JsonResponse({"status": 1}))