def test_set_is_identified(self) -> None:
    distinct_id = "777"
    Person.objects.create(team_id=self.team.pk, distinct_ids=[distinct_id])

    person_before_event = get_person_by_distinct_id(team_id=self.team.pk, distinct_id=distinct_id)
    self.assertFalse(person_before_event["is_identified"])

    process_event(
        distinct_id, "", "", {"event": "$identify", "properties": {}}, self.team.pk, now().isoformat(), now().isoformat()
    )
    process_event_ee(
        distinct_id, "", "", {"event": "$identify", "properties": {}}, self.team.pk, now().isoformat(), now().isoformat()
    )

    # Assume that clickhouse has done replacement
    ch_client.execute("OPTIMIZE TABLE person")

    person_after_event = get_person_by_distinct_id(team_id=self.team.pk, distinct_id=distinct_id)
    self.assertTrue(person_after_event["is_identified"])

def test_capture_no_sent_at(self) -> None:
    self._create_user("james")
    create_person(team_id=self.team.pk, distinct_ids=["asdfasdfasdf"])

    right_now = now()
    tomorrow = right_now + timedelta(days=1, hours=2)

    # event sent_at 10 minutes after timestamp
    process_event_ee(
        "movie played",
        "",
        "",
        {
            "event": "$pageview",
            "timestamp": tomorrow.isoformat(),
            "properties": {"distinct_id": "asdfasdfasdf", "token": self.team.api_token},
        },
        self.team.pk,
        right_now.isoformat(),
        None,
    )

    events = get_events()
    returned_time = datetime.strptime(events[0]["timestamp"], "%Y-%m-%dT%H:%M:%S.%f%z")
    difference = abs((tomorrow - returned_time).seconds)
    self.assertLess(difference, 1)

def test_long_htext(self) -> None:
    process_event(
        "new_distinct_id",
        "",
        "",
        {
            "event": "$autocapture",
            "properties": {
                "distinct_id": "new_distinct_id",
                "token": self.team.api_token,
                "$elements": [
                    {
                        "tag_name": "a", "$el_text": "a" * 2050, "attr__href": "a" * 2050,
                        "nth_child": 1, "nth_of_type": 2, "attr__class": "btn btn-sm",
                    },
                ],
            },
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )
    process_event_ee(
        "new_distinct_id",
        "",
        "",
        {
            "event": "$autocapture",
            "properties": {
                "distinct_id": "new_distinct_id",
                "token": self.team.api_token,
                "$elements": [
                    {
                        "tag_name": "a", "$el_text": "a" * 2050, "attr__href": "a" * 2050,
                        "nth_child": 1, "nth_of_type": 2, "attr__class": "btn btn-sm",
                    },
                ],
            },
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

    events = get_events()
    self.assertEqual(len(events[0]["elements"][0]["href"]), 2048)
    self.assertEqual(len(events[0]["elements"][0]["text"]), 400)

def test_capture_sent_at(self) -> None:
    self._create_user("tim")
    create_person(team_id=self.team.pk, distinct_ids=["asdfasdfasdf"])

    right_now = now()
    tomorrow = right_now + timedelta(days=1, hours=2)
    tomorrow_sent_at = right_now + timedelta(days=1, hours=2, minutes=10)

    # event sent_at 10 minutes after timestamp
    process_event_ee(
        "movie played",
        "",
        "",
        {
            "event": "$pageview",
            "timestamp": tomorrow.isoformat(),
            "properties": {"distinct_id": "asdfasdfasdf", "token": self.team.api_token},
        },
        self.team.pk,
        right_now.isoformat(),
        tomorrow_sent_at.isoformat(),
    )

    events = get_events()
    returned_time = datetime.strptime(events[0]["timestamp"], "%Y-%m-%dT%H:%M:%S.%f%z")
    event_seconds_before_now = (right_now - returned_time).seconds

    # assert that the event is actually recorded 10 minutes before now
    self.assertGreater(event_seconds_before_now, 590)
    self.assertLess(event_seconds_before_now, 610)

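# A hedged sketch (not part of the original suite) of the clock-skew correction the assertions above imply:
# the server re-anchors the client-supplied timestamp against its own receive time, roughly
#     recorded = now + (timestamp - sent_at)
# With timestamp = right_now + 1 day 2 h and sent_at = right_now + 1 day 2 h 10 min, the recorded time lands
# 10 minutes before right_now, hence the 590..610 second window checked above. Worked out in isolation:
#
#     >>> from datetime import datetime, timedelta, timezone
#     >>> right_now = datetime(2020, 1, 1, tzinfo=timezone.utc)
#     >>> timestamp = right_now + timedelta(days=1, hours=2)
#     >>> sent_at = right_now + timedelta(days=1, hours=2, minutes=10)
#     >>> (right_now - (right_now + (timestamp - sent_at))).seconds
#     600
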
def test_set_is_identified(self) -> None:
    distinct_id = "777"
    Person.objects.create(team_id=self.team.pk, distinct_ids=[distinct_id])

    person_before_event = get_person_by_distinct_id(team_id=self.team.pk, distinct_id=distinct_id)
    self.assertFalse(person_before_event["is_identified"])

    process_event(
        distinct_id, "", "", {"event": "$identify", "properties": {}}, self.team.pk, now().isoformat(), now().isoformat()
    )
    process_event_ee(
        distinct_id, "", "", {"event": "$identify", "properties": {}}, self.team.pk, now().isoformat(), now().isoformat()
    )

    person_after_event = get_person_by_distinct_id(team_id=self.team.pk, distinct_id=distinct_id)
    self.assertTrue(person_after_event["is_identified"])

def test_distinct_with_anonymous_id_which_was_already_created(self) -> None:
    create_person(team_id=self.team.pk, distinct_ids=["anonymous_id"])
    create_person(team_id=self.team.pk, distinct_ids=["new_distinct_id"], properties={"email": "*****@*****.**"})

    process_event_ee(
        "new_distinct_id",
        "",
        "",
        {
            "event": "$identify",
            "properties": {
                "$anon_distinct_id": "anonymous_id",
                "token": self.team.api_token,
                "distinct_id": "new_distinct_id",
            },
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

    # self.assertEqual(Event.objects.count(), 0)
    person = get_person_by_distinct_id(self.team.pk, "new_distinct_id")
    distinct_ids = [item["distinct_id"] for item in get_person_distinct_ids(team_id=self.team.pk)]
    self.assertEqual(sorted(distinct_ids), sorted(["anonymous_id", "new_distinct_id"]))
    self.assertEqual(person["properties"]["email"], "*****@*****.**")

def test_alias_before_person(self) -> None:
    process_event_ee(
        "new_distinct_id",
        "",
        "",
        {
            "event": "$create_alias",
            "properties": {"distinct_id": "new_distinct_id", "token": self.team.api_token, "alias": "old_distinct_id"},
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

    person1 = get_person_by_distinct_id(team_id=self.team.pk, distinct_id="old_distinct_id")
    person2 = get_person_by_distinct_id(team_id=self.team.pk, distinct_id="new_distinct_id")
    self.assertEqual(person1, person2)

    events = get_events()
    distinct_ids = [item["distinct_id"] for item in get_person_distinct_ids(team_id=self.team.pk)]
    self.assertEqual(len(events), 1)
    self.assertEqual(sorted(distinct_ids), sorted(["new_distinct_id", "old_distinct_id"]))

def test_snapshot_event_stored_as_session_recording_event(self) -> None:
    process_event_ee(
        "some-id",
        "",
        "",
        {"event": "$snapshot", "properties": {"$session_id": "abcf-efg", "$snapshot_data": {"timestamp": 123}}},
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

    recordings = sync_execute("SELECT session_id, distinct_id, snapshot_data FROM session_recording_events", {})
    self.assertEqual(recordings, [("abcf-efg", "some-id", '{"timestamp": 123}')])
    self.assertEqual(len(get_events()), 0)

def test_alias_both_existing(self) -> None:
    create_person(distinct_ids=["old_distinct_id"], team_id=self.team.pk)
    create_person(distinct_ids=["new_distinct_id"], team_id=self.team.pk)

    process_event_ee(
        "new_distinct_id",
        "",
        "",
        {
            "event": "$create_alias",
            "properties": {"distinct_id": "new_distinct_id", "token": self.team.api_token, "alias": "old_distinct_id"},
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

    events = get_events()
    distinct_ids = [item["distinct_id"] for item in get_person_distinct_ids(team_id=self.team.pk)]
    self.assertEqual(len(events), 1)
    self.assertEqual(sorted(distinct_ids), sorted(["old_distinct_id", "new_distinct_id"]))

def test_capture_no_element(self) -> None:
    user = self._create_user("tim")
    Person.objects.create(team_id=self.team.pk, distinct_ids=["asdfasdfasdf"])

    process_event(
        "asdfasdfasdf",
        "",
        "",
        {"event": "$pageview", "properties": {"distinct_id": "asdfasdfasdf", "token": self.team.api_token}},
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )
    process_event_ee(
        "asdfasdfasdf",
        "",
        "",
        {"event": "$pageview", "properties": {"distinct_id": "asdfasdfasdf", "token": self.team.api_token}},
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

    distinct_ids = [item["distinct_id"] for item in get_person_distinct_ids(team_id=self.team.pk)]
    self.assertEqual(distinct_ids, ["asdfasdfasdf"])

    events = get_events()
    self.assertEqual(events[0]["event"], "$pageview")

def test_offset_timestamp_no_sent_at(self) -> None:
    process_event(
        "distinct_id",
        "",
        "",
        {"offset": 150, "event": "$autocapture", "distinct_id": "distinct_id"},
        self.team.pk,
        "2020-01-01T12:00:05.200Z",
        None,
    )  # no sent at makes no difference for offset
    process_event_ee(
        "distinct_id",
        "",
        "",
        {"offset": 150, "event": "$autocapture", "distinct_id": "distinct_id"},
        self.team.pk,
        "2020-01-01T12:00:05.200Z",
        None,
    )  # no sent at makes no difference for offset

    events = get_events()
    returned_time = datetime.strptime(events[0]["timestamp"], "%Y-%m-%dT%H:%M:%S.%f%z")
    self.assertEqual(returned_time.isoformat(), "2020-01-01T12:00:05.050000+00:00")

def test_alias_merge_properties(self) -> None:
    Person.objects.create(
        distinct_ids=["old_distinct_id"],
        team_id=self.team.pk,
        properties={"key_on_both": "old value both", "key_on_old": "old value"},
    )
    Person.objects.create(
        distinct_ids=["new_distinct_id"],
        team_id=self.team.pk,
        properties={"key_on_both": "new value both", "key_on_new": "new value"},
    )

    process_event(
        "new_distinct_id",
        "",
        "",
        {
            "event": "$create_alias",
            "properties": {"distinct_id": "new_distinct_id", "token": self.team.api_token, "alias": "old_distinct_id"},
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )
    process_event_ee(
        "new_distinct_id",
        "",
        "",
        {
            "event": "$create_alias",
            "properties": {"distinct_id": "new_distinct_id", "token": self.team.api_token, "alias": "old_distinct_id"},
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

    events = get_events()
    self.assertEqual(len(events), 1)

    distinct_ids = [item["distinct_id"] for item in get_person_distinct_ids(team_id=self.team.pk)]
    self.assertEqual(sorted(distinct_ids), sorted(["old_distinct_id", "new_distinct_id"]))

    # Assume that clickhouse has done replacement
    ch_client.execute("OPTIMIZE TABLE person")

    persons = get_persons(team_id=self.team.pk)
    self.assertEqual(
        persons[0]["properties"],
        {"key_on_both": "new value both", "key_on_new": "new value", "key_on_old": "old value"},
    )

def test_anonymized_ip_capture(self) -> None:
    self.team.anonymize_ips = True
    self.team.save()

    user = self._create_user("tim")
    Person.objects.create(team_id=self.team.pk, distinct_ids=["asdfasdfasdf"])

    process_event(
        "asdfasdfasdf",
        "11.12.13.14",
        "",
        {"event": "$pageview", "properties": {"distinct_id": "asdfasdfasdf", "token": self.team.api_token}},
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )
    process_event_ee(
        "asdfasdfasdf",
        "11.12.13.14",
        "",
        {"event": "$pageview", "properties": {"distinct_id": "asdfasdfasdf", "token": self.team.api_token}},
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

    events = get_events()
    self.assertNotIn("$ip", events[0]["properties"].keys())

def test_ip_capture(self) -> None:
    user = self._create_user("tim")
    Person.objects.create(team_id=self.team.pk, distinct_ids=["asdfasdfasdf"])

    process_event(
        "asdfasdfasdf",
        "11.12.13.14",
        "",
        {"event": "$pageview", "properties": {"distinct_id": "asdfasdfasdf", "token": self.team.api_token}},
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )
    process_event_ee(
        "asdfasdfasdf",
        "11.12.13.14",
        "",
        {"event": "$pageview", "properties": {"distinct_id": "asdfasdfasdf", "token": self.team.api_token}},
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

    events = get_events()
    self.assertEqual(events[0]["properties"]["$ip"], '"11.12.13.14"')

def test_capture_new_person(self) -> None:
    user = self._create_user("tim")
    team_id = self.team.pk

    # TODO: with self.assertNumQueries(7):
    process_event(
        2,
        "",
        "",
        {
            "event": "$autocapture",
            "properties": {
                "distinct_id": 2,
                "token": self.team.api_token,
                "$elements": [
                    {"tag_name": "a", "nth_child": 1, "nth_of_type": 2, "attr__class": "btn btn-sm"},
                    {"tag_name": "div", "nth_child": 1, "nth_of_type": 2, "$el_text": "💻"},
                ],
            },
        },
        team_id,
        now().isoformat(),
        now().isoformat(),
    )
    process_event_ee(
        2,
        "",
        "",
        {
            "event": "$autocapture",
            "properties": {
                "distinct_id": 2,
                "token": self.team.api_token,
                "$elements": [
                    {"tag_name": "a", "nth_child": 1, "nth_of_type": 2, "attr__class": "btn btn-sm"},
                    {"tag_name": "div", "nth_child": 1, "nth_of_type": 2, "$el_text": "💻"},
                ],
            },
        },
        team_id,
        now().isoformat(),
        now().isoformat(),
    )

    distinct_ids = [item["distinct_id"] for item in get_person_distinct_ids(team_id=self.team.pk)]
    self.assertEqual(distinct_ids, ["2"])

    events = get_events()
    self.assertEqual(events[0]["event"], "$autocapture")

    elements = get_elements_by_elements_hash(elements_hash=events[0]["elements_hash"], team_id=team_id)
    self.assertEqual(elements[0]["tag_name"], "a")
    self.assertEqual(elements[0]["attr_class"], ["btn", "btn-sm"])
    self.assertEqual(elements[1]["order"], 1)
    self.assertEqual(elements[1]["text"], "💻")
    self.assertEqual(events[0]["person"], "2")

def test_distinct_team_leakage(self) -> None:
    team2 = Team.objects.create()
    Person.objects.create(team_id=team2.pk, distinct_ids=["2"], properties={"email": "*****@*****.**"})
    Person.objects.create(team_id=self.team.pk, distinct_ids=["1", "2"])

    try:
        process_event(
            "2",
            "",
            "",
            {
                "event": "$identify",
                "properties": {"$anon_distinct_id": "1", "token": self.team.api_token, "distinct_id": "2"},
            },
            self.team.pk,
            now().isoformat(),
            now().isoformat(),
        )
        process_event_ee(
            "2",
            "",
            "",
            {
                "event": "$identify",
                "properties": {"$anon_distinct_id": "1", "token": self.team.api_token, "distinct_id": "2"},
            },
            self.team.pk,
            now().isoformat(),
            now().isoformat(),
        )
    except:
        pass

    ids: Dict[int, Any] = {self.team.pk: [], team2.pk: []}
    for pid in get_person_distinct_ids(team_id=self.team.pk):
        ids[pid["team_id"]].append(pid["distinct_id"])
    for pid in get_person_distinct_ids(team_id=team2.pk):
        ids[pid["team_id"]].append(pid["distinct_id"])

    self.assertEqual(sorted(ids[self.team.pk]), sorted(["1", "2"]))
    self.assertEqual(ids[team2.pk], ["2"])

    # Assume that clickhouse has done replacement
    ch_client.execute("OPTIMIZE TABLE person")

    people1 = get_persons(team_id=self.team.pk)
    people2 = get_persons(team_id=team2.pk)
    self.assertEqual(len(people1), 1)
    self.assertEqual(len(people2), 1)
    self.assertEqual(people1[0]["team_id"], self.team.pk)
    self.assertEqual(people1[0]["properties"], {})
    self.assertEqual(people2[0]["team_id"], team2.pk)
    self.assertEqual(people2[0]["properties"], {"email": "*****@*****.**"})

def test_distinct_with_anonymous_id(self) -> None:
    create_person(team_id=self.team.pk, distinct_ids=["anonymous_id"])

    process_event_ee(
        "new_distinct_id",
        "",
        "",
        {
            "event": "$identify",
            "properties": {
                "$anon_distinct_id": "anonymous_id",
                "token": self.team.api_token,
                "distinct_id": "new_distinct_id",
            },
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

    events = get_events()
    distinct_ids = [item["distinct_id"] for item in get_person_distinct_ids(team_id=self.team.pk)]
    self.assertEqual(len(events), 1)
    self.assertEqual(sorted(distinct_ids), sorted(["anonymous_id", "new_distinct_id"]))

    # check no errors as this call can happen multiple times
    process_event_ee(
        "new_distinct_id",
        "",
        "",
        {
            "event": "$identify",
            "properties": {
                "$anon_distinct_id": "anonymous_id",
                "token": self.team.api_token,
                "distinct_id": "new_distinct_id",
            },
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

def test_distinct_team_leakage(self) -> None:
    team2 = Team.objects.create()
    create_person(team_id=team2.pk, distinct_ids=["2"], properties={"email": "*****@*****.**"})
    create_person(team_id=self.team.pk, distinct_ids=["1", "2"])

    try:
        process_event_ee(
            "2",
            "",
            "",
            {
                "event": "$identify",
                "properties": {"$anon_distinct_id": "1", "token": self.team.api_token, "distinct_id": "2"},
            },
            self.team.pk,
            now().isoformat(),
            now().isoformat(),
        )
    except:
        pass

    ids = {self.team.pk: [], team2.pk: []}
    for pid in get_person_distinct_ids(team_id=self.team.pk):
        ids[pid["team_id"]].append(pid["distinct_id"])
    for pid in get_person_distinct_ids(team_id=team2.pk):
        ids[pid["team_id"]].append(pid["distinct_id"])

    self.assertEqual(sorted(ids[self.team.pk]), sorted(["1", "2"]))
    self.assertEqual(ids[team2.pk], ["2"])

    people1 = get_persons(team_id=self.team.pk)
    people2 = get_persons(team_id=team2.pk)
    self.assertEqual(len(people1), 1)
    self.assertEqual(len(people2), 1)
    self.assertEqual(people1[0]["team_id"], self.team.pk)
    self.assertEqual(people1[0]["properties"], {})
    self.assertEqual(people2[0]["team_id"], team2.pk)
    self.assertEqual(people2[0]["properties"], {"email": "*****@*****.**"})

def test_alias_merge_properties(self) -> None:
    create_person(
        distinct_ids=["old_distinct_id"],
        team_id=self.team.pk,
        properties={"key_on_both": "old value both", "key_on_old": "old value"},
    )
    create_person(
        distinct_ids=["new_distinct_id"],
        team_id=self.team.pk,
        properties={"key_on_both": "new value both", "key_on_new": "new value"},
    )

    process_event_ee(
        "new_distinct_id",
        "",
        "",
        {
            "event": "$create_alias",
            "properties": {"distinct_id": "new_distinct_id", "token": self.team.api_token, "alias": "old_distinct_id"},
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

    events = get_events()
    self.assertEqual(len(events), 1)

    distinct_ids = [item["distinct_id"] for item in get_person_distinct_ids(team_id=self.team.pk)]
    self.assertEqual(sorted(distinct_ids), sorted(["old_distinct_id", "new_distinct_id"]))

    persons = get_persons(team_id=self.team.pk)
    self.assertEqual(
        persons[0]["properties"],
        {"key_on_both": "new value both", "key_on_new": "new value", "key_on_old": "old value"},
    )

def _process_event_ee(
    distinct_id: str, ip: str, site_url: str, data: dict, team_id: int, now: str, sent_at: Optional[str],
) -> None:
    return process_event_ee(
        distinct_id=distinct_id,
        ip=ip,
        site_url=site_url,
        data=data,
        team_id=team_id,
        now=parser.isoparse(now),
        sent_at=parser.isoparse(sent_at) if sent_at else None,
    )

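# Hedged usage sketch (an illustration, not part of the original file): this wrapper lets tests keep passing
# ISO-8601 strings for `now` / `sent_at` while process_event_ee itself receives datetime objects parsed with
# dateutil's parser.isoparse. A call mirroring the tests in this file would look like:
#
#     _process_event_ee(
#         "some-distinct-id",
#         "",
#         "",
#         {"event": "$pageview", "properties": {"distinct_id": "some-distinct-id", "token": self.team.api_token}},
#         self.team.pk,
#         now().isoformat(),
#         None,
#     )
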
def test_ip_override(self) -> None:
    user = self._create_user("tim")
    Person.objects.create(team=self.team, distinct_ids=["asdfasdfasdf"])

    process_event_ee(
        "asdfasdfasdf",
        "11.12.13.14",
        "",
        {
            "event": "$pageview",
            "properties": {"$ip": "1.0.0.1", "distinct_id": "asdfasdfasdf", "token": self.team.api_token},
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

    event = get_events()[0]
    self.assertEqual(event["properties"]["$ip"], "1.0.0.1")

def get_event(request):
    timer = statsd.Timer("%s_posthog_cloud" % (settings.STATSD_PREFIX,))
    timer.start()
    now = timezone.now()
    try:
        data_from_request = load_data_from_request(request)
        data = data_from_request["data"]
    except TypeError:
        return cors_response(
            request,
            JsonResponse(
                {"code": "validation", "message": "Malformed request data. Make sure you're sending valid JSON."},
                status=400,
            ),
        )
    if not data:
        return cors_response(
            request,
            JsonResponse(
                {
                    "code": "validation",
                    "message": "No data found. Make sure to use a POST request when sending the payload in the body of the request.",
                },
                status=400,
            ),
        )

    sent_at = _get_sent_at(data, request)

    token = _get_token(data, request)
    if not token:
        return cors_response(
            request,
            JsonResponse(
                {
                    "code": "validation",
                    "message": "API key not provided. You can find your project API key in PostHog project settings.",
                },
                status=401,
            ),
        )

    team = Team.objects.get_team_from_token(token)

    if team is None:
        try:
            project_id = _get_project_id(data, request)
        except:
            return cors_response(
                request,
                JsonResponse({"code": "validation", "message": "Invalid project ID."}, status=400),
            )
        if not project_id:
            return cors_response(
                request,
                JsonResponse(
                    {
                        "code": "validation",
                        "message": "Project API key invalid. You can find your project API key in PostHog project settings.",
                    },
                    status=401,
                ),
            )
        user = User.objects.get_from_personal_api_key(token)
        if user is None:
            return cors_response(
                request,
                JsonResponse({"code": "validation", "message": "Personal API key invalid."}, status=401),
            )
        team = user.teams.get(id=project_id)

    if isinstance(data, dict):
        if data.get("batch"):  # posthog-python and posthog-ruby
            data = data["batch"]
            assert data is not None
        elif "engage" in request.path_info:  # JS identify call
            data["event"] = "$identify"  # make sure it has an event name

    if isinstance(data, list):
        events = data
    else:
        events = [data]

    for event in events:
        try:
            distinct_id = _get_distinct_id(event)
        except KeyError:
            return cors_response(
                request,
                JsonResponse(
                    {
                        "code": "validation",
                        "message": "You need to set user distinct ID field `distinct_id`.",
                        "item": event,
                    },
                    status=400,
                ),
            )
        if not event.get("event"):
            return cors_response(
                request,
                JsonResponse(
                    {"code": "validation", "message": "You need to set event name field `event`.", "item": event},
                    status=400,
                ),
            )
        if not event.get("properties"):
            event["properties"] = {}

        _ensure_web_feature_flags_in_properties(event, team, distinct_id)

        event_uuid = UUIDT()

        if is_ee_enabled():
            log_topics = [KAFKA_EVENTS_WAL]

            if settings.PLUGIN_SERVER_INGESTION:
                log_topics.append(KAFKA_EVENTS_PLUGIN_INGESTION)
                statsd.Counter("%s_posthog_cloud_plugin_server_ingestion" % (settings.STATSD_PREFIX,)).increment()

            log_event(
                distinct_id=distinct_id,
                ip=get_ip_address(request),
                site_url=request.build_absolute_uri("/")[:-1],
                data=event,
                team_id=team.id,
                now=now,
                sent_at=sent_at,
                event_uuid=event_uuid,
                topics=log_topics,
            )

            # must be done after logging because process_event_ee modifies the event, e.g. by removing $elements
            if not settings.PLUGIN_SERVER_INGESTION:
                process_event_ee(
                    distinct_id=distinct_id,
                    ip=get_ip_address(request),
                    site_url=request.build_absolute_uri("/")[:-1],
                    data=event,
                    team_id=team.id,
                    now=now,
                    sent_at=sent_at,
                    event_uuid=event_uuid,
                )
        else:
            task_name = "posthog.tasks.process_event.process_event"
            if settings.PLUGIN_SERVER_INGESTION or team.plugins_opt_in:
                task_name += "_with_plugins"
                celery_queue = settings.PLUGINS_CELERY_QUEUE
            else:
                celery_queue = settings.CELERY_DEFAULT_QUEUE

            celery_app.send_task(
                name=task_name,
                queue=celery_queue,
                args=[
                    distinct_id,
                    get_ip_address(request),
                    request.build_absolute_uri("/")[:-1],
                    event,
                    team.id,
                    now.isoformat(),
                    sent_at,
                ],
            )

    timer.stop("event_endpoint")
    return cors_response(request, JsonResponse({"status": 1}))

def get_event(request):
    timer = statsd.Timer("%s_posthog_cloud" % (settings.STATSD_PREFIX,))
    timer.start()
    now = timezone.now()
    try:
        data = load_data_from_request(request)
    except RequestParsingError as error:
        capture_exception(error)  # We still capture this on Sentry to identify actual potential bugs
        return cors_response(
            request,
            generate_exception_response(f"Malformed request data: {error}", code="invalid_payload"),
        )
    if not data:
        return cors_response(
            request,
            generate_exception_response(
                "No data found. Make sure to use a POST request when sending the payload in the body of the request.",
                code="no_data",
            ),
        )

    sent_at = _get_sent_at(data, request)

    token = _get_token(data, request)
    if not token:
        return cors_response(
            request,
            generate_exception_response(
                "API key not provided. You can find your project API key in PostHog project settings.",
                type="authentication_error",
                code="missing_api_key",
                status_code=status.HTTP_401_UNAUTHORIZED,
            ),
        )

    team = Team.objects.get_team_from_token(token)

    if team is None:
        try:
            project_id = _get_project_id(data, request)
        except ValueError:
            return cors_response(
                request,
                generate_exception_response("Invalid Project ID.", code="invalid_project", attr="project_id"),
            )
        if not project_id:
            return cors_response(
                request,
                generate_exception_response(
                    "Project API key invalid. You can find your project API key in PostHog project settings.",
                    type="authentication_error",
                    code="invalid_api_key",
                    status_code=status.HTTP_401_UNAUTHORIZED,
                ),
            )
        user = User.objects.get_from_personal_api_key(token)
        if user is None:
            return cors_response(
                request,
                generate_exception_response(
                    "Invalid Personal API key.",
                    type="authentication_error",
                    code="invalid_personal_api_key",
                    status_code=status.HTTP_401_UNAUTHORIZED,
                ),
            )
        team = user.teams.get(id=project_id)

    if isinstance(data, dict):
        if data.get("batch"):  # posthog-python and posthog-ruby
            data = data["batch"]
            assert data is not None
        elif "engage" in request.path_info:  # JS identify call
            data["event"] = "$identify"  # make sure it has an event name

    if isinstance(data, list):
        events = data
    else:
        events = [data]

    try:
        events = preprocess_session_recording_events(events)
    except ValueError as e:
        return cors_response(request, generate_exception_response(f"Invalid payload: {e}", code="invalid_payload"))

    for event in events:
        try:
            distinct_id = _get_distinct_id(event)
        except KeyError:
            return cors_response(
                request,
                generate_exception_response(
                    "You need to set user distinct ID field `distinct_id`.", code="required", attr="distinct_id"
                ),
            )
        if not event.get("event"):
            return cors_response(
                request,
                generate_exception_response(
                    "You need to set user event name, field `event`.", code="required", attr="event"
                ),
            )
        if not event.get("properties"):
            event["properties"] = {}

        _ensure_web_feature_flags_in_properties(event, team, distinct_id)

        event_uuid = UUIDT()
        ip = None if team.anonymize_ips else get_ip_address(request)

        if is_ee_enabled():
            log_topics = [KAFKA_EVENTS_WAL]

            if settings.PLUGIN_SERVER_INGESTION:
                log_topics.append(KAFKA_EVENTS_PLUGIN_INGESTION)
                statsd.Counter("%s_posthog_cloud_plugin_server_ingestion" % (settings.STATSD_PREFIX,)).increment()

            log_event(
                distinct_id=distinct_id,
                ip=ip,
                site_url=request.build_absolute_uri("/")[:-1],
                data=event,
                team_id=team.id,
                now=now,
                sent_at=sent_at,
                event_uuid=event_uuid,
                topics=log_topics,
            )

            # must be done after logging because process_event_ee modifies the event, e.g. by removing $elements
            if not settings.PLUGIN_SERVER_INGESTION:
                process_event_ee(
                    distinct_id=distinct_id,
                    ip=ip,
                    site_url=request.build_absolute_uri("/")[:-1],
                    data=event,
                    team_id=team.id,
                    now=now,
                    sent_at=sent_at,
                    event_uuid=event_uuid,
                )
        else:
            task_name = "posthog.tasks.process_event.process_event_with_plugins"
            celery_queue = settings.PLUGINS_CELERY_QUEUE
            celery_app.send_task(
                name=task_name,
                queue=celery_queue,
                args=[distinct_id, ip, request.build_absolute_uri("/")[:-1], event, team.id, now.isoformat(), sent_at],
            )

    timer.stop("event_endpoint")
    return cors_response(request, JsonResponse({"status": 1}))

def get_event(request):
    timer = statsd.Timer("%s_posthog_cloud" % (settings.STATSD_PREFIX,))
    timer.start()
    now = timezone.now()
    try:
        data_from_request = load_data_from_request(request)
        data = data_from_request["data"]
    except TypeError:
        return cors_response(
            request,
            JsonResponse(
                {"code": "validation", "message": "Malformed request data. Make sure you're sending valid JSON."},
                status=400,
            ),
        )
    if not data:
        return cors_response(
            request,
            JsonResponse(
                {
                    "code": "validation",
                    "message": "No data found. Make sure to use a POST request when sending the payload in the body of the request.",
                },
                status=400,
            ),
        )

    sent_at = _get_sent_at(data, request)

    token = _get_token(data, request)
    if not token:
        return cors_response(
            request,
            JsonResponse(
                {
                    "code": "validation",
                    "message": "API key not provided. You can find your project API key in PostHog project settings.",
                },
                status=400,
            ),
        )

    team = Team.objects.get_team_from_token(token)

    if team is None:
        try:
            project_id = _get_project_id(data, request)
        except:
            return cors_response(
                request,
                JsonResponse({"code": "validation", "message": "Invalid project ID."}, status=400),
            )
        if not project_id:
            return cors_response(
                request,
                JsonResponse(
                    {
                        "code": "validation",
                        "message": "Project API key invalid. You can find your project API key in PostHog project settings.",
                    },
                    status=400,
                ),
            )
        user = User.objects.get_from_personal_api_key(token)
        if user is None:
            return cors_response(
                request,
                JsonResponse({"code": "validation", "message": "Personal API key invalid."}, status=400),
            )
        team = user.teams.get(id=project_id)

    if isinstance(data, dict):
        if data.get("batch"):  # posthog-python and posthog-ruby
            data = data["batch"]
            assert data is not None
        elif "engage" in request.path_info:  # JS identify call
            data["event"] = "$identify"  # make sure it has an event name

    if isinstance(data, list):
        events = data
    else:
        events = [data]

    for event in events:
        try:
            distinct_id = _get_distinct_id(event)
        except KeyError:
            return cors_response(
                request,
                JsonResponse(
                    {
                        "code": "validation",
                        "message": "You need to set user distinct ID field `distinct_id`.",
                        "item": event,
                    },
                    status=400,
                ),
            )
        if "event" not in event:
            return cors_response(
                request,
                JsonResponse(
                    {"code": "validation", "message": "You need to set event name field `event`.", "item": event},
                    status=400,
                ),
            )

        if is_ee_enabled():
            process_event_ee(
                distinct_id=distinct_id,
                ip=get_ip_address(request),
                site_url=request.build_absolute_uri("/")[:-1],
                data=event,
                team_id=team.id,
                now=now,
                sent_at=sent_at,
            )
        else:
            task_name = "posthog.tasks.process_event.process_event"
            celery_queue = settings.CELERY_DEFAULT_QUEUE
            if team.plugins_opt_in:
                task_name += "_with_plugins"
                celery_queue = settings.PLUGINS_CELERY_QUEUE

            celery_app.send_task(
                name=task_name,
                queue=celery_queue,
                args=[
                    distinct_id,
                    get_ip_address(request),
                    request.build_absolute_uri("/")[:-1],
                    event,
                    team.id,
                    now.isoformat(),
                    sent_at,
                ],
            )

        if is_ee_enabled() and settings.LOG_TO_WAL:
            # log the event to kafka write ahead log for processing
            log_event(
                distinct_id=distinct_id,
                ip=get_ip_address(request),
                site_url=request.build_absolute_uri("/")[:-1],
                data=event,
                team_id=team.id,
                now=now,
                sent_at=sent_at,
            )

    timer.stop("event_endpoint")
    return cors_response(request, JsonResponse({"status": 1}))

def test_alias_merge_properties(self) -> None:
    with freeze_time("2020-01-04T13:01:01Z"):
        Person.objects.create(
            distinct_ids=["old_distinct_id"],
            team_id=self.team.pk,
            properties={"key_on_both": "old value both", "key_on_old": "old value"},
        )
        Person.objects.create(
            distinct_ids=["new_distinct_id"],
            team_id=self.team.pk,
            properties={"key_on_both": "new value both", "key_on_new": "new value"},
        )

    with freeze_time("2020-01-04T16:01:01Z"):
        process_event(
            "new_distinct_id",
            "",
            "",
            {
                "event": "$create_alias",
                "properties": {
                    "distinct_id": "new_distinct_id",
                    "token": self.team.api_token,
                    "alias": "old_distinct_id",
                },
            },
            self.team.pk,
            now().isoformat(),
            now().isoformat(),
        )
        process_event_ee(
            "new_distinct_id",
            "",
            "",
            {
                "event": "$create_alias",
                "properties": {
                    "distinct_id": "new_distinct_id",
                    "token": self.team.api_token,
                    "alias": "old_distinct_id",
                },
            },
            self.team.pk,
            now().isoformat(),
            now().isoformat(),
        )

    events = get_events()
    self.assertEqual(len(events), 1)

    distinct_ids = [item["distinct_id"] for item in get_person_distinct_ids(team_id=self.team.pk)]
    self.assertEqual(sorted(distinct_ids), sorted(["old_distinct_id", "new_distinct_id"]))

    ch_client.execute("OPTIMIZE TABLE person")
    ch_client.execute("OPTIMIZE TABLE persons_up_to_date")

    persons = get_persons(team_id=self.team.pk)
    self.assertEqual(
        persons[0]["properties"],
        {"key_on_both": "new value both", "key_on_new": "new value", "key_on_old": "old value"},
    )

def test_alias_twice(self) -> None:
    Person.objects.create(team_id=self.team.pk, distinct_ids=["old_distinct_id"])

    process_event(
        "new_distinct_id",
        "",
        "",
        {
            "event": "$create_alias",
            "properties": {"distinct_id": "new_distinct_id", "token": self.team.api_token, "alias": "old_distinct_id"},
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )
    process_event_ee(
        "new_distinct_id",
        "",
        "",
        {
            "event": "$create_alias",
            "properties": {"distinct_id": "new_distinct_id", "token": self.team.api_token, "alias": "old_distinct_id"},
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

    Person.objects.create(team_id=self.team.pk, distinct_ids=["old_distinct_id_2"])

    process_event(
        "new_distinct_id",
        "",
        "",
        {
            "event": "$create_alias",
            "properties": {"distinct_id": "new_distinct_id", "token": self.team.api_token, "alias": "old_distinct_id_2"},
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )
    process_event_ee(
        "new_distinct_id",
        "",
        "",
        {
            "event": "$create_alias",
            "properties": {"distinct_id": "new_distinct_id", "token": self.team.api_token, "alias": "old_distinct_id_2"},
        },
        self.team.pk,
        now().isoformat(),
        now().isoformat(),
    )

    distinct_ids = [item["distinct_id"] for item in get_person_distinct_ids(team_id=self.team.pk)]
    events = get_events()
    self.assertEqual(len(events), 2)
    self.assertEqual(
        sorted(distinct_ids),
        sorted(["old_distinct_id", "new_distinct_id", "old_distinct_id_2"]),
    )