Exemplo n.º 1
0
def test_preprocess_recording_event_creates_chunks():
    """Un-chunked ``$snapshot`` events are expected to come back grouped and chunked.

    Three raw snapshot events covering two session ids are fed in; the test
    expects two resulting events (one per session id, in input order), each
    carrying a ``chunk_id`` inside its ``$snapshot_data``.
    """
    events = [
        {
            "event": "$snapshot",
            "properties": {"$session_id": "1234", "$snapshot_data": {"type": 2, "foo": "bar"}, "distinct_id": "abc123"},
        },
        {
            "event": "$snapshot",
            "properties": {"$session_id": "1234", "$snapshot_data": {"type": 1, "foo": "bar"}, "distinct_id": "abc123"},
        },
        {
            "event": "$snapshot",
            "properties": {"$session_id": "5678", "$snapshot_data": {"type": 1, "foo": "bar"}, "distinct_id": "abc123"},
        },
    ]

    preprocessed = preprocess_session_recording_events(events)
    assert preprocessed != events
    # The length check guards the zip() below against silently truncating.
    assert len(preprocessed) == 2
    for result, expected_session_id in zip(preprocessed, ["1234", "5678"]):
        assert result["event"] == "$snapshot"
        assert result["properties"]["$session_id"] == expected_session_id
        assert result["properties"]["distinct_id"] == "abc123"
        assert "chunk_id" in result["properties"]["$snapshot_data"]

    # it does not rechunk already chunked events
    assert preprocess_session_recording_events(preprocessed) == preprocessed
def test_preprocess_recording_event_creates_chunks_split_by_session_and_window_id():
    """Snapshot events are expected to be grouped per (session id, window id) pair.

    Four raw snapshot events are fed in: two for session ``1234`` without a
    window id, and two for session ``5678`` with window ids ``1`` and ``2``.
    The test expects three chunked events back, in that order.
    """
    events = [
        {
            "event": "$snapshot",
            "properties": {"$session_id": "1234", "$snapshot_data": {"type": 2, "foo": "bar"}, "distinct_id": "abc123"},
        },
        {
            "event": "$snapshot",
            "properties": {"$session_id": "1234", "$snapshot_data": {"type": 1, "foo": "bar"}, "distinct_id": "abc123"},
        },
        {
            "event": "$snapshot",
            "properties": {
                "$session_id": "5678",
                "$window_id": "1",
                "$snapshot_data": {"type": 1, "foo": "bar"},
                "distinct_id": "abc123",
            },
        },
        {
            "event": "$snapshot",
            "properties": {
                "$session_id": "5678",
                "$window_id": "2",
                "$snapshot_data": {"type": 1, "foo": "bar"},
                "distinct_id": "abc123",
            },
        },
    ]

    preprocessed = preprocess_session_recording_events(events)
    assert preprocessed != events
    # The length check guards the zip() below against silently truncating.
    assert len(preprocessed) == 3
    # (session id, window id) expected for each resulting event, in order.
    expected_groups = [("1234", None), ("5678", "1"), ("5678", "2")]
    for result, (expected_session_id, expected_window_id) in zip(preprocessed, expected_groups):
        assert result["event"] == "$snapshot"
        assert result["properties"]["$session_id"] == expected_session_id
        assert result["properties"].get("$window_id") == expected_window_id
        assert result["properties"]["distinct_id"] == "abc123"
        assert "chunk_id" in result["properties"]["$snapshot_data"]

    # it does not rechunk already chunked events
    assert preprocess_session_recording_events(preprocessed) == preprocessed
Exemplo n.º 3
0
def get_event(request):
    """Validate a capture request, resolve its team and dispatch each event for ingestion.

    Exits early with a CORS-wrapped error response when the payload cannot be
    parsed, is empty, carries no token, or cannot be matched to a team. The
    remaining events go through ``preprocess_session_recording_events`` and are
    then, one by one, checked for a distinct id and an event name before being
    dispatched: through ``log_event`` (plus ``process_event_ee`` when
    ``settings.PLUGIN_SERVER_INGESTION`` is off) if ``is_ee_enabled()``,
    otherwise through a Celery task.

    Args:
        request: The incoming HTTP request carrying the event payload.

    Returns:
        ``cors_response`` wrapping ``JsonResponse({"status": 1})`` on success,
        or wrapping the first error built by ``generate_exception_response``.
    """
    timer = statsd.Timer(f"{settings.STATSD_PREFIX}_posthog_cloud")
    timer.start()
    now = timezone.now()
    try:
        data = load_data_from_request(request)
    except RequestParsingError as error:
        capture_exception(error)  # We still capture this on Sentry to identify actual potential bugs
        return cors_response(
            request, generate_exception_response(f"Malformed request data: {error}", code="invalid_payload"),
        )
    if not data:
        return cors_response(
            request,
            generate_exception_response(
                "No data found. Make sure to use a POST request when sending the payload in the body of the request.",
                code="no_data",
            ),
        )

    sent_at = _get_sent_at(data, request)

    token = _get_token(data, request)

    if not token:
        return cors_response(
            request,
            generate_exception_response(
                "API key not provided. You can find your project API key in PostHog project settings.",
                type="authentication_error",
                code="missing_api_key",
                status_code=status.HTTP_401_UNAUTHORIZED,
            ),
        )

    team = Team.objects.get_team_from_token(token)

    if team is None:
        # The token is not a project API key: fall back to treating it as a
        # personal API key, which additionally requires a project id.
        try:
            project_id = _get_project_id(data, request)
        except ValueError:
            return cors_response(
                request, generate_exception_response("Invalid Project ID.", code="invalid_project", attr="project_id"),
            )
        if not project_id:
            return cors_response(
                request,
                generate_exception_response(
                    "Project API key invalid. You can find your project API key in PostHog project settings.",
                    type="authentication_error",
                    code="invalid_api_key",
                    status_code=status.HTTP_401_UNAUTHORIZED,
                ),
            )
        user = User.objects.get_from_personal_api_key(token)
        if user is None:
            return cors_response(
                request,
                generate_exception_response(
                    "Invalid Personal API key.",
                    type="authentication_error",
                    code="invalid_personal_api_key",
                    status_code=status.HTTP_401_UNAUTHORIZED,
                ),
            )
        team = user.teams.get(id=project_id)

    if isinstance(data, dict):
        if data.get("batch"):  # posthog-python and posthog-ruby
            data = data["batch"]
            assert data is not None
        elif "engage" in request.path_info:  # JS identify call
            data["event"] = "$identify"  # make sure it has an event name

    if isinstance(data, list):
        events = data
    else:
        events = [data]

    try:
        events = preprocess_session_recording_events(events)
    except ValueError as e:
        return cors_response(request, generate_exception_response(f"Invalid payload: {e}", code="invalid_payload"))

    # Both values depend only on the request and the team, so they are computed
    # once here rather than once (or twice) per event inside the loop.
    site_url = request.build_absolute_uri("/")[:-1]
    ip = None if team.anonymize_ips else get_ip_address(request)

    for event in events:
        try:
            distinct_id = _get_distinct_id(event)
        except KeyError:
            return cors_response(
                request,
                generate_exception_response(
                    "You need to set user distinct ID field `distinct_id`.", code="required", attr="distinct_id"
                ),
            )
        if not event.get("event"):
            return cors_response(
                request,
                generate_exception_response(
                    "You need to set user event name, field `event`.", code="required", attr="event"
                ),
            )

        if not event.get("properties"):
            event["properties"] = {}

        _ensure_web_feature_flags_in_properties(event, team, distinct_id)

        event_uuid = UUIDT()

        if is_ee_enabled():
            log_topics = [KAFKA_EVENTS_WAL]

            if settings.PLUGIN_SERVER_INGESTION:
                log_topics.append(KAFKA_EVENTS_PLUGIN_INGESTION)
                statsd.Counter(f"{settings.STATSD_PREFIX}_posthog_cloud_plugin_server_ingestion").increment()

            log_event(
                distinct_id=distinct_id,
                ip=ip,
                site_url=site_url,
                data=event,
                team_id=team.id,
                now=now,
                sent_at=sent_at,
                event_uuid=event_uuid,
                topics=log_topics,
            )

            # must be done after logging because process_event_ee modifies the event, e.g. by removing $elements
            if not settings.PLUGIN_SERVER_INGESTION:
                process_event_ee(
                    distinct_id=distinct_id,
                    ip=ip,
                    site_url=site_url,
                    data=event,
                    team_id=team.id,
                    now=now,
                    sent_at=sent_at,
                    event_uuid=event_uuid,
                )
        else:
            task_name = "posthog.tasks.process_event.process_event_with_plugins"
            celery_queue = settings.PLUGINS_CELERY_QUEUE
            celery_app.send_task(
                name=task_name,
                queue=celery_queue,
                args=[distinct_id, ip, site_url, event, team.id, now.isoformat(), sent_at,],
            )
    timer.stop("event_endpoint")
    return cors_response(request, JsonResponse({"status": 1}))
Exemplo n.º 4
0
def get_event(request):
    """Validate a capture request, resolve its team and forward each event to ``capture_internal``.

    Exits early with a CORS-wrapped error response when the payload cannot be
    parsed, is empty, carries no token, or cannot be matched to a team. The
    remaining events go through ``preprocess_session_recording_events`` and are
    then, one by one, checked for a usable distinct id and an event name,
    tagged with ``$environment`` when ``_clean_token`` flagged the token as a
    test-environment one, and handed to ``capture_internal``.

    Args:
        request: The incoming HTTP request carrying the event payload.

    Returns:
        ``cors_response`` wrapping ``JsonResponse({"status": 1})`` on success,
        or wrapping the first error built by ``generate_exception_response``.
    """
    timer = statsd.timer("posthog_cloud_event_endpoint").start()
    now = timezone.now()
    try:
        data = load_data_from_request(request)
    except RequestParsingError as error:
        # We still capture this on Sentry to identify actual potential bugs
        capture_exception(error)
        return cors_response(
            request,
            generate_exception_response("capture",
                                        f"Malformed request data: {error}",
                                        code="invalid_payload"),
        )
    if not data:
        return cors_response(
            request,
            generate_exception_response(
                "capture",
                "No data found. Make sure to use a POST request when sending the payload in the body of the request.",
                code="no_data",
            ),
        )

    sent_at = _get_sent_at(data, request)

    token = _get_token(data, request)

    if not token:
        return cors_response(
            request,
            generate_exception_response(
                "capture",
                "API key not provided. You can find your project API key in PostHog project settings.",
                type="authentication_error",
                code="missing_api_key",
                status_code=status.HTTP_401_UNAUTHORIZED,
            ),
        )

    token, is_test_environment = _clean_token(token)
    assert token is not None

    team = Team.objects.get_team_from_token(token)

    if team is None:
        # The token is not a project API key: fall back to treating it as a
        # personal API key, which additionally requires a project id.
        try:
            project_id = _get_project_id(data, request)
        except ValueError:
            return cors_response(
                request,
                generate_exception_response("capture",
                                            "Invalid Project ID.",
                                            code="invalid_project",
                                            attr="project_id"),
            )
        if not project_id:
            return cors_response(
                request,
                generate_exception_response(
                    "capture",
                    "Project API key invalid. You can find your project API key in PostHog project settings.",
                    type="authentication_error",
                    code="invalid_api_key",
                    status_code=status.HTTP_401_UNAUTHORIZED,
                ),
            )
        user = User.objects.get_from_personal_api_key(token)
        if user is None:
            return cors_response(
                request,
                generate_exception_response(
                    "capture",
                    "Invalid Personal API key.",
                    type="authentication_error",
                    code="invalid_personal_api_key",
                    status_code=status.HTTP_401_UNAUTHORIZED,
                ),
            )
        team = user.teams.get(id=project_id)

    if isinstance(data, dict):
        if data.get("batch"):  # posthog-python and posthog-ruby
            data = data["batch"]
            assert data is not None
        elif "engage" in request.path_info:  # JS identify call
            data["event"] = "$identify"  # make sure it has an event name

    if isinstance(data, list):
        events = data
    else:
        events = [data]

    try:
        events = preprocess_session_recording_events(events)
    except ValueError as e:
        return cors_response(
            request,
            generate_exception_response("capture",
                                        f"Invalid payload: {e}",
                                        code="invalid_payload"))

    # Both values depend only on the request and the team, so they are computed
    # once here rather than once per event inside the loop.
    site_url = request.build_absolute_uri("/")[:-1]
    ip = None if team.anonymize_ips else get_ip_address(request)

    for event in events:
        try:
            distinct_id = _get_distinct_id(event)
        except KeyError:
            return cors_response(
                request,
                generate_exception_response(
                    "capture",
                    "You need to set user distinct ID field `distinct_id`.",
                    code="required",
                    attr="distinct_id",
                ),
            )
        except ValueError:
            return cors_response(
                request,
                generate_exception_response(
                    "capture",
                    "Distinct ID field `distinct_id` must have a non-empty value.",
                    code="required",
                    attr="distinct_id",
                ),
            )
        if not event.get("event"):
            return cors_response(
                request,
                generate_exception_response(
                    "capture",
                    "You need to set user event name, field `event`.",
                    code="required",
                    attr="event"),
            )

        if not event.get("properties"):
            event["properties"] = {}

        # Support test_[apiKey] for users with multiple environments
        if event["properties"].get(
                "$environment") is None and is_test_environment:
            event["properties"]["$environment"] = ENVIRONMENT_TEST

        _ensure_web_feature_flags_in_properties(event, team, distinct_id)

        statsd.incr("posthog_cloud_plugin_server_ingestion")
        capture_internal(event, distinct_id, ip, site_url, now, sent_at,
                         team.pk)

    timer.stop()
    statsd.incr("posthog_cloud_raw_endpoint_success",
                tags={
                    "endpoint": "capture",
                })
    return cors_response(request, JsonResponse({"status": 1}))
Exemplo n.º 5
0
def get_event(request):
    """Validate a capture request, resolve its team and dispatch each event for ingestion.

    Exits early with a CORS-wrapped ``JsonResponse`` (status 400 or 401) when
    the payload cannot be read, is empty, carries no token, or cannot be
    matched to a team. The remaining events go through
    ``preprocess_session_recording_events`` and are then, one by one, checked
    for a distinct id and an event name before being dispatched: through
    ``log_event`` (plus ``process_event_ee`` when
    ``settings.PLUGIN_SERVER_INGESTION`` is off) if ``is_ee_enabled()``,
    otherwise through a Celery task.

    Args:
        request: The incoming HTTP request carrying the event payload.

    Returns:
        ``cors_response`` wrapping ``JsonResponse({"status": 1})`` on success,
        or wrapping a ``{"code": "validation", ...}`` error response.
    """
    timer = statsd.Timer(f"{settings.STATSD_PREFIX}_posthog_cloud")
    timer.start()
    now = timezone.now()
    try:
        data_from_request = load_data_from_request(request)
        data = data_from_request["data"]
    except TypeError:
        return cors_response(
            request,
            JsonResponse(
                {
                    "code":
                    "validation",
                    "message":
                    "Malformed request data. Make sure you're sending valid JSON.",
                },
                status=400,
            ),
        )
    if not data:
        return cors_response(
            request,
            JsonResponse(
                {
                    "code":
                    "validation",
                    "message":
                    "No data found. Make sure to use a POST request when sending the payload in the body of the request.",
                },
                status=400,
            ),
        )
    sent_at = _get_sent_at(data, request)

    token = _get_token(data, request)

    if not token:
        return cors_response(
            request,
            JsonResponse(
                {
                    "code":
                    "validation",
                    "message":
                    "API key not provided. You can find your project API key in PostHog project settings.",
                },
                status=401,
            ),
        )
    team = Team.objects.get_team_from_token(token)

    if team is None:
        # The token is not a project API key: fall back to treating it as a
        # personal API key, which additionally requires a project id.
        try:
            project_id = _get_project_id(data, request)
        except Exception:
            # `Exception` rather than a bare `except:` so that
            # KeyboardInterrupt / SystemExit are not swallowed here.
            return cors_response(
                request,
                JsonResponse(
                    {
                        "code": "validation",
                        "message": "Invalid project ID.",
                    },
                    status=400,
                ),
            )
        if not project_id:
            return cors_response(
                request,
                JsonResponse(
                    {
                        "code":
                        "validation",
                        "message":
                        "Project API key invalid. You can find your project API key in PostHog project settings.",
                    },
                    status=401,
                ),
            )
        user = User.objects.get_from_personal_api_key(token)
        if user is None:
            return cors_response(
                request,
                JsonResponse(
                    {
                        "code": "validation",
                        "message": "Personal API key invalid.",
                    },
                    status=401,
                ),
            )
        team = user.teams.get(id=project_id)

    if isinstance(data, dict):
        if data.get("batch"):  # posthog-python and posthog-ruby
            data = data["batch"]
            assert data is not None
        elif "engage" in request.path_info:  # JS identify call
            data["event"] = "$identify"  # make sure it has an event name

    if isinstance(data, list):
        events = data
    else:
        events = [data]

    events = preprocess_session_recording_events(events)

    # Both values depend only on the request and the team, so they are computed
    # once here rather than once (or twice) per event inside the loop.
    site_url = request.build_absolute_uri("/")[:-1]
    ip = None if team.anonymize_ips else get_ip_address(request)

    for event in events:
        try:
            distinct_id = _get_distinct_id(event)
        except KeyError:
            return cors_response(
                request,
                JsonResponse(
                    {
                        "code": "validation",
                        "message":
                        "You need to set user distinct ID field `distinct_id`.",
                        "item": event,
                    },
                    status=400,
                ),
            )
        if not event.get("event"):
            return cors_response(
                request,
                JsonResponse(
                    {
                        "code": "validation",
                        "message": "You need to set event name field `event`.",
                        "item": event,
                    },
                    status=400,
                ),
            )

        if not event.get("properties"):
            event["properties"] = {}

        _ensure_web_feature_flags_in_properties(event, team, distinct_id)

        event_uuid = UUIDT()

        if is_ee_enabled():
            log_topics = [KAFKA_EVENTS_WAL]

            if settings.PLUGIN_SERVER_INGESTION:
                log_topics.append(KAFKA_EVENTS_PLUGIN_INGESTION)
                statsd.Counter(f"{settings.STATSD_PREFIX}_posthog_cloud_plugin_server_ingestion").increment()

            log_event(
                distinct_id=distinct_id,
                ip=ip,
                site_url=site_url,
                data=event,
                team_id=team.id,
                now=now,
                sent_at=sent_at,
                event_uuid=event_uuid,
                topics=log_topics,
            )

            # must be done after logging because process_event_ee modifies the event, e.g. by removing $elements
            if not settings.PLUGIN_SERVER_INGESTION:
                process_event_ee(
                    distinct_id=distinct_id,
                    ip=ip,
                    site_url=site_url,
                    data=event,
                    team_id=team.id,
                    now=now,
                    sent_at=sent_at,
                    event_uuid=event_uuid,
                )
        else:
            task_name = "posthog.tasks.process_event.process_event"
            if settings.PLUGIN_SERVER_INGESTION or team.plugins_opt_in:
                task_name += "_with_plugins"
                celery_queue = settings.PLUGINS_CELERY_QUEUE
            else:
                celery_queue = settings.CELERY_DEFAULT_QUEUE

            celery_app.send_task(
                name=task_name,
                queue=celery_queue,
                args=[
                    distinct_id,
                    ip,
                    site_url,
                    event,
                    team.id,
                    now.isoformat(),
                    sent_at,
                ],
            )
    timer.stop("event_endpoint")
    return cors_response(request, JsonResponse({"status": 1}))
Exemplo n.º 6
0
def get_event(request):
    """Ingest the events of a capture request, or divert them to the dead letter queue.

    The payload comes from ``get_data`` and the token from ``get_token``;
    ``get_event_ingestion_context`` supplies the ingestion context, a database
    error, or a ready-made error response. When a database error was reported,
    every parsed event goes to ``log_event_to_dead_letter_queue`` instead of
    ``capture_internal``. Events without a distinct id, or for which
    ``parse_event`` returns a falsy value, are skipped.

    Args:
        request: The incoming HTTP request carrying the event payload.

    Returns:
        ``cors_response`` wrapping ``JsonResponse({"status": 1})`` on success,
        an error response from one of the helpers, or a CORS-wrapped 503 when
        ``capture_internal`` raises.
    """
    timer = statsd.timer("posthog_cloud_event_endpoint").start()
    now = timezone.now()

    data, error_response = get_data(request)
    if error_response:
        return error_response

    sent_at = _get_sent_at(data, request)
    token = get_token(data, request)
    if not token:
        return cors_response(
            request,
            generate_exception_response(
                "capture",
                "API key not provided. You can find your project API key in PostHog project settings.",
                type="authentication_error",
                code="missing_api_key",
                status_code=status.HTTP_401_UNAUTHORIZED,
            ),
        )

    ingestion_context, db_error, error_response = get_event_ingestion_context(request, data, token)
    if error_response:
        return error_response

    # Any reported database error reroutes all events of this request.
    send_events_to_dead_letter_queue = bool(db_error)

    if isinstance(data, dict):
        if data.get("batch"):  # posthog-python and posthog-ruby
            data = data["batch"]
            assert data is not None
        elif "engage" in request.path_info:  # JS identify call
            data["event"] = "$identify"  # make sure it has an event name

    events = data if isinstance(data, list) else [data]

    try:
        events = preprocess_session_recording_events(events)
    except ValueError as e:
        return cors_response(
            request, generate_exception_response("capture", f"Invalid payload: {e}", code="invalid_payload")
        )

    site_url = request.build_absolute_uri("/")[:-1]

    # The IP is only recorded when there is a context and it does not anonymize IPs.
    if ingestion_context and not ingestion_context.anonymize_ips:
        ip = get_ip_address(request)
    else:
        ip = None

    for raw_event in events:
        event_uuid = UUIDT()
        distinct_id = get_distinct_id(raw_event)
        if not distinct_id:
            continue

        # A valid uuid supplied in the payload replaces the generated one.
        payload_uuid = raw_event.get("uuid", None)
        if payload_uuid:
            if UUIDT.is_valid_uuid(payload_uuid):
                event_uuid = UUIDT(uuid_str=payload_uuid)
            else:
                statsd.incr("invalid_event_uuid")

        parsed_event = parse_event(raw_event, distinct_id, ingestion_context)
        if not parsed_event:
            continue

        if send_events_to_dead_letter_queue:
            kafka_event = parse_kafka_event_data(
                distinct_id=distinct_id,
                ip=None,
                site_url=site_url,
                team_id=None,
                now=now,
                event_uuid=event_uuid,
                data=parsed_event,
                sent_at=sent_at,
            )
            log_event_to_dead_letter_queue(
                data,
                parsed_event["event"],
                kafka_event,
                f"Unable to fetch team from Postgres. Error: {db_error}",
                "django_server_capture_endpoint",
            )
            continue

        try:
            capture_internal(
                parsed_event, distinct_id, ip, site_url, now, sent_at, ingestion_context.team_id, event_uuid  # type: ignore
            )
        except Exception as e:
            timer.stop()
            capture_exception(e, {"data": data})
            statsd.incr(
                "posthog_cloud_raw_endpoint_failure", tags={"endpoint": "capture",},
            )
            return cors_response(
                request,
                generate_exception_response(
                    "capture",
                    "Unable to store event. Please try again. If you are the owner of this app you can check the logs for further details.",
                    code="server_error",
                    type="server_error",
                    status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
                ),
            )

    timer.stop()
    statsd.incr(
        "posthog_cloud_raw_endpoint_success", tags={"endpoint": "capture",},
    )
    return cors_response(request, JsonResponse({"status": 1}))
Exemplo n.º 7
0
def test_preprocess_with_no_recordings():
    """A batch holding only non-snapshot events is expected to compare equal after preprocessing."""
    non_recording_events = [{"event": "$pageview"}, {"event": "$pageleave"}]
    preprocessed = preprocess_session_recording_events(non_recording_events)
    assert preprocessed == non_recording_events
Exemplo n.º 8
0
def get_event(request):
    """Ingest the events of a capture request, or divert them to the dead letter queue.

    The payload comes from ``get_data`` and the token from ``get_token``;
    ``get_team`` supplies the team, a database error, or a ready-made error
    response. When a database error was reported and
    ``is_clickhouse_enabled()`` is true, every parsed event goes to
    ``log_event_to_dead_letter_queue`` instead of ``capture_internal``. Events
    without a distinct id, or for which ``parse_event`` returns a falsy value,
    are skipped.

    Args:
        request: The incoming HTTP request carrying the event payload.

    Returns:
        ``cors_response`` wrapping ``JsonResponse({"status": 1})`` on success,
        or an error response produced by one of the helpers.
    """
    timer = statsd.timer("posthog_cloud_event_endpoint").start()
    now = timezone.now()

    data, error_response = get_data(request)

    if error_response:
        return error_response

    sent_at = _get_sent_at(data, request)

    token = get_token(data, request)

    if not token:
        return cors_response(
            request,
            generate_exception_response(
                "capture",
                "API key not provided. You can find your project API key in PostHog project settings.",
                type="authentication_error",
                code="missing_api_key",
                status_code=status.HTTP_401_UNAUTHORIZED,
            ),
        )

    team, db_error, error_response = get_team(request, data, token)

    if error_response:
        return error_response

    send_events_to_dead_letter_queue = False
    if db_error and is_clickhouse_enabled():
        send_events_to_dead_letter_queue = True

    if isinstance(data, dict):
        if data.get("batch"):  # posthog-python and posthog-ruby
            data = data["batch"]
            assert data is not None
        elif "engage" in request.path_info:  # JS identify call
            data["event"] = "$identify"  # make sure it has an event name

    if isinstance(data, list):
        events = data
    else:
        events = [data]

    try:
        events = preprocess_session_recording_events(events)
    except ValueError as e:
        return cors_response(
            request,
            generate_exception_response("capture",
                                        f"Invalid payload: {e}",
                                        code="invalid_payload"))

    site_url = request.build_absolute_uri("/")[:-1]

    # No IP is recorded when there is no team or the team anonymizes IPs.
    ip = None if not team or team.anonymize_ips else get_ip_address(request)
    for event in events:
        event_uuid = UUIDT()
        distinct_id = get_distinct_id(event)
        if not distinct_id:
            continue

        event = parse_event(event, distinct_id, team)
        if not event:
            continue

        if send_events_to_dead_letter_queue:
            # The dead-letter record is built without an IP or a team id.
            kafka_event = parse_kafka_event_data(
                distinct_id=distinct_id,
                ip=None,
                site_url=site_url,
                team_id=None,
                now=now,
                event_uuid=event_uuid,
                data=event,
                sent_at=sent_at,
            )

            log_event_to_dead_letter_queue(
                data,
                event["event"],
                kafka_event,
                f"Unable to fetch team from Postgres. Error: {db_error}",
                "django_server_capture_endpoint",
            )
            continue

        statsd.incr("posthog_cloud_plugin_server_ingestion")
        capture_internal(event, distinct_id, ip, site_url, now, sent_at,
                         team.pk, event_uuid)  # type: ignore

    timer.stop()
    # Plain string literal: the original f-string had no placeholders.
    statsd.incr(
        "posthog_cloud_raw_endpoint_success",
        tags={
            "endpoint": "capture",
        },
    )
    return cors_response(request, JsonResponse({"status": 1}))