示例#1
0
def grafana_incoming_features(
    body: Dict[str, Any],
    query_parameters: Dict[str, str],
    auth_info: mlrun.api.schemas.AuthInfo,
):
    """Return one Grafana time-series target per feature of a model endpoint.

    The endpoint is identified by ``endpoint_id`` and ``project`` taken from
    ``query_parameters``. Read permission on the endpoint is queried first,
    then the endpoint's feature columns are read from the project's events
    TSDB table for the requested time range.

    :param body: Request body; only ``rangeRaw.from`` and ``rangeRaw.to`` are
        read here, defaulting to ``"now-1h"`` and ``"now"``.
    :param query_parameters: Must carry ``endpoint_id`` and ``project``.
    :param auth_info: Forwarded to the permission query and the endpoint
        lookup; its ``data_session`` is used as the frames client token.
    :return: A list of ``GrafanaTimeSeriesTarget`` objects, one per column of
        the frame that was read. The list is empty when the endpoint has no
        ``feature_names``.
    """
    endpoint_id = query_parameters.get("endpoint_id")
    project = query_parameters.get("project")

    requested_range = body.get("rangeRaw", {})
    start = requested_range.get("from", "now-1h")
    end = requested_range.get("to", "now")

    # NOTE(review): presumably raises when access is denied - confirm in the OPA client.
    mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
        mlrun.api.schemas.AuthorizationResourceTypes.model_endpoint,
        project,
        endpoint_id,
        mlrun.api.schemas.AuthorizationAction.read,
        auth_info,
    )

    endpoint = mlrun.api.crud.ModelEndpoints().get_endpoint(
        auth_info=auth_info, project=project, endpoint_id=endpoint_id
    )

    feature_names = endpoint.spec.feature_names
    if not feature_names:
        # Without feature names there are no columns to request from the TSDB.
        logger.warn(
            "'feature_names' is either missing or not initialized in endpoint record",
            endpoint_id=endpoint.metadata.uid,
        )
        return []

    events_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=project, kind=mlrun.api.crud.ModelEndpoints().EVENTS
    )
    _, container, table_path = parse_model_endpoint_store_prefix(events_prefix)

    frames_client = get_frames_client(
        token=auth_info.data_session,
        address=config.v3io_framesd,
        container=container,
    )

    frame: pd.DataFrame = frames_client.read(
        backend="tsdb",
        table=table_path,
        columns=feature_names,
        filter=f"endpoint_id=='{endpoint_id}'",
        start=start,
        end=end,
    )

    # The endpoint id is a label, not a feature; drop it if the read returned it.
    frame.drop(columns=["endpoint_id"], inplace=True, errors="ignore")
    # NOTE(review): assumes a datetime index in nanoseconds, converted here to
    # epoch milliseconds for Grafana - confirm against the frames client.
    frame.index = frame.index.astype(np.int64) // 10**6

    time_series = []
    for feature_name, values_by_timestamp in frame.to_dict().items():
        series = GrafanaTimeSeriesTarget(target=feature_name)
        for timestamp, raw_value in values_by_timestamp.items():
            series.add_data_point(
                GrafanaDataPoint(value=float(raw_value), timestamp=timestamp)
            )
        time_series.append(series)

    return time_series
示例#2
0
async def test_get_endpoint_metrics(db: Session, client: TestClient):
    """Check that a metric written to the TSDB is returned with its endpoint.

    For each of five mock endpoints: store the endpoint, write ten
    ``predictions_per_second_count_1s`` samples for it to the events TSDB
    table, fetch the endpoint through the REST API with that metric
    requested, and assert that the returned values sum to what was written.

    :param db: Database session fixture (not used directly in the body).
    :param client: Test client used to call the API.
    """
    metric_name = "predictions_per_second_count_1s"
    table = f"test/{ENDPOINT_EVENTS_TABLE_PATH}"

    frames = get_frames_client(
        token=_get_access_key(),
        container="projects",
        address=config.v3io_framesd,
    )

    start = datetime.utcnow()

    for _ in range(5):
        endpoint = _mock_random_endpoint()
        await write_endpoint_to_kv(_get_access_key(), endpoint)
        await run_in_threadpool(
            frames.create,
            backend="tsdb",
            table=table,
            rate="10/m",
            if_exists=1,
        )

        total = 0
        dfs = []

        # One sample per minute over the ten minutes preceding `start`.
        for j in range(10):
            count = randint(1, 10)
            total += count
            data = {
                metric_name: count,
                "endpoint_id": endpoint.metadata.uid,
                "timestamp": start - timedelta(minutes=10 - j),
            }
            dfs.append(pd.DataFrame(data=[data]))

        await run_in_threadpool(
            frames.write,
            backend="tsdb",
            table=table,
            dfs=dfs,
            index_cols=["timestamp", "endpoint_id"],
        )

        response = await run_in_threadpool(
            client.get,
            url=f"/api/projects/test/model-endpoints/{endpoint.metadata.uid}?metric={metric_name}",
            headers={"X-V3io-Session-Key": _get_access_key()},
        )

        returned_endpoint = ModelEndpoint(**response.json())

        assert len(returned_endpoint.status.metrics) > 0

        predictions_per_second = returned_endpoint.status.metrics[metric_name]

        assert predictions_per_second.name == metric_name

        # Each entry of `values` is indexed with [1] to get the metric value.
        response_total = sum(m[1] for m in predictions_per_second.values)

        assert total == response_total
示例#3
0
    def __init__(
        self,
        context: MLClientCtx,
        project: str,
        model_monitoring_access_key: str,
        v3io_access_key: str,
    ):
        """Resolve storage locations, thresholds and clients for one project.

        :param context: Stored as ``self.context``.
        :param project: Project name; substituted into every store-prefix
            template.
        :param model_monitoring_access_key: Access key for model monitoring.
            When falsy, ``v3io_access_key`` is used in its place.
        :param v3io_access_key: Access key handed to the V3IO and frames
            clients.
        """
        self.context = context
        self.project = project

        self.v3io_access_key = v3io_access_key
        if model_monitoring_access_key:
            self.model_monitoring_access_key = model_monitoring_access_key
        else:
            # No dedicated monitoring key was supplied; reuse the V3IO key.
            self.model_monitoring_access_key = v3io_access_key

        self.virtual_drift = VirtualDrift(inf_capping=10)

        default_prefix = config.model_endpoint_monitoring.store_prefixes.default

        def _container_and_path(kind: str):
            # Fill the default template for `kind` and keep only the
            # container and path parts of the parsed prefix.
            _, container, path = parse_model_endpoint_store_prefix(
                default_prefix.format(project=project, kind=kind)
            )
            return container, path

        self.kv_container, self.kv_path = _container_and_path("endpoints")
        self.tsdb_container, self.tsdb_path = _container_and_path("events")
        self.stream_container, self.stream_path = _container_and_path("log_stream")

        user_space_prefix = config.model_endpoint_monitoring.store_prefixes.user_space
        self.parquet_path = user_space_prefix.format(project=project, kind="parquet")

        # Only whether each key was supplied is logged, never the key itself.
        # NOTE(review): "initalized" in the first keyword is misspelled; it is
        # kept as-is because the name is emitted in the log output.
        logger.info(
            "Initializing BatchProcessor",
            project=project,
            model_monitoring_access_key_initalized=bool(model_monitoring_access_key),
            v3io_access_key_initialized=bool(v3io_access_key),
            parquet_path=self.parquet_path,
            kv_container=self.kv_container,
            kv_path=self.kv_path,
            tsdb_container=self.tsdb_container,
            tsdb_path=self.tsdb_path,
            stream_container=self.stream_container,
            stream_path=self.stream_path,
        )

        default_thresholds = config.model_endpoint_monitoring.drift_thresholds.default
        self.default_possible_drift_threshold = default_thresholds.possible_drift
        self.default_drift_detected_threshold = default_thresholds.drift_detected

        self.db = get_run_db()
        self.v3io = get_v3io_client(access_key=self.v3io_access_key)
        self.frames = get_frames_client(
            address=config.v3io_framesd,
            container=self.tsdb_container,
            token=self.v3io_access_key,
        )
示例#4
0
async def test_grafana_incoming_features(db: Session, client: TestClient):
    """Check the Grafana proxy's ``incoming_features`` target end to end.

    Creates five mock endpoints with features ``f0``..``f3``, writes ten
    samples per endpoint to the events TSDB table, then queries the Grafana
    proxy for each endpoint and asserts that every feature comes back as its
    own target with ten datapoints.

    :param db: Database session fixture (not used directly in the body).
    :param client: Test client used to call the API.
    """
    feature_names = ["f0", "f1", "f2", "f3"]
    table = f"test/{ENDPOINT_EVENTS_TABLE_PATH}"

    frames = get_frames_client(
        token=_get_access_key(),
        container="projects",
        address=config.v3io_framesd,
    )

    await run_in_threadpool(
        frames.create,
        backend="tsdb",
        table=table,
        rate="10/m",
        if_exists=1,
    )

    start = datetime.utcnow()
    endpoints = [_mock_random_endpoint() for _ in range(5)]
    for endpoint in endpoints:
        # Each endpoint gets its own list so they never share mutable state.
        endpoint.spec.feature_names = list(feature_names)

    for endpoint in endpoints:
        await ModelEndpoints.create_or_patch(_get_access_key(), endpoint)

        dfs = []

        # One sample per minute over the ten minutes preceding `start`.
        for i in range(10):
            data = {
                "f0": i,
                "f1": i + 1,
                "f2": i + 2,
                "f3": i + 3,
                "endpoint_id": endpoint.metadata.uid,
                "timestamp": start - timedelta(minutes=10 - i),
            }
            dfs.append(pd.DataFrame(data=[data]))

        await run_in_threadpool(
            frames.write,
            backend="tsdb",
            table=table,
            dfs=dfs,
            index_cols=["timestamp", "endpoint_id"],
        )

    for endpoint in endpoints:
        response = await run_in_threadpool(
            client.post,
            url="/api/grafana-proxy/model-endpoints/query",
            headers={"X-V3io-Session-Key": _get_access_key()},
            json={
                "targets": [
                    {
                        "target": f"project=test;endpoint_id={endpoint.metadata.uid};target_endpoint=incoming_features"
                    }
                ]
            },
        )
        series = response.json()

        targets = [entry["target"] for entry in series]
        assert targets == feature_names

        datapoints_per_target = [entry["datapoints"] for entry in series]
        assert all(len(points) == 10 for points in datapoints_per_target)
示例#5
0
    def test_get_endpoint_metrics(self):
        """Check that a metric written to the TSDB is returned by the run DB.

        For each of five mock endpoints: store the endpoint through the run
        DB, write ten ``predictions_per_second_count_1s`` samples for it to
        the project's events TSDB table, read the endpoint back with that
        metric requested, and assert that the returned values sum to what
        was written.
        """
        metric_name = "predictions_per_second_count_1s"

        auth_info = self._get_auth_info()
        access_key = auth_info.data_session
        db = mlrun.get_run_db()

        events_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
            project=self.project_name, kind=mlrun.api.crud.ModelEndpoints().EVENTS
        )
        _, container, table_path = parse_model_endpoint_store_prefix(events_prefix)

        frames = get_frames_client(
            token=access_key, container=container, address=config.v3io_framesd
        )

        start = datetime.utcnow()

        for _ in range(5):
            mock_endpoint = self._mock_random_endpoint()
            db.create_or_patch_model_endpoint(
                mock_endpoint.metadata.project, mock_endpoint.metadata.uid, mock_endpoint
            )
            frames.create(backend="tsdb", table=table_path, rate="10/m", if_exists=1)

            # Draw the ten counts first, then build one single-row frame per
            # count, stamped one minute apart over the ten minutes before `start`.
            counts = [randint(1, 10) for _sample in range(10)]
            expected_total = sum(counts)
            samples = [
                pd.DataFrame(
                    data=[
                        {
                            metric_name: count,
                            "endpoint_id": mock_endpoint.metadata.uid,
                            "timestamp": start - timedelta(minutes=10 - minute),
                        }
                    ]
                )
                for minute, count in enumerate(counts)
            ]

            frames.write(
                backend="tsdb",
                table=table_path,
                dfs=samples,
                index_cols=["timestamp", "endpoint_id"],
            )

            stored_endpoint = db.get_model_endpoint(
                self.project_name,
                mock_endpoint.metadata.uid,
                metrics=[metric_name],
            )
            assert len(stored_endpoint.status.metrics) > 0

            metric = stored_endpoint.status.metrics[metric_name]
            assert metric.name == metric_name

            # Each entry of `values` is indexed with [1] to get the metric value.
            returned_total = sum(entry[1] for entry in metric.values)
            assert expected_total == returned_total