def grafana_incoming_features(
    body: Dict[str, Any],
    query_parameters: Dict[str, str],
    auth_info: mlrun.api.schemas.AuthInfo,
):
    """Grafana simple-json proxy target: return per-feature time series of the
    incoming feature values recorded for a model endpoint.

    :param body:             Raw Grafana request body; only ``rangeRaw.from`` /
                             ``rangeRaw.to`` are read (default last hour).
    :param query_parameters: Parsed ``target`` parameters; expects
                             ``endpoint_id`` and ``project``.
    :param auth_info:        Caller credentials; ``data_session`` is forwarded
                             to the v3io frames client.
    :returns: List of ``GrafanaTimeSeriesTarget`` objects, one per feature
              (empty when the endpoint record has no feature names).
    """
    endpoint_id = query_parameters.get("endpoint_id")
    project = query_parameters.get("project")

    # Grafana sends the raw time range; fall back to the last hour.
    start = body.get("rangeRaw", {}).get("from", "now-1h")
    end = body.get("rangeRaw", {}).get("to", "now")

    # Authorize read access to this endpoint before touching any data.
    mlrun.api.utils.clients.opa.Client().query_project_resource_permissions(
        mlrun.api.schemas.AuthorizationResourceTypes.model_endpoint,
        project,
        endpoint_id,
        mlrun.api.schemas.AuthorizationAction.read,
        auth_info,
    )

    endpoint = mlrun.api.crud.ModelEndpoints().get_endpoint(
        auth_info=auth_info, project=project, endpoint_id=endpoint_id
    )

    time_series = []

    feature_names = endpoint.spec.feature_names
    if not feature_names:
        # No feature names recorded for this endpoint — nothing to plot.
        logger.warn(
            "'feature_names' is either missing or not initialized in endpoint record",
            endpoint_id=endpoint.metadata.uid,
        )
        return time_series

    # Resolve the per-project TSDB events path and split it into the
    # v3io container + in-container path expected by the frames client.
    path = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=project, kind=mlrun.api.crud.ModelEndpoints().EVENTS
    )
    _, container, path = parse_model_endpoint_store_prefix(path)

    client = get_frames_client(
        token=auth_info.data_session,
        address=config.v3io_framesd,
        container=container,
    )

    # Read only this endpoint's rows, restricted to the feature columns.
    data: pd.DataFrame = client.read(
        backend="tsdb",
        table=path,
        columns=feature_names,
        filter=f"endpoint_id=='{endpoint_id}'",
        start=start,
        end=end,
    )

    # endpoint_id may come back as a column depending on the query; drop it
    # quietly if present ("ignore" tolerates its absence).
    data.drop(["endpoint_id"], axis=1, inplace=True, errors="ignore")

    # Convert the nanosecond datetime index to epoch milliseconds, the unit
    # Grafana expects for datapoint timestamps.
    data.index = data.index.astype(np.int64) // 10**6

    # One Grafana target per feature column; each (timestamp, value) pair
    # becomes a datapoint.
    for feature, indexed_values in data.to_dict().items():
        target = GrafanaTimeSeriesTarget(target=feature)
        for index, value in indexed_values.items():
            data_point = GrafanaDataPoint(value=float(value), timestamp=index)
            target.add_data_point(data_point)
        time_series.append(target)

    return time_series
async def test_get_endpoint_metrics(db: Session, client: TestClient):
    """End-to-end check of the model-endpoint metrics API: for several mock
    endpoints, write random ``predictions_per_second_count_1s`` samples to the
    TSDB and verify the endpoint GET API returns the same totals.

    Fix: the inner sample loop previously reused ``i``, shadowing the outer
    endpoint counter; renamed to ``j`` (matching the system-test variant).
    """
    frames = get_frames_client(
        token=_get_access_key(),
        container="projects",
        address=config.v3io_framesd,
    )

    start = datetime.utcnow()

    for i in range(5):
        endpoint = _mock_random_endpoint()
        await write_endpoint_to_kv(_get_access_key(), endpoint)
        await run_in_threadpool(
            frames.create,
            backend="tsdb",
            table=f"test/{ENDPOINT_EVENTS_TABLE_PATH}",
            rate="10/m",
            if_exists=1,
        )

        # Build 10 one-row frames of random counts, one per minute back from
        # `start`, and remember the expected total.
        total = 0
        dfs = []
        for j in range(10):
            count = randint(1, 10)
            total += count
            data = {
                "predictions_per_second_count_1s": count,
                "endpoint_id": endpoint.metadata.uid,
                "timestamp": start - timedelta(minutes=10 - j),
            }
            df = pd.DataFrame(data=[data])
            dfs.append(df)

        await run_in_threadpool(
            frames.write,
            backend="tsdb",
            table=f"test/{ENDPOINT_EVENTS_TABLE_PATH}",
            dfs=dfs,
            index_cols=["timestamp", "endpoint_id"],
        )

        # Query the endpoint with the metric name and compare the summed
        # datapoint values against what was written.
        response = await run_in_threadpool(
            client.get,
            url=f"/api/projects/test/model-endpoints/{endpoint.metadata.uid}?metric=predictions_per_second_count_1s",
            headers={"X-V3io-Session-Key": _get_access_key()},
        )

        endpoint = ModelEndpoint(**response.json())
        assert len(endpoint.status.metrics) > 0

        predictions_per_second = endpoint.status.metrics[
            "predictions_per_second_count_1s"
        ]

        assert predictions_per_second.name == "predictions_per_second_count_1s"

        # Each metric value is a (timestamp, value) pair.
        response_total = sum((m[1] for m in predictions_per_second.values))

        assert total == response_total
def __init__(
    self,
    context: MLClientCtx,
    project: str,
    model_monitoring_access_key: str,
    v3io_access_key: str,
):
    """Initialize the batch processor for a project's model monitoring.

    Resolves the project's KV/TSDB/stream/parquet store paths from the
    configured prefix templates and builds the DB, v3io, and frames clients.

    Fix: the structured-log field ``model_monitoring_access_key_initalized``
    was misspelled; corrected to ``..._initialized`` to match the adjacent
    ``v3io_access_key_initialized`` field.

    :param context:                     MLRun client context of the batch job.
    :param project:                     Project whose endpoints are processed.
    :param model_monitoring_access_key: Monitoring access key; falls back to
                                        ``v3io_access_key`` when empty.
    :param v3io_access_key:             v3io data-plane access key.
    """
    self.context = context
    self.project = project
    self.v3io_access_key = v3io_access_key
    # Monitoring key falls back to the plain v3io key when not provided.
    self.model_monitoring_access_key = (
        model_monitoring_access_key or v3io_access_key
    )
    self.virtual_drift = VirtualDrift(inf_capping=10)

    # All store paths derive from the same template; each is split into a
    # v3io container + in-container path.
    template = config.model_endpoint_monitoring.store_prefixes.default

    kv_path = template.format(project=self.project, kind="endpoints")
    _, self.kv_container, self.kv_path = parse_model_endpoint_store_prefix(kv_path)

    tsdb_path = template.format(project=project, kind="events")
    _, self.tsdb_container, self.tsdb_path = parse_model_endpoint_store_prefix(
        tsdb_path
    )

    stream_path = template.format(project=self.project, kind="log_stream")
    _, self.stream_container, self.stream_path = parse_model_endpoint_store_prefix(
        stream_path
    )

    # Parquet output lives under the user-space prefix, not the default one.
    self.parquet_path = config.model_endpoint_monitoring.store_prefixes.user_space.format(
        project=project, kind="parquet"
    )

    logger.info(
        "Initializing BatchProcessor",
        project=project,
        # Log only whether keys were supplied, never their values.
        model_monitoring_access_key_initialized=bool(model_monitoring_access_key),
        v3io_access_key_initialized=bool(v3io_access_key),
        parquet_path=self.parquet_path,
        kv_container=self.kv_container,
        kv_path=self.kv_path,
        tsdb_container=self.tsdb_container,
        tsdb_path=self.tsdb_path,
        stream_container=self.stream_container,
        stream_path=self.stream_path,
    )

    self.default_possible_drift_threshold = (
        config.model_endpoint_monitoring.drift_thresholds.default.possible_drift
    )
    self.default_drift_detected_threshold = (
        config.model_endpoint_monitoring.drift_thresholds.default.drift_detected
    )

    self.db = get_run_db()
    self.v3io = get_v3io_client(access_key=self.v3io_access_key)
    self.frames = get_frames_client(
        address=config.v3io_framesd,
        container=self.tsdb_container,
        token=self.v3io_access_key,
    )
async def test_grafana_incoming_features(db: Session, client: TestClient):
    """End-to-end check of the Grafana ``incoming_features`` proxy target:
    create mock endpoints with known feature names, write 10 feature samples
    per endpoint to the TSDB, then verify the proxy returns one target per
    feature with 10 datapoints each.
    """
    frames = get_frames_client(
        token=_get_access_key(),
        container="projects",
        address=config.v3io_framesd,
    )

    await run_in_threadpool(
        frames.create,
        backend="tsdb",
        table=f"test/{ENDPOINT_EVENTS_TABLE_PATH}",
        rate="10/m",
        if_exists=1,
    )

    start = datetime.utcnow()

    endpoints = [_mock_random_endpoint() for _ in range(5)]
    # Give every endpoint the same known feature-name set so the proxy
    # response can be asserted exactly.
    for e in endpoints:
        e.spec.feature_names = ["f0", "f1", "f2", "f3"]

    # Phase 1: register each endpoint and write its feature samples.
    for endpoint in endpoints:
        await ModelEndpoints.create_or_patch(_get_access_key(), endpoint)

        total = 0
        dfs = []
        for i in range(10):
            count = randint(1, 10)
            total += count
            # Deterministic feature values (i, i+1, i+2, i+3), one sample per
            # minute counting back from `start`.
            data = {
                "f0": i,
                "f1": i + 1,
                "f2": i + 2,
                "f3": i + 3,
                "endpoint_id": endpoint.metadata.uid,
                "timestamp": start - timedelta(minutes=10 - i),
            }
            df = pd.DataFrame(data=[data])
            dfs.append(df)

        await run_in_threadpool(
            frames.write,
            backend="tsdb",
            table=f"test/{ENDPOINT_EVENTS_TABLE_PATH}",
            dfs=dfs,
            index_cols=["timestamp", "endpoint_id"],
        )

    # Phase 2: query the Grafana proxy for each endpoint and validate the
    # returned targets and datapoint counts.
    for endpoint in endpoints:
        response = await run_in_threadpool(
            client.post,
            url="/api/grafana-proxy/model-endpoints/query",
            headers={"X-V3io-Session-Key": _get_access_key()},
            json={
                "targets": [
                    {
                        "target": f"project=test;endpoint_id={endpoint.metadata.uid};target_endpoint=incoming_features"
                    }
                ]
            },
        )
        response = response.json()

        # One target per feature, in declaration order.
        targets = [t["target"] for t in response]
        assert targets == ["f0", "f1", "f2", "f3"]

        # Every feature series carries all 10 written samples.
        lens = [t["datapoints"] for t in response]
        assert all(map(lambda l: len(l) == 10, lens))
def test_get_endpoint_metrics(self):
    """System-test variant of the endpoint metrics check: write random
    ``predictions_per_second_count_1s`` samples for several mock endpoints via
    the frames client, then fetch each endpoint through the run DB with the
    metric requested and verify the summed datapoints match what was written.
    """
    auth_info = self._get_auth_info()
    access_key = auth_info.data_session
    db = mlrun.get_run_db()

    # Resolve the project's TSDB events path and split it into the v3io
    # container + in-container path expected by the frames client.
    path = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=self.project_name, kind=mlrun.api.crud.ModelEndpoints().EVENTS
    )
    _, container, path = parse_model_endpoint_store_prefix(path)

    frames = get_frames_client(
        token=access_key,
        container=container,
        address=config.v3io_framesd,
    )

    start = datetime.utcnow()

    for i in range(5):
        endpoint = self._mock_random_endpoint()
        db.create_or_patch_model_endpoint(
            endpoint.metadata.project, endpoint.metadata.uid, endpoint
        )

        frames.create(backend="tsdb", table=path, rate="10/m", if_exists=1)

        # Build 10 one-row frames of random counts, one per minute back from
        # `start`, and remember the expected total.
        total = 0
        dfs = []
        for j in range(10):
            count = randint(1, 10)
            total += count
            data = {
                "predictions_per_second_count_1s": count,
                "endpoint_id": endpoint.metadata.uid,
                "timestamp": start - timedelta(minutes=10 - j),
            }
            df = pd.DataFrame(data=[data])
            dfs.append(df)

        frames.write(
            backend="tsdb",
            table=path,
            dfs=dfs,
            index_cols=["timestamp", "endpoint_id"],
        )

        # Fetch the endpoint with the metric requested and compare totals.
        endpoint = db.get_model_endpoint(
            self.project_name,
            endpoint.metadata.uid,
            metrics=["predictions_per_second_count_1s"],
        )

        assert len(endpoint.status.metrics) > 0

        predictions_per_second = endpoint.status.metrics[
            "predictions_per_second_count_1s"
        ]

        assert predictions_per_second.name == "predictions_per_second_count_1s"

        # Each metric value is a (timestamp, value) pair.
        response_total = sum((m[1] for m in predictions_per_second.values))

        assert total == response_total