def cleanup_endpoints(db: Session, client: TestClient):
    """
    Best-effort removal of all model endpoint records from the KV table, followed by removal of the
    endpoint events TSDB table.

    A RuntimeError raised while listing/deleting KV records and a CreateError raised while deleting the
    TSDB table are both ignored.

    :param db:     Not referenced by the body
    :param client: Not referenced by the body
    """
    v3io = get_v3io_client()
    frames = get_frames_client(container="projects")

    try:
        all_records = v3io.kv.new_cursor(
            container=config.model_endpoint_monitoring_container,
            table_path=ENDPOINTS_TABLE_PATH,
            raise_for_status=RaiseForStatus.never,
        ).all()

        all_records = [r["__name"] for r in all_records]

        # Cleanup KV - reuse the client created above rather than creating a new one per record
        for record in all_records:
            v3io.kv.delete(
                container=config.model_endpoint_monitoring_container,
                table_path=ENDPOINTS_TABLE_PATH,
                key=record,
                raise_for_status=RaiseForStatus.never,
            )
    except RuntimeError:
        # Cleanup is best-effort, a failing KV call is deliberately ignored
        pass

    try:
        # Cleanup TSDB
        frames.delete(
            backend="tsdb",
            table=ENDPOINT_EVENTS_TABLE_PATH,
            if_missing=fpb2.IGNORE,
        )
    except CreateError:
        # Cleanup is best-effort, a failing TSDB delete is deliberately ignored
        pass
def do(self, event: Dict):
    """
    Updates the KV record whose key is the event's endpoint id with the event's attributes

    :param event: The event, used both for the record key lookup and as the record attributes

    :return: The same event object that was passed in
    """
    kv = get_v3io_client().kv
    kv.update(
        container=self.container,
        table_path=self.table,
        key=event[ENDPOINT_ID],
        attributes=event,
    )
    return event
def cleanup_endpoints(db: Session, client: TestClient):
    """
    Deletes every record of the test endpoints KV table and then the test endpoint events TSDB table.

    A RuntimeError from the KV phase and a CreateError from the TSDB phase are ignored.

    :param db:     Not referenced by the body
    :param client: Not referenced by the body
    """
    kv_client = get_v3io_client(
        endpoint=config.v3io_api, access_key=_get_access_key()
    )
    tsdb_client = get_frames_client(
        token=_get_access_key(), container="projects", address=config.v3io_framesd,
    )

    try:
        rows = kv_client.kv.new_cursor(
            container="projects",
            table_path=f"test/{ENDPOINTS_TABLE_PATH}",
            raise_for_status=RaiseForStatus.never,
        ).all()

        # Collect all the keys before deleting anything
        keys = [row["__name"] for row in rows]

        # Cleanup KV
        for key in keys:
            kv_client.kv.delete(
                container="projects",
                table_path=f"test/{ENDPOINTS_TABLE_PATH}",
                key=key,
                raise_for_status=RaiseForStatus.never,
            )
    except RuntimeError:
        pass

    try:
        # Cleanup TSDB
        tsdb_client.delete(
            backend="tsdb",
            table=f"test/{ENDPOINT_EVENTS_TABLE_PATH}",
            if_missing=fpb2.IGNORE,
        )
    except CreateError:
        pass
def clear_endpoint_record(project: str, endpoint_id: str):
    """
    Verifies the endpoint, deletes its KV record (keyed by endpoint_id) and answers with an empty response

    :param project:     The name of the project, passed to the endpoint verification
    :param endpoint_id: The id of the endpoint, used as the KV record key

    :return: A Response with the NO_CONTENT status code
    """
    _verify_endpoint(project, endpoint_id)

    logger.info("Clearing model endpoint table", endpoint_id=endpoint_id)

    kv = get_v3io_client().kv
    kv.delete(
        container=config.httpdb.model_endpoint_monitoring.container,
        table_path=ENDPOINTS_TABLE_PATH,
        key=endpoint_id,
    )

    logger.info("Model endpoint table deleted", endpoint_id=endpoint_id)

    no_content = HTTPStatus.NO_CONTENT.value
    return Response(status_code=no_content)
def delete_endpoint_record(access_key: str, project: str, endpoint_id: str):
    """
    Removes the KV record of a single model endpoint. The table location is derived from the default
    store prefix formatted with the project, the record key is the endpoint id.

    :param access_key:  V3IO access key, passed to the KV delete call
    :param project:     The name of the project
    :param endpoint_id: The id of the endpoint
    """
    logger.info("Clearing model endpoint table", endpoint_id=endpoint_id)

    v3io_client = get_v3io_client(endpoint=config.v3io_api)

    store_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=project, kind=ENDPOINTS
    )
    _, kv_container, kv_table = parse_model_endpoint_store_prefix(store_prefix)

    v3io_client.kv.delete(
        container=kv_container,
        table_path=kv_table,
        key=endpoint_id,
        access_key=access_key,
    )

    logger.info("Model endpoint table cleared", endpoint_id=endpoint_id)
def delete_endpoint_record(
    self,
    auth_info: mlrun.api.schemas.AuthInfo,
    project: str,
    endpoint_id: str,
    access_key: str,
):
    """
    Removes the KV record of a single model endpoint. The table location is derived from the default
    store prefix formatted with the project, the record key is the endpoint id.

    :param auth_info:   Auth information of the caller (not referenced by the body)
    :param project:     The name of the project
    :param endpoint_id: The id of the endpoint
    :param access_key:  Access key passed to the KV delete call
    """
    logger.info("Clearing model endpoint table", endpoint_id=endpoint_id)

    v3io_client = get_v3io_client(endpoint=config.v3io_api)

    store_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=project, kind=mlrun.api.schemas.ModelMonitoringStoreKinds.ENDPOINTS
    )
    _, kv_container, kv_table = parse_model_endpoint_store_prefix(store_prefix)

    v3io_client.kv.delete(
        container=kv_container,
        table_path=kv_table,
        key=endpoint_id,
        access_key=access_key,
    )

    logger.info("Model endpoint table cleared", endpoint_id=endpoint_id)
def _write_endpoint_to_kv(endpoint: ModelEndpoint):
    """
    Writes (puts) the endpoint's descriptive attributes to the endpoints KV table, keyed by the endpoint id

    Every label is additionally stored as its own attribute, prefixed with an underscore. An endpoint
    without labels is supported and simply gets no such attributes.

    :param endpoint: The ModelEndpoint object to write
    """
    endpoint_id = get_endpoint_id(endpoint)

    labels = endpoint.metadata.labels
    # Labels may be missing, calling .items() on None would raise before anything is written
    searchable_labels = {f"_{k}": v for k, v in labels.items()} if labels else {}

    get_v3io_client().kv.put(
        container=config.model_endpoint_monitoring_container,
        table_path=ENDPOINTS_TABLE_PATH,
        key=endpoint_id,
        attributes={
            "project": endpoint.metadata.project,
            "function": endpoint.spec.function,
            "model": endpoint.spec.model,
            "tag": endpoint.metadata.tag,
            "model_class": endpoint.spec.model_class,
            # Serialized as is, so missing labels keep being stored as "null"
            "labels": json.dumps(labels),
            **searchable_labels,
        },
    )
def write_endpoint_to_kv(
    self, access_key: str, endpoint: ModelEndpoint, update: bool = True
):
    """
    Stores the endpoint's data as a KV record keyed by the endpoint uid. Missing collections are
    replaced with empty ones and missing string fields with an empty string before writing.

    :param access_key: V3IO access key, passed to the KV call
    :param endpoint:   ModelEndpoint object
    :param update:     When True, use client.kv.update, otherwise use client.kv.put

    :return: The endpoint object that was passed in
    """
    metadata, spec, status = endpoint.metadata, endpoint.spec, endpoint.status

    labels = metadata.labels or {}
    # Each label is also written as a standalone attribute prefixed with an underscore
    searchable_labels = {f"_{k}": v for k, v in labels.items()}
    feature_names = spec.feature_names or []
    label_names = spec.label_names or []
    feature_stats = status.feature_stats or {}
    current_stats = status.current_stats or {}
    children = status.children or []
    monitor_configuration = spec.monitor_configuration or {}

    client = get_v3io_client(endpoint=config.v3io_api)
    if update:
        kv_write = client.kv.update
    else:
        kv_write = client.kv.put

    store_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=metadata.project, kind=self.ENDPOINTS
    )
    _, kv_container, kv_table = parse_model_endpoint_store_prefix(store_prefix)

    attributes = {
        "endpoint_id": metadata.uid,
        "project": metadata.project,
        "function_uri": spec.function_uri,
        "model": spec.model,
        "model_class": spec.model_class or "",
        "labels": json.dumps(labels),
        "model_uri": spec.model_uri or "",
        "stream_path": spec.stream_path or "",
        "active": spec.active or "",
        "state": status.state or "",
        "feature_stats": json.dumps(feature_stats),
        "current_stats": json.dumps(current_stats),
        "feature_names": json.dumps(feature_names),
        "children": json.dumps(children),
        "label_names": json.dumps(label_names),
        "monitor_configuration": json.dumps(monitor_configuration),
    }
    attributes.update(searchable_labels)

    kv_write(
        container=kv_container,
        table_path=kv_table,
        key=metadata.uid,
        access_key=access_key,
        attributes=attributes,
    )

    return endpoint
def _get_endpoint_kv_record_by_id(
    endpoint_id: str, attribute_names: Optional[List[str]] = None
) -> Dict[str, Any]:
    """
    Reads a single record from the endpoints KV table

    :param endpoint_id:     The key of the record
    :param attribute_names: The attributes to read, "*" is requested when omitted or empty

    :return: The `output.item` of the KV get response
    """
    kv = get_v3io_client().kv
    response = kv.get(
        container=config.httpdb.model_endpoint_monitoring.container,
        table_path=ENDPOINTS_TABLE_PATH,
        key=endpoint_id,
        attribute_names=attribute_names or "*",
        raise_for_status=RaiseForStatus.never,
    )
    return response.output.item
def cleanup_endpoints(db: Session, client: TestClient):
    """
    Deletes the test project's endpoint KV records and its events TSDB table. Nothing is done when
    `_is_env_params_dont_exist()` returns a truthy value.

    A RuntimeError from the KV phase and a CreateError from the TSDB phase are ignored.

    :param db:     Not referenced by the body
    :param client: Not referenced by the body
    """
    if _is_env_params_dont_exist():
        return

    prefix_template = config.model_endpoint_monitoring.store_prefixes.default

    endpoints_prefix = prefix_template.format(
        project=TEST_PROJECT,
        kind=mlrun.api.schemas.ModelMonitoringStoreKinds.ENDPOINTS,
    )
    _, kv_container, kv_path = parse_model_endpoint_store_prefix(endpoints_prefix)

    events_prefix = prefix_template.format(
        project=TEST_PROJECT,
        kind=mlrun.api.schemas.ModelMonitoringStoreKinds.EVENTS,
    )
    _, tsdb_container, tsdb_path = parse_model_endpoint_store_prefix(events_prefix)

    kv_client = get_v3io_client(
        endpoint=config.v3io_api, access_key=_get_access_key()
    )
    tsdb_client = get_frames_client(
        token=_get_access_key(),
        container=tsdb_container,
        address=config.v3io_framesd,
    )

    try:
        rows = kv_client.kv.new_cursor(
            container=kv_container,
            table_path=kv_path,
            raise_for_status=RaiseForStatus.never,
        ).all()

        # Collect all the keys before deleting anything
        keys = [row["__name"] for row in rows]

        # Cleanup KV
        for key in keys:
            kv_client.kv.delete(
                container=kv_container,
                table_path=kv_path,
                key=key,
                raise_for_status=RaiseForStatus.never,
            )
    except RuntimeError:
        pass

    try:
        # Cleanup TSDB
        tsdb_client.delete(
            backend="tsdb", table=tsdb_path, if_missing=fpb2.IGNORE,
        )
    except CreateError:
        pass
def cleanup_endpoints(self):
    """
    Deletes the model endpoint records of `self.project_name` through the run DB, then clears whatever is
    left in the endpoints KV table and deletes the TSDB table at the same path.

    A RuntimeError from the KV phase and a CreateError from the TSDB phase are ignored.
    """
    run_db = mlrun.get_run_db()
    listed = run_db.list_model_endpoints(self.project_name)
    for model_endpoint in listed.endpoints:
        run_db.delete_model_endpoint_record(
            model_endpoint.metadata.project, model_endpoint.metadata.uid
        )

    kv_client = get_v3io_client(
        endpoint=config.v3io_api, access_key=self._get_auth_info().data_session
    )

    store_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=self.project_name, kind=mlrun.api.crud.ModelEndpoints().ENDPOINTS
    )
    _, container, table_path = parse_model_endpoint_store_prefix(store_prefix)

    tsdb_client = get_frames_client(
        token=self._get_auth_info().data_session,
        container=container,
        address=config.v3io_framesd,
    )

    try:
        rows = kv_client.kv.new_cursor(
            container=container,
            table_path=table_path,
            raise_for_status=RaiseForStatus.never,
        ).all()

        # Collect all the keys before deleting anything
        keys = [row["__name"] for row in rows]

        # Cleanup KV
        for key in keys:
            kv_client.kv.delete(
                container=container,
                table_path=table_path,
                key=key,
                raise_for_status=RaiseForStatus.never,
            )
    except RuntimeError:
        pass

    try:
        # Cleanup TSDB
        tsdb_client.delete(
            backend="tsdb", table=table_path, if_missing=fpb2.IGNORE,
        )
    except CreateError:
        pass
def get_endpoint_record(
    kv_container: str, kv_path: str, endpoint_id: str
) -> Optional[dict]:
    """
    Reads a single endpoint record from KV

    :param kv_container: The container of the KV table
    :param kv_path:      The path of the KV table
    :param endpoint_id:  The key of the record

    :return: The `output.item` of the KV get response, or None when reading it raised any exception
    """
    # Plain string - the message has no placeholders, so it should not be an f-string
    logger.info(
        "Grabbing endpoint data", endpoint_id=endpoint_id, table_path=kv_path,
    )
    try:
        endpoint_record = (
            get_v3io_client()
            .kv.get(container=kv_container, table_path=kv_path, key=endpoint_id)
            .output.item
        )
        return endpoint_record
    except Exception:
        # Deliberate best-effort lookup, any failure is reported to the caller as "no record"
        return None
def _write_endpoint_to_kv(endpoint: ModelEndpoint):
    """
    Writes (puts) the endpoint's descriptive attributes to the project's endpoints KV table in the
    "projects" container, keyed by `endpoint.id`

    Every label is additionally stored as its own attribute, prefixed with an underscore. An endpoint
    without labels is supported and simply gets no such attributes.

    :param endpoint: The ModelEndpoint object to write
    """
    client = get_v3io_client(endpoint=config.v3io_api, access_key=_get_access_key())

    labels = endpoint.metadata.labels
    # Labels may be missing, calling .items() on None would raise before anything is written
    searchable_labels = {f"_{k}": v for k, v in labels.items()} if labels else {}

    client.kv.put(
        container="projects",
        table_path=f"{endpoint.metadata.project}/{ENDPOINTS_TABLE_PATH}/",
        key=endpoint.id,
        attributes={
            "project": endpoint.metadata.project,
            "function": endpoint.spec.function,
            "model": endpoint.spec.model,
            "tag": endpoint.metadata.tag,
            "model_class": endpoint.spec.model_class,
            # Serialized as is, so missing labels keep being stored as "null"
            "labels": json.dumps(labels),
            **searchable_labels,
        },
    )
async def write_endpoint_to_kv(
    access_key: str, endpoint: ModelEndpoint, update: bool = True
):
    """
    Stores the endpoint's data as a KV record keyed by the endpoint uid. The KV call itself is awaited
    through `run_in_threadpool`. Missing collections are replaced with empty ones and missing string
    fields with an empty string before writing.

    :param access_key: V3IO access key, passed to the KV call
    :param endpoint:   ModelEndpoint object
    :param update:     When True, use client.kv.update, otherwise use client.kv.put

    :return: The endpoint object that was passed in
    """
    metadata, spec, status = endpoint.metadata, endpoint.spec, endpoint.status

    labels = metadata.labels or {}
    # Each label is also written as a standalone attribute prefixed with an underscore
    searchable_labels = {f"_{k}": v for k, v in labels.items()}
    feature_names = spec.feature_names or []
    feature_stats = status.feature_stats or {}
    current_stats = status.current_stats or {}
    monitor_configuration = spec.monitor_configuration or {}

    client = get_v3io_client(endpoint=config.v3io_api)
    if update:
        kv_write = client.kv.update
    else:
        kv_write = client.kv.put

    kv_container = config.model_endpoint_monitoring.container
    kv_table = f"{metadata.project}/{ENDPOINTS_TABLE_PATH}"

    attributes = {
        "endpoint_id": metadata.uid,
        "project": metadata.project,
        "function_uri": spec.function_uri,
        "model": spec.model,
        "model_class": spec.model_class or "",
        "labels": json.dumps(labels),
        "model_uri": spec.model_uri or "",
        "stream_path": spec.stream_path or "",
        "active": spec.active or "",
        "state": status.state or "",
        "feature_stats": json.dumps(feature_stats),
        "current_stats": json.dumps(current_stats),
        "feature_names": json.dumps(feature_names),
        "monitor_configuration": json.dumps(monitor_configuration),
    }
    attributes.update(searchable_labels)

    await run_in_threadpool(
        kv_write,
        container=kv_container,
        table_path=kv_table,
        key=metadata.uid,
        access_key=access_key,
        attributes=attributes,
    )

    return endpoint
def get_endpoint_kv_record_by_id(
    access_key: str,
    project: str,
    endpoint_id: str,
    attribute_names: Optional[List[str]] = None,
) -> Dict[str, Any]:
    """
    Reads a single record from the project's endpoints KV table

    :param access_key:      V3IO access key, passed to the KV get call
    :param project:         The name of the project, used as the table path prefix
    :param endpoint_id:     The key of the record
    :param attribute_names: The attributes to read, "*" is requested when omitted or empty

    :return: The `output.item` of the KV get response
    """
    v3io_client = get_v3io_client(endpoint=config.v3io_api)
    response = v3io_client.kv.get(
        container=config.model_endpoint_monitoring.container,
        table_path=f"{project}/{ENDPOINTS_TABLE_PATH}",
        key=endpoint_id,
        access_key=access_key,
        attribute_names=attribute_names or "*",
        raise_for_status=RaiseForStatus.never,
    )
    return response.output.item
def __init__(self, context: MLClientCtx, project: str):
    """
    Resolves the project's storage locations (parquet, KV, TSDB and stream) from the default store prefix
    template, reads the default drift thresholds from the config and creates the DB, V3IO and frames
    clients.

    :param context: The context, stored on the instance as is
    :param project: The name of the project, used to format the store prefix template
    """
    self.context = context
    self.project = project

    self.virtual_drift = VirtualDrift(inf_capping=10)

    store_prefix = config.model_endpoint_monitoring.store_prefixes.default

    self.parquet_path = store_prefix.format(project=self.project, kind="parquet")

    # Each of the following prefixes is split, only the container and the path are kept
    kv_parts = parse_model_endpoint_store_prefix(
        store_prefix.format(project=self.project, kind="endpoints")
    )
    _, self.kv_container, self.kv_path = kv_parts

    tsdb_parts = parse_model_endpoint_store_prefix(
        store_prefix.format(project=project, kind="events")
    )
    _, self.tsdb_container, self.tsdb_path = tsdb_parts

    stream_parts = parse_model_endpoint_store_prefix(
        store_prefix.format(project=self.project, kind="log_stream")
    )
    _, self.stream_container, self.stream_path = stream_parts

    logger.info(
        "Initializing BatchProcessor",
        parquet_path=self.parquet_path,
        kv_container=self.kv_container,
        kv_path=self.kv_path,
        tsdb_container=self.tsdb_container,
        tsdb_path=self.tsdb_path,
        stream_container=self.stream_container,
        stream_path=self.stream_path,
    )

    default_thresholds = config.model_endpoint_monitoring.drift_thresholds.default
    self.default_possible_drift_threshold = default_thresholds.possible_drift
    self.default_drift_detected_threshold = default_thresholds.drift_detected

    self.db = get_run_db()
    self.v3io = get_v3io_client()
    self.frames = get_frames_client(
        address=config.v3io_framesd, container=self.tsdb_container
    )
def get_endpoint_record(
    kv_container: str, kv_path: str, endpoint_id: str, access_key: str
) -> Optional[dict]:
    """
    Reads a single endpoint record from KV, asking the client to always raise on a failing status

    :param kv_container: The container of the KV table
    :param kv_path:      The path of the KV table
    :param endpoint_id:  The key of the record
    :param access_key:   Access key passed to the KV get call

    :return: The `output.item` of the KV get response, or None when reading it raised any exception
    """
    logger.info(
        "Grabbing endpoint data",
        container=kv_container,
        table_path=kv_path,
        key=endpoint_id,
    )

    try:
        response = get_v3io_client().kv.get(
            container=kv_container,
            table_path=kv_path,
            key=endpoint_id,
            access_key=access_key,
            raise_for_status=v3io.dataplane.RaiseForStatus.always,
        )
        return response.output.item
    except Exception:
        # Any failure is reported to the caller as "no record"
        return None
def clear_endpoint_record(access_key: str, project: str, endpoint_id: str):
    """
    Verifies the endpoint and then deletes its record from the project's endpoints KV table

    :param access_key:  V3IO access key, passed to the KV delete call
    :param project:     The name of the project
    :param endpoint_id: The id of the endpoint, used as the KV record key
    """
    verify_endpoint(project, endpoint_id)

    logger.info("Clearing model endpoint table", endpoint_id=endpoint_id)

    v3io_client = get_v3io_client(endpoint=config.v3io_api)
    kv_container = config.model_endpoint_monitoring.container
    kv_table = f"{project}/{ENDPOINTS_TABLE_PATH}"
    v3io_client.kv.delete(
        container=kv_container,
        table_path=kv_table,
        key=endpoint_id,
        access_key=access_key,
    )

    logger.info("Model endpoint table deleted", endpoint_id=endpoint_id)
async def delete_endpoint_record(access_key: str, project: str, endpoint_id: str):
    """
    Deletes the KV record of a given model endpoint, project and endpoint_id are used for lookup. The
    KV call itself is awaited through `run_in_threadpool`.

    :param access_key:  V3IO access key, passed to the KV delete call
    :param project:     The name of the project
    :param endpoint_id: The id of the endpoint
    """
    logger.info("Clearing model endpoint table", endpoint_id=endpoint_id)

    v3io_client = get_v3io_client(endpoint=config.v3io_api)
    kv_delete = v3io_client.kv.delete

    await run_in_threadpool(
        kv_delete,
        container=config.model_endpoint_monitoring.container,
        table_path=f"{project}/{ENDPOINTS_TABLE_PATH}",
        key=endpoint_id,
        access_key=access_key,
    )

    logger.info("Model endpoint table cleared", endpoint_id=endpoint_id)
def cleanup_endpoints(db: Session, client: TestClient):
    """
    Deletes the test project's endpoint KV records and the TSDB table at the same path. Nothing is done
    when `_is_env_params_dont_exist()` returns a truthy value.

    A RuntimeError from the KV phase and a CreateError from the TSDB phase are ignored.

    :param db:     Not referenced by the body
    :param client: Not referenced by the body
    """
    # Do nothing unless its system test env
    if _is_env_params_dont_exist():
        return

    kv_client = get_v3io_client(
        endpoint=config.v3io_api, access_key=_get_access_key()
    )

    store_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=TEST_PROJECT, kind=ENDPOINTS
    )
    _, container, table_path = parse_model_endpoint_store_prefix(store_prefix)

    tsdb_client = get_frames_client(
        token=_get_access_key(), container=container, address=config.v3io_framesd,
    )

    try:
        rows = kv_client.kv.new_cursor(
            container=container,
            table_path=table_path,
            raise_for_status=RaiseForStatus.never,
        ).all()

        # Collect all the keys before deleting anything
        keys = [row["__name"] for row in rows]

        # Cleanup KV
        for key in keys:
            kv_client.kv.delete(
                container=container,
                table_path=table_path,
                key=key,
                raise_for_status=RaiseForStatus.never,
            )
    except RuntimeError:
        pass

    try:
        # Cleanup TSDB
        tsdb_client.delete(
            backend="tsdb", table=table_path, if_missing=fpb2.IGNORE,
        )
    except CreateError:
        pass
def get_endpoint(
    access_key: str,
    project: str,
    endpoint_id: str,
    metrics: Optional[List[str]] = None,
    start: str = "now-1h",
    end: str = "now",
    feature_analysis: bool = False,
) -> ModelEndpoint:
    """
    Reads the endpoint's KV record and builds a ModelEndpoint object out of it, optionally enriched with
    feature related data and metrics.

    :param access_key:       V3IO access key for managing user permissions
    :param project:          The name of the project
    :param endpoint_id:      The id of the model endpoint
    :param metrics:          A list of metrics to return for the endpoint, read more in 'TimeMetric'
    :param start:            The start time of the metrics
    :param end:              The end time of the metrics
    :param feature_analysis: When True (and the record has feature names), the endpoint features and
                             drift measures are added to the status of the resulting object

    :return: The ModelEndpoint object

    :raise MLRunNotFoundError: When the KV lookup yields no record
    """
    logger.info(
        "Getting model endpoint record from kv", endpoint_id=endpoint_id,
    )

    v3io_client = get_v3io_client(endpoint=config.v3io_api)

    store_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=project, kind=ENDPOINTS
    )
    _, container, table_path = parse_model_endpoint_store_prefix(store_prefix)

    response = v3io_client.kv.get(
        container=container,
        table_path=table_path,
        key=endpoint_id,
        access_key=access_key,
        raise_for_status=RaiseForStatus.never,
    )
    record = response.output.item

    if not record:
        raise MLRunNotFoundError(f"Endpoint {endpoint_id} not found")

    # JSON encoded attributes are decoded up front, labels are decoded last
    feature_names = _json_loads_if_not_none(record.get("feature_names"))
    label_names = _json_loads_if_not_none(record.get("label_names"))
    feature_stats = _json_loads_if_not_none(record.get("feature_stats"))
    current_stats = _json_loads_if_not_none(record.get("current_stats"))
    drift_measures = _json_loads_if_not_none(record.get("drift_measures"))
    monitor_configuration = _json_loads_if_not_none(
        record.get("monitor_configuration")
    )

    metadata = ModelEndpointMetadata(
        project=record.get("project"),
        labels=_json_loads_if_not_none(record.get("labels")),
        uid=endpoint_id,
    )
    # Falsy values (empty strings included) are normalized to None
    spec = ModelEndpointSpec(
        function_uri=record.get("function_uri"),
        model=record.get("model"),
        model_class=record.get("model_class") or None,
        model_uri=record.get("model_uri") or None,
        feature_names=feature_names or None,
        label_names=label_names or None,
        stream_path=record.get("stream_path") or None,
        algorithm=record.get("algorithm") or None,
        monitor_configuration=monitor_configuration or None,
        active=record.get("active") or None,
    )
    status = ModelEndpointStatus(
        state=record.get("state") or None,
        feature_stats=feature_stats or None,
        current_stats=current_stats or None,
        first_request=record.get("first_request") or None,
        last_request=record.get("last_request") or None,
        accuracy=record.get("accuracy") or None,
        error_count=record.get("error_count") or None,
        drift_status=record.get("drift_status") or None,
    )
    model_endpoint = ModelEndpoint(metadata=metadata, spec=spec, status=status)

    if feature_analysis and feature_names:
        features = get_endpoint_features(
            feature_names=feature_names,
            feature_stats=feature_stats,
            current_stats=current_stats,
        )
        if features:
            model_endpoint.status.features = features
            model_endpoint.status.drift_measures = drift_measures

    if metrics:
        collected_metrics = get_endpoint_metrics(
            access_key=access_key,
            project=project,
            endpoint_id=endpoint_id,
            start=start,
            end=end,
            metrics=metrics,
        )
        if collected_metrics:
            model_endpoint.status.metrics = collected_metrics

    return model_endpoint
def list_endpoints(
    access_key: str,
    project: str,
    model: Optional[str] = None,
    function: Optional[str] = None,
    labels: Optional[List[str]] = None,
    metrics: Optional[List[str]] = None,
    start: str = "now-1h",
    end: str = "now",
) -> ModelEndpointList:
    """
    Lists the model endpoints of a project. The endpoint ids are read from the endpoints KV table using a
    filter expression built from the project, function, model and labels, and every id is then resolved
    to a full object through `ModelEndpoints.get_endpoint`.

    The `metrics`, `start` and `end` parameters are forwarded as is to `ModelEndpoints.get_endpoint`.

    :param access_key: V3IO access key for managing user permissions
    :param project:    The name of the project
    :param model:      The name of the model to filter by
    :param function:   The name of the function to filter by
    :param labels:     A list of labels to filter by. Label filters work by either filtering a specific
                       value of a label (i.e. list("key==value")) or by looking for the existence of a
                       given key (i.e. "key")
    :param metrics:    A list of metrics to return for each endpoint, read more in 'TimeMetric'
    :param start:      The start time of the metrics
    :param end:        The end time of the metrics

    :return: A ModelEndpointList holding one endpoint per listed id
    """
    logger.info(
        "Listing endpoints",
        project=project,
        model=model,
        function=function,
        labels=labels,
        metrics=metrics,
        start=start,
        end=end,
    )

    v3io_client = get_v3io_client(endpoint=config.v3io_api)

    store_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=project, kind=ENDPOINTS
    )
    _, container, table_path = parse_model_endpoint_store_prefix(store_prefix)

    kv_cursor = v3io_client.kv.new_cursor(
        container=container,
        table_path=table_path,
        access_key=access_key,
        filter_expression=build_kv_cursor_filter_expression(
            project, function, model, labels
        ),
        attribute_names=["endpoint_id"],
    )

    result = ModelEndpointList(endpoints=[])

    # The cursor signals its end by yielding None
    row = kv_cursor.next_item()
    while row is not None:
        result.endpoints.append(
            ModelEndpoints.get_endpoint(
                access_key=access_key,
                project=project,
                endpoint_id=row["endpoint_id"],
                metrics=metrics,
                start=start,
                end=end,
            )
        )
        row = kv_cursor.next_item()

    return result
def list_endpoints(
    project: str,
    model: Optional[str] = Query(None),
    function: Optional[str] = Query(None),
    tag: Optional[str] = Query(None),
    labels: List[str] = Query([], alias="label"),
    start: str = Query(default="now-1h"),
    end: str = Query(default="now"),
    metrics: bool = Query(default=False),
):
    """
    Returns a list of endpoints of type 'ModelEndpoint', supports filtering by model, function, tag and labels.
    Labels can be used to filter on the existence of a label:
    `api/projects/{project}/model-endpoints/?label=mylabel`

    Or on the value of a given label:
    `api/projects/{project}/model-endpoints/?label=mylabel=1`

    Multiple labels can be queried in a single request by either using `&` separator:
    `api/projects/{project}/model-endpoints/?label=mylabel=1&label=myotherlabel=2`

    Or by using a `,` (comma) separator:
    `api/projects/{project}/model-endpoints/?label=mylabel=1,myotherlabel=2`
    """
    kv_cursor = get_v3io_client().kv.new_cursor(
        container=config.httpdb.model_endpoint_monitoring.container,
        table_path=ENDPOINTS_TABLE_PATH,
        attribute_names=ENDPOINT_TABLE_ATTRIBUTES,
        filter_expression=_build_kv_cursor_filter_expression(
            project, function, model, tag, labels
        ),
    )
    records = kv_cursor.all()

    endpoint_state_list = []
    for record in records:
        # Metrics are collected only on demand
        if metrics:
            endpoint_metrics = _get_endpoint_metrics(
                endpoint_id=get_endpoint_id(ModelEndpoint(**record)),
                name=["predictions", "latency"],
                start=start,
                end=end,
            )
        else:
            endpoint_metrics = None

        # Collect labels (by convention labels are labeled with underscore '_'), ignore builtin '__name' field
        model_endpoint = ModelEndpoint(
            metadata=ModelEndpointMetadata(
                project=record.get("project"),
                tag=record.get("tag"),
                labels=json.loads(record.get("labels")),
            ),
            spec=ModelEndpointSpec(
                model=record.get("model"),
                function=record.get("function"),
                model_class=record.get("model_class"),
            ),
            status=ObjectStatus(state="active"),
        )
        endpoint_state_list.append(
            ModelEndpointState(
                endpoint=model_endpoint,
                first_request=record.get("first_request"),
                last_request=record.get("last_request"),
                error_count=record.get("error_count"),
                alert_count=record.get("alert_count"),
                drift_status=record.get("drift_status"),
                metrics=endpoint_metrics,
            )
        )

    return ModelEndpointStateList(endpoints=endpoint_state_list)
def test_grafana_individual_feature_analysis(db: Session, client: TestClient):
    """
    Puts a single endpoint record directly into KV and queries the grafana proxy for its
    `individual_feature_analysis` target, expecting a single table with 4 rows.

    :param db:     Not referenced by the body
    :param client: The test client used for posting the grafana proxy query
    """
    # The record as it is stored in KV - the stats and drift measures are JSON encoded strings
    endpoint_data = {
        "timestamp": "2021-02-28 21:02:58.642108",
        "project": TEST_PROJECT,
        "model": "test-model",
        "function": "v2-model-server",
        "tag": "latest",
        "model_class": "ClassifierModel",
        "endpoint_id": "test.test_id",
        "labels": "null",
        "latency_avg_1s": 42427.0,
        "predictions_per_second_count_1s": 141,
        "first_request": "2021-02-28 21:02:58.642108",
        "last_request": "2021-02-28 21:02:58.642108",
        "error_count": 0,
        # Four features are declared here
        "feature_names": '["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"]',
        "feature_stats": '{"sepal length (cm)": {"count": 30, "mean": 5.946666666666668, "std": 0.8394305678023165, "min": 4.7, "max": 7.9, "hist": [[4, 4, 4, 4, 4, 3, 4, 0, 3, 4, 1, 1, 2, 1, 0, 1, 0, 0, 1, 1], [4.7, 4.86, 5.0200000000000005, 5.18, 5.34, 5.5, 5.66, 5.82, 5.98, 6.140000000000001, 6.300000000000001, 6.46, 6.62, 6.78, 6.94, 7.1, 7.26, 7.42, 7.58, 7.74, 7.9]]}, "sepal width (cm)": {"count": 30, "mean": 3.119999999999999, "std": 0.4088672324766359, "min": 2.2, "max": 3.8, "hist": [[1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 3, 3, 2, 2, 0, 3, 1, 1, 0, 4], [2.2, 2.2800000000000002, 2.3600000000000003, 2.44, 2.52, 2.6, 2.68, 2.7600000000000002, 2.84, 2.92, 3, 3.08, 3.16, 3.24, 3.3200000000000003, 3.4, 3.48, 3.56, 3.6399999999999997, 3.7199999999999998, 3.8]]}, "petal length (cm)": {"count": 30, "mean": 3.863333333333333, "std": 1.8212317418360753, "min": 1.3, "max": 6.7, "hist": [[6, 6, 6, 6, 6, 6, 0, 0, 1, 2, 0, 3, 3, 2, 2, 3, 1, 1, 1, 1], [1.3, 1.57, 1.84, 2.1100000000000003, 2.38, 2.6500000000000004, 2.92, 3.1900000000000004, 3.46, 3.7300000000000004, 4, 4.2700000000000005, 4.54, 4.8100000000000005, 5.08, 5.3500000000000005, 5.62, 5.89, 6.16, 6.430000000000001, 6.7]]}, "petal width (cm)": {"count": 30, "mean": 1.2733333333333334, "std": 0.8291804567674381, "min": 0.1, "max": 2.5, "hist": [[5, 5, 5, 5, 5, 5, 0, 0, 1, 2, 3, 2, 1, 0, 2, 3, 1, 1, 0, 4], [0.1, 0.22, 0.33999999999999997, 0.45999999999999996, 0.58, 0.7, 0.82, 0.94, 1.06, 1.1800000000000002, 1.3, 1.42, 1.54, 1.6600000000000001, 1.78, 1.9, 2.02, 2.14, 2.2600000000000002, 2.38, 2.5]]}}',  # noqa
        "current_stats": '{"petal length (cm)": {"count": 100.0, "mean": 2.861, "std": 1.4495485190537463, "min": 1.0, "max": 5.1, "hist": [[4, 20, 20, 4, 2, 0, 0, 0, 0, 1, 0, 2, 3, 2, 8, 7, 6, 10, 7, 4], [1.0, 1.205, 1.41, 1.615, 1.8199999999999998, 2.025, 2.23, 2.4349999999999996, 2.6399999999999997, 2.8449999999999998, 3.05, 3.255, 3.46, 3.665, 3.8699999999999997, 4.074999999999999, 4.279999999999999, 4.484999999999999, 4.6899999999999995, 4.895, 5.1]]}, "petal width (cm)": {"count": 100.0, "mean": 5.471000000000001, "std": 0.6416983463254116, "min": 4.3, "max": 7.0, "hist": [[4, 1, 6, 5, 5, 19, 4, 1, 13, 5, 7, 6, 4, 4, 5, 2, 1, 5, 1, 2], [4.3, 4.435, 4.57, 4.705, 4.84, 4.975, 5.109999999999999, 5.245, 5.38, 5.515, 5.65, 5.785, 5.92, 6.055, 6.1899999999999995, 6.325, 6.46, 6.595, 6.73, 6.865, 7.0]]}, "sepal length (cm)": {"count": 100.0, "mean": 0.7859999999999998, "std": 0.5651530587354012, "min": 0.1, "max": 1.8, "hist": [[5, 29, 7, 7, 1, 1, 0, 0, 0, 0, 7, 3, 5, 0, 13, 7, 10, 3, 1, 1], [0.1, 0.185, 0.27, 0.355, 0.43999999999999995, 0.5249999999999999, 0.61, 0.695, 0.7799999999999999, 0.8649999999999999, 0.9499999999999998, 1.035, 1.12, 1.205, 1.29, 1.375, 1.46, 1.545, 1.63, 1.7149999999999999, 1.8]]}, "sepal width (cm)": {"count": 100.0, "mean": 3.0989999999999998, "std": 0.4787388735948953, "min": 2.0, "max": 4.4, "hist": [[1, 2, 4, 3, 4, 8, 6, 8, 14, 7, 11, 10, 6, 3, 7, 2, 1, 1, 1, 1], [2.0, 2.12, 2.24, 2.3600000000000003, 2.48, 2.6, 2.72, 2.8400000000000003, 2.96, 3.08, 3.2, 3.3200000000000003, 3.4400000000000004, 3.5600000000000005, 3.6800000000000006, 3.8000000000000003, 3.9200000000000004, 4.040000000000001, 4.16, 4.28, 4.4]]}}',  # noqa
        "drift_measures": '{"petal width (cm)": {"tvd": 0.4, "hellinger": 0.38143130942893605, "kld": 1.3765624725652992}, "tvd_sum": 1.755886699507389, "tvd_mean": 0.43897167487684724, "hellinger_sum": 1.7802062191831514, "hellinger_mean": 0.44505155479578784, "kld_sum": 9.133613874253776, "kld_mean": 2.283403468563444, "sepal width (cm)": {"tvd": 0.3551724137931034, "hellinger": 0.4024622641158891, "kld": 1.7123635755188409}, "petal length (cm)": {"tvd": 0.445, "hellinger": 0.39975075965755447, "kld": 1.6449612084377268}, "sepal length (cm)": {"tvd": 0.5557142857142856, "hellinger": 0.5965618859807716, "kld": 4.399726617731908}}',  # noqa
    }

    v3io = get_v3io_client(endpoint=config.v3io_api, access_key=_get_access_key())

    # Write the record straight to the KV table, under the same key that is queried below
    v3io.kv.put(
        container="projects",
        table_path=f"{TEST_PROJECT}/model-endpoints/endpoints",
        key="test.test_id",
        attributes=endpoint_data,
    )

    # The target is a ';' separated list of key=value pairs
    response = client.post(
        url="/api/grafana-proxy/model-endpoints/query",
        headers={"X-V3io-Session-Key": _get_access_key()},
        json={
            "targets": [{
                "target": f"project={TEST_PROJECT};endpoint_id=test.test_id;target_endpoint=individual_feature_analysis"  # noqa
            }]
        },
    )

    assert response.status_code == 200

    response_json = response.json()

    # A single table is expected
    assert len(response_json) == 1
    assert "columns" in response_json[0]
    assert "rows" in response_json[0]
    # Matches the number of entries in the record's feature_names
    assert len(response_json[0]["rows"]) == 4
def list_endpoints(
    self,
    auth_info: mlrun.api.schemas.AuthInfo,
    project: str,
    model: Optional[str] = None,
    function: Optional[str] = None,
    labels: Optional[List[str]] = None,
    metrics: Optional[List[str]] = None,
    start: str = "now-1h",
    end: str = "now",
    top_level: Optional[bool] = False,
    uids: Optional[List[str]] = None,
) -> ModelEndpointList:
    """
    Lists the model endpoints of a project, every listed endpoint id is resolved to a full object through
    `self.get_endpoint`.

    When `uids` is given, exactly those ids are resolved and the filters are not used. Otherwise the ids
    are read from the endpoints KV table using a filter expression built from the project, function,
    model, labels and top_level. If reading the ids raises, an empty list is returned.

    The `metrics`, `start` and `end` parameters are forwarded as is to `self.get_endpoint`.

    :param auth_info: The auth information of the caller, its data_session is used as the KV access key
    :param project:   The name of the project
    :param model:     The name of the model to filter by
    :param function:  The name of the function to filter by
    :param labels:    A list of labels to filter by. Label filters work by either filtering a specific
                      value of a label (i.e. list("key==value")) or by looking for the existence of a
                      given key (i.e. "key")
    :param metrics:   A list of metrics to return for each endpoint, read more in 'TimeMetric'
    :param start:     The start time of the metrics
    :param end:       The end time of the metrics
    :param top_level: if True will return only routers and endpoint that are NOT children of any router
    :param uids:      will return ModelEndpointList of endpoints with uid in uids

    :return: A ModelEndpointList holding one endpoint per listed id
    """
    logger.info(
        "Listing endpoints",
        project=project,
        model=model,
        function=function,
        labels=labels,
        metrics=metrics,
        start=start,
        end=end,
        top_level=top_level,
        uids=uids,
    )

    result = ModelEndpointList(endpoints=[])

    if uids is None:
        v3io_client = get_v3io_client(endpoint=config.v3io_api)

        store_prefix = config.model_endpoint_monitoring.store_prefixes.default.format(
            project=project,
            kind=mlrun.api.schemas.ModelMonitoringStoreKinds.ENDPOINTS,
        )
        _, container, table_path = parse_model_endpoint_store_prefix(store_prefix)

        filter_expression = self.build_kv_cursor_filter_expression(
            project, function, model, labels, top_level,
        )
        kv_cursor = v3io_client.kv.new_cursor(
            container=container,
            table_path=table_path,
            access_key=auth_info.data_session,
            filter_expression=filter_expression,
            attribute_names=["endpoint_id"],
            raise_for_status=RaiseForStatus.never,
        )

        try:
            records = kv_cursor.all()
        except Exception:
            # Failing to read the ids yields an empty list rather than an error
            return result

        uids = [record["endpoint_id"] for record in records]

    for uid in uids:
        result.endpoints.append(
            self.get_endpoint(
                auth_info=auth_info,
                project=project,
                endpoint_id=uid,
                metrics=metrics,
                start=start,
                end=end,
            )
        )

    return result
def delete_model_endpoints_resources(self, project_name: str):
    """
    Delete the model endpoint resources of a project.

    The steps, in order: delete every listed endpoint record through `delete_endpoint_record`, delete
    whatever records are left in the endpoints KV table, delete the TSDB table through frames, and finally
    remove the project's TSDB path through the store manager.

    Nothing is done when `config.igz_version` or `config.v3io_api` is not set.

    :param project_name: The name of the project whose model endpoint resources are deleted
    """
    auth_info = mlrun.api.schemas.AuthInfo(
        data_session=os.getenv("V3IO_ACCESS_KEY")
    )
    access_key = auth_info.data_session

    # we would ideally base on config.v3io_api but can't for backwards compatibility reasons,
    # we're using the igz version heuristic
    if not config.igz_version or not config.v3io_api:
        return

    endpoints = self.list_endpoints(auth_info, project_name)
    for endpoint in endpoints.endpoints:
        self.delete_endpoint_record(
            auth_info,
            endpoint.metadata.project,
            endpoint.metadata.uid,
            access_key,
        )

    v3io = get_v3io_client(endpoint=config.v3io_api, access_key=access_key)

    path = config.model_endpoint_monitoring.store_prefixes.default.format(
        project=project_name,
        kind=mlrun.api.schemas.ModelMonitoringStoreKinds.ENDPOINTS,
    )
    # Both parsers take the full, still unparsed, store prefix - so this must run before `path` is
    # rebound below.
    tsdb_path = parse_model_endpoint_project_prefix(path, project_name)
    _, container, path = parse_model_endpoint_store_prefix(path)

    frames = get_frames_client(
        token=access_key,
        container=container,
        address=config.v3io_framesd,
    )

    try:
        all_records = v3io.kv.new_cursor(
            container=container,
            table_path=path,
            raise_for_status=RaiseForStatus.never,
            access_key=access_key,
        ).all()

        all_records = [r["__name"] for r in all_records]

        # Cleanup KV
        for record in all_records:
            v3io.kv.delete(
                container=container,
                table_path=path,
                key=record,
                access_key=access_key,
                raise_for_status=RaiseForStatus.never,
            )
    except RuntimeError as exc:
        # KV might raise an exception even though it was set not to raise one. The exception is raised if
        # the path is empty or does not exist, therefore ignoring failures until the bug is fixed.
        # TODO: remove try except after bug is fixed
        logger.debug(
            "Failed cleaning model endpoints KV. Ignoring",
            exc=str(exc),
            traceback=traceback.format_exc(),
        )

    # Cleanup TSDB
    try:
        frames.delete(
            backend="tsdb",
            table=path,
            if_missing=frames_pb2.IGNORE,
        )
    except CreateError:
        # frames might raise an exception if schema file does not exist.
        pass

    # final cleanup of tsdb path
    # str.replace returns a new string, so the result has to be assigned back for the normalized
    # path to actually reach the store manager.
    tsdb_path = tsdb_path.replace("://u", ":///u")
    store, _ = mlrun.store_manager.get_or_create_store(tsdb_path)
    store.rm(tsdb_path, recursive=True)
async def test_grafana_overall_feature_analysis(db: Session, client: TestClient):
    """
    Put a single endpoint record in KV, query the grafana proxy for the `overall_feature_analysis` target of
    that endpoint and check that one table comes back, with `columns` and `rows`, whose first row has 6 cells.
    """
    request_time = "2021-02-28 21:02:58.642108"
    feature_names = '["sepal length (cm)", "sepal width (cm)", "petal length (cm)", "petal width (cm)"]'
    feature_stats = '{"sepal length (cm)": {"count": 30, "mean": 5.946666666666668, "std": 0.8394305678023165, "min": 4.7, "max": 7.9, "hist": [[4, 4, 4, 4, 4, 3, 4, 0, 3, 4, 1, 1, 2, 1, 0, 1, 0, 0, 1, 1], [4.7, 4.86, 5.0200000000000005, 5.18, 5.34, 5.5, 5.66, 5.82, 5.98, 6.140000000000001, 6.300000000000001, 6.46, 6.62, 6.78, 6.94, 7.1, 7.26, 7.42, 7.58, 7.74, 7.9]]}, "sepal width (cm)": {"count": 30, "mean": 3.119999999999999, "std": 0.4088672324766359, "min": 2.2, "max": 3.8, "hist": [[1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 3, 3, 2, 2, 0, 3, 1, 1, 0, 4], [2.2, 2.2800000000000002, 2.3600000000000003, 2.44, 2.52, 2.6, 2.68, 2.7600000000000002, 2.84, 2.92, 3, 3.08, 3.16, 3.24, 3.3200000000000003, 3.4, 3.48, 3.56, 3.6399999999999997, 3.7199999999999998, 3.8]]}, "petal length (cm)": {"count": 30, "mean": 3.863333333333333, "std": 1.8212317418360753, "min": 1.3, "max": 6.7, "hist": [[6, 6, 6, 6, 6, 6, 0, 0, 1, 2, 0, 3, 3, 2, 2, 3, 1, 1, 1, 1], [1.3, 1.57, 1.84, 2.1100000000000003, 2.38, 2.6500000000000004, 2.92, 3.1900000000000004, 3.46, 3.7300000000000004, 4, 4.2700000000000005, 4.54, 4.8100000000000005, 5.08, 5.3500000000000005, 5.62, 5.89, 6.16, 6.430000000000001, 6.7]]}, "petal width (cm)": {"count": 30, "mean": 1.2733333333333334, "std": 0.8291804567674381, "min": 0.1, "max": 2.5, "hist": [[5, 5, 5, 5, 5, 5, 0, 0, 1, 2, 3, 2, 1, 0, 2, 3, 1, 1, 0, 4], [0.1, 0.22, 0.33999999999999997, 0.45999999999999996, 0.58, 0.7, 0.82, 0.94, 1.06, 1.1800000000000002, 1.3, 1.42, 1.54, 1.6600000000000001, 1.78, 1.9, 2.02, 2.14, 2.2600000000000002, 2.38, 2.5]]}}'  # noqa
    drift_measures = '{"petal width (cm)": {"tvd": 0.4, "hellinger": 0.38143130942893605, "kld": 1.3765624725652992}, "tvd_sum": 1.755886699507389, "tvd_mean": 0.43897167487684724, "hellinger_sum": 1.7802062191831514, "hellinger_mean": 0.44505155479578784, "kld_sum": 9.133613874253776, "kld_mean": 2.283403468563444, "sepal width (cm)": {"tvd": 0.3551724137931034, "hellinger": 0.4024622641158891, "kld": 1.7123635755188409}, "petal length (cm)": {"tvd": 0.445, "hellinger": 0.39975075965755447, "kld": 1.6449612084377268}, "sepal length (cm)": {"tvd": 0.5557142857142856, "hellinger": 0.5965618859807716, "kld": 4.399726617731908}}'  # noqa

    # The record stored in KV; the JSON-valued attributes are kept as strings.
    record = {
        "timestamp": request_time,
        "project": "test",
        "model": "test-model",
        "function": "v2-model-server",
        "tag": "latest",
        "model_class": "ClassifierModel",
        "endpoint_id": "test.test_id",
        "labels": "null",
        "latency_avg_1s": 42427.0,
        "predictions_per_second_count_1s": 141,
        "first_request": request_time,
        "last_request": request_time,
        "error_count": 0,
        "feature_names": feature_names,
        "feature_stats": feature_stats,
        "drift_measures": drift_measures,
    }

    kv_client = get_v3io_client(
        endpoint=config.v3io_api, access_key=_get_access_key()
    )
    await run_in_threadpool(
        kv_client.kv.put,
        container="projects",
        table_path="test/model-endpoints/endpoints",
        key="test.test_id",
        attributes=record,
    )

    # Query the grafana proxy for the endpoint that was just stored.
    query = {
        "targets": [
            {
                "target": "project=test;endpoint_id=test.test_id;target_endpoint=overall_feature_analysis"
            }
        ]
    }
    response = await run_in_threadpool(
        client.post,
        url="/api/grafana-proxy/model-endpoints/query",
        headers={"X-V3io-Session-Key": _get_access_key()},
        json=query,
    )
    assert response.status_code == 200

    tables = response.json()
    assert len(tables) == 1

    table = tables[0]
    assert "columns" in table
    assert "rows" in table
    assert len(table["rows"][0]) == 6
def list_endpoints(
    access_key: str,
    project: str,
    model: Optional[str] = None,
    function: Optional[str] = None,
    tag: Optional[str] = None,
    labels: Optional[List[str]] = None,
    metrics: Optional[List[str]] = None,
    start: str = "now-1h",
    end: str = "now",
) -> List[ModelEndpointState]:
    """
    Returns a list of `ModelEndpointState` objects. Each object represents the current state of a model
    endpoint. This functions supports filtering by the following parameters:
    1) model
    2) function
    3) tag
    4) labels
    By default, when no filters are applied, all available endpoints for the given project will be listed.

    In addition, this functions provides a facade for listing endpoint related metrics. This facade is
    time-based and depends on the 'start' and 'end' parameters. By default, when the metrics parameter is
    None, no metrics are added to the output of this function.

    :param access_key: V3IO access key for managing user permissions
    :param project: The name of the project
    :param model: The name of the model to filter by
    :param function: The name of the function to filter by
    :param tag: A tag to filter by
    :param labels: A list of labels to filter by. Label filters work by either filtering a specific value of
                   a label (i.e. list("key==value")) or by looking for the existence of a given key (i.e. "key")
    :param metrics: A list of metrics to return for each endpoint, read more in 'TimeMetric'
    :param start: The start time of the metrics
    :param end: The end time of the metrics
    """
    client = get_v3io_client(endpoint=config.v3io_api)
    cursor = client.kv.new_cursor(
        container=config.model_endpoint_monitoring.container,
        table_path=f"{project}/{ENDPOINTS_TABLE_PATH}",
        access_key=access_key,
        attribute_names=ENDPOINT_TABLE_ATTRIBUTES,
        filter_expression=_build_kv_cursor_filter_expression(
            project, function, model, tag, labels
        ),
    )
    endpoints = cursor.all()

    endpoint_state_list = []
    for endpoint in endpoints:
        endpoint_metrics = {}
        if metrics:
            endpoint_metrics = _get_endpoint_metrics(
                access_key=access_key,
                project=project,
                endpoint_id=endpoint.get("id"),
                name=metrics,
                start=start,
                end=end,
            )

        # Labels are stored in the record as a JSON string. A record without that attribute must not
        # fail the whole listing, so only decode when there is something to decode.
        raw_labels = endpoint.get("labels")
        endpoint_labels = json.loads(raw_labels) if raw_labels else None

        state = ModelEndpointState(
            endpoint=ModelEndpoint(
                metadata=ModelEndpointMetadata(
                    project=endpoint.get("project"),
                    tag=endpoint.get("tag"),
                    labels=endpoint_labels,
                ),
                spec=ModelEndpointSpec(
                    model=endpoint.get("model"),
                    function=endpoint.get("function"),
                    model_class=endpoint.get("model_class"),
                ),
                status=ObjectStatus(state="active"),
            ),
            first_request=endpoint.get("first_request"),
            last_request=endpoint.get("last_request"),
            error_count=endpoint.get("error_count"),
            drift_status=endpoint.get("drift_status"),
            metrics=endpoint_metrics,
        )
        endpoint_state_list.append(state)

    return endpoint_state_list
def do(self, event: Dict):
    """
    Attach named features and named predictions to the event and return it.

    The feature names and label columns of an endpoint are resolved once, on the first event of that
    endpoint, and kept on `self.feature_names` / `self.label_columns`. Each of them is taken from the
    endpoint's KV record; when the record has none it is inferred from the event (only if
    `self._infer_columns_from_data` is set), and when still missing it is generated (`f0, f1, ...` /
    `p0, p1, ...`) and written back to the KV record.

    :param event: The event to map; read by the ENDPOINT_ID, FEATURES and PREDICTION keys
    :return: The same event, with the NAMED_FEATURES and NAMED_PREDICTIONS keys set
    """
    endpoint_id = event[ENDPOINT_ID]

    # First event of this endpoint - resolve its column names
    if endpoint_id not in self.feature_names:
        record = get_endpoint_record(
            kv_container=self.kv_container,
            kv_path=self.kv_path,
            endpoint_id=endpoint_id,
            access_key=self.access_key,
        )

        # Both attributes are stored as JSON strings and may be missing from the record
        stored_feature_names = record.get(FEATURE_NAMES)
        feature_names = (
            json.loads(stored_feature_names) if stored_feature_names else None
        )
        stored_label_columns = record.get(LABEL_COLUMNS)
        label_columns = (
            json.loads(stored_label_columns) if stored_label_columns else None
        )

        if not feature_names and self._infer_columns_from_data:
            feature_names = self._infer_feature_names_from_data(event)

        if not feature_names:
            logger.warn(
                "Feature names are not initialized, they will be automatically generated",
                endpoint_id=endpoint_id,
            )
            feature_names = [f"f{index}" for index, _ in enumerate(event[FEATURES])]
            generated_attributes = {FEATURE_NAMES: json.dumps(feature_names)}
            get_v3io_client().kv.update(
                container=self.kv_container,
                table_path=self.kv_path,
                access_key=self.access_key,
                key=event[ENDPOINT_ID],
                attributes=generated_attributes,
                raise_for_status=RaiseForStatus.always,
            )

        if not label_columns and self._infer_columns_from_data:
            label_columns = self._infer_label_columns_from_data(event)

        if not label_columns:
            logger.warn(
                "label column names are not initialized, they will be automatically generated",
                endpoint_id=endpoint_id,
            )
            label_columns = [
                f"p{index}" for index, _ in enumerate(event[PREDICTION])
            ]
            generated_attributes = {LABEL_COLUMNS: json.dumps(label_columns)}
            get_v3io_client().kv.update(
                container=self.kv_container,
                table_path=self.kv_path,
                access_key=self.access_key,
                key=event[ENDPOINT_ID],
                attributes=generated_attributes,
                raise_for_status=RaiseForStatus.always,
            )

        self.label_columns[endpoint_id] = label_columns
        self.feature_names[endpoint_id] = feature_names

        logger.info(
            "Label columns", endpoint_id=endpoint_id, label_columns=label_columns
        )
        logger.info(
            "Feature names", endpoint_id=endpoint_id, feature_names=feature_names
        )

    # Pair every name with the value at the same position
    event[NAMED_FEATURES] = dict(
        zip(self.feature_names[endpoint_id], event[FEATURES])
    )
    event[NAMED_PREDICTIONS] = dict(
        zip(self.label_columns[endpoint_id], event[PREDICTION])
    )

    logger.info("Mapped event", event=event)
    return event
def do(self, event: Dict):
    """
    Attach named features and named predictions to the event and return it.

    The feature names and label columns of an endpoint are resolved once, on the first event of that
    endpoint, and kept on `self.feature_names` / `self.label_columns`. Each of them is taken from the
    endpoint's KV record; when the record has none, it is generated (`f0, f1, ...` / `p0, p1, ...`) and
    written back to the KV record.

    :param event: The event to map; read by the ENDPOINT_ID, FEATURES and PREDICTION keys
    :return: The same event, with the NAMED_FEATURES and NAMED_PREDICTIONS keys set
    """
    endpoint_id = event[ENDPOINT_ID]

    # First event of this endpoint - resolve its column names
    if endpoint_id not in self.feature_names:
        endpoint_record = get_endpoint_record(
            kv_container=self.kv_container,
            kv_path=self.kv_path,
            endpoint_id=endpoint_id,
        )

        # Both attributes are stored as JSON strings and may be missing from the record
        feature_names = endpoint_record.get(FEATURE_NAMES)
        feature_names = json.loads(feature_names) if feature_names else None

        label_columns = endpoint_record.get(LABEL_COLUMNS)
        label_columns = json.loads(label_columns) if label_columns else None

        if not feature_names:
            # Plain string literal: the message has no placeholders, so no f-prefix
            logger.warn(
                "Feature names are not initialized, they will be automatically generated",
                endpoint_id=endpoint_id,
            )
            feature_names = [f"f{i}" for i, _ in enumerate(event[FEATURES])]
            get_v3io_client().kv.update(
                container=self.kv_container,
                table_path=self.kv_path,
                key=event[ENDPOINT_ID],
                attributes={FEATURE_NAMES: json.dumps(feature_names)},
            )

        if not label_columns:
            logger.warn(
                "label column names are not initialized, they will be automatically generated",
                endpoint_id=endpoint_id,
            )
            label_columns = [f"p{i}" for i, _ in enumerate(event[PREDICTION])]
            get_v3io_client().kv.update(
                container=self.kv_container,
                table_path=self.kv_path,
                key=event[ENDPOINT_ID],
                attributes={LABEL_COLUMNS: json.dumps(label_columns)},
            )

        self.label_columns[endpoint_id] = label_columns
        self.feature_names[endpoint_id] = feature_names

    # Pair every name with the value at the same position
    feature_names = self.feature_names[endpoint_id]
    features = event[FEATURES]
    event[NAMED_FEATURES] = dict(zip(feature_names, features))

    label_columns = self.label_columns[endpoint_id]
    prediction = event[PREDICTION]
    event[NAMED_PREDICTIONS] = dict(zip(label_columns, prediction))

    return event