Example #1
import sys
import time

import cdsw
import pandas as pd

# model_deployment_crn is assumed to be defined elsewhere in the script.
def get_predictions(freq, ts_range):
    freq = int(freq)
    # Hourly timestamp index over the requested window.
    ts = pd.date_range(ts_range['$gt'], ts_range['$lte'], freq='H')
    current_timestamp_ms = int(round(time.time() * 1000))
    print(ts_range['$gt'], file=sys.stderr)
    #known_metrics = cdsw.read_metrics(model_deployment_crn, ts_range['$gt'].timestamp(), ts_range['$lte'].timestamp())
    known_metrics = cdsw.read_metrics(model_deployment_crn, 0, current_timestamp_ms)
    print(known_metrics, file=sys.stderr)
    df = pd.io.json.json_normalize(known_metrics["metrics"])
    return df
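A hypothetical call, for illustration only: assuming the Mongo-style '$gt'/'$lte' keys hold datetime objects (as the commented-out .timestamp() call suggests), the function could be invoked like this:

import datetime

ts_range = {
    "$gt": datetime.datetime(2021, 1, 1),
    "$lte": datetime.datetime(2021, 1, 2),
}
predictions_df = get_predictions(freq="1", ts_range=ts_range)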
Example #2
    def query_model_metrics(self, **kwargs):
        """
        Use cdsw.read_metrics() to query saved model metrics from the
        PostgreSQL database and return the details in a formatted dataframe.

        Queries metrics for the model deployment saved in
        self.latest_deployment_details. Optionally, additional keyword
        arguments can be passed to indicate a start/end timestamp.
        """

        ipt = {}
        ipt["model_deployment_crn"] = self.latest_deployment_details[
            "latest_deployment_crn"]

        if kwargs:
            ipt.update(kwargs)

        return self.format_model_metrics_query(cdsw.read_metrics(**ipt))
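The format_model_metrics_query helper is not shown in this snippet; a minimal sketch of what it might do, assuming pandas is imported as pd in the class's module and assuming the payload shape returned by cdsw.read_metrics in the other examples (a dict whose "metrics" key holds a list of records):

    def format_model_metrics_query(self, known_metrics):
        """Flatten the raw read_metrics() payload into a dataframe and
        convert the millisecond timestamps into datetime columns."""
        df = pd.io.json.json_normalize(known_metrics["metrics"])
        for col in ("startTimeStampMs", "endTimeStampMs"):
            if col in df.columns:
                df[col] = pd.to_datetime(df[col], unit="ms")
        return df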
Example #3
import cdsw

print(cdsw.read_metrics('mean_sq_err'))
Example #4

import os

import cdsw
import pandas as pd
from cmlbootstrap import CMLBootstrap

# HOST, USERNAME, and model_id are assumed to be defined earlier in the script.
API_KEY = os.getenv("CDSW_API_KEY")
PROJECT_NAME = os.getenv("CDSW_PROJECT")

cml = CMLBootstrap(HOST, USERNAME, API_KEY, PROJECT_NAME)

latest_model = cml.get_model({
    "id": model_id,
    "latestModelDeployment": True,
    "latestModelBuild": True
})

Model_CRN = latest_model["crn"]
Deployment_CRN = latest_model["latestModelDeployment"]["crn"]

# Read in the model metrics dict.
model_metrics = cdsw.read_metrics(model_crn=Model_CRN,
                                  model_deployment_crn=Deployment_CRN)

# This is a handy way to unravel the dict into a big pandas dataframe.
metrics_df = pd.io.json.json_normalize(model_metrics["metrics"])
metrics_df.tail().T

# Do some conversions & calculations
metrics_df['startTimeStampMs'] = pd.to_datetime(metrics_df['startTimeStampMs'],
                                                unit='ms')
metrics_df['endTimeStampMs'] = pd.to_datetime(metrics_df['endTimeStampMs'],
                                              unit='ms')
# Use total_seconds() so processing times longer than one second are not
# truncated (.dt.microseconds only returns the sub-second component).
metrics_df["processing_time"] = (
    metrics_df["endTimeStampMs"] -
    metrics_df["startTimeStampMs"]).dt.total_seconds() * 1000  # milliseconds
Example #5
import time

import cdsw
import pandas as pd
from scipy.stats import chisquare

# Define our unique model deployment id
model_deployment_crn = "crn:cdp:ml:us-west-1:12a0079b-1591-4ca0-b721-a446bda74e67:workspace:ec3efe6f-c4f5-4593-857b-a80698e4857e/d5c3fbbe-d604-4f3b-b98a-227ecbd741b4"

# Define the training distribution for the two label classes.
training_distribution_percent = pd.DataFrame({
    "Excellent": [0.50],
    "Poor": [0.50]
})
training_distribution_percent

current_timestamp_ms = int(round(time.time() * 1000))

known_metrics = cdsw.read_metrics(model_deployment_crn=model_deployment_crn,
                                  start_timestamp_ms=0,
                                  end_timestamp_ms=current_timestamp_ms)

df = pd.io.json.json_normalize(known_metrics["metrics"])

# Do some conversions & calculations
df['startTimeStampMs'] = pd.to_datetime(df['startTimeStampMs'], unit='ms')
df['endTimeStampMs'] = pd.to_datetime(df['endTimeStampMs'], unit='ms')
# total_seconds() gives the full duration; .dt.microseconds would wrap at 1s.
df["processing_time"] = (df["endTimeStampMs"] -
                         df["startTimeStampMs"]).dt.total_seconds() * 1000  # ms

non_agg_metrics = df.dropna(subset=["metrics.prediction"])
non_agg_metrics.tail()

# Visualize the processing time
non_agg_metrics.plot(kind='line', x='predictionUuid', y='processing_time')
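The snippet imports chisquare and defines training_distribution_percent but stops before using either; a minimal sketch of how the drift check might be completed, assuming the metrics.prediction column holds the "Excellent"/"Poor" labels:

# Observed label distribution among the tracked predictions.
prediction_percent = (
    non_agg_metrics["metrics.prediction"]
    .value_counts(normalize=True)
    .reindex(["Excellent", "Poor"], fill_value=0)
)

# Chi-square test of the observed counts against the expected counts
# implied by the training distribution.
n = len(non_agg_metrics)
observed = prediction_percent * n
expected = training_distribution_percent.iloc[0] * n
print(chisquare(f_obs=observed, f_exp=expected))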
Example #6
import cdsw, time, os
import json
import random

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import chisquare

# Define our unique model deployment id
model_deployment_crn = "GET THIS FROM MODEL SCREEN IN CML"

current_timestamp_ms = int(round(time.time() * 1000))

known_metrics = cdsw.read_metrics(model_deployment_crn, 0,
                                  current_timestamp_ms)

df = pd.io.json.json_normalize(known_metrics["metrics"])
df

# Do some conversions & Calculations
df['startTimeStampMs'] = pd.to_datetime(df['startTimeStampMs'], unit='ms')
df['endTimeStampMs'] = pd.to_datetime(df['endTimeStampMs'], unit='ms')
df["processing_time"] = (df["endTimeStampMs"] -
                         df["startTimeStampMs"]).dt.microseconds * 1000

#df.plot(kind='line',x='endTimeStampMs',y='metrics.MonthlyCharges',color='red')

cdsw.track_aggregate_metrics({"accuracy": random.random()},
                             current_timestamp_ms,
                             current_timestamp_ms,
                             model_deployment_crn=model_deployment_crn)
Example #7
# score_x, uuids, predictions, model_access_key, api_key, and
# start_timestamp_ms are assumed to be defined earlier in the script.
for i in range(len(score_x)):
    output = cdsw.call_model(model_access_key, {"petal_length": score_x[i][0]}, api_key)
    # Record the UUID of each prediction for correlation with ground truth.
    uuids.append(output["response"]["uuid"])
    predictions.append(output["response"]["prediction"])

print(output)
# Record the current time.
end_timestamp_ms = int(round(time.time() * 1000))

# We can now use the read_metrics function to read the metrics we just
# generated into the current session, by querying by time window.
data = cdsw.read_metrics(model_deployment_crn=model_deployment_crn,
                         start_timestamp_ms=start_timestamp_ms,
                         end_timestamp_ms=end_timestamp_ms)
data = data['metrics']

# Now, ground truth is known and we want to track the true value
# corresponding to each prediction above.
score_y = iris.data[:test_size, 3].reshape(-1, 1) # Observed petal width

# Track the true values alongside the corresponding predictions using
# track_delayed_metrics. At the same time, calculate the mean absolute
# prediction error.
mean_absolute_error = 0
n = len(score_y)
for i in range(n):
    ground_truth = score_y[i][0]  # true petal width, not the model input
    cdsw.track_delayed_metrics({"actual_result": ground_truth}, uuids[i])
    # Accumulate the absolute error between each prediction and its truth.
    mean_absolute_error += abs(ground_truth - predictions[i]) / n
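The accumulated error could then be stored as a batch-level metric for the same time window, a sketch reusing the track_aggregate_metrics call from Example #6 (model_deployment_crn is assumed to be defined as in the earlier snippets):

cdsw.track_aggregate_metrics({"mean_absolute_error": mean_absolute_error},
                             start_timestamp_ms,
                             end_timestamp_ms,
                             model_deployment_crn=model_deployment_crn)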