def test_feature_importance():
    """Exercise Model.feature_importance over the agent REST channel with every endpoint mocked."""
    top_k = 6
    instance_id = ION1.ION_INSTANCE_ID
    node_id = ION1.NODE_1_ID
    pipeline_inst_id = ION1.PIPELINE_INST_ID_1

    set_mlops_env(ion_id=instance_id, ion_node_id=node_id, model_id=ION1.MODEL_ID)

    rest_helper = MlOpsRestFactory().get_rest_helper(
        MLOpsMode.AGENT, mlops_server="localhost", mlops_port="3456", token="")
    rest_helper.set_prefix(Constants.URL_MLOPS_PREFIX)

    with requests_mock.mock() as m:
        # Register canned responses for every REST call init()/publish will make.
        m.get(rest_helper.url_get_workflow_instance(instance_id), json=test_workflow_instances)
        m.get(rest_helper.url_get_ees(), json=test_ee_info)
        m.get(rest_helper.url_get_agents(), json=test_agents_info)
        m.get(rest_helper.url_get_model_list(), json=test_models_info)
        m.get(rest_helper.url_get_health_thresholds(instance_id), json=test_health_info)
        m.get(rest_helper.url_get_model_stats(ION1.MODEL_ID), json=test_model_stats)
        m.get(rest_helper.url_get_uuid("model"),
              json={"id": "model_5906255e-0a3d-4fef-8653-8d41911264fb"})
        m.post(rest_helper.url_post_stat(pipeline_inst_id), json={})

        # Test Python channel
        mlops.init(ctx=None, mlops_mode=MLOpsMode.AGENT)
        published_model = mlops.Model(
            name="dtr_mlops_model",
            model_format=ModelFormat.SPARKML,
            description="model of decision tree regression with explainability")
        published_model.feature_importance(model=FinalModel,
                                           feature_names=FinalModel.feature_names,
                                           num_significant_features=top_k)
        mlops.done()
def test_publish_model_api():
    """Round-trip a model through publish_model/get_model_by_id in stand-alone mode.

    Writes model text to a temp file, publishes it with annotations, then
    downloads it again and checks that both the data payload and the
    annotations survive the round trip.
    """
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    model_data = "MODEL_DATA"
    annotation = {"a": "b"}

    model = pm.Model(name="my model", model_format=ModelFormat.TEXT, description="test model")
    model_file = os.path.join(os.path.sep, "tmp", str(uuid.uuid4()))
    # Context manager guarantees the handle is closed even if the write fails.
    with open(model_file, 'w') as f:
        f.write(model_data)
    model.set_model_path(model_file)
    model.set_annotations(annotation)

    try:
        model_id = pm.publish_model(model)
        assert model_id == model.get_id()
    finally:
        # Remove the temp file even when publish/assert raises, so failed
        # runs do not leave artifacts behind in /tmp.
        os.remove(model_file)

    model_df = pm.get_model_by_id(model_id, download=True)
    pm.done()

    # accessing 0th row, 'data' column of returned model dataframe
    print(model_df.iloc[0])
    assert model_data == model_df.iloc[0]['data']
    assert annotation == model_df.iloc[0]['annotations']
def gen_data_dist_stats(spark_ctx):
    """Generate a synthetic dataset, fit a KMeans pipeline, and report data-distribution stats.

    :param spark_ctx: active SparkContext used to build the session and dataset
    :return: the fitted SparkML KMeans PipelineModel
    """
    # Constructing the session has side effects (registers with the context), keep it.
    spark_session = SparkSession(spark_ctx)

    # Import Data
    ##################################
    K = 3            # fixed number of centers
    num_attr = 10    # fixed number of attributes
    num_rows = 60000 # number of rows in the dataset
    input_data = generate_dataset(num_attr, num_rows, K, spark_ctx)

    # Normalize column names to c0..cN-1; enumerate() snapshots the original
    # column list before the DataFrame is rebound inside the loop.
    for col_index, old_name in enumerate(input_data.columns):
        input_data = input_data.withColumnRenamed(old_name, 'c' + str(col_index))
    input_data = input_data.cache()
    input_train = input_data

    # SparkML pipeline
    ##################################
    exclude_cols = []  # no exclusions today; kept as a hook for future filtering
    input_col_names = [c for c in input_train.columns if c not in exclude_cols]
    print(input_col_names)

    vector_assembler = VectorAssembler(inputCols=input_col_names, outputCol="features")
    kmeans_pipe = KMeans(k=K, initMode="k-means||", initSteps=5, tol=1e-4, maxIter=100,
                         featuresCol="features")
    model_kmeans = Pipeline(stages=[vector_assembler, kmeans_pipe]).fit(input_train)

    # Best-effort: histogram generation failures are logged, not fatal.
    try:
        mlops.set_data_distribution_stat(data=input_train, model=model_kmeans)
        m = mlops.Model(model_format=ModelFormat.SPARKML)
        m.set_data_distribution_stat(data=input_train)
        print("PM: done generating histogram")
    except Exception as e:
        print("PM: failed to generate histogram using pm.stat")
        print(e)

    # Indicating that model statistics were reported
    mlops.set_stat(E2EConstants.MODEL_STATS_REPORTED_STAT_NAME, 1)
    return model_kmeans
def main():
    """Publish a tiny pickled payload as a BINARY model and report post-time stats."""
    mlops.init()
    options = parse_args()

    # Save the model: pickle a toy payload into a named temp file so
    # publish_model has a real on-disk path to read. The context manager
    # closes (and removes) the temp file deterministically instead of
    # leaking the handle until GC.
    s = "Hello World"
    with tempfile.NamedTemporaryFile() as f:
        pickle.dump(s, f)
        f.flush()  # ensure bytes hit disk before publish reads the path

        m = mlops.Model(name=options.model_name,
                        model_format=ModelFormat.BINARY,
                        description=options.model_description)
        m.set_annotations({"aaa": "my annotations"})
        m.set_model_path(f.name)
        # Publish while the file still exists (it is deleted on close).
        mlops.publish_model(m)

    mlops.set_stat("model post time, minute", dt.now().minute)
    mlops.set_stat("posted model size", m.metadata.size)
    mlops.done()
def test_stats_model():
    """Attach every supported stat flavor (scalar, multi-line, KPI, opaque) to a model."""
    # Publishing stats with model
    model = mlops.Model(model_format=ModelFormat.SPARKML)

    # Adding multiple points (to see a graph in the ui), expecting each run
    # to generate 8 points.
    for point in (1.0, 3.0, 4.0, 5.0, 6.0, 2.0, 7.0, 8.0):
        model.set_stat("model_stat1", point)

    # Multi line graphs
    stat_name = "model_stat-multi-line-test"
    columns = ["a", "b", "c"]
    for row in [[5, 15, 20], [55, 155, 255], [75, 175, 275]]:
        model.set_stat(MultiLineGraph().name(stat_name).labels(columns).data(row))

    # KPI stats: ten points, one second apart, value stepping by one.
    kpi_name = "model_test-kpi-1"
    kpi_window_end = int(time.time())
    val = 3.56
    for _ in range(10):
        model.set_kpi(kpi_name, val, kpi_window_end, KpiValue.TIME_SEC)
        kpi_window_end += 1
        val += 1

    # opaque stats
    stat_name = "opq-1"
    model.set_stat(Opaque().name(stat_name).data({"1": "aaaaaa", "2": 33}))