Пример #1
0
def test_feature_importance():
    """Report feature importance on a model while running in AGENT mode.

    The MLOps REST endpoints touched by this test are registered on a
    ``requests_mock`` mocker with canned JSON payloads before ``mlops.init``
    is called, and the whole mlops session runs inside that mocker.
    """
    significant_count = 6
    workflow_id = ION1.ION_INSTANCE_ID
    node_id = ION1.NODE_1_ID
    pipeline_id = ION1.PIPELINE_INST_ID_1

    set_mlops_env(ion_id=workflow_id, ion_node_id=node_id, model_id=ION1.MODEL_ID)

    rest = MlOpsRestFactory().get_rest_helper(
        MLOpsMode.AGENT,
        mlops_server="localhost",
        mlops_port="3456",
        token="")
    rest.set_prefix(Constants.URL_MLOPS_PREFIX)

    with requests_mock.mock() as mocker:
        # Canned GET responses, registered in the same order as before.
        mocker.get(rest.url_get_workflow_instance(workflow_id),
                   json=test_workflow_instances)
        mocker.get(rest.url_get_ees(), json=test_ee_info)
        mocker.get(rest.url_get_agents(), json=test_agents_info)
        mocker.get(rest.url_get_model_list(), json=test_models_info)
        mocker.get(rest.url_get_health_thresholds(workflow_id),
                   json=test_health_info)
        mocker.get(rest.url_get_model_stats(ION1.MODEL_ID), json=test_model_stats)
        mocker.get(rest.url_get_uuid("model"),
                   json={"id": "model_5906255e-0a3d-4fef-8653-8d41911264fb"})
        # Stat uploads are answered with an empty JSON object.
        mocker.post(rest.url_post_stat(pipeline_id), json={})

        # Test Python channel
        mlops.init(ctx=None, mlops_mode=MLOpsMode.AGENT)
        explained_model = mlops.Model(
            name="dtr_mlops_model",
            model_format=ModelFormat.SPARKML,
            description="model of decision tree regression with explainability")
        explained_model.feature_importance(
            model=FinalModel,
            feature_names=FinalModel.feature_names,
            num_significant_features=significant_count)
        mlops.done()
Пример #2
0
def test_publish_model_api():
    """Publish a text model in STAND_ALONE mode and fetch it back by id.

    A small scratch file holding the model payload is written, attached to
    an ``pm.Model`` together with an annotation dict, and published. The
    test then checks that:

    * ``pm.publish_model`` returns the same id as ``model.get_id()``;
    * the row returned by ``pm.get_model_by_id(..., download=True)`` carries
      the original payload in its ``'data'`` column and the original
      annotations in its ``'annotations'`` column.

    The scratch file is removed before the model is fetched back, whether
    or not publishing succeeded.
    """
    pm.init(ctx=None, mlops_mode=MLOpsMode.STAND_ALONE)

    model_data = "MODEL_DATA"
    annotation = {"a": "b"}
    model = pm.Model(name="my model",
                     model_format=ModelFormat.TEXT,
                     description="test model")

    model_file = os.path.join(os.path.sep, "tmp", str(uuid.uuid4()))
    try:
        # The context manager closes the handle even if the write raises.
        with open(model_file, 'w') as f:
            f.write(model_data)

        model.set_model_path(model_file)
        model.set_annotations(annotation)

        model_id = pm.publish_model(model)
        assert (model_id == model.get_id())
    finally:
        # Always clean up the scratch file so a failed publish or a failed
        # id check does not leave it behind. The existence guard avoids
        # masking the original error when the file was never created.
        if os.path.exists(model_file):
            os.remove(model_file)

    model_df = pm.get_model_by_id(model_id, download=True)

    pm.done()

    # accessing 0th row, 'data' column of returned model dataframe
    print(model_df.iloc[0])
    assert (model_data == model_df.iloc[0]['data'])
    assert (annotation == model_df.iloc[0]['annotations'])
Пример #3
0
def gen_data_dist_stats(spark_ctx):
    """Fit a KMeans pipeline on generated data and report distribution stats.

    The generated dataset has its columns renamed positionally to
    ``c0, c1, ...``, is cached, and is then used both to fit a
    ``VectorAssembler`` + ``KMeans`` pipeline and as the input for the
    data-distribution statistics. A failure while reporting those
    statistics is printed and otherwise ignored.

    :param spark_ctx: Spark context handed to ``SparkSession`` and to
        ``generate_dataset``.
    :return: the fitted pipeline model.
    """
    # Constructed for parity with the original flow; nothing below reads it.
    session = SparkSession(spark_ctx)

    # Import Data
    ##################################
    n_centers = 3  # fixed number of centers
    n_attributes = 10  # fixed number of attributes
    n_rows = 60000  # number of rows in the dataset
    dataset = generate_dataset(n_attributes, n_rows, n_centers, spark_ctx)

    # The column list is read once, before any renaming takes place.
    for position, original_name in enumerate(dataset.columns):
        dataset = dataset.withColumnRenamed(original_name,
                                            'c{}'.format(position))

    dataset = dataset.cache()
    training_data = dataset

    # SparkML pipeline
    ##################################
    excluded = []  # no columns are excluded at the moment
    feature_columns = [name for name in training_data.columns
                       if name not in excluded]
    print(feature_columns)

    assembler = VectorAssembler(inputCols=feature_columns,
                                outputCol="features")
    clusterer = KMeans(k=n_centers,
                       initMode="k-means||",
                       initSteps=5,
                       tol=1e-4,
                       maxIter=100,
                       featuresCol="features")
    model_kmeans = Pipeline(stages=[assembler, clusterer]).fit(training_data)

    try:
        mlops.set_data_distribution_stat(data=training_data, model=model_kmeans)
        stats_model = mlops.Model(model_format=ModelFormat.SPARKML)
        stats_model.set_data_distribution_stat(data=training_data)
        print("PM: done generating histogram")
    except Exception as err:
        # Best effort: report the problem and carry on.
        print("PM: failed to generate histogram using pm.stat")
        print(err)

    # Indicating that model statistics were reported
    mlops.set_stat(E2EConstants.MODEL_STATS_REPORTED_STAT_NAME, 1)
    return model_kmeans
Пример #4
0
def main():
    """Pickle a sample string, publish it as a binary model, and report stats.

    The model name and description come from the parsed command-line
    options. After publishing, two stats are reported: the current minute
    of the hour and ``m.metadata.size``.
    """
    mlops.init()

    options = parse_args()

    # Save the model
    s = "Hello World"
    # The temporary file is closed (and, with the default delete=True,
    # removed) deterministically when the block exits. The whole mlops
    # session stays inside the block so the file exists for as long as
    # mlops may still need to read it.
    with tempfile.NamedTemporaryFile() as f:
        pickle.dump(s, f)
        # Make sure the pickled bytes are on disk before the path is handed
        # to the model.
        f.flush()

        m = mlops.Model(name=options.model_name,
                        model_format=ModelFormat.BINARY,
                        description=options.model_description)
        m.set_annotations({"aaa": "my annotations"})
        m.set_model_path(f.name)
        mlops.publish_model(m)
        mlops.set_stat("model post time, minute", dt.now().minute)
        mlops.set_stat("posted model size", m.metadata.size)
        mlops.done()
Пример #5
0
def test_stats_model():
    """Attach scalar, multi-line, KPI and opaque stats to a single model."""
    # Publishing stats with model
    model = mlops.Model(model_format=ModelFormat.SPARKML)

    # Adding multiple points (to see a graph in the ui), expecting each run to generate 8 points
    for point in (1.0, 3.0, 4.0, 5.0, 6.0, 2.0, 7.0, 8.0):
        model.set_stat("model_stat1", point)

    # Multi line graphs
    graph_name = "model_stat-multi-line-test"
    rows = [[5, 15, 20], [55, 155, 255], [75, 175, 275]]
    labels = ["a", "b", "c"]
    for row in rows:
        graph = MultiLineGraph().name(graph_name).labels(labels).data(row)
        model.set_stat(graph)

    # KPI stats: one point per second, starting at the current time
    window_start = int(time.time())
    kpi_value = 3.56
    kpi_points = 10
    kpi_name = "model_test-kpi-1"
    for offset in range(kpi_points):
        model.set_kpi(kpi_name, kpi_value, window_start + offset, KpiValue.TIME_SEC)
        # Accumulate step by step so the float values match repeated addition.
        kpi_value += 1

    # opaque stats
    payload = {"1": "aaaaaa", "2": 33}
    opaque_stat = Opaque().name("opq-1").data(payload)
    model.set_stat(opaque_stat)