Пример #1
0
def test_create_model():
    """
    Create a model through ``ModelHelper`` and check the id it reports.

    The uuid endpoint of the agent-mode REST helper is mocked to return a fixed
    model id; the model returned by ``create_model`` must report that same id
    after a model file path has been attached to it.
    """
    with requests_mock.mock() as m:
        rh = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT)

        model_id = "model_5906255e-0a3d-4fef-8653-8d41911264fb"
        m.get(rh.url_get_uuid("model"), json={"id": model_id})

        ion = ION()
        ion.id = "bdc2ee10-767c-4524-ba72-8268a3894bff"
        mh = ModelHelper(rest_helper=rh, ion=ion, stats_helper=None)

        model_data = "MODEL_DATA"
        model = mh.create_model(name="my model", model_format=ModelFormat.TEXT, description="test model")

        model_file = os.path.join(os.path.sep, "tmp", str(uuid.uuid4()))
        try:
            # The context manager closes the handle even if the write raises.
            with open(model_file, 'w') as f:
                f.write(model_data)

            model.set_model_path(model_file)

            assert model.get_id() == model_id
        finally:
            # Remove the scratch file even when the assertion above fails.
            if os.path.exists(model_file):
                os.remove(model_file)

        rh.done()
Пример #2
0
def test_publish_model_rest():
    """
    Publish a model through the agent-mode REST helper against mocked endpoints.

    The ``POST /models`` endpoint and the uuid endpoint are both mocked to
    return the same model id; ``publish_model`` must return that id.
    """
    with requests_mock.mock() as m:
        rh = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT, mlops_server="localhost", mlops_port="4567")

        model_id = "model_5906255e-0a3d-4fef-8653-8d41911264fb"

        m.post('http://localhost:4567/models', json=model_id)
        m.get(rh.url_get_uuid("model"), json={"id": model_id})

        ion = ION()
        ion.id = "bdc2ee10-767c-4524-ba72-8268a3894bff"

        mh = ModelHelper(rest_helper=rh, ion=ion, stats_helper=None)

        model_data = "MODEL_DATA"
        model = mh.create_model(name="my model", model_format=ModelFormat.TEXT, description="test model",
                      user_defined="whatever I want goes here")

        model_file = os.path.join(os.path.sep, "tmp", str(uuid.uuid4()))
        try:
            # The context manager closes the handle even if the write raises.
            with open(model_file, 'w') as f:
                f.write(model_data)

            model.set_model_path(model_file)

            my_id = mh.publish_model(model, None)
        finally:
            # Do not leave the scratch file behind when publishing fails.
            if os.path.exists(model_file):
                os.remove(model_file)

        assert (model_id == my_id)

        rh.done()
Пример #3
0
    def __init__(self, config, mode=None):
        """
        Initialize the MLOpsCtx for the requested operation mode.

        In the AGENT, REST_ACCUMULATOR and ATTACH modes the ION structure and the
        health thresholds are fetched and the corresponding objects are built.
        In STAND_ALONE mode only stand-alone placeholder values are set.

        :param config: :class:`ConfigInfo` for this MLOps instantiation
        :param mode: the MLOpsMode value to operate in
        :return:
        :raises MLOpsException: when ``mode`` is not one of the supported modes
            (presumably also raised by ``_validate_config`` for invalid
            configurations - confirm against that method)
        """
        super(MLOpsCtx, self).__init__(__name__)

        self._info("MLOpsCtx __init__ called")
        self._info("Config\n{}".format(config))

        self._agent_list = []
        # Placeholder so the attribute exists even if the factory call below raises
        self._rest_helper = None

        self._ci = config
        self._mode = mode

        # ION object for the active ION this mlops instance belongs to
        self._ion = None
        # All ees defined, with the agent inside
        self._ees_dict = {}
        self._agents_dict = {}
        self._rest_helper = MlOpsRestFactory().get_rest_helper(
            self._mode, self._ci.mlops_server, self._ci.mlops_port, self._ci.token)

        if self._mode in (MLOpsMode.AGENT, MLOpsMode.REST_ACCUMULATOR):
            # Agent mode: talk to the agent and prefix the http requests with the mlops prefix
            self._validate_config()
            self._info("Agent mode")
            self._rest_helper.set_prefix(Constants.URL_MLOPS_PREFIX)
        elif self._mode == MLOpsMode.ATTACH:
            # Attach mode: connect either through ZK or directly to the server
            if self._ci.zk_host:
                self._detect_mlops_server_via_zk()
            self._validate_config()
            self._info("In pm mode - will try to connect to server")
        elif self._mode == MLOpsMode.STAND_ALONE:
            # Stand-alone mode has no valid ION structure, so nothing is fetched
            self._logger.info("In stand-alone mode: ctx data will not be available")
            self._set_stand_alone_values()
            return
        else:
            raise MLOpsException("Unsupported operation mode: {}".format(self._mode))

        # Shared by the agent and attach modes: fetch everything first, then build
        ion_description = self._detect_ion_structure()
        health_thresholds = self._fetch_health_thresholds()
        self._detect_ees_and_agents()
        self._build_ion_obj(ion_description)
        self._build_health_obj(health_thresholds)
Пример #4
0
def test_convert_models_json_dict_to_dataframe():
    """Convert the ``models_list_json_dict`` fixture and expect a result of length 2."""
    rest_helper = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT)

    ion_obj = ION()
    ion_obj.id = "bdc2ee10-767c-4524-ba72-8268a3894bff"
    model_helper = ModelHelper(rest_helper=rest_helper, ion=ion_obj, stats_helper=None)

    models_df = model_helper.convert_models_json_dict_to_dataframe(models_list_json_dict)
    row_count = len(models_df)
    assert row_count == 2

    rest_helper.done()
Пример #5
0
def test_publish_model():
    """
    Publish a text model in stand-alone mode and read it back.

    Checks that the published id matches the model's own id, that the
    downloaded content equals what was written, that the model listing equals
    the expected JSON structure, and that publishing a non-model object raises
    ``MLOpsException``.
    """
    expected_models_list_json_dict = [
        {
            models.json_fields.MODEL_ID_FIELD: '',
            models.json_fields.MODEL_NAME_FIELD: 'my model name',
            models.json_fields.MODEL_FORMAT_FIELD: 'Text',
            models.json_fields.MODEL_VERSION_FIELD: '',
            models.json_fields.MODEL_DESCRIPTION_FIELD: 'test model',
            models.json_fields.MODEL_TRAIN_VERSION_FIELD: '',
            models.json_fields.MODEL_SIZE_FIELD: 10,
            models.json_fields.MODEL_OWNER_FIELD: '',
            models.json_fields.MODEL_CREATED_ON_FIELD: None,
            models.json_fields.MODEL_FLAG_VALUES_FIELD: [],
            models.json_fields.MODEL_ANNOTATIONS_FIELD: {"custom_data": "my content"},
            models.json_fields.MODEL_ACTIVE_FIELD: False
        }
    ]

    rh = MlOpsRestFactory().get_rest_helper(MLOpsMode.STAND_ALONE)
    ion = ION()
    mh = ModelHelper(rest_helper=rh, ion=ion, stats_helper=None)

    model_data = "MODEL_DATA"
    model = mh.create_model(name="my model name", model_format=ModelFormat.TEXT, description="test model")
    model.set_annotations({"custom_data": "my content"})

    model_file = os.path.join(os.path.sep, "tmp", str(uuid.uuid4()))
    try:
        # The context manager closes the handle even if the write raises.
        with open(model_file, 'w') as f:
            f.write(model_data)
        model.set_model_path(model_file)

        my_id = mh.publish_model(model, None)
    finally:
        # Do not leave the scratch file behind when publishing fails.
        if os.path.exists(model_file):
            os.remove(model_file)
    assert my_id == model.get_id()
    # The id is only known after publishing, so patch it into the expectation.
    expected_models_list_json_dict[0][models.json_fields.MODEL_ID_FIELD] = my_id

    ret_data = mh.download_model(my_id)
    assert ret_data == model_data

    result_model_list = mh.fetch_all_models_json_dict()

    # Printed only to ease debugging when the comparison below fails.
    actual_json_dumps = json.dumps(result_model_list, sort_keys=True, indent=2)
    local_json_dump = json.dumps(expected_models_list_json_dict, sort_keys=True, indent=2)
    print("Expected_Dumps: {}".format(local_json_dump))
    print("Actual_Dumps: {}".format(actual_json_dumps))

    assert expected_models_list_json_dict == result_model_list

    with pytest.raises(MLOpsException):
        mh.publish_model("Not a model", None)
    rh.done()
Пример #6
0
def test_model_list_dict_from_json():
    """Fetch the model list from a mocked endpoint and expect the fixture back unchanged."""
    with requests_mock.mock() as mocker:
        mocker.get('http://localhost:3456/v1/models', json=models_list_json_dict)

        rest_helper = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT)
        ion_obj = ION()
        ion_obj.id = "bdc2ee10-767c-4524-ba72-8268a3894bff"
        model_helper = ModelHelper(rest_helper=rest_helper, ion=ion_obj, stats_helper=None)

        fetched_models = model_helper.fetch_all_models_json_dict()
        print("Type is: {}".format(type(fetched_models)))
        print("result_model_list: {}".format(fetched_models))

        # Compare through a canonical (key-sorted) JSON rendering of both sides
        expected_json, actual_json = [
            json.dumps(payload, sort_keys=True, indent=2)
            for payload in (models_list_json_dict, fetched_models)
        ]
        assert expected_json == actual_json
        rest_helper.done()
Пример #7
0
    def attach(self,
               mlapp_id,
               mlops_server=Constants.MLOPS_DEFAULT_HOST,
               mlops_port=Constants.MLOPS_DEFAULT_PORT,
               user=Constants.MLOPS_DEFAULT_USER,
               password=None):
        """
        Attach to a running MLApp and run in its context.

        Logs in to the server to obtain a token, builds a :class:`ConfigInfo`
        for ATTACH mode, exports it to the environment and then calls ``init``.
        Side effect: sets up mlops_context

        :param mlapp_id: the id of the MLApp to connect to
        :param mlops_server: the host to connect to
        :param mlops_port: the port MLOps is using
        :param user: user name to use for connection
        :param password: password to use for authentication
        :return:

        Note: Attach only works for pure python code
        """
        # Never write the clear-text password to the log; only show whether one was given.
        masked_password = None if password is None else "********"
        self._logger.info(
            "Connecting to mlops: {} {}: {} user: {} pass: {}".format(
                mlops_server, Constants.ION_LITERAL, mlapp_id, user, masked_password))

        # Connecting directly the server
        rest_helper = MlOpsRestFactory().get_rest_helper(
            MLOpsMode.ATTACH, mlops_server, mlops_port, None)
        token = rest_helper.login(user, password)

        # Setting the environment for mlops
        ci = ConfigInfo()
        ci.token = token
        ci.zk_host = None
        ci.mlops_port = str(mlops_port)  # Constants.MLOPS_DEFAULT_PORT
        ci.mlops_server = mlops_server
        ci.ion_id = mlapp_id
        ci.mlops_mode = MLOpsMode.ATTACH
        ci.output_channel_type = OutputChannel.PYTHON

        # TODO: for now assume node "0" - allow providing the node id or just become any node
        ci.ion_node_id = "0"
        ci.pipeline_id = "0"

        # NOTE(review): ci carries the login token; confirm that ConfigInfo's string
        # form does not expose it before keeping this at info level.
        self._logger.info("MLOps configuration:\n{}".format(ci))
        ci.set_env()

        # calling init
        self.init(ctx=None, mlops_mode=MLOpsMode.ATTACH)
Пример #8
0
def test_all_alerts():
    """Fetch all events through ``EventBroker`` from a mocked endpoint and check the first alert."""
    ctx = build_ion_ctx()

    with requests_mock.mock() as mocker:
        mocker.get('http://localhost:3456/events', json=alerts_list)

        rh = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT)
        broker = EventBroker(ctx, None)

        # A default-constructed filter is passed to the broker
        all_events = broker.get_events(EventFilter())
        assert len(all_events) == 2

        matching_rows = all_events[all_events.id == AlertsInfo.ALERT_0_ID]
        first_row = matching_rows.iloc[0]
        assert first_row["node"] == ION1.NODE_0_ID
        rh.done()
Пример #9
0
def test_get_models_with_filter():
    """Filter the mocked model list by a creation-time window and expect a single match."""
    window_start_ms = 1518460571573
    window_end_ms = 1518460577573

    with requests_mock.mock() as mocker:
        mocker.get('http://localhost:3456/v1/models', json=models_list_json_dict)

        rest_helper = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT)
        ion_obj = ION()
        ion_obj.id = "bdc2ee10-767c-4524-ba72-8268a3894bff"
        model_helper = ModelHelper(rest_helper=rest_helper, ion=ion_obj, stats_helper=None)

        # The window bounds are given in milliseconds; the filter takes datetimes
        time_filter = ModelFilter()
        time_filter.time_window_start = datetime.utcfromtimestamp(window_start_ms / 1000)
        time_filter.time_window_end = datetime.utcfromtimestamp(window_end_ms / 1000)

        matches = model_helper.get_models_dataframe(model_filter=time_filter, download=False)
        assert len(matches) == 1

        shown_columns = [models.json_fields.MODEL_NAME_FIELD, models.json_fields.MODEL_CREATED_ON_FIELD]
        print(matches[shown_columns])
        rest_helper.done()
Пример #10
0
def test_get_models_with_filter_2():
    """Filter the mocked model list by model id and check the id and format of the single match."""
    wanted_model_id = '8c95deaf-87e4-4c21-bc92-e5b1a0454f9a'

    with requests_mock.mock() as mocker:
        mocker.get('http://localhost:3456/v1/models', json=models_list_json_dict)

        rest_helper = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT)
        ion_obj = ION()
        ion_obj.id = "13445bb4-535a-4d45-b2f2-77293026e3da"
        model_helper = ModelHelper(rest_helper=rest_helper, ion=ion_obj, stats_helper=None)

        id_filter = ModelFilter()
        id_filter.id = wanted_model_id

        matches = model_helper.get_models_dataframe(model_filter=id_filter, download=False)
        shown_columns = [models.json_fields.MODEL_ID_FIELD, models.json_fields.MODEL_FORMAT_FIELD]
        print(matches[shown_columns])
        assert len(matches) == 1

        only_row = matches.iloc[0]
        assert only_row[models.json_fields.MODEL_FORMAT_FIELD] == 'TEXT'
        assert matches.iloc[0][models.json_fields.MODEL_ID_FIELD] == wanted_model_id
        rest_helper.done()
Пример #11
0
def test_attach():
    """
    Attach ``pm`` to a mocked MLOps server and check the MLApp id and policy.

    All REST endpoints used during attach (workflow instance, ees, agents,
    models, health thresholds, model stats, uuid and login) are mocked.
    """
    ion_instance_id = ION1.ION_INSTANCE_ID
    ion_node_id = ION1.NODE_1_ID
    token = ION1.TOKEN

    set_mlops_env(ion_id=ion_instance_id,
                  ion_node_id=ion_node_id,
                  token=token,
                  model_id=ION1.MODEL_ID)
    rest_helper = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT,
                                                     mlops_server="localhost",
                                                     mlops_port="3456",
                                                     token=token)

    with requests_mock.mock() as m:
        m.get(rest_helper.url_get_workflow_instance(ion_instance_id),
              json=test_workflow_instances)
        m.get(rest_helper.url_get_ees(), json=test_ee_info)
        m.get(rest_helper.url_get_agents(), json=test_agents_info)
        m.get(rest_helper.url_get_model_list(), json=test_models_info)
        m.get(rest_helper.url_get_health_thresholds(ion_instance_id),
              json=test_health_info)
        m.get(rest_helper.url_get_model_stats(ION1.MODEL_ID),
              json=test_model_stats)
        m.get(rest_helper.url_get_uuid("model"),
              json={"id": "model_5906255e-0a3d-4fef-8653-8d41911264fb"})
        # The login endpoint hands back the same token the environment was set up with.
        m.post(rest_helper.url_login(), json={"token": token})

        # The MLApp attached to is the ION instance whose endpoints are mocked above.
        pm.attach(mlapp_id=ION1.ION_INSTANCE_ID,
                  mlops_server="localhost",
                  mlops_port=3456,
                  password="******")
        mlapp_id_ret = pm.get_mlapp_id()
        assert (mlapp_id_ret == ION1.ION_ID)

        mlapp_policy_ret = pm.get_mlapp_policy()
        assert (str(mlapp_policy_ret) ==
                "Policy:\nhealthThreshold: 0.2\ncanaryThreshold: 0.5\n")
        pm.done()
Пример #12
0
def test_get_models_with_filter_3():
    """
    Filter the mocked model list by time window and pipeline instance id.

    One model is expected to match; after additionally setting a model id on
    the filter, no model is expected to match.
    """
    window_start_ms = 1518460571573
    window_end_ms = 1518460577573

    with requests_mock.mock() as mocker:
        mocker.get('http://localhost:3456/models', json=models_list_json_dict)

        rest_helper = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT)
        ion_obj = ION()
        ion_obj.id = "bdc2ee10-767c-4524-ba72-8268a3894bff"
        model_helper = ModelHelper(rest_helper=rest_helper, ion=ion_obj, stats_helper=None)

        combined_filter = ModelFilter()
        combined_filter.time_window_start = datetime.utcfromtimestamp(window_start_ms / 1000)
        combined_filter.time_window_end = datetime.utcfromtimestamp(window_end_ms / 1000)
        combined_filter.pipeline_instance_id = ['94bf382b-47d5-4b80-b76c-3bca862e6e23', 'asdf']

        matches = model_helper.get_models_dataframe(model_filter=combined_filter, download=False)
        assert len(matches) == 1
        print(matches[["name", "createdTimestamp", "pipelineInstanceId"]])

        # With this model id added to the filter, nothing is expected to match
        combined_filter.id = "111111111111111"
        matches = model_helper.get_models_dataframe(model_filter=combined_filter, download=False)
        assert len(matches) == 0
        rest_helper.done()
Пример #13
0
def test_publish_model():
    """
    Publish a text model in stand-alone mode and read it back.

    Checks that the published id matches the model's own id, that the
    downloaded content equals what was written, that the model listing equals
    the ``local_models_list_json_dict`` fixture (after patching in the run id
    and model id), and that publishing a non-model object raises
    ``MLOpsException``.
    """
    rh = MlOpsRestFactory().get_rest_helper(MLOpsMode.STAND_ALONE)
    ion = ION()
    ion.id = "bdc2ee10-767c-4524-ba72-8268a3894bff"
    # NOTE: this mutates the module-level fixture in place.
    local_models_list_json_dict[0]["workflowRunId"] = ion.id
    mh = ModelHelper(rest_helper=rh, ion=ion, stats_helper=None)

    model_data = "MODEL_DATA"
    model = mh.create_model(name="my model", model_format=ModelFormat.TEXT, description="test model",
                  user_defined="whatever I want goes here")

    model_file = os.path.join(os.path.sep, "tmp", str(uuid.uuid4()))
    try:
        # The context manager closes the handle even if the write raises.
        with open(model_file, 'w') as f:
            f.write(model_data)
        model.set_model_path(model_file)

        my_id = mh.publish_model(model, None)
    finally:
        # Do not leave the scratch file behind when publishing fails.
        if os.path.exists(model_file):
            os.remove(model_file)
    assert my_id == model.get_id()
    # The id is only known after publishing, so patch it into the expectation.
    local_models_list_json_dict[0]["modelId"] = my_id

    ret_data = mh.download_model(my_id)
    assert ret_data == model_data

    result_model_list = mh.fetch_all_models_json_dict()

    # Printed only to ease debugging when the comparison below fails.
    actual_json_dumps = json.dumps(result_model_list, sort_keys=True, indent=2)
    local_json_dump = json.dumps(local_models_list_json_dict, sort_keys=True, indent=2)
    print("Expected_Dumps: {}".format(local_json_dump))
    print("Actual_Dumps: {}".format(actual_json_dumps))

    assert local_models_list_json_dict == result_model_list

    with pytest.raises(MLOpsException):
        mh.publish_model("Not a model", None)
    rh.done()
Пример #14
0
def build_ion_ctx():
    """
    Build an ``MLOpsCtx`` in ATTACH mode against mocked REST endpoints.

    The MLOps environment is set first, then the workflow-instance, ees, agents
    and health-threshold endpoints are mocked while the context is constructed.

    :return: the constructed ``MLOpsCtx``
    """
    instance_id = ION1.ION_ID
    node_id = ION1.NODE_1_ID
    auth_token = "token_token_token"

    set_mlops_env(ion_id=instance_id, ion_node_id=node_id, token=auth_token)
    rest_helper = MlOpsRestFactory().get_rest_helper(
        MLOpsMode.AGENT, mlops_server="localhost", mlops_port="3456", token=auth_token)

    with requests_mock.mock() as mocker:
        mocker.get(rest_helper.url_get_workflow_instance(instance_id), json=test_workflow_instances)
        mocker.get(rest_helper.url_get_ees(), json=test_ee_info)
        mocker.get(rest_helper.url_get_agents(), json=test_agents_info)
        mocker.get(rest_helper.url_get_health_thresholds(instance_id), json=test_health_info)

        # The configuration comes from the environment populated by set_mlops_env above
        env_config = ConfigInfo().read_from_env()
        return MLOpsCtx(config=env_config, mode=MLOpsMode.ATTACH)
Пример #15
0
def test_feature_importance():
    """
    Call ``feature_importance`` on an mlops model with all REST endpoints mocked.

    The test has no explicit assertions; it passes when init, the
    feature-importance call and done complete without raising.
    """
    significant_count = 6
    instance_id = ION1.ION_INSTANCE_ID
    node_id = ION1.NODE_1_ID
    pipeline_inst_id = ION1.PIPELINE_INST_ID_1

    set_mlops_env(ion_id=instance_id, ion_node_id=node_id, model_id=ION1.MODEL_ID)
    rh = MlOpsRestFactory().get_rest_helper(
        MLOpsMode.AGENT, mlops_server="localhost", mlops_port="3456", token="")
    rh.set_prefix(Constants.URL_MLOPS_PREFIX)

    with requests_mock.mock() as mocker:
        mocker.get(rh.url_get_workflow_instance(instance_id), json=test_workflow_instances)
        mocker.get(rh.url_get_ees(), json=test_ee_info)
        mocker.get(rh.url_get_agents(), json=test_agents_info)
        mocker.get(rh.url_get_model_list(), json=test_models_info)
        mocker.get(rh.url_get_health_thresholds(instance_id), json=test_health_info)
        mocker.get(rh.url_get_model_stats(ION1.MODEL_ID), json=test_model_stats)
        mocker.get(rh.url_get_uuid("model"), json={"id": "model_5906255e-0a3d-4fef-8653-8d41911264fb"})
        mocker.post(rh.url_post_stat(pipeline_inst_id), json={})

        # Test Python channel
        mlops.init(ctx=None, mlops_mode=MLOpsMode.AGENT)
        model_under_test = mlops.Model(
            name="dtr_mlops_model",
            model_format=ModelFormat.SPARKML,
            description="model of decision tree regression with explainability")
        model_under_test.feature_importance(
            model=FinalModel,
            feature_names=FinalModel.feature_names,
            num_significant_features=significant_count)
        mlops.done()
Пример #16
0
class MLHealth(object):
    """
    This class provides APIs to access health metrics created for a workflow instance.
    It includes stats and models.
    """
    def __init__(self):
        """Create an unconfigured instance; call :meth:`init` before using it."""
        self._sc = None
        self._eco_server = None
        self._eco_port = None
        self._db_host = None
        self._db_port = None
        self._mode = None
        self._wf_id = None
        # List of (pipeline instance id, pipeline type, [agent addresses]) tuples
        self._agent_list = []
        self._jvm_mlops = None
        self._rest_helper = None
        self._zk_host = None
        self._token = None

    @staticmethod
    def _search_list_dict(kv, key, value):
        """
        Return the first dict in ``kv`` whose ``key`` entry equals ``value``.

        :param kv: iterable of dicts
        :param key: key to look up in every dict
        :param value: value to match
        :return: the first matching dict, or None when nothing matches
        """
        for x in kv:
            if x[key] == value:
                return x
        return None

    def init(self,
             sc=None,
             wf_id=None,
             eco_server=None,
             eco_port=None,
             db_host=None,
             db_port=None,
             zk_host=None,
             token=None,
             mode=MLOpsMode.PYSPARK):
        """
        Perform initialization of the health library. eco and db configuration can be
        set up using environment variables and hence are optional. This is true for the workflow
        instance id as well. Currently, python and pyspark mode of operation are supported.

        :param sc: optional spark context for pyspark jobs
        :param wf_id: workflow instance id
        :param eco_server: eco server host
        :param eco_port: eco server port
        :param db_host: stats db host
        :param db_port: stats db port
        :param zk_host: zookeeper host port string
        :param token: authentication token
        :param mode: python or pyspark
        :return:
        :raises MLOpsException: when a required setting is given neither as an
            argument nor through the environment, when the active server cannot
            be located through zookeeper, when the MLOps JVM objects cannot be
            reached, or when the workflow instance, a group or an agent cannot
            be found
        """
        self._sc = sc

        no_zk = False
        if zk_host is None:
            if os.environ.get(Constants.MLOPS_ZK_HOST) is not None:
                self._zk_host = os.environ[Constants.MLOPS_ZK_HOST]
            else:
                no_zk = True
        else:
            self._zk_host = zk_host

        if token is None:
            if os.environ.get(Constants.MLOPS_TOKEN) is not None:
                self._token = os.environ[Constants.MLOPS_TOKEN]
            else:
                raise MLOpsException("Internal Error: No auth token provided")
        else:
            self._token = token

        if not no_zk:
            # initialize zk connections and get active eco server
            zk_path = '/ECO/curator/activeHostPort'
            # NOTE: every failure in this section, including the specific
            # MLOpsExceptions raised inside, is reported to the caller with
            # the single generic message of the except clause below.
            try:
                zk = KazooClient(hosts=self._zk_host, read_only=True)
                zk.start()
                try:
                    if not zk.exists(zk_path):
                        raise MLOpsException(
                            "Unable to connect to the active MLOps server")

                    data, _ = zk.get(zk_path)
                    eco_host_port = data.decode("utf-8").split(':')

                    # Compare by value: "is 2" tests object identity, not equality
                    if len(eco_host_port) != 2:
                        raise MLOpsException(
                            "Internal Error: Invalid zookeeper active server "
                            "entry")

                    self._eco_server = eco_host_port[0]
                    self._eco_port = eco_host_port[1]
                finally:
                    # Release the zookeeper connection on the error paths as well
                    zk.stop()
            except Exception:
                raise MLOpsException("Unable to locate active MLOps server")

        # if eco server was found using zookeeper, then don't use the environment variable
        if self._eco_server is None:
            if eco_server is None:
                if os.environ.get(Constants.MLOPS_ECO_HOST) is not None:
                    self._eco_server = os.environ[Constants.MLOPS_ECO_HOST]
                else:
                    raise MLOpsException("MLOps server host not provided")
            else:
                self._eco_server = eco_server

        if self._eco_port is None:
            if eco_port is None:
                if os.environ.get(Constants.MLOPS_ECO_PORT) is not None:
                    self._eco_port = os.environ[Constants.MLOPS_ECO_PORT]
                else:
                    raise MLOpsException("MLOps server port not provided")
            else:
                self._eco_port = eco_port

        if db_host is None:
            if os.environ.get(Constants.MLOPS_TIMESERIES_DB_HOST) is not None:
                self._db_host = os.environ[Constants.MLOPS_TIMESERIES_DB_HOST]
            else:
                raise MLOpsException("Database server host not provided")
        else:
            self._db_host = db_host

        if db_port is None:
            if os.environ.get(Constants.MLOPS_TIMESERIES_DB_PORT) is not None:
                self._db_port = os.environ[Constants.MLOPS_TIMESERIES_DB_PORT]
            else:
                raise MLOpsException("Database server port not provided")
        else:
            self._db_port = db_port

        if wf_id is None:
            if os.environ.get(Constants.MLOPS_HEALTH_WF_ID) is not None:
                self._wf_id = os.environ[Constants.MLOPS_HEALTH_WF_ID]
            else:
                raise MLOpsException("{} instance id not provided".format(
                    Constants.ION_LITERAL))
        else:
            self._wf_id = wf_id

        self._mode = mode

        self._rest_helper = MlOpsRestFactory().get_rest_helper(
            MLOpsMode.STAND_ALONE)
        self._rest_helper.init(self._eco_server, self._eco_port, self._token)

        if sc is not None:
            try:
                # The imports are kept inside the try so that a missing pyspark
                # is reported as an MLOpsException as well.
                import pyspark
                import pyspark.mllib.common as ml
                from pyspark.sql import SQLContext
                import pyspark.sql.types

                if not isinstance(sc, pyspark.context.SparkContext):
                    raise MLOpsException("sc argument is not pyspark context")

                # initialize jvm to mlops
                self._jvm_mlops = sc._jvm.org.mlpiper.mlops.MLOps
                # Round-trip a value through the JVM object to verify it is reachable
                ping_val = 5
                ping_ret = self._jvm_mlops.ping(ping_val)
                if ping_ret != 5:
                    raise MLOpsException(
                        "Got unexpected value from MLOps.ping sent {} got {} ".
                        format(ping_val, ping_ret))
            except Exception:
                err = "Unable to access MLOps objects within the health program"
                self._jvm_mlops = None
                raise MLOpsException(err)

        groups = self._rest_helper.get_groups()
        agents = self._rest_helper.get_agents()

        # based on ion / workflow run id, get ion description
        wf_instances = self._rest_helper.get_workflow_instances()

        wfi = MLHealth._search_list_dict(wf_instances, 'id', self._wf_id)
        if wfi is None:
            raise MLOpsException("Could not locate {} instance {}".format(
                Constants.ION_LITERAL, self._wf_id))

        # get agents/groups
        node_info = wfi['pipelineInstanceIdToWfNode']

        for pipeline_inst_id in node_info:
            kv = node_info[pipeline_inst_id]

            # Look the group up first: subscripting the result directly would
            # fail with a TypeError when the group is missing and never reach
            # the intended error below.
            group = MLHealth._search_list_dict(groups, 'id', kv['groupId'])
            agents_in_group = None if group is None else group['agents']
            if agents_in_group is None:
                raise MLOpsException("Could not locate group {}".format(
                    kv['groupId']))

            ptype = kv['pipelineType']

            agent_addrs = []

            for aig in agents_in_group:
                agent_addr = MLHealth._search_list_dict(agents, 'id', aig)
                if agent_addr is None:
                    raise MLOpsException(
                        "Could not locate agent {} in group description".
                        format(aig))

                agent_addrs.append(agent_addr['address'])
            self._agent_list.append((pipeline_inst_id, ptype, agent_addrs))

    def _get_sparksql_context(self):
        """Return the SQLContext obtained through ``SQLContext.getOrCreate`` for the stored spark context."""
        from pyspark.sql import SQLContext
        return SQLContext.getOrCreate(self._sc)

    def _convert_pd_to_df(self, pd_df):
        """
        Convert a pandas dataframe to a spark dataframe.

        :param pd_df: pandas dataframe
        :return: spark dataframe created through the SQL context
        """
        sql_context = self._get_sparksql_context()
        return sql_context.createDataFrame(pd_df)

    def _get_model_pdf(self):
        """Fetch the model list from the rest helper and return it as a pandas dataframe."""
        models = self._rest_helper.get_model_list()
        return pd.read_json(json.dumps(models))

    def _get_model_python(self, start_time, end_time):
        """
        Return the models of this workflow instance created within a time range.

        :param start_time: inclusive lower bound compared against 'createdTimestamp'
        :param end_time: inclusive upper bound compared against 'createdTimestamp'
        :return: pandas dataframe with the columns 'createdTimestamp', 'name',
            'id' and 'model', the latter holding what the rest helper returns
            for the selected ids
        :raises MLOpsException: when no model matches
        """
        mdf = self._get_model_pdf()

        newdf = mdf[(mdf['workflowRunId'] == self._wf_id)
                    & (mdf['createdTimestamp'] >= start_time) &
                    (mdf['createdTimestamp'] <= end_time)]
        if newdf.shape[0] == 0:
            raise MLOpsException(
                "No models found in time range {}:{} for instance {}".format(
                    start_time, end_time, self._wf_id))

        output_df = newdf[['createdTimestamp', 'name', 'id']]

        vals = newdf['id'].values
        models = self._rest_helper.get_model_by_id(vals)

        output_df = output_df.assign(model=models)
        return output_df

    def _get_model_pyspark(self, start_time, end_time):
        """Same as :meth:`_get_model_python`, converted to a spark dataframe."""
        df = self._get_model_python(start_time, end_time)
        sdf = self._convert_pd_to_df(df)

        return sdf

    def get_model(self, start_time, end_time):
        """
        Retrieve models from eco server based on start and end times. Use the workflow
        instance id to retrieve only models that are specific to this job.

        :param start_time: start time in milliseconds
        :param end_time: end time in milliseconds
        :return: spark or pandas dataframe based on mode with the models as a byte array
        :raises MLOpsException: when the configured mode is neither PYSPARK nor PYTHON
        """
        if self._mode == MLOpsMode.PYSPARK:
            return self._get_model_pyspark(start_time, end_time)
        elif self._mode == MLOpsMode.PYTHON:
            return self._get_model_python(start_time, end_time)
        else:
            raise MLOpsException("Invalid mode: [{}]".format(self._mode))
Пример #17
0
    def init(self,
             sc=None,
             wf_id=None,
             eco_server=None,
             eco_port=None,
             db_host=None,
             db_port=None,
             zk_host=None,
             token=None,
             mode=MLOpsMode.PYSPARK):
        """
        Perform initialization of the health library. eco and db configuration can be
        set up using environment variables and hence are optional. This is true for the workflow
        instance id as well. Currently, python and pyspark mode of operation are supported.

        :param sc: optional spark context for pyspark jobs
        :param wf_id: workflow instance id
        :param eco_server: eco server host
        :param eco_port: eco server port
        :param db_host: stats db host
        :param db_port: stats db port
        :param zk_host: zookeeper host port string
        :param token: authentication token
        :param mode: python or pyspark
        :return:
        :raises MLOpsException: when a required setting is given neither as an
            argument nor through the environment, when the active server cannot
            be located through zookeeper, when the MLOps JVM objects cannot be
            reached, or when the workflow instance, a group or an agent cannot
            be found
        """
        self._sc = sc

        no_zk = False
        if zk_host is None:
            if os.environ.get(Constants.MLOPS_ZK_HOST) is not None:
                self._zk_host = os.environ[Constants.MLOPS_ZK_HOST]
            else:
                no_zk = True
        else:
            self._zk_host = zk_host

        if token is None:
            if os.environ.get(Constants.MLOPS_TOKEN) is not None:
                self._token = os.environ[Constants.MLOPS_TOKEN]
            else:
                raise MLOpsException("Internal Error: No auth token provided")
        else:
            self._token = token

        if not no_zk:
            # initialize zk connections and get active eco server
            zk_path = '/ECO/curator/activeHostPort'
            # NOTE: every failure in this section, including the specific
            # MLOpsExceptions raised inside, is reported to the caller with
            # the single generic message of the except clause below.
            try:
                zk = KazooClient(hosts=self._zk_host, read_only=True)
                zk.start()
                try:
                    if not zk.exists(zk_path):
                        raise MLOpsException(
                            "Unable to connect to the active MLOps server")

                    data, _ = zk.get(zk_path)
                    eco_host_port = data.decode("utf-8").split(':')

                    # Compare by value: "is 2" tests object identity, not equality
                    if len(eco_host_port) != 2:
                        raise MLOpsException(
                            "Internal Error: Invalid zookeeper active server "
                            "entry")

                    self._eco_server = eco_host_port[0]
                    self._eco_port = eco_host_port[1]
                finally:
                    # Release the zookeeper connection on the error paths as well
                    zk.stop()
            except Exception:
                raise MLOpsException("Unable to locate active MLOps server")

        # if eco server was found using zookeeper, then don't use the environment variable
        if self._eco_server is None:
            if eco_server is None:
                if os.environ.get(Constants.MLOPS_ECO_HOST) is not None:
                    self._eco_server = os.environ[Constants.MLOPS_ECO_HOST]
                else:
                    raise MLOpsException("MLOps server host not provided")
            else:
                self._eco_server = eco_server

        if self._eco_port is None:
            if eco_port is None:
                if os.environ.get(Constants.MLOPS_ECO_PORT) is not None:
                    self._eco_port = os.environ[Constants.MLOPS_ECO_PORT]
                else:
                    raise MLOpsException("MLOps server port not provided")
            else:
                self._eco_port = eco_port

        if db_host is None:
            if os.environ.get(Constants.MLOPS_TIMESERIES_DB_HOST) is not None:
                self._db_host = os.environ[Constants.MLOPS_TIMESERIES_DB_HOST]
            else:
                raise MLOpsException("Database server host not provided")
        else:
            self._db_host = db_host

        if db_port is None:
            if os.environ.get(Constants.MLOPS_TIMESERIES_DB_PORT) is not None:
                self._db_port = os.environ[Constants.MLOPS_TIMESERIES_DB_PORT]
            else:
                raise MLOpsException("Database server port not provided")
        else:
            self._db_port = db_port

        if wf_id is None:
            if os.environ.get(Constants.MLOPS_HEALTH_WF_ID) is not None:
                self._wf_id = os.environ[Constants.MLOPS_HEALTH_WF_ID]
            else:
                raise MLOpsException("{} instance id not provided".format(
                    Constants.ION_LITERAL))
        else:
            self._wf_id = wf_id

        self._mode = mode

        self._rest_helper = MlOpsRestFactory().get_rest_helper(
            MLOpsMode.STAND_ALONE)
        self._rest_helper.init(self._eco_server, self._eco_port, self._token)

        if sc is not None:
            try:
                # The imports are kept inside the try so that a missing pyspark
                # is reported as an MLOpsException as well.
                import pyspark
                import pyspark.mllib.common as ml
                from pyspark.sql import SQLContext
                import pyspark.sql.types

                if not isinstance(sc, pyspark.context.SparkContext):
                    raise MLOpsException("sc argument is not pyspark context")

                # initialize jvm to mlops
                self._jvm_mlops = sc._jvm.org.mlpiper.mlops.MLOps
                # Round-trip a value through the JVM object to verify it is reachable
                ping_val = 5
                ping_ret = self._jvm_mlops.ping(ping_val)
                if ping_ret != 5:
                    raise MLOpsException(
                        "Got unexpected value from MLOps.ping sent {} got {} ".
                        format(ping_val, ping_ret))
            except Exception:
                err = "Unable to access MLOps objects within the health program"
                self._jvm_mlops = None
                raise MLOpsException(err)

        groups = self._rest_helper.get_groups()
        agents = self._rest_helper.get_agents()

        # based on ion / workflow run id, get ion description
        wf_instances = self._rest_helper.get_workflow_instances()

        wfi = MLHealth._search_list_dict(wf_instances, 'id', self._wf_id)
        if wfi is None:
            raise MLOpsException("Could not locate {} instance {}".format(
                Constants.ION_LITERAL, self._wf_id))

        # get agents/groups
        node_info = wfi['pipelineInstanceIdToWfNode']

        for pipeline_inst_id in node_info:
            kv = node_info[pipeline_inst_id]

            # Look the group up first: subscripting the result directly would
            # fail with a TypeError when the group is missing and never reach
            # the intended error below.
            group = MLHealth._search_list_dict(groups, 'id', kv['groupId'])
            agents_in_group = None if group is None else group['agents']
            if agents_in_group is None:
                raise MLOpsException("Could not locate group {}".format(
                    kv['groupId']))

            ptype = kv['pipelineType']

            agent_addrs = []

            for aig in agents_in_group:
                agent_addr = MLHealth._search_list_dict(agents, 'id', aig)
                if agent_addr is None:
                    raise MLOpsException(
                        "Could not locate agent {} in group description".
                        format(aig))

                agent_addrs.append(agent_addr['address'])
            self._agent_list.append((pipeline_inst_id, ptype, agent_addrs))
Пример #18
0
def test_mlops_structure_api():
    """
    Exercise the structure-query API of ``pm`` (MLApp id/name, nodes, agents,
    current model) in AGENT mode against mocked REST responses.

    ``pm.done()`` is executed in a ``finally`` clause so that a failing assertion
    does not leave ``pm`` initialized for whatever runs after this test.
    """
    ion_instance_id = ION1.ION_INSTANCE_ID
    ion_node_id = ION1.NODE_1_ID
    token = ION1.TOKEN

    set_mlops_env(ion_id=ion_instance_id,
                  ion_node_id=ion_node_id,
                  token=token,
                  model_id=ION1.MODEL_ID)
    rest_helper = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT,
                                                     mlops_server="localhost",
                                                     mlops_port="3456",
                                                     token=token)

    rest_helper.set_prefix(Constants.URL_MLOPS_PREFIX)
    with requests_mock.mock() as m:
        # Register a canned JSON answer for every endpoint queried below
        m.get(rest_helper.url_get_workflow_instance(ion_instance_id),
              json=test_workflow_instances)
        m.get(rest_helper.url_get_ees(), json=test_ee_info)
        m.get(rest_helper.url_get_agents(), json=test_agents_info)
        m.get(rest_helper.url_get_model_list(), json=test_models_info)
        m.get(rest_helper.url_get_health_thresholds(ion_instance_id),
              json=test_health_info)
        m.get(rest_helper.url_get_model_stats(ION1.MODEL_ID),
              json=test_model_stats)
        m.get(rest_helper.url_get_uuid("model"),
              json={"id": "model_5906255e-0a3d-4fef-8653-8d41911264fb"})

        pm.init(ctx=None, mlops_mode=MLOpsMode.AGENT)
        try:
            assert pm.get_mlapp_id() == ION1.ION_ID
            assert pm.get_mlapp_name() == ION1.ION_NAME

            curr_node = pm.get_current_node()
            assert curr_node.id == ion_node_id

            nodes = pm.get_nodes()
            assert len(nodes) == 2

            node0 = pm.get_node('1')
            assert node0 is not None
            assert node0.pipeline_pattern_id == ION1.PIPELINE_PATTERN_ID_1
            assert node0.pipeline_instance_id == ION1.PIPELINE_INST_ID_1

            node0_agents = pm.get_agents('1')
            assert len(node0_agents) == 1
            assert node0_agents[0].id == ION1.AGENT_ID_0
            assert node0_agents[0].hostname == 'localhost'

            agent = pm.get_agent('1', ION1.AGENT_ID_0)
            assert agent.id == ION1.AGENT_ID_0
            assert agent.hostname == 'localhost'

            model = pm.current_model()
            assert model is not None
            assert model.metadata.modelId == ION1.MODEL_ID
        finally:
            # Tear down even when an assertion above fails
            pm.done()
Пример #19
0
def test_suppress_connection_errors():
    """
    Check that connection errors on event/stat posting surface as
    MLOpsConnectionException by default and are swallowed once
    ``pm.suppress_connection_errors(True)`` has been called.

    Both the suppression switch and ``pm`` itself are restored in ``finally``
    clauses so that a failure in this test does not leak state out of it.
    """
    import requests
    from parallelm.mlops.events.event import Event
    from parallelm.mlops.mlops_env_constants import MLOpsEnvConstants

    ion_instance_id = ION1.ION_INSTANCE_ID
    ion_node_id = ION1.NODE_1_ID
    token = ION1.TOKEN
    pipeline_instance_id = ION1.PIPELINE_INST_ID_1

    set_mlops_env(ion_id=ion_instance_id,
                  ion_node_id=ion_node_id,
                  token=token,
                  model_id=ION1.MODEL_ID)

    os.environ[MLOpsEnvConstants.MLOPS_AGENT_PUBLIC_ADDRESS] = "placeholder"
    rest_helper = MlOpsRestFactory().get_rest_helper(MLOpsMode.AGENT,
                                                     mlops_server="localhost",
                                                     mlops_port="3456",
                                                     token=token)

    rest_helper.set_prefix(Constants.URL_MLOPS_PREFIX)

    with requests_mock.mock() as m:
        m.get(rest_helper.url_get_workflow_instance(ion_instance_id),
              json=test_workflow_instances)
        m.get(rest_helper.url_get_health_thresholds(ion_instance_id),
              json=test_health_info)
        m.get(rest_helper.url_get_ees(), json=test_ee_info)
        m.get(rest_helper.url_get_agents(), json=test_agents_info)
        m.get(rest_helper.url_get_model_list(), json=test_models_info)
        m.get(rest_helper.url_get_model_stats(ION1.MODEL_ID),
              json=test_model_stats)

        # Every POST of an event or a stat fails with a connection error
        m.post(rest_helper.url_post_event(pipeline_instance_id),
               exc=requests.exceptions.ConnectionError)
        m.post(rest_helper.url_post_stat(pipeline_instance_id),
               exc=requests.exceptions.ConnectionError)

        pm.init(ctx=None, mlops_mode=MLOpsMode.AGENT)
        try:
            event_obj = Event(label="event_name",
                              event_type=EventType.System,
                              description=None,
                              data="123",
                              is_alert=False,
                              timestamp=None)

            # Default behaviour: the connection error is reported to the caller
            with pytest.raises(MLOpsConnectionException):
                pm.set_event(name="event_name", data="123", type=EventType.System)

            with pytest.raises(MLOpsConnectionException):
                pm.event(event_obj)

            with pytest.raises(MLOpsConnectionException):
                pm.set_stat("stat_same", 3)

            # With suppression on, the same calls must return without raising
            pm.suppress_connection_errors(True)
            try:
                pm.set_event(name="event_name", data="123", type=EventType.System)
                pm.event(event_obj)
                pm.set_stat("stat_same", 3)
            finally:
                # Reset the switch even if one of the calls above raised
                pm.suppress_connection_errors(False)
        finally:
            # Tear down even when an assertion above fails
            pm.done()
Пример #20
0
class MLOpsCtx(BaseObj):
    """
    Provide context information for MLOps library.
    This is an internal class which should not be exposed to users of MLOps.
    The object contains the structure of the ION and groups and such.
    """

    def __init__(self, config, mode=None):
        """
        Build the MLOps context for the requested operation mode.

        In AGENT / REST_ACCUMULATOR and ATTACH modes the configuration is validated
        and the ION structure, health thresholds, EEs and agents are loaded through
        the REST helper. In STAND_ALONE mode only placeholder ION values are set.

        :param config: :class:`ConfigInfo` for this MLOps instantiation
        :param mode: operation mode, one of the :class:`MLOpsMode` values
        :return:
        :raises MLOpsException: for an unsupported mode or an invalid configuration
        """
        super(MLOpsCtx, self).__init__(__name__)

        self._info("MLOpsCtx __init__ called")
        self._info("Config\n{}".format(config))

        self._agent_list = []
        self._rest_helper = None
        self._ci = config
        self._mode = mode

        # ION object describing the active ION this mlops instance is part of
        self._ion = None
        # EE objects by id (each holds its agent) and agent objects by id
        self._ees_dict = {}
        self._agents_dict = {}

        # NOTE(review): the helper is built before the ATTACH branch may update
        # mlops_server / mlops_port from ZooKeeper - confirm it sees the new values.
        self._rest_helper = MlOpsRestFactory().get_rest_helper(
            self._mode, self._ci.mlops_server, self._ci.mlops_port, self._ci.token)

        if self._mode in (MLOpsMode.AGENT, MLOpsMode.REST_ACCUMULATOR):
            # Agent mode: requests go to the agent and carry the mlops URL prefix
            self._validate_config()
            self._info("Agent mode")
            self._rest_helper.set_prefix(Constants.URL_MLOPS_PREFIX)
        elif self._mode == MLOpsMode.ATTACH:
            # Attach mode: the server is found through ZK when a ZK host is configured,
            # otherwise the configured server is used directly
            if self._ci.zk_host:
                self._detect_mlops_server_via_zk()
            self._validate_config()
            self._info("In pm mode - will try to connect to server")
        elif self._mode == MLOpsMode.STAND_ALONE:
            # Stand-alone mode: there is no valid ION structure to load
            self._logger.info("In stand-alone mode: ctx data will not be available")
            self._set_stand_alone_values()
            return
        else:
            raise MLOpsException("Unsupported operation mode: {}".format(self._mode))

        # Loading sequence shared by the agent and attach modes
        ion_json_dict = self._detect_ion_structure()
        health_json_dict = self._fetch_health_thresholds()
        self._detect_ees_and_agents()
        self._build_ion_obj(ion_json_dict)
        self._build_health_obj(health_json_dict)

    def _fetch_health_thresholds(self):
        return self._rest_helper.get_health_thresholds(self._ci.ion_id)

    def _set_stand_alone_values(self):
        """
        Install a placeholder ION (id 1, name "ION_1") for stand-alone mode.
        :return:
        """
        placeholder = ION()
        self._ion = placeholder
        placeholder.id = 1
        placeholder.name = "ION_1"

    def _validate_config(self):
        """
        Validate that all config information is present
        :return:
        :raises MLOpsException for invalid configurations
        """
        if self._ci.token is None:
            raise MLOpsException("Internal Error: No auth token provided")

        if self._ci.mlops_server is None or self._ci.mlops_port is None:
            raise MLOpsException("MLOps server host or port were not provided")

        if self._ci.ion_id is None:
            MLOpsException("{} instance id not provided".format(Constants.ION_LITERAL))

    def _detect_mlops_server_via_zk(self):
        """
        Detect the active mlops server via the ZK.

        Reads the node ``Constants.MLOPS_ZK_ACTIVE_HOST_PORT``, expects its content to
        be ``host:port`` and stores the two parts in ``self._ci.mlops_server`` and
        ``self._ci.mlops_port``. The ZK client is always stopped before returning.

        :return:
        :raises MLOpsException: if the node is missing, its content is not of the form
            ``host:port``, or any other error occurs while talking to ZK
        """
        zk = None
        try:
            zk = KazooClient(hosts=self._ci.zk_host, read_only=True)
            zk.start()
            if zk.exists(Constants.MLOPS_ZK_ACTIVE_HOST_PORT):
                data, _stat = zk.get(Constants.MLOPS_ZK_ACTIVE_HOST_PORT)
                eco_host_port = data.decode("utf-8").split(':')

                # Value comparison: "is" on an int literal tests object identity and
                # only worked by accident of small-int caching.
                if len(eco_host_port) == 2:
                    self._ci.mlops_server = eco_host_port[0]
                    self._ci.mlops_port = eco_host_port[1]
                else:
                    raise MLOpsException("Internal Error: Invalid zookeeper active server entry, host_port: {}"
                                         .format(eco_host_port))
            else:
                raise MLOpsException("Unable to connect to the active MLOps server, zk_host: {}"
                                     .format(self._ci.zk_host))
        except Exception as e:
            # Every failure, including the MLOpsExceptions raised above, is reported
            # as an MLOpsException with the zk host appended to the message.
            raise MLOpsException("{}, zk_host: {}".format(e, self._ci.zk_host))
        finally:
            if zk:
                zk.stop()

    @staticmethod
    def _search_list_dict(kv, key, value):
        for x in kv:
            if x[key] == value:
                return x
        return None

    def _detect_ion_structure(self):
        """
        Fetch the workflow instance description of the current ION, polling until
        it contains the pipeline instances section.

        :return: the workflow instance JSON dict
        :raises MLOpsException: if the instance cannot be located, or the pipeline
            instances section does not show up within the allowed number of tries
        """
        self._info("Detecting {} structure".format(Constants.ION_LITERAL))

        # The pipeline instances part of the workflow description only appears once
        # the ION has switched to RUNNING state, so the description is polled (one
        # second apart) for at most this number of attempts.
        attempts = Constants.WAIT_FOR_PIPELINE_INSTANCE_TO_APPEAR_TIMEOUT
        section = IONJsonConstants.PIPELINE_INSTANCES_SECTION

        for attempt in range(attempts):
            workflow = self._rest_helper.get_workflow_instance(self._ci.ion_id)
            if workflow is None:
                raise MLOpsException("Could not locate {} instance {}".format(
                    Constants.ION_LITERAL, self._ci.ion_id))

            self._debug("{} status: {}".format(Constants.ION_LITERAL, workflow['status']))

            if section in workflow:
                break

            self._info("Could not find {} in workflow json - try {}".format(section, attempt))
            time.sleep(1)
        else:
            # The loop ran out of attempts without ever seeing the section
            raise MLOpsException("Could not find {} section in workflow information".format(section))

        self._debug("workflow: {}".format(workflow))
        return workflow

    def _detect_ees_and_agents(self):
        """
        Load the EEs and agents from the REST helper and index them by id.

        Fills ``self._agents_dict`` (agent id -> Agent) and ``self._ees_dict``
        (EE id -> EE), attaching to every EE the agent its JSON refers to.

        :return:
        :raises MLOpsException: if an EE refers to an agent missing from the agent list
        """
        ees_json_dict = self._rest_helper.get_ees()
        agents_json_dict = self._rest_helper.get_agents()

        self._debug("Agents JSON:\n{}\n\n".format(agents_json_dict))
        self._debug("EEs JSON:\n{}\n\n".format(ees_json_dict))

        # Index every known agent by its id
        for agent_entry in agents_json_dict:
            agent = Agent()
            agent.id = str(agent_entry["id"])
            agent.hostname = str(agent_entry["address"])
            self._agents_dict[agent.id] = agent

        for ee_entry in ees_json_dict:
            ee = EE()
            ee.name = str(ee_entry["name"])
            ee.id = str(ee_entry["id"])
            ee.agent_id = str(ee_entry["agentId"])

            # The EE must point at one of the agents indexed above
            if ee.agent_id not in self._agents_dict:
                raise MLOpsException("EE {} contains Agent {} which is not in global agent list".format(
                    ee.name, ee.agent_id))

            ee_agent = self._agents_dict[ee.agent_id]
            ee.agents.append(ee_agent)
            ee.agent_by_id[ee.agent_id] = ee_agent
            ee.agent_by_hostname[ee_agent.hostname] = ee_agent

            self._ees_dict[ee.id] = ee
            self._logger.info("EE:\n{}".format(ee))

    def _build_ion_obj(self, ion_json_dict):
        """
        Build the ION object from its JSON description and store it in ``self._ion``.
        :param ion_json_dict: workflow instance JSON dict describing the ION
        :return:
        """
        self._ion = IONBuilder().build_from_dict(ion_json_dict)

        # Keep this info line: it records the ION structure, which is needed to
        # analyze errors that happen at a customer site.
        ion_dump = "{}:\n{}".format(Constants.ION_LITERAL, self._ion)
        self._info(ion_dump)
        self._info("---------------------")

    def _build_health_obj(self, health_json_dict):
        """
        Pass the thresholds found in the health JSON to the ION policy.
        :param health_json_dict: dict holding the entries keyed by
            ``IONJsonConstants.ION_GLOBAL_THRESHOLD_TAG`` and
            ``IONJsonConstants.ION_CANARY_THRESHOLD_TAG``
        :return:
        """
        policy = self._ion.policy
        global_threshold = health_json_dict[IONJsonConstants.ION_GLOBAL_THRESHOLD_TAG]
        canary_threshold = health_json_dict[IONJsonConstants.ION_CANARY_THRESHOLD_TAG]
        policy.set_thresholds(global_threshold, canary_threshold)

    def rest_helper(self):
        return self._rest_helper

    def ion(self):
        """
        Return a copy of the ion object
        :return: ION object
        :rtype: ION
        """
        return copy.deepcopy(self._ion)

    def ion_id(self):
        """
        Return the ION id
        :return:
        """
        return self._ion.id

    def ion_node_id(self):
        """
        Return the current node id this code is running in
        :return: ION node id
        """
        return self._ci.ion_node_id

    def current_node(self):
        if self._ci.ion_node_id not in self._ion.node_by_id:
            raise MLOpsException("Current node id: [{}] is not detected in {}".format(
                self._ci.ion_node_id, Constants.ION_LITERAL))
        return self._ion.node_by_id[self._ci.ion_node_id]

    def ion_name(self):
        """
        Return the ION name
        :return:
        """
        return self._ion.name

    def ion_policy(self):
        """
        Return the ION policy
        :return:
        """
        return self._ion.policy

    def ion_nodes(self):
        """
        Return a list of ION components
        :return:
        """
        return copy.deepcopy(self._ion.nodes)

    def get_ion_node(self, name):
        """
        Look up a node (component) of the ION by its name
        :param name: name of the node to look for
        :return: the node object registered under ``name``, None if there is none
        """
        nodes_by_name = self._ion.node_by_name
        self._debug("Getting {} component [{}]".format(Constants.ION_LITERAL, name))
        self._debug("{} comps by name: {}".format(Constants.ION_LITERAL, self._ion.node_by_name))
        return nodes_by_name[name] if name in nodes_by_name else None

    def get_ion_node_by_pipeline_instance(self, pipe_instance_id):
        """
        Return a list of ion_nodes for the given pipeline instance.
        :param pipe_instance_id: id of the pipeline
        :return: ion_nodes or None if not found
        """
        if pipe_instance_id in self._ion.node_by_pipe_inst_id:
            return self._ion.node_by_pipe_inst_id[pipe_instance_id]
        return None

    def get_ion_node_agents(self, node):
        """
        Collect the agents of the EE that the given ION node belongs to
        :param node: name of a node within the ION (string)
        :return: List of agents (deep copies) for the given ion_node
        :raises MLOpsException: if ``node`` is not a string, is not part of the ION,
            or refers to an EE which is not known
        """
        # Only lookup by node name is supported
        if not isinstance(node, six.string_types):
            raise MLOpsException("component argument should be component name (string)")

        nodes_by_name = self._ion.node_by_name
        if node not in nodes_by_name:
            raise MLOpsException("Node {} is not part of current {}".format(
                node, Constants.ION_LITERAL))

        node_obj = nodes_by_name[node]
        if node_obj.ee_id not in self._ees_dict:
            raise MLOpsException("Component {} had ee_id {} which is not part of valid ees".format(
                node_obj.name, node_obj.ee_id))

        # Deep copy, so the caller gets its own agent objects instead of references
        # to the data held by this context
        return copy.deepcopy(self._ees_dict[node_obj.ee_id].agents)

    def get_agent_by_id(self, agent_id):
        """
        Return agent object by ID, assuming the agent is part of the current ION
        :param agent_id: Agent Id to search for
        :type agent_id: str
        :return: agent_id or None if not found
        """

        if agent_id in self._agents_dict:
            return self._agents_dict[agent_id]
        return None

    def done(self):
        if self._rest_helper is not None:
            self._rest_helper.done()