Пример #1
0
    def _extract_timeline_capture(self):
        """
        The method extracts bar graphs and events from the time capture files.

        :param self:
        :return:
        """
        try:
            self.untar_timeline_capture()
            extracted_dir = self._extracted_dir
        except Exception as err:
            raise MLOpsException(err)

        for file_name in self._file_names:
            if '-stats-' in file_name:
                self._extract_stats(file_name)
                if file_name in self._bar_df_file.keys():
                    self._name_stats_df(file_name,
                                        self._bar_df_file[file_name])
                if file_name in self._multilinegraph_df_file.keys():
                    self._name_stats_df(
                        file_name, self._multilinegraph_df_file[file_name])
                if file_name in self._matrix_df_file.keys():
                    self._name_stats_df(file_name,
                                        self._matrix_df_file[file_name])
                if file_name in self._multigraph_df_file.keys():
                    self._name_stats_df(file_name,
                                        self._multigraph_df_file[file_name])

            elif 'events' in file_name:
                self._events_df_file = pd.read_csv(extracted_dir + file_name,
                                                   na_filter=False)

            elif 'aggregate' in file_name:
                self._aggregate_df_file[file_name] = pd.read_csv(
                    extracted_dir + file_name, na_filter=False)

            elif 'sysstat' in file_name:
                self._sys_stat_df_file[file_name] = pd.read_csv(
                    extracted_dir + file_name, na_filter=False)

            elif 'ion-details' in file_name:
                print("parse ion-details file")
                self._parse_mlapp(extracted_dir + file_name)

            with open(self._extracted_dir + str(file_name), 'r') as f:
                self._files[file_name] = f.read()
        shutil.rmtree(self._tmpdir)
        return
Пример #2
0
    def login(self, user='******', auth='admin'):
        """
        Log in to the MLOps server and remember the returned token.

        :param user: user name sent as ``username``
        :param auth: password sent as ``password``
        :return: the ``token`` field of the server's JSON answer (also stored
                 in ``self._token``)
        :raises MLOpsException: if the connection is refused, the server answers
                                with a non-ok status, or the call fails for any
                                other reason
        """
        payload = {"username": user, "password": auth}
        url = build_url(self._mlops_server, self._mlops_port, self._prefix,
                        MLOpsRestHandles.LOGIN)

        try:
            headers = {"Content-Type": "application/json;charset=UTF-8"}
            r = requests.post(url, data=json.dumps(payload), headers=headers)
            if r.ok:
                token = r.json()
                self._token = token['token']
                return self._token
            else:
                # Do not echo the password back inside an error message that
                # may end up in logs.
                safe_payload = dict(payload, password='******')
                raise MLOpsException(
                    'Call {} with payload {} failed text:[{}]'.format(
                        url, safe_payload, r.text))
        except MLOpsException:
            # Already descriptive - without this clause the generic handler
            # below would wrap the message a second time.
            raise
        except requests.exceptions.ConnectionError as e:
            self._error(e)
            raise MLOpsException(
                "Connection to MLOps server [{}:{}] refused".format(
                    self._mlops_server, self._mlops_port))
        except Exception as e:
            raise MLOpsException('Call ' + str(url) + ' failed with error ' +
                                 str(e))
Пример #3
0
def datetime_to_timestamp_milli(datetime_object):
    """
    Return a number representing the milliseconds since the epoch.

    Naive datetime objects are measured directly against 1970-01-01 00:00:00.
    Timezone-aware objects are first shifted to UTC, so they no longer fail
    with a naive/aware mismatch. Sub-millisecond parts are truncated toward zero.

    :param datetime_object:  datetime object
    :type datetime_object: datetime
    :return: integer representing the milliseconds since the epoch
    :raises MLOpsException for invalid arguments
    """
    if not isinstance(datetime_object, datetime):
        # Note: other objects/str support can be added here.
        raise MLOpsException("Time object is not datetime.datetime object")

    offset = datetime_object.utcoffset()
    if offset is not None:
        # Aware value: convert to the equivalent naive UTC wall clock so it
        # can be subtracted from the naive epoch below.
        datetime_object = datetime_object.replace(tzinfo=None) - offset

    delta = datetime_object - datetime(1970, 1, 1)

    # Integer arithmetic on the timedelta fields avoids the rounding error of
    # total_seconds() * 1000, which could drop a millisecond after int().
    total_micros = (delta.days * 86400 + delta.seconds) * 1000000 + delta.microseconds
    millis = abs(total_micros) // 1000
    return millis if total_micros >= 0 else -millis
Пример #4
0
    def _to_dict(self):
        """
        Build the dictionary form of this bar graph.

        For continuous data the column names are treated as bin edges and each
        pair of neighbouring edges becomes one "<low> to <high>" label; otherwise
        the column names are used as labels directly.

        :return: ``{name: [{label: value}, ...]}`` with one entry per data point
        :raises MLOpsException: if there is no data, no column names, or the
                                number of labels differs from the number of values
        """
        if len(self._data) == 0:
            raise MLOpsException("There is no data in bar graph")

        if len(self._col_names) == 0:
            raise MLOpsException("No columns names were provided")

        values = self._data

        if not self._is_data_continuous:
            labels = self._col_names
        else:
            # TODO: Combine all continuous data representation generation into single place so that it would be easy to change in future.
            labels = [
                str(self._col_names[idx]) + " to " + str(self._col_names[idx + 1])
                for idx in range(0, len(self._col_names) - 1)
            ]

        check_list_of_str(labels, error_prefix="Columns names")

        if len(labels) != len(values):
            raise MLOpsException(
                "Number of data point does not match number of columns. "
                "edges values are: {} and bin values are {}".format(
                    labels, values))

        entries = [{label: value} for label, value in zip(labels, values)]
        return {self._name: entries}
Пример #5
0
    def event(self, event_obj):
        """
        Hand an event over to the event broker, which sends it to MLOps.

        :param event_obj: Object of type :class:`Event` (subclasses are accepted)
        :return: The current mlops instance, so calls can be chained
        :raises: MLOpsException if ``event_obj`` is not an :class:`Event`
        """
        self._verify_mlops_is_ready()

        if isinstance(event_obj, Event):
            self._event_broker.send_event(event_obj)
            return self

        raise MLOpsException("Event object must be an instance of Event class")
Пример #6
0
    def get_model(self, start_time, end_time):
        """
        Fetch the models created between ``start_time`` and ``end_time``,
        delegating to the PySpark or plain Python implementation depending on
        the configured mode.

        :param start_time: start time in milliseconds
        :param end_time: end time in milliseconds
        :return: whatever the mode specific implementation returns
        :raises MLOpsException: if the mode is neither PYSPARK nor PYTHON
        """
        mode = self._mode

        if mode == MLOpsMode.PYSPARK:
            return self._get_model_pyspark(start_time, end_time)

        if mode == MLOpsMode.PYTHON:
            return self._get_model_python(start_time, end_time)

        raise MLOpsException("Invalid mode: [{}]".format(mode))
Пример #7
0
    def verify_version_is_supported(self, version):
        """
        Check that ``version`` can be used together with the current version.

        :param version: version to verify; ``None`` verifies only that the
                        current version is registered
        :return: None
        :raises MLOpsException: if the current version is not registered, if
                                ``version`` is unknown, or if it is not listed
                                in the current version's supported versions
        """
        current = self._curr_version
        registry = self._versions_info

        if current not in registry:
            raise MLOpsException(
                "Current version [{}] is not registered".format(current))

        # In case version is None then only verifying current version
        if version is None:
            return

        if version not in registry:
            unknown_msg = "Version [{}] is unknown. Are you sure this is a correct version?"
            raise MLOpsException(unknown_msg.format(version))

        if version == current:
            return

        supported = registry[current].supported_versions
        if version not in supported:
            unsupported_msg = ("Requested version [{}] is not in current version [{}]. "
                               "Supported versions are: {}")
            raise MLOpsException(
                unsupported_msg.format(version, current, supported))
Пример #8
0
    def get_mlops_stat(self, model_id):
        """
        Wrap this object's data in an :class:`MLOpsStat` of bar graph type.

        :param model_id: model id passed through to the created stat
        :return: the created :class:`MLOpsStat`
        :raises MLOpsException: if no feature values are present
        """
        if len(self._feature_value) == 0:
            raise MLOpsException("There is no data in histogram graph")

        # The dictionary form is computed first, before the other arguments.
        stat_args = dict(data=self._to_dict(),
                         name=self._name,
                         stat_table=self._name,
                         stat_type=self._stat_type,
                         graph_type=StatGraphType.BARGRAPH,
                         mode=StatsMode.Instant,
                         model_id=model_id)
        return MLOpsStat(**stat_args)
Пример #9
0
    def get_events(self, event_filter):
        """
        Fetch the alerts matching ``event_filter`` and return them as a dataframe.

        :param event_filter: an :class:`EventFilter` describing the query
        :return: pandas dataframe built from the REST answer, after passing
                 through ``_add_node_and_agent_cols``
        :raises MLOpsException: if ``event_filter`` is not an :class:`EventFilter`
        """
        if isinstance(event_filter, EventFilter):
            # The built query is only logged; the REST call itself uses the
            # filter's own query dictionary.
            query = self._build_query(event_filter)
            self._logger.info(query.get())

            rest_helper = self._mlops_ctx.rest_helper()
            events_list_json = rest_helper.get_alerts(
                event_filter.to_query_dict())

            return self._add_node_and_agent_cols(pd.DataFrame(events_list_json))

        raise MLOpsException(
            "event_filter argument is not of type EventFilter")
Пример #10
0
 def __init__(self,
              stats_helper,
              rest_helper,
              name,
              model_format,
              description,
              user_defined,
              id=None):
     """
     Create a model object together with its metadata.

     :param stats_helper: a StatsHelper instance, or a falsy value for none
     :param rest_helper: passed to the parent constructor
     :param name: model name stored in the metadata
     :param model_format: model format stored in the metadata
     :param description: model description stored in the metadata
     :param user_defined: passed through to the metadata
     :param id: optional id passed to the parent constructor
     :raises MLOpsException: if a truthy ``stats_helper`` is not a StatsHelper
     """
     super(Model, self).__init__(rest_helper, id)
     self.model_path = None
     self.metadata = ModelMetadata(self.get_id(), name, model_format,
                                   description, user_defined, 0)

     # A falsy helper is accepted as "not set"; anything else must be a
     # real StatsHelper.
     helper_is_acceptable = not stats_helper or isinstance(stats_helper, StatsHelper)
     if not helper_is_acceptable:
         raise MLOpsException(
             "stats_helper object must be an instance of StatsHelper class")

     self._stats_helper = stats_helper
Пример #11
0
    def publish_model(self, model):
        """
        Publish a model through the model helper, using the configured pipeline id.
        Model data and metadata must be set using :class:`Model`

        :param model: Object of type :class:`Model`
        :return: The model Id returned by the model helper
        :raises: MLOpsException if ``model`` is not a :class:`Model`
        """
        self._verify_mlops_is_ready()

        if isinstance(model, Model):
            return self._model_helper.publish_model(model, self._config.pipeline_id)

        raise MLOpsException("Model object must be an instance of Model class")
Пример #12
0
    def _init_output_channel(self, ctx):
        """
        Create ``self._output_channel`` according to ``self._config.mlops_mode``.

        * STAND_ALONE: ``FileChannel`` when ``ctx`` is None, otherwise
          ``MLOpsPySparkChannel(ctx)``
        * ATTACH: ``MLOpsPythonChannel``
        * AGENT: ``MLOpsPythonChannel`` when ``ctx`` is None, otherwise
          ``MLOpsPySparkChannel``
        * REST_ACCUMULATOR: ``PythonAccumulatorChannel``

        Whenever a PySpark channel is chosen, the logger factory is pointed at the
        channel's ``get_logger`` and ``self._logger`` is re-created from it.

        :param ctx: Spark context (or None if not running in Spark)
        :return: None
        :raises MLOpsException: if the configured mode is none of the above
        """
        self._logger.info("setting output channel - 1 {}".format(self._config.mlops_mode))
        if self._config.mlops_mode == MLOpsMode.STAND_ALONE:
            if ctx is None:
                # Channel modules are imported only inside the branch that uses them.
                from parallelm.mlops.channels.file_channel import FileChannel
                self._output_channel = FileChannel()
            else:
                self._logger.info("output_channel == pyspark for Stand_Alone mode")

                from parallelm.mlops.channels.mlops_pyspark_channel import MLOpsPySparkChannel
                # Stand-alone PySpark channel is built from the context only
                # (no rest helper / pipeline instance id).
                self._output_channel = MLOpsPySparkChannel(ctx)
                # From here on loggers are obtained through the channel.
                logger_factory.set_logger_provider_func(self._output_channel.get_logger)
                self._logger = logger_factory.get_logger(__name__)

        elif self._config.mlops_mode == MLOpsMode.ATTACH:
            # For now, support only python when attaching to an ION
            from parallelm.mlops.channels.mlops_python_channel import MLOpsPythonChannel
            self._output_channel = MLOpsPythonChannel(self._mlops_ctx.rest_helper(),
                                                      self._mlops_ctx.current_node().pipeline_instance_id)
        elif self._config.mlops_mode == MLOpsMode.AGENT:
            # In agent mode if the context is None, we use the python channel. Otherwise, use the pyspark channel.
            if ctx is None:
                self._logger.info("output_channel = python")
                from parallelm.mlops.channels.mlops_python_channel import MLOpsPythonChannel
                self._output_channel = MLOpsPythonChannel(self._mlops_ctx.rest_helper(),
                                                          self._mlops_ctx.current_node().pipeline_instance_id)
            else:
                self._logger.info("output_channel = pyspark")
                from parallelm.mlops.channels.mlops_pyspark_channel import MLOpsPySparkChannel
                self._output_channel = MLOpsPySparkChannel(ctx, self._mlops_ctx.rest_helper(),
                                                           self._mlops_ctx.current_node().pipeline_instance_id)
                # Same logger re-binding as in the stand-alone PySpark case.
                logger_factory.set_logger_provider_func(self._output_channel.get_logger)
                self._logger = logger_factory.get_logger(__name__)
        elif self._config.mlops_mode == MLOpsMode.REST_ACCUMULATOR:
            self._logger.info("output_channel = rest accumulator")
            from parallelm.mlops.channels.python_accumulator_channel import PythonAccumulatorChannel
            self._output_channel = PythonAccumulatorChannel(self._mlops_ctx.rest_helper(),
                                                            self._mlops_ctx.current_node().pipeline_instance_id)
        else:
            raise MLOpsException("Mlops mode [{}] is not supported".format(self._config.mlops_mode))
        # Logged with whichever logger is current after the branch above.
        self._logger.info("setting output channel - 2 {} {}".format(self._config.mlops_mode, self._output_channel))
Пример #13
0
    def get_mlops_stat(self, model_id):
        """
        Wrap the rows of this table in an :class:`MLOpsStat` of matrix graph type.

        :param model_id: model id passed through to the created stat
        :return: the created :class:`MLOpsStat`
        :raises MLOpsException: if the table has no rows
        """
        if len(self._tbl_rows) == 0:
            raise MLOpsException("No rows data found in table object")

        # Two renderings are needed: one with escape=False (used for both the
        # data and the json dump) and one with the default escaping.
        unescaped = self._to_semi_json(escape=False)
        default_rendering = self._to_semi_json()

        return MLOpsStat(name=self._name,
                         stat_type=InfoType_pb2.General,
                         graph_type=StatGraphType.MATRIX,
                         mode=StatsMode.Instant,
                         data=unescaped,
                         string_data=default_rendering,
                         json_data_dump=unescaped,
                         model_id=model_id)
Пример #14
0
    def send_event(self, event_obj):
        """
        Convert an :class:`Event` into a ``ReflexEvent`` and send it through the
        mlops channel.

        :param event_obj: the :class:`Event` to send
        :raises MLOpsException: if ``event_obj`` is not an :class:`Event`
        """
        is_event = isinstance(event_obj, Event)
        if not is_event:
            raise MLOpsException(
                "Event object must be an instance of Event class")

        reflex_event = ReflexEvent()
        reflex_event.eventType = event_obj.type
        reflex_event.eventLabel = event_obj.label
        reflex_event.isAlert = event_obj.is_alert

        # The payload carries the current time in milliseconds.
        now_millis = int(time.time() * 1e3)
        reflex_event.data = self._event_data_as_json(
            now_millis, event_obj.type, event_obj.label,
            event_obj.description, event_obj.data)

        self._logger.info("Sending alert: {}".format(reflex_event))
        self._mlops_channel.event(reflex_event)
Пример #15
0
    def get_models_by_time(self,
                           start_time,
                           end_time,
                           download=False,
                           pipeline_name=None):
        """
        Retrieve models in the context of the current MLApp from MLOps based on start and end times.

        In API test mode the arguments are verified and the call is logged, but no
        query is performed and ``None`` is returned.

        :param start_time: a datetime object specifying window start time
        :type start_time: datetime
        :param end_time: a datetime object specifying window end time
        :type end_time: datetime
        :param download: If true, download the model data and provide it as an additional column in the dataframe
        :type download: bool
        :param pipeline_name: query by pipeline
        :type pipeline_name: string
        :return: the dataframe produced by the model helper (``None`` in API test mode)
        :raises MLOpsException: if ``pipeline_name`` is not a pipeline of the current MLApp
        """

        self._verify_mlops_is_ready()

        self._verify_time_window(start_time, end_time)

        ion = self._mlops_ctx.ion()

        if self._api_test_mode:
            self._logger.info(
                "API testing mode - returning without performing call - in {}".
                format(inspect.stack()[0][3]))
            # The message promises to return here; previously execution fell
            # through and the query was performed anyway.
            return None

        model_filter = ModelFilter()
        model_filter.time_window_start = start_time
        model_filter.time_window_end = end_time
        if pipeline_name is not None:
            if pipeline_name not in ion.pipeline_by_name:
                raise MLOpsException(
                    "Error: invalid pipeline name {}".format(pipeline_name))
            # Second element of each entry is used as the pipeline instance id
            # (presumably (name, id) pairs - verify against the ION object).
            pipeline_instances = [
                x[1] for x in ion.pipeline_to_pipelineInstances[pipeline_name]
            ]
            model_filter.pipeline_instance_id = pipeline_instances

        model_df = self._model_helper.get_models_dataframe(
            model_filter=model_filter, download=download)
        return model_df
Пример #16
0
    def _get_model_python(self, start_time, end_time):
        mdf = self._get_model_pdf()

        newdf = mdf[(mdf['workflowRunId'] == self._wf_id)
                    & (mdf['createdTimestamp'] >= start_time) &
                    (mdf['createdTimestamp'] <= end_time)]
        if newdf.shape[0] == 0:
            raise MLOpsException(
                "No models found in time range {}:{} for instance {}".format(
                    start_time, end_time, self._wf_id))

        output_df = newdf[['createdTimestamp', 'name', 'id']]

        vals = newdf['id'].values
        models = self._rest_helper.get_model_by_id(vals)

        output_df = output_df.assign(model=models)
        return output_df
Пример #17
0
    def _timestamp_as_nanoseconds(self, timestamp, units):
        """
        Scale ``timestamp`` to nanoseconds according to ``units``.

        A falsy timestamp is replaced by the current time (in seconds); a string
        timestamp is converted with ``float``. Falsy units leave the value as is.

        :param timestamp: number, numeric string, or a falsy value for "now"
        :param units: one of the ``KpiValue`` time units, or a falsy value
        :return: the scaled timestamp
        :raises MLOpsException: if ``units`` is not a known time unit
        """
        if not timestamp:
            timestamp, units = time.time(), KpiValue.TIME_SEC
        elif isinstance(timestamp, six.string_types):
            timestamp = float(timestamp)

        # Default timestamp units are nanoseconds
        if not units:
            return timestamp

        if units == KpiValue.TIME_SEC:
            return timestamp * 1e+9

        if units == KpiValue.TIME_MSEC:
            return timestamp * 1e+6

        if units != KpiValue.TIME_NSEC:
            known_units = [KpiValue.TIME_SEC, KpiValue.TIME_MSEC, KpiValue.TIME_NSEC]
            raise MLOpsException("Invalid timestamp units! unit: {}, should be one of: {}".format(
                units, known_units))

        return timestamp
Пример #18
0
    def download_model(self, model_id):
        """
        Download the data of a specific model through the REST helper.
        Note model size might be big, check the expected model size before downloading it.

        :param model_id: either a model id string or a :class:`Model` object
        :return: the downloaded data when given a string; when given a
                 :class:`Model`, that same object with its ``data`` attribute set
        :raises MLOpsException: for any other argument type
        """
        if isinstance(model_id, six.string_types):
            return self._rest_helper.download_model(model_id)

        if isinstance(model_id, Model):
            model = model_id
            model.data = self._rest_helper.download_model(model.id)
            return model

        raise MLOpsException(
            "model_id argument should be either model_id string, or Model object, got {}"
            .format(type(model_id)))
Пример #19
0
    def download(self, filepath):
        """
        Fetch this model's content through the REST helper, write it to
        ``filepath`` and record that path as the model path.
        Note model size might be big, check the expected model size before downloading it.

        :param filepath: the file path in the local file system to save the model's content
        :raises MLOpsException: if the metadata holds a size that differs from
                                the downloaded content's length
        """
        content = self._rest_helper.download_model(self.get_id())

        # In case the model was created from a json response of get model REST API
        expected_size = self.metadata.size
        if expected_size and expected_size != len(content):
            size_error = ("Unexpected downloaded model size! model id: {}, expected size: {},"
                          " downloaded size: {}")
            raise MLOpsException(
                size_error.format(self.get_id(), expected_size, len(content)))

        with io.open(filepath, mode='wb') as out_file:
            out_file.write(content)

        self.set_model_path(filepath)
Пример #20
0
    def _get_url_request_response(self, url):
        """
        GET ``url`` and return the response once the server answers OK.

        While the server answers "service unavailable" a warning is issued and
        the request is repeated after sleeping ``_service_unavail_sleep_time``;
        there is no upper bound on the number of attempts.

        :param url: the url to request
        :return: the response object of the successful request
        :raises MLOpsException: for any other status code
        """
        attempt = 0
        while True:
            response = requests.get(url, cookies=self._return_cookie())
            status = response.status_code

            if status == HTTPStatus.OK:
                return response

            if status != HTTPStatus.SERVICE_UNAVAIL:
                raise MLOpsException(
                    "Got HTTP Error [{}]. GET url [{}]. error [{}]".format(
                        status, url, response.text))

            # Service unavailable: warn, wait and go for another round.
            self._warn(
                "{} Got {} from server - possibly server is down - will try again in 5 seconds, url: {}"
                .format(attempt, status, url))
            attempt += 1
            time.sleep(self._service_unavail_sleep_time)
Пример #21
0
 def _get_url_request_response_as_json(self, url):
     """
     TODO: The better way to detect proper(JSON) content would be to check Content-Type header first.
           But ECO sends much of the data as application/octet-stream type.
           So if we want to use HTTP headers to understand what data is being send, we should first
           define our own guidelines about actual formats we send and types we set in headers.
     """
     ret = ""
     response = self._get_url_request_response(url)
     try:
         ret = response.json()
     except Exception as e:
         raise MLOpsException("Trying to parse response content as json:\n "
                              "Content(trimmed): {}\n"
                              "Content-Type: {}\n"
                              "failed with error: {}".format(
                                  str(response.text)[:2048],
                                  response.headers['Content-Type'], str(e)))
     return ret
Пример #22
0
    def pack(self, source_dir_path):
        """
        Pack a folder into the gzip-compressed tar file ``self.source_gzip``.

        The folder is stored in the archive under its own base name. A trailing
        path separator in ``source_dir_path`` is ignored when deriving that name.

        :param source_dir_path: folder to pack
        :return: path to created tar gz file
        :raises MLOpsException: if the folder does not exist or is not readable
        """
        if not (os.path.exists(source_dir_path)
                and os.access(source_dir_path, os.R_OK)):
            raise MLOpsException(
                "Path: {} does not exist or not readable".format(
                    source_dir_path))

        # basename("dir/") is '', which would drop the top level folder from the
        # archive; normalizing first makes "dir" and "dir/" pack identically.
        archive_root = os.path.basename(os.path.normpath(source_dir_path))

        with tarfile.open(self.source_gzip, "w:gz") as tar:
            tar.add(source_dir_path, arcname=archive_root)
        self._logger.info(
            "Directory was packed successfully! source={}, dest={}".format(
                source_dir_path, self.source_gzip))

        return self.source_gzip
Пример #23
0
    def add_series(self, label, x, y):
        """
        Append one more line to the MultiGraph object. The x and y vectors are
        stored as deep copies.

        :param label: name for the line
        :param x: vector of values for the x axis
        :param y: vector of values for the y axis (checked to be numbers)
        :return: self
        :raises MLOpsException: if x and y differ in length
        """
        check_vec_of_numbers(y, error_prefix="y_series data")

        if len(y) == len(x):
            self._labels.append(label)
            self._x_series.append(copy.deepcopy(x))
            self._y_series.append(copy.deepcopy(y))
            return self

        raise MLOpsException(
            "y_series data is not in the same lenght of x_series data")
Пример #24
0
    def set_stat(self, name, data, model_id, category, timestamp, **kwargs):
        """
        Report a statistic through the output channel.

        The first matching rule decides how the statistic is reported:

        1. ``name`` or ``data`` is an ``MLOpsStatGetter``: its ``get_mlops_stat``
           result is sent as a stat object.
        2. ``name`` is a classification, regression or clustering metric: the
           matching ``_set_*_stat`` method is called.
        3. ``category`` is CONFIG or TIME_SERIES: ``data`` is validated and sent
           as a plain stat. ``timestamp`` is not forwarded on this path.

        :param name: stat name, a metrics enum value, or an ``MLOpsStatGetter``
        :param data: stat data, or an ``MLOpsStatGetter``
        :param model_id: model id the stat is reported for
        :param category: stat category; only used by rule 3
        :param timestamp: timestamp passed on to the metric specific methods
        :param kwargs: extra arguments passed through
        :return: self, for every successfully reported stat
        :raises MLOpsException: if no rule matches
        """
        # If name supports the stat_object API, report the object it provides.
        if isinstance(name, MLOpsStatGetter):
            self._output_channel.stat_object(name.get_mlops_stat(model_id))
            return self

        # If data supports the stat_object API, report the object it provides.
        elif isinstance(data, MLOpsStatGetter):
            self._output_channel.stat_object(data.get_mlops_stat(model_id))
            return self

        if isinstance(name, ClassificationMetrics):
            self._set_classification_stat(name=name,
                                          data=data,
                                          model_id=model_id,
                                          timestamp=timestamp, **kwargs)

            return self
        elif isinstance(name, RegressionMetrics):
            self._set_regression_stat(name=name,
                                      data=data,
                                      model_id=model_id,
                                      timestamp=timestamp, **kwargs)
            return self
        elif isinstance(name, ClusteringMetrics):
            self._set_clustering_stat(name=name,
                                      data=data,
                                      model_id=model_id,
                                      timestamp=timestamp, **kwargs)
            return self

        if category in (StatCategory.CONFIG, StatCategory.TIME_SERIES):
            self._logger.debug("{} stat called: name: {} data_type: {} class: {}".
                               format(Constants.OFFICIAL_NAME, name, type(data), category))

            self._validate_supported_conf_ts_data_type(data)
            self._output_channel.stat(name, data, model_id, category, **kwargs)
            # Return self like every other successful branch; this path used to
            # fall off the end and return None.
            return self

        raise MLOpsException("stat_class: {} not supported in set_stat call".format(category))
Пример #25
0
    def get_models_by_time(self,
                           start_time,
                           end_time,
                           download=False,
                           pipeline_name=None):
        """
        Placeholder for retrieving the models of the current MLApp created within
        a time window. This implementation is not active: it ignores all of its
        arguments and always raises.

        :param start_time: a datetime object specifying window start time
        :type start_time: datetime
        :param end_time: a datetime object specifying window end time
        :type end_time: datetime
        :param download: If true, the model data would be provided as an
         additional column in the dataframe
        :type download: bool
        :param pipeline_name: query by pipeline
        :type pipeline_name: string
        :raises MLOpsException: always
        """
        raise MLOpsException("get_models_by_time is not available")
Пример #26
0
    def _validate_feature_importance_inputs(self,
                                            feature_importance_vector=None,
                                            feature_names=None,
                                            model=None,
                                            df=None):
        """
        verify common parameters. specific parameters are verified in each output channel
        :param feature_importance_vector: feature importance vector optional
        :param feature_names: feature names vector optional
        :param model: optional pipeline model for pyspark, sklearn model for python
        :param df: optional dataframe for analysis
        :raises: MLOpsException
        """

        # check that either model is provided or feature importance vector
        if not feature_importance_vector and not model:
            raise MLOpsException(
                "must provide either feature importance vector or a supporting model"
            )
        # check that either df is provided or feature names vector
        if df is None and not feature_names:
            raise MLOpsException(
                "must provide either feature names vector or a dataframe that can provide the names"
            )

        if feature_importance_vector:
            if not isinstance(feature_importance_vector, list):
                raise MLOpsException(
                    "features importance vector must be a list")
            for feature_importance_element in feature_importance_vector:
                if not isinstance(feature_importance_element,
                                  (six.integer_types, float)):
                    raise MLOpsException(
                        "features importance elements must be a number. got: {} "
                        .format(feature_importance_element))
        if feature_names:
            if not isinstance(feature_names, list):
                raise MLOpsException("features names vector must be a list")
            for feature_names_element in feature_names:
                if not isinstance(feature_names_element, six.string_types):
                    raise MLOpsException(
                        "features name elements must be a string. got: {} ".
                        format(feature_names_element))
Пример #27
0
 def set_annotations(self, annotations):
     """
     Store ``annotations`` in the model metadata.

     :param annotations: dictionary of annotations
     :raises MLOpsException: if ``annotations`` is not a dict (``None`` included)
     """
     # None is not a dict either, so one isinstance test covers both cases.
     if isinstance(annotations, dict):
         self.metadata.annotations = annotations
         return
     raise MLOpsException("Model annotations must be not None dict")
Пример #28
0
 def _validate_stats_helper(self):
     if not self._stats_helper:
         raise MLOpsException("stats_helper object was not set or is None")
Пример #29
0
 def table(self):
     """
     Not implemented here.

     :raises MLOpsException: always
     """
     raise MLOpsException("Not implemented")
Пример #30
0
 def api_raise_mlops_or_test_exception(self, arg=0):
     """
     Always raise: an MLOpsException when ``arg`` equals 0, otherwise an
     MLOpsTestException.

     :param arg: selects which exception is raised
     :raises MLOpsException: if ``arg`` equals 0
     :raises MLOpsTestException: for any other ``arg``
     """
     if arg != 0:
         raise MLOpsTestException("raising MLOps test exception")
     raise MLOpsException("raising MLOps exception")