示例#1
0
 def add_v3io_stream_trigger(
     self,
     stream_path,
     name="stream",
     group="serving",
     seek_to="earliest",
     shards=1,
 ):
     """Attach a v3io stream trigger to the function and pin its replica count.

     :param stream_path: stream location; when it contains "://" it is first
                         run through ``parse_v3io_path`` to extract an endpoint
     :param name:        trigger name (also passed to ``V3IOStreamTrigger``)
     :param group:       value passed as the trigger's ``consumerGroup``
     :param seek_to:     value passed as the trigger's ``seekTo``
     :param shards:      used for both min and max replicas; a falsy value
                         (``None``/``0``) is treated as 1
     """
     if "://" in stream_path:
         endpoint, stream_path = parse_v3io_path(stream_path, suffix="")
     else:
         endpoint = None
     container, path = split_path(stream_path)
     replicas = shards or 1

     trigger = V3IOStreamTrigger(
         name=name,
         container=container,
         # the first character of the split path is dropped
         # (presumably the leading "/" -- confirm against split_path)
         path=path[1:],
         consumerGroup=group,
         seekTo=seek_to,
         # fall back to the default web-api address when no endpoint was parsed
         webapi=endpoint if endpoint else "http://v3io-webapi:8081",
     )
     self.add_trigger(name, trigger)

     # same value for both bounds, so the replica count is fixed
     self.spec.min_replicas = replicas
     self.spec.max_replicas = replicas
示例#2
0
def init_functions(functions: dict, project=None, secrets=None):
    """Prepare the given functions before deployment.

    Every function in ``functions`` gets the v3io mount applied and its image
    pull policy set to ``Always``. The function stored under the ``'s2p'`` key
    additionally receives a ``labeled_stream`` v3io stream trigger.

    :param functions: mapping of function name to function object;
                      must contain an ``'s2p'`` entry
    :param project:   not used in this body
    :param secrets:   not used in this body
    """
    for fn in functions.values():
        # mount v3io into the function
        fn.apply(mount_v3io())
        # always pull the image so updated images are picked up
        fn.spec.image_pull_policy = "Always"

    # inference-stream related trigger, attached to the 's2p' function only
    s2p_function = functions["s2p"]
    s2p_function.add_trigger(
        "labeled_stream",
        V3IOStreamTrigger(url=f"{labeled_stream_url}@s2p"),
    )
示例#3
0
    def add_v3io_stream_trigger(
        self,
        stream_path,
        name="stream",
        group="serving",
        seek_to="earliest",
        shards=1,
        extra_attributes=None,
        **kwargs,
    ):
        """Attach a v3io stream trigger to the function and pin its replica count.

        :param stream_path:       v3io stream path
                                  (e.g. 'v3io:///projects/myproj/stream1')
        :param name:              trigger name
        :param group:             consumer group
        :param seek_to:           start seek from: "earliest", "latest", "time",
                                  "sequence"
        :param shards:            number of shards, used as both the min and max
                                  replica count (a falsy value is treated as 1)
        :param extra_attributes:  key/value dict with extra trigger attributes
        :param kwargs:            extra V3IOStreamTrigger class attributes
        """
        if "://" in stream_path:
            endpoint, stream_path = parse_v3io_path(stream_path, suffix="")
        else:
            endpoint = None
        container, path = split_path(stream_path)
        replicas = shards or 1

        trigger = V3IOStreamTrigger(
            name=name,
            container=container,
            # the first character of the split path is dropped
            # (presumably the leading "/" -- confirm against split_path)
            path=path[1:],
            consumerGroup=group,
            seekTo=seek_to,
            # fall back to the default web-api address when no endpoint was parsed
            webapi=endpoint if endpoint else "http://v3io-webapi:8081",
            extra_attributes=extra_attributes,
            **kwargs,
        )
        self.add_trigger(name, trigger)

        # same value for both bounds, so the replica count is fixed
        self.spec.min_replicas = replicas
        self.spec.max_replicas = replicas
示例#4
0
 def add_v3io_stream_trigger(
     self,
     stream_path,
     name="stream",
     group="serving",
     seek_to="earliest",
     shards=1,
 ):
     """Attach a v3io stream trigger to the function and pin its replica count.

     :param stream_path: stream path, split into container and path by
                         ``split_path``
     :param name:        trigger name (also passed to ``V3IOStreamTrigger``)
     :param group:       value passed as the trigger's ``consumerGroup``
     :param seek_to:     value passed as the trigger's ``seekTo``
     :param shards:      used for both min and max replicas; a falsy value
                         (``None``/``0``) is treated as 1
     """
     container, path = split_path(stream_path)
     replicas = shards or 1

     trigger = V3IOStreamTrigger(
         name=name,
         container=container,
         # the first character of the split path is dropped
         # (presumably the leading "/" -- confirm against split_path)
         path=path[1:],
         consumerGroup=group,
         seekTo=seek_to,
     )
     self.add_trigger(name, trigger)

     # same value for both bounds, so the replica count is fixed
     self.spec.min_replicas = replicas
     self.spec.max_replicas = replicas
示例#5
0
def concept_drift_deployer(
    context: MLClientCtx,
    base_dataset: DataItem,
    input_stream: str,
    output_stream: str,
    output_tsdb: str,
    tsdb_batch_size: int,
    callbacks: list,
    models: list = None,
    models_dest="models",
    pagehinkley_threshold: float = 10,
    ddm_warning_level: float = 2,
    ddm_out_control_level: float = 3,
    label_col="label",
    prediction_col="prediction",
    hub_url: str = mlconf.hub_url,
    fn_tag: str = "master",
):
    """Deploy a streaming Concept Drift detector on a labeled stream

    This function is the Deployment step for the Streaming Concept Drift
    Detector. It creates the drift detectors, warms them up with the error
    stream derived from base_dataset, logs each detector as a model, and then
    deploys the concept_drift_streaming function with the detectors' paths
    and settings passed through environment variables.

    :param context:         MLRun context
    :param base_dataset:    Dataset containing label_col and prediction_col to initialize the detectors
    :param input_stream:    labeled stream to track.
                            Should contain label_col and prediction_col
    :param output_stream:   Output stream to push the detector's alerts
    :param output_tsdb:     Output TSDB table to allow analysis and display
    :param tsdb_batch_size: Batch size of alerts to buffer before pushing to the TSDB
                            NOTE(review): not referenced in this body - confirm
                            whether it should be forwarded to the function.
    :param callbacks:       Additional rest endpoints to send the alert data to
                            NOTE(review): not referenced in this body - confirm
                            whether it should be forwarded to the function.
    :param models:          List of the detectors to deploy
                            NOTE(review): not consulted - the 'eddm',
                            'pagehinkley' and 'ddm' detectors are always
                            created; confirm whether it should select a subset.
                            Defaults to None.
    :param models_dest:     Location for saving the detectors
                            Defaults to 'models' (in relation to artifact_path).
    :param pagehinkley_threshold:  Drift level threshold for PH detector Defaults to 10.
    :param ddm_warning_level:      Warning level alert for DDM detector Defaults to 2.
    :param ddm_out_control_level:  Drift level alert for DDM detector Defaults to 3.
    :param label_col:       Label column to be used on base_dataset and input_stream
                            Defaults to 'label'.
    :param prediction_col:  Prediction column to be used on base_dataset and input_stream
                            Defaults to 'prediction'.
    :param hub_url:         hub_url in case the default is not used, concept_drift_streaming will be loaded
                            by this url
                            Defaults to mlconf.hub_url.
    :param fn_tag:          hub tag to use
                            NOTE(review): not referenced in this body - the
                            function is imported without a tag.
                            Defaults to 'master'
    """

    mlconf.dbpath = mlconf.dbpath or "http://mlrun-api:8080"
    mlconf.hub_url = hub_url
    fn = import_function(url="hub://concept_drift_streaming")

    context.logger.info("Loading base dataset")
    base_df = base_dataset.as_df()
    # 0 where the prediction equals the label, 1 where it differs
    error_stream = np.where(
        base_df[prediction_col].values == base_df[label_col].values, 0, 1
    )

    context.logger.info("Creating models")
    n_samples = len(error_stream)
    # Kept under a separate name so the `models` argument is not shadowed.
    detectors = {
        "eddm": skmultiflow.drift_detection.EDDM(),
        "pagehinkley": skmultiflow.drift_detection.PageHinkley(
            min_instances=n_samples, threshold=pagehinkley_threshold
        ),
        "ddm": skmultiflow.drift_detection.DDM(
            min_num_instances=n_samples,
            warning_level=ddm_warning_level,
            out_control_level=ddm_out_control_level,
        ),
    }

    context.logger.info("Streaming data to models")
    # Feed every detector the base error stream, sample by sample, in order.
    for error in error_stream:
        for detector in detectors.values():
            detector.add_element(error)

    context.logger.info("Logging ready models")
    for name, detector in detectors.items():
        data = dumps(detector)
        model_file = f"{name}.pkl"
        context.log_model(
            f"{name}_concept_drift",
            body=data,
            labels={"framework": "skmultiflow", "workflow": "concept-drift"},
            model_file=model_file,
            model_dir=models_dest,
            tag="latest",
        )
        # Expose the stored detector's location to the streaming function.
        fn.set_envs(
            {
                f"{name}_model_path": os.path.join(
                    context.artifact_path, models_dest, model_file
                )
            }
        )

    context.logger.info("Deploying Concept Drift Streaming function")
    fn.set_envs(
        {
            "label_col": label_col,
            "prediction_col": prediction_col,
            "drift_stream": output_stream,
            "tsdb_table": output_tsdb,
            "pagehinkley_threshold": pagehinkley_threshold,
            "ddm_warning_level": ddm_warning_level,
            "ddm_out_control": ddm_out_control_level,
        }
    )
    fn.add_trigger(
        "labeled_stream", V3IOStreamTrigger(url=input_stream, name="labeled_stream")
    )
    fn.apply(mount_v3io())
    fn.deploy(project=context.project)