def run_mlbase_xgboost_regression(context: mlrun.MLClientCtx):
    """Train an XGBoost regressor under MLRun tracking and attach an extra
    JSON artifact to the logged model.

    :param context: The MLRun context of the current run.
    """
    import json

    import xgboost as xgb
    from mlrun.frameworks.xgboost import apply_mlrun

    regressor = xgb.XGBRegressor()
    X_train, X_test, y_train, y_test = get_dataset(classification=False)

    # Wrap the model so MLRun auto-logs metrics and the trained model.
    handler = apply_mlrun(
        regressor,
        context,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )
    regressor.fit(X_train, y_train)

    # Produce a small JSON file and register it as an extra model artifact.
    json_artifact = "test.json"
    with open(json_artifact, "w") as json_file:
        json.dump({"test": 0}, json_file, indent=4)

    logged = context.log_artifact(
        json_artifact,
        local_path=json_artifact,
        artifact_path=context.artifact_path,
        db_key=False,
    )
    handler.register_artifacts(logged)
    handler.update()
def custom_hyper_func(context: mlrun.MLClientCtx):
    """Run a hand-rolled hyper-parameter loop: one child run per parameter
    value, logging each value as its "accuracy" and marking the best child.

    :param context: The MLRun context to spawn child contexts from.
    """
    best_so_far = 0
    for value in [1, 2, 4, 3]:
        with context.get_child_context(myparam=value) as child:
            # The parameter itself doubles as the reported "accuracy".
            score = child.get_param("myparam")
            child.log_result("accuracy", score)
            if score > best_so_far:
                best_so_far = score
                child.mark_as_best()
def _extract_properties_from_context(
    context: mlrun.MLClientCtx,
) -> Dict[str, Any]:
    """
    Extract the properties of the run this context belongs to.

    :param context: The context to get his properties.

    :return: The properties as a dictionary where each key is the property name.
    """
    run = mlrun.RunObject.from_dict(context.to_dict())
    runs = mlrun.lists.RunList([run.to_dict()])
    # to_rows() yields parallel rows of names and values; transposing with
    # zip(*...) pairs each property name with its value.
    return dict(zip(*runs.to_rows()))
def concept_drift_deployer(
    context: MLClientCtx,
    base_dataset: DataItem,
    input_stream: str,
    output_stream: str,
    output_tsdb: str,
    tsdb_batch_size: int,
    callbacks: list,
    models: list = None,
    models_dest="models",
    pagehinkley_threshold: float = 10,
    ddm_warning_level: float = 2,
    ddm_out_control_level: float = 3,
    label_col="label",
    prediction_col="prediction",
    hub_url: str = mlconf.hub_url,
    fn_tag: str = "master",
):
    """Deploy a streaming Concept Drift detector on a labeled stream
    This function is the Deployment step for the Streaming Concept Drift
    Detector. It will load the selected drift detectors and initialize them
    with the base_dataset's statistics. Then it will deploy the
    concept_drift_streaming function and pass the models to it for streaming
    concept-drift detection on top of a labeled stream.

    :param context:         MLRun context
    :param base_dataset:    Dataset containing label_col and prediction_col to
                            initialize the detectors
    :param input_stream:    labeled stream to track.
                            Should contain label_col and prediction_col
    :param output_stream:   Output stream to push the detector's alerts
    :param output_tsdb:     Output TSDB table to allow analysis and display
    :param tsdb_batch_size: Batch size of alerts to buffer before pushing to
                            the TSDB (NOTE(review): accepted but not used by
                            this implementation — confirm intent)
    :param callbacks:       Additional rest endpoints to send the alert data to
                            (NOTE(review): accepted but not used by this
                            implementation — confirm intent)
    :param models:          List of the detectors to deploy
                            Defaults to ['ddm', 'eddm', 'pagehinkley'].
                            A "models" environment variable, if set, takes
                            precedence over this parameter.
    :param models_dest:     Location for saving the detectors
                            Defaults to 'models' (in relation to artifact_path).
    :param pagehinkley_threshold:  Drift level threshold for PH detector
                            Defaults to 10.
    :param ddm_warning_level:      Warning level alert for DDM detector
                            Defaults to 2.
    :param ddm_out_control_level:  Drift level alert for DDM detector
                            Defaults to 3.
    :param label_col:       Label column to be used on base_dataset and
                            input_stream Defaults to 'label'.
    :param prediction_col:  Prediction column to be used on base_dataset and
                            input_stream Defaults to 'prediction'.
    :param hub_url:         hub_url in case the default is not used,
                            concept_drift_streaming will be loaded by this url
                            Defaults to mlconf.hub_url.
    :param fn_tag:          hub tag to use Defaults to 'master'
    """
    mlconf.dbpath = mlconf.dbpath or "http://mlrun-api:8080"
    mlconf.hub_url = hub_url
    fn = import_function(url="hub://concept_drift_streaming")

    context.logger.info("Loading base dataset")
    base_df = base_dataset.as_df()
    # 0 where the prediction matched the label, 1 where it did not.
    error_stream = np.where(
        base_df[prediction_col].values == base_df[label_col].values, 0, 1
    )

    context.logger.info("Creating models")
    # Avoid a mutable default argument; None means "all detectors".
    if models is None:
        models = ["ddm", "eddm", "pagehinkley"]
    # The "models" env var (if set) overrides the parameter.
    selected = {
        model.strip() for model in os.getenv("models", ",".join(models)).split(",")
    }
    available = {
        "eddm": skmultiflow.drift_detection.EDDM(),
        "pagehinkley": skmultiflow.drift_detection.PageHinkley(
            min_instances=len(error_stream), threshold=pagehinkley_threshold
        ),
        "ddm": skmultiflow.drift_detection.DDM(
            min_num_instances=len(error_stream),
            warning_level=ddm_warning_level,
            out_control_level=ddm_out_control_level,
        ),
    }
    # BUG FIX: previously the selection was computed and then discarded — all
    # three detectors were always deployed. Keep only the requested ones.
    models = {name: model for name, model in available.items() if name in selected}

    context.logger.info("Streaming data to models")
    # Warm up every detector on the base dataset's error stream.
    for error in error_stream:
        for model in models.values():
            model.add_element(error)

    context.logger.info("Logging ready models")
    for name, model in models.items():
        data = dumps(model)
        model_file = f"{name}.pkl"
        context.log_model(
            f"{name}_concept_drift",
            body=data,
            labels={"framework": "skmultiflow", "workflow": "concept-drift"},
            model_file=model_file,
            model_dir=models_dest,
            tag="latest",
        )
        # Tell the streaming function where each serialized detector lives.
        fn.set_envs(
            {
                f"{name}_model_path": os.path.join(
                    context.artifact_path, models_dest, model_file
                )
            }
        )

    context.logger.info("Deploying Concept Drift Streaming function")
    fn.set_envs(
        {
            "label_col": label_col,
            "prediction_col": prediction_col,
            "drift_stream": output_stream,
            "tsdb_table": output_tsdb,
            "pagehinkley_threshold": pagehinkley_threshold,
            "ddm_warning_level": ddm_warning_level,
            "ddm_out_control": ddm_out_control_level,
        }
    )
    fn.add_trigger(
        "labeled_stream", V3IOStreamTrigger(url=input_stream, name="labeled_stream")
    )
    fn.apply(mount_v3io())
    fn.deploy(project=context.project)