from azureml.core import Run

# NOTE: `workspace`, `read_recommendation`, and `read_metadata` are assumed
# to be defined at module level elsewhere in this script.
def main(force: bool, skip: bool, model_metadata: str, register_folder: str,
         model_name: str):
    if force and skip:
        raise ValueError(
            "Model registration cannot be both forced and skipped")

    # Determine if the model should be registered
    if skip:
        print("Registration skipped")
        register_recommended = False
    elif force:
        print("Model registration forced")
        register_recommended = True
    else:
        register_recommended = read_recommendation(folder_path=register_folder)

    print(f"Model registration recommended: {register_recommended}")

    # If model registration is recommended, register the challenger model
    # from the training run that produced it
    if register_recommended:
        metadata = read_metadata(model_metadata)
        challenger_model_run = Run.get(workspace=workspace,
                                       run_id=metadata['run_id'])
        challenger_model_run.register_model(model_name,
                                            model_path=metadata['model_path'])
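The read_recommendation and read_metadata helpers are not shown above. Below is a minimal sketch of what read_recommendation could look like, assuming the upstream evaluation step drops a small JSON flag file into the registration folder; the recommendation.json file name and the "register" key are assumptions for illustration, not part of the original sample.

import json
import os

def read_recommendation(folder_path: str) -> bool:
    # Hypothetical helper: the evaluation step is assumed to have written
    # {"register": true|false} into recommendation.json in this folder.
    recommendation_file = os.path.join(folder_path, "recommendation.json")
    with open(recommendation_file) as f:
        return bool(json.load(f)["register"])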
from typing import Any
import os

import joblib
from azureml.core import Run

# NOTE: `workspace` is assumed to be defined at module level elsewhere.
def load_challenger_model(metadata: dict) -> Any:
    train_run = Run.get(workspace, metadata['run_id'])
    challenger_path = "./challenger/model.pkl"
    # Ensure the target directory exists before downloading the artifact
    os.makedirs(os.path.dirname(challenger_path), exist_ok=True)
    train_run.download_file(name=metadata['model_path'],
                            output_file_path=challenger_path)
    return joblib.load(challenger_path)
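For context, a hypothetical call site; the metadata values shown are placeholders, and the dictionary shape mirrors what read_metadata is expected to return (run id plus the artifact path inside that run's outputs):

metadata = {"run_id": "<training-run-id>", "model_path": "outputs/model.pkl"}
challenger = load_challenger_model(metadata)
# The returned object is whatever was pickled, e.g. a scikit-learn estimator
print(type(challenger))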
import json
import logging
import os

import azure.functions as func
from azureml.core import Run, Workspace
from azureml.core.authentication import MsiAuthentication
from opencensus.ext.azure.log_exporter import AzureLogHandler


def main(event: func.EventGridEvent):
    try:
        result = json.dumps({
            'id': event.id,
            'data': event.get_json(),
            'topic': event.topic,
            'subject': event.subject,
            'event_type': event.event_type
        })
        logging.info('PipelineRunMonitor: processed an event: %s', result)

        if (event.event_type == "Microsoft.MachineLearningServices.RunCompleted"
                or event.get_json()["runStatus"] == "Failed"):
            # Retrieve environment variables
            subscription_id = os.environ["SUBSCRIPTION_ID"]
            rg_name = os.environ["RESOURCE_GROUP"]
            ws_name = os.environ["WORKSPACE_NAME"]
            app_insights_connection_string = os.environ[
                "APP_INSIGHTS_CONNECTION_STRING"]

            # Managed identity authentication
            msi_auth = MsiAuthentication()

            # Azure ML workspace
            aml_workspace = Workspace(subscription_id=subscription_id,
                                      resource_group=rg_name,
                                      workspace_name=ws_name,
                                      auth=msi_auth)
            logging.info(f"Azure ML workspace: {aml_workspace}")

            # Set up a logger that forwards records to Application Insights
            logger = logging.getLogger(__name__)
            logger.addHandler(
                AzureLogHandler(
                    connection_string=app_insights_connection_string))

            aml_run = Run.get(aml_workspace, event.get_json()["runId"])
            custom_dimensions = {
                "parent_run_id": aml_run.parent.id if aml_run.parent else aml_run.id,
                "parent_run_name": aml_run.parent.name if aml_run.parent else aml_run.name,
                "parent_run_number": aml_run.parent.number if aml_run.parent else aml_run.number,
                "run_number": aml_run.number,
                "step_id": aml_run.id,
                "step_name": aml_run.name,
                "experiment_name": aml_run.experiment.name,
                "run_url": aml_run.parent.get_portal_url() if aml_run.parent else aml_run.get_portal_url(),
                "parent_run_status": aml_run.parent.status if aml_run.parent else aml_run.status,
                "run_status": aml_run.status,
                "type": "run_detail",
                "workspace_name": aml_run.experiment.workspace.name
            }
            details = aml_run.get_details()
            # Serialize run details, dropping any values json can't encode
            logger.info(json.dumps(details, default=lambda o: ''),
                        extra={'custom_dimensions': custom_dimensions})
        elif (event.event_type == "Microsoft.MachineLearningServices.RunStatusChanged"
                and event.get_json()["runStatus"] == "Running"
                and event.get_json()["runProperties"]["azureml.runsource"] == "azureml.PipelineRun"):
            # TODO: write a pipeline run notification here
            pass
    except Exception as ex:
        logging.exception(
            f"PipelineRunMonitor: stopped execution due to the following error: {ex}"
        )
        raise Exception("PipelineRunMonitor error") from ex
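One way the notification placeholder in the elif branch could be filled in, sketched under the same AzureLogHandler pattern used for completed runs; the notify_pipeline_started name and the fields chosen are assumptions, not part of the original sample.

import logging
import azure.functions as func

def notify_pipeline_started(event: func.EventGridEvent,
                            logger: logging.Logger) -> None:
    # Hypothetical helper: emit a "pipeline started" record to Application
    # Insights via a logger that already has an AzureLogHandler attached.
    payload = event.get_json()
    custom_dimensions = {
        "run_id": payload["runId"],
        "run_status": payload["runStatus"],
        "type": "pipeline_start",
    }
    logger.info("Pipeline run started",
                extra={"custom_dimensions": custom_dimensions})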
import datetime
import hashlib
import os

import joblib
import numpy as np
import pandas as pd
from azureml.core import Run
from azureml.core.model import Model
from azureml_user.parallel_run import EntryScript

# NOTE: `args` (parsed arguments) and `current_step_run` are assumed to be
# set up in this script's init() function.
def run(input_data):
    # 1.0 Set up logging
    entry_script = EntryScript()
    logger = entry_script.logger
    logger.info('Making forecasts')
    os.makedirs('./outputs', exist_ok=True)
    all_predictions = pd.DataFrame()

    # 2.0 Iterate through the input data files
    for idx, file_path in enumerate(input_data):
        date1 = datetime.datetime.now()
        file_name, file_extension = os.path.splitext(
            os.path.basename(file_path))
        logger.info(file_path)
        if file_extension.lower() == ".parquet":
            data = pd.read_parquet(file_path)
        else:
            data = pd.read_csv(file_path)

        tags_dict = {}
        if hasattr(args, "many_models_run_id") and args.many_models_run_id:
            tags_dict['RunId'] = args.many_models_run_id
        for column_name in args.group_column_names:
            tags_dict.update(
                {column_name: str(data.iat[0, data.columns.get_loc(column_name)])})
        print(tags_dict)

        # 3.0 Build the model name from the group columns and query for it
        model_string = '_'.join(str(v) for k, v in sorted(
            tags_dict.items()) if k in args.group_column_names)
        logger.info("model string to encode " + model_string)
        sha = hashlib.sha256()
        sha.update(model_string.encode())
        model_name = 'automl_' + sha.hexdigest()
        logger.info('starting (' + file_path + ') ' + str(date1))

        ws = current_step_run.experiment.workspace
        logger.info('querying for model ' + model_name)
        model_list = Model.list(ws, name=model_name,
                                tags=tags_dict, latest=True)
        if not model_list:
            print("Could not find model")
            continue

        # 4.0 Un-pickle the model and make predictions
        model_path = model_list[0].download(exist_ok=True)
        model = joblib.load(model_path)
        model_name = model_list[0].name
        print('Unpickled the model ' + model_name)

        # Grab the relevant metric from the training run
        # (renamed from `run` to avoid shadowing this function's name)
        run_id = model_list[0].run_id
        train_run = Run.get(ws, run_id)
        target_metric = train_run.get_metrics(
            name='mean_absolute_error')['mean_absolute_error']

        X_test = data.copy()
        if args.target_column_name is not None:
            X_test.pop(args.target_column_name)
        print("prediction data head")
        print(X_test.head())
        y_predictions, X_trans = model.forecast(
            X_test, ignore_data_errors=True)
        print('Made predictions ' + model_name)

        # Insert predictions and the model metric into the test set
        predicted_column_name = 'Predictions'
        data[predicted_column_name] = y_predictions
        data['model_metric'] = np.full(len(y_predictions), target_metric)
        print(data.head())
        print('Inserted predictions ' + model_name)

        cols = list(data.columns.values)
        print(cols)
        # DataFrame.append was removed in pandas 2.0; use pd.concat instead
        all_predictions = pd.concat([all_predictions, data])

        # 5.0 Log the run
        date2 = datetime.datetime.now()
        logger.info('ending (' + str(file_path) + ') ' + str(date2))

    print(all_predictions.head())
    return all_predictions
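This run() function is the per-mini-batch entry point of a ParallelRunStep. A minimal sketch of how such a script is typically wired into a pipeline follows; the names forecast_env, compute_target, datastore, inference_data, and the file name forecast.py are assumptions, not part of the original sample.

from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import ParallelRunConfig, ParallelRunStep

parallel_run_config = ParallelRunConfig(
    source_directory="scripts",
    entry_script="forecast.py",       # the script containing run() above
    mini_batch_size="1",              # one file per run() invocation
    error_threshold=-1,
    output_action="append_row",       # concatenates the returned DataFrames
    environment=forecast_env,         # assumed pre-built environment
    compute_target=compute_target,
    node_count=2,
)

forecast_step = ParallelRunStep(
    name="many-models-forecast",
    parallel_run_config=parallel_run_config,
    inputs=[inference_data.as_named_input("forecast_data")],
    output=PipelineData(name="forecast_output", datastore=datastore),
    allow_reuse=False,
)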