from azureml.core import Run

# `workspace`, `read_recommendation`, and `read_metadata` are assumed to be
# defined elsewhere in the surrounding module.


def main(force: bool, skip: bool, model_metadata: str, register_folder: str,
         model_name: str):
    """Conditionally register the challenger model."""
    if force and skip:
        raise ValueError(
            "Model registration cannot be both forced and skipped")

    # Determine if the model should be registered
    if skip:
        print("Registration skipped")
        register_recommended = False
    elif force:
        print("Model registration forced")
        register_recommended = True
    else:
        register_recommended = read_recommendation(folder_path=register_folder)
        print(f"Model Registration Is Recommended?: {register_recommended}")

    # If model registration is recommended, then register the model
    if register_recommended:
        metadata = read_metadata(model_metadata)
        challenger_model_run = Run.get(workspace=workspace,
                                       run_id=metadata['run_id'])

        challenger_model_run.register_model(model_name,
                                            model_path=metadata['model_path'])
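
# A minimal sketch of how this entry point might be wired to CLI flags; the
# flag names below are assumptions for illustration, not taken from the
# original script.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--force", action="store_true")
    parser.add_argument("--skip", action="store_true")
    parser.add_argument("--model_metadata", type=str, required=True)
    parser.add_argument("--register_folder", type=str, required=True)
    parser.add_argument("--model_name", type=str, required=True)
    cli_args = parser.parse_args()

    main(force=cli_args.force,
         skip=cli_args.skip,
         model_metadata=cli_args.model_metadata,
         register_folder=cli_args.register_folder,
         model_name=cli_args.model_name)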
Example #2
import os
from typing import Any

import joblib
from azureml.core import Run

# `workspace` is assumed to be resolved elsewhere in the surrounding module.


def load_challenger_model(metadata: dict) -> Any:
    """Download the challenger model from its training run and un-pickle it."""
    train_run = Run.get(workspace, metadata['run_id'])
    challenger_path = "./challenger/model.pkl"
    # Make sure the download target directory exists.
    os.makedirs(os.path.dirname(challenger_path), exist_ok=True)

    train_run.download_file(name=metadata['model_path'],
                            output_file_path=challenger_path)

    return joblib.load(challenger_path)
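
# Usage sketch: the metadata values are placeholders, assuming the same
# metadata layout the registration step above consumes.
if __name__ == "__main__":
    metadata = {'run_id': '<training-run-id>',
                'model_path': 'outputs/model.pkl'}
    challenger = load_challenger_model(metadata)
    print(f"Loaded challenger model of type {type(challenger).__name__}")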
Example #3

import json
import logging
import os

import azure.functions as func
from azureml.core import Run, Workspace
from azureml.core.authentication import MsiAuthentication
from opencensus.ext.azure.log_exporter import AzureLogHandler


def main(event: func.EventGridEvent):
    """Event Grid-triggered function that monitors Azure ML pipeline runs."""
    try:
        result = json.dumps({
            'id': event.id,
            'data': event.get_json(),
            'topic': event.topic,
            'subject': event.subject,
            'event_type': event.event_type
        })
        logging.info('PipelineRunMonitor: processed an event: %s', result)

        if (event.event_type
                == "Microsoft.MachineLearningServices.RunCompleted"
                or event.get_json()["runStatus"] == "Failed"):

            # Retrieve environment variables
            subscription_id = os.environ["SUBSCRIPTION_ID"]
            rg_name = os.environ["RESOURCE_GROUP"]
            ws_name = os.environ["WORKSPACE_NAME"]
            app_insights_connection_string = os.environ[
                "APP_INSIGHTS_CONNECTION_STRING"]

            # Managed identity authentication
            msi_auth = MsiAuthentication()

            # Azure ML workspace
            aml_workspace = Workspace(subscription_id=subscription_id,
                                      resource_group=rg_name,
                                      workspace_name=ws_name,
                                      auth=msi_auth)
            logging.info("Azure ML workspace: %s", aml_workspace)

            # Set up logger for Application Insights
            logger = logging.getLogger(__name__)
            logger.addHandler(
                AzureLogHandler(
                    connection_string=app_insights_connection_string))

            aml_run = Run.get(aml_workspace, event.get_json()["runId"])
            # Fall back to the run itself when there is no parent pipeline run.
            parent = aml_run.parent or aml_run
            custom_dimensions = {
                "parent_run_id": parent.id,
                "parent_run_name": parent.name,
                "parent_run_number": parent.number,
                "run_number": aml_run.number,
                "step_id": aml_run.id,
                "step_name": aml_run.name,
                "experiment_name": aml_run.experiment.name,
                "run_url": parent.get_portal_url(),
                "parent_run_status": parent.status,
                "run_status": aml_run.status,
                "type": "run_detail",
                "workspace_name": aml_run.experiment.workspace.name,
            }
            details = aml_run.get_details()
            # default=lambda o: '' blanks out values json cannot serialize.
            logger.info(json.dumps(details, default=lambda o: ''),
                        extra={'custom_dimensions': custom_dimensions})

        elif (event.event_type
              == "Microsoft.MachineLearningServices.RunStatusChanged"
              and event.get_json()["runStatus"] == "Running"
              and event.get_json()["runProperties"]["azureml.runsource"]
              == "azureml.PipelineRun"):
            # TODO: emit a pipeline-run start notification here
            pass
    except Exception as ex:
        logging.exception(
            "PipelineRunMonitor: stopped execution due to the following "
            "error: %s", ex)
        raise RuntimeError("PipelineRunMonitor error") from ex
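
# A hedged local smoke-test sketch for the function above. The EventGridEvent
# keyword arguments below follow the azure-functions library's constructor
# (verify against your installed version), the payload fields are
# placeholders, and running it requires SUBSCRIPTION_ID, RESOURCE_GROUP,
# WORKSPACE_NAME and APP_INSIGHTS_CONNECTION_STRING to be set. In the deployed
# app, the eventGridTrigger binding's "name" must match the `event` parameter.
def _smoke_test():  # hypothetical helper, not part of the deployed app
    import datetime
    event = func.EventGridEvent(
        id='00000000-0000-0000-0000-000000000000',
        data={'runStatus': 'Failed', 'runId': '<run-id>'},
        topic='<topic>',
        subject='<subject>',
        event_type='Microsoft.MachineLearningServices.RunCompleted',
        event_time=datetime.datetime.utcnow(),
        data_version='1.0')
    main(event)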
Example #4

import datetime
import hashlib
import os

import joblib
import numpy as np
import pandas as pd
from azureml.core import Model, Run
from azureml_user.parallel_run import EntryScript

# `args` and `current_step_run` are module-level globals; see the init()
# sketch after run() for how they are conventionally populated.


def run(input_data):
    # 1.0 Set up Logging
    entry_script = EntryScript()
    logger = entry_script.logger
    logger.info('Making forecasts')
    os.makedirs('./outputs', exist_ok=True)

    all_predictions = pd.DataFrame()
    # 2.0 Iterate through input data
    for idx, file_path in enumerate(input_data):
        date1 = datetime.datetime.now()
        file_name, file_extension = os.path.splitext(
            os.path.basename(file_path))
        logger.info(file_path)
        if file_extension.lower() == ".parquet":
            data = pd.read_parquet(file_path)
        else:
            data = pd.read_csv(file_path)

        tags_dict = {}
        if getattr(args, "many_models_run_id", None):
            tags_dict['RunId'] = args.many_models_run_id

        for column_name in args.group_column_names:
            tags_dict[column_name] = str(
                data.iat[0, data.columns.get_loc(column_name)])

        print(tags_dict)

        model_string = '_'.join(str(v) for k, v in sorted(
            tags_dict.items()) if k in args.group_column_names)
        logger.info("model string to encode " + model_string)
        sha = hashlib.sha256()
        sha.update(model_string.encode())
        model_name = 'automl_' + sha.hexdigest()

        logger.info('starting (' + file_path + ') ' + str(date1))

        # 3.0 Query the registry for this group's model
        ws = current_step_run.experiment.workspace
        logger.info('query the model ' + model_name)
        model_list = Model.list(ws, name=model_name,
                                tags=tags_dict, latest=True)

        if not model_list:
            print("Could not find model")
            continue

        # 4.0 Un-pickle model and make predictions
        model_path = model_list[0].download(exist_ok=True)
        model = joblib.load(model_path)
        model_name = model_list[0].name
        print('Unpickled the model ' + model_name)

        # Grab relevant model metrics
        run_id = model_list[0].run_id
        run = Run.get(ws, run_id)
        target_metric = run.get_metrics(
            name='mean_absolute_error')['mean_absolute_error']

        X_test = data.copy()
        if args.target_column_name is not None:
            X_test.pop(args.target_column_name)

        print("prediction data head")
        print(X_test.head())
        y_predictions, X_trans = model.forecast(
            X_test, ignore_data_errors=True)
        print('Made predictions ' + model_name)

        # Insert predictions/model metrics to test set
        predicted_column_name = 'Predictions'
        data[predicted_column_name] = y_predictions
        data['model_metric'] = np.full(len(y_predictions), target_metric)
        print(data.head())
        print('Inserted predictions ' + model_name)

        cols = list(data.columns.values)
        print(cols)

        all_predictions = pd.concat([all_predictions, data],
                                    ignore_index=True)

        # 5.0 Log the run
        date2 = datetime.datetime.now()
        logger.info('ending (' + str(file_path) + ') ' + str(date2))

    print(all_predictions.head())
    return all_predictions
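
# run() above references module-level `args` and `current_step_run`. In a
# ParallelRunStep entry script these are conventionally populated in init(),
# which the runtime calls once per worker before run(). A minimal sketch; the
# argument names are inferred from how run() uses them, not taken from the
# original script.
import argparse


def init():
    global args, current_step_run

    parser = argparse.ArgumentParser()
    parser.add_argument("--group_column_names", nargs='*', type=str,
                        help="columns that identify each model's group")
    parser.add_argument("--target_column_name", type=str, default=None)
    parser.add_argument("--many_models_run_id", type=str, default=None)
    args, _ = parser.parse_known_args()

    # The step run gives run() access to the experiment and workspace.
    current_step_run = Run.get_context()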