def main():
    run = Run.get_context()
    ws, exp, run_id = get_aml_context(run)

    parser = argparse.ArgumentParser("evaluate")
    parser.add_argument(
        "--run_id",
        type=str,
        help="Training run ID",
    )
    parser.add_argument("--model_name", type=str, help="Name of the Model")
    parser.add_argument(
        "--ml_params",
        type=str,
        help="Parameters for ML pipeline in json format with defaults defined in parameters.json",  # NOQA: E501
    )
    args = parser.parse_args()

    if args.run_id is not None:
        run_id = args.run_id
    if run_id == 'amlcompute':
        run_id = run.parent.id
    model_name = args.model_name
    tag_name = 'experiment_name'

    cancel_if_perform_worse = parse_ml_params(run, args.ml_params)

    model = get_model(
        model_name=model_name,
        tag_name=tag_name,
        tag_value=exp.name,
        aml_workspace=ws)

    if model is not None:
        should_register = evaluate_model_performs_better(model, run)
        if (not should_register) and cancel_if_perform_worse:
            run.parent.cancel()
    else:
        print("This is the first model, register it")
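# The helpers main() relies on (parse_ml_params, evaluate_model_performs_better)
# are not shown above. The sketch below is one plausible shape for them, not the
# repo's actual implementation: it assumes --ml_params carries a JSON object with
# a "cancel_if_perform_worse" flag, and that both the registered model's tags and
# the parent run's metrics expose an "auc" value, mirroring the comparison logic
# used in the evaluation fragments later in this section.
import json


def parse_ml_params(run, ml_params_json):
    # Parse the --ml_params JSON string and return whether a worse-performing
    # model should cancel the pipeline (defaults to False if the key is absent).
    params = json.loads(ml_params_json) if ml_params_json else {}
    return bool(params.get("cancel_if_perform_worse", False))


def evaluate_model_performs_better(model, run, metric_name="auc"):
    # Compare the newly trained model's metric (logged on the parent pipeline
    # run) against the metric tagged on the currently registered model.
    production_value = float(model.tags.get(metric_name, 0))
    new_value = run.parent.get_metrics().get(metric_name)
    if new_value is None:
        print("Metric {} not found on the training run".format(metric_name))
        return False
    return float(new_value) > production_value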
def init():
    """
    Initializer called once per node that runs the scoring job.
    Parse command line arguments and get the right model to use for scoring.
    """
    try:
        print("Initializing batch scoring script...")

        # Get the model using name/version/tags filter
        model_filter = parse_args()
        amlmodel = get_model(
            model_name=model_filter[0],
            model_version=model_filter[1],
            tag_name=model_filter[2],
            tag_value=model_filter[3])

        # Load the model using name/version found
        global model
        modelpath = Model.get_model_path(
            model_name=amlmodel.name, version=amlmodel.version)
        model = joblib.load(modelpath)
        print("Loaded model {}".format(model_filter[0]))
    except Exception as ex:
        print("Error: {}".format(ex))
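# init() above is the first half of an Azure ML ParallelRunStep entry script;
# the framework also calls a run(mini_batch) function for each batch of input
# data. The sketch below is illustrative only: it assumes a tabular input (so
# mini_batch arrives as a pandas DataFrame) and a scikit-learn style model
# loaded into the global `model` by init(); the output column name is made up.
def run(mini_batch):
    # Score one mini-batch of rows and return the results; ParallelRunStep
    # collects the returned DataFrames into the job's output.
    predictions = model.predict(mini_batch)
    result = mini_batch.copy()
    result["prediction"] = predictions
    return result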
if args.run_id is not None:
    run_id = args.run_id
if run_id == 'amlcompute':
    run_id = run.parent.id
model_name = args.model_name
metric_eval = "auc"

allow_run_cancel = args.allow_run_cancel
# Parameterize the metrics on which the models should be compared
# Add golden data set on which all the model performance can be evaluated
try:
    firstRegistration = False
    tag_name = 'experiment_name'

    model = get_model(
        model_name=model_name,
        tag_name=tag_name,
        tag_value=exp.name,
        aml_workspace=ws)

    if model is not None:
        production_model_auc = 0
        if metric_eval in model.tags:
            production_model_auc = float(model.tags[metric_eval])
        new_model_auc = float(run.parent.get_metrics().get(metric_eval))
        if production_model_auc is None or new_model_auc is None:
            print("Unable to find", metric_eval, "metrics, "
                  "exiting evaluation")
            if allow_run_cancel.lower() == 'true':
                run.parent.cancel()
        else:
            print("Current Production model auc: {}, "
                  "New trained model auc: {}".format(
                      production_model_auc, new_model_auc))
if run_id == 'amlcompute':
    run_id = run.parent.id
model_name = args.model_name
auc_metric = "auc"
f1_score_metric = "f1score"
allow_run_cancel = args.allow_run_cancel
print("allow_run_cancel:", allow_run_cancel)
# Parameterize the metrics on which the models should be compared
# Add golden data set on which all the model performance can be evaluated
try:
    firstRegistration = False
    tag_name = 'experiment_name'

    model = get_model(model_name, tag_name, exp.name, ws)
    cancel_run = False
    if model is not None:
        new_model_run = run.parent
        new_model_auc = float(new_model_run.get_metrics().get(auc_metric))
        new_model_f1score = float(
            new_model_run.get_metrics().get(f1_score_metric))  # NOQA: E501
        if (new_model_auc is not None) and (new_model_f1score is not None):
            if auc_metric in model.tags:
                production_model_auc = float(model.tags[auc_metric])
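# The fragment above is cut off before the comparison finishes. As an
# illustration only (not the original file's continuation), a dual-metric
# check along these lines would decide whether to register the new model:
# it wins only if it beats the production model on both AUC and F1, with
# missing production tags treated as 0 so a first real measurement passes.
def new_model_performs_better(model_tags, new_auc, new_f1,
                              auc_metric="auc", f1_metric="f1score"):
    # model_tags: tags dict from the currently registered model.
    production_auc = float(model_tags.get(auc_metric, 0))
    production_f1 = float(model_tags.get(f1_metric, 0))
    return new_auc > production_auc and new_f1 > production_f1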