Example No. 1
    "compute_target_to_use_for_training"].strip().lower()
compute_target_name = settings["compute_target"]["training"][
    compute_target_to_use]["name"]
workspace_config_settings = settings["workspace"]["config"]

# Get workspace
print("Loading Workspace")
cli_auth = AzureCliAuthentication()
ws = Workspace.from_config(path=workspace_config_settings["path"],
                           auth=cli_auth,
                           _file_name=workspace_config_settings["file_name"])
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n')

# Attach Experiment
print("Loading Experiment")
exp = Experiment(workspace=ws, name=experiment_settings["name"])
print(exp.name, exp.workspace.name, sep="\n")

# Load compute target
print("Loading Compute Target")
compute_target = ComputeTarget(workspace=ws, name=compute_target_name)

# Create image registry configuration
if experiment_settings["docker"]["custom_image"]:
    container_registry = ContainerRegistry()
    container_registry.address = experiment_settings["docker"][
        "custom_image_registry_details"]["address"]
    container_registry.username = experiment_settings["docker"][
        "custom_image_registry_details"]["username"]
    container_registry.password = experiment_settings["docker"][
        "custom_image_registry_details"]["password"]
Example No. 2
automl_config = AutoMLConfig(
    task='regression',
    debug_log='automl_errors.log',
    primary_metric='r2_score',
    iteration_timeout_minutes=10,
    iterations=4,
    max_concurrent_iterations=4,  # change this based on the number of worker nodes
    verbosity=logging.INFO,
    spark_context=sc,
    enable_cache=True,
    path=project_folder,
    preprocess=True,
    X=train_X,
    y=train_Y,
    X_valid=valid_X,
    y_valid=valid_Y)

# COMMAND ----------

# MAGIC %md
# MAGIC ### Start the AutoML Tasks
# MAGIC Finally, we'll instantiate an Experiment and submit the `automl_config`. This will run for the specified number of iterations.

# COMMAND ----------

# Create AML Experiment - use the name from ./99-Shared-Functions-and-Settings notebook
experiment = Experiment(ws, automl_experiment_name)

# Submit AutoML Run
run = experiment.submit(automl_config)
run.wait_for_completion(show_output=True)
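
# COMMAND ----------

# A hedged follow-up (not part of the original snippet): once the AutoML run has
# completed, the best child run and its fitted model can be retrieved.
best_run, fitted_model = run.get_output()
print(best_run)
print(fitted_model)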
Example No. 3
# COMMAND ----------

# MAGIC %md
# MAGIC #### Create Azure Machine Learning Experiment
# MAGIC 
# MAGIC The Experiment object will contain 1-to-many 'Runs'. Each of these Runs can capture and track logged data, images, results, and/or trained models.
# MAGIC 
# MAGIC A Run object can represent a submitted `*.py` script, or it can be created from an interactive notebook session.
# MAGIC 
# MAGIC We'll also create an evaluator that will calculate performance metrics for the models.

# COMMAND ----------

# Use the experiment name from the ./99-Shared-Functions-and-Settings notebook
experiment = Experiment(ws, pyspark_experiment_name)

# Create evaluator object to assess model performance
evaluator = RegressionEvaluator(labelCol='duration_minutes')

# COMMAND ----------

# MAGIC %md
# MAGIC ### Linear Regression Model
# MAGIC The first model that we'll try is a Linear Regression model. 

# COMMAND ----------

with experiment.start_logging() as run:

  print("==============================================")
Example No. 4
def analyse_with_gordo():
    ws = Workspace.from_config()  # Azure ML
    # Get an experiment object from Azure Machine Learning
    experiment_name = "dummy_test"
    experiment = Experiment(workspace=ws, name=experiment_name)  # Azure ML
    mlflow.set_experiment(experiment_name)  # MLFlow

    resamples_for_model = ["1T", "1H"]
    aggregation_methods = ["max", "mean"]
    batch_sizes = [1, 10, 100]
    epochs = [1, 10]
    number_of_permutations = len(
        list(
            itertools.product(
                aggregation_methods, resamples_for_model, batch_sizes, epochs
            )
        )
    )

    resampled_original_data = read_and_resample("2nd_test.hdf", "1S")

    if PLOTTING:
        plotnum = 0
        f, axarr = plt.subplots(number_of_permutations + 1, sharex=True)
        axarr[plotnum].plot(
            resampled_original_data, linewidth=1, label="sensor_data_1S_mean"
        )
        axarr[plotnum].legend(loc="upper left")
        plotnum += 1

    for aggregation_method, interval, batch_size, epoch in itertools.product(
        aggregation_methods, resamples_for_model, batch_sizes, epochs
    ):
        run = experiment.start_logging()
        with mlflow.start_run():
            mlflow.log_param("interval", interval)  # MLFlow
            mlflow.log_param("aggregation_method", aggregation_method)  # MLFlow
            mlflow.log_param("batch_size", batch_size)  # MLFlow
            mlflow.log_param("epochs", epoch)  # MLFlow

            run.log("interval", interval)  # Azure ML
            run.log("aggregation_method", aggregation_method)  # Azure ML
            run.log("batch_size", batch_size)  # Azure ML
            run.log("epochs", epoch)  # Azure ML

            print(
                f"Build model for data resampled with {interval} resolution,  method {aggregation_method}, batch size {batch_size} and number of epochs {epoch}"
            )
            resampled = read_and_resample(
                "2nd_test.hdf", interval, aggregation_method=aggregation_method
            )
            anomalies, avg_train_anomaly, predicted_data, train_until_index = build_model(
                resampled, epoch, batch_size
            )

            r2_train, expl_train, r2_test, expl_test = calc_scores(
                resampled, predicted_data, train_until_index
            )
            run.log("r2_train", r2_train)  # Azure ML
            run.log("explained_variance_train", expl_train)  # Azure ML
            run.log("r2_test", r2_test)  # Azure ML
            run.log("explained_variance_test", expl_test)  # Azure ML

            mlflow.log_metric("r2_train", r2_train)  # MLFlow
            mlflow.log_metric("explained_variance_train", expl_train)  # MLFlow
            mlflow.log_metric("r2_test", r2_test)  # MLFlow
            mlflow.log_metric("explained_variance_test", expl_test)  # MLFlow

            anomalies = anomalies.rolling(
                resamples_for_model[-1]
            ).mean()  # Use the last of the experiment resamples as the anomaly resample
            if PLOTTING:
                axarr[plotnum].plot(
                    anomalies, label=interval + "-" + aggregation_method + "-model"
                )
                axarr[plotnum].axhline(avg_train_anomaly, color="r")
                axarr[plotnum].legend(loc="upper left")
                plotnum += 1

        run.complete()  # Azure ML

    if PLOTTING:
        plt.show()
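
# A hedged sketch (not part of the original) of what the `read_and_resample`
# helper used above might look like, assuming the HDF file holds a single
# time-indexed sensor series.
import pandas as pd

def read_and_resample(filename, interval, aggregation_method="mean"):
    """Read a time-indexed series from an HDF file and resample it."""
    data = pd.read_hdf(filename)
    return data.resample(interval).agg(aggregation_method)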
Example No. 5
args = parser.parse_args()

output_folder_path = args.outputs_folder
expname = args.experiment_name

os.makedirs(f'{output_folder_path}/{expname}', exist_ok=True)
submissions_download_folder = f'{output_folder_path}/{expname}/submissions'
tblogs_download_folder = f'{output_folder_path}/{expname}/tb_logs'

os.makedirs(submissions_download_folder, exist_ok=True)
if args.collect_tensorboard_logs:
    os.makedirs(tblogs_download_folder, exist_ok=True)

# check workspace
ws = Workspace.from_config('aml_config/config.json')
print(f'Using Azure ML Workspace {ws.name} in location {ws.location}')

experiment = Experiment(ws, expname)

for run in experiment.get_runs():
    for file in run.get_file_names():
        if file.endswith('submission.csv'):
            print(f'Downloading {file}')
            run.download_file(file, submissions_download_folder)
        if 'tfevents' in file and args.collect_tensorboard_logs:
            _, _, folder, _ = file.split('/')
            folder_path = f'{tblogs_download_folder}/{folder}'
            os.makedirs(folder_path, exist_ok=True)
            print(f'Downloading {file}')
            run.download_file(file, folder_path)
Example No. 6
# get root of git repo
prefix = Path(git.Repo(".", search_parent_directories=True).working_tree_dir)

# training script
script_dir = prefix.joinpath("code", "train", "xgboost", "iris")
script_name = "train.py"

# environment file
environment_file = prefix.joinpath("environments", "xgboost.txt")

# azure ml settings
environment_name = "xgboost-iris-example"
experiment_name = "xgboost-iris-example"
compute_target = "cpu-cluster"

# create environment
env = Environment.from_pip_requirements(environment_name, environment_file)

# create job config
src = ScriptRunConfig(
    source_directory=script_dir,
    script=script_name,
    environment=env,
    compute_target=compute_target,
)

# submit job
run = Experiment(ws, experiment_name).submit(src)
print(run)
run.wait_for_completion(show_output=True)
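
# A hedged follow-up (not in the original): register the trained model from the
# run's outputs; the model path and name below are assumptions.
model = run.register_model(model_name="xgboost-iris-example",
                           model_path="outputs/model.json")
print(model.name, model.version)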
Example No. 7
def main():
    logging.warning("Loading environment variables...")
    e = Env()
    e.load_environment_variables(env_file_path='local.env',
                                 fallback_to_os=True)

    # Get Azure machine learning workspace
    logging.warning(
        "Getting reference to existing Azure Machine Learning workspace...")
    auth = InteractiveLoginAuthentication(tenant_id=e.tenant_id)
    ws = get_workspace(e.workspace_name, auth, e.subscription_id,
                       e.resource_group)

    # Get compute target. It has to be a GPU compute target, since the 'Feature Extraction - Inference' step requires one.
    compute_target = get_compute_target(ws,
                                        compute_name=e.gpu_compute_name,
                                        vm_size=e.gpu_vm_size)

    # Create run configuration
    run_config = create_run_configuration(ws)

    # -------
    # Step 1
    # -------

    # Define input 'prepared datasets'
    input_prepared_datasets = []
    experiment_configuration = ExperimentConfigurationWrapper()
    experiment_configuration.load(
        os.path.join(cfg.StepsStructure.SNAPSHOT_ROOT_DIR,
                     cfg.StepsStructure.get_experiments_config_filepath())
    )  # noqa: E501
    for data_config in experiment_configuration.json['OBJECT_DETECTION'][
            'inference']['data']:
        dataset_name = data_config['input']['dataset_name']
        dataset = ws.datasets.get(dataset_name)
        input_prepared_datasets.extend([dataset.as_named_input(dataset_name)])

    # Create PipelineData objects to link the steps so they execute in sequence rather than in parallel
    pipeline_datastore = ws.get_default_datastore()
    object_detection_inference_output = PipelineData(
        name="centers", datastore=pipeline_datastore, is_directory=True)

    step_object_detection_inference = PythonScriptStep(
        name="Object Detection - Inference",
        source_directory=cfg.StepsStructure.SNAPSHOT_ROOT_DIR,
        script_name=cfg.StepsStructure.ObjectDetection.
        INFERENCE_STEP_SCRIPT_PATH,
        arguments=[
            '--subscription_id', e.subscription_id, '--resource_group',
            e.resource_group, '--workspace_name', e.workspace_name,
            '--experiments_config_filepath',
            cfg.StepsStructure.get_experiments_config_filepath(),
            '--model_name', cfg.MLModelNames.OBJECT_DETECTION_MODEL,
            '--model_version',
            cfg.MLModelNames.OBJECT_DETECTION_MODEL_BEST_VERSION,
            '--output_folder', object_detection_inference_output,
            '--should_register_dataset', True
        ],
        inputs=input_prepared_datasets,
        outputs=[object_detection_inference_output],
        compute_target=compute_target,
        runconfig=run_config,
        allow_reuse=True)

    # -------
    # Step 2
    # -------

    # input should contain 'prepared datasets' and centers
    object_extraction_input = object_detection_inference_output.as_input(
        'centers')
    object_extraction_inputs = [object_extraction_input]

    object_extraction_output = PipelineData(name="cropped_objects",
                                            datastore=pipeline_datastore,
                                            is_directory=True)

    step_object_extraction = PythonScriptStep(
        name="Object Extraction",
        source_directory=cfg.StepsStructure.SNAPSHOT_ROOT_DIR,
        script_name=cfg.StepsStructure.ObjectExtraction.STEP_SCRIPT_PATH,
        arguments=[
            "--subscription_id",
            e.subscription_id,
            "--resource_group",
            e.resource_group,
            "--workspace_name",
            e.workspace_name,
            "--experiments_config_filepath",
            cfg.StepsStructure.get_experiments_config_filepath(),
            "--output_folder",
            object_extraction_output,
            "--should_register_dataset",
            True,
            # This flag can be handy when we want to force recreation of the cropped objects dataset
            # (e.g. the NucleiExtractor implementation changed even though the input datasets did not).
            "--force_dataset_recreation",
            True
        ],
        inputs=object_extraction_inputs,
        outputs=[object_extraction_output],
        compute_target=compute_target,
        runconfig=run_config,
        allow_reuse=True,
    )

    # -------
    # Step 3a
    # -------

    step_object_images_upload = PythonScriptStep(
        name="Cropped Object Images Upload to Blob Storage",
        source_directory=cfg.StepsStructure.SNAPSHOT_ROOT_DIR,
        script_name=cfg.StepsStructure.ObjectImagesUpload.STEP_SCRIPT_PATH,
        arguments=[
            # '--subscription_id', e.subscription_id,
            # '--resource_group', e.resource_group,
            # '--workspace_name', e.workspace_name,
            '--experiments_config_filepath',
            cfg.StepsStructure.get_experiments_config_filepath(),
            # '--model_name', cfg.MLModelNames.FEATURE_EXTRACTION_MODEL,
            # '--model_version', cfg.MLModelNames.FEATURE_EXTRACTION_MODEL_BEST_VERSION,
            # '--output_folder', feature_extraction_inference_output,
            # '--should_register_dataset', True
        ],
        inputs=[object_extraction_output.as_input('cropped_objects')],
        outputs=[],
        compute_target=compute_target,
        runconfig=run_config,
        allow_reuse=True)

    # -------
    # Step 3b
    # -------

    feature_extraction_inference_input = object_extraction_output.as_input(
        'cropped_objects')
    feature_extraction_inference_inputs = [feature_extraction_inference_input]

    feature_extraction_inference_output = PipelineData(
        name="latent_dims", datastore=pipeline_datastore, is_directory=True)

    step_feature_extraction_inference = PythonScriptStep(
        name="Feature Extraction - Inference",
        source_directory=cfg.StepsStructure.SNAPSHOT_ROOT_DIR,
        script_name=cfg.StepsStructure.FeatureExtraction.
        INFERENCE_STEP_SCRIPT_PATH,
        arguments=[
            '--subscription_id', e.subscription_id, '--resource_group',
            e.resource_group, '--workspace_name', e.workspace_name,
            '--experiments_config_filepath',
            cfg.StepsStructure.get_experiments_config_filepath(),
            '--model_name', cfg.MLModelNames.FEATURE_EXTRACTION_MODEL,
            '--model_version',
            cfg.MLModelNames.FEATURE_EXTRACTION_MODEL_BEST_VERSION,
            '--output_folder', feature_extraction_inference_output,
            '--should_register_dataset', True
        ],
        inputs=feature_extraction_inference_inputs,
        outputs=[feature_extraction_inference_output],
        compute_target=compute_target,
        runconfig=run_config,
        allow_reuse=True)

    # -------
    # Pipeline composition
    # -------

    pipeline_steps = [
        step_object_detection_inference, step_object_extraction,
        step_object_images_upload, step_feature_extraction_inference
    ]
    pipeline = Pipeline(workspace=ws, steps=pipeline_steps)

    # Create and submit an experiment
    logging.warning("Submitting experiment...")
    experiment = Experiment(ws, cfg.ExperimentNames.INFERENCE_REMOTE)
    experiment.submit(
        pipeline, regenerate_outputs=False)  # Allow data reuse for this run
    logging.warning('Experiment submitted!')
Example No. 8
    parser.add_argument("--learning-rate", type=float, default=0.001)
    parser.add_argument("--gamma", type=float, default=0.1)
    parser.add_argument("--momentum", type=float, default=0.9)
    parser.add_argument("--step-size", type=int, default=7)
    args = parser.parse_args()

    workspace = Workspace(
        subscription_id=args.subscription_id,
        resource_group=args.resource_group,
        workspace_name=args.workspace_name,
    )
    compute_target, compute_target_created = get_compute_target(
        workspace, "lowpriority")
    dataset = Dataset.get_by_name(workspace=workspace, name=args.dataset_name)
    data_directory = dataset.as_mount()
    experiment = Experiment(workspace, name=args.experiment_name)
    script_params = {
        "--action": "final_layer",
        "--epochs": args.epochs,
        "--learning-rate": args.learning_rate,
        "--gamma": args.gamma,
        "--momentum": args.momentum,
        "--step-size": args.step_size,
        "--environment": "azure",
        "--model-dir": "./outputs",
        "--data-dir": data_directory,
    }
    estimator = PyTorch(
        source_directory="hymenoptera",
        script_params=script_params,
        compute_target=compute_target,
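        # (the snippet is truncated here; a plausible completion, assuming the
        # training entry script is named train.py)
        entry_script="train.py",
        use_gpu=True,
    )
    run = experiment.submit(estimator)
    run.wait_for_completion(show_output=True)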
Example No. 9
def main():
    """
    Run the experiment for training
    """
    interactive_auth = InteractiveLoginAuthentication(
        tenant_id=os.getenv("TENANT_ID"))
    work_space = Workspace.from_config(auth=interactive_auth)

    # Set up the dataset for training
    datastore = work_space.get_default_datastore()
    dataset = Dataset.File.from_files(path=(datastore, "datasets/mnist"))

    # Set up the experiment for training
    experiment = Experiment(workspace=work_space, name="keras-lenet-train")
    #     azureml._restclient.snapshots_client.SNAPSHOT_MAX_SIZE_BYTES = 2000000000
    config = ScriptRunConfig(
        source_directory=".",
        script="train_keras.py",
        compute_target="cpu-cluster",
        arguments=[
            "--data_folder",
            dataset.as_named_input("input").as_mount(),
            "--log_folder",
            "./logs",
        ],
    )

    # Set up the TensorFlow/Keras environment
    environment = Environment("keras-environment")

    # environment = Environment.from_conda_specification(
    #     name='keras-environment',
    #     file_path='keras-environment.yml'
    # )
    environment.python.conda_dependencies = CondaDependencies.create(
        python_version="3.7.7",
        pip_packages=["azureml-defaults", "numpy", "tensorflow==2.3.1"])
    config.run_config.environment = environment

    # Run the experiment for training
    run = experiment.submit(config)
    aml_url = run.get_portal_url()
    print(
        "Submitted to an Azure Machine Learning compute cluster. Click on the link below"
    )
    print("")
    print(aml_url)

    tboard = Tensorboard([run])
    # If successful, start() returns a string with the URI of the instance.
    tboard.start(start_browser=True)
    run.wait_for_completion(show_output=True)
    # After your job completes, be sure to stop() the streaming otherwise it will continue to run.
    print("Press enter to stop")
    input()
    tboard.stop()

    # Register Model
    metrics = run.get_metrics()
    run.register_model(
        model_name="keras_mnist",
        tags={
            "data": "mnist",
            "model": "classification"
        },
        model_path="outputs/keras_lenet.h5",
        model_framework=Model.Framework.TENSORFLOW,
        model_framework_version="2.3.1",
        properties={
            "train_loss": metrics["train_loss"][-1],
            "train_accuracy": metrics["train_accuracy"][-1],
            "val_loss": metrics["val_loss"][-1],
            "val_accuracy": metrics["val_accuracy"][-1],
        },
    )
Example No. 10
from azureml.core.conda_dependencies import CondaDependencies

ws = Workspace.from_config()
fra_eng_ds = ws.datasets['fra-eng-translation']

environment = Environment.get(ws, "sentiment-env")

estimator = TensorFlow(
    source_directory="translator",
    entry_script="experiment.py",
    framework_version="2.1",
    environment_definition=environment,
    compute_target="local",
    #script_params={'--data-size': 3000},
    inputs=[fra_eng_ds.as_named_input('in_data')])

experiment = Experiment(workspace=ws, name="translator-fr-en")
run = experiment.submit(config=estimator)

run.wait_for_completion(show_output=True)

run.register_model(model_name='translator-fr-en',
                   model_path='outputs/',
                   description='A translation model from english to french',
                   tags={
                       'source_language': 'eng',
                       'target_language': 'fr'
                   },
                   model_framework=Model.Framework.TENSORFLOW,
                   model_framework_version='2.2.0',
                   properties={'BLEU Score': run.get_metrics()['bleu_score']})
Example No. 11
)
print("evaluateStep created")

evaluateStep.run_after(trainStep)
steps = [evaluateStep]

pipeline = Pipeline(workspace=ws, steps=steps)
print ("Pipeline is built")

pipeline.validate()
print("Simple validation complete")

run = Run.get_context()
experiment_name = run.experiment.name

pipeline_run = Experiment(ws, experiment_name).submit(pipeline)
print("Pipeline is submitted for execution")

pipeline_run.wait_for_completion(show_output=True, timeout_seconds=43200)

print("Downloading evaluation results...")
# access the evaluate_output
data = pipeline_run.find_step_run('evaluate')[0].get_output_data('evaluate_output')
# download the predictions to local path
data.download('.', show_progress=True)

import json
# load the eval info json
with open(os.path.join('./', data.path_on_datastore, 'eval_info.json')) as f:
    eval_info = json.load(f)
print("Printing evaluation results...")
Example No. 12
            best_loss = loss
            best_run_id = run
    except Exception as e:
        print("WARNING: Could get val_los for run_id", run)
        pass

print("best run", best_run_id, best_loss)

# start an Azure ML run
run = Run.get_context()
run_details = run.get_details()

experiment_name = run_details['runDefinition']['environment']['name'].split(
)[1]

exp = Experiment(ws, name=experiment_name)
best_run = Run(exp, best_run_id)

# register the model
if best_run_id:
    tags = {}
    tags['run_id'] = best_run_id
    tags['val_loss'] = metrics[best_run_id]['val_loss'][-1]
    model = best_run.register_model(model_name=experiment_name,
                                    model_path='outputs',
                                    tags=tags)

else:
    raise Exception(
        "Couldn't not find a model to register.  Probably because no run completed"
    )
Example No. 13
    runconfig=aml_run_config,
    allow_reuse=True
)

# Build pipeline

pipeline_steps = [
    dataprep_step,
    train_step
]

pipeline = Pipeline(workspace=workspace, steps=pipeline_steps)

# Run pipeline

run = Experiment(workspace=workspace, name='gensim_lda-pipeline').submit(pipeline)

run.wait_for_completion(show_output=True)

# Get training step

run_train_step = [s for s in run.get_steps() if s.name == 'train.py'][0]

print(run_train_step.get_metrics())
print(run_train_step.get_file_names())

# Register model

model = run_train_step.register_model(model_name='gensim_lda', model_path='outputs')

print(model.name, model.id, model.version, sep='\t')
Example No. 14
# Get batch size and epochs
batch_size = args.batch_size
epochs = args.epochs

# Get the current run.
run = Run.get_context()

# Offline run. Download the sample dataset and run locally. Still push results to Azure.
if (run.id.startswith("OfflineRun")):
    print("Running in offline mode...")

    # Access workspace.
    print("Accessing workspace...")
    workspace = Workspace.from_config()
    experiment = Experiment(workspace, "training-junkyard")
    run = experiment.start_logging(outputs=None, snapshot_directory=".")

    # Get dataset.
    print("Accessing dataset...")
    if os.path.exists("dataset") == False:
        dataset_name = "anon-depthmap-npy"
        dataset = workspace.datasets[dataset_name]
        dataset.download(target_path='dataset', overwrite=False)
    dataset_path = "dataset"

# Online run. Use dataset provided by training notebook.
else:
    print("Running in online mode...")
    experiment = run.experiment
    workspace = experiment.workspace
Example No. 15
# 06-run-pytorch-data.py
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.core import Environment
from azureml.core import ScriptRunConfig
from azureml.core import Dataset
import os

if __name__ == "__main__":
    ws = Workspace.from_config()
    datastore = ws.get_default_datastore()
    dataset = Dataset.File.from_files(path=(datastore, 'datasets/locations'))

    experiment = Experiment(workspace=ws, name='mic-999')

    config = ScriptRunConfig(
        source_directory='./src',
        script='model.py',
        compute_target='cpu-cluster',
        arguments=['--data_path',
                   dataset.as_named_input('input').as_mount()],
    )
    # set up pytorch environment
    env = Environment.from_conda_specification(
        name='monografia-env',
        file_path='./.azureml/multiclass-image-classification.yml')
    config.run_config.environment = env

    run = experiment.submit(config)
    aml_url = run.get_portal_url()
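    # (a plausible completion: print the portal URL and wait for the run to finish)
    print(aml_url)
    run.wait_for_completion(show_output=True)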
Example No. 16
def main():

    run = Run.get_context()
    if (run.id.startswith('OfflineRun')):
        from dotenv import load_dotenv
        # For local development, set values in this section
        load_dotenv()
        workspace_name = os.environ.get("WORKSPACE_NAME")
        experiment_name = os.environ.get("EXPERIMENT_NAME")
        resource_group = os.environ.get("RESOURCE_GROUP")
        subscription_id = os.environ.get("SUBSCRIPTION_ID")
        build_id = os.environ.get('BUILD_BUILDID')
        # run_id useful to query previous runs
        run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012"
        aml_workspace = Workspace.get(name=workspace_name,
                                      subscription_id=subscription_id,
                                      resource_group=resource_group)
        ws = aml_workspace
        exp = Experiment(ws, experiment_name)
    else:
        ws = run.experiment.workspace
        exp = run.experiment
        run_id = 'amlcompute'

    parser = argparse.ArgumentParser("register")
    parser.add_argument(
        "--build_id",
        type=str,
        help="The Build ID of the build triggering this pipeline run",
    )
    parser.add_argument(
        "--run_id",
        type=str,
        help="Training run ID",
    )
    parser.add_argument(
        "--model_name",
        type=str,
        help="Name of the Model",
        default="sklearn_regression_model.pkl",
    )

    args = parser.parse_args()
    if (args.build_id is not None):
        build_id = args.build_id
    if (args.run_id is not None):
        run_id = args.run_id
    if (run_id == 'amlcompute'):
        run_id = run.parent.id
    model_name = args.model_name

    if (build_id is None):
        register_aml_model(model_name, exp, run_id)
    else:
        run.tag("BuildId", value=build_id)
        builduri_base = os.environ.get("BUILDURI_BASE")
        if (builduri_base is not None):
            build_uri = builduri_base + build_id
            run.tag("BuildUri", value=build_uri)
            register_aml_model(model_name, exp, run_id, build_id, build_uri)
        else:
            register_aml_model(model_name, exp, run_id, build_id)
Example No. 17
# PREPARE LOGGING

logger = logging.getLogger()
logger.setLevel("INFO")
ch = logging.StreamHandler()
ch.setLevel(logging.INFO)
formatter = logging.Formatter(
    "%(asctime)s - %(name)s - %(levelname)s - %(message)s")
ch.setFormatter(formatter)
logger.addHandler(ch)

# GET WS, EXP, ENV and COMPUTE TARGET

ws = Workspace.from_config()
experiment = Experiment(ws,
                        "MaxFreezerTemperatureExceededPipeline",
                        _create_in_cloud=True)
compute_target = ComputeTarget(ws, "freezertrain")
run_config = RunConfiguration()
freezer_environment = ws.environments["sktime_freezer_environment"]
run_config.environment = freezer_environment
logger.info("Environment complete")

# PIPELINE PARAMS

output_df_long = PipelineData("output_df_long",
                              datastore=ws.get_default_datastore())
output_df_nested = PipelineData("output_df_nested",
                                datastore=ws.get_default_datastore())
time_series_length_param = PipelineParameter(name="time_series_length",
                                             default_value=10)
Example No. 18
def main():

    run = Run.get_context()

    if (run.id.startswith('OfflineRun')):
        from dotenv import load_dotenv
        load_dotenv()
        sources_dir = os.environ.get("SOURCES_DIR_TRAIN")
        if (sources_dir is None):
            sources_dir = 'MLOps'
        workspace_name = os.environ.get("WORKSPACE_NAME")
        experiment_name = os.environ.get("EXPERIMENT_NAME")
        resource_group = os.environ.get("RESOURCE_GROUP")
        subscription_id = os.environ.get("SUBSCRIPTION_ID")
        build_id = os.environ.get('BUILD_BUILDID')
        aml_workspace = Workspace.get(name=workspace_name,
                                      subscription_id=subscription_id,
                                      resource_group=resource_group)
        ws = aml_workspace
        exp = Experiment(ws, experiment_name)
    else:
        exp = run.experiment

    e = Env()

    parser = argparse.ArgumentParser("register")
    parser.add_argument(
        "--build_id",
        type=str,
        help="The Build ID of the build triggering this pipeline run",
    )
    parser.add_argument("--output_model_version_file",
                        type=str,
                        default="model_version.txt",
                        help="Name of a file to write model version to")

    args = parser.parse_args()
    if (args.build_id is not None):
        build_id = args.build_id
    model_name = e.model_name

    try:
        tag_name = 'BuildId'
        model = get_model(model_name=model_name,
                          tag_name=tag_name,
                          tag_value=build_id,
                          aml_workspace=exp.workspace)

        if (model is not None):
            print("Model was registered for this build.")
        if (model is None):
            print("Model was not registered for this run.")
            sys.exit(1)
    except Exception as e:
        print(e)
        print("Model was not registered for this run.")
        sys.exit(1)

    # Save the Model Version for other AzDO jobs after script is complete
    if args.output_model_version_file is not None:
        with open(args.output_model_version_file, "w") as out_file:
            out_file.write(str(model.version))
Example No. 19
def main():
    # Loading azure credentials
    print("::debug::Loading azure credentials")
    azure_credentials = os.environ.get("INPUT_AZURE_CREDENTIALS", default="{}")
    try:
        azure_credentials = json.loads(azure_credentials)
    except JSONDecodeError:
        print(
            "::error::Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS"
        )
        raise AMLConfigurationException(
            "Incorrect or poorly formed output from azure credentials saved in AZURE_CREDENTIALS secret. See setup in https://github.com/Azure/aml-workspace/blob/master/README.md"
        )

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    validate_json(data=azure_credentials,
                  schema=azure_credentials_schema,
                  input_name="AZURE_CREDENTIALS")

    # Mask values
    print("::debug::Masking parameters")
    mask_parameter(parameter=azure_credentials.get("tenantId", ""))
    mask_parameter(parameter=azure_credentials.get("clientId", ""))
    mask_parameter(parameter=azure_credentials.get("clientSecret", ""))
    mask_parameter(parameter=azure_credentials.get("subscriptionId", ""))

    # Loading parameters file
    print("::debug::Loading parameters file")
    parameters_file = os.environ.get("INPUT_PARAMETERS_FILE",
                                     default="run.json")
    parameters_file_path = os.path.join(".cloud", ".azure", parameters_file)
    try:
        with open(parameters_file_path) as f:
            parameters = json.load(f)
    except FileNotFoundError:
        print(
            f"::debug::Could not find parameter file in {parameters_file_path}. Please provide a parameter file in your repository if you do not want to use default settings (e.g. .cloud/.azure/run.json)."
        )
        parameters = {}

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    validate_json(data=parameters,
                  schema=parameters_schema,
                  input_name="PARAMETERS_FILE")

    # Define target cloud
    if azure_credentials.get(
            "resourceManagerEndpointUrl",
            "").startswith("https://management.usgovcloudapi.net"):
        cloud = "AzureUSGovernment"
    elif azure_credentials.get(
            "resourceManagerEndpointUrl",
            "").startswith("https://management.chinacloudapi.cn"):
        cloud = "AzureChinaCloud"
    else:
        cloud = "AzureCloud"

    # Loading Workspace
    print("::debug::Loading AML Workspace")
    sp_auth = ServicePrincipalAuthentication(
        tenant_id=azure_credentials.get("tenantId", ""),
        service_principal_id=azure_credentials.get("clientId", ""),
        service_principal_password=azure_credentials.get("clientSecret", ""),
        cloud=cloud)
    config_file_path = os.environ.get("GITHUB_WORKSPACE",
                                      default=".cloud/.azure")
    config_file_name = "aml_arm_config.json"
    try:
        ws = Workspace.from_config(path=config_file_path,
                                   _file_name=config_file_name,
                                   auth=sp_auth)
    except AuthenticationException as exception:
        print(
            f"::error::Could not retrieve user token. Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS: {exception}"
        )
        raise AuthenticationException
    except AuthenticationError as exception:
        print(f"::error::Microsoft REST Authentication Error: {exception}")
        raise AuthenticationError
    except AdalError as exception:
        print(
            f"::error::Active Directory Authentication Library Error: {exception}"
        )
        raise AdalError
    except ProjectSystemException as exception:
        print(f"::error::Workspace authorizationfailed: {exception}")
        raise ProjectSystemException

    # Create experiment
    print("::debug::Creating experiment")
    try:
        # Default experiment name
        repository_name = os.environ.get("GITHUB_REPOSITORY").split("/")[-1]
        branch_name = os.environ.get("GITHUB_REF").split("/")[-1]
        default_experiment_name = f"{repository_name}-{branch_name}"

        experiment = Experiment(
            workspace=ws,
            name=parameters.get("experiment_name",
                                default_experiment_name)[:36])
    except (TypeError, UserErrorException) as exception:
        experiment_name = parameters.get("experiment_name", None)
        print(
            f"::error::Could not create an experiment with the specified name {experiment_name}: {exception}"
        )
        raise AMLExperimentConfigurationException(
            f"Could not create an experiment with the specified name {experiment_name}: {exception}"
        )

    # Loading run config
    print("::debug::Loading run config")
    run_config = None
    if run_config is None:
        # Loading run config from runconfig yaml file
        print("::debug::Loading run config from runconfig yaml file")
        run_config = load_runconfig_yaml(runconfig_yaml_file=parameters.get(
            "runconfig_yaml_file", "code/train/run_config.yml"))
    if run_config is None:
        # Loading run config from pipeline yaml file
        print("::debug::Loading run config from pipeline yaml file")
        run_config = load_pipeline_yaml(workspace=ws,
                                        pipeline_yaml_file=parameters.get(
                                            "pipeline_yaml_file",
                                            "code/train/pipeline.yml"))
    if run_config is None:
        # Loading run config from python runconfig file
        print("::debug::Loading run config from python runconfig file")
        run_config = load_runconfig_python(
            workspace=ws,
            runconfig_python_file=parameters.get("runconfig_python_file",
                                                 "code/train/run_config.py"),
            runconfig_python_function_name=parameters.get(
                "runconfig_python_function_name", "main"))
    if run_config is None:
        # Loading values for errors
        pipeline_yaml_file = parameters.get("pipeline_yaml_file",
                                            "code/train/pipeline.yml")
        runconfig_yaml_file = parameters.get("runconfig_yaml_file",
                                             "code/train/run_config.yml")
        runconfig_python_file = parameters.get("runconfig_python_file",
                                               "code/train/run_config.py")
        runconfig_python_function_name = parameters.get(
            "runconfig_python_function_name", "main")

        print(
            f"::error::Error when loading runconfig yaml definition your repository (Path: /{runconfig_yaml_file})."
        )
        print(
            f"::error::Error when loading pipeline yaml definition your repository (Path: /{pipeline_yaml_file})."
        )
        print(
            f"::error::Error when loading python script or function in your repository which defines the experiment config (Script path: '/{runconfig_python_file}', Function: '{runconfig_python_function_name}()')."
        )
        print(
            "::error::You have to provide either a yaml definition for your run, a yaml definition of your pipeline or a python script, which returns a runconfig (Pipeline, ScriptRunConfig, AutoMlConfig, Estimator, etc.). Please read the documentation for more details."
        )
        raise AMLExperimentConfigurationException(
            "You have to provide a yaml definition for your run, a yaml definition of your pipeline or a python script, which returns a runconfig. Please read the documentation for more details."
        )

    # Submit run config
    print("::debug::Submitting experiment config")
    try:
        # Defining default tags
        print("::debug::Defining default tags")
        default_tags = {
            "GITHUB_ACTOR": os.environ.get("GITHUB_ACTOR"),
            "GITHUB_REPOSITORY": os.environ.get("GITHUB_REPOSITORY"),
            "GITHUB_SHA": os.environ.get("GITHUB_SHA"),
            "GITHUB_REF": os.environ.get("GITHUB_REF")
        }

        run = experiment.submit(config=run_config,
                                tags=dict(parameters.get("tags", {}),
                                          **default_tags))
    except AzureMLException as exception:
        print(
            f"::error::Could not submit experiment config. Your script passed object of type {type(run_config)}. Object must be correctly configured and of type e.g. estimator, pipeline, etc.: {exception}"
        )
        raise AMLExperimentConfigurationException(
            f"Could not submit experiment config. Your script passed object of type {type(run_config)}. Object must be correctly configured and of type e.g. estimator, pipeline, etc.: {exception}"
        )
    except TypeError as exception:
        print(
            f"::error::Could not submit experiment config. Your script passed object of type {type(run_config)}. Object must be correctly configured and of type e.g. estimator, pipeline, etc.: {exception}"
        )
        raise AMLExperimentConfigurationException(
            f"Could not submit experiment config. Your script passed object of type {type(run_config)}. Object must be correctly configured and of type e.g. estimator, pipeline, etc.: {exception}"
        )

    # Create outputs
    print("::debug::Creating outputs")
    print(f"::set-output name=experiment_name::{run.experiment.name}")
    print(f"::set-output name=run_id::{run.id}")
    print(f"::set-output name=run_url::{run.get_portal_url()}")

    # Waiting for run to complete
    print("::debug::Waiting for run to complete")
    if parameters.get("wait_for_completion", True):
        run.wait_for_completion(show_output=True)

        # Creating additional outputs of finished run
        run_metrics = run.get_metrics(recursive=True)
        print(f"::set-output name=run_metrics::{run_metrics}")
        run_metrics_markdown = convert_to_markdown(run_metrics)
        print(
            f"::set-output name=run_metrics_markdown::{run_metrics_markdown}")

        # Download artifacts if enabled
        if parameters.get("download_artifacts", False):
            # Defining artifacts folder
            print("::debug::Defining artifacts folder")
            root_path = os.environ.get("GITHUB_WORKSPACE", default=None)
            folder_name = f"aml_artifacts_{run.id}"
            artifact_path = os.path.join(root_path, folder_name)

            # Downloading artifacts
            print("::debug::Downloading artifacts")
            run.download_files(
                output_directory=os.path.join(artifact_path, "parent"))
            children = run.get_children(recursive=True)
            for i, child in enumerate(children):
                child.download_files(
                    output_directory=os.path.join(artifact_path, f"child_{i}"))

            # Creating additional outputs
            print(f"::set-output name=artifact_path::{artifact_path}")

    # Publishing pipeline
    print("::debug::Publishing pipeline")
    if type(run) is PipelineRun and parameters.get("publish_pipeline", False):
        # Default pipeline name
        repository_name = os.environ.get("GITHUB_REPOSITORY").split("/")[-1]
        branch_name = os.environ.get("GITHUB_REF").split("/")[-1]
        default_pipeline_name = f"{repository_name}-{branch_name}"

        published_pipeline = run.publish_pipeline(
            name=parameters.get("pipeline_name", default_pipeline_name),
            description="Pipeline registered by GitHub Run Action",
            version=parameters.get("pipeline_version", None),
            continue_on_step_failure=parameters.get(
                "pipeline_continue_on_step_failure", False))

        # Creating additional outputs
        print(
            f"::set-output name=published_pipeline_id::{published_pipeline.id}"
        )
        print(
            f"::set-output name=published_pipeline_status::{published_pipeline.status}"
        )
        print(
            f"::set-output name=published_pipeline_endpoint::{published_pipeline.endpoint}"
        )
    elif parameters.get("publish_pipeline", False):
        print(
            "::error::Could not register pipeline because you did not pass a pipeline to the action"
        )

    print("::debug::Successfully finished Azure Machine Learning Train Action")
Example No. 20
run_details["experiment_name"] = os.environ.get("EXPERIMENT_NAME",
                                                default=None)

# Get workspace
print("Loading Workspace")
cli_auth = AzureCliAuthentication()
config_file_path = os.environ.get("GITHUB_WORKSPACE", default="aml_service")
config_file_name = "aml_arm_config.json"
ws = Workspace.from_config(path=config_file_path,
                           auth=cli_auth,
                           _file_name=config_file_name)
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n')

# Loading Run
print("Loading Run")
experiment = Experiment(workspace=ws, name=run_details["experiment_name"])
run = Run(experiment=experiment, run_id=run_details["run_id"])

# Only register model, if it performs better than the production model
print("Register model only if it performs better.")
try:
    # Loading run of production model
    print("Loading Run of Production Model to evaluate new model")
    production_model = Model(workspace=ws,
                             name=deployment_settings["model"]["name"])
 
    production_model_run_id = production_model.tags.get("run_id")
    production_model_run = Run(experiment=experiment,
                               run_id=production_model_run_id)

    # Comparing models
    print("Comparing Metrics of production and newly trained model")
Example No. 21
#conda activate py36



import sys
from azureml.core import VERSION


print("python version: " , sys.version)
print("azureml version: ", VERSION)

# enable logging 
# https://docs.microsoft.com/en-us/azure/machine-learning/service/how-to-enable-logging 


from azureml.core import Workspace, Experiment, Run

# Load the workspace (assumes a config.json saved locally, e.g. from the portal)
ws = Workspace.from_config()

exp = Experiment(workspace=ws, name='test_experiment')
run = exp.start_logging()
run.log("test-val", 10)
Example No. 22
def get_automl_environment(workspace: Workspace, training_pipeline_run_id: str, training_experiment_name: str):
    from azureml.core import Experiment, Run
    experiment = Experiment(workspace, training_experiment_name)
    run = Run(experiment, training_pipeline_run_id)
    step_run = list(run.get_children())[0]
    return step_run.get_environment()
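
# Example usage (the workspace object, run id and experiment name below are hypothetical):
# automl_env = get_automl_environment(ws, "a1b2c3d4-pipeline-run-id", "automl-training")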
Example No. 23
def main():

    run = Run.get_context()
    if (run.id.startswith('OfflineRun')):
        from dotenv import load_dotenv
        # For local development, set values in this section
        load_dotenv()
        workspace_name = os.environ.get("WORKSPACE_NAME")
        experiment_name = os.environ.get("EXPERIMENT_NAME")
        resource_group = os.environ.get("RESOURCE_GROUP")
        subscription_id = os.environ.get("SUBSCRIPTION_ID")
        # run_id useful to query previous runs
        run_id = "bd184a18-2ac8-4951-8e78-e290bef3b012"
        aml_workspace = Workspace.get(name=workspace_name,
                                      subscription_id=subscription_id,
                                      resource_group=resource_group)
        ws = aml_workspace
        exp = Experiment(ws, experiment_name)
    else:
        ws = run.experiment.workspace
        exp = run.experiment
        run_id = 'amlcompute'

    parser = argparse.ArgumentParser("register")

    parser.add_argument(
        "--run_id",
        type=str,
        help="Training run ID",
    )

    parser.add_argument(
        "--model_name",
        type=str,
        help="Name of the Model",
        default="CMPE258United_model.pkl",
    )

    parser.add_argument("--step_input",
                        type=str,
                        help=("input from previous steps"))

    args = parser.parse_args()
    if (args.run_id is not None):
        run_id = args.run_id
    if (run_id == 'amlcompute'):
        run_id = run.parent.id
    model_name = args.model_name
    model_path = args.step_input

    print("Getting registration parameters")

    # Load the registration parameters from the parameters file
    with open("parameters.json") as f:
        pars = json.load(f)
    try:
        register_args = pars["registration"]
    except KeyError:
        print("Could not load registration values from file")
        register_args = {"tags": []}

    model_tags = {}
    for tag in register_args["tags"]:
        try:
            mtag = run.parent.get_metrics()[tag]
            model_tags[tag] = mtag
        except KeyError:
            print(f"Could not find {tag} metric on parent run.")

    # load the model
    print("Loading model from " + model_path)
    model_file = os.path.join(model_path, model_name)
    model = joblib.load(model_file)
    parent_tags = run.parent.get_tags()
    try:
        build_id = parent_tags["BuildId"]
    except KeyError:
        build_id = None
        print("BuildId tag not found on parent run.")
        print(f"Tags present: {parent_tags}")
    try:
        build_uri = parent_tags["BuildUri"]
    except KeyError:
        build_uri = None
        print("BuildUri tag not found on parent run.")
        print(f"Tags present: {parent_tags}")

    if (model is not None):
        dataset_id = parent_tags["dataset_id"]
        if (build_id is None):
            register_aml_model(model_file, model_name, model_tags, exp, run_id,
                               dataset_id)
        elif (build_uri is None):
            register_aml_model(model_file, model_name, model_tags, exp, run_id,
                               dataset_id, build_id)
        else:
            register_aml_model(model_file, model_name, model_tags, exp, run_id,
                               dataset_id, build_id, build_uri)
    else:
        print("Model not found. Skipping model registration.")
        sys.exit(0)
Example No. 24
    filepath = "environments/data_validation_subset/RunConfig/runconfig_data_validation.yml"
    input_name_train = 'newsgroups_raw_subset_train'
    input_name_test = 'newsgroups_raw_subset_test'

dataset_train = Dataset.get_by_name(workspace, name=input_name_train)
dataset_test = Dataset.get_by_name(workspace, name=input_name_test)

# Load run Config file for data prep
run_config = RunConfiguration.load(path=os.path.join(
    os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "../..",
        filepath,
    )),
                                   name="datavalidation")

est = ScriptRunConfig(
    source_directory=os.path.dirname(os.path.realpath(__file__)),
    run_config=run_config,
    arguments=[
        '--data_folder_train',
        dataset_train.as_named_input('train').as_mount(), '--data_folder_test',
        dataset_test.as_named_input('test').as_mount(), '--local', 'no'
    ],
)

# Define the ML experiment
experiment = Experiment(workspace, "data-validation")
# Submit the experiment run; if compute is idle, this may take some time
run = experiment.submit(est)
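
# (a plausible follow-up: block until the data-validation run finishes)
run.wait_for_completion(show_output=True)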
Example No. 25
cli_auth = AzureCliAuthentication()

# Get workspace
ws = Workspace.from_config(auth=cli_auth)

# Parameterize the metrics on which the models should be compared

# Add golden data set on which all the model performance can be evaluated

# Get the latest run_id
with open("aml_config/run_id.json") as f:
    config = json.load(f)

new_model_run_id = config["run_id"]
experiment_name = config["experiment_name"]
exp = Experiment(workspace=ws, name=experiment_name)

try:
    # Get most recently registered model, we assume that is the model in production. Download this model and compare it with the recently trained model by running test with same data set.
    model_list = Model.list(ws)
    production_model = next(
        filter(
            lambda x: x.created_time == max(model.created_time
                                            for model in model_list),
            model_list,
        ))
    production_model_run_id = production_model.tags.get("run_id")
    run_list = exp.get_runs()
    # production_model_run = next(filter(lambda x: x.id == production_model_run_id, run_list))

    # Get the run history for both production model and newly trained model and compare mse
Example No. 26
# Step 3: Train Model
train_step, train_outputs = train_step(data_preprocess_outputs['train_dir'],
                                       cpu_compute_target)

# Step 4: Evaluate Model
evaluate_step, evaluate_outputs = evaluate_step(
    train_outputs['model_dir'], data_preprocess_outputs['test_dir'],
    cpu_compute_target)

# Step 5: Deploy Model
deploy_step, deploy_outputs = deploy_step(train_outputs['model_dir'],
                                          evaluate_outputs['accuracy_file'],
                                          data_preprocess_outputs['test_dir'],
                                          cpu_compute_target)

# Submit pipeline
print('Submitting pipeline ...')
pipeline_parameters = {'max_depth': 5, 'n_estimators': 500}


pipeline = Pipeline(workspace=ws,
                    steps=[
                        data_ingestion_step, data_preprocess_step, train_step,
                        evaluate_step, deploy_step
                    ])
pipeline_run = Experiment(ws, 'turbofan-pipeline').submit(
    pipeline, pipeline_parameters=pipeline_parameters)
Example No. 27
def main():
    e = Env()

    print('********************')
    print(e.source_directory)

    files = os.listdir('./aml_pipeline')
    for f in files:
        print(f)

    print('***************')

    workspace_name = e.workspace_name
    subscription_id = e.subscription_id
    resource_group = e.resource_group

    #Connect to AML Workspace
    print('workspace_name = ' + workspace_name)
    print('subscription_id = ' + subscription_id)
    print('resource_group = ' + resource_group)

    ws = Workspace.get(
        name=workspace_name,
        subscription_id=subscription_id,
        resource_group=resource_group,
    )

    print('Ready to use Azure ML {} to work with {}'.format(
        azureml.core.VERSION, ws.name))

    default_ds = ws.get_default_datastore()

    if 'diabetes dataset' not in ws.datasets:
        default_ds.upload_files(
            files=['diabetes.csv',
                   'diabetes2.csv'],  # Upload the diabetes csv files in /data
            target_path=
            'diabetes-data/',  # Put it in a folder path in the datastore
            overwrite=True,  # Replace existing files of the same name
            show_progress=True)

        #Create a tabular dataset from the path on the datastore (this may take a short while)
        tab_data_set = Dataset.Tabular.from_delimited_files(
            path=(default_ds, 'diabetes-data/*.csv'))

        # Register the tabular dataset
        try:
            tab_data_set = tab_data_set.register(workspace=ws,
                                                 name='diabetes dataset',
                                                 description='diabetes data',
                                                 tags={'format': 'CSV'},
                                                 create_new_version=True)
            print('Dataset registered.')
        except Exception as ex:
            print(ex)
    else:
        print('Dataset already registered.')

    # Create a folder for the pipeline step files
    experiment_folder = 'diabetes_pipeline'
    os.makedirs(experiment_folder, exist_ok=True)

    print(experiment_folder)

    cluster_name = "mmcomputecluster"

    try:
        # Check for existing compute target
        pipeline_cluster = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing cluster, use it.')
    except ComputeTargetException:
        # If it doesn't already exist, create it
        try:
            compute_config = AmlCompute.provisioning_configuration(
                vm_size='STANDARD_DS11_V2', max_nodes=2)
            pipeline_cluster = ComputeTarget.create(ws, cluster_name,
                                                    compute_config)
            pipeline_cluster.wait_for_completion(show_output=True)
        except Exception as ex:
            print(ex)

    # Create a Python environment for the experiment
    diabetes_env = Environment("diabetes-pipeline-env")
    diabetes_env.python.user_managed_dependencies = False  # Let Azure ML manage dependencies
    diabetes_env.docker.enabled = True  # Use a docker container

    # Create a set of package dependencies
    diabetes_packages = CondaDependencies.create(
        conda_packages=[
            'scikit-learn', 'ipykernel', 'matplotlib', 'pandas', 'pip'
        ],
        pip_packages=[
            'azureml-defaults', 'azureml-dataprep[pandas]', 'pyarrow'
        ])

    # Add the dependencies to the environment
    diabetes_env.python.conda_dependencies = diabetes_packages

    # Register the environment
    diabetes_env.register(workspace=ws)
    registered_env = Environment.get(ws, 'diabetes-pipeline-env')

    # Create a new runconfig object for the pipeline
    pipeline_run_config = RunConfiguration()

    # Use the compute you created above.
    pipeline_run_config.target = pipeline_cluster

    # Assign the environment to the run configuration
    pipeline_run_config.environment = registered_env

    print("Run configuration created.")

    # Get the training dataset
    diabetes_ds = ws.datasets.get("diabetes dataset")

    # Create a PipelineData (temporary data reference) for the prepped data folder
    prepped_data_folder = PipelineData("prepped_data_folder",
                                       datastore=ws.get_default_datastore())

    # Step 1, Run the data prep script
    prep_step = PythonScriptStep(name="Prepare Data",
                                 script_name="prep_diabetes.py",
                                 source_directory='./aml_pipeline',
                                 arguments=[
                                     '--input-data',
                                     diabetes_ds.as_named_input('raw_data'),
                                     '--prepped-data', prepped_data_folder
                                 ],
                                 outputs=[prepped_data_folder],
                                 compute_target=pipeline_cluster,
                                 runconfig=pipeline_run_config,
                                 allow_reuse=True)

    # Step 2, run the training script
    train_step = PythonScriptStep(
        name="Train and Register Model",
        source_directory='./aml_pipeline',
        script_name="train_diabetes.py",
        arguments=['--training-folder', prepped_data_folder],
        inputs=[prepped_data_folder],
        compute_target=pipeline_cluster,
        runconfig=pipeline_run_config,
        allow_reuse=True)

    print("Pipeline steps defined")

    pipeline_steps = [prep_step, train_step]
    pipeline = Pipeline(workspace=ws, steps=pipeline_steps)
    print("Pipeline is built.")

    # Create an experiment and run the pipeline
    experiment = Experiment(workspace=ws, name='jlg-exp')
    pipeline_run = experiment.submit(pipeline, regenerate_outputs=True)
    print("Pipeline submitted for execution.")
    pipeline_run.wait_for_completion(show_output=True)

    for run in pipeline_run.get_children():
        print(run.name, ':')
        metrics = run.get_metrics()
        for metric_name in metrics:
            print('\t', metric_name, ":", metrics[metric_name])

    for model in Model.list(ws):
        print(model.name, 'version:', model.version)
        for tag_name in model.tags:
            tag = model.tags[tag_name]
            print('\t', tag_name, ':', tag)
        for prop_name in model.properties:
            prop = model.properties[prop_name]
            print('\t', prop_name, ':', prop)
        print('\n')

    # Publish the pipeline from the run
    published_pipeline = pipeline_run.publish_pipeline(
        name="diabetes-training-pipeline",
        description="Trains diabetes model",
        version="1.0")

    print(published_pipeline)

    rest_endpoint = published_pipeline.endpoint
    print(rest_endpoint)
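    # A minimal sketch (an illustration, not part of the original script) of
    # invoking the published pipeline through its REST endpoint. It assumes an
    # interactive AAD login is available and reuses the 'jlg-exp' experiment name.
    import requests
    from azureml.core.authentication import InteractiveLoginAuthentication

    interactive_auth = InteractiveLoginAuthentication()
    auth_header = interactive_auth.get_authentication_header()
    response = requests.post(rest_endpoint,
                             headers=auth_header,
                             json={"ExperimentName": "jlg-exp"})
    print('Submitted pipeline run id:', response.json().get('Id'))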
Example No. 28
try:
    # Look up an existing GPU compute target by name
    gpu_compute_target = ComputeTarget(workspace=ws, name=gpu_compute_name)
    print('Found existing compute target.')
except ComputeTargetException:
    print('Creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6', 
                                                                max_nodes=5,
                                                                idle_seconds_before_scaledown=1800)

    # create the cluster
    gpu_compute_target = ComputeTarget.create(ws, gpu_compute_name, provisioning_config)

    # can poll for a minimum number of nodes and for a specific timeout. 
    # if no min node count is provided it uses the scale settings for the cluster
    gpu_compute_target.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

est = TensorFlow(source_directory=script_folder,
                compute_target=gpu_compute_target,
                pip_packages=['keras==2.0.8', 'theano', 'tensorflow==1.8.0', 'tensorflow-gpu==1.8.0', 'matplotlib', 'horovod', 'hickle'],
                entry_script='train.py', 
                use_gpu=True,
                node_count=1,
                script_params={"--remote_execution": None, "--data-folder": config["data_folder"]}
                )
                
experiment_name = "prednet_train"

exp = Experiment(ws, experiment_name)

run = exp.submit(est)

run.wait_for_completion(show_output=True)

print("done")
if args.local_run:
    #from subprocess import call, run
    import sys
    import subprocess
    env = os.environ.copy()
    if args.process_count == 1:
        env['CUDA_VISIBLE_DEVICES'] = '0'
        cmd_arry = [sys.executable, model_run_scripts[0]] + model_run_args_config
    else:
        cmd_arry = [sys.executable, '-m', 'torch.distributed.launch', '--nproc_per_node', args.process_count, model_run_scripts[0]] + model_run_args_config
    cmd_arry = [str(s) for s in cmd_arry]
    cmd = ' '.join(cmd_arry)  # human-readable command string, handy for logging
    subprocess.run(cmd_arry, env=env)
else:
    # Create experiment for model
    model_experiment = Experiment(ws, name=model_experiment_name)
    distr_config = PyTorchConfiguration(process_count=args.process_count, node_count=args.node_count)
    # create script run config for the model+config
    model_run_config = ScriptRunConfig(source_directory='.',
        script=model_run_scripts[0],
        arguments=model_run_args_config,
        compute_target=gpu_compute_target,
        environment=hf_ort_env,
        distributed_job_config=distr_config)
    
    print(f"Submitting run for model: {args.hf_model}, config: {args.run_config}")
    run = model_experiment.submit(model_run_config)
    cuda_version = "10.2" if args.use_cu102 else "11.1"
    run.set_tags({'model' : args.hf_model, 'config' : args.run_config, 'bs' : model_batchsize, 'gpus' : str(args.process_count), 'cuda': cuda_version})
    print(f"Job submitted to {run.get_portal_url()}")
# Control script for a training run
from azureml.core import Workspace
from azureml.core import Experiment
from azureml.core import Environment
from azureml.core import ScriptRunConfig
from azureml.core import Dataset

if __name__ == "__main__":
    ws = Workspace.from_config()
    dataset = Dataset.get_by_name(workspace=ws, name='bananas_dataset')

    experiment = Experiment(workspace=ws, name='bananas-experiment')

    config = ScriptRunConfig(
        source_directory='.',
        script='train.py',
        compute_target='gpu1',
        arguments=[
            '--data-path', dataset.as_named_input('input').as_mount(),
            '--output-path', './outputs',
            '--epochs', 3,
            '--batch-size', 2,
            '--learning-rate', 0.001,
            '--scale', 0.5,
            '--to-bgr'
        ],
    )
    # set up the training environment
    env = Environment.from_conda_specification(
        name='train-env',
        file_path='./train-env.yml'