def main():
    """Build and submit the single-step dimensionality-reduction pipeline.

    Loads settings from ``local.env``, resolves the workspace with an
    interactive login, assembles one ``PythonScriptStep`` that writes to a
    directory-type ``PipelineData`` output, and submits the pipeline with a
    fixed set of parameter values.
    """
    logging.warning("Loading environment variables...")
    env = Env()
    env.load_environment_variables(env_file_path="local.env")

    # Resolve the Azure Machine Learning workspace via interactive login.
    logging.warning("Getting reference to existing Azure Machine Learning workspace...")
    login = InteractiveLoginAuthentication(tenant_id=env.tenant_id)
    workspace = get_workspace(env.workspace_name, login, env.subscription_id, env.resource_group)

    # The compute target is explicitly specified here to mitigate the risk of
    # choosing an incorrect machine that would execute heavy-load experiments
    # when the pipeline is triggered via the REST API.
    compute = get_compute_target(workspace, compute_name='cpu-high-load', vm_size='STANDARD_F64S_V2')

    # Directory output of the step, stored on the workspace default datastore.
    output_dir = PipelineData(
        name="step_output_data",
        datastore=workspace.get_default_datastore(),
        is_directory=True,
    )

    run_configuration = create_run_configuration(workspace)

    # Parameters that can be overridden at submission time.
    dataset_name_param = PipelineParameter(name="latent_dim_dataset_name", default_value='')
    channels_param = PipelineParameter(name="channels_names", default_value='')
    hyper_param = PipelineParameter(name="hyperparameters", default_value='{}')

    script_arguments = [
        '--latent_dim_dataset_name', dataset_name_param,
        '--hyperparameters', hyper_param,
        '--channels_names', channels_param,
        '--experiments_config_filepath', cfg.StepsStructure.get_experiments_config_filepath(),
        '--output_folder', output_dir
    ]

    dim_reduction_step = PythonScriptStep(
        name=cfg.ExperimentNames.DIM_REDUCTION_REMOTE,
        source_directory=cfg.StepsStructure.SNAPSHOT_ROOT_DIR,
        script_name=cfg.StepsStructure.DimReduction.STEP_SCRIPT_PATH,
        arguments=script_arguments,
        inputs=[],
        outputs=[output_dir],
        compute_target=compute,
        runconfig=run_configuration,
        allow_reuse=False
    )

    pipeline = Pipeline(workspace=workspace, steps=[dim_reduction_step])

    # Values used for this particular submission.
    submission_parameters = {
        "latent_dim_dataset_name": 'dataset_001',
        # TODO: default channel names should be taken from the config.json file.
        "channels_names": "a,b,c,d",
        # TODO: default hyperparameters should be taken from the config.json file.
        "hyperparameters": "{ 'a': 10, 'b': 0.0 }"
    }

    logging.warning("Submitting experiment...(v0003)")
    experiment = Experiment(workspace, cfg.ExperimentNames.DIM_REDUCTION_REMOTE)
    experiment.submit(
        pipeline,
        pipeline_parameters=submission_parameters,
        regenerate_outputs=False)  # Allow data reuse for this run
    print_green('Experiment submitted!')
Exemplo n.º 2
0
    def __init__(self, file_name):
        """Parse the JSON configuration file and populate the settings objects.

        Reads the ``AMLConfig``, ``Credentials``, ``ClusterProperties``,
        ``JobProperties`` and ``DataReferences`` sections of ``file_name`` and
        stores the results on ``self.AMLConfig``, ``self.cred_type``,
        ``self.subscription_id``, ``self.Credentials``,
        ``self.ClusterProperties``, ``self.JobProperties`` and
        ``self.DataReference``.

        Args:
            file_name: Path of the JSON configuration file.

        Raises:
            ValueError: If the file does not exist, the scaling method is not
                ``manual`` or ``auto_scale``, or any other non-KeyError
                problem occurs while parsing. (Previously these parse errors
                tried to ``raise`` a plain string, which Python rejects with
                an unrelated ``TypeError``.)
            AttributeError: If a required configuration key is missing.
        """
        if not os.path.exists(file_name):
            raise ValueError(
                'Cannot find configuration file "{0}"'.format(file_name))

        with open(file_name, 'r') as f:
            conf = json.load(f)

        def _is_true(ref, key):
            # Boolean flags are stored as strings ("true"/"false") in the file.
            return ref[key].upper() == "TRUE"

        try:
            # region AMLConfig Section
            amlconf = conf['AMLConfig']
            self.AMLConfig = self.__AMLConfig(
                resource_group=self._encode(amlconf['resource_group']),
                location=self._encode(amlconf['location']),
                workspace=self._encode(amlconf['workspace']),
                experimentation=self._encode(amlconf['experimentation']))
            # endregion

            # region Credentials Section
            creds = conf['Credentials']
            self.cred_type = creds['type']
            self.subscription_id = creds['subscription_id']
            # NOTE(review): self.Credentials is left unset when the type is
            # neither of the two handled below (or when 'sp_credentials' is
            # selected but its section is absent) - confirm that is intended.
            if self.cred_type == 'sp_credentials' and 'sp_credentials' in creds:
                spcreds = creds['sp_credentials']
                aad_client_id = spcreds['aad_client_id']
                aad_secret_key = spcreds['aad_secret']
                aad_tenant = spcreds['aad_tenant']
                self.Credentials = ServicePrincipalAuthentication(
                    tenant_id=aad_tenant,
                    username=aad_client_id,
                    password=aad_secret_key)
            elif self.cred_type == 'userpass_credentials':
                self.Credentials = InteractiveLoginAuthentication(force=False)
            # endregion

            # region ClusterProperties Section
            clusterProperties = conf['ClusterProperties']
            vmPriority = clusterProperties['vm_priority']
            vmSize = clusterProperties['vm_size']
            cluster_name = self._encode(clusterProperties['cluster_name'])
            scaling_method = clusterProperties['scaling']['scaling_method']
            # The settings for the chosen method live under a key of that name.
            scaling = clusterProperties['scaling'][scaling_method]
            if scaling_method == "manual":
                # A fixed-size cluster: min and max are the same node count.
                minimumNodeCount = scaling['target_node_count']
                maximumNodeCount = minimumNodeCount
            elif scaling_method == "auto_scale":
                minimumNodeCount = scaling['minimum_node_count']
                maximumNodeCount = scaling['maximum_node_count']
            else:
                raise ValueError(
                    "Parsing error, scaling undefined - needs to be manual or auto_scale"
                )
            self.ClusterProperties = self.__AMLClusterProperties(
                vm_size=vmSize,
                vm_priority=vmPriority,
                scaling_method=scaling_method,
                minimumNodeCount=minimumNodeCount,
                maximumNodeCount=maximumNodeCount,
                cluster_name=cluster_name)
            # endregion

            # region JobProperties Section
            jobProperties = conf['JobProperties']
            jobEstimator = jobProperties['estimator']
            self.JobProperties = self.__AMLJobProperties(
                jobNamePrefix=str(jobProperties['jobNamePrefix']),
                jobEstimatorType=jobEstimator['estimatorType'],
                jobNodeCount=int(jobProperties['nodeCount']),
                jobProcessCount=int(jobProperties['processCount']),
                jobScriptPath=jobEstimator['scriptPath'],
                jobScript=jobEstimator['script'],
                jobScriptArgs=jobEstimator['scriptArgsDict'],
                jobDistributedBackEnd=jobEstimator['distributedBackEnd'],
                jobPipPackages=jobEstimator['pipPackages'])
            # endregion

            # region DataReference Section
            dataReference = conf['DataReferences']
            # Both lists are optional: a missing list key yields an empty
            # list. A key missing *inside* an entry is no longer swallowed
            # (which silently dropped that entry and all following ones); it
            # is reported through the KeyError handler below.
            localDirectoryBlobList = [
                self.__AMLBlobDataRef(
                    dataref_id=ref['dataref_id'],
                    localDirectoryName=ref['localDirectoryName'],
                    remoteMountPath=ref['remoteMountPath'],
                    downloadToComputeNodeBeforeExecution=_is_true(
                        ref, 'downloadToComputeNodeBeforeExecution'),
                    remoteBlobContainer=ref['remoteBlobContainer'],
                    uploadContentBeforeRun=_is_true(
                        ref, 'uploadContentBeforeRun'),
                    overwriteOnUpload=_is_true(ref, 'overwriteOnUpload'),
                    downloadContentAfterRun=_is_true(
                        ref, 'downloadContentAfterRun'),
                    storageAccountName=ref['storageAccountName'],
                    storageAccountKey=ref['storageAccountKey'])
                for ref in dataReference.get('localDirectoryBlob', [])
            ]
            localDirectoryFilesList = [
                self.__AMLFilesDataRef(
                    dataref_id=ref['dataref_id'],
                    localDirectoryName=ref['localDirectoryName'],
                    remoteMountPath=ref['remoteMountPath'],
                    downloadToComputeNodeBeforeExecution=_is_true(
                        ref, 'downloadToComputeNodeBeforeExecution'),
                    remoteFileShare=ref['remoteFileShare'],
                    uploadContentBeforeRun=_is_true(
                        ref, 'uploadContentBeforeRun'),
                    overwriteOnUpload=_is_true(ref, 'overwriteOnUpload'),
                    downloadContentAfterRun=_is_true(
                        ref, 'downloadContentAfterRun'),
                    storageAccountName=ref['storageAccountName'],
                    storageAccountKey=ref['storageAccountKey'])
                for ref in dataReference.get('localDirectoryFiles', [])
            ]
            self.DataReference = self.__DataReference(
                localDirectoryBlobList=localDirectoryBlobList,
                localDirectoryFilesList=localDirectoryFilesList)
            # endregion

        except KeyError as err:
            raise AttributeError(
                'Please provide a value for "{0}" configuration key'.format(
                    err.args[0]))
        except Exception as err:
            # Raise a real exception type so the message reaches the caller.
            raise ValueError('Error in config parsing : ' + str(err))
Exemplo n.º 3
0
def get_mlflow_client(
    workspace_kwargs: dict = None, service_principal_kwargs: dict = None
) -> MlflowClient:
    """
    Set remote tracking URI for mlflow to AzureML workspace

    Neither argument is modified: the ``auth`` entry handed to ``Workspace``
    is added to an internal copy of ``workspace_kwargs``.

    Parameters
    ----------
    workspace_kwargs: dict
        AzureML Workspace configuration to use for remote MLFlow tracking.
        ``None`` or an empty dict will result in local logging by the
        MlflowClient.
        Example::

            `{
                 "subscription_id":<value>,
                 "resource_group":<value>,
                 "workspace_name":<value>
             }`
    service_principal_kwargs: dict
        AzureML ServicePrincipalAuthentication keyword arguments. ``None`` or
        an empty dict will result in interactive authentication.
        Example::

            `{
                 "tenant_id":<value>,
                 "service_principal_id":<value>,
                 "service_principal_password":<value>
             }`

    Returns
    -------
    client: mlflow.tracking.MlflowClient
        Client with tracking uri set to AzureML if configured.
    """
    logger.info("Creating MLflow tracking client.")

    tracking_uri = None

    # Get AzureML tracking_uri if using Azure as backend
    if workspace_kwargs:
        required_keys = ["subscription_id", "resource_group", "workspace_name"]
        _validate_dict(workspace_kwargs, required_keys)

        # Copy before adding "auth" so the caller's dict stays untouched and
        # can be reused for later calls.
        workspace_settings = dict(workspace_kwargs)

        msg = "Configuring AzureML backend with {auth_type} authentication."
        if service_principal_kwargs:
            required_keys = [
                "tenant_id",
                "service_principal_id",
                "service_principal_password",
            ]
            _validate_dict(service_principal_kwargs, required_keys)

            logger.info(msg.format(auth_type="ServicePrincipalAuthentication"))
            workspace_settings["auth"] = ServicePrincipalAuthentication(
                **service_principal_kwargs
            )
        else:
            logger.info(msg.format(auth_type="interactive"))
            workspace_settings["auth"] = InteractiveLoginAuthentication(force=True)

        tracking_uri = Workspace(**workspace_settings).get_mlflow_tracking_uri()

    # tracking_uri is still None here when no workspace was configured.
    mlflow.set_tracking_uri(tracking_uri)

    return MlflowClient(tracking_uri)
print("SDK version:", azureml.core.VERSION)

# COMMAND ----------

# Identifiers of the workspace that the next cell creates or reuses.
subscription_id = "31a7429d-17c4-4831-9ab1-6f817b82e456"
resource_group = "StatkraftDemo"
workspace_name = "statkraftamlte8756027149"
workspace_region = "westus2"

# COMMAND ----------

# Sign in interactively against a specific tenant, then create the workspace.
# exist_ok=True is passed so that an already existing workspace is accepted.
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core.workspace import Workspace
my_auth = InteractiveLoginAuthentication(tenant_id="72f988bf-86f1-41af-91ab-2d7cd011db47")
# Template: Workspace.create(name=<workspace_name>, auth=my_auth, subscription_id=<subscription_id>, resource_group=<resource_group>)

ws = Workspace.create(
    name=workspace_name,
    auth=my_auth,
    subscription_id=subscription_id,
    resource_group=resource_group,
    location=workspace_region,
    exist_ok=True,
)

# Bare expression: in a notebook the returned details are shown as cell output.
ws.get_details()

# COMMAND ----------

ws = Workspace(workspace_name = workspace_name,
               subscription_id = subscription_id,
Exemplo n.º 5
0

# Publish the pipeline behind `pipeline_run` so it can be triggered over REST.

published_pipeline = pipeline_run.publish_pipeline(
    name='Diabetes_Parallel_Batch_Pipeline', description='Batch scoring of diabetes data', version='1.0')

# Bare expression: shown as cell output when run in a notebook.
published_pipeline

rest_endpoint = published_pipeline.endpoint
print(rest_endpoint)


from azureml.core.authentication import InteractiveLoginAuthentication

# Obtain an authentication header to send with the REST request.
interactive_auth = InteractiveLoginAuthentication()
auth_header = interactive_auth.get_authentication_header()
print('Authentication header ready.')


import requests

# Trigger a run of the published pipeline under the given experiment name.
response = requests.post(rest_endpoint,
                         headers=auth_header,
                         json={"ExperimentName": "Batch_Pipeline_via_REST"})
# Fail with the HTTP error itself instead of a confusing KeyError/JSON error
# when the service rejects the request.
response.raise_for_status()
run_id = response.json()["Id"]
run_id


from azureml.pipeline.core.run import PipelineRun
Exemplo n.º 6
0
def main():
    """Build the SmartScheduler AutoML training pipeline, then run or publish it.

    Steps performed, in order:

    1. Connect to the workspace with an interactive login and obtain the
       compute target through ``get_compute``.
    2. Prepare a Docker/conda run configuration for the script steps.
    3. If the training dataset is not registered yet, query the source
       database, split the rows 80/20, upload both CSV files to the datastore
       and register them as train/test datasets; otherwise load the
       registered datasets.
    4. Configure an AutoML regression step on ``Total_Duration_Min`` followed
       by an optional evaluation step and a model registration step.
    5. Depending on ``e.execute_pipeline``, either submit the pipeline and
       wait for it, or validate and publish it.
    """
    e = Env()

    from azureml.core.authentication import InteractiveLoginAuthentication

    interactive_auth = InteractiveLoginAuthentication(
        tenant_id=os.environ.get("AZURE_TENANT_ID"))
    subscription = os.environ.get("CSUBSCRIPTION")
    workspace_name = e.workspace_name
    resource_group = e.resource_group

    aml_workspace = Workspace.get(
        name=workspace_name,
        subscription_id=subscription,
        resource_group=resource_group,
        auth=interactive_auth
    )

    from ml_service.util.attach_compute import get_compute

    # Get Azure machine learning cluster
    # If not present then get_compute will create a compute based on environment variables
    aml_compute = get_compute(
        aml_workspace,
        e.compute_name,
        e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    print("SDK version: ", azureml.core.VERSION)

    # Variable names that can be passed in as parameter values
    from azureml.pipeline.core.graph import PipelineParameter
    from azureml.core import Datastore

    model_name_param = PipelineParameter(
        name="model_name", default_value=e.model_name)
    # NOTE(review): the next three parameters are declared but not passed to
    # any step below - confirm whether they should be wired in or removed.
    dataset_version_param = PipelineParameter(
        name="dataset_version", default_value=e.dataset_version)
    data_file_path_param = PipelineParameter(
        name="data_file_path", default_value="none")
    caller_run_id_param = PipelineParameter(
        name="caller_run_id", default_value="none")

    if e.datastore_name:
        datastore_name = e.datastore_name
    else:
        datastore_name = aml_workspace.get_default_datastore().name

    # Get the datastore whether it is the default or named store
    datastore = Datastore.get(aml_workspace, datastore_name)

    from azureml.core import Environment

    # RUN Configuration
    # Must have this process to work with AzureML-SDK 1.0.85
    from azureml.core.runconfig import RunConfiguration, DEFAULT_CPU_IMAGE
    from azureml.core.conda_dependencies import CondaDependencies

    try:
        app_env = Environment(name="smartschedule_env")
        app_env.register(workspace=aml_workspace)
    except Exception as err:
        # Best effort: keep going, but show what actually went wrong instead
        # of hiding it (a bare ``except:`` would also trap KeyboardInterrupt).
        print("Environment not found", err)

    # Create a new runconfig object
    aml_run_config = RunConfiguration()

    # Use the resolved name so the variable is also set when e.datastore_name
    # is empty and the workspace default datastore was selected above.
    aml_run_config.environment.environment_variables["DATASTORE_NAME"] = datastore_name  # NOQA: E501

    # Use the aml_compute you created above.
    aml_run_config.target = aml_compute

    # Enable Docker
    aml_run_config.environment.docker.enabled = True

    # Set Docker base image to the default CPU-based image
    aml_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

    # Let Azure ML build the conda environment from the dependencies below
    aml_run_config.environment.python.user_managed_dependencies = False

    app_conda_deps = CondaDependencies.create(
        conda_packages=['pandas','scikit-learn', 'libgcc','pyodbc', 'sqlalchemy', 'py-xgboost==0.90'],
        pip_packages=['azureml-sdk[automl,explain,contrib,interpret]==1.4.0', 'xgboost==0.90', 'azureml-dataprep==1.4.6', 'pyarrow', 'azureml-defaults==1.4.0', 'azureml-train-automl-runtime==1.4.0'], pin_sdk_version=False)

    # Specify CondaDependencies obj, add necessary packages
    aml_run_config.environment.python.conda_dependencies = app_conda_deps

    print ("Run configuration created.")

    from azure.keyvault.secrets import SecretClient
    from azure.identity import DefaultAzureCredential
    import pandas as pd
    import pyodbc

    def get_data(sql_string, columns):
        """Run ``sql_string`` against the source database.

        The database password is read from the ``database-connection`` secret
        in Key Vault. Returns a DataFrame restricted to ``columns``; any
        error is printed and re-raised. The connection is always closed.
        """
        credential = DefaultAzureCredential()

        secret_client = SecretClient("https://smrtschd-aml-kv.vault.azure.net", credential=credential)
        secret = secret_client.get_secret("database-connection")

        server = 'starlims-sql.database.windows.net'
        database = 'QM12_DATA_AUTOMATION'
        username = '******'
        password = secret.value
        driver = '{ODBC Driver 17 for SQL Server}'
        conn = pyodbc.connect('Driver='+driver+';'+
                            'Server='+server+';'+
                            'Database='+database+';'+
                            'PORT=1433;'+
                            'UID='+username+';'+
                            'PWD='+password+'; MARS_Connection=Yes'
        )

        try:
            query_result = pd.read_sql_query(sql_string, conn)
            return pd.DataFrame(query_result, columns=columns)
        except Exception as err:
            print(err)
            raise
        finally:
            # Release the database connection on success and on failure.
            conn.close()

    sql_str = "SELECT " \
            "  Dept " \
            ", Method " \
            ", Servgrp " \
            ", Runno " \
            ", TestNo " \
            ", Testcode " \
            ", Total_Duration_Min " \
            ", Total_Duration_Hr " \
            ", Usrnam " \
            ", Eqid " \
            ", Eqtype " \
        "FROM dbo.Draft " \
        "order by TESTCODE, RUNNO, dept, method;"

    columns = ["Dept", "Method", "Servgrp", "Runno", "TestNo", "Testcode", "Total_Duration_Min", "Total_Duration_Hr", "Usrnam", "Eqid","Eqtype"]

    from azureml.core import Dataset
    from sklearn.model_selection import train_test_split

    if e.train_dataset_name not in aml_workspace.datasets:
        # First run: pull the rows, split them and register both datasets.
        source_df = get_data(sql_str, columns)

        train_df, test_df = train_test_split(source_df, test_size=0.2)

        MY_DIR = "data"

        if not os.path.isdir(MY_DIR):
            os.makedirs(MY_DIR)
        else:
            print("Folder ", MY_DIR, " is already created")

        files = ["data/train_data.csv","data/test_data.csv"]

        # The full frame used to be written to files[0] first and was then
        # immediately overwritten by the training split; only the splits are
        # written now.
        train_df.to_csv(files[0], header=True, index=False)
        test_df.to_csv(files[1], header=True, index=False)
        datastore.upload_files(
            files=files,
            target_path='data/',
            overwrite=True
        )

        from azureml.data.data_reference import DataReference

        blob_input_data_test = DataReference(
            datastore=datastore,
            data_reference_name="smartschedulertest",
            path_on_datastore="data/test_data.csv"
        )
        test_data = Dataset.Tabular.from_delimited_files(blob_input_data_test)
        test_data.register(aml_workspace, e.test_dataset_name, create_new_version=True)

        blob_input_data_train = DataReference(
            datastore=datastore,
            data_reference_name="smartschedulertrain",
            path_on_datastore="data/train_data.csv"
        )
        train_data = Dataset.Tabular.from_delimited_files(blob_input_data_train)
        train_data.register(aml_workspace, e.train_dataset_name, create_new_version=True)

    else:
        print("getting from the datastore instead of uploading")

        train_data = Dataset.get_by_name(aml_workspace, name=e.train_dataset_name)
        test_data = Dataset.get_by_name(aml_workspace, name=e.test_dataset_name)

    # Preview the first five rows of the training data. (This is only a
    # preview; no minimum record count is enforced here.) The dataset is
    # materialised once and the preview reused for the prints further down.
    preview_df = train_data.to_pandas_dataframe().head(5)

    print(preview_df.shape)
    print(preview_df)

    label_column = "Total_Duration_Min"

    import random
    import string

    def randomString(stringLength=15):
        """Return ``stringLength`` random lowercase ASCII letters."""
        letters = string.ascii_lowercase
        return ''.join(random.choice(letters) for i in range(stringLength))

    from azureml.core import Experiment

    experiment = Experiment(aml_workspace, "SmartScheduler_Pipeline")

    import logging

    # A random suffix gives the AutoML step a distinct name on each build.
    aml_name = 'smart_scheduler_' + randomString(5)
    print(aml_name)

    print(preview_df.head(5))
    print(preview_df.shape)
    print(preview_df.dtypes)

    # Remove Features that are not necessary.
    train_data = train_data.drop_columns(["Runno","TestNo","Total_Duration_Hr"])
    test_data = test_data.drop_columns(["Runno","TestNo","Total_Duration_Hr"])

    print(train_data.to_pandas_dataframe())
    print(test_data.to_pandas_dataframe())

    from azureml.automl.core.featurization import FeaturizationConfig

    # Declare how AutoML should treat each column instead of letting it guess.
    featurization_config = FeaturizationConfig()
    featurization_config.blocked_transformers = ['LabelEncoder']
    featurization_config.add_column_purpose('Dept', 'CategoricalHash')
    featurization_config.add_transformer_params('HashOneHotEncoder',['Method'], {"number_of_bits":3})
    featurization_config.add_column_purpose('Servgrp', 'CategoricalHash')
    featurization_config.add_column_purpose('Testcode', 'Numeric')
    featurization_config.add_column_purpose('Usrnam', 'CategoricalHash')
    featurization_config.add_column_purpose('Eqid', 'CategoricalHash')
    featurization_config.add_column_purpose('Eqtype', 'CategoricalHash')

    from azureml.pipeline.core import Pipeline, PipelineData
    from azureml.pipeline.steps import PythonScriptStep

    automl_settings = {
        "iteration_timeout_minutes": 5,
        "iterations": 5,
        "enable_early_stopping": True,
        "primary_metric": 'spearman_correlation',
        "verbosity": logging.INFO,
        # The AutoMLConfig parameter is spelled "n_cross_validations"; the
        # previous key "n_cross_validation" is not a documented setting.
        "n_cross_validations": 5
    }

    automl_config = AutoMLConfig(task="regression",
                    debug_log='automated_ml_errors.log',
                    training_data=train_data,
                    featurization=featurization_config,
                    blacklist_models=['XGBoostRegressor'],
                    label_column_name=label_column,
                    compute_target=aml_compute,
                    **automl_settings)

    from azureml.pipeline.steps import AutoMLStep
    from azureml.pipeline.core import TrainingOutput

    metrics_output_name = 'metrics_output'
    best_model_output_name = 'best_model_output'

    metrics_data = PipelineData(name = 'metrics_data',
                    datastore = datastore,
                    pipeline_output_name=metrics_output_name,
                    training_output=TrainingOutput(type='Metrics'))

    model_data = PipelineData(name='model_data',
                datastore=datastore,
                pipeline_output_name=best_model_output_name,
                training_output=TrainingOutput(type='Model'))

    trainWithAutomlStep = AutoMLStep(
                        name=aml_name,
                        automl_config=automl_config,
                        passthru_automl_config=False,
                        outputs=[metrics_data, model_data],
                        allow_reuse=True
    )

    evaluate_step = PythonScriptStep(
        name="Evaluate Model",
        script_name='./evaluate/evaluate_model.py',
        compute_target=aml_compute,
        source_directory='../app',
        arguments=[
            "--model_name", model_name_param,
            "--allow_run_cancel", e.allow_run_cancel
        ]
    )

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name='register/register_model2.py',
        compute_target=aml_compute,
        source_directory='../app',
        inputs=[model_data],
        arguments=[
            "--model_name", model_name_param,
            "--model_path", model_data,
            "--ds_name", e.train_dataset_name
        ],
        runconfig=aml_run_config,
        allow_reuse=False
    )

    if (e.run_evaluation).lower() == 'true':
        print("Include evaluation step before register step.")
        evaluate_step.run_after(trainWithAutomlStep)
        register_step.run_after(evaluate_step)
        pipeline_steps = [ trainWithAutomlStep, evaluate_step, register_step ]
    else:
        print("Exclude the evaluation step and run register step")
        register_step.run_after(trainWithAutomlStep)
        pipeline_steps = [ trainWithAutomlStep, register_step ]

    print( "this is the value for execute pipeline: {}".format(e.execute_pipeline))

    if (e.execute_pipeline).lower() == 'true':
        # Execute the pipe normally during testing and debugging
        print("Pipeline submitted for execution.")
        pipeline = Pipeline(workspace = aml_workspace, steps=pipeline_steps)
        pipeline_run = experiment.submit(pipeline)
        pipeline_run.wait_for_completion()
        print("Pipeline is built.")
    else:
        # Generates pipeline that will be called in ML Ops.
        # (A bare ``train_pipeline._set_experiment_name`` attribute access,
        # which did nothing, used to sit here and was removed.)
        train_pipeline = Pipeline(workspace=aml_workspace, steps=pipeline_steps)
        train_pipeline.validate()
        published_pipeline = train_pipeline.publish(
            name=e.pipeline_name,
            description="Model training/retraining pipeline",
            version=e.build_id
        )
        print(f'Published pipeline: {published_pipeline.name}')
        print(f'for build {published_pipeline.version}')
Exemplo n.º 7
0
#     name: ***
#     vm_size : ***
#     vm_priority : ***
#     min_nodes : 0
#     max_nodes : 1
#     idle_seconds_before_scaledown: 1200
#
#   ...

# Load the YAML setup file described in the comment block above.
with open(cfg_file) as f:
    amlsetup = yaml.safe_load(f)

# Every value read below comes from the "Environment" section.
_env_settings = amlsetup["Environment"]

###############################################################################
# Login to Azure
###############################################################################
interactive_auth = InteractiveLoginAuthentication(tenant_id=_env_settings["tenant_id"])
# sp_auth = ServicePrincipalAuthentication(tenant_id="***", service_principal_id="***", service_principal_password="******", _enable_caching=False)

###############################################################################
# Create workspace
###############################################################################
print("Setting up workspace:")

# An environment variable, when set, takes precedence over the YAML value.
subscription_id = os.getenv(
    "SUBSCRIPTION_ID", default=_env_settings["subscription_id"])
resource_group = os.getenv(
    "RESOURCE_GROUP", default=_env_settings["resource_group"])
workspace_name = os.getenv(
    "WORKSPACE_NAME", default=_env_settings["workspace_name"])
workspace_region = os.getenv(
    "WORKSPACE_REGION", default=_env_settings["workspace_region"])
Exemplo n.º 8
0
# tutorial/01-create-workspace.py
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core import Workspace

# Sign in interactively against the target tenant.
interactive_auth = InteractiveLoginAuthentication(
    tenant_id="99e1e721-7184-498e-8aff-b2ad4e53c1c2",
)

# Create the workspace; create_resource_group=True is passed along with the
# resource group name.
ws = Workspace.create(
    name='azure-ml',
    subscription_id='59e1d56a-8a2d-48a7-9cd3-a52c1a268c55',
    resource_group='cloud-ml',
    create_resource_group=True,
    location='eastus2',
    auth=interactive_auth,
)

# write out the workspace details to a configuration file: .azureml/config.json
ws.write_config(path='.azureml')
Exemplo n.º 9
0
from azureml.core import Workspace
from azureml.core.authentication import InteractiveLoginAuthentication

# force=True is passed to request a fresh interactive login for this tenant.
forced_interactive_auth = InteractiveLoginAuthentication(
    tenant_id="my-tenant-id", force=True)

# Hand the authentication object to the workspace explicitly; previously it
# was created but never used by the from_config() call.
ws = Workspace.from_config(auth=forced_interactive_auth)
# COMMAND ----------

# MAGIC %run ./99-Shared-Functions-and-Settings

# COMMAND ----------

# MAGIC %md
# MAGIC ### Instantiate the Workspace and Experiment objects

# COMMAND ----------

from azureml.core import Workspace, Experiment, Run
from azureml.core.authentication import InteractiveLoginAuthentication

# Interactive authentication object; passed to the Workspace below.
up = InteractiveLoginAuthentication()
# The returned header is discarded - presumably this call is only here to
# trigger the login up front; TODO confirm.
up.get_authentication_header()

# AZURE_ML_CONF and pyspark_experiment_name are not defined in this cell -
# presumably they come from the shared notebook run above; verify.
ws = Workspace(**AZURE_ML_CONF, auth=up)
experiment = Experiment(ws, pyspark_experiment_name)

# COMMAND ----------

# MAGIC %md
# MAGIC ### Find best performing run
# MAGIC To find the best performing model run from our experiment, we have several options.
# MAGIC
# MAGIC We can:
# MAGIC 1. Use the Azure Portal to compare runs
# MAGIC 1. Use Python to compare runs
Exemplo n.º 11
0
# tutorial/01-create-workspace.py
from azureml.core.authentication import InteractiveLoginAuthentication
from azureml.core import Workspace

# Sign in interactively; replace the placeholder with the real tenant id.
interactive_auth = InteractiveLoginAuthentication(
    tenant_id="xxxxx",
)

# Create the workspace; create_resource_group=False is passed along with the
# resource group name. The 'xxxx' values are placeholders.
ws = Workspace.create(
    name='azure-ml',
    subscription_id='xxxx',
    resource_group='xxxx',
    create_resource_group=False,
    location='eastus2',
    auth=interactive_auth,
)

# write out the workspace details to a configuration file: .azureml/config.json
ws.write_config(path='.azureml')