def main():
    """Build, validate and publish the production model-deployment pipeline.

    All settings are read from environment variables after ``load_dotenv()``
    has been called. The pipeline consists of a single ``PythonScriptStep``
    that runs the script named by ``DEPLOY_PROD_SCRIPT_PATH`` and receives
    three pipeline parameters: ``release_id``, ``model_name`` and
    ``service_name``.

    Raises:
        ValueError: If the ``BASE_NAME`` environment variable is not set
            (the workspace name is derived from it).
    """
    load_dotenv()

    base_name = os.environ.get("BASE_NAME")
    if base_name is None:
        # Concatenating None with a str would otherwise fail with an opaque
        # TypeError that does not mention which variable is missing.
        raise ValueError("BASE_NAME environment variable is not set")
    workspace_name = base_name + "-AML-WS"

    resource_group = os.environ.get("RESOURCE_GROUP")
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    deploy_script_path = os.environ.get("DEPLOY_PROD_SCRIPT_PATH")
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_CPU_SKU")
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
    model_name = os.environ.get("MODEL_NAME")
    build_id = os.environ.get("BUILD_BUILDID")
    pipeline_name = os.environ.get("DEPLOY_PROD_PIPELINE_NAME")
    service_name = os.environ.get("DEPLOY_PROD_SERVICE_NAME")
    sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN")

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(
        workspace_name,
        resource_group,
        subscription_id,
        tenant_id,
        app_id,
        app_secret)
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(
        aml_workspace,
        compute_name,
        vm_size)
    if aml_compute is not None:
        print(aml_compute)

    # Python environment for the deploy step.
    conda_dependencies = CondaDependencies.create(
        conda_packages=[
            'numpy',
            'pandas',
            'scikit-learn'
        ],
        pip_packages=[
            'azureml-core==1.0.72.*',
            'azureml-sdk==1.0.72.*',
            'azure-storage',
            'azure-storage-blob',
            'azureml-dataprep',
            'azureml-datadrift==1.0.72.*'
        ],
        pin_sdk_version=False
    )

    print(conda_dependencies.serialize_to_string())

    run_config = RunConfiguration(
        framework='Python',
        conda_dependencies=conda_dependencies
    )
    run_config.environment.docker.enabled = True

    # Pipeline parameters get their own names so the plain string values
    # read from the environment are not silently rebound to another type.
    model_name_param = PipelineParameter(
        name="model_name", default_value=model_name
    )
    print(model_name_param)
    release_id_param = PipelineParameter(
        name="release_id", default_value="0"
    )
    print(release_id_param)
    service_name_param = PipelineParameter(
        name="service_name", default_value=service_name
    )
    print(service_name_param)

    deploy_step = PythonScriptStep(
        name="Deploy Prod Model",
        script_name=deploy_script_path,
        compute_target=aml_compute,
        source_directory=sources_directory_train,
        arguments=[
            "--release_id", release_id_param,
            "--model_name", model_name_param,
            "--service_name", service_name_param
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Deploy Prod created")

    steps = [deploy_step]

    deploy_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    deploy_pipeline.validate()
    published_pipeline = deploy_pipeline.publish(
        name=pipeline_name,
        description="Model deploy Prod pipeline",
        version=build_id
    )
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
Exemplo n.º 2
0
    script_name='model_registration.py',
    arguments=['--input_dir', data_metrics, '--output_dir', data_output],
    compute_target=gpu_compute_target,
    inputs=[data_metrics],
    outputs=[data_output],
    source_directory=script_folder,
    runconfig=gpu_compute_run_config,
    allow_reuse=True,
    hash_paths=['.'])
# Order the registration step after the hyperdrive step.
registration_step.run_after(hd_step)

# Assemble the pipeline from the three steps defined earlier in this script.
pipeline = Pipeline(
    workspace=ws, steps=[get_logits_from_xception, hd_step, registration_step])
print("Pipeline is built")

pipeline.validate()
print("Simple validation complete")

# Name used for the published pipeline below.
pipeline_name = 'kd_teach_the_student'

# We need to disable (delete) previously published pipelines, because we
# can't have two published pipelines with the same name.
# NOTE(review): disable_pipeline is a project helper not visible here;
# dry_run=False presumably makes the change effective -- confirm.
from utils.azure import disable_pipeline
disable_pipeline(pipeline_name=pipeline_name, prefix='', dry_run=False)

published_pipeline = pipeline.publish(name=pipeline_name)
print("Student pipeline published")

schedule = Schedule.create(workspace=ws,
                           name=pipeline_name + "_sch",
                           pipeline_id=published_pipeline.id,
                           experiment_name=pipeline_name,
Exemplo n.º 3
0
def build_pipeline(dataset, ws, config):
    """Build, validate, publish and schedule the prednet pipeline for *dataset*.

    The pipeline has four steps that run in sequence: video decoding, data
    preparation, hyperparameter-tuned training (HyperDrive) and model
    registration. After publishing, a datastore-triggered schedule is
    created for the published pipeline.

    Args:
        dataset: Name of the dataset; used in datastore paths, as a script
            argument and as the suffix of the pipeline name.
        ws: Azure ML workspace the pipeline is built in.
        config: Mapping providing ``cpu_compute``, ``gpu_compute``,
            ``acr_address``, ``acr_username`` and ``acr_password``.

    Returns:
        The name of the published pipeline (``'prednet_' + dataset``).
    """
    print("building pipeline for dataset %s in workspace %s" % (dataset, ws.name))

    base_dir = '.'

    def_blob_store = ws.get_default_datastore()

    # folder for scripts that need to be uploaded to Aml compute target
    script_folder = './scripts'
    os.makedirs(script_folder, exist_ok=True)

    script_files = (
        'video_decoding.py',
        'pipelines_submit.py',
        'pipelines_create.py',
        'train.py',
        'data_utils.py',
        'prednet.py',
        'keras_utils.py',
        'data_preparation.py',
        'model_registration.py',
        'config.json',
    )
    for script_file in script_files:
        shutil.copy(os.path.join(base_dir, script_file), script_folder)

    # --- CPU compute target: reuse if it exists, otherwise create it ---
    cpu_compute_name = config['cpu_compute']
    try:
        cpu_compute_target = AmlCompute(ws, cpu_compute_name)
        print("found existing compute target: %s" % cpu_compute_name)
    except Exception:
        # Any lookup failure falls back to creating the cluster. Catching
        # Exception (not a bare except) lets Ctrl-C / SystemExit propagate.
        print("creating new compute target")

        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_D2_V2',
            max_nodes=4,
            idle_seconds_before_scaledown=1800)
        cpu_compute_target = ComputeTarget.create(ws, cpu_compute_name, provisioning_config)
        cpu_compute_target.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)

    # use get_status() to get a detailed status for the current cluster.
    print(cpu_compute_target.get_status().serialize())

    # --- GPU compute target: reuse if it exists, otherwise create it ---
    gpu_compute_name = config['gpu_compute']

    try:
        gpu_compute_target = AmlCompute(workspace=ws, name=gpu_compute_name)
        print("found existing compute target: %s" % gpu_compute_name)
    except Exception:
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6',
            max_nodes=10,
            idle_seconds_before_scaledown=1800)

        # create the cluster
        gpu_compute_target = ComputeTarget.create(ws, gpu_compute_name, provisioning_config)

        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it uses the scale settings for the cluster
        gpu_compute_target.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)

    # Reporting the GPU cluster status is best-effort: a failure here is
    # printed and does not abort pipeline construction.
    try:
        print(gpu_compute_target.get_status().serialize())
    except Exception as e:
        print("Could not get status of compute target.")
        print(e)

    # conda dependencies for compute targets
    cpu_cd = CondaDependencies.create(
        conda_packages=["py-opencv=3.4.2"],
        pip_indexurl='https://azuremlsdktestpypi.azureedge.net/sdk-release/Candidate/604C89A437BA41BD942B4F46D9A3591D',
        pip_packages=[
            "azure-storage-blob==1.5.0",
            "hickle==3.4.3",
            "requests==2.21.0",
            "sklearn",
            "pandas==0.24.2",
            "azureml-sdk",
            "numpy==1.16.2",
            "pillow==6.0.0",
        ])

    # Runconfigs
    cpu_compute_run_config = RunConfiguration(conda_dependencies=cpu_cd)
    cpu_compute_run_config.environment.docker.enabled = True
    cpu_compute_run_config.environment.docker.gpu_support = False
    cpu_compute_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
    cpu_compute_run_config.environment.spark.precache_packages = False

    print("PipelineData object created")

    # DataReference to where video data is stored.
    video_data = DataReference(
        datastore=def_blob_store,
        data_reference_name="video_data",
        path_on_datastore=os.path.join("prednet", "data", "video", dataset))
    print("DataReference object created")

    # Intermediate data passed between steps, all on the default datastore.
    raw_data = PipelineData("raw_video_fames", datastore=def_blob_store)
    preprocessed_data = PipelineData("preprocessed_video_frames", datastore=def_blob_store)
    data_metrics = PipelineData("data_metrics", datastore=def_blob_store)
    data_output = PipelineData("output_data", datastore=def_blob_store)

    # prepare dataset for training/testing prednet
    video_decoding = PythonScriptStep(
        name='decode_videos',
        script_name="video_decoding.py",
        arguments=["--input_data", video_data, "--output_data", raw_data],
        inputs=[video_data],
        outputs=[raw_data],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.']
    )
    print("video_decode step created")

    # prepare dataset for training/testing recurrent neural network
    data_prep = PythonScriptStep(
        name='prepare_data',
        script_name="data_preparation.py",
        arguments=["--input_data", raw_data, "--output_data", preprocessed_data],
        inputs=[raw_data],
        outputs=[preprocessed_data],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.']
    )
    data_prep.run_after(video_decoding)

    print("data_prep step created")

    # configure access to ACR for pulling our custom docker image
    acr = ContainerRegistry()
    acr.address = config['acr_address']
    acr.username = config['acr_username']
    acr.password = config['acr_password']

    est = Estimator(source_directory=script_folder,
                    compute_target=gpu_compute_target,
                    entry_script='train.py',
                    use_gpu=True,
                    node_count=1,
                    custom_docker_image="wopauli_1.8-gpu:1",
                    image_registry_details=acr,
                    user_managed=True
                    )

    # Random search space for the HyperDrive run.
    ps = RandomParameterSampling(
        {
            '--batch_size': choice(1, 2, 4, 8),
            '--filter_sizes': choice("3, 3, 3", "4, 4, 4", "5, 5, 5"),
            '--stack_sizes': choice("48, 96, 192", "36, 72, 144", "12, 24, 48"),
            '--learning_rate': loguniform(-6, -1),
            '--lr_decay': loguniform(-9, -1),
            '--freeze_layers': choice("0, 1, 2", "1, 2, 3", "0, 1", "1, 2", "2, 3", "0", "3"),
            '--transfer_learning': choice("True", "False")
        }
    )

    policy = BanditPolicy(evaluation_interval=2, slack_factor=0.1, delay_evaluation=10)

    hdc = HyperDriveConfig(estimator=est,
                           hyperparameter_sampling=ps,
                           policy=policy,
                           primary_metric_name='val_loss',
                           primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
                           max_total_runs=10,
                           max_concurrent_runs=5,
                           max_duration_minutes=60*6
                           )

    hd_step = HyperDriveStep(
        name="train_w_hyperdrive",
        hyperdrive_run_config=hdc,
        estimator_entry_script_arguments=[
            '--data-folder', preprocessed_data,
            '--remote_execution',
            '--dataset', dataset
        ],
        inputs=[preprocessed_data],
        metrics_output=data_metrics,
        allow_reuse=True
    )
    hd_step.run_after(data_prep)

    # NOTE(review): unlike the other script steps, no runconfig is passed
    # here -- confirm that the default run configuration is intended.
    registration_step = PythonScriptStep(
        name='register_model',
        script_name='model_registration.py',
        arguments=['--input_dir', data_metrics, '--output_dir', data_output],
        compute_target=cpu_compute_target,
        inputs=[data_metrics],
        outputs=[data_output],
        source_directory=script_folder,
        allow_reuse=True,
        hash_paths=['.']
    )
    registration_step.run_after(hd_step)

    pipeline = Pipeline(workspace=ws, steps=[video_decoding, data_prep, hd_step, registration_step])
    print("Pipeline is built")

    pipeline.validate()
    print("Simple validation complete")

    pipeline_name = 'prednet_' + dataset
    published_pipeline = pipeline.publish(name=pipeline_name)

    # Create a datastore-based schedule for the published pipeline, watching
    # the dataset's 'Train' folder. The returned Schedule object is not
    # needed afterwards.
    Schedule.create(workspace=ws, name=pipeline_name + "_sch",
                    pipeline_id=published_pipeline.id,
                    experiment_name=pipeline_name,
                    datastore=def_blob_store,
                    wait_for_provisioning=True,
                    description="Datastore scheduler for Pipeline" + pipeline_name,
                    path_on_datastore=os.path.join('prednet/data/video', dataset, 'Train'),
                    polling_interval=1
                    )

    return pipeline_name
Exemplo n.º 4
0
def main():
    """Build, validate and publish the OS-command data-preprocessing pipeline.

    Settings come from an ``Env`` instance. The pipeline has a single
    ``PythonScriptStep`` running ``preprocess/preprocess_os_cmd_aml.py``;
    its output is written to a run-specific path on the datastore and
    registered as a dataset when the step completes.
    """
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
    )
    print(f"get_workspace:{aml_workspace}")

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print(f"aml_compute:{aml_compute}")

    # Create a reusable Azure ML environment
    environment = get_environment(
        aml_workspace,
        e.aml_env_name,
        create_new=e.rebuild_env,
        enable_docker=True,
        dockerfile='ml_model/preprocess/Dockerfile'
    )
    run_config = RunConfiguration()
    run_config.environment = environment

    # Fall back to the workspace's default datastore when none is configured.
    if e.datastore_name:
        datastore_name = e.datastore_name
    else:
        datastore_name = aml_workspace.get_default_datastore().name
    # Expose the datastore name to the step through its environment.
    run_config.environment.environment_variables["DATASTORE_NAME"] = (
        datastore_name
    )

    datastore = Datastore(aml_workspace, name=datastore_name)
    data_file_path_param = PipelineParameter(
        name="data_file_path", default_value=e.dataset_name)

    # The version of the input/output dataset can't be determined at
    # pipeline publish time, only at run time.
    # Options to store output data:
    # Option 1: Use the blob API to write output data. Otherwise there is
    #     no way to dynamically change the output dataset based on a
    #     PipelineParameter. The following will not work; it generates a
    #     path like
    #     "PipelineParameter_Name:data_file_path_Default:gear_images":
    #         output_ds = OutputFileDatasetConfig(
    #             destination=(datastore, data_file_path_param))
    #     This option means writing a file locally and uploading it to the
    #     datastore. Fewer datasets, more code.
    # Option 2: Use a dynamic path in OutputFileDatasetConfig and register
    #     a new dataset at completion. The output dataset can be mounted,
    #     so more datasets to maintain, less code.
    # Using Option 2 below.
    output_dataset = OutputFileDatasetConfig(
        name=e.processed_dataset_name,
        destination=(datastore, "/dataset/{output-name}/{run-id}")
    ).register_on_complete(
        name=e.processed_dataset_name)

    preprocess_step = PythonScriptStep(
        name="Preprocess Data with OS cmd",
        script_name='preprocess/preprocess_os_cmd_aml.py',
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--dataset_name", e.dataset_name,
            "--datastore_name", datastore_name,
            "--data_file_path", data_file_path_param,
            "--output_dataset", output_dataset,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Preprocess OS cmd created")

    steps = [preprocess_step]
    preprocess_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    # The original had a bare `preprocess_pipeline._set_experiment_name`
    # expression here: an attribute access that was never called and had
    # no effect, so it has been removed.
    preprocess_pipeline.validate()
    published_pipeline = preprocess_pipeline.publish(
        name=e.preprocessing_pipeline_name,
        description="Data preprocessing OS cmd pipeline",
        version=e.build_id,
    )
    print(f"Published pipeline: {published_pipeline.name}")
    print(f"for build {published_pipeline.version}")
def main():
    """Build, publish and submit the model training/evaluation pipeline.

    Settings are read from environment variables after ``load_dotenv()``.
    The pipeline trains a model on a file dataset mounted from a blob
    datastore, then runs an evaluation step. Once published, the pipeline
    is submitted immediately under ``EXPERIMENT_NAME``.
    """
    load_dotenv()
    workspace_name = os.environ.get("WS_NAME")
    resource_group = os.environ.get("RG_NAME")
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN")
    train_script_path = os.environ.get("TRAIN_SCRIPT_PATH")
    evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH")
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU")
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
    model_name = os.environ.get("MODEL_NAME")
    build_id = os.environ.get("BUILD_BUILDID")
    pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME")
    experiment_name = os.environ.get("EXPERIMENT_NAME")

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(
        workspace_name,
        resource_group,
        subscription_id,
        tenant_id,
        app_id,
        app_secret)

    print('Now accessing:')
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(
        aml_workspace,
        compute_name,
        vm_size)
    if aml_compute is not None:
        print(aml_compute)

    # Python environment shared by the train and evaluate steps.
    # ('azureml-dataprep' was listed twice originally; once is enough.)
    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=['numpy', 'pandas',
                        'scikit-learn', 'keras'],
        pip_packages=['azureml-core==1.25.0',
                      'azureml-defaults==1.25.0',
                      'azureml-telemetry==1.25.0',
                      'azureml-train-restclients-hyperdrive==1.25.0',
                      'azureml-train-core==1.25.0',
                      'azureml-dataprep',
                      'tensorflow-gpu==2.0.0',
                      'transformers==2.0.0',
                      'absl-py',
                      'h5py<3.0.0'])
    )

    datastore_name = 'mtcseattle'
    container_name = 'azure-service-classifier'
    account_name = 'mtcseattle'
    # NOTE(security): a SAS token is committed in source here. It should be
    # rotated and supplied via configuration/secret storage instead; it is
    # left in place so existing behavior is unchanged.
    sas_token = '?sv=2020-04-08&st=2021-05-26T04%3A39%3A46Z&se=2022-05-27T04%3A39%3A00Z&sr=c&sp=rl&sig=CTFMEu24bo2X06G%2B%2F2aKiiPZBzvlWHELe15rNFqULUk%3D'

    # Reuse the datastore if it is already registered, otherwise register
    # it. Catching Exception (not a bare except) lets KeyboardInterrupt and
    # SystemExit propagate.
    try:
        existing_datastore = Datastore.get(aml_workspace, datastore_name)
    except Exception:
        existing_datastore = Datastore.register_azure_blob_container(
            workspace=aml_workspace,
            datastore_name=datastore_name,
            container_name=container_name,
            account_name=account_name,
            sas_token=sas_token,
            overwrite=True)

    azure_dataset = Dataset.File.from_files(
        path=(existing_datastore, 'data'))

    azure_dataset = azure_dataset.register(
        workspace=aml_workspace,
        name='Azure Services Dataset',
        description='Dataset containing azure related posts on Stackoverflow',
        create_new_version=True)

    # NOTE(review): the result of to_path() is discarded; the call is kept
    # in case it was meant as an early access check -- confirm or remove.
    azure_dataset.to_path()
    input_data = azure_dataset.as_named_input('azureservicedata').as_mount(
        '/tmp/data')

    # Pipeline parameters (overridable at submission time).
    model_name_param = PipelineParameter(
        name="model_name", default_value=model_name)
    max_seq_length = PipelineParameter(
        name="max_seq_length", default_value=128)
    learning_rate = PipelineParameter(
        name="learning_rate", default_value=3e-5)
    num_epochs = PipelineParameter(
        name="num_epochs", default_value=1)
    export_dir = PipelineParameter(
        name="export_dir", default_value="./outputs/model")
    batch_size = PipelineParameter(
        name="batch_size", default_value=32)
    steps_per_epoch = PipelineParameter(
        name="steps_per_epoch", default_value=1)

    # initialize the PythonScriptStep
    train_step = PythonScriptStep(
        name='Train Model',
        script_name=train_script_path,
        arguments=['--data_dir', input_data,
                   '--max_seq_length', max_seq_length,
                   '--batch_size', batch_size,
                   '--learning_rate', learning_rate,
                   '--steps_per_epoch', steps_per_epoch,
                   '--num_epochs', num_epochs,
                   '--export_dir', export_dir],
        compute_target=aml_compute,
        source_directory=sources_directory_train,
        runconfig=run_config,
        allow_reuse=True)
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=evaluate_script_path,
        compute_target=aml_compute,
        source_directory=sources_directory_train,
        arguments=[
            "--model_name", model_name_param,
            "--build_id", build_id,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    # Currently, the Evaluate step will automatically register
    # the model if it performs better. This step is based on a
    # previous version of the repo which utilized JSON files to
    # track evaluation results.

    # Only the evaluate step is listed; the train step is linked to it via
    # run_after() below.
    evaluate_step.run_after(train_step)
    steps = [evaluate_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=pipeline_name,
        description="Model training/retraining pipeline.",
        version=build_id
    )
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')

    # Kick off a run of the freshly published pipeline; the returned run
    # object is not used.
    published_pipeline.submit(
        workspace=aml_workspace,
        experiment_name=experiment_name)
Exemplo n.º 6
0
class BatchScoringContext(BaseContext):
    '''
        Contains the context needed to perform the batch scoring tasks:
        storage containers, data upload, compute cluster, data references
        and the published, scheduled pipeline.
    '''
    # Data and script information
    batch_data_directory = './paths/batch/scoring'
    batch_data_file = 'data.txt'
    batch_scoring_script = 'batch.py'
    # NOTE: "bach" is a typo for "batch"; the name is kept because it is
    # part of the class's public attribute set.
    bach_scoring_results_file = "Results.txt"

    # Data store information
    input_store_name = "inputdata"
    input_reference_name = "inputdataref"
    output_store_name = "outputdata"
    output_reference_name = "outputdataref"

    # Pipeline information
    pip_packages = []
    python_version = "3.6.7"

    def __init__(self, programArgs, userAuthorization):
        '''
            Initialise the base context and reset every artifact this
            context produces to None.
        '''
        super().__init__(programArgs, userAuthorization)
        self.computeTarget = None
        self.inputDataStore = None
        self.inputDataReference = None
        self.outputDataStore = None
        self.outputDataReference = None
        self.pipelineStep = None
        self.pipeLine = None
        self.publishedPipeline = None
        # Set by createPipeline() when a new pipeline is scheduled.
        self.Schedule = None

    def generateStorageContainers(self):
        '''
            We are using the storage associated with the actual AMLS workspace.

            So, we need to create the container that has the data to be "scored" and
            a container where results will end up.
        '''
        storage_container_names = [
            self.programArguments.source_container,
            self.programArguments.result_container,
        ]

        storage_details = self.workspace.get_default_datastore()

        createStorageContainer(storage_details.account_name,
                               storage_details.account_key,
                               storage_container_names)

    def uploadDataFiles(self):
        '''
            Upload the data files into the source container, these are the
            files that will be processed by the AML compute cluster.
        '''
        storage_details = self.workspace.get_default_datastore()
        # batch_data_file may hold several comma-separated file names.
        data_files = BatchScoringContext.batch_data_file.split(",")

        uploadStorageBlobs(storage_details.account_name,
                           storage_details.account_key,
                           self.programArguments.source_container,
                           BatchScoringContext.batch_data_directory,
                           data_files)

    def generateCompute(self):
        '''
            Generate the AML compute cluster, or reuse the one already
            held by this context.

            Returns the compute target. Raises Exception when the cluster
            cannot be created.
        '''
        if self.computeTarget:
            return self.computeTarget

        self.computeTarget = createBatchComputeCluster(
            self.workspace, self.programArguments.batch_compute_name,
            self.programArguments.batch_vm_size,
            self.programArguments.batch_vm_max,
            self.programArguments.batch_vm_min)

        if not self.computeTarget:
            raise Exception("Cannot create compute target.")

        # Return the target on the creation path too, so both paths agree
        # (previously only the cached path returned it).
        return self.computeTarget

    def createPipelineDataReferences(self):
        '''
            Datastores identify where data is coming from and going to in the process. This
            function checks to see if a data store with the same name has already been registered.

            If not, it registers it, if so it uses the existing store.

            Stores are then wrapped in a DataReference object that will be used in the pipeline steps.

            For this example, we need two references. One for the input data file, one for the output results file.
            Both reside in the storage account from the AMLS workspace.
        '''
        storage_details = self.workspace.get_default_datastore()

        # Have to create one for input and one for output.
        # self.programArguments.source_container - identifies the container name for the data file
        # self.programArguments.result_container - identifies the container name for the results file(s)
        requested_datasets = (
            ("in",
             self.programArguments.source_container,
             BatchScoringContext.input_store_name,
             BatchScoringContext.input_reference_name),
            ("out",
             self.programArguments.result_container,
             BatchScoringContext.output_store_name,
             BatchScoringContext.output_reference_name),
        )

        for direction, container, store_name, reference_name in requested_datasets:
            store, reference = createDataReference(
                self.workspace, storage_details.account_name,
                storage_details.account_key, container,
                store_name,
                reference_name)

            # Put the store and reference into the instance attributes.
            if direction == "in":
                self.inputDataStore = store
                self.inputDataReference = reference
            else:
                self.outputDataStore = store
                self.outputDataReference = reference

    def _createPipelineSteps(self):
        '''
            Create the single PythonScriptStep of the batch pipeline and
            store it in self.pipelineStep.

            You first need the conda dependencies that will be baked into the image to
            be pushed down to the batch compute cluster for a working environment.

            In this example we don't need anything other than Python.
        '''
        conda_dependencies = CondaDependencies.create(
            pip_packages=BatchScoringContext.pip_packages,
            python_version=BatchScoringContext.python_version)

        run_config = RunConfiguration(conda_dependencies=conda_dependencies)
        run_config.environment.docker.enabled = True

        # Next we need to let the pipeline know which store the output is
        # going to. This is expected to be a PipelineData object, built from:
        #   name      = the directory on the cluster machine in which output
        #               is expected.
        #   datastore = identifies the end storage; here the output data
        #               store created by createPipelineDataReferences().
        prediction_ref = PipelineData(name="preds",
                                      datastore=self.outputDataStore,
                                      is_directory=True)

        # Next we create a step for the pipeline. We tell it:
        #   Script information:
        #     the directory in which the python script is located on the
        #     local machine and the file name of the script to upload.
        #   Script arguments:
        #     input file, input directory, output file, output directory.
        #   inputs:
        #     the list of data inputs; here the input data reference.
        #   outputs:
        #     the list of outputs; here the PipelineData created above.
        #   compute_target:
        #     the compute target attached to the AML service that will
        #     process requests.
        #   runconfig:
        #     the conda / python dependencies that the resulting container
        #     requires to execute successfully.
        self.pipelineStep = PythonScriptStep(
            name="basic_pipeline_step",
            source_directory=BatchScoringContext.batch_data_directory,
            script_name=BatchScoringContext.batch_scoring_script,
            arguments=[
                BatchScoringContext.batch_data_file, self.inputDataReference,
                BatchScoringContext.bach_scoring_results_file, prediction_ref
            ],
            inputs=[self.inputDataReference],
            outputs=[prediction_ref],
            compute_target=self.computeTarget,
            runconfig=run_config,
            allow_reuse=False,
        )

        if self.pipelineStep is None:
            raise Exception("Unable to create python step.")

    def createPipeline(self):
        '''
            A pipeline is a series of steps but also requires DataReference objects in those steps so
            that it knows where to get data from and where to deposit outputs.

            In this step, if a PublishedPipeline exists by name, a new pipeline is not created.

            If it is created a new docker container is generated in the ACR instance associated with this
            AMLS workspace.
        '''
        self.publishedPipeline = getExistingPipeline(
            self.workspace, self.programArguments.pipeline_name)

        if self.publishedPipeline:
            print("Found existing pipeline - ",
                  self.programArguments.pipeline_name)
        else:
            print("Creating  pipeline - ", self.programArguments.pipeline_name)

            print("Creating pipeline steps .....")
            self._createPipelineSteps()
            # Pipeline documents `steps` as a list, so wrap the single step.
            self.pipeLine = Pipeline(workspace=self.workspace,
                                     steps=[self.pipelineStep])
            self.pipeLine.validate()

            print("Publishing pipeline .....")
            self.publishedPipeline = self.pipeLine.publish(
                name=self.programArguments.pipeline_name,
                description="Dummy Pipeline")

            # Now we schedule it. This step on its own will create the AMLS
            # experiment tied to this service.
            #
            # Unlike with the RTS example, no model is created in this step.
            #
            # Next we generate the schedule recurrence (when this pipeline
            # should run) and finally create the schedule by identifying the
            # published pipeline that is being requested.
            print("Scheduling pipeline .....")
            experiment_name = "exp_" + self.programArguments.pipeline_name
            recurrence = ScheduleRecurrence(
                frequency=self.programArguments.schedule_frequency,
                interval=self.programArguments.schedule_interval)

            self.Schedule = Schedule.create(
                workspace=self.workspace,
                name="{}_sched".format(self.programArguments.pipeline_name),
                pipeline_id=self.publishedPipeline.id,
                experiment_name=experiment_name,
                recurrence=recurrence,
                description="Pipeline schedule for {}".format(
                    self.programArguments.pipeline_name),
            )

        # Print out what we know of the pipeline, in particular its status
        # and the endpoint.
        print("Pipeline : ", self.publishedPipeline.name)
        print("Pipeline Endpoint: ", self.publishedPipeline.endpoint)
        print("Pipeline Status: ", self.publishedPipeline.status)
Exemplo n.º 7
0
def main():
    """Publish the train -> evaluate Azure ML pipeline described by ``Env``.

    Looks up the workspace and compute target, builds a Docker-enabled run
    configuration from a fixed conda/pip package list, creates the
    "Train Model" and "Evaluate Model " script steps, and publishes the
    pipeline under ``Env.pipeline_name`` with ``Env.build_id`` as version.
    """
    settings = Env()

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(
        settings.workspace_name,
        settings.resource_group,
        settings.subscription_id,
        settings.tenant_id,
        settings.app_id,
        settings.app_secret,
    )
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(
        aml_workspace, settings.compute_name, settings.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Package set installed for every step of this pipeline.
    dependencies = CondaDependencies.create(
        conda_packages=[
            'numpy',
            'pandas',
            'scikit-learn',
            'tensorflow',
            'keras',
        ],
        pip_packages=[
            'azure',
            'azureml-core',
            'azure-storage',
            'azure-storage-blob',
        ],
    )
    run_config = RunConfiguration(conda_dependencies=dependencies)
    run_config.environment.docker.enabled = True

    # Values that can be overridden each time the published pipeline runs.
    model_name_param = PipelineParameter(name="model_name",
                                         default_value=settings.model_name)
    release_id_param = PipelineParameter(name="release_id",
                                         default_value="0")

    def script_step(step_name, script_path):
        # Both steps differ only in display name and entry script; each gets
        # its own freshly built arguments list.
        return PythonScriptStep(
            name=step_name,
            script_name=script_path,
            compute_target=aml_compute,
            source_directory=settings.sources_directory_train,
            arguments=[
                "--release_id", release_id_param,
                "--model_name", model_name_param,
            ],
            runconfig=run_config,
            allow_reuse=False,
        )

    train_step = script_step("Train Model", settings.train_script_path)
    print("Step Train created")

    evaluate_step = script_step("Evaluate Model ",
                                settings.evaluate_script_path)
    print("Step Evaluate created")

    # Only the evaluate step is handed to Pipeline; the train step is linked
    # to it through this run_after dependency.
    evaluate_step.run_after(train_step)

    train_pipeline = Pipeline(workspace=aml_workspace, steps=[evaluate_step])
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=settings.pipeline_name,
        description="Model training/retraining pipeline",
        version=settings.build_id,
    )
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
Exemplo n.º 8
0
def _stage_pipeline_scripts(base_dir, script_folder):
    """Copy the ``models`` package and the step scripts into ``script_folder``.

    Safe to call repeatedly: the folder is created only when missing, and a
    ``models`` copy left behind by an earlier run is replaced instead of
    making ``shutil.copytree`` fail with ``FileExistsError``.
    """
    os.makedirs(script_folder, exist_ok=True)

    staged_models = os.path.join(base_dir, script_folder, "models")
    # copytree refuses to write into an existing directory, so drop the copy
    # staged by a previous run before copying the current sources.
    if os.path.isdir(staged_models):
        shutil.rmtree(staged_models)
    shutil.copytree(os.path.join(base_dir, "models"), staged_models)

    for script_name in (
            "train.py",
            "data_preparation.py",
            "register_prednet.py",
            "batch_scoring.py",
            "train_clf.py",
            "register_clf.py",
    ):
        shutil.copy(os.path.join(base_dir, script_name), script_folder)


def build_prednet_pipeline(dataset, ws):
    """Build, publish, schedule and submit the PredNet pipeline for a dataset.

    The pipeline chains six steps with ``run_after``: ``prepare_data`` ->
    ``train_w_hyperdrive`` -> ``register_prednet`` -> ``batch_scoring`` ->
    ``train_clf`` -> ``register_clf``. It is published as
    ``"prednet_" + dataset``, a schedule watching
    ``prednet/data/raw_data/<dataset>/Train`` on the default datastore is
    created for it, and the published pipeline is submitted once.

    The compute target names are read from the module-level ``args``
    (``args.cpu_compute_name`` and ``args.gpu_compute_name``).

    Args:
        dataset: Dataset name; forwarded to the scripts as ``--dataset`` and
            used to build the pipeline name and the watched datastore path.
        ws: Workspace object; must provide ``name`` and
            ``get_default_datastore()``.
    """
    print("building pipeline for dataset %s in workspace %s" %
          (dataset, ws.name))

    base_dir = "."

    def_blob_store = ws.get_default_datastore()

    # folder for scripts that need to be uploaded to Aml compute target
    script_folder = "./scripts"
    _stage_pipeline_scripts(base_dir, script_folder)

    cpu_compute_name = args.cpu_compute_name
    cpu_compute_target = AmlCompute(ws, cpu_compute_name)
    print("found existing compute target: %s" % cpu_compute_name)

    # use get_status() to get a detailed status for the current cluster.
    print(cpu_compute_target.get_status().serialize())

    # choose a name for your cluster
    gpu_compute_name = args.gpu_compute_name

    gpu_compute_target = AmlCompute(workspace=ws, name=gpu_compute_name)
    print(gpu_compute_target.get_status().serialize())

    # The "prednet" environment must already be registered in the workspace.
    env = Environment.get(ws, "prednet")

    # Runconfigs
    runconfig = RunConfiguration()
    runconfig.environment = env
    print("PipelineData object created")

    # DataReference to where raw data is stored.
    raw_data = DataReference(
        datastore=def_blob_store,
        data_reference_name="raw_data",
        path_on_datastore=os.path.join("prednet", "data", "raw_data"),
    )
    print("DataReference object created")

    # Intermediate data handed from one step to the next; the output of the
    # preparation step is named and stored as preprocessed_data.
    preprocessed_data = PipelineData("preprocessed_data",
                                     datastore=def_blob_store)
    data_metrics = PipelineData("data_metrics", datastore=def_blob_store)
    hd_child_cwd = PipelineData("prednet_model_path", datastore=def_blob_store)
    scored_data = PipelineData("scored_data", datastore=def_blob_store)
    model_path = PipelineData("model_path", datastore=def_blob_store)

    # prepare dataset for training/testing recurrent neural network
    data_prep = PythonScriptStep(
        name="prepare_data",
        script_name="data_preparation.py",
        arguments=[
            "--raw_data",
            raw_data,
            "--preprocessed_data",
            preprocessed_data,
            "--dataset",
            dataset,
        ],
        inputs=[raw_data],
        outputs=[preprocessed_data],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        runconfig=runconfig,
        allow_reuse=True,
    )

    print("data_prep step created")

    # Estimator that each HyperDrive child run executes on the GPU cluster.
    est = Estimator(
        source_directory=script_folder,
        compute_target=gpu_compute_target,
        entry_script="train.py",
        node_count=1,
        environment_definition=env,
    )

    # Search space; every key is passed to train.py as a command-line option.
    ps = BayesianParameterSampling({
        "--batch_size":
        choice(1, 2, 4, 10),
        "--filter_sizes":
        choice("3, 3, 3", "4, 4, 4", "5, 5, 5"),
        "--stack_sizes":
        choice("48, 96, 192", "36, 72, 144", "12, 24, 48"),
        "--learning_rate":
        uniform(1e-6, 1e-3),
        "--lr_decay":
        uniform(1e-9, 1e-2),
        "--freeze_layers":
        choice("0, 1, 2", "1, 2, 3", "0, 1", "1, 2", "2, 3", "0", "3"),
    })

    hdc = HyperDriveConfig(
        estimator=est,
        hyperparameter_sampling=ps,
        primary_metric_name="val_loss",
        primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
        max_total_runs=3,
        max_concurrent_runs=3,
        max_duration_minutes=60 * 6,
    )

    train_prednet = HyperDriveStep(
        "train_w_hyperdrive",
        hdc,
        estimator_entry_script_arguments=[
            "--preprocessed_data",
            preprocessed_data,
            "--remote_execution",
            "--dataset",
            dataset,
        ],
        inputs=[preprocessed_data],
        outputs=[hd_child_cwd],
        metrics_output=data_metrics,
        allow_reuse=True,
    )
    train_prednet.run_after(data_prep)

    # NOTE(review): unlike the other script steps, this one is created
    # without a runconfig - confirm that is intended.
    register_prednet = PythonScriptStep(
        name="register_prednet",
        script_name="register_prednet.py",
        arguments=[
            "--data_metrics",
            data_metrics,
        ],
        compute_target=cpu_compute_target,
        inputs=[data_metrics, hd_child_cwd],
        source_directory=script_folder,
        allow_reuse=True,
    )
    register_prednet.run_after(train_prednet)

    batch_scoring = PythonScriptStep(
        name="batch_scoring",
        script_name="batch_scoring.py",
        arguments=[
            "--preprocessed_data",
            preprocessed_data,
            "--scored_data",
            scored_data,
            "--dataset",
            dataset,
        ],
        compute_target=gpu_compute_target,
        inputs=[preprocessed_data],
        outputs=[scored_data],
        source_directory=script_folder,
        runconfig=runconfig,
        allow_reuse=True,
    )
    batch_scoring.run_after(register_prednet)

    train_clf = PythonScriptStep(
        name="train_clf",
        script_name="train_clf.py",
        arguments=[
            "--preprocessed_data", preprocessed_data, "--scored_data",
            scored_data, "--model_path", model_path
        ],
        compute_target=cpu_compute_target,
        inputs=[preprocessed_data, scored_data],
        outputs=[model_path],
        source_directory=script_folder,
        runconfig=runconfig,
        allow_reuse=True,
    )
    train_clf.run_after(batch_scoring)

    register_clf = PythonScriptStep(
        name="register_clf",
        script_name="register_clf.py",
        arguments=["--model_path", model_path],
        inputs=[model_path],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        allow_reuse=True,
        runconfig=runconfig,
    )
    register_clf.run_after(train_clf)

    pipeline = Pipeline(
        workspace=ws,
        steps=[
            data_prep,
            train_prednet,
            register_prednet,
            batch_scoring,
            train_clf,
            register_clf,
        ],
    )
    pipeline.validate()

    pipeline_name = "prednet_" + dataset
    published_pipeline = pipeline.publish(name=pipeline_name)

    # Schedule bound to the dataset's Train folder on the default datastore;
    # the returned Schedule object is not needed afterwards.
    _ = Schedule.create(
        workspace=ws,
        name=pipeline_name + "_sch",
        pipeline_id=published_pipeline.id,
        experiment_name=pipeline_name,
        datastore=def_blob_store,
        wait_for_provisioning=True,
        description="Datastore scheduler for Pipeline" + pipeline_name,
        path_on_datastore=os.path.join("prednet/data/raw_data", dataset,
                                       "Train"),
        polling_interval=60 * 24,
    )

    # Kick off one run immediately, using the pipeline name as experiment.
    published_pipeline.submit(ws, pipeline_name)
Exemplo n.º 9
0
def main():
    """Publish the GPU train -> evaluate -> report pipeline.

    All settings come from environment variables (loaded via
    ``load_dotenv``). The three script steps share one Docker/GPU run
    configuration and the same command-line arguments; the pipeline is
    published under ``TRAINING_PIPELINE_NAME`` with ``BUILD_BUILDID`` as its
    version.
    """
    load_dotenv()
    read_env = os.environ.get

    # Settings handed to get_workspace.
    workspace_name = read_env("WORKSPACE_NAME")
    resource_group = read_env("RESOURCE_GROUP_NAME")
    subscription_id = read_env("SUBSCRIPTION_ID")
    tenant_id = read_env("TENANT_ID")
    app_id = read_env("SP_APP_ID")
    app_secret = read_env("SP_APP_SECRET")

    # Script locations.
    sources_directory_train = read_env("SOURCES_DIR_TRAIN")
    train_script_path = read_env("TRAIN_SCRIPT_PATH")
    evaluate_script_path = read_env("EVALUATE_SCRIPT_PATH")
    generate_report_path = read_env("GENERATE_REPORT_PATH")
    generate_report_name = read_env("GENERATE_REPORT_NAME")

    # Compute, model and publishing settings.
    vm_size = read_env("AML_COMPUTE_CLUSTER_GPU_SKU")
    compute_name = read_env("AML_COMPUTE_CLUSTER_NAME")
    default_model_name = read_env("MODEL_NAME")
    ckpt_path = read_env("MODEL_CHECKPOINT_PATH")
    build_id = read_env("BUILD_BUILDID")
    pipeline_name = read_env("TRAINING_PIPELINE_NAME")
    epis_datastore = read_env("EPIS_DATASTORE")
    epis_container = read_env("EPIS_CONTAINER")

    aml_workspace = get_workspace(workspace_name, resource_group,
                                  subscription_id, tenant_id, app_id,
                                  app_secret)
    print(aml_workspace)

    aml_compute = get_compute(aml_workspace, compute_name, vm_size)
    if aml_compute is not None:
        print(aml_compute)

    dependencies = CondaDependencies.create(
        conda_packages=[
            'numpy==1.18.1',
            'pandas',
            'tensorflow-gpu==2.0.0',
        ],
        pip_packages=[
            'azure',
            'azureml-core==1.0.60',
            'azureml-tensorboard',
            'azure-storage==0.36.0',
            'tqdm==4.41.1',
            'opencv-python==4.1.2.30',
            'easydict==1.9',
            'matplotlib==3.1.3',
        ],
    )
    run_config = RunConfiguration(conda_dependencies=dependencies)
    docker_settings = run_config.environment.docker
    docker_settings.enabled = True
    docker_settings.gpu_support = True
    docker_settings.base_image = DEFAULT_GPU_IMAGE

    model_name_param = PipelineParameter(name="model_name",
                                         default_value=default_model_name)
    release_id_param = PipelineParameter(name="release_id",
                                         default_value=build_id)

    def script_step(step_name, script, source_dir):
        # Every step receives the same options; the list is rebuilt per step
        # so no two steps share one arguments object.
        return PythonScriptStep(
            name=step_name,
            script_name=script,
            compute_target=aml_compute,
            source_directory=source_dir,
            arguments=[
                "--release_id", release_id_param,
                "--model_name", model_name_param,
                "--ckpt_path", ckpt_path,
                "--datastore", epis_datastore,
                "--storage_container", epis_container,
            ],
            runconfig=run_config,
            allow_reuse=False,
        )

    train_step = script_step("Train Model", train_script_path,
                             sources_directory_train)
    print("Step Train created")

    evaluate_step = script_step("Evaluate Model", evaluate_script_path,
                                sources_directory_train)
    print("Step Evaluate created")

    # The report script lives in its own source directory.
    generate_report_step = script_step("Generate Report Model",
                                       generate_report_name,
                                       generate_report_path)
    print("Step generate report created")

    # Enforce train -> evaluate -> report ordering.
    evaluate_step.run_after(train_step)
    generate_report_step.run_after(evaluate_step)

    train_pipeline = Pipeline(
        workspace=aml_workspace,
        steps=[train_step, evaluate_step, generate_report_step],
    )
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=pipeline_name,
        description="Model training/retraining pipeline",
        version=build_id,
    )
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
Exemplo n.º 10
0
def main():
    """Publish the prepare -> train -> (evaluate) -> register pipeline.

    Settings are read from ``Env``. The prepare step writes the ``train_ds``
    and ``test_ds`` outputs (registered as datasets "train" and "test"),
    the train step consumes them and emits ``model_data``, and the register
    step consumes ``model_data``. The evaluate step is placed between train
    and register only when ``Env.run_evaluation`` equals "true"
    (case-insensitive). The pipeline is published under
    ``Env.pipeline_name`` with ``Env.build_id`` as its version.

    Raises:
        ValueError: if ``Env.dataset_name`` is not among the datasets
            registered in the workspace.
    """
    e = Env()
    # NOTE(review): this dumps every Env field to stdout; confirm Env holds
    # no secrets before relying on it in shared build logs.
    print(e.__dict__)
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable Azure ML environment
    environment = get_environment(aml_workspace,
                                  e.aml_env_name,
                                  create_new=e.rebuild_env)
    run_config = RunConfiguration()
    run_config.environment = environment

    # Look the default datastore up once; it backs the DATASTORE_NAME
    # fallback and every PipelineData created below.
    default_datastore = aml_workspace.get_default_datastore()

    datastore_name = e.datastore_name or default_datastore.name
    # Expose the chosen datastore name to the step scripts.
    run_config.environment.environment_variables[
        "DATASTORE_NAME"] = datastore_name

    model_name_param = PipelineParameter(name="model_name",
                                         default_value=e.model_name)
    dataset_version_param = PipelineParameter(name="dataset_version",
                                              default_value=e.dataset_version)
    data_file_path_param = PipelineParameter(name="data_file_path",
                                             default_value="none")
    caller_run_id_param = PipelineParameter(name="caller_run_id",
                                            default_value="none")

    # Get dataset name
    dataset_name = e.dataset_name

    # Check to see if dataset exists
    if dataset_name not in aml_workspace.datasets:
        raise ValueError(
            f"can't find dataset {dataset_name} in datastore {datastore_name}")

    # Create PipelineData to pass data between steps
    model_data = PipelineData("model_data", datastore=default_datastore)
    train_ds = (PipelineData("train_ds", datastore=default_datastore)
                .as_dataset()
                .parse_delimited_files()
                .register(name="train", create_new_version=True))
    test_ds = (PipelineData("test_ds", datastore=default_datastore)
               .as_dataset()
               .parse_delimited_files()
               .register(name="test", create_new_version=True))

    prepare_step = PythonScriptStep(
        name="Prepare Data",
        script_name=e.prepare_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        outputs=[train_ds, test_ds],
        arguments=[
            "--dataset_version", dataset_version_param, "--data_file_path",
            data_file_path_param, "--dataset_name", dataset_name,
            "--caller_run_id", caller_run_id_param, "--train_ds", train_ds,
            "--test_ds", test_ds
        ],
        runconfig=run_config,
        allow_reuse=True,
    )
    print("Step Prepare created")

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        inputs=[
            train_ds.as_named_input("training_data"),
            test_ds.as_named_input("testing_data")
        ],
        outputs=[model_data],
        arguments=[
            "--model_name", model_name_param, "--model_data", model_data
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--model_name",
            model_name_param,
            "--allow_run_cancel",
            e.allow_run_cancel,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name=e.register_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        inputs=[model_data],
        arguments=[
            "--model_name", model_name_param, "--step_input", model_data
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Register created")
    # Check run_evaluation flag to include or exclude evaluation step.
    if e.run_evaluation.lower() == "true":
        print("Include evaluation step before register step.")
        evaluate_step.run_after(train_step)
        register_step.run_after(evaluate_step)
        steps = [prepare_step, train_step, evaluate_step, register_step]
    else:
        print("Exclude evaluation step and directly run register step.")
        register_step.run_after(train_step)
        steps = [prepare_step, train_step, register_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f"Published pipeline: {published_pipeline.name}")
    print(f"for build {published_pipeline.version}")
Exemplo n.º 11
0
def main():
    """Register a new dataset version and publish the training pipeline.

    Reads settings from ``EnvironmentVariables`` and the command line,
    registers the environment found in the training sources directory,
    registers a tabular dataset version from ``dataset_path`` on the
    datastore, then builds and publishes a train -> evaluate -> register
    pipeline. When ``args.output_file_name`` is set, the published pipeline
    id is written to that file.
    """
    env = EnvironmentVariables()
    args = add_arguments()

    workspace = get_workspace()
    compute = get_or_create_compute(workspace, env.cpu_cluster_name,
                                    env.compute_vm_size, env.max_nodes)

    # Environment definition is loaded from the training sources directory
    # and registered in the workspace before being attached to the run config.
    aml_environment = Environment.load_from_directory(
        env.sources_directory_train)
    aml_environment.register(workspace)
    run_config = RunConfiguration()
    run_config.environment = aml_environment

    def pipeline_param(param_name, default):
        return PipelineParameter(name=param_name, default_value=default)

    model_name_param = pipeline_param("model_name", env.model_name)
    build_id_param = pipeline_param("build_id", env.build_id)
    should_tune_hyperparameters_param = pipeline_param(
        "should_tune_hyperparameters", env.should_tune_hyperparameters)
    parallelism_level_param = pipeline_param("parallelism_level",
                                             env.parallelism_level)
    force_register_param = pipeline_param("force_register",
                                          env.force_register)

    datastore = get_datastore()

    dataset_name = env.dataset_name
    dataset_path = env.dataset_path
    print(f"Creating new dataset version for {dataset_name} in datastore {datastore} from file {dataset_path}")
    dataset = Dataset.Tabular.from_delimited_files(
        path=[(datastore, dataset_path)],
    ).register(
        workspace=workspace,
        name=dataset_name,
        description=dataset_name,
        tags={'format': 'CSV'},
        create_new_version=True,
    )

    train_output = PipelineData('train_output',
                                output_name='train_output',
                                datastore=datastore)

    def identity_arguments():
        # Options shared by all three scripts; rebuilt for each step.
        return ["--build_id", build_id_param,
                "--model_name", model_name_param]

    train_step = PythonScriptStep(
        name="Train model",
        compute_target=compute,
        script_name=env.train_script_name,
        runconfig=run_config,
        inputs=[dataset.as_named_input('training')],
        outputs=[train_output],
        arguments=identity_arguments() + [
            "--parallelism_level", parallelism_level_param,
            "--should_tune_hyperparameters",
            should_tune_hyperparameters_param,
        ],
        allow_reuse=False,
    )

    evaluate_step = PythonScriptStep(
        name="Evaluate model",
        compute_target=compute,
        script_name=env.evaluate_script_name,
        runconfig=run_config,
        inputs=[train_output],
        arguments=identity_arguments() + [
            "--train_output", train_output,
            "--force_register", force_register_param,
        ],
        allow_reuse=False,
    )

    register_step = PythonScriptStep(
        name="Register model",
        compute_target=compute,
        script_name=env.register_script_name,
        runconfig=run_config,
        inputs=[train_output],
        arguments=identity_arguments() + [
            "--train_output", train_output,
        ],
        allow_reuse=False,
    )

    # Enforce train -> evaluate -> register ordering.
    evaluate_step.run_after(train_step)
    register_step.run_after(evaluate_step)

    train_pipeline = Pipeline(
        workspace=workspace,
        steps=[train_step, evaluate_step, register_step],
    )
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=env.pipeline_name,
        description="Train/Eval/Register if better pipeline",
        version=env.build_id,
    )

    # Optionally persist the pipeline id to the file named on the command line.
    output_path = args.output_file_name
    if output_path:
        with open(output_path, "w") as id_file:
            id_file.write(published_pipeline.id)

    print(f"Published pipeline {published_pipeline.name} for build {published_pipeline.version}")
Exemplo n.º 12
0
def main():
    """Publish the single-step ParallelRunStep training pipeline.

    Reads settings from ``Env``, registers the training file dataset when
    the workspace does not already have one under ``Env.dataset_name``
    (otherwise reuses the existing one), builds an environment from
    ``conda_dependencies.yml`` in the training sources directory, and
    publishes a pipeline containing one "many-models-training" parallel run
    step under ``Env.pipeline_name`` with ``Env.build_id`` as its version.
    """
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)
    print(f"get_workspace: {aml_workspace}")

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print(f"aml_compute: {aml_compute}")

    # Prepare the dataset input
    data_store = aml_workspace.get_default_datastore()
    print("data_store: %s" % data_store.name)

    train_ds_name = e.dataset_name
    train_data_path = e.datafile_path
    sources_directory_train = e.sources_directory_train
    pipeline_name = e.pipeline_name
    build_id = e.build_id

    # Register the train dataset only when it is not in the workspace yet;
    # an existing registration is reused as-is.
    if train_ds_name not in aml_workspace.datasets:
        train_ds = Dataset.File.from_files(
            path=[(data_store, train_data_path)],
            validate=False)
        train_ds = train_ds.register(workspace=aml_workspace,
                                     name=train_ds_name,
                                     description='train data',
                                     tags={'format': 'CSV'},
                                     create_new_version=True)
    else:
        train_ds = Dataset.get_by_name(aml_workspace, train_ds_name)

    train_input = train_ds.as_named_input('train_input')

    # Conda environment
    environment = Environment.from_conda_specification(
        "myenv", os.path.join(sources_directory_train,
                              "conda_dependencies.yml"))
    # Environment variables set on the run, including the Application
    # Insights connection string used for logging.
    environment.environment_variables = {
        "APPLICATIONINSIGHTS_CONNECTION_STRING":
        e.applicationinsights_connection_string,
        'AZUREML_FLUSH_INGEST_WAIT': '',
        'DISABLE_ENV_MISMATCH': True,
    }

    from ff.util.helper import build_parallel_run_config

    # PLEASE MODIFY the following three settings based on your compute and
    # experiment timeout.
    process_count_per_node = 6
    node_count = 3
    # this timeout (in seconds) is in line with the AutoML experiment timeout
    # or (no of iterations * iteration timeout)
    run_invocation_timeout = 3700

    parallel_run_config = build_parallel_run_config(sources_directory_train,
                                                    environment, aml_compute,
                                                    node_count,
                                                    process_count_per_node,
                                                    run_invocation_timeout)

    from azureml.pipeline.core import PipelineData
    from azureml.pipeline.steps import ParallelRunStep

    output_dir = PipelineData(name="training_output", datastore=data_store)

    parallel_run_step = ParallelRunStep(
        name="many-models-training",
        parallel_run_config=parallel_run_config,
        allow_reuse=False,
        inputs=[train_input],
        output=output_dir
    )

    # Pipeline documents ``steps`` as a list, so pass the single step wrapped
    # in one rather than as a bare object.
    pipeline = Pipeline(workspace=aml_workspace, steps=[parallel_run_step])
    pipeline.validate()
    published_pipeline = pipeline.publish(name=pipeline_name,
                                          description="FF AutomML pipeline",
                                          version=build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
Exemplo n.º 13
0
def create_experiment_config(workspace):
    """Build and validate the data-prep + model-training pipeline.

    Step settings are loaded from ``steps/data_prep/step.json`` and
    ``steps/model_train/step.json`` (relative to the current working
    directory). The training step is created as a ``HyperDriveStep``; if
    anything goes wrong while assembling the HyperDrive objects, the function
    falls back to a plain ``EstimatorStep`` built from the same estimator.

    Args:
        workspace: Azure ML workspace used to look up datasets, datastores
            and compute targets.

    Returns:
        The ``Pipeline`` object, after ``validate()`` has been called on it.
    """
    ########################################
    ### Creating data prep Pipeline Step ###
    ########################################

    # Load settings
    print("Loading settings")
    data_prep_step_path = os.path.join("steps", "data_prep")
    with open(os.path.join(data_prep_step_path, "step.json")) as f:
        data_prep_settings = json.load(f)

    # Setup datasets of first step: a registered dataset mounted as input and
    # a PipelineData (promoted to a dataset) as output.
    print("Setting up datasets")
    data_prep_input = Dataset.get_by_name(workspace=workspace,
                                          name=data_prep_settings.get(
                                              "dataset_input_name",
                                              None)).as_named_input(
                                                  data_prep_settings.get(
                                                      "dataset_input_name",
                                                      None)).as_mount()
    data_prep_output = PipelineData(
        name=data_prep_settings.get("dataset_output_name", None),
        datastore=Datastore(workspace=workspace,
                            name=data_prep_settings.get(
                                "datastore_output_name",
                                "workspaceblobstore")),
        output_mode="mount").as_dataset()
    # Uncomment next lines, if you want to register intermediate dataset
    #data_prep_output.register(
    #    name=data_prep_settings.get("dataset_output_name", None),
    #    create_new_version=True
    #)

    # Create conda dependencies
    print("Creating conda dependencies")
    data_prep_dependencies = CondaDependencies.create(
        pip_packages=data_prep_settings.get("pip_packages", []),
        conda_packages=data_prep_settings.get("conda_packages", []),
        python_version=data_prep_settings.get("python_version", "3.6.2"))

    # Create run configuration
    print("Creating RunConfiguration")
    data_prep_run_config = RunConfiguration(
        conda_dependencies=data_prep_dependencies,
        framework=data_prep_settings.get("framework", "Python"))

    # Loading compute target
    print("Loading ComputeTarget")
    data_prep_compute_target = ComputeTarget(workspace=workspace,
                                             name=data_prep_settings.get(
                                                 "compute_target_name", None))

    # Create python step
    print("Creating Step")
    data_prep = PythonScriptStep(
        name=data_prep_settings.get("step_name", None),
        script_name=data_prep_settings.get("script_name", None),
        arguments=data_prep_settings.get("arguments", []),
        compute_target=data_prep_compute_target,
        runconfig=data_prep_run_config,
        inputs=[data_prep_input],
        outputs=[data_prep_output],
        params=data_prep_settings.get("parameters", []),
        source_directory=data_prep_step_path,
        allow_reuse=data_prep_settings.get("allow_reuse", True),
        version=data_prep_settings.get("version", None),
    )

    ###############################################
    ### Creating data model train Pipeline Step ###
    ###############################################

    # Load settings
    print("Loading settings")
    model_train_step_path = os.path.join("steps", "model_train")
    with open(os.path.join(model_train_step_path, "step.json")) as f:
        model_train_settings = json.load(f)
    hyperparameter_sampling_settings = model_train_settings.get(
        "hyperparameter_sampling", {})

    # Setup datasets of the training step: its input is the data-prep output,
    # which is what links the two steps together.
    print("Setting up datasets")
    model_train_input = data_prep_output.as_named_input(
        name=model_train_settings.get("dataset_input_name", None))
    model_train_output = PipelineData(
        name=model_train_settings.get("dataset_output_name", None),
        datastore=Datastore(workspace=workspace,
                            name=model_train_settings.get(
                                "datastore_output_name", None)),
        output_mode="mount",
    ).as_dataset()
    # Uncomment next lines, if you want to register intermediate dataset
    #model_train_output.register(
    #    name=model_train_settings.get("dataset_output_name", None),
    #    create_new_version=True
    #)

    # Create conda dependencies
    # NOTE(review): these dependencies and the run configuration below are
    # built but not passed to the Estimator, which receives the package lists
    # directly - confirm whether they are still needed.
    print("Creating conda dependencies")
    model_train_dependencies = CondaDependencies.create(
        pip_packages=model_train_settings.get("pip_packages", []),
        conda_packages=model_train_settings.get("conda_packages", []),
        python_version=model_train_settings.get("python_version", "3.6.2"))

    # Create run configuration
    print("Creating RunConfiguration")
    model_train_run_config = RunConfiguration(
        conda_dependencies=model_train_dependencies,
        framework=model_train_settings.get("framework", "Python"))

    # Loading compute target
    print("Loading ComputeTarget")
    model_train_compute_target = ComputeTarget(workspace=workspace,
                                               name=model_train_settings.get(
                                                   "compute_target_name",
                                                   None))

    # Create distributed training backend
    print("Creating distributed training backend")
    distributed_training_backend = get_distributed_backend(
        backend_name=model_train_settings.get("distributed_backend", None))

    # Create Estimator for Training
    print("Creating Estimator for training")
    model_train_estimator = Estimator(
        source_directory=model_train_step_path,
        entry_script=model_train_settings.get("script_name", None),
        environment_variables=model_train_settings.get("parameters", None),
        compute_target=model_train_compute_target,
        node_count=model_train_settings.get("node_count", None),
        distributed_training=distributed_training_backend,
        conda_packages=model_train_settings.get("conda_packages", None),
        pip_packages=model_train_settings.get("pip_packages", None),
    )

    try:
        # Create parameter sampling; every parameter becomes a "--<name>"
        # script argument mapped to its sampling distribution.
        print("Creating Parameter Sampling")
        parameter_dict = {}
        parameters = hyperparameter_sampling_settings.get("parameters", {})
        for parameter_name, parameter_details in parameters.items():
            parameter_distr = get_parameter_distribution(
                distribution=parameter_details.get("distribution", None),
                **parameter_details.get("settings", {}))
            parameter_dict[f"--{parameter_name}"] = parameter_distr
        model_train_ps = get_parameter_sampling(
            sampling_method=hyperparameter_sampling_settings.get(
                "method", None),
            parameter_dict=parameter_dict)

        # Get Policy definition; everything except the explicitly handled
        # keys is forwarded to get_policy as extra keyword arguments.
        policy_settings = hyperparameter_sampling_settings.get("policy", {})
        kwargs = {
            key: value
            for key, value in policy_settings.items() if key not in
            ["policy_method", "evaluation_interval", "delay_evaluation"]
        }

        # Create termination policy
        print("Creating early termination policy")
        model_train_policy = get_policy(
            policy_method=policy_settings.get("method", ""),
            evaluation_interval=policy_settings.get("evaluation_interval",
                                                    None),
            delay_evaluation=policy_settings.get("delay_evaluation", None),
            **kwargs)

        # Create HyperDriveConfig
        # A missing "primary_metric_goal" makes the `in` test raise
        # TypeError, which triggers the EstimatorStep fallback below.
        print("Creating HyperDriveConfig")
        model_train_hyperdrive_config = HyperDriveConfig(
            estimator=model_train_estimator,
            hyperparameter_sampling=model_train_ps,
            policy=model_train_policy,
            primary_metric_name=hyperparameter_sampling_settings.get(
                "primary_metric", None),
            primary_metric_goal=PrimaryMetricGoal.MINIMIZE
            if "min" in hyperparameter_sampling_settings.get(
                "primary_metric_goal", None) else PrimaryMetricGoal.MAXIMIZE,
            max_total_runs=hyperparameter_sampling_settings.get(
                "max_total_runs", 1),
            max_concurrent_runs=hyperparameter_sampling_settings.get(
                "max_concurrent_runs", 1),
            max_duration_minutes=hyperparameter_sampling_settings.get(
                "max_duration_minutes", None))

        # Create HyperDriveStep
        print("Creating HyperDriveStep")
        model_train = HyperDriveStep(
            name=model_train_settings.get("step_name", None),
            hyperdrive_config=model_train_hyperdrive_config,
            estimator_entry_script_arguments=model_train_settings.get(
                "arguments", None),
            inputs=[model_train_input],
            outputs=[model_train_output],
            allow_reuse=model_train_settings.get("allow_reuse", True),
            version=model_train_settings.get("version", True))
    except Exception as exc:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit now propagate instead of being silently swallowed.
        print("Not all required parameters specified for HyperDrive step")
        print(f"HyperDrive setup failed with: {exc!r}")

        # Create EstimatorStep
        print("Creating EstimatorStep")
        model_train = EstimatorStep(
            name=model_train_settings.get("step_name", None),
            estimator=model_train_estimator,
            estimator_entry_script_arguments=model_train_settings.get(
                "arguments", None),
            inputs=[model_train_input],
            outputs=[model_train_output],
            compute_target=model_train_compute_target,
            allow_reuse=model_train_settings.get("allow_reuse", True),
            version=model_train_settings.get("version", True))

    #########################
    ### Creating Pipeline ###
    #########################

    # Create Pipeline
    # NOTE(review): only model_train is listed; data_prep is presumably
    # pulled in through the data dependency on data_prep_output - confirm.
    print("Creating Pipeline")
    pipeline = Pipeline(
        workspace=workspace,
        steps=[model_train],
        description="Training Pipeline",
    )

    # Validate pipeline
    print("Validating pipeline")
    pipeline.validate()

    return pipeline
def main():
    """Build and publish the train -> evaluate -> register pipeline.

    All configuration is read from an ``Env`` instance. The three steps share
    one run configuration and are chained with ``run_after``; the resulting
    pipeline is validated and published under ``e.training_pipeline_name``
    with ``e.build_id`` as its version.
    """
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
    )
    print(f"get_workspace:{aml_workspace}")

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print(f"aml_compute:{aml_compute}")

    # Create a reusable Azure ML environment
    environment = get_environment(
        aml_workspace,
        e.aml_env_name,
        conda_dependencies_file=e.aml_env_train_conda_dep_file,
        create_new=e.rebuild_env,
    )
    run_config = RunConfiguration()
    run_config.environment = environment

    # Fall back to the workspace default datastore when none is configured.
    if e.datastore_name:
        datastore_name = e.datastore_name
    else:
        datastore_name = aml_workspace.get_default_datastore().name
    run_config.environment.environment_variables[
        "DATASTORE_NAME"] = datastore_name

    # datastore and dataset names are fixed for this pipeline, however
    # data_file_path can be specified for registering new versions of dataset
    # Note that AML pipeline parameters don't take empty string as default,
    # "" won't work
    model_name_param = PipelineParameter(
        name="model_name", default_value=e.model_name)
    data_file_path_param = PipelineParameter(
        name="data_file_path", default_value="nopath")
    ml_params = PipelineParameter(name="ml_params",
                                  default_value="default")

    # Create a PipelineData to pass data between steps
    pipeline_data = PipelineData(
        "pipeline_data", datastore=aml_workspace.get_default_datastore())

    train_step = PythonScriptStep(
        name="Train Model",
        script_name="train/train_aml.py",
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        outputs=[pipeline_data],
        arguments=[
            "--model_name",
            model_name_param,
            "--step_output",
            pipeline_data,
            "--data_file_path",
            data_file_path_param,
            "--dataset_name",
            e.processed_dataset_name,
            "--datastore_name",
            datastore_name,
            "--ml_params",
            ml_params,
        ],
        runconfig=run_config,
        allow_reuse=True,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name="evaluate/evaluate_model.py",
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--model_name",
            model_name_param,
            "--ml_params",
            ml_params,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name="register/register_model.py",
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        inputs=[pipeline_data],
        arguments=[
            "--model_name",
            model_name_param,
            "--step_input",
            pipeline_data,
            "--ml_params",
            ml_params,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Register created")

    # Explicit ordering: evaluate has no data dependency on train, so the
    # sequence train -> evaluate -> register is enforced with run_after.
    evaluate_step.run_after(train_step)
    register_step.run_after(evaluate_step)
    steps = [train_step, evaluate_step, register_step]

    # The former bare `train_pipeline._set_experiment_name` expression only
    # referenced the attribute without calling it, so it has been removed.
    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.training_pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id,
    )
    print(f"Published pipeline: {published_pipeline.name}")
    print(f"for build {published_pipeline.version}")
Exemplo n.º 15
0
def main():
    """Publish the "iris-pipeline" (train -> register) and submit one run.

    The workspace is resolved with service-principal credentials taken from
    environment variables, its config is written to ``config.json``, and the
    published pipeline is submitted to the "iris-pipeline-experiment"
    experiment.
    """
    e = Env()
    print(e.workspace_name)

    svc_pr = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID"),
        service_principal_id=os.environ.get("AZURE_SP_ID"),
        service_principal_password=os.environ.get("AZURE_SP_PASSWORD"))

    # Get Azure machine learning workspace
    ws = Workspace.get(name=os.environ.get("WORKSPACE_NAME"),
                       subscription_id=os.environ.get("SUBSCRIPTION_ID"),
                       resource_group=os.environ.get("AZURE_RESOURCE_GROUP"),
                       auth=svc_pr)

    #ex = Experiment(ws, 'iris-pipeline')
    #ex.archive()

    print("get_workspace:")
    print(ws)
    ws.write_config(path="", file_name="config.json")
    print("writing config.json.")

    # Get Azure machine learning cluster
    aml_compute = get_compute(ws, "train-cluster", "STANDARD_DS2_V2")
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # One Docker-enabled run configuration shared by every step.
    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=[
            'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'
        ],
        pip_packages=[
            'azure', 'azureml-core', 'azureml-pipeline', 'azure-storage',
            'azure-storage-blob', 'azureml-dataprep'
        ]))
    run_config.environment.docker.enabled = True

    ######### TRAIN ################
    train_step = PythonScriptStep(
        name="Train",
        source_directory="models/python/iris/train",
        script_name="train.py",
        compute_target=aml_compute,
        arguments=[],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Train Step created")

    ######### EVALUATE ################
    # NOTE(review): this step is constructed but neither added to `steps`
    # nor linked with run_after below - confirm whether that is intended.
    evaluate_step = PythonScriptStep(
        name="Evaluate",
        source_directory="models/python/iris/evaluate",
        script_name="evaluate.py",
        compute_target=aml_compute,
        arguments=[],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Evaluate Step created")

    ######### REGISTER ################
    register_step = PythonScriptStep(
        name="Register",
        source_directory="models/python/iris/register",
        script_name="register.py",
        compute_target=aml_compute,
        arguments=[],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Register Step created")

    #evaluate_step.run_after(train_step)
    register_step.run_after(train_step)
    steps = [train_step, register_step]

    # The former bare `train_pipeline._set_experiment_name` expression only
    # referenced the attribute without calling it, so it has been removed.
    train_pipeline = Pipeline(workspace=ws, steps=steps)
    train_pipeline.validate()

    published_pipeline = train_pipeline.publish(name="iris-pipeline",
                                                description="")
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')

    # NOTE(review): no PipelineParameter named "model_name" is declared in
    # this function - confirm the pipeline accepts this parameter.
    pipeline_parameters = {"model_name": "iris-pipeline-param"}
    # The returned run handle was never used, so it is no longer bound.
    published_pipeline.submit(ws, "iris-pipeline-experiment",
                              pipeline_parameters)
Exemplo n.º 16
0
def main():
    """Publish the single-step "aks-deployment-pipeline" and submit one run.

    The workspace is loaded from a local config via
    ``Workspace.from_config()``. Only the Deploy step is active; the train
    and register steps are kept as commented-out reference code.
    """
    # e = Env()
    # print(e.workspace_name)

    # svc_pr = ServicePrincipalAuthentication(
    # tenant_id=os.environ.get("TENANT_ID"),
    # service_principal_id=os.environ.get("AZURE_SP_ID"),
    # service_principal_password=os.environ.get("AZURE_SP_PASSWORD"))

    # # Get Azure machine learning workspace
    # ws = Workspace.get(
    #     name=os.environ.get("WORKSPACE_NAME"),
    #     subscription_id=os.environ.get("SUBSCRIPTION_ID"),
    #     resource_group=os.environ.get("AZURE_RESOURCE_GROUP")
    #     ,auth=svc_pr
    # )

    #ex = Experiment(ws, 'iris-pipeline')
    #ex.archive()

    ws = Workspace.from_config()

    print("get_workspace:")
    print(ws)
    # ws.write_config(path="", file_name="config.json")
    # NOTE(review): the message below is still printed although the
    # write_config call above is commented out.
    print("writing config.json.")

    # Get Azure machine learning cluster
    aml_compute = get_compute(ws, compute_name='cpu1', vm_size='STANDARD_D1')

    # Data stores
    # NOTE(review): output_dir is only referenced by the commented-out
    # train/register steps below.
    data_dir = "pipelines/modelout"
    def_data_store = ws.get_default_datastore()
    output_dir = PipelineData(name="scores",
                              datastore=def_data_store,
                              output_path_on_compute=data_dir)

    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=[
            'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'
        ],
        pip_packages=[
            'azure', 'azureml-core', 'azureml-pipeline', 'azure-storage',
            'azure-storage-blob', 'azureml-dataprep'
        ]))
    run_config.environment.docker.enabled = True

    ######### TRAIN ################

    # model_path  = "outputs/model.pkl"
    # data_dir = "./outputs/pipelines/modelout/"
    # train_step = PythonScriptStep(
    #     name="Train",
    #     source_directory="./",
    #     script_name="train.py",
    #     compute_target=aml_compute,
    #     arguments=["--model_path", model_path,
    #      "--data_dir",data_dir],
    #     outputs=[output_dir],
    #     runconfig=run_config,
    #     allow_reuse=False,
    # )
    # print("Train Step created")

    ######### REGISTER ################
    # model_path = "trained-model/model.pkl"
    # register_step = PythonScriptStep(
    #     name="Register",
    #     source_directory="./",
    #     script_name="register.py",
    #     compute_target=aml_compute,
    #     arguments=["--model_path", model_path],
    #     inputs=[output_dir],
    #     runconfig=run_config,
    #     allow_reuse=False,
    # )
    # print("Register Step created")

    ######### DEPLOY ################

    # print("Uploading entry script")
    # score_path = "./deploy/deploy.py"
    # datastore = ws.get_default_datastore()
    # datastore.upload_files(files = [model_path], target_path = 'deploy/', overwrite = True,show_progress = True)
    # print("done!")

    deploy_step = PythonScriptStep(
        name="Deploy",
        source_directory="./deploy",
        script_name="deploy.py",
        compute_target=aml_compute,
        arguments=[],
        inputs=[],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Deploy Step created")

    #evaluate_step.run_after(train_step)
    # register_step.run_after(deploy_step)
    steps = [deploy_step]

    # The former bare `train_pipeline._set_experiment_name` expression only
    # referenced the attribute without calling it, so it has been removed.
    train_pipeline = Pipeline(workspace=ws, steps=steps)
    train_pipeline.validate()

    published_pipeline = train_pipeline.publish(name="aks-deployment-pipeline",
                                                description="")
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')

    # NOTE(review): no PipelineParameter named "model_name" is declared in
    # this function - confirm the pipeline accepts this parameter.
    pipeline_parameters = {"model_name": "sklearn_regression_model.pkl"}
    # The returned run handle was never used, so it is no longer bound.
    published_pipeline.submit(ws, "compute-instance-pipeline-experiment",
                              pipeline_parameters)
def main():
    """Assemble and publish the one-step "Train & Convert Model" pipeline.

    Settings are read from environment variables after ``load_dotenv()``.
    A TensorFlow estimator is wrapped in an EstimatorStep that receives two
    mounted datastore paths (training data and model data); the pipeline is
    validated and published under TRAINING_PIPELINE_NAME with BUILD_BUILDID
    as its version.
    """
    load_dotenv()

    # Workspace and resource group names are both derived from BASE_NAME.
    ws_name = os.getenv("BASE_NAME") + "-AML-WS"
    rg_name = os.getenv("BASE_NAME") + "-AML-RG"
    sub_id = os.getenv("SUBSCRIPTION_ID")
    tenant = os.getenv("TENANT_ID")
    sp_id = os.getenv("SP_APP_ID")
    sp_secret = os.getenv("SP_APP_SECRET")
    train_sources = os.getenv("SOURCES_DIR_TRAIN")
    entry_script = os.getenv("TRAIN_SCRIPT_PATH")
    cluster_sku = os.getenv("AML_COMPUTE_CLUSTER_CPU_SKU")
    cluster_name = os.getenv("AML_COMPUTE_CLUSTER_NAME")
    default_model_name = os.getenv("MODEL_NAME")
    build = os.getenv("BUILD_BUILDID")
    published_name = os.getenv("TRAINING_PIPELINE_NAME")
    training_data_path = os.getenv("DATA_PATH_DATASTORE")
    pretrained_data_path = os.getenv("MODEL_DATA_PATH_DATASTORE")

    # Resolve the workspace through the service principal.
    workspace = get_workspace(
        ws_name, rg_name, sub_id, tenant, sp_id, sp_secret)
    print(workspace)

    # Resolve the compute cluster.
    cluster = get_compute(workspace, cluster_name, cluster_sku)
    if cluster is not None:
        print(cluster)

    # Values that can be overridden when the published pipeline is submitted.
    model_name_param = PipelineParameter(
        name="model_name", default_value=default_model_name)
    release_id_param = PipelineParameter(
        name="release_id", default_value="0")

    # Both folders must already exist on the default datastore, with their
    # paths supplied through the environment variables read above, e.g.:
    #   ds.upload(src_dir='./VOCdevkit', target_path='VOCdevkit', ...)
    #   ds.upload(src_dir='./model_data', target_path='VOCmodel_data', ...)
    default_store = workspace.get_default_datastore()
    data_mount = default_store.path(training_data_path).as_mount()
    model_mount = default_store.path(pretrained_data_path).as_mount()

    # keras2onnx is pinned: recent versions give conversion issues.
    extra_pip_packages = [
        'keras', 'pillow', 'matplotlib', 'onnxmltools', 'keras2onnx==1.5.1'
    ]
    estimator = TensorFlow(
        source_directory=train_sources + '/training',
        compute_target=cluster,
        entry_script=entry_script,
        pip_packages=extra_pip_packages,
        use_gpu=True,
        framework_version='1.13',
    )

    script_arguments = [
        "--release_id", release_id_param,
        "--model_name", model_name_param,
        "--data_folder", data_mount,
        "--model_path", model_mount,
    ]
    train_step = EstimatorStep(
        name="Train & Convert Model",
        estimator=estimator,
        estimator_entry_script_arguments=script_arguments,
        runconfig_pipeline_params=None,
        inputs=[data_mount, model_mount],
        compute_target=cluster,
        allow_reuse=False,
    )
    print("Step Train & Convert created")

    training_pipeline = Pipeline(workspace=workspace, steps=[train_step])
    training_pipeline.validate()
    published = training_pipeline.publish(
        name=published_name,
        description="Model training/retraining pipeline",
        version=build,
    )
    print(f'Published pipeline: {published.name}')
    print(f'for build {published.version}')
def main():
    """Build and publish the prepare -> train -> (evaluate) -> register
    pipeline.

    Configuration comes from an ``Env`` instance. The evaluation step is
    only chained in when ``e.run_evaluation`` equals "true"
    (case-insensitive). The pipeline is validated and published under
    ``e.pipeline_name`` with ``e.build_id`` as its version.

    Raises:
        Exception: if ``e.dataset_name`` is not registered in the workspace.
    """
    e = Env()
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group
    )
    print("get_workspace:")
    print(aml_workspace)

    aml_compute = get_compute(
        aml_workspace,
        e.compute_name,
        e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # NOTE(review): run_config is populated here but none of the steps below
    # receives it via `runconfig=` - confirm whether that is intended.
    environment = get_environment(
        aml_workspace, e.aml_env_name, create_new=e.rebuild_env)
    run_config = RunConfiguration()
    run_config.environment = environment

    # Fall back to the workspace default datastore when none is configured.
    if e.datastore_name:
        datastore_name = e.datastore_name
    else:
        datastore_name = aml_workspace.get_default_datastore().name

    run_config.environment.environment_variables["DATASTORE_NAME"] \
        = datastore_name

    dataset_name = e.dataset_name
    file_name = e.file_name
    datastore = Datastore.get(aml_workspace, datastore_name)

    if dataset_name not in aml_workspace.datasets:
        raise Exception("Could not find dataset at \"%s\"." % dataset_name)

    # Download the registered dataset into the working directory, then
    # upload `file_name` to the datastore under a folder named after the
    # dataset so the prepare step can reference it.
    dataset = Dataset.get_by_name(aml_workspace, name=dataset_name)
    dataset.download(target_path='.', overwrite=True)
    datastore.upload_files([file_name],
                           target_path=dataset_name,
                           overwrite=True)

    raw_data_file = DataReference(datastore=datastore,
                                  data_reference_name="Raw_Data_File",
                                  path_on_datastore=dataset_name + '/'
                                  + file_name)

    clean_data_file = PipelineParameter(name="clean_data_file",
                                        default_value="/clean_data.csv")
    clean_data_folder = PipelineData("clean_data_folder",
                                     datastore=datastore)

    prep_data_step = PythonScriptStep(
        name="Prepare Data",
        source_directory=e.sources_directory_train,
        script_name=e.data_prep_script_path,
        arguments=["--raw_data_file",
                   raw_data_file,
                   "--clean_data_folder",
                   clean_data_folder,
                   "--clean_data_file",
                   clean_data_file],
        inputs=[raw_data_file],
        outputs=[clean_data_folder],
        compute_target=aml_compute,
        allow_reuse=False)

    print("Step Prepare Data created")

    # NOTE(review): the parameter name carries a trailing space; it is kept
    # unchanged because it is part of the published pipeline's interface.
    new_model_file = PipelineParameter(name="new_model_file ",
                                       default_value='/' + e.model_name
                                       + '.pkl')
    new_model_folder = PipelineData("new_model_folder", datastore=datastore)
    est = SKLearn(source_directory=e.sources_directory_train,
                  entry_script=e.train_script_path,
                  pip_packages=['azureml-sdk', 'scikit-learn==0.20.3',
                                'azureml-dataprep[pandas,fuse]>=1.1.14'],
                  compute_target=aml_compute)

    # The training step is given the parameters' default values (plain
    # strings), not the PipelineParameter objects themselves.
    training_step = EstimatorStep(
        name="Model Training",
        estimator=est,
        estimator_entry_script_arguments=["--clean_data_folder",
                                          clean_data_folder,
                                          "--new_model_folder",
                                          new_model_folder,
                                          "--clean_data_file",
                                          clean_data_file.default_value,
                                          "--new_model_file",
                                          new_model_file.default_value],
        runconfig_pipeline_params=None,
        inputs=[clean_data_folder],
        outputs=[new_model_folder],
        compute_target=aml_compute,
        allow_reuse=False)

    print("Step Train created")

    model_name_param = PipelineParameter(name="model_name",
                                         default_value=e.model_name)

    evaluate_step = PythonScriptStep(
        name="Evaluate Model",
        source_directory=e.sources_directory_train,
        script_name=e.evaluate_script_path,
        arguments=["--model_name", model_name_param],
        compute_target=aml_compute,
        allow_reuse=False)

    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model",
        source_directory=e.sources_directory_train,
        script_name=e.register_script_path,
        arguments=["--new_model_folder", new_model_folder,
                   "--new_model_file", new_model_file,
                   "--model_name", model_name_param],
        inputs=[new_model_folder],
        compute_target=aml_compute,
        allow_reuse=False)

    print("Step Register created")

    # Training always follows data preparation; evaluation is optional.
    training_step.run_after(prep_data_step)
    if e.run_evaluation.lower() == 'true':
        print("Include evaluation step before register step.")
        evaluate_step.run_after(training_step)
        register_step.run_after(evaluate_step)
    else:
        print("Exclude evaluation step and directly run register step.")
        register_step.run_after(training_step)

    # Only the final step is listed; the earlier ones are reached through
    # the run_after links and data dependencies set up above.
    pipeline = Pipeline(workspace=aml_workspace, steps=[register_step])
    pipeline.validate()
    print("Pipeline is built")

    # The former bare `pipeline._set_experiment_name` expression only
    # referenced the attribute without calling it, so it has been removed.
    published_pipeline = pipeline.publish(
        name=e.pipeline_name,
        description="Predict Employee Retention Model training pipeline",
        version=e.build_id
    )
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
Exemplo n.º 19
0
                                           destination=def_blob_store)

train_step = PythonScriptStep(
    script_name=train_entry_point,
    source_directory=train_source_dir,
    arguments=["--input_data", ds_input],
    compute_target=compute_target,  # , "--training_results", training_results
    runconfig=aml_run_config,
    allow_reuse=False)

compare_models = [train_step]

# Build the pipeline
pipeline1 = Pipeline(workspace=ws, steps=train_step)

pipeline1.validate()
print("Pipeline validation complete")

# Submit the pipeline to be run
pipeline_run1 = Experiment(ws, 'Titanic_Pipeline_Notebook').submit(pipeline1)
pipeline_run1.wait_for_completion()

# RunDetails(pipeline_run1).show()

step_runs = pipeline_run1.get_children()
for step_run in step_runs:
    status = step_run.get_status()
    print('Script:', step_run.name, 'status:', status)

    # Change this if you want to see details even if the Step has succeeded.
    if status == "Failed":
def main():
    """Build, validate and publish the model training/retraining pipeline.

    Settings are read from ``Env``. The pipeline always contains the train
    and register steps; the evaluate step is inserted between them when
    ``e.run_evaluation`` is the string "true" (case-insensitive). When both
    ``e.datastore_name`` and ``e.datafile_name`` are set, the data file is
    registered as a new version of the tabular dataset ``e.dataset_name``
    and that name is passed to the train step; otherwise an empty dataset
    name is passed. The pipeline is published as ``e.pipeline_name`` with
    ``e.build_id`` as its version, and both are printed.
    """
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a run configuration environment
    conda_deps_file = "diabetes_regression/training_dependencies.yml"
    conda_deps = CondaDependencies(conda_deps_file)
    run_config = RunConfiguration(conda_dependencies=conda_deps)
    run_config.environment.docker.enabled = True
    # BUILDURI_BASE is only set when both parts of the URL are configured;
    # otherwise the run configuration gets an empty variable mapping.
    config_envvar = {}
    if e.collection_uri is not None and e.teamproject_name is not None:
        config_envvar["BUILDURI_BASE"] = (
            e.collection_uri + e.teamproject_name
            + "/_build/results?buildId=")
    run_config.environment.environment_variables = config_envvar

    model_name_param = PipelineParameter(name="model_name",
                                         default_value=e.model_name)
    build_id_param = PipelineParameter(name="build_id",
                                       default_value=e.build_id)

    # Register the training data as a dataset only when a datastore and a
    # data file are both configured.
    dataset_name = ""
    if e.datastore_name is not None and e.datafile_name is not None:
        dataset_name = e.dataset_name
        datastore = Datastore.get(aml_workspace, e.datastore_name)
        data_path = [(datastore, e.datafile_name)]
        dataset = Dataset.Tabular.from_delimited_files(path=data_path)
        dataset.register(workspace=aml_workspace,
                         name=dataset_name,
                         description="dataset with training data",
                         create_new_version=True)

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id",
            build_id_param,
            "--model_name",
            model_name_param,
            "--dataset_name",
            dataset_name,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id",
            build_id_param,
            "--model_name",
            model_name_param,
            "--allow_run_cancel",
            e.allow_run_cancel,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name=e.register_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id",
            build_id_param,
            "--model_name",
            model_name_param,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Register created")
    # Check run_evaluation flag to include or exclude evaluation step.
    if e.run_evaluation.lower() == 'true':
        print("Include evaluation step before register step.")
        evaluate_step.run_after(train_step)
        register_step.run_after(evaluate_step)
        steps = [train_step, evaluate_step, register_step]
    else:
        print("Exclude evaluation step and directly run register step.")
        register_step.run_after(train_step)
        steps = [train_step, register_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
Exemplo n.º 21
0
def main():
    """Build, validate and publish the model training/retraining pipeline.

    Settings are read from ``Env``. If the dataset named ``e.dataset_name``
    is not yet present in the workspace, a sample CSV is created, uploaded
    to the datastore and registered under that name. The train step writes
    to a ``PipelineData`` object that the register step consumes; the
    evaluate step is inserted between them when ``e.run_evaluation`` is the
    string "true" (case-insensitive). The pipeline is published as
    ``e.pipeline_name`` with ``e.build_id`` as its version.

    Raises:
        Exception: if the dataset has to be created and the expected CSV
            file does not exist locally.
    """
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
    )
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable Azure ML environment
    environment = get_environment(
        aml_workspace,
        e.aml_env_name,
        conda_dependencies_file=e.aml_env_train_conda_dep_file,
        create_new=e.rebuild_env,
    )
    run_config = RunConfiguration()
    run_config.environment = environment

    # Fall back to the workspace default datastore when none is configured.
    if e.datastore_name:
        datastore_name = e.datastore_name
    else:
        datastore_name = aml_workspace.get_default_datastore().name
    run_config.environment.environment_variables[
        "DATASTORE_NAME"] = datastore_name  # NOQA: E501

    model_name_param = PipelineParameter(
        name="model_name", default_value=e.model_name)  # NOQA: E501
    dataset_version_param = PipelineParameter(name="dataset_version",
                                              default_value=e.dataset_version)
    data_file_path_param = PipelineParameter(name="data_file_path",
                                             default_value="none")
    caller_run_id_param = PipelineParameter(name="caller_run_id",
                                            default_value="none")  # NOQA: E501

    # Get dataset name
    dataset_name = e.dataset_name

    # Check to see if dataset exists
    if dataset_name not in aml_workspace.datasets:
        # This call creates an example CSV from sklearn sample data. If you
        # have already bootstrapped your project, you can comment this line
        # out and use your own CSV.
        create_sample_data_csv()

        # Use a CSV to read in the data set.
        file_name = "automobile.csv"

        if not os.path.exists(file_name):
            raise Exception(
                'Could not find CSV dataset at "%s". If you have bootstrapped your project, you will need to provide a CSV.'  # NOQA: E501
                % file_name)  # NOQA: E501

        # Upload file to the selected datastore in the workspace
        datastore = Datastore.get(aml_workspace, datastore_name)
        target_path = "training-data/"
        datastore.upload_files(
            files=[file_name],
            target_path=target_path,
            overwrite=True,
            show_progress=False,
        )

        # Register dataset
        path_on_datastore = os.path.join(target_path, file_name)
        dataset = Dataset.Tabular.from_delimited_files(
            path=(datastore, path_on_datastore))
        dataset.register(
            workspace=aml_workspace,
            name=dataset_name,
            description="automobile training data",
            tags={"format": "CSV"},
            create_new_version=True,
        )

    # Create a PipelineData to pass data between steps
    pipeline_data = PipelineData(
        "pipeline_data", datastore=aml_workspace.get_default_datastore())

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        outputs=[pipeline_data],
        arguments=[
            "--model_name",
            model_name_param,
            "--step_output",
            pipeline_data,
            "--dataset_version",
            dataset_version_param,
            "--data_file_path",
            data_file_path_param,
            "--caller_run_id",
            caller_run_id_param,
            "--dataset_name",
            dataset_name,
        ],
        runconfig=run_config,
        allow_reuse=True,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--model_name",
            model_name_param,
            "--allow_run_cancel",
            e.allow_run_cancel,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name=e.register_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        inputs=[pipeline_data],
        arguments=[
            "--model_name",
            model_name_param,
            "--step_input",
            pipeline_data,
        ],  # NOQA: E501
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Register created")
    # Check run_evaluation flag to include or exclude evaluation step.
    if e.run_evaluation.lower() == "true":
        print("Include evaluation step before register step.")
        evaluate_step.run_after(train_step)
        register_step.run_after(evaluate_step)
        steps = [train_step, evaluate_step, register_step]
    else:
        print("Exclude evaluation step and directly run register step.")
        register_step.run_after(train_step)
        steps = [train_step, register_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id,
    )
    print(f"Published pipeline: {published_pipeline.name}")
    print(f"for build {published_pipeline.version}")
Exemplo n.º 22
0
def build_pipeline(dataset, ws, config):
    """Build, validate and publish the prednet pipeline for one dataset.

    The pipeline decodes videos, prepares the data, runs a HyperDrive
    training sweep and registers the resulting model, in that order.

    Args:
        dataset: Name of the dataset; used as the last component of the
            video data path on the datastore and as the suffix of the
            published pipeline name.
        ws: Azure ML workspace the pipeline is created in.
        config: Mapping providing the compute target names under the
            'cpu_compute' and 'gpu_compute' keys.

    Returns:
        The name under which the pipeline was published
        ('prednet_' + dataset).
    """
    print("building pipeline for dataset %s in workspace %s" %
          (dataset, ws.name))

    # Directory the pipeline scripts are copied from.
    base_dir = '.'

    def_blob_store = ws.get_default_datastore()

    # folder for scripts that need to be uploaded to Aml compute target
    script_folder = './scripts'
    os.makedirs(script_folder, exist_ok=True)

    script_files = (
        'video_decoding.py',
        'pipelines_submit.py',
        'pipelines_build.py',
        'train.py',
        'data_utils.py',
        'prednet.py',
        'keras_utils.py',
        'data_preparation.py',
        'model_registration.py',
        'config.json',
    )
    for script_file in script_files:
        shutil.copy(os.path.join(base_dir, script_file), script_folder)

    # Reuse the CPU cluster if it already exists, otherwise provision it.
    cpu_compute_name = config['cpu_compute']
    try:
        cpu_compute_target = AmlCompute(ws, cpu_compute_name)
        print("found existing compute target: %s" % cpu_compute_name)
    except ComputeTargetException:
        print("creating new compute target")

        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_D2_V2',
            max_nodes=4,
            idle_seconds_before_scaledown=1800)
        cpu_compute_target = ComputeTarget.create(ws, cpu_compute_name,
                                                  provisioning_config)
        cpu_compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=20)

    # use get_status() to get a detailed status for the current cluster.
    print(cpu_compute_target.get_status().serialize())

    # Reuse the GPU cluster if it already exists, otherwise provision it.
    gpu_compute_name = config['gpu_compute']

    try:
        gpu_compute_target = AmlCompute(workspace=ws, name=gpu_compute_name)
        print("found existing compute target: %s" % gpu_compute_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6',
            max_nodes=5,
            idle_seconds_before_scaledown=1800)

        # create the cluster
        gpu_compute_target = ComputeTarget.create(ws, gpu_compute_name,
                                                  provisioning_config)

        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it uses the scale settings for the cluster
        gpu_compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=20)

    # use get_status() to get a detailed status for the current cluster.
    print(gpu_compute_target.get_status().serialize())

    # conda dependencies for compute targets
    cpu_cd = CondaDependencies.create(conda_packages=["py-opencv=3.4.2"],
                                      pip_packages=[
                                          "azure-storage-blob==1.5.0",
                                          "hickle==3.4.3", "requests==2.21.0",
                                          "sklearn", "pandas==0.24.2",
                                          "azureml-sdk==1.0.21",
                                          "numpy==1.16.2", "pillow==6.0.0"
                                      ])
    gpu_cd = CondaDependencies.create(pip_packages=[
        "keras==2.0.8", "theano==1.0.4", "tensorflow==1.8.0",
        "tensorflow-gpu==1.8.0", "hickle==3.4.3", "matplotlib==3.0.3",
        "seaborn==0.9.0", "requests==2.21.0", "bs4==0.0.1", "imageio==2.5.0",
        "sklearn", "pandas==0.24.2", "azureml-sdk==1.0.21", "numpy==1.16.2"
    ])

    # Runconfigs
    cpu_compute_run_config = RunConfiguration(conda_dependencies=cpu_cd)
    cpu_compute_run_config.environment.docker.enabled = True
    cpu_compute_run_config.environment.docker.gpu_support = False
    cpu_compute_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
    cpu_compute_run_config.environment.spark.precache_packages = False

    gpu_compute_run_config = RunConfiguration(conda_dependencies=gpu_cd)
    gpu_compute_run_config.environment.docker.enabled = True
    gpu_compute_run_config.environment.docker.gpu_support = True
    gpu_compute_run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE
    gpu_compute_run_config.environment.spark.precache_packages = False

    print("PipelineData object created")

    # Input videos for this dataset on the default datastore.
    video_data = DataReference(datastore=def_blob_store,
                               data_reference_name="video_data",
                               path_on_datastore=os.path.join(
                                   "prednet", "data", "video", dataset))

    # Intermediate data objects handed from one pipeline step to the next.
    raw_data = PipelineData("raw_video_fames", datastore=def_blob_store)
    preprocessed_data = PipelineData("preprocessed_video_frames",
                                     datastore=def_blob_store)
    data_metrics = PipelineData("data_metrics", datastore=def_blob_store)
    data_output = PipelineData("output_data", datastore=def_blob_store)

    print("DataReference object created")

    # prepare dataset for training/testing prednet
    video_decoding = PythonScriptStep(
        name='decode_videos',
        script_name="video_decoding.py",
        arguments=["--input_data", video_data, "--output_data", raw_data],
        inputs=[video_data],
        outputs=[raw_data],
        compute_target=cpu_compute_target,
        source_directory=script_folder,
        runconfig=cpu_compute_run_config,
        allow_reuse=True,
        hash_paths=['.'])
    print("video_decode created")

    # prepare dataset for training/testing recurrent neural network
    data_prep = PythonScriptStep(name='prepare_data',
                                 script_name="data_preparation.py",
                                 arguments=[
                                     "--input_data", raw_data, "--output_data",
                                     preprocessed_data
                                 ],
                                 inputs=[raw_data],
                                 outputs=[preprocessed_data],
                                 compute_target=cpu_compute_target,
                                 source_directory=script_folder,
                                 runconfig=cpu_compute_run_config,
                                 allow_reuse=True,
                                 hash_paths=['.'])
    data_prep.run_after(video_decoding)

    print("data_prep created")

    # Estimator that runs train.py on the GPU cluster.
    est = TensorFlow(source_directory=script_folder,
                     compute_target=gpu_compute_target,
                     pip_packages=[
                         'keras==2.0.8', 'theano', 'tensorflow==1.8.0',
                         'tensorflow-gpu==1.8.0', 'matplotlib', 'horovod',
                         'hickle'
                     ],
                     entry_script='train.py',
                     use_gpu=True,
                     node_count=1)

    # Hyperparameter search space; keys are the train.py command-line flags.
    ps = RandomParameterSampling({
        '--batch_size':
        choice(2, 4, 8, 16),
        '--filter_sizes':
        choice("3, 3, 3", "4, 4, 4", "5, 5, 5"),
        '--stack_sizes':
        choice("48, 96, 192", "36, 72, 144", "12, 24, 48"),  #, "48, 96"),
        '--learning_rate':
        loguniform(-6, -1),
        '--lr_decay':
        loguniform(-9, -1),
        '--freeze_layers':
        choice("0, 1, 2", "1, 2, 3", "0, 1", "1, 2", "2, 3", "0", "1", "2",
               "3"),
        '--transfer_learning':
        choice("True", "False")
    })

    policy = BanditPolicy(evaluation_interval=2,
                          slack_factor=0.1,
                          delay_evaluation=20)

    hdc = HyperDriveRunConfig(
        estimator=est,
        hyperparameter_sampling=ps,
        policy=policy,
        primary_metric_name='val_loss',
        primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
        max_total_runs=5,  #100,
        max_concurrent_runs=5,  #10,
        max_duration_minutes=60 * 6)

    hd_step = HyperDriveStep(name="train_w_hyperdrive",
                             hyperdrive_run_config=hdc,
                             estimator_entry_script_arguments=[
                                 '--data-folder', preprocessed_data,
                                 '--remote_execution'
                             ],
                             inputs=[preprocessed_data],
                             metrics_output=data_metrics,
                             allow_reuse=True)
    hd_step.run_after(data_prep)

    registration_step = PythonScriptStep(
        name='register_model',
        script_name='model_registration.py',
        arguments=['--input_dir', data_metrics, '--output_dir', data_output],
        compute_target=gpu_compute_target,
        inputs=[data_metrics],
        outputs=[data_output],
        source_directory=script_folder,
        allow_reuse=True,
        hash_paths=['.'])
    registration_step.run_after(hd_step)

    pipeline = Pipeline(
        workspace=ws,
        steps=[video_decoding, data_prep, hd_step, registration_step])
    print("Pipeline is built")

    pipeline.validate()
    print("Simple validation complete")

    pipeline_name = 'prednet_' + dataset
    pipeline.publish(name=pipeline_name)

    return pipeline_name
Exemplo n.º 23
0
def main():
    """Build, validate and publish the train/evaluate/register pipeline.

    Settings are read from ``Env``. The three steps run strictly in the
    order train, evaluate, register. Besides the model name and build id,
    the train step receives a ``hyperparameter_alpha`` pipeline parameter
    (default 0.5) as ``--alpha``. The pipeline is published as
    ``e.pipeline_name`` with ``e.build_id`` as its version, and both are
    printed.
    """
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=[
            'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'
        ],
        pip_packages=[
            'azure', 'azureml-core', 'azure-storage', 'azure-storage-blob'
        ]))
    run_config.environment.docker.enabled = True

    # BUILDURI_BASE is only set when both parts of the URL are configured;
    # otherwise the run configuration gets an empty variable mapping.
    config_envvar = {}
    if e.collection_uri is not None and e.teamproject_name is not None:
        config_envvar["BUILDURI_BASE"] = (
            e.collection_uri + e.teamproject_name
            + "/_build/results?buildId=")
    run_config.environment.environment_variables = config_envvar

    model_name_param = PipelineParameter(name="model_name",
                                         default_value=e.model_name)
    build_id_param = PipelineParameter(name="build_id",
                                       default_value=e.build_id)
    hyperparameter_alpha_param = PipelineParameter(name="hyperparameter_alpha",
                                                   default_value=0.5)

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id",
            build_id_param,
            "--model_name",
            model_name_param,
            "--alpha",
            hyperparameter_alpha_param,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id",
            build_id_param,
            "--model_name",
            model_name_param,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name=e.register_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id",
            build_id_param,
            "--model_name",
            model_name_param,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Register created")

    # Enforce the execution order: train -> evaluate -> register.
    evaluate_step.run_after(train_step)
    register_step.run_after(evaluate_step)
    steps = [train_step, evaluate_step, register_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
Exemplo n.º 24
0
def main():
    """Publish a training pipeline made of a train and an evaluate step.

    Configuration comes from environment variables (after ``load_dotenv``).
    The workspace and resource group names are both derived from BASE_NAME.
    Only the evaluate step is handed to the pipeline; it is declared to run
    after the train step. The pipeline is published under
    TRAINING_PIPELINE_NAME with BUILD_BUILDID as its version, and both are
    printed.
    """
    load_dotenv()
    env = os.environ.get

    # Workspace coordinates and service-principal credentials.
    workspace_name = env("BASE_NAME") + "-AML-WS"
    resource_group = env("BASE_NAME") + "-AML-RG"
    subscription_id = env("SUBSCRIPTION_ID")
    tenant_id = env("TENANT_ID")
    app_id = env("SP_APP_ID")
    app_secret = env("SP_APP_SECRET")

    # Script locations, compute settings and naming.
    sources_directory_train = env("SOURCES_DIR_TRAIN")
    train_script_path = env("TRAIN_SCRIPT_PATH")
    evaluate_script_path = env("EVALUATE_SCRIPT_PATH")
    vm_size = env("AML_COMPUTE_CLUSTER_CPU_SKU")
    compute_name = env("AML_COMPUTE_CLUSTER_NAME")
    default_model_name = env("MODEL_NAME")
    build_id = env("BUILD_BUILDID")
    pipeline_name = env("TRAINING_PIPELINE_NAME")

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(
        workspace_name,
        resource_group,
        subscription_id,
        tenant_id,
        app_id,
        app_secret,
    )
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, compute_name, vm_size)
    if aml_compute is not None:
        print(aml_compute)

    # Docker-enabled run configuration shared by both steps.
    conda_deps = CondaDependencies.create(
        conda_packages=[
            'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'
        ],
        pip_packages=[
            'azure', 'azureml-core', 'azure-storage', 'azure-storage-blob'
        ],
    )
    run_config = RunConfiguration(conda_dependencies=conda_deps)
    run_config.environment.docker.enabled = True

    model_name_param = PipelineParameter(name="model_name",
                                         default_value=default_model_name)
    release_id_param = PipelineParameter(name="release_id",
                                         default_value="0")

    # Both steps take the same arguments and run with the same settings;
    # only the display name and the script differ.
    script_arguments = [
        "--release_id",
        release_id_param,
        "--model_name",
        model_name_param,
    ]
    shared_settings = dict(
        compute_target=aml_compute,
        source_directory=sources_directory_train,
        runconfig=run_config,
        allow_reuse=False,
    )

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=train_script_path,
        arguments=list(script_arguments),
        **shared_settings,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=evaluate_script_path,
        arguments=list(script_arguments),
        **shared_settings,
    )
    print("Step Evaluate created")

    evaluate_step.run_after(train_step)

    train_pipeline = Pipeline(workspace=aml_workspace, steps=[evaluate_step])
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=pipeline_name,
        description="Model training/retraining pipeline",
        version=build_id,
    )
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
Exemplo n.º 25
0
)

print("trainStep created")


# ### Create and Validate the Pipeline
# 
# Note that *trainStep* has an implicit data dependency on *processTrainDataStep*, so you only need to include *trainStep* in your Pipeline object. When you run the pipeline, you will observe that it first runs **processTrainDataStep** and then **trainStep**.

# In[ ]:


# Assemble the pipeline from the training step.
pipeline = Pipeline(workspace=ws, steps=[trainStep])
print ("Pipeline is built")

# Check the pipeline definition before submitting it.
pipeline.validate()
print("Simple validation complete")


# ### Submit the Pipeline
# 
# At this point you can run the pipeline and examine the output it produced.

# In[ ]:


# Submit the pipeline as a run of the experiment named by experiment_name.
pipeline_run = Experiment(ws, experiment_name).submit(pipeline)
print("Pipeline is submitted for execution")


# ### Monitor the Run Details
Exemplo n.º 26
0
def main():
    """Run the two-step Databricks pipeline and wait for it to finish.

    Configuration comes from environment variables. The first step runs the
    training script and the second runs ``register_model.py``; both use the
    Databricks compute target, and the second runs after the first. The
    pipeline is submitted to the "pipetest" experiment and its status is
    polled every 10 seconds while it is 'Running' or 'NotStarted'. The
    process exits with status 1 if the final status is 'Failed'.
    """
    env = os.environ.get

    # An unset or empty DATABRICKS_CLUSTER_ID both mean "no cluster id".
    cluster_id = env("DATABRICKS_CLUSTER_ID") or None

    workspace_name = env("AML_WORKSPACE_NAME")
    resource_group = env("RESOURCE_GROUP")
    subscription_id = env("SUBSCRIPTION_ID")
    tenant_id = env("TENANT_ID")
    app_id = env("SP_APP_ID")
    app_secret = env("SP_APP_SECRET")
    experiment_subfolder = env("EXPERIMENT_FOLDER", 'aml_service/experiment')
    sources_directory = env("SOURCES_DIR")
    experiment_folder = os.path.join(sources_directory, experiment_subfolder)
    train_script_path = env("TRAIN_SCRIPT_PATH")
    databricks_workspace_name = env("DATABRICKS_WORKSPACE_NAME")
    databricks_access_token = env("DATABRICKS_ACCESS_TOKEN")
    databricks_compute_name_aml = env("DATABRICKS_COMPUTE_NAME_AML")
    model_dir = env("MODEL_DIR", 'dbfs:/model')
    model_name = env("MODEL_NAME", 'torchcnn')

    # Everything after the first "/" of MODEL_DIR is re-rooted under /dbfs/.
    dir_below_scheme = model_dir.split("/", 1)[1]
    model_path = "/dbfs/" + dir_below_scheme + "/" + model_name + ".pth"

    print("The model path will be %s" % (model_path))

    aml_workspace = get_workspace(
        workspace_name,
        resource_group,
        subscription_id,
        tenant_id,
        app_id,
        app_secret,
    )
    print(aml_workspace)

    databricks_compute = get_compute(
        aml_workspace,
        databricks_compute_name_aml,
        resource_group,
        databricks_workspace_name,
        databricks_access_token,
    )
    print(databricks_compute)

    # Settings common to both Databricks steps.
    shared_step_settings = dict(
        num_workers=1,
        existing_cluster_id=cluster_id,
        compute_target=databricks_compute,
        allow_reuse=False,
    )

    train_step = DatabricksStep(
        name="DBPythonInLocalMachine",
        python_script_name=train_script_path,
        source_directory=sources_directory,
        run_name='DB_Python_Local_demo',
        python_script_params=['--MODEL_PATH', model_path],
        **shared_step_settings,
    )

    register_step = DatabricksStep(
        name="RegisterModel",
        python_script_name="register_model.py",
        source_directory=experiment_folder,
        run_name='Register_model',
        python_script_params=[
            '--MODEL_PATH', model_path,
            '--TENANT_ID', tenant_id,
            '--APP_ID', app_id,
            '--APP_SECRET', app_secret,
            '--MODEL_NAME', model_name,
        ],
        **shared_step_settings,
    )

    register_step.run_after(train_step)
    print("Step lists created")

    pipeline = Pipeline(workspace=aml_workspace,
                        steps=[train_step, register_step])
    print("Pipeline is built")

    pipeline.validate()
    print("Pipeline validation complete")

    pipeline_run = pipeline.submit(experiment_name="pipetest")

    print("Pipeline is submitted for execution")

    pipeline_run_id = pipeline_run.get_details()['runId']

    azure_run_url = get_experiment_run_url(subscription_id, resource_group,
                                           workspace_name, pipeline_run_id)

    print("To check details of the Pipeline run, go to " + azure_run_url)

    # Poll every 10 seconds and report progress every 30 seconds.
    pipeline_status = pipeline_run.get_status()
    elapsed_seconds = 0
    while pipeline_status in ('Running', 'NotStarted'):
        elapsed_seconds += 10
        time.sleep(10)
        if elapsed_seconds % 30 == 0:
            print("Status: %s. %s seconds have passed." %
                  (pipeline_status, elapsed_seconds))
        pipeline_status = pipeline_run.get_status()

    if pipeline_status == 'Failed':
        print("AML Pipelne failed. Check %s for details." % (azure_run_url))
        sys.exit(1)

    print(pipeline_status)
    print("Pipeline completed")
def main():
    """Build, publish and submit the TensorFlow training/evaluation pipeline.

    Configuration is read from environment variables after ``load_dotenv()``
    has been called: ``BASE_NAME``, ``SUBSCRIPTION_ID``, ``TENANT_ID``,
    ``SP_APP_ID``, ``SP_APP_SECRET``, ``SOURCES_DIR_TRAIN``,
    ``TRAIN_SCRIPT_PATH``, ``EVALUATE_SCRIPT_PATH``,
    ``AML_COMPUTE_CLUSTER_SKU``, ``AML_COMPUTE_CLUSTER_NAME``,
    ``AKS_CLUSTER_NAME``, ``MODEL_NAME``, ``BUILD_BUILDID``,
    ``TRAINING_PIPELINE_NAME`` and ``EXPERIMENT_NAME``.

    Steps performed, in order:

    1. Obtain the workspace and the training compute target.
    2. Get the ``tfworld`` datastore, registering it when the lookup fails.
    3. Register the ``Azure Services Dataset`` file dataset from the
       datastore's ``data`` path and mount it as a step input.
    4. Create a "Train Model" estimator step and an "Evaluate Model " script
       step that runs after it.
    5. Validate, publish and submit the pipeline.
    6. Call ``get_aks`` for the configured AKS cluster name and print the
       result when it is not ``None``.

    Raises:
        TypeError: if ``BASE_NAME`` is unset, because ``None`` is
            concatenated with a string.
    """
    load_dotenv()
    workspace_name = os.environ.get("BASE_NAME") + "-AML-WS"
    resource_group = "AML-RG-" + os.environ.get("BASE_NAME")
    subscription_id = os.environ.get("SUBSCRIPTION_ID")
    tenant_id = os.environ.get("TENANT_ID")
    app_id = os.environ.get("SP_APP_ID")
    app_secret = os.environ.get("SP_APP_SECRET")
    sources_directory_train = os.environ.get("SOURCES_DIR_TRAIN")
    train_script_path = os.environ.get("TRAIN_SCRIPT_PATH")
    evaluate_script_path = os.environ.get("EVALUATE_SCRIPT_PATH")
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU")
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME")
    aks_name = os.environ.get("AKS_CLUSTER_NAME")
    model_name = os.environ.get("MODEL_NAME")
    build_id = os.environ.get("BUILD_BUILDID")
    pipeline_name = os.environ.get("TRAINING_PIPELINE_NAME")
    experiment_name = os.environ.get("EXPERIMENT_NAME")

    # Get Azure machine learning workspace
    aml_workspace = get_workspace(workspace_name, resource_group,
                                  subscription_id, tenant_id, app_id,
                                  app_secret)
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, compute_name, vm_size)
    if aml_compute is not None:
        print(aml_compute)

    # Run configuration used by the evaluate step only; the train step gets
    # its packages from the TensorFlow estimator defined further down.
    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=['numpy', 'pandas', 'scikit-learn', 'keras'],
        pip_packages=[
            'azure', 'azureml-sdk', 'azure-storage', 'azure-storage-blob',
            'transformers>=2.1.1', 'tensorflow>=2.0.0', 'tensorflow-gpu>=2.0.0'
        ]))
    run_config.environment.docker.enabled = True

    datastore_name = 'tfworld'
    container_name = 'azure-service-classifier'
    account_name = 'johndatasets'
    # SECURITY: this SAS token is a credential committed to source control.
    # NOTE(review): move it to configuration/secret storage and rotate it;
    # its `se=` field reads 2021-06-02, so it is presumably expired - confirm.
    sas_token = '?sv=2019-02-02&ss=bfqt&srt=sco&sp=rl&se=2021-06-02T03:40:25Z&st=2020-03-09T19:40:25Z&spr=https&sig=bUwK7AJUj2c%2Fr90Qf8O1sojF0w6wRFgL2c9zMVCWNPA%3D'

    # Reuse the datastore when the lookup succeeds; register it otherwise.
    # `Exception` (not a bare except) so that KeyboardInterrupt and
    # SystemExit are not mistaken for "datastore missing".
    try:
        existing_datastore = Datastore.get(aml_workspace, datastore_name)
    except Exception:
        existing_datastore = Datastore \
            .register_azure_blob_container(workspace=aml_workspace,
                                           datastore_name=datastore_name,
                                           container_name=container_name,
                                           account_name=account_name,
                                           sas_token=sas_token
                                           )

    azure_dataset = Dataset.File.from_files(path=(existing_datastore, 'data'))
    azure_dataset = azure_dataset.register(
        workspace=aml_workspace,
        name='Azure Services Dataset',
        description='Dataset containing azure related posts on Stackoverflow',
        create_new_version=True)

    # The return value is discarded.
    # NOTE(review): presumably kept to verify the files are reachable before
    # the pipeline is built - confirm whether this call is still needed.
    azure_dataset.to_path()
    input_data = azure_dataset.as_named_input('input_data1').as_mount(
        '/tmp/data')

    # Pipeline parameters; each default can be overridden per submission.
    # `model_name` is rebound from the plain string to the parameter object.
    model_name = PipelineParameter(name="model_name", default_value=model_name)
    max_seq_length = PipelineParameter(name="max_seq_length",
                                       default_value=128)
    learning_rate = PipelineParameter(name="learning_rate", default_value=3e-5)
    num_epochs = PipelineParameter(name="num_epochs", default_value=3)
    export_dir = PipelineParameter(name="export_dir",
                                   default_value="./outputs/exports")
    batch_size = PipelineParameter(name="batch_size", default_value=32)
    steps_per_epoch = PipelineParameter(name="steps_per_epoch",
                                        default_value=100)

    # initialize the TensorFlow estimator
    estimator = TensorFlow(source_directory=sources_directory_train,
                           entry_script=train_script_path,
                           compute_target=aml_compute,
                           framework_version='2.0',
                           use_gpu=True,
                           pip_packages=[
                               'transformers==2.0.0',
                               'azureml-dataprep[fuse,pandas]==1.3.0'
                           ])

    train_step = EstimatorStep(
        name="Train Model",
        estimator=estimator,
        estimator_entry_script_arguments=[
            "--data_dir", input_data, "--max_seq_length", max_seq_length,
            "--learning_rate", learning_rate, "--num_epochs", num_epochs,
            "--export_dir", export_dir, "--batch_size", batch_size,
            "--steps_per_epoch", steps_per_epoch
        ],
        compute_target=aml_compute,
        inputs=[input_data],
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=evaluate_script_path,
        compute_target=aml_compute,
        source_directory=sources_directory_train,
        arguments=[
            "--model_name",
            model_name,
            "--build_id",
            build_id,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    # Currently, the Evaluate step will automatically register
    # the model if it performs better. This step is based on a
    # previous version of the repo which utilized JSON files to
    # track evaluation results.

    # Only the final step is listed; the train step is attached to it
    # through the run_after dependency.
    evaluate_step.run_after(train_step)
    steps = [evaluate_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=pipeline_name,
        description="Model training/retraining pipeline",
        version=build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')

    # The value returned by submit() is not used here.
    published_pipeline.submit(
        workspace=aml_workspace,
        experiment_name=experiment_name)

    # Get AKS cluster for deployment
    aks_compute = get_aks(aml_workspace, aks_name)
    if aks_compute is not None:
        print(aks_compute)
Exemplo n.º 28
0
def main():
    """Publish the train/evaluate/register pipeline and save its endpoint.

    Settings come from environment variables (a ``.env`` file is loaded
    first): ``BASE_NAME``, ``SUBSCRIPTION_ID``, ``TENANT_ID``, ``SP_APP_ID``,
    ``SP_APP_SECRET``, ``SOURCES_DIR_TRAIN``, ``TRAIN_SCRIPT_PATH``,
    ``EVALUATE_SCRIPT_PATH``, ``REGISTER_SCRIPT_PATH``,
    ``AML_COMPUTE_CLUSTER_CPU_SKU``, ``AML_COMPUTE_CLUSTER_NAME`` and
    ``MODEL_NAME``.

    Three script steps are chained (train, then evaluate, then register),
    sharing a ``jsonconfigs`` PipelineData object and an hour-resolution
    timestamp suffix. The pipeline is validated and published as
    ``training-pipeline``, and the published REST endpoint is written to
    ``ml_service/pipelines/train_pipeline.json``.
    """
    load_dotenv()
    read_env = os.environ.get

    # Workspace coordinates and service-principal credentials.
    workspace_name = read_env("BASE_NAME") + "-AML-WS"
    resource_group = read_env("BASE_NAME") + "-AML-RG"
    subscription_id = read_env("SUBSCRIPTION_ID")
    tenant_id = read_env("TENANT_ID")
    app_id = read_env("SP_APP_ID")
    app_secret = read_env("SP_APP_SECRET")

    # Script locations, compute settings and the model name default.
    train_sources_dir = read_env("SOURCES_DIR_TRAIN")
    train_script = read_env("TRAIN_SCRIPT_PATH")
    evaluate_script = read_env("EVALUATE_SCRIPT_PATH")
    register_script = read_env("REGISTER_SCRIPT_PATH")
    cpu_sku = read_env("AML_COMPUTE_CLUSTER_CPU_SKU")
    cpu_cluster_name = read_env("AML_COMPUTE_CLUSTER_NAME")
    default_model_name = read_env("MODEL_NAME")

    # Workspace handle.
    aml_workspace = get_workspace(
        workspace_name,
        resource_group,
        subscription_id,
        tenant_id,
        app_id,
        app_secret,
    )
    print(aml_workspace)

    # Compute target shared by all three steps.
    cpu_cluster = get_compute(aml_workspace, cpu_cluster_name, cpu_sku)
    if aml_compute_is_available := cpu_cluster is not None:
        print(cpu_cluster)

    # Docker-enabled run configuration with the conda/pip dependencies.
    dependencies = CondaDependencies.create(
        conda_packages=[
            'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'
        ],
        pip_packages=[
            'azure', 'azureml-core', 'azure-storage', 'azure-storage-blob'
        ],
    )
    run_config = RunConfiguration(conda_dependencies=dependencies)
    run_config.environment.docker.enabled = True

    model_name_param = PipelineParameter(name="model_name",
                                         default_value=default_model_name)
    blob_store = Datastore(aml_workspace, "workspaceblobstore")
    json_configs = PipelineData("jsonconfigs", datastore=blob_store)
    config_suffix = datetime.datetime.now().strftime("%Y%m%d%H")

    # Keyword arguments and argument fragments common to the steps below.
    shared_step_options = {
        "compute_target": cpu_cluster,
        "source_directory": train_sources_dir,
        "runconfig": run_config,
        "allow_reuse": False,
    }
    config_args = [
        "--config_suffix", config_suffix,
        "--json_config", json_configs,
    ]
    model_args = ["--model_name", model_name_param]

    # Training produces the JSON configs ...
    train_step = PythonScriptStep(
        name="Train Model",
        script_name=train_script,
        arguments=[*config_args, *model_args],
        outputs=[json_configs],
        **shared_step_options,
    )
    print("Step Train created")

    # ... which evaluation and registration consume as inputs.
    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=evaluate_script,
        arguments=[*config_args],
        inputs=[json_configs],
        **shared_step_options,
    )
    print("Step Evaluate created")

    register_model_step = PythonScriptStep(
        name="Register New Trained Model",
        script_name=register_script,
        arguments=[*config_args, *model_args],
        inputs=[json_configs],
        **shared_step_options,
    )
    print("Step register model created")

    # Chain the steps; only the last one is handed to the Pipeline, the
    # others are attached to it through the run_after dependencies.
    evaluate_step.run_after(train_step)
    register_model_step.run_after(evaluate_step)

    train_pipeline = Pipeline(workspace=aml_workspace,
                              steps=[register_model_step])
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name="training-pipeline",
        description="Model training/retraining pipeline",
    )

    # Persist the REST endpoint of the published pipeline.
    endpoint_record = {"rest_endpoint": published_pipeline.endpoint}
    with open("ml_service/pipelines/train_pipeline.json", "w") as outfile:
        json.dump(endpoint_record, outfile)
Exemplo n.º 29
0
def main():
    """Build and publish the train / evaluate / register pipeline.

    All settings come from an ``Env()`` instance. The attributes read are
    ``workspace_name``, ``subscription_id``, ``resource_group``,
    ``compute_name``, ``vm_size``, ``aml_env_name``, ``datastore_name``,
    ``model_name``, ``dataset_version``, ``dataset_name``,
    ``train_script_path``, ``evaluate_script_path``, ``register_script_path``,
    ``sources_directory_train``, ``allow_run_cancel``, ``run_evaluation``,
    ``pipeline_name`` and ``build_id``.

    The evaluation step is included only when ``run_evaluation`` equals
    ``'true'`` (case-insensitive); otherwise registration follows training
    directly. The pipeline is validated and published, not submitted.

    Raises:
        AttributeError: if ``e.run_evaluation`` has no ``lower`` method
            (for example when it is ``None``).
    """
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Obtain the Azure ML environment by name (create_new=False is passed
    # through to get_environment).
    environment = get_environment(aml_workspace,
                                  e.aml_env_name,
                                  create_new=False)

    run_config = RunConfiguration()
    run_config.environment = environment

    # Use the configured datastore name when one is set, otherwise fall back
    # to the workspace default; the name is exposed to the step scripts
    # through the DATASTORE_NAME environment variable.
    datastore_name = (e.datastore_name
                      or aml_workspace.get_default_datastore().name)
    run_config.environment.environment_variables[
        "DATASTORE_NAME"] = datastore_name

    model_name_param = PipelineParameter(name="model_name",
                                         default_value=e.model_name)
    dataset_version_param = PipelineParameter(name="dataset_version",
                                              default_value=e.dataset_version)
    data_file_path_param = PipelineParameter(name="data_file_path",
                                             default_value="none")
    caller_run_id_param = PipelineParameter(name="caller_run_id",
                                            default_value="none")

    # Get dataset name
    # This function does not create, upload or register the dataset; only its
    # name is forwarded to the training script via --dataset_name.
    # NOTE(review): presumably the dataset must already be registered in the
    # workspace before the pipeline runs - confirm.
    dataset_name = e.dataset_name

    # Create a PipelineData to pass data between steps
    pipeline_data = PipelineData(
        'pipeline_data', datastore=aml_workspace.get_default_datastore())

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        outputs=[pipeline_data],
        arguments=[
            "--model_name",
            model_name_param,
            "--step_output",
            pipeline_data,
            "--dataset_version",
            dataset_version_param,
            "--data_file_path",
            data_file_path_param,
            "--caller_run_id",
            caller_run_id_param,
            "--dataset_name",
            dataset_name,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--model_name",
            model_name_param,
            "--allow_run_cancel",
            e.allow_run_cancel,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name=e.register_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        inputs=[pipeline_data],
        arguments=[
            "--model_name",
            model_name_param,
            "--step_input",
            pipeline_data,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Register created")
    # Check run_evaluation flag to include or exclude evaluation step.
    if e.run_evaluation.lower() == 'true':
        print("Include evaluation step before register step.")
        evaluate_step.run_after(train_step)
        register_step.run_after(evaluate_step)
        steps = [train_step, evaluate_step, register_step]
    else:
        print("Exclude evaluation step and directly run register step.")
        register_step.run_after(train_step)
        steps = [train_step, register_step]

    # The original code had a bare `train_pipeline._set_experiment_name`
    # expression here: an attribute access without a call, which did nothing.
    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
def main():
    """Build and publish the diabetes train / evaluate / register pipeline.

    All settings come from an ``Env()`` instance. The attributes read are
    ``workspace_name``, ``subscription_id``, ``resource_group``,
    ``compute_name``, ``vm_size``, ``sources_directory_train``,
    ``collection_uri``, ``teamproject_name``, ``model_name``, ``build_id``,
    ``dataset_name``, ``train_script_path``, ``evaluate_script_path``,
    ``register_script_path``, ``allow_run_cancel``, ``run_evaluation`` and
    ``pipeline_name``.

    When no dataset named ``e.dataset_name`` is found in the workspace, the
    scikit-learn diabetes sample is written to ``diabetes.csv`` in the
    current working directory, uploaded to the default datastore under
    ``training-data/`` and registered under that name.

    The evaluation step is included only when ``run_evaluation`` equals
    ``'true'`` (case-insensitive); otherwise registration follows training
    directly. The pipeline is validated and published, not submitted.

    Raises:
        AttributeError: if ``e.run_evaluation`` has no ``lower`` method
            (for example when it is ``None``).
    """
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable run configuration environment from the definition
    # stored in the training sources directory.
    environment = Environment.load_from_directory(e.sources_directory_train)
    if e.collection_uri is not None and e.teamproject_name is not None:
        # Base URL that the step scripts can combine with a build id.
        builduri_base = e.collection_uri + e.teamproject_name
        builduri_base = builduri_base + "/_build/results?buildId="
        environment.environment_variables["BUILDURI_BASE"] = builduri_base
    environment.register(aml_workspace)

    run_config = RunConfiguration()
    run_config.environment = environment

    model_name_param = PipelineParameter(name="model_name",
                                         default_value=e.model_name)
    build_id_param = PipelineParameter(name="build_id",
                                       default_value=e.build_id)

    # Get dataset name
    dataset_name = e.dataset_name

    # Check to see if dataset exists
    if dataset_name not in aml_workspace.datasets:
        # Create dataset from diabetes sample data
        sample_data = load_diabetes()
        df = pd.DataFrame(data=sample_data.data,
                          columns=sample_data.feature_names)
        df['Y'] = sample_data.target
        # Written to the current working directory and left there afterwards.
        file_name = 'diabetes.csv'
        df.to_csv(file_name, index=False)

        # Upload file to default datastore in workspace
        default_ds = aml_workspace.get_default_datastore()
        target_path = 'training-data/'
        default_ds.upload_files(files=[file_name],
                                target_path=target_path,
                                overwrite=True,
                                show_progress=False)

        # Register dataset
        path_on_datastore = os.path.join(target_path, file_name)
        dataset = Dataset.Tabular.from_delimited_files(
            path=(default_ds, path_on_datastore))
        dataset = dataset.register(workspace=aml_workspace,
                                   name=dataset_name,
                                   description='diabetes training data',
                                   tags={'format': 'CSV'},
                                   create_new_version=True)

    # Get the dataset (looked up by name whether or not it was just created)
    dataset = Dataset.get_by_name(aml_workspace, dataset_name)

    # Create a PipelineData to pass data between steps
    pipeline_data = PipelineData(
        'pipeline_data', datastore=aml_workspace.get_default_datastore())

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        inputs=[dataset.as_named_input('training_data')],
        outputs=[pipeline_data],
        arguments=[
            "--build_id", build_id_param, "--model_name", model_name_param,
            "--step_output", pipeline_data
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id",
            build_id_param,
            "--model_name",
            model_name_param,
            "--allow_run_cancel",
            e.allow_run_cancel,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name=e.register_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        inputs=[pipeline_data],
        arguments=[
            "--build_id",
            build_id_param,
            "--model_name",
            model_name_param,
            "--step_input",
            pipeline_data,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Register created")
    # Check run_evaluation flag to include or exclude evaluation step.
    if e.run_evaluation.lower() == 'true':
        print("Include evaluation step before register step.")
        evaluate_step.run_after(train_step)
        register_step.run_after(evaluate_step)
        steps = [train_step, evaluate_step, register_step]
    else:
        print("Exclude evaluation step and directly run register step.")
        register_step.run_after(train_step)
        steps = [train_step, register_step]

    # The original code had a bare `train_pipeline._set_experiment_name`
    # expression here: an attribute access without a call, which did nothing.
    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')