def load_module(workspace, namespace, name, yaml_file_path):
    """Return the module function for ``name``, registering it when loading fails.

    ``Module.load`` is tried first with the given workspace, namespace and
    name. If that call raises, the module is registered from the YAML spec
    via ``Module.register`` and the result of the registration is returned.

    Args:
        workspace: Forwarded as ``workspace`` to ``Module.load`` and
            ``Module.register``.
        namespace: Forwarded as ``namespace`` to ``Module.load``.
        name: Forwarded as ``name`` to ``Module.load``; also used in the
            progress messages printed to stdout.
        yaml_file_path: Forwarded as ``yaml_file`` to ``Module.register``
            when the load attempt fails.

    Returns:
        The value returned by ``Module.load`` or, on fallback, by
        ``Module.register``.

    Raises:
        Whatever ``Module.register`` raises if the fallback registration
        itself fails.
    """
    try:
        module_func = Module.load(workspace=workspace, namespace=namespace, name=name)
    except Exception:
        # Only ordinary errors mean "module not available". A bare ``except:``
        # would also intercept KeyboardInterrupt/SystemExit and start a
        # registration while the user is trying to stop the script.
        print('not found the module of {}, register it now...'.format(name))
        return Module.register(workspace=workspace, yaml_file=yaml_file_path)

    print('found the module of {}'.format(name))
    return module_func
except: print("Creating new compute target: {}".format(aml_compute_target)) provisioning_config = AmlCompute.provisioning_configuration( vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4) aml_compute = ComputeTarget.create(workspace, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # In[ ]: try: mpi_train_module_func = Module.load( workspace, namespace="microsoft.com/azureml/samples", name="Hello World MPI Job") except: mpi_train_module_func = Module.register( workspace, os.path.join('modules', 'mpi_module', 'module_spec.yaml')) from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path blob_input_data = get_global_dataset_by_path( workspace, 'Automobile_price_data', 'GenericCSV/Automobile_price_data_(Raw)') mpi_train = mpi_train_module_func(input_path=blob_input_data, string_parameter="test1") mpi_train.runsettings.configure(node_count=2, process_count_per_node=2) print(mpi_train.runsettings.node_count)
except: print("Creating new compute target: {}".format(aml_compute_target)) provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_D2_V2", min_nodes = 1, max_nodes = 4) aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # In[ ]: # modules try: ejoin_module_func = Module.load(ws, namespace='microsoft.com/bing', name='ejoin') eselect_module_func = Module.load(ws, namespace='microsoft.com/bing', name='eselect') except: ejoin_module_func = Module.register(ws, os.path.join('modules', 'ejoin', 'amlmodule.yaml')) eselect_module_func = Module.register(ws, os.path.join('modules', 'eselect', 'amlmodule.yaml')) training_data_name = "Titanic.tsv" if training_data_name not in ws.datasets: print('Registering a training dataset for sample pipeline ...') train_data = Dataset.File.from_files(path=['https://desginerdemo.blob.core.windows.net/demo/titanic.tsv']) train_data.register(workspace = ws, name = training_data_name, description = 'Training data (just for illustrative purpose)') print('Registerd') else: train_data = ws.datasets[training_data_name]
# --------------------------------------------------------- # Copyright (c) Microsoft Corporation. All rights reserved. # --------------------------------------------------------- from azureml.core import Workspace from azureml.pipeline.wrapper import Module, dsl ws = Workspace.from_config() execute_python_script_module = Module.load(ws, namespace='azureml', name='Execute Python Script') @dsl.pipeline(name='external sub0 graph', description='sub0') def external_sub_pipeline0(input): module1 = execute_python_script_module( # should be pipeline input dataset1=input, ) module2 = execute_python_script_module( dataset1=module1.outputs.result_dataset, ) return module2.outputs
# register anonymous modules import os from azureml.pipeline.wrapper._module_registration import _load_anonymous_module local_module = _load_anonymous_module(ws, yaml_file=os.path.join( 'modules', 'hello_world', 'module_spec.yaml')) github_yaml = "https://github.com/sherry1989/sample_modules/blob/master/3_basic_module/basic_module.yaml" github_module = _load_anonymous_module(ws, yaml_file=github_yaml) hello_world_module_id = local_module.module_version_id basic_module_id = github_module.module_version_id # In[ ]: # get modules hello_world_anonymous = Module.load(ws, id=hello_world_module_id) basic_module_anonymous = Module.load(ws, id=basic_module_id) # In[ ]: # get dataset from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path automobile_price_data_raw = get_global_dataset_by_path( ws, 'automobile_price_data_raw', 'GenericCSV/Automobile_price_data_(Raw)') # In[ ]: # define pipeline @dsl.pipeline(name='module_SDK_test Run 8575', description='test local module',
print("Found existing compute target: {}".format(aml_compute_target)) except: print("Creating new compute target: {}".format(aml_compute_target)) provisioning_config = AmlCompute.provisioning_configuration(vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4) aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # In[ ]: # Module select_columns_in_dataset = Module.load(ws, namespace='azureml', name='Select Columns in Dataset') clean_missing_data = Module.load(ws, namespace='azureml', name='Clean Missing Data') split_data = Module.load(ws, namespace='azureml', name='Split Data') join_data = Module.load(ws, namespace='azureml', name='Join Data') # Dataset try: dset = Dataset.get_by_name(ws, 'Automobile_price_data_(Raw)') except Exception: global_datastore = Datastore(ws, name="azureml_globaldatasets") dset = Dataset.File.from_files(global_datastore.path('GenericCSV/Automobile_price_data_(Raw)')) dset.register(workspace=ws, name='Automobile_price_data_(Raw)', create_new_version=True) blob_input_data = dset
aml_compute = AmlCompute(workspace, aml_compute_target) print("Found existing compute target: {}".format(aml_compute_target)) except: print("Creating new compute target: {}".format(aml_compute_target)) provisioning_config = AmlCompute.provisioning_configuration( vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4) aml_compute = ComputeTarget.create(workspace, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) try: mpi_train_module_func = Module.load( workspace, namespace="microsoft.com/azureml/samples", name="Hello World MPI Job") except: mpi_train_module_func = Module.register( workspace, os.path.join('modules', 'mpi_module', 'module_spec.yaml')) from azureml.pipeline.wrapper._dataset import get_global_dataset_by_path blob_input_data = get_global_dataset_by_path( workspace, 'Automobile_price_data', 'GenericCSV/Automobile_price_data_(Raw)') mpi_train = mpi_train_module_func(input_path=blob_input_data, string_parameter="test1") mpi_train.runsettings.configure(node_count=2, process_count_per_node=2) print(mpi_train.runsettings.node_count)
provisioning_config = AmlCompute.provisioning_configuration( vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4) aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # In[ ]: # modules try: ejoin_module_func = Module.load(ws, namespace='microsoft.com/bing', name='ejoin') eselect_module_func = Module.load(ws, namespace='microsoft.com/bing', name='eselect') except: ejoin_module_func = Module.register( ws, os.path.join('modules', 'ejoin', 'amlmodule.yaml')) eselect_module_func = Module.register( ws, os.path.join('modules', 'eselect', 'amlmodule.yaml')) join_data_module_func = Module.load(ws, namespace='azureml', name='Join Data') train_svd_recommender_module_func = Module.load(ws, namespace='azureml', name='Train SVD Recommender')
aml_compute = AmlCompute(ws, aml_compute_target) print("Found existing compute target: {}".format(aml_compute_target)) except: print("Creating new compute target: {}".format(aml_compute_target)) provisioning_config = AmlCompute.provisioning_configuration( vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4) aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # In[ ]: join_data_module_func = Module.load(ws, namespace='azureml', name='Join Data') execute_python_script_module_func = Module.load(ws, namespace='azureml', name='Execute Python Script') remove_duplicate_rows_module_func = Module.load(ws, namespace='azureml', name='Remove Duplicate Rows') split_data_module_func = Module.load(ws, namespace='azureml', name='Split Data') train_svd_recommender_module_func = Module.load(ws, namespace='azureml', name='Train SVD Recommender') select_columns_module_func = Module.load(ws, namespace='azureml', name='Select Columns in Dataset')
except: print("Creating new compute target: {}".format(aml_compute_target)) provisioning_config = AmlCompute.provisioning_configuration( vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4) aml_compute = ComputeTarget.create(ws, aml_compute_target, provisioning_config) aml_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20) # In[ ]: try: train_module_func = Module.load(ws, namespace='microsoft.com/aml/samples', name='Train') except: train_module_func = Module.register( ws, os.path.join('modules', 'train-score-eval', 'train.yaml')) try: score_module_func = Module.load(ws, namespace='microsoft.com/aml/samples', name='Score') except: score_module_func = Module.register( ws, os.path.join('modules', 'train-score-eval', 'score.yaml')) try: eval_module_func = Module.load(ws,