def create_run_config(cpu_cluster, docker_proc_type, conda_env_file):
    """
    AzureML requires the run environment to be set up prior to submission.
    This configures a Docker persistent compute. Even though it is called
    persistent compute, AzureML handles startup/shutdown of the compute
    environment.

    Args:
        cpu_cluster (str)      : Names the cluster for the test.
                                 In the case of unit tests, any of the following:
                                 - Reco_cpu_test
                                 - Reco_gpu_test
        docker_proc_type (str) : Processor type, cpu or gpu
        conda_env_file (str)   : Filename which contains info to set up the conda env

    Return:
        run_amlcompute : AzureML run config
    """
    # runconfig with max_run_duration_seconds did not work, check why:
    # run_amlcompute = RunConfiguration(max_run_duration_seconds=60*30)
    run_amlcompute = RunConfiguration()
    run_amlcompute.target = cpu_cluster
    run_amlcompute.environment.docker.enabled = True
    run_amlcompute.environment.docker.base_image = docker_proc_type

    # Use conda_dependencies.yml to create a conda environment in
    # the Docker image for execution.
    # False means the user will provide a conda file for setup;
    # True means the user will manually configure the environment.
    run_amlcompute.environment.python.user_managed_dependencies = False
    run_amlcompute.environment.python.conda_dependencies = CondaDependencies(
        conda_dependencies_file_path=conda_env_file)
    return run_amlcompute
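# A minimal usage sketch for the helper above: the workspace config, experiment
# name, test script, base image tag, and conda file path are hypothetical
# placeholders, not values taken from the original snippet.
from azureml.core import Experiment, ScriptRunConfig, Workspace

ws = Workspace.from_config()  # assumes a workspace config.json is available
run_config = create_run_config(
    cpu_cluster="Reco_cpu_test",                                # cluster name from the docstring
    docker_proc_type="mcr.microsoft.com/azureml/base:latest",   # hypothetical base image
    conda_env_file="reco_base.yaml")                            # hypothetical conda file
src = ScriptRunConfig(source_directory=".", script="run_tests.py", run_config=run_config)
run = Experiment(ws, "unit_tests").submit(src)
run.wait_for_completion(show_output=True)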
def _create_default_run_configs(project_directory, compute_target_dict):
    """
    Creates a local.runconfig and docker.runconfig for a project.

    :return: None
    """
    from azureml.core.runconfig import RunConfiguration

    # Mocking a project object, as RunConfiguration requires a Project object,
    # but only requires the project_directory field.
    project_object = empty_function
    project_object.project_directory = project_directory

    # Creating a local runconfig.
    local_run_config = RunConfiguration()
    local_run_config.save(name="local", path=project_directory)

    # Creating a docker runconfig.
    docker_run_config = RunConfiguration()
    docker_run_config.environment.docker.enabled = True
    docker_run_config.save(name="docker", path=project_directory)

    for compute_target_name, compute_target in compute_target_dict.items():
        # Creating a compute runconfig.
        compute_config = RunConfiguration()
        if compute_target.type == 'HDInsight':
            compute_config.framework = "PySpark"
        else:
            compute_config.framework = "Python"
        compute_config.environment.docker.enabled = True
        compute_config.target = compute_target_name
        compute_config.save(name=compute_target_name, path=project_directory)
def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      all_azure_dataset_ids: List[str],
                      all_dataset_mountpoints: List[str],
                      environment_name: str = "") -> ScriptRunConfig:
    """
    Creates a configuration to run the InnerEye training script in AzureML.

    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param all_azure_dataset_ids: The names of all datasets on blob storage that will be used for this run.
    :param all_dataset_mountpoints: When using the datasets in AzureML, these are the per-dataset mount points.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
        is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
        when running inference for an existing model.
    :return: The configured script run.
    """
    dataset_consumptions = create_dataset_consumptions(
        azure_config, all_azure_dataset_ids, all_dataset_mountpoints)
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = source_config.entry_script.relative_to(
        source_config.root_folder).as_posix()
    logging.info(
        f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
        f"source directory {source_config.root_folder})")
    max_run_duration = None
    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(
            azure_config.max_run_duration)
    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(
        script=entry_script_relative_path,
        arguments=source_config.script_params,
    )
    run_config.environment = get_or_create_python_environment(
        azure_config, source_config, environment_name=environment_name)
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration
    if azure_config.num_nodes > 1:
        distributed_job_config = MpiConfiguration(
            node_count=azure_config.num_nodes)
        run_config.mpi = distributed_job_config
        run_config.framework = "Python"
        run_config.communicator = "IntelMpi"
        run_config.node_count = distributed_job_config.node_count
    if len(dataset_consumptions) > 0:
        run_config.data = {
            dataset.name: dataset
            for dataset in dataset_consumptions
        }
    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    run_config.source_directory_data_store = workspace.datastores.get(
        WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
    script_run_config = ScriptRunConfig(
        source_directory=str(source_config.root_folder),
        run_config=run_config,
    )
    if azure_config.hyperdrive:
        script_run_config = source_config.hyperdrive_config_func(
            script_run_config)  # type: ignore
    return script_run_config
def __get_run_config(self, compute_target, channels=None, conda_packages=None, pip_packages=None):
    # Create a run configuration for the Python framework
    run_config = RunConfiguration(framework="python")

    # Set compute target to the Linux DSVM
    run_config.target = compute_target.name

    # Do not use Docker in the remote VM
    run_config.environment.docker.enabled = False

    # Ask the system to provision a new environment based on the conda dependencies
    run_config.environment.python.user_managed_dependencies = False

    # Prepare the Docker and conda environment automatically when used the first time.
    run_config.auto_prepare_environment = True

    # Specify the dependencies object
    conda_dependencies = CondaDependencies.create(
        conda_packages=conda_packages, pip_packages=pip_packages)
    if channels:
        for channel in channels:
            conda_dependencies.add_channel(channel)
    run_config.environment.python.conda_dependencies = conda_dependencies

    return run_config
def fetch_run_config(compute_target, base_image, sp_username, sp_tenant, sp_password):
    """
    Generates a Run Configuration based on the pipeline parameters, specifying
    such things as the Compute Target and Conda Dependencies.
    """
    # Inits configuration for Python
    run_config = RunConfiguration(framework="python")

    # Specifies compute target
    run_config.target = compute_target

    # Configures Docker/Image/Environment Variable parameters
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = base_image
    run_config.environment.environment_variables = {
        "SP_USERNAME": sp_username,
        "SP_TENANT": sp_tenant,
        "SP_PASSWORD": sp_password
    }

    # Specifies Conda file location (auto-injected from preparing staging)
    run_config.environment.python.conda_dependencies = CondaDependencies(
        os.path.join("snapshot", "inputs", "environment.yml"))

    # Returns configuration
    return run_config
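# On the training side, the injected variables can be read back from the process
# environment. A hedged sketch, assuming the remote script authenticates with the
# same service principal values placed into the run configuration above.
import os
from azureml.core.authentication import ServicePrincipalAuthentication

# Variable names match those set in run_config.environment.environment_variables
sp_auth = ServicePrincipalAuthentication(
    tenant_id=os.environ["SP_TENANT"],
    service_principal_id=os.environ["SP_USERNAME"],
    service_principal_password=os.environ["SP_PASSWORD"])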
def _write_compute_run_config(source_directory, compute_target_object, compute_yaml):
    """
    :param source_directory:
    :type source_directory: str
    :param compute_target_object:
    :type compute_target_object: azureml.core.compute_target.AbstractComputeTarget
    :param compute_yaml:
    :type compute_yaml: dict
    :return:
    """
    from azureml.core.compute_target import _BatchAITarget

    # Writing the target.compute file.
    run_config_dir_name = get_run_config_dir_name(source_directory)
    file_path = os.path.join(source_directory, run_config_dir_name,
                             compute_target_object.name + COMPUTECONTEXT_EXTENSION)
    with open(file_path, 'w') as outfile:
        ruamel.yaml.dump(compute_yaml, outfile, default_flow_style=False)

    # This creates a run config and writes it in the aml_config/<compute_target_name>.runconfig file
    run_config_object = RunConfiguration()
    run_config_object.target = compute_target_object

    if compute_target_object.type == _BatchAITarget._BATCH_AI_TYPE:
        run_config_object.environment.docker.enabled = True

    run_config_object.framework = compute_target_object._default_framework

    run_config_object.save(name=compute_target_object.name, path=source_directory)
def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      azure_dataset_id: str = "",
                      environment_name: str = "") -> ScriptRunConfig:
    """
    Creates a configuration to run the InnerEye training script in AzureML.

    :param azure_config: azure related configurations to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param azure_dataset_id: The name of the dataset in blob storage to be used for this run. This can be an empty
        string to not use any datasets.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
        is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
        when running inference for an existing model.
    :return: The configured script run.
    """
    if azure_dataset_id:
        azureml_dataset = get_or_create_dataset(azure_config, azure_dataset_id=azure_dataset_id)
        if not azureml_dataset:
            raise ValueError(f"AzureML dataset {azure_dataset_id} could not be found or created.")
        named_input = azureml_dataset.as_named_input(INPUT_DATA_KEY)
        dataset_consumption = named_input.as_mount() if azure_config.use_dataset_mount else named_input.as_download()
    else:
        dataset_consumption = None
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = source_config.entry_script.relative_to(source_config.root_folder).as_posix()
    logging.info(f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
                 f"source directory {source_config.root_folder})")
    max_run_duration = None
    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(azure_config.max_run_duration)
    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(
        script=entry_script_relative_path,
        arguments=source_config.script_params,
    )
    run_config.environment = get_or_create_python_environment(azure_config, source_config,
                                                              environment_name=environment_name)
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration
    if azure_config.num_nodes > 1:
        distributed_job_config = MpiConfiguration(node_count=azure_config.num_nodes)
        run_config.mpi = distributed_job_config
        run_config.framework = "Python"
        run_config.communicator = "IntelMpi"
        run_config.node_count = distributed_job_config.node_count
    if dataset_consumption:
        run_config.data = {dataset_consumption.name: dataset_consumption}
    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    run_config.source_directory_data_store = workspace.datastores.get(WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
    script_run_config = ScriptRunConfig(
        source_directory=str(source_config.root_folder),
        run_config=run_config,
    )
    if azure_config.hyperdrive:
        script_run_config = source_config.hyperdrive_config_func(script_run_config)  # type: ignore
    return script_run_config
def get_run_config(self, config):
    environment_config = config.get("environment")
    environment = self.get_environment(environment_config)

    cluster_name = config.get("cluster")
    cluster = ComputeTarget(workspace=self.workspace, name=cluster_name)

    pipeline_run_config = RunConfiguration()
    pipeline_run_config.target = cluster
    pipeline_run_config.environment = environment
    return pipeline_run_config
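# A sketch of the config dictionary this method expects. The environment and
# cluster names are hypothetical, and `get_environment` is assumed to resolve
# whatever is stored under "environment" to an azureml Environment object.
config = {
    "environment": {"name": "my-registered-env"},  # hypothetical; shape depends on get_environment
    "cluster": "cpu-cluster",                      # name of an existing compute target in the workspace
}
pipeline_run_config = builder.get_run_config(config)  # `builder` is a hypothetical instance of the class above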
def create_runconfig(aml_compute, env=None):
    # Create a new runconfig object
    aml_run_config = RunConfiguration()

    # Use the aml_compute you created above.
    aml_run_config.target = aml_compute

    if env:
        aml_run_config.environment = env
    else:
        aml_run_config.environment = create_env_from_requirements()

    return aml_run_config
def create_runconfig(aml_compute, env=None):
    # Create a new runconfig object
    aml_run_config = RunConfiguration()

    # Use the aml_compute you created above.
    aml_run_config.target = aml_compute

    if env is not None:
        aml_run_config.environment = env
    else:
        # Enable Docker
        aml_run_config.environment.docker.enabled = True

        # Set Docker base image to the default CPU-based image
        aml_run_config.environment.docker.base_image = "mcr.microsoft.com/azureml/base:0.2.1"

        # Use conda_dependencies.yml to create a conda environment in the Docker image for execution
        aml_run_config.environment.python.user_managed_dependencies = False

    return aml_run_config
def mi_run_config(ws, compute):
    whl_url = Environment.add_private_pip_wheel(workspace=ws,
                                                file_path=d.WHL_VINX_AZURE_ML,
                                                exist_ok=True)
    run_config = RunConfiguration()
    run_config.target = compute
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = None
    run_config.environment.docker.base_dockerfile = (
        'FROM mcr.microsoft.com/azureml/base:latest\n'
        'RUN apt-get update && apt-get -y install freetds-dev freetds-bin vim gcc')
    run_config.environment.python.user_managed_dependencies = False
    run_config.environment.python.conda_dependencies = CondaDependencies.create(
        conda_packages=[
            'tqdm', 'cython', 'matplotlib', 'scikit-learn', 'fbprophet'
        ],
        pip_packages=[
            'azureml-sdk', 'pandas', 'lightgbm', 'scipy==1.4.1', 'statsmodels',
            'mlxtend', 'optuna', 'xgboost', 'CatBoost', 'tensorflow', 'keras',
            'jpholiday', 'joblib', 'pymssql==2.1.1'
        ],
        pin_sdk_version=False)
    run_config.environment.python.conda_dependencies.add_pip_package(whl_url)
    return run_config
def get_run_config(aml_compute, conda_dep):
    run_amlcompute = RunConfiguration()
    run_amlcompute.target = aml_compute

    dockerfile = r"""
FROM mcr.microsoft.com/azureml/base:intelmpi2018.3-ubuntu16.04
RUN apt-get update && \
    apt-get install -y sudo curl apt-transport-https && \
    apt-get update && \
    sudo su && \
    curl https://packages.microsoft.com/keys/microsoft.asc | apt-key add - && \
    curl https://packages.microsoft.com/config/ubuntu/16.04/prod.list > /etc/apt/sources.list.d/mssql-release.list && \
    sudo apt-get update && \
    sudo ACCEPT_EULA=Y apt-get install -y msodbcsql17 && \
    sudo apt-get install -y unixodbc-dev
"""

    run_amlcompute.environment.docker.enabled = True
    run_amlcompute.environment.docker.base_image = None
    run_amlcompute.environment.docker.base_dockerfile = dockerfile
    run_amlcompute.environment.python.conda_dependencies = conda_dep
    return run_amlcompute
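# One way the returned configuration might be wired into a pipeline step. The
# packages, step name, script, and folder below are hypothetical; pyodbc pairs
# naturally with the msodbcsql17/unixodbc-dev install in the Dockerfile above.
from azureml.core.conda_dependencies import CondaDependencies
from azureml.pipeline.steps import PythonScriptStep

conda_dep = CondaDependencies.create(pip_packages=['pyodbc', 'pandas'])  # hypothetical packages
run_config = get_run_config(aml_compute='cpu-cluster', conda_dep=conda_dep)

query_step = PythonScriptStep(name='query-sql',        # hypothetical step name
                              script_name='query.py',  # hypothetical script
                              source_directory='./src',
                              compute_target='cpu-cluster',
                              runconfig=run_config,
                              allow_reuse=True)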
from azureml.core import Workspace
ws = Workspace.from_config()

from azureml.core.compute import ComputeTarget

# refers to an existing compute resource attached to the workspace!
hdi_compute = ComputeTarget(workspace=ws, name='sherihdi')

#<run_hdi>
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

# use pyspark framework
run_hdi = RunConfiguration(framework="pyspark")

# Set compute target to the HDI cluster
run_hdi.target = hdi_compute.name

# specify a CondaDependencies object to ask the system to install numpy
cd = CondaDependencies()
cd.add_conda_package('numpy')
run_hdi.environment.python.conda_dependencies = cd
#</run_hdi>

print(run_hdi)
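# A hedged follow-up showing how the HDI run configuration could be submitted;
# the experiment name and PySpark script are hypothetical.
from azureml.core import Experiment, ScriptRunConfig

src = ScriptRunConfig(source_directory='.', script='train_spark.py', run_config=run_hdi)
run = Experiment(ws, 'hdi-experiment').submit(src)
run.wait_for_completion(show_output=True)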
def run(workspace, config, args):
    compute_target_name = config['train']['compute_target_name']
    data_folder = config['train']['data_folder']

    try:
        compute_target = ComputeTarget(workspace=workspace, name=compute_target_name)
        print('found existing:', compute_target.name)
    except ComputeTargetException:
        print('creating new.')
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=config['train']['vm_size'],
            min_nodes=0,
            max_nodes=1)
        compute_target = ComputeTarget.create(workspace, compute_target_name, compute_config)
        compute_target.wait_for_completion(show_output=True)

    # ds = Datastore.register_azure_blob_container(
    #     workspace,
    #     datastore_name=config['train']['datastore_name'],
    #     account_name=config['train']['account_name'],
    #     account_key=config['train']['account_key'],
    #     container_name=config['train']['container_name'],
    #     overwrite=True)
    #
    # # Upload local "data" folder (incl. files) as "tfdata" folder
    # ds.upload(
    #     src_dir=config['train']['local_directory'],
    #     target_path=data_folder,
    #     overwrite=True)

    ds = Datastore.get(workspace, datastore_name=config['train']['datastore_name'])

    # generate data reference configuration
    dr_conf = DataReferenceConfiguration(
        datastore_name=ds.name,
        path_on_datastore=data_folder,
        mode='mount')  # set 'download' if you copy all files instead of mounting

    run_config = RunConfiguration(
        framework="python",
        conda_dependencies=CondaDependencies.create(
            conda_packages=ast.literal_eval(config['train']['conda_packages'])))
    run_config.target = compute_target.name
    run_config.data_references = {ds.name: dr_conf}
    run_config.environment.docker.enabled = True
    # run_config.environment.docker.gpu_support = True
    run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE

    src = ScriptRunConfig(
        source_directory='./script',
        script='train.py',
        run_config=run_config,
        arguments=[
            '--datadir', str(ds.as_mount()),
            '--step', args.step,
            '--train_on', args.train_on,
            '--fold', args.fold,
            '--epochs', args.epochs,
            '--experiment', args.experiment,
            '--reference', args.reference,
            '--batchsize', args.batchsize,
            '--optimizertype', args.optimizertype,
            '--convrnn_filters', args.convrnn_filters,
            '--learning_rate', args.learning_rate,
            '--pix250m', args.pix250m
        ])

    # exp = Experiment(workspace=ws, name='test20181210-09')
    exp = Experiment(workspace=workspace, name=config['train']['experiment_name'])
    run = exp.submit(config=src)
    run.wait_for_completion(show_output=True)
packages = CondaDependencies.create(
    pip_packages=[
        'azureml-sdk', 'PyYAML', 'azure-storage-blob', 'matplotlib', 'seaborn',
        'tensorflow', 'Keras', 'tensorflow-hub', 'joblib', 'tqdm', 'Pillow',
        'azureml-dataprep[pandas,fuse]>=1.1.14'
    ])

diagnoz_env = Environment("diagnoz-pipeline-env")
diagnoz_env.python.user_managed_dependencies = False  # Let Azure ML manage dependencies
diagnoz_env.docker.enabled = True                     # Use a docker container
diagnoz_env.docker.base_image = DEFAULT_GPU_IMAGE
diagnoz_env.python.conda_dependencies = packages
diagnoz_env.register(workspace=ws)

# Runconfigs
pipeline_run_config = RunConfiguration()
pipeline_run_config.target = compute_target
pipeline_run_config.environment = diagnoz_env
print("Run configuration created.")

shutil.rmtree(script_folder, ignore_errors=True)
os.makedirs(script_folder, exist_ok=True)

# copy all necessary scripts
files = FilesProviders.get_path_files(
    "../", [os.path.basename(__file__), "__init__.py"])
for f in files:
    shutil.copy(f, script_folder)

# add generated config file to script folder
shutil.copy(generated_config_file, script_folder)
cli_auth = AzureCliAuthentication()
print('done creating AzureCliAuthentication!')

print('get workspace...')
ws = Workspace.from_config(path=args.path, auth=cli_auth)
print('done getting workspace!')

print("looking for existing compute target.")
aml_compute = AmlCompute(ws, args.aml_compute_target)
print("found existing compute target.")

# Create a new runconfig object
run_amlcompute = RunConfiguration()

# Use the cpu_cluster you created above.
run_amlcompute.target = args.aml_compute_target

# Enable Docker
run_amlcompute.environment.docker.enabled = True

# Set Docker base image to the default CPU-based image
run_amlcompute.environment.docker.base_image = DEFAULT_CPU_IMAGE

# Use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_amlcompute.environment.python.user_managed_dependencies = False

# Auto-prepare the Docker image when used for execution (if it is not already prepared)
run_amlcompute.auto_prepare_environment = True

# Specify CondaDependencies obj, add necessary packages
run_amlcompute.environment.python.conda_dependencies = CondaDependencies.create(
ws = Workspace.from_config(path='./aml_config/config.json')
print(ws.name)

experiment_name = 'train-on-amlcompute'
experiment = Experiment(workspace=ws, name=experiment_name)

project_folder = './train-on-amlcompute'
os.makedirs(project_folder, exist_ok=True)
shutil.copy('/code/training/train.py', project_folder)

# create a new runconfig object
run_config = RunConfiguration()

# signal that you want to use AmlCompute to execute script.
run_config.target = "amlcompute"

# AmlCompute will be created in the same region as workspace
# Set vm size for AmlCompute
run_config.amlcompute.vm_size = 'STANDARD_D2_V2'

# enable Docker
run_config.environment.docker.enabled = True

# set Docker base image to the default CPU-based image
run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE

# use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_config.environment.python.user_managed_dependencies = False

# auto-prepare the Docker image when used for execution (if it is not already prepared)
    run_conf.environment.python.user_managed_dependencies = True
    compute_target = compute_name

# Use AzureML compute target:
else:
    # Create compute target if it doesn't already exist:
    try:
        compute_target = ComputeTarget(workspace=ws, name=compute_name)
    except ComputeTargetException:
        compute_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6', min_nodes=0, max_nodes=6)
        compute_target = ComputeTarget.create(ws, compute_name, compute_config)
        compute_target.wait_for_completion(show_output=True)

    run_conf.target = compute_target
    run_conf.environment.docker.enabled = True
    run_conf.environment.docker.base_image = DEFAULT_CPU_IMAGE
    run_conf.environment.python.conda_dependencies = \
        CondaDependencies(conda_dependencies_file_path='env.yml')
    run_conf.environment.python.user_managed_dependencies = False
    if cv:
        run_conf.communicator = 'OpenMPI'
        run_conf.mpi = MpiConfiguration()
        run_conf.node_count = cv + 2

exp = Experiment(workspace=ws, name=config['experiment_name'])

use_estimator = True
if use_estimator:
    if cv:
        script_params = {'--cv': cv}
print('..3. completed')
print('')
print('')

print('4. Instantiate AML managed compute ref...')
print('.............................................')
amlTrainingComputeRef = AmlCompute(amlWs, args.aml_compute_target)
print('..4. completed')
print('')
print('')

print("5. Instantiate and configure run object for the managed compute...")
print('.............................................')

# Create runconfig object
amlComputeRunConf = RunConfiguration()

# Use the compute provisioned
amlComputeRunConf.target = args.aml_compute_target

# Enable Docker
amlComputeRunConf.environment.docker.enabled = True

# Set Docker base image to the default CPU-based image
amlComputeRunConf.environment.docker.base_image = DEFAULT_CPU_IMAGE

# Use conda_dependencies.yml to create a conda environment in the Docker image for execution
amlComputeRunConf.environment.python.user_managed_dependencies = False

# Auto-prepare the Docker image when used for execution (if it is not already prepared)
amlComputeRunConf.auto_prepare_environment = True

# Specify CondaDependencies obj, add necessary packages
amlComputeRunConf.environment.python.conda_dependencies = CondaDependencies.create(
    pip_packages=['numpy', 'pandas', 'scikit-learn', 'azureml-sdk'])

print("..5. completed")
print('')
print('')
experiment = Experiment(workspace=ws, name='automl-diabetes')
aml_compute = AmlCompute(ws, compute_target_name)

# read in the data
print("Getting a reference to default datastore")
datastore = ws.get_default_datastore()

print("Preparing the 'prep data' step")
blob_diabetes_data = DataReference(
    datastore=datastore,
    data_reference_name="diabetes_data",
    path_on_datastore="diabetesdata/diabetes_pima.csv")

# Create a new runconfig object
aml_run_config = RunConfiguration()
aml_run_config.target = aml_compute
aml_run_config.environment.docker.enabled = True
aml_run_config.environment.docker.base_image = DEFAULT_CPU_IMAGE
aml_run_config.environment.python.user_managed_dependencies = False
aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['pandas', 'scikit-learn', 'numpy'],
    pip_packages=[
        'azureml-sdk', 'azureml-dataprep', 'azureml-dataprep[pandas]',
        'azureml-train-automl'
    ],
    pin_sdk_version=False)

scripts_folder = './scripts'
prepared_data = PipelineData("diabetes_data_prep", datastore=datastore)

prep_data_step = PythonScriptStep(name="Prep diabetes data",
print(f"### Will mount datapath '{dataPathRemote}' on remote compute") dataRef = DataReferenceConfiguration(datastore_name=ds.name, path_on_datastore=dataPathRemote, path_on_compute='/tmp', mode='download', overwrite=False) # Create a new RunConfiguration and attach data runConfig = RunConfiguration() runConfig.data_references = { ds.name: dataRef } # This syntax is not documented! if not os.environ.get('AZML_RUN_LOCAL', 'false') == "true": # Set it up for running in Azure ML compute runConfig.target = computeTarget runConfig.environment.docker.enabled = True runConfig.auto_prepare_environment = True runConfig.environment.python.conda_dependencies = CondaDependencies.create( conda_packages=['scikit-learn==0.20.3', 'pandas', 'matplotlib']) print( f"### Will execute script {trainingScriptDir}/{trainingScript} on REMOTE compute" ) else: # OR set up RunConfig to run local, needs a pre-set up Python 3 virtual env runConfig.environment.python.user_managed_dependencies = True runConfig.environment.python.interpreter_path = os.environ[ 'VIRTUAL_ENV'] + "/bin/python" print( f"### Will execute script {trainingScriptDir}/{trainingScript} on LOCAL compute" )
provisioning_config)

# Can poll for a minimum number of nodes and for a specific timeout.
# If no min_node_count is provided, it will use the scale settings for the cluster.
compute_target.wait_for_completion(show_output=True,
                                   min_node_count=None,
                                   timeout_in_minutes=20)

# In[41]:

# prepare the runtime
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

run_config = RunConfiguration(framework="python")
run_config.target = compute_target
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE

dependencies = CondaDependencies.create(
    pip_packages=["scikit-learn", "scipy", "numpy"])
run_config.environment.python.conda_dependencies = dependencies

# NOT explainable
# automl_config = AutoMLConfig(task='classification',
#                              debug_log='automl_errors.log',
#                              path=project_folder,
#                              compute_target=compute_target,
#                              run_configuration=run_config,
#                              X = X,  # use the remote uploaded data
#                              y = y,
                  history_name = run_history_name,
                  directory = project_folder)
print(project.project_directory, project.history.name, sep = '\n')

print('copy {} and iris.csv to the project folder.'.format(train_script))
shutil.copy(train_script, os.path.join(project_folder, train_script))
shutil.copy('iris.csv', os.path.join(project_folder, 'iris.csv'))

print('create an ACI run config.')

# create a new runconfig object
run_config = RunConfiguration(project_object = project,
                              run_config_name = 'my-aci-run-config')

# signal that you want to use ACI to execute script.
run_config.target = "containerinstance"

# ACI container group is only supported in certain regions,
# which can be different than the region the Workspace is in.
run_config.container_instance.region = 'eastus'

# set the ACI CPU and Memory
run_config.container_instance.cpu_cores = 1
run_config.container_instance.memory_gb = 2

# enable Docker
run_config.environment.docker.enabled = True

# set Docker base image to the default CPU-based image
run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_MMLSPARK_CPU_IMAGE
print('base image is', run_config.environment.docker.base_image)
# run_config.environment.docker.base_image = 'microsoft/mmlspark:plus-0.9.9'
    vm_size='STANDARD_D2_V2',
    max_nodes=4)
cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
cpu_cluster.wait_for_completion(show_output=True)
#</cpu_cluster>

#<run_amlcompute>
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies
from azureml.core.runconfig import DEFAULT_CPU_IMAGE

# Create a new runconfig object
run_amlcompute = RunConfiguration()

# Use the cpu_cluster you created above.
run_amlcompute.target = cpu_cluster

# Enable Docker
run_amlcompute.environment.docker.enabled = True

# Set Docker base image to the default CPU-based image
run_amlcompute.environment.docker.base_image = DEFAULT_CPU_IMAGE

# Use conda_dependencies.yml to create a conda environment in the Docker image for execution
run_amlcompute.environment.python.user_managed_dependencies = False

# Specify CondaDependencies obj, add necessary packages
run_amlcompute.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['scikit-learn'])
#</run_amlcompute>
project_folder = './sample_projects/automl-remote-attach'

experiment = Experiment(ws, experiment_name)
automl_runs = list(experiment.get_runs(type='automl'))
assert (len(automl_runs) == 1)

compute_name = 'mydsvmb'
dsvm_compute = ws.compute_targets[compute_name]

# create a new RunConfig object
conda_run_config = RunConfiguration(framework="python")

# Set compute target to the Linux DSVM
conda_run_config.target = dsvm_compute

cd = CondaDependencies.create(pip_packages=['azureml-sdk[automl]'],
                              conda_packages=['numpy'])
conda_run_config.environment.python.conda_dependencies = cd

automl_settings = {
    "iteration_timeout_minutes": 60,
    "iterations": 100,
    "n_cross_validations": 5,
    "primary_metric": 'AUC_weighted',
    "preprocess": True,
    "max_cores_per_iteration": 2
}

automl_config = AutoMLConfig(task='classification',
def main():
    train_file = r"EdwardFry_Microsoft_issueDataset.csv"
    ws = Workspace.from_config()  # Loads config.json

    # Default datastore
    def_data_store = ws.get_default_datastore()

    # Get the blob storage associated with the workspace
    def_blob_store = Datastore(ws, "workspaceblobstore")

    # Get file storage associated with the workspace
    def_file_store = Datastore(ws, "workspacefilestore")

    # Set data input and output
    xyz_phishing_dataset = Dataset.File.from_files([(def_blob_store, train_file)])
    output_data1 = OutputFileDatasetConfig(
        destination=(def_data_store, 'outputdataset/{run-id}'))
    output_data_dataset = output_data1.register_on_complete(
        name='prepared_output_data')

    # Set compute
    compute_name = "aml-compute"
    vm_size = "STANDARD_NC6"
    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('Found compute target: ' + compute_name)
    else:
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,  # STANDARD_NC6 is GPU-enabled
            min_nodes=0,
            max_nodes=4)

        # create the compute target
        compute_target = ComputeTarget.create(ws, compute_name, provisioning_config)

        # Can poll for a minimum number of nodes and for a specific timeout.
        # If no min node count is provided it will use the scale settings for the cluster
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)

        # For a more detailed view of current cluster status, use the 'status' property
        print(compute_target.status.serialize())

    aml_run_config = RunConfiguration()
    # `compute_target` as defined in "Azure Machine Learning compute" section above
    aml_run_config.target = compute_target

    USE_CURATED_ENV = True
    if USE_CURATED_ENV:
        curated_environment = Environment.get(workspace=ws, name="AzureML-Tutorial")
        aml_run_config.environment = curated_environment
    else:
        aml_run_config.environment.python.user_managed_dependencies = False

        # Add some packages relied on by data prep step
        aml_run_config.environment.python.conda_dependencies = CondaDependencies.create(
            conda_packages=['pandas', 'scikit-learn'],
            pip_packages=['azureml-sdk', 'azureml-dataprep[fuse,pandas]'],
            pin_sdk_version=False)

    dataprep_source_dir = "./dataprep_src"
    entry_point = "prepare.py"

    # `my_dataset` as defined above
    ds_input = xyz_phishing_dataset.as_named_input('input1')

    # `output_data1`, `compute_target`, `aml_run_config` as defined above
    data_prep_step = PythonScriptStep(script_name=entry_point,
                                      source_directory=dataprep_source_dir,
                                      arguments=[
                                          "--input", ds_input.as_download(),
                                          "--output", output_data1
                                      ],
                                      compute_target=compute_target,
                                      runconfig=aml_run_config,
                                      allow_reuse=True)

    train_source_dir = "./train_src"
    train_entry_point = "train.py"
    training_results = OutputFileDatasetConfig(name="training_results",
                                               destination=def_blob_store)

    train_step = PythonScriptStep(script_name=train_entry_point,
                                  source_directory=train_source_dir,
                                  arguments=[
                                      "--prepped_data", output_data1.as_input(),
                                      "--training_results", training_results
                                  ],
                                  compute_target=compute_target,
                                  runconfig=aml_run_config,
                                  allow_reuse=True)

    # list of steps to run (`compare_step` definition not shown)
    compare_models = [data_prep_step, train_step, compare_step]

    # Build the pipeline
    pipeline1 = Pipeline(workspace=ws, steps=[compare_models])

    # dataset_consuming_step = PythonScriptStep(
    #     script_name="iris_train.py",
    #     inputs=[iris_tabular_dataset.as_named_input("iris_data")],
    #     compute_target=compute_target,
    #     source_directory=project_folder
    # )
# run_context = Run.get_context()
# iris_dataset = run_context.input_datasets['iris_data']
# dataframe = iris_dataset.to_pandas_dataframe()

## Within a PythonScriptStep
# ws = Run.get_context().experiment.workspace

# step = PythonScriptStep(name="Hello World",
#                         script_name="hello_world.py",
#                         compute_target=aml_compute,
#                         source_directory=source_directory,
#                         allow_reuse=False,
#                         hash_paths=['hello_world.ipynb'])

# Submit the pipeline to be run
pipeline_run1 = Experiment(ws, 'Compare_Models_Exp').submit(pipeline1)
pipeline_run1.wait_for_completion()
ws = Workspace.from_config(auth=cli_auth)

# Read the New VM Config
with open("aml_config/security_config.json") as f:
    config = json.load(f)
remote_vm_name = config["remote_vm_name"]

# Attach Experiment
experiment_name = "devops-ai-demo"
exp = Experiment(workspace=ws, name=experiment_name)
print(exp.name, exp.workspace.name, sep="\n")

run_config = RunConfiguration()
run_config.target = remote_vm_name

# replace with your path to the python interpreter in the remote VM found earlier
run_config.environment.python.interpreter_path = "/anaconda/envs/myenv/bin/python"
run_config.environment.python.user_managed_dependencies = True

src = ScriptRunConfig(
    source_directory="./code",
    script="training/train.py",
    run_config=run_config
)

run = exp.submit(src)

# Shows output of the run on stdout.
run.wait_for_completion(show_output=True, wait_post_processing=True)

# Raise exception if run fails
# Create a set of package dependencies
fraud_packages = CondaDependencies.create(
    conda_packages=['scikit-learn', 'pandas'],
    pip_packages=['azureml-sdk'])

# Add the dependencies to the environment
fraud_env.python.conda_dependencies = fraud_packages

# Register the environment (just in case you want to use it again)
fraud_env.register(workspace=ws)
registered_env = Environment.get(ws, 'fraud-pipeline-env')

# Create a new runconfig object for the pipeline
pipeline_run_config = RunConfiguration()

# Use the compute you created above.
pipeline_run_config.target = pipeline_cluster

# Assign the environment to the run configuration
pipeline_run_config.environment = registered_env

print("Run configuration created.")

# Get the training dataset
fraud_ds = ws.datasets.get("creditcard")

# Create a PipelineData (temporary Data Reference) for the model folder
model_folder = PipelineData("model_folder", datastore=ws.get_default_datastore())

# pipeline_data = PipelineData('pipeline_data', datastore=default_ds)

data_ref = DataReference(datastore=default_ds,
# ## Pipeline 1st step: Data Preprocessing
#
# We start by defining the run configuration with the dependencies needed by the preprocessing step.
#
# In the cell that follows, we compose the first step of the pipeline.

#%%
cd = CondaDependencies()
cd.add_conda_package('pandas')
cd.add_conda_package('matplotlib')
cd.add_conda_package('numpy')
cd.add_conda_package('scikit-learn')

run_config = RunConfiguration(framework="python", conda_dependencies=cd)
run_config.target = cluster
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE
run_config.environment.python.user_managed_dependencies = False

#%%
pre_processing = PythonScriptStep(
    name='preprocess dataset',
    script_name='preprocess.py',
    arguments=['--input_path', input_dir,
               '--output_path', processed_dir],
    inputs=[input_dir],
    outputs=[processed_dir],
    compute_target=cluster_name,
    runconfig=run_config,
    source_directory=PREPROCESS_DIR
exp = Experiment(workspace=ws, name=experiment_name)

#<run_temp_compute>
from azureml.core.compute import ComputeTarget, AmlCompute

# First, list the supported VM families for Azure Machine Learning Compute
print(AmlCompute.supported_vmsizes(workspace=ws))

from azureml.core.runconfig import RunConfiguration

# Create a new runconfig object
run_temp_compute = RunConfiguration()

# Signal that you want to use AmlCompute to execute the script
run_temp_compute.target = "amlcompute"

# AmlCompute is created in the same region as your workspace
# Set the VM size for AmlCompute from the list of supported_vmsizes
run_temp_compute.amlcompute.vm_size = 'STANDARD_D2_V2'
#</run_temp_compute>

# Submit the experiment using the run configuration
from azureml.core import ScriptRunConfig

src = ScriptRunConfig(source_directory=script_folder,
                      script='train.py',
                      run_config=run_temp_compute)
run = exp.submit(src)
run.wait_for_completion(show_output=True)