def _to_data_reference_config(config):
    # Deserialize a serialized data reference section into a
    # (DataReferenceConfiguration, ForceRead flag) pair.
    from azureml.core.runconfig import DataReferenceConfiguration
    from azureml.data.constants import MOUNT_MODE
    return (DataReferenceConfiguration(
                datastore_name=config.get("DataStoreName", None),
                mode=config.get("Mode", MOUNT_MODE).lower(),
                path_on_datastore=config.get("PathOnDataStore", None),
                path_on_compute=config.get("PathOnCompute", None),
                overwrite=config.get("Overwrite", False)),
            config.get("ForceRead", False))
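# A minimal sketch of how this helper might be invoked, assuming a serialized
# data reference section shaped like the keys read above. The dict below is
# hypothetical and not taken from the original source.
serialized = {
    "DataStoreName": "workspaceblobstore",
    "Mode": "Download",
    "PathOnDataStore": "training/images",
    "PathOnCompute": "/tmp/data",
    "Overwrite": True,
    "ForceRead": False,
}
data_reference_config, force_read = _to_data_reference_config(serialized)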
def to_config(self):
    """Convert the DataReference object to a DataReferenceConfiguration object.

    :return: A new DataReferenceConfiguration object.
    :rtype: azureml.core.runconfig.DataReferenceConfiguration
    """
    from azureml.core.runconfig import DataReferenceConfiguration
    return DataReferenceConfiguration(
        datastore_name=self.datastore.name,
        mode=self.mode,
        path_on_datastore=self._get_normalized_path(self.path_on_datastore),
        path_on_compute=self._get_normalized_path(self.path_on_compute),
        overwrite=self.overwrite)
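# Hedged usage sketch: build a DataReference against the default datastore and
# convert it for use in a RunConfiguration. A Workspace object `ws` is assumed,
# and the reference name and path below are illustrative, not from the original.
from azureml.core.runconfig import RunConfiguration
from azureml.data.data_reference import DataReference

ds = ws.get_default_datastore()
data_ref = DataReference(datastore=ds,
                         data_reference_name='training_data',
                         path_on_datastore='training',
                         mode='mount')

run_config = RunConfiguration()
run_config.data_references = {'training_data': data_ref.to_config()}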
# Retrieve datasets
dataset_train = Dataset.get_by_name(workspace, name=input_name_train)
dataset_test = Dataset.get_by_name(workspace, name=input_name_test)

# Run configuration
amlcompute_run_config = RunConfiguration(conda_dependencies=cd,
                                         script="data_validation.py")
amlcompute_run_config.environment.docker.enabled = True
amlcompute_run_config.environment.spark.precache_packages = False
amlcompute_run_config.target = compute_target
amlcompute_run_config.data = {
    input_name_train: load_data(dataset_train, input_name_train),
    input_name_test: load_data(dataset_test, input_name_test)
}
amlcompute_run_config.data_references = {
    "baseline_profile": DataReferenceConfiguration(
        datastore_name='workspaceblobstore',
        mode='download',
        path_on_datastore='baseline_profile',
    )
}
amlcompute_run_config.save(path=os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    "RunConfig/",
    "runconfig_data_validation.yml",
), name='datavalidationsubset', separate_environment_yaml=True)
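# Hedged follow-up sketch: the saved YAML can be reloaded and attached to a
# ScriptRunConfig when the data validation step is submitted. The source
# directory and experiment name below are assumptions, not part of the snippet.
from azureml.core import Experiment, ScriptRunConfig
from azureml.core.runconfig import RunConfiguration

loaded_run_config = RunConfiguration.load(path=os.path.join(
    os.path.dirname(os.path.realpath(__file__)),
    "RunConfig/",
    "runconfig_data_validation.yml",
))
src = ScriptRunConfig(source_directory='.',
                      script='data_validation.py',
                      run_config=loaded_run_config)
run = Experiment(workspace, 'data-validation').submit(src)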
# get the default datastore and upload data from the local folder to the VM
ds = ws.get_default_datastore()
print(ds.name, ds.datastore_type, ds.account_name, ds.container_name)

# Upload data to the default data storage
data_folder = config['train']['data_folder']
ds.upload(config['train']['local_directory'], target_path=data_folder, overwrite=True)
print('Finished Uploading Data.')

# Run Configuration
from azureml.core.runconfig import DataReferenceConfiguration
dr = DataReferenceConfiguration(
    datastore_name=ds.name,
    path_on_datastore=data_folder,
    mode='download',  # download files from datastore to compute target
    overwrite=True)

from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

# create a new RunConfig object
conda_run_config = RunConfiguration(framework="python")

# Set compute target to the Linux DSVM
conda_run_config.target = dsvm_compute.name

# set the data reference of the run configuration
conda_run_config.data_references = {ds.name: dr}
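# Hedged continuation sketch: submit a run that uses this configuration. The
# entry script name, source directory, and experiment name are assumptions;
# the 'download' data reference above makes the uploaded files available on
# the DSVM before the script starts.
from azureml.core import Experiment, ScriptRunConfig

src = ScriptRunConfig(source_directory='.',
                      script='train.py',
                      run_config=conda_run_config)
run = Experiment(ws, 'dsvm-train').submit(src)
run.wait_for_completion(show_output=True)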
# ### Step 3 : Generate data reference config
#
# You can configure the run to mount your preconfigured dataset (including train.tfrecords and test.tfrecords) from your ```Datastore``` on your compute target.
# See "[Exercise02 : Prepare Datastore](/notebooks/exercise02_prepare_datastore.ipynb)".

#%%
from azureml.core import Datastore
from azureml.core.runconfig import DataReferenceConfiguration
# from azureml.data.data_reference import DataReference

# get your datastore (See "Exercise 02 : Prepare Datastore")
ds = Datastore.get(ws, datastore_name="myblob01")

# generate data reference configuration
dr_conf = DataReferenceConfiguration(
    datastore_name=ds.name,
    path_on_datastore='tfdata',
    mode='mount')  # set 'download' if you copy all files instead of mounting

#%% [markdown]
# ### Step 4 : Generate config
#
# Here we set up the Docker environment for running scripts. We want to use the ```Datastore``` as input data, so we attach the data reference configuration from the previous step to this run configuration.

#%%
from azureml.core.runconfig import RunConfiguration, DEFAULT_GPU_IMAGE
from azureml.core.conda_dependencies import CondaDependencies

run_config = RunConfiguration(
    framework="python",
    conda_dependencies=CondaDependencies.create(conda_packages=['tensorflow-gpu']))
run_config.target = compute_target.name
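# Continuation sketch (the excerpt above stops here): attach the data reference
# from Step 3 and the GPU Docker image. These lines follow the pattern described
# in the Step 4 markdown and are an assumption about the rest of the cell.
run_config.environment.docker.enabled = True
run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE
run_config.data_references = {ds.name: dr_conf}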
# Create or get existing AML compute cluster
computeTarget = getComputeAML(ws, os.environ['AZML_COMPUTE_NAME'])
if not computeTarget:
    print('### Failed! Bye!')
    exit()

# Create AML experiment and connect to default data store
exp = Experiment(workspace=ws, name=os.environ['AZML_EXPERIMENT'])
ds = ws.get_default_datastore()
print(f"### Working with experiment name '{exp.name}'")

# This allows us to mount/upload data to remote compute job
print(f"### Will mount datapath '{dataPathRemote}' on remote compute")
dataRef = DataReferenceConfiguration(datastore_name=ds.name,
                                     path_on_datastore=dataPathRemote,
                                     path_on_compute='/tmp',
                                     mode='download',
                                     overwrite=False)

# Create a new RunConfiguration and attach data
runConfig = RunConfiguration()
runConfig.data_references = {ds.name: dataRef}  # This syntax is not documented!

if not os.environ.get('AZML_RUN_LOCAL', 'false') == "true":
    # Set it up for running in Azure ML compute
    runConfig.target = computeTarget
    runConfig.environment.docker.enabled = True
    runConfig.auto_prepare_environment = True
    runConfig.environment.python.conda_dependencies = CondaDependencies.create(
def run(workspace, config, args):
    compute_target_name = config['train']['compute_target_name']
    data_folder = config['train']['data_folder']

    try:
        compute_target = ComputeTarget(workspace=workspace, name=compute_target_name)
        print('found existing:', compute_target.name)
    except ComputeTargetException:
        print('creating new.')
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=config['train']['vm_size'],
            min_nodes=0,
            max_nodes=1)
        compute_target = ComputeTarget.create(workspace, compute_target_name, compute_config)
        compute_target.wait_for_completion(show_output=True)

    # ds = Datastore.register_azure_blob_container(
    #     workspace,
    #     datastore_name=config['train']['datastore_name'],
    #     account_name=config['train']['account_name'],
    #     account_key=config['train']['account_key'],
    #     container_name=config['train']['container_name'],
    #     overwrite=True)
    #
    # # Upload local "data" folder (incl. files) as "tfdata" folder
    # ds.upload(
    #     src_dir=config['train']['local_directory'],
    #     target_path=data_folder,
    #     overwrite=True)

    ds = Datastore.get(workspace, datastore_name=config['train']['datastore_name'])

    # generate data reference configuration
    dr_conf = DataReferenceConfiguration(
        datastore_name=ds.name,
        path_on_datastore=data_folder,
        mode='mount')  # set 'download' if you copy all files instead of mounting

    run_config = RunConfiguration(
        framework="python",
        conda_dependencies=CondaDependencies.create(
            conda_packages=ast.literal_eval(config['train']['conda_packages'])))
    run_config.target = compute_target.name
    run_config.data_references = {ds.name: dr_conf}
    run_config.environment.docker.enabled = True
    # run_config.environment.docker.gpu_support = True
    run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE

    src = ScriptRunConfig(
        source_directory='./script',
        script='train.py',
        run_config=run_config,
        arguments=[
            '--datadir', str(ds.as_mount()),
            '--step', args.step,
            '--train_on', args.train_on,
            '--fold', args.fold,
            '--epochs', args.epochs,
            '--experiment', args.experiment,
            '--reference', args.reference,
            '--batchsize', args.batchsize,
            '--optimizertype', args.optimizertype,
            '--convrnn_filters', args.convrnn_filters,
            '--learning_rate', args.learning_rate,
            '--pix250m', args.pix250m
        ])

    # exp = Experiment(workspace=ws, name='test20181210-09')
    exp = Experiment(workspace=workspace, name=config['train']['experiment_name'])
    run = exp.submit(config=src)
    run.wait_for_completion(show_output=True)
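# Hedged sketch of how ./script/train.py might consume the mounted datastore
# path; the parser mirrors the '--datadir' argument passed above, while
# everything else about the training script is assumed.
import argparse
import os

parser = argparse.ArgumentParser()
parser.add_argument('--datadir', type=str, help='mounted datastore path')
known_args, _ = parser.parse_known_args()

print('data directory:', known_args.datadir)
print('contents:', os.listdir(known_args.datadir))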
model_name = proj_root if proj_root.isalnum() else ''.join(ch for ch in proj_root if ch.isalnum())
experiment_name = model_name
exp = Experiment(workspace=ws, name=experiment_name)

print("datastore:{}, compute:{}".format(ds.container_name, type(compute_target)))
print("proj_root:{}, model_name:{}".format(proj_root, model_name))

image_registry_details = ContainerRegistry()
image_registry_details.address = docker_registry_address
image_registry_details.username = docker_registry_username
image_registry_details.password = docker_registry_password
training_docker_image = docker_registry_address + '/' + training_docker_image_short_name

# set up training configuration
dr = DataReferenceConfiguration(datastore_name=ds.name,
                                path_on_datastore=proj_root,
                                overwrite=True)
drlogs = DataReferenceConfiguration(datastore_name=dslogs.name,
                                    path_on_datastore=proj_root,
                                    overwrite=True)

run_cfg = RunConfiguration()
run_cfg.environment.docker.enabled = True
run_cfg.environment.docker.gpu_support = support_gpu
run_cfg.environment.docker.base_image = training_docker_image
run_cfg.environment.docker.base_image_registry = image_registry_details
run_cfg.data_references = {ds.name: dr, dslogs.name: drlogs}
run_cfg.environment.python.user_managed_dependencies = True
run_cfg.target = compute_target

# submit training
currentDT = datetime.datetime.now()
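# Hedged continuation sketch: the excerpt ends just before submission. Assuming
# the entry script is train.py in the project root, the run could be submitted
# and tagged with the timestamp captured above.
from azureml.core import ScriptRunConfig

src = ScriptRunConfig(source_directory='.', script='train.py', run_config=run_cfg)
run = exp.submit(src, tags={'submitted': currentDT.strftime('%Y-%m-%d %H:%M:%S')})
run.wait_for_completion(show_output=True)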
# create or use an existing experiment
exp = Experiment(workspace=ws, name=experiment_name)

# register our existing Azure Blob container with the labeled audio files
ds = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name=azureStorageTargetContainer,
    container_name=azureStorageTargetContainer,
    account_name=azureStorgeAccountName,
    account_key=azureStorageKeyName,
    create_if_not_exists=False)

# create a reference where we mount the datastore to the container instance
dr = DataReferenceConfiguration(datastore_name=ds.name, path_on_compute='data', mode='mount')

# upload any needed files
ws.get_default_datastore().upload(src_dir='.', target_path='.', overwrite=True, show_progress=True)

# create the compute_target object, if it does not exist
try:
    compute_target = ComputeTarget(workspace=ws, name=azureMLClusterName)
    print('using existing compute cluster: ' + azureMLClusterName)
except ComputeTargetException:
    print('Creating a new compute target: ' + azureMLClusterName)
    compute_config = AmlCompute.provisioning_configuration(
myenv.docker.enabled = True
myenv.python.user_managed_dependencies = False

#%%
"""
02b -- Configuration (Data Reference)
"""
ds = ws.get_default_datastore()
print(ds.name, ds.datastore_type, ds.account_name, ds.container_name)

dataset = Dataset.File.from_files(path=[(ds, 'diabetes/')])
print(dataset.to_path())

from azureml.core.runconfig import DataReferenceConfiguration
dr = DataReferenceConfiguration(datastore_name=ds.name,
                                path_on_datastore='diabetes',
                                path_on_compute='/tmp/azureml_runs',
                                mode='download',
                                overwrite=True)

#%%
"""
02c -- Configuration (Directory)
"""
import shutil

script_folder = './aml-run'  # this is the folder that we are going to send to the remote vm
os.makedirs(script_folder, exist_ok=True)
shutil.copy('./train.py', os.path.join(script_folder, 'train.py'))
shutil.copy('./myfucs.py', os.path.join(script_folder, 'myfucs.py'))

#%%
"""
02d-1 -- Configuration (Compute - DSVM)