Example #1
def _to_data_reference_config(config):
    from azureml.core.runconfig import DataReferenceConfiguration
    from azureml.data.constants import MOUNT_MODE
    # Returns a (DataReferenceConfiguration, force_read) pair.
    return DataReferenceConfiguration(
        datastore_name=config.get("DataStoreName", None),
        mode=config.get("Mode", MOUNT_MODE).lower(),
        path_on_datastore=config.get("PathOnDataStore", None),
        path_on_compute=config.get("PathOnCompute", None),
        overwrite=config.get("Overwrite", False),
    ), config.get("ForceRead", False)
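# A quick illustration of the dictionary this helper expects; the key names
# mirror the .get() calls above, and the values here are placeholders.
sample = {
    "DataStoreName": "workspaceblobstore",
    "Mode": "Mount",
    "PathOnDataStore": "tfdata",
    "Overwrite": False,
    "ForceRead": False,
}
dr_config, force_read = _to_data_reference_config(sample)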
Example #2
    def to_config(self):
        """Convert the DataReference object to DataReferenceConfiguration object.

        :return: A new DataReferenceConfiguration object.
        :rtype: azureml.core.runconfig.DataReferenceConfiguration
        """
        from azureml.core.runconfig import DataReferenceConfiguration
        return DataReferenceConfiguration(
            datastore_name=self.datastore.name,
            mode=self.mode,
            path_on_datastore=self._get_normalized_path(
                self.path_on_datastore),
            path_on_compute=self._get_normalized_path(self.path_on_compute),
            overwrite=self.overwrite)
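# A minimal usage sketch (names assumed: 'ds' is an existing Datastore and
# 'run_config' an existing RunConfiguration): build a DataReference and
# convert it into the object consumed by RunConfiguration.data_references.
from azureml.data.data_reference import DataReference
ref = DataReference(datastore=ds, data_reference_name='training_data',
                    path_on_datastore='tfdata', mode='mount')
run_config.data_references = {ref.data_reference_name: ref.to_config()}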
Example #3
# Retrieve datasets
dataset_train = Dataset.get_by_name(workspace, name=input_name_train)
dataset_test = Dataset.get_by_name(workspace, name=input_name_test)

# Runconfig
amlcompute_run_config = RunConfiguration(conda_dependencies=cd,
                                         script="data_validation.py")

amlcompute_run_config.environment.docker.enabled = True
amlcompute_run_config.environment.spark.precache_packages = False
amlcompute_run_config.target = compute_target
amlcompute_run_config.data = {
    input_name_train: load_data(dataset_train, input_name_train),
    input_name_test: load_data(dataset_test, input_name_test)
}
amlcompute_run_config.data_references = {
    "baseline_profile":
    DataReferenceConfiguration(
        datastore_name='workspaceblobstore',
        mode='download',
        path_on_datastore='baseline_profile',
    )
}
amlcompute_run_config.save(
    path=os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "RunConfig/",
        "runconfig_data_validation.yml",
    ),
    name='datavalidationsubset',
    separate_environment_yaml=True)
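# The saved configuration can be reloaded later; a sketch, mirroring the
# arguments of the save() call above.
loaded_cfg = RunConfiguration.load(
    path=os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "RunConfig/",
        "runconfig_data_validation.yml",
    ),
    name='datavalidationsubset')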
Example #4
# get the default datastore and upload data from a local folder to it
ds = ws.get_default_datastore()
print(ds.name, ds.datastore_type, ds.account_name, ds.container_name)

# Upload data to default data storage
data_folder = config['train']['data_folder']
ds.upload(config['train']['local_directory'],
          target_path=data_folder,
          overwrite=True)
print('Finished Uploading Data.')

# Run Configuration
from azureml.core.runconfig import DataReferenceConfiguration
dr = DataReferenceConfiguration(
    datastore_name=ds.name,
    path_on_datastore=data_folder,
    mode='download',  # download files from datastore to compute target
    overwrite=True)

from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

# create a new RunConfig object
conda_run_config = RunConfiguration(framework="python")

# Set compute target to the Linux DSVM
conda_run_config.target = dsvm_compute.name

# set the data reference of the run configuration
conda_run_config.data_references = {ds.name: dr}
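
# A minimal submission sketch under the setup above ('train.py' and the
# experiment name are placeholders): the 'download' data reference is
# materialized on the DSVM before the script starts.
from azureml.core import Experiment, ScriptRunConfig
src = ScriptRunConfig(source_directory='.', script='train.py',
                      run_config=conda_run_config)
run = Experiment(workspace=ws, name='dsvm-train').submit(src)
run.wait_for_completion(show_output=True)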
Example #5
# ### Step 3 : Generate data reference config
# 
# You can mount your preconfigured dataset (including train.tfrecords and test.tfrecords) from your ```Datastore``` on your compute target.
# See "[Exercise02 : Prepare Datastore](/notebooks/exercise02_prepare_datastore.ipynb)".

#%%
from azureml.core import Datastore
from azureml.core.runconfig import DataReferenceConfiguration
# from azureml.data.data_reference import DataReference

# get your datastore (See "Exercise 02 : Prepare Datastore")
ds = Datastore.get(ws, datastore_name="myblob01")

# generate data reference configuration
dr_conf = DataReferenceConfiguration(
    datastore_name=ds.name,
    path_on_datastore='tfdata',
    mode='mount') # set 'download' if you copy all files instead of mounting

#%% [markdown]
# ### Step 4 : Generate config
# 
# Here we set up the Docker environment for running scripts. We want to use the ```Datastore``` as input data, so we attach the data reference configuration from the previous step to this run configuration.

#%%
from azureml.core.runconfig import RunConfiguration, DEFAULT_GPU_IMAGE
from azureml.core.conda_dependencies import CondaDependencies

run_config = RunConfiguration(
    framework="python",
    conda_dependencies=CondaDependencies.create(conda_packages=['tensorflow-gpu']))
run_config.target = compute_target.name
run_config.data_references = {ds.name: dr_conf}  # attach the data reference from Step 3
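
# Inside the submitted script, the mounted location is exposed through an
# environment variable named after the data reference; a sketch assuming the
# classic-SDK naming, with 'myblob01' being the reference name used above.
import os
data_dir = os.environ['AZUREML_DATAREFERENCE_myblob01']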
Example #6
# Create or get existing AML compute cluster
computeTarget = getComputeAML(ws, os.environ['AZML_COMPUTE_NAME'])
if not computeTarget:
    print('### Failed! Bye!')
    exit()

# Create AML experiment and connect to default data store
exp = Experiment(workspace=ws, name=os.environ['AZML_EXPERIMENT'])
ds = ws.get_default_datastore()
print(f"### Working with experiment name '{exp.name}'")

# This makes datastore data available to the remote compute job
print(f"### Will mount datapath '{dataPathRemote}' on remote compute")
dataRef = DataReferenceConfiguration(datastore_name=ds.name,
                                     path_on_datastore=dataPathRemote,
                                     path_on_compute='/tmp',
                                     mode='download',
                                     overwrite=False)

# Create a new RunConfiguration and attach data
runConfig = RunConfiguration()
runConfig.data_references = {
    ds.name: dataRef
}  # This syntax is not documented!

if os.environ.get('AZML_RUN_LOCAL', 'false') != "true":
    # Set it up for running in Azure ML compute
    runConfig.target = computeTarget
    runConfig.environment.docker.enabled = True
    runConfig.auto_prepare_environment = True
    runConfig.environment.python.conda_dependencies = CondaDependencies.create(
        conda_packages=['scikit-learn'])  # package list elided in the original snippet; placeholder only
Example #7
def run(workspace, config, args):
    compute_target_name = config['train']['compute_target_name']
    data_folder = config['train']['data_folder']

    try:
        compute_target = ComputeTarget(workspace=workspace,
                                       name=compute_target_name)
        print('found existing:', compute_target.name)
    except ComputeTargetException:
        print('creating new.')
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=config['train']['vm_size'], min_nodes=0, max_nodes=1)
        compute_target = ComputeTarget.create(workspace, compute_target_name,
                                              compute_config)
        compute_target.wait_for_completion(show_output=True)

    # ds = Datastore.register_azure_blob_container(
    #     workspace,
    #     datastore_name=config['train']['datastore_name'],
    #     account_name=config['train']['account_name'],
    #     account_key=config['train']['account_key'],
    #     container_name=config['train']['container_name'],
    #     overwrite=True)
    #
    # # # Upload local "data" folder (incl. files) as "tfdata" folder
    # ds.upload(
    #     src_dir=config['train']['local_directory'],
    #     target_path=data_folder,
    #     overwrite=True)

    ds = Datastore.get(workspace,
                       datastore_name=config['train']['datastore_name'])

    # generate data reference configuration
    dr_conf = DataReferenceConfiguration(
        datastore_name=ds.name, path_on_datastore=data_folder, mode='mount'
    )  # set 'download' if you copy all files instead of mounting

    run_config = RunConfiguration(framework="python",
                                  conda_dependencies=CondaDependencies.create(
                                      conda_packages=ast.literal_eval(
                                          config['train']['conda_packages'])))
    run_config.target = compute_target.name
    run_config.data_references = {ds.name: dr_conf}
    run_config.environment.docker.enabled = True
    # run_config.environment.docker.gpu_support = True
    run_config.environment.docker.base_image = DEFAULT_GPU_IMAGE

    src = ScriptRunConfig(
        source_directory='./script',
        script='train.py',
        run_config=run_config,
        arguments=[
            '--datadir',
            str(ds.as_mount()), '--step', args.step, '--train_on',
            args.train_on, '--fold', args.fold, '--epochs', args.epochs,
            '--experiment', args.experiment, '--reference', args.reference,
            '--batchsize', args.batchsize, '--optimizertype',
            args.optimizertype, '--convrnn_filters', args.convrnn_filters,
            '--learning_rate', args.learning_rate, '--pix250m', args.pix250m
        ])
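    # Note: str(ds.as_mount()) renders as the $AZUREML_DATAREFERENCE_<name>
    # token, which the service substitutes with the actual mount path when
    # the run starts (assumed classic-SDK behavior).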
    # exp = Experiment(workspace=ws, name='test20181210-09')
    exp = Experiment(workspace=workspace,
                     name=config['train']['experiment_name'])
    run = exp.submit(config=src)
    run.wait_for_completion(show_output=True)
Example #8
model_name = proj_root if proj_root.isalnum() else ''.join(ch for ch in proj_root if ch.isalnum())
experiment_name = model_name
exp = Experiment(workspace=ws, name=experiment_name)

print("datastore:{}, compute:{}".format(ds.container_name, type(compute_target)))
print("proj_root:{}, model_name:{}".format(proj_root, model_name))

image_registry_details = ContainerRegistry()
image_registry_details.address = docker_registry_address
image_registry_details.username = docker_registry_username
image_registry_details.password = docker_registry_password
training_docker_image = docker_registry_address + '/' + training_docker_image_short_name

# set up training configuration
dr = DataReferenceConfiguration(datastore_name=ds.name,
                                path_on_datastore=proj_root,
                                overwrite=True)
drlogs = DataReferenceConfiguration(datastore_name=dslogs.name,
                                    path_on_datastore=proj_root,
                                    overwrite=True)
run_cfg = RunConfiguration()
run_cfg.environment.docker.enabled = True
run_cfg.environment.docker.gpu_support = support_gpu
run_cfg.environment.docker.base_image = training_docker_image 
run_cfg.environment.docker.base_image_registry = image_registry_details
run_cfg.data_references = {ds.name: dr, dslogs.name: drlogs} 
run_cfg.environment.python.user_managed_dependencies = True
run_cfg.target = compute_target

# submit training
currentDT = datetime.datetime.now()
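
# A sketch of the elided submission step (script name and arguments assumed;
# ScriptRunConfig imported as in Example #7):
src = ScriptRunConfig(source_directory=proj_root, script='train.py',
                      run_config=run_cfg)
run = exp.submit(src)
print('submitted:', currentDT.strftime('%Y-%m-%d %H:%M:%S'))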
Example #9
# create or use an existing experiment
exp = Experiment(workspace=ws, name=experiment_name)

# register our existing Azure Blob Container with the labeled audio files
ds = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name=azureStorageTargetContainer,
    container_name=azureStorageTargetContainer,
    account_name=azureStorgeAccountName,
    account_key=azureStorageKeyName,
    create_if_not_exists=False)

# create a reference where we mount the DataStore to the container instance
dr = DataReferenceConfiguration(datastore_name=ds.name,
                                path_on_compute='data',
                                mode='mount')

# upload any needed files
ws.get_default_datastore().upload(src_dir='.',
                                  target_path='.',
                                  overwrite=True,
                                  show_progress=True)

# create the compute_target object, if it does not exist
try:
    compute_target = ComputeTarget(workspace=ws, name=azureMLClusterName)
    print('using existing compute cluster: ' + azureMLClusterName)
except ComputeTargetException:
    print('Creating a new compute target: ' + azureMLClusterName)
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_NC6', max_nodes=4)  # VM size and node count elided in the original snippet; placeholders only
Example #10
myenv.docker.enabled = True
myenv.python.user_managed_dependencies = False

#%%
""" 02b -- Configuration (Data Reference)
"""
ds = ws.get_default_datastore()
print(ds.name, ds.datastore_type, ds.account_name, ds.container_name)
dataset = Dataset.File.from_files(path=[(ds, 'diabetes/')])
print(dataset.to_path())

from azureml.core.runconfig import DataReferenceConfiguration

dr = DataReferenceConfiguration(datastore_name=ds.name,
                                path_on_datastore='diabetes',
                                path_on_compute='/tmp/azureml_runs',
                                mode='download',
                                overwrite=True)
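
# With mode='download', the contents of 'diabetes' on the datastore are copied
# under path_on_compute before the run starts (assumed classic-SDK behavior),
# so the training script can read them from local disk.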

#%%
""" 02c -- Configuration (Directory)
"""
import shutil

script_folder = './aml-run'  # this is the folder that we are going to send to the remote vm
os.makedirs(script_folder, exist_ok=True)
shutil.copy('./train.py', os.path.join(script_folder, 'train.py'))
shutil.copy('./myfucs.py', os.path.join(script_folder, 'myfucs.py'))

#%%
""" 02d-1 -- Configuration (Compute - DSVM)