def prepare():
    ws = None
    try:
        print("Connecting to workspace '%s'..." % workspace_name)
        ws = Workspace(subscription_id=subscription_id,
                       resource_group=resource_group,
                       workspace_name=workspace_name)
    except Exception:
        print("Workspace not accessible.")
        return
    print(ws.get_details())
    ws.write_config()

    #
    # Register an existing datastore to the workspace.
    #
    if datastore_name not in ws.datastores:
        Datastore.register_azure_blob_container(
            workspace=ws,
            datastore_name=datastore_name,
            container_name=blob_container_name,
            account_name=blob_account_name,
            account_key=blob_account_key
        )
        print("Datastore '%s' registered." % datastore_name)
    else:
        print("Datastore '%s' has already been registered." % datastore_name)

def register_datastore(self, datastore_name, blob_container,
                       storage_acct_name, storage_acct_key):
    Datastore.register_azure_blob_container(workspace=self.workspace,
                                            datastore_name=datastore_name,
                                            container_name=blob_container,
                                            account_name=storage_acct_name,
                                            account_key=storage_acct_key)

def get_datastore(ws: Workspace, datastore_name: str, container: str,
                  account_name: str, account_key: str) -> Datastore:
    if datastore_name not in ws.datastores:
        Datastore.register_azure_blob_container(workspace=ws,
                                                datastore_name=datastore_name,
                                                container_name=container,
                                                account_name=account_name,
                                                account_key=account_key,
                                                create_if_not_exists=True)
    return ws.datastores[datastore_name]

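# A minimal usage sketch for the helper above, assuming a local workspace
# config file exists; the datastore name, container, and key below are
# placeholders, not real resources.
from azureml.core import Workspace

ws = Workspace.from_config()
ds = get_datastore(
    ws,
    datastore_name="training_blob",
    container="training-data",
    account_name="<storage-account-name>",
    account_key="<storage-account-key>",
)
print(ds.name, ds.datastore_type)
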
def register_datastore(workspace, ds_config):
    ds_name = ds_config.get("name")
    if not is_datastore_exists(workspace, ds_name):
        Datastore.register_azure_blob_container(
            workspace=workspace,
            datastore_name=ds_name,
            account_name=ds_config.get("account_name"),
            container_name=ds_config.get("container_name"),
            account_key=ds_config.get("account_key"),
            create_if_not_exists=ds_config.get("create_if_not_exists")
        )

def _create_datastore(
    aml_workspace,
    datastore_name,
    container_name,
    account_name,
    account_key,
    create_if_not_exists=True,
):
    """Registers a blob container as a datastore.

    Args:
        aml_workspace (azureml.core.Workspace): Workspace to register the datastore with.
        datastore_name (string): Name you wish to assign to your datastore.
        container_name (string): Name of your container.
        account_name (string): Storage account name.
        account_key (string): The storage account key.
        create_if_not_exists (bool): Create the container if it does not exist.

    Returns:
        azureml.core.Datastore
    """
    logger = logging.getLogger(__name__)
    ds = Datastore.register_azure_blob_container(
        workspace=aml_workspace,
        datastore_name=datastore_name,
        container_name=container_name,
        account_name=account_name,
        account_key=account_key,
        create_if_not_exists=create_if_not_exists,
    )
    logger.info(f"Registered existing blob storage: {ds.name}.")
    return ds

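# A hedged usage sketch for _create_datastore; the workspace is loaded from a
# local config and every name/key below is a placeholder.
from azureml.core import Workspace

ws = Workspace.from_config()
datastore = _create_datastore(
    aml_workspace=ws,
    datastore_name="experiment_blob",
    container_name="experiment-data",
    account_name="<storage-account-name>",
    account_key="<storage-account-key>",
)
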
def _setup_datastore(self, blob_dataset_name, output_path=None):
    """
    Sets up the datastore in azureml. Either retrieves a pre-existing datastore
    or registers a new one in the workspace.

    :param str blob_dataset_name: [required] name of the datastore registered with the
        workspace. If the datastore does not yet exist, the name it will be registered under.
    :param str output_path: [optional] if registering a datastore for inferencing,
        the output path for writing back predictions.
    """
    try:
        self.blob_ds = Datastore.get(self.ws, blob_dataset_name)
        print("Found Blob Datastore with name: %s" % blob_dataset_name)
    except HttpOperationError:
        self.blob_ds = Datastore.register_azure_blob_container(
            workspace=self.ws,
            datastore_name=blob_dataset_name,
            account_name=self.account_name,
            container_name=self.container_name,
            account_key=self.account_key,
            subscription_id=self.blob_sub_id,
        )
        print("Registered blob datastore with name: %s" % blob_dataset_name)
    if output_path is not None:
        self.output_dir = PipelineData(
            name="output",
            datastore=self.ws.get_default_datastore(),
            output_path_on_compute=output_path)

def register_blob_datastore(
    workspace: Workspace,
    blob_datastore_name: str,
    container_name: str,
    account_name: str,
    account_key: str,
    datastore_rg: str,
) -> AzureBlobDatastore:
    """
    Register a Blob Storage Account with the Azure Machine Learning Workspace

    :param workspace: Azure Machine Learning Workspace
    :param blob_datastore_name: Name for blob datastore
    :param container_name: Name for blob container
    :param account_name: Name for blob account
    :param account_key: Blob account key used for auth
    :param datastore_rg: Resource group containing the Azure Storage Account
    :return: Pointer to Azure Machine Learning Blob Datastore
    """
    return Datastore.register_azure_blob_container(
        workspace=workspace,
        datastore_name=blob_datastore_name,
        container_name=container_name,
        account_name=account_name,
        account_key=account_key,
        resource_group=datastore_rg,
        overwrite=True,
    )

def get_or_create_datastore(
    datastorename: str, ws: Workspace, env: Env, input: bool = True
) -> Datastore:
    """
    Obtains a datastore with matching name. Creates it if none exists.

    :param datastorename: Name of the datastore
    :param ws: Current AML Workspace
    :param env: Environment variables
    :param input: Datastore points to the input container if this is True (default)
        or the output storage container otherwise

    :returns: Datastore
    :raises: ValueError
    """
    if datastorename is None:
        raise ValueError("Datastore name is required.")

    containername = (
        env.scoring_datastore_input_container
        if input
        else env.scoring_datastore_output_container
    )

    if datastorename in ws.datastores:
        datastore = ws.datastores[datastorename]
    # the datastore is not registered but we have all details to register it
    elif (
        env.scoring_datastore_access_key is not None
        and containername is not None  # NOQA: E501
    ):  # NOQA:E501
        datastore = Datastore.register_azure_blob_container(
            workspace=ws,
            datastore_name=datastorename,
            account_name=env.scoring_datastore_storage_name,
            account_key=env.scoring_datastore_access_key,
            container_name=containername,
        )
    else:
        raise ValueError(
            "No existing datastore named {} nor was enough information supplied to create one.".format(  # NOQA: E501
                datastorename
            )
        )

    return datastore

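# A sketch of how this helper might be invoked. It assumes Env exposes the
# scoring_datastore_* settings referenced above (e.g. loaded from environment
# variables); the datastore names here are illustrative only.
from azureml.core import Workspace

ws = Workspace.from_config()
env = Env()
input_datastore = get_or_create_datastore("scoringinput", ws, env, input=True)
output_datastore = get_or_create_datastore("scoringoutput", ws, env, input=False)
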
def register_blob_ws(ws, ds_name, container_name):
    """
    Register blob storage as datastore in workspace
    :param ws: azureml Workspace instance
    :return: registered Datastore
    """
    ds = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name=ds_name,
        container_name=container_name,
        account_name=os.environ.get('ACCOUNT_NAME'),
        account_key=os.environ.get('ACCOUNT_ACCESS_KEY'),
        create_if_not_exists=True)
    return ds

def _create_datastore(
    aml_workspace,
    datastore_name,
    container_name,
    account_name,
    account_key,
    create_if_not_exists=True,
):
    ds = Datastore.register_azure_blob_container(
        workspace=aml_workspace,
        datastore_name=datastore_name,
        container_name=container_name,
        account_name=account_name,
        account_key=account_key,
        create_if_not_exists=create_if_not_exists,
    )
    return ds

def get_datastore():
    env = EnvironmentVariables()
    datastore_name = env.datastore_name
    storage_account_name = env.storage_account_name
    storage_container_name = env.storage_container_name
    storage_account_key = env.storage_account_key
    workspace = get_workspace()

    try:
        datastore = Datastore.get(workspace=workspace, datastore_name=datastore_name)
    except HttpOperationError:
        datastore = Datastore.register_azure_blob_container(
            workspace=workspace,
            datastore_name=datastore_name,
            account_name=storage_account_name,
            container_name=storage_container_name,
            account_key=storage_account_key)
    return datastore

def mount(self, storage_name, storage_key, container):
    ws = Workspace(subscription_id, resource_group, ws_name)  # , auth=svc_pr)

    from azureml.core import Datastore
    datastore = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name=container,
        container_name=container,
        account_name=storage_name,
        account_key=storage_key,
        create_if_not_exists=True)
    console.print("datastore=", datastore)

    dataref = datastore.as_mount()
    dir_name = dataref.path_on_compute
    console.print("datastore MOUNT dir_name=", dir_name)
    return dir_name

def get_or_create_datastore(datastore_name, env, workspace, input=True):
    if datastore_name is None:
        raise ValueError('Datastore name cannot be empty.')

    container_name = (env.scoring_datastore_input_container
                      if input else env.scoring_datastore_output_container)

    if datastore_name in workspace.datastores:
        datastore = workspace.datastores[datastore_name]
    elif container_name is not None and env.scoring_datastore_access_key is not None:
        datastore = Datastore.register_azure_blob_container(
            workspace=workspace,
            datastore_name=datastore_name,
            container_name=container_name,
            account_name=env.scoring_datastore_storage_name,
            account_key=env.scoring_datastore_access_key)
    else:
        raise ValueError(
            'No datastore with this name exists in the current workspace, '
            'and not enough information was provided to register a new one.'
        )
    return datastore

def run_azure_experiment_with_storage(
    subscription_id: str,
    resource_group: str,
    workspace_name: str,
    datastore_name: str,
    container_name: str,
    storage_account_name: str,
    storage_account_key: str,
    compute_name: str,
    experiment_name: Optional[str] = None,
    source_directory: Optional[str] = None,
    image_name: Optional[str] = None,
    use_gpu=True,
) -> Run:
    workspace = Workspace(subscription_id, resource_group, workspace_name)
    data_store = Datastore.register_azure_blob_container(
        workspace=workspace,
        datastore_name=datastore_name,
        container_name=container_name,
        account_name=storage_account_name,
        account_key=storage_account_key,
    )
    source_directory = source_directory or dirname(__file__)
    assert (
        compute_name in workspace.compute_targets
    ), f"compute {compute_name} is not created in {workspace_name} workspace"
    estimator = Estimator(
        source_directory=source_directory,
        script_params={"--data-folder": data_store.as_mount()},
        compute_target=workspace.compute_targets[compute_name],
        pip_packages=pip_packages(),
        entry_script=os.path.join(source_directory, "azure_train.py"),
        use_gpu=use_gpu,
        custom_docker_image=image_name,
    )
    experiment_name = experiment_name or __file__.split(os.sep)[-1].split(".py")[0]
    experiment = Experiment(workspace=workspace, name=experiment_name)
    run = experiment.submit(estimator)
    return run

def register_blob_datastore(subscription_id, resource_group, workspace, datastore_name,
                            container_name, account_name, account_key,
                            set_as_default=True):
    datastore = Datastore.register_azure_blob_container(
        workspace=workspace,
        datastore_name=datastore_name,
        grant_workspace_access=True,
        container_name=container_name,
        account_name=account_name,
        account_key=account_key,
        subscription_id=subscription_id,
        resource_group=resource_group)

    if set_as_default:
        datastore.set_as_default()

    return datastore

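# A hypothetical call to the helper above that registers a container and also
# makes it the workspace default. Every identifier below is a placeholder.
from azureml.core import Workspace

ws = Workspace.from_config()
default_ds = register_blob_datastore(
    subscription_id="<subscription-id>",
    resource_group="<resource-group>",
    workspace=ws,
    datastore_name="shared_blob",
    container_name="shared-data",
    account_name="<storage-account-name>",
    account_key="<storage-account-key>",
    set_as_default=True,
)
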
def _load(self) -> np.ndarray:
    """Loads data from the image file.

    Returns:
        Data from the image file as a numpy array.
    """
    # Initialise Workspace
    ws = Workspace.from_config()

    blob_datastore_name = self._credentials['storage_name']
    account_name = self._credentials['storage_name']  # Storage account name
    container_name = self._credentials['container_name']  # Name of Azure blob container
    account_key = self._credentials['key']  # Storage account key

    # Register a new datastore
    try:
        blob_datastore = Datastore.get(ws, blob_datastore_name)
        print("Found Blob Datastore with name: %s" % blob_datastore_name)
    except HttpOperationError:
        blob_datastore = Datastore.register_azure_blob_container(
            workspace=ws,
            datastore_name=blob_datastore_name,
            container_name=container_name,
            account_name=account_name,
            account_key=account_key)

    blob_datastore.download(target_path=self._local_path,
                            prefix=self._container_path,
                            show_progress=False)
    ...

def _save(self, data: np.ndarray) -> None:
    """Saves image data to the specified filepath"""
    ...

def _describe(self) -> Dict[str, Any]:
    """Returns a dict that describes the attributes of the dataset"""

workspace = Workspace.from_config()
print('Workspace name: ' + workspace.name,
      'Azure region: ' + workspace.location,
      'Subscription id: ' + workspace.subscription_id,
      'Resource group: ' + workspace.resource_group, sep='\n')

# -

from model import TFBertForMultiClassification
from transformers import BertTokenizer
import tensorflow as tf

datastore_config = json.loads(open('datastore.json').read())
datastore = Datastore.register_azure_blob_container(
    workspace=workspace,
    datastore_name=datastore_config['datastore_name'],
    container_name=datastore_config['container_name'],
    account_name=datastore_config['account_name'],
    sas_token=datastore_config['sas_token'])

# If you haven't finished training the model then just download pre-made model from datastore
datastore.download('./', prefix="azure-service-classifier/model")


def encode_example(text, max_seq_length):
    # Encode inputs using tokenizer
    inputs = tokenizer.encode_plus(
        text,
        add_special_tokens=True,
        max_length=max_seq_length
    )
    input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"]
    # The mask has 1 for real tokens and 0 for padding tokens.
    # Only real tokens are attended to.

try:
    ct = ComputeTarget(workspace=ws, name=cluster_name)
    print("Found existing cluster '%s'. Skip." % cluster_name)
except ComputeTargetException:
    print("Creating new cluster '%s'..." % cluster_name)
    compute_config = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_NC6",
        min_nodes=0,
        max_nodes=1)
    ct = ComputeTarget.create(ws, cluster_name, compute_config)
    ct.wait_for_completion(show_output=True)
    print(ct.get_status().serialize())

#
# Register an existing datastore to the workspace.
#
datastore_name = "hellotfstore"
if datastore_name not in ws.datastores:
    Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name=datastore_name,
        container_name="hello-tf",
        account_name="wuhamltestsa",
        account_key="LBpyUOlJT/wbiHQReiwY1EB3WhDF3Sn2STia4UY//SkMWerh08M0QjhImmQ8TwCrmvDfq0tVtB3xF9mxZFiMXA=="
    )
    print("Datastore '%s' registered." % datastore_name)
else:
    print("Datastore '%s' has already been registered." % datastore_name)

# (END)

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Connect to the workspace
from azureml.core import Workspace, Datastore
ws = Workspace.from_config()

# Register a new datastore.
# Registration needs the name and key of the storage account
# that holds the data source.
blob_ds = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name="blob_data",
    container_name='azureml-blobstore-bf4e0c62-87d2-4233-920c-6870aa62cfc0',
    account_name='rishabhmachine5989301776',
    account_key='OCToPz0m8zQBNxIUL01aZDyhHDGK3fDuMXCE0NV/e28UW89q9YWfZimujAeGMS4dvGSOEbHE5YYFmZUFRrXaeA=='
)

# List all the datastores in the workspace
for ds_name in ws.datastores:
    print(ds_name)

# Get a reference to any datastore
blob_store = Datastore.get(ws, datastore_name="blob_data")

# Get the default datastore
default_store = ws.get_default_datastore()

def main(args):
    logging.info('Main started.')

    # Define workspace object
    try:
        ws = Workspace.from_config(path='config.json')
    # Need to create the workspace
    except Exception as err:
        print('No workspace. Check for config.json file.')
        assert False
        # ws = Workspace.create(name=os.getenv('WORKSPACE_NAME', ''),
        #                       subscription_id=os.getenv('AZURE_SUB', ''),
        #                       resource_group=os.getenv('RESOURCE_GROUP', ''),
        #                       create_resource_group=True,
        #                       location='westus2')
        # print("Created workspace {} at location {}".format(ws.name, ws.location))

    # choose a name for your cluster - under 16 characters
    cluster_name = "gpuforkeras"

    try:
        compute_target = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing compute target.')
    except ComputeTargetException:
        print('Creating a new compute target...')
        # AML Compute config - if max_nodes are set, it becomes persistent storage that scales
        compute_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6',
            min_nodes=0,
            max_nodes=5)
        # create the cluster
        compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
        compute_target.wait_for_completion(show_output=True)
        # use get_status() to get a detailed status for the current cluster.
        # print(compute_target.get_status().serialize())

    # Create a project directory and copy training script to it
    project_folder = os.path.join(os.getcwd(), 'project')

    # Create an experiment
    experiment_name = args.experiment_name
    experiment = Experiment(ws, name=experiment_name)

    # Use an AML Data Store for training data
    ds = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name=args.datastore_name,
        container_name=os.getenv('STORAGE_CONTAINER_NAME_TRAINDATA', ''),
        account_name=os.getenv('STORAGE_ACCOUNT_NAME', ''),
        account_key=os.getenv('STORAGE_ACCOUNT_KEY', ''),
        create_if_not_exists=True)

    # Set up for training
    script_params = {
        # --data_path is a Python object that will mount the
        # datastore to the compute target in next step (linking
        # to Blob Storage)
        '--data_path': ds.as_mount(),
        '--data_dir': args.data_dir,
        '--gpu_num': args.gpu_num,
        '--class_path': args.class_path,
        '--num_clusters': args.num_clusters,
        '--batch_size': args.batch_size,
        '--learning_rate': args.learning_rate
    }

    # Instantiate TensorFlow estimator to call training script
    estimator = TensorFlow(source_directory=project_folder,
                           script_params=script_params,
                           compute_target=compute_target,
                           entry_script='train_azureml.py',
                           pip_packages=[
                               'keras==2.2.4',
                               'matplotlib==3.1.1',
                               'opencv-python==4.1.1.26',
                               'Pillow',
                               'numpy',
                               'configparser',
                               'python-dotenv',
                               'tensorflow==1.13.1'
                           ],
                           use_gpu=True,
                           framework_version='1.13')

    # Submit and wait for run to complete - check experiment in Azure Portal for progress
    run = experiment.submit(estimator)
    print(run.get_details())
    run.wait_for_completion(show_output=True)

    # Register models to Workspace
    model = run.register_model(
        model_name='keras-dnn-intermediate',
        model_path='./outputs/trained_weights_intermediate.h5',
        tags={
            'framework': "Keras",
            'task': "object detection"
        },
        description="Custom Keras YOLOv3 model - before fine-tuning phase")
    model = run.register_model(
        model_name='keras-dnn',
        model_path='./outputs/trained_weights_final.h5',
        tags={
            'framework': "Keras",
            'task': "object detection"
        },
        description="Custom Keras YOLOv3 model - final, after fine-tuning phase"
    )

workspace_name = arg

print("Azure ML SDK Version: ", VERSION)

#### Connect to our workspace ####
##################################

# workspace
ws = Workspace.get(name=workspace_name,
                   subscription_id=subscription_id,
                   resource_group=resource_group)

# data
ds = Datastore.register_azure_blob_container(workspace=ws,
                                             datastore_name=datastorename,
                                             container_name='seer-container',
                                             account_name=storage_account,
                                             account_key=storage_account_key,
                                             create_if_not_exists=True)
datastore = ws.datastores[datastorename]

# compute target
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=computetarget)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_NC12',
        min_nodes=1,
        max_nodes=4)
    cpu_cluster = ComputeTarget.create(ws, computetarget, compute_config)
    cpu_cluster.wait_for_completion(show_output=True)

def main(req: func.HttpRequest) -> (func.HttpResponse):
    logging.info('Python HTTP trigger function processed a request.')

    # For now this can be a POST where we have <base url>/api/HttpTrigger?start=<any string>
    image_url = req.params.get('start')
    logging.info(type(image_url))

    # Write a config.json (fill in template values with system vars)
    config_temp = {
        'subscription_id': os.getenv('AZURE_SUB', ''),
        'resource_group': os.getenv('RESOURCE_GROUP', ''),
        'workspace_name': os.getenv('WORKSPACE_NAME', '')
    }
    with open(os.path.join(os.getcwd(), 'HttpTrigger', 'config.json'), 'w') as f:
        json.dump(config_temp, f)

    # Get the workspace from config.json
    try:
        ws = Workspace.from_config(
            os.path.join(os.getcwd(), 'HttpTrigger', 'config.json'))
    # Authentication didn't work
    except ProjectSystemException as err:
        return json.dumps('ProjectSystemException')
    # Need to create the workspace
    except Exception as err:
        ws = Workspace.create(
            name=os.getenv('WORKSPACE_NAME', ''),
            subscription_id=os.getenv('AZURE_SUB', ''),
            resource_group=os.getenv('RESOURCE_GROUP', ''),
            create_resource_group=True,
            location='eastus2'  # Or other supported Azure region
        )

    # choose a name for your cluster
    cluster_name = "gpuclusterplease"

    try:
        compute_target = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing compute target.')
    except ComputeTargetException:
        print('Creating a new compute target...')
        # AML Compute config - if max_nodes are set, it becomes persistent storage that scales
        compute_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6',
            max_nodes=4)
        # create the cluster
        compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
        compute_target.wait_for_completion(show_output=True)
        # use get_status() to get a detailed status for the current cluster.
        print(compute_target.get_status().serialize())

    # Create a project directory and copy training script to it
    project_folder = os.path.join(os.getcwd(), 'HttpTrigger', 'project')
    os.makedirs(project_folder, exist_ok=True)
    shutil.copy(os.path.join(os.getcwd(), 'HttpTrigger', 'pytorch_train.py'),
                project_folder)

    # Create an experiment
    experiment_name = 'fish-no-fish'
    experiment = Experiment(ws, name=experiment_name)

    # Use an AML Data Store for training data
    ds = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name='funcdefaultdatastore',
        container_name=os.getenv('STORAGE_CONTAINER_NAME_TRAINDATA', ''),
        account_name=os.getenv('STORAGE_ACCOUNT_NAME', ''),
        account_key=os.getenv('STORAGE_ACCOUNT_KEY', ''),
        create_if_not_exists=True)

    # Use an AML Data Store to save models back up to
    ds_models = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name='modelsdatastorage',
        container_name=os.getenv('STORAGE_CONTAINER_NAME_MODELS', ''),
        account_name=os.getenv('STORAGE_ACCOUNT_NAME', ''),
        account_key=os.getenv('STORAGE_ACCOUNT_KEY', ''),
        create_if_not_exists=True)

    # Set up for training ("trans" flag means - use transfer learning and
    # this should download a model on compute)
    script_params = {
        '--data_dir': ds.as_mount(),
        '--num_epochs': 30,
        '--learning_rate': 0.01,
        '--output_dir': './outputs',
        '--trans': 'True'
    }

    # Instantiate PyTorch estimator with upload of final model to
    # a specified blob storage container (this can be anything)
    estimator = PyTorch(
        source_directory=project_folder,
        script_params=script_params,
        compute_target=compute_target,
        entry_script='pytorch_train.py',
        use_gpu=True,
        inputs=[
            ds_models.as_upload(
                path_on_compute='./outputs/model_finetuned.pth')
        ])

    run = experiment.submit(estimator)
    run.wait_for_completion(show_output=True)

    return json.dumps('Job complete')

def launch_experiment(ws, conf_aml, conf_cluster, conf_docker, conf_experiment):

    # Register the input data blob container
    input_ds = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name='petridishdata',
        container_name='datasets',
        account_name='petridishdata',
        account_key=conf_aml['azure_storage_account_key'],
        create_if_not_exists=False)

    output_ds = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name='petridishoutput',
        container_name='amloutput',
        account_name='petridishdata',
        account_key=conf_aml['azure_storage_account_key'],
        create_if_not_exists=False)

    # Create or attach compute cluster
    cluster_name = conf_cluster['cluster_name']

    try:
        compute_target = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing compute target.')
    except:
        print('Creating a new compute target...')
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=conf_cluster['vm_size'],
            max_nodes=conf_cluster['max_nodes'],
            vm_priority=conf_cluster['vm_priority'],
            idle_seconds_before_scaledown=conf_cluster['idle_seconds_before_scaledown'])

        # Create the cluster
        compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
        compute_target.wait_for_completion(show_output=True)

    # use get_status() to get a detailed status for the current cluster.
    print(compute_target.get_status().serialize())

    # Set project directory
    # Assuming running in extract_features_from_videos folder
    project_folder = '../../'

    # Setup custom docker usage
    image_registry_details = ContainerRegistry()
    image_registry_details.address = conf_docker['image_registry_address']
    image_registry_details.username = conf_docker['image_registry_username']
    image_registry_details.password = conf_docker['image_registry_password']

    # don't let the system build a new conda environment
    user_managed_dependencies = True

    # Note that experiment names have to be
    # <36 alphanumeric characters
    exp_name = conf_experiment['experiment_name']
    experiment = Experiment(ws, name=exp_name)

    script_params = {
        '--nas.eval.loader.dataset.dataroot': input_ds.path('/').as_mount(),
        '--nas.search.loader.dataset.dataroot': input_ds.path('/').as_mount(),
        '--common.logdir': output_ds.path('/').as_mount(),
    }

    est = Estimator(source_directory=project_folder,
                    script_params=script_params,
                    compute_target=compute_target,
                    entry_script='scripts/main.py',
                    custom_docker_image=conf_docker['image_name'],
                    image_registry_details=image_registry_details,
                    user_managed=user_managed_dependencies,
                    source_directory_data_store=input_ds)

    run = experiment.submit(est)

datastores = ws.datastores
for name, ds in datastores.items():
    print(name, ds.datastore_type)

# ws.set_default_datastore('chexrayds')

# In[48]:

with open(os.path.join(notshared_dir, 'credentials.json')) as creds:
    credentials = json.load(creds)
# print(credentials)

ds = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name=credentials['datastore_name'],
    container_name=credentials['container_name'],
    account_name=credentials['account_name'],
    account_key=credentials['account_key'],
    create_if_not_exists=False)

# In[49]:

ds = Datastore.get(ws, datastore_name='chexrayds')
print(ds.name)

# In[ ]:

# In[50]:

import azureml.data
from azureml.data.data_reference import DataReference

# In[ ]:

def_blob_store.upload_files(
    ["./data/20news.pkl"],
    target_path="20newsgroups",
    overwrite=True)

# In[129]:

datastore = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name='your',
    account_name='lokeshdata',
    container_name='yourc',
    account_key='L6ot0h04xROx/83/W6AymEAR7f66KuhVLKxOCm1SvcMAg70yrJv32mcY389mOoSPVyfRxuTYr3eSZpGF0WHPUg==',
    subscription_id="c3ef02ec-8e19-415f-a0d7-b562b6b78b11",
    create_if_not_exists=True)

# In[133]:

import azureml.data
from azureml.data.azure_storage_datastore import AzureFileDatastore, AzureBlobDatastore

# In[ ]:

def main(req: func.HttpRequest) -> (func.HttpResponse):
    logging.info('Python HTTP trigger function processed a request.')

    # For now this can be a POST where we have <base url>/api/HttpTrigger?start=<any string>
    image_url = req.params.get('start')
    logging.info(type(image_url))

    # Use service principal secrets to create authentication vehicle and
    # define workspace object
    try:
        svc_pr = ServicePrincipalAuthentication(
            tenant_id=os.getenv('TENANT_ID', ''),
            service_principal_id=os.getenv('APP_ID', ''),
            service_principal_password=os.getenv('PRINCIPAL_PASSWORD', ''))
        ws = Workspace(subscription_id=os.getenv('AZURE_SUB', ''),
                       resource_group=os.getenv('RESOURCE_GROUP', ''),
                       workspace_name=os.getenv('WORKSPACE_NAME', ''),
                       auth=svc_pr)
        print("Found workspace {} at location {} using Azure CLI \
            authentication".format(ws.name, ws.location))
    # Usually because authentication didn't work
    except ProjectSystemException as err:
        print('Authentication did not work.')
        return json.dumps('ProjectSystemException')
    # Need to create the workspace
    except Exception as err:
        ws = Workspace.create(name=os.getenv('WORKSPACE_NAME', ''),
                              subscription_id=os.getenv('AZURE_SUB', ''),
                              resource_group=os.getenv('RESOURCE_GROUP', ''),
                              create_resource_group=True,
                              location='westus',  # Or other supported Azure region
                              auth=svc_pr)
        print("Created workspace {} at location {}".format(ws.name, ws.location))

    # choose a name for your cluster - under 16 characters
    cluster_name = "gpuforpytorch"

    try:
        compute_target = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing compute target.')
    except ComputeTargetException:
        print('Creating a new compute target...')
        # AML Compute config - if max_nodes are set, it becomes persistent storage that scales
        compute_config = AmlCompute.provisioning_configuration(vm_size='STANDARD_NC6',
                                                               min_nodes=0,
                                                               max_nodes=2)
        # create the cluster
        compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
        compute_target.wait_for_completion(show_output=True)
        # use get_status() to get a detailed status for the current cluster.
        # print(compute_target.get_status().serialize())

    # Create a project directory and copy training script to it
    project_folder = os.path.join(os.getcwd(), 'HttpTrigger', 'project')
    # os.makedirs(project_folder, exist_ok=True)
    # shutil.copy(os.path.join(os.getcwd(), 'HttpTrigger', 'pytorch_train.py'), project_folder)

    # Create an experiment
    experiment_name = 'fish-no-fish'
    experiment = Experiment(ws, name=experiment_name)

    # Use an AML Data Store for training data
    ds = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name='funcdefaultdatastore',
        container_name=os.getenv('STORAGE_CONTAINER_NAME_TRAINDATA', ''),
        account_name=os.getenv('STORAGE_ACCOUNT_NAME', ''),
        account_key=os.getenv('STORAGE_ACCOUNT_KEY', ''),
        create_if_not_exists=True)

    # Use an AML Data Store to save models back up to
    ds_models = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name='modelsdatastorage',
        container_name=os.getenv('STORAGE_CONTAINER_NAME_MODELS', ''),
        account_name=os.getenv('STORAGE_ACCOUNT_NAME', ''),
        account_key=os.getenv('STORAGE_ACCOUNT_KEY', ''),
        create_if_not_exists=True)

    # Set up for training ("trans" flag means - use transfer learning and
    # this should download a model on compute)
    # Using /tmp to store model and info due to the fact that
    # creating new folders and files on the Azure Function host
    # will trigger the function to restart.
    script_params = {
        '--data_dir': ds.as_mount(),
        '--num_epochs': 30,
        '--learning_rate': 0.01,
        '--output_dir': '/tmp/outputs',
        '--trans': 'True'
    }

    # Instantiate PyTorch estimator with upload of final model to
    # a specified blob storage container (this can be anything)
    estimator = PyTorch(source_directory=project_folder,
                        script_params=script_params,
                        compute_target=compute_target,
                        entry_script='pytorch_train.py',
                        use_gpu=True,
                        inputs=[ds_models.as_upload(
                            path_on_compute='./outputs/model_finetuned.pth')])

    run = experiment.submit(estimator)
    print(run.get_details())

    # # The following would certainly be blocking, but that's ok for debugging
    # while run.get_status() not in ['Completed', 'Failed']:  # For example purposes only, not exhaustive
    #     print('Run {} not in terminal state'.format(run.id))
    #     time.sleep(10)

    return json.dumps(run.get_status())

RESOURCE_GROUP = os.environ.get("RESOURCE_GROUP_NAME")
UPDATE_DATA = os.environ.get("UPDATE_DATA")
STORAGE_NAME = os.environ.get("STORAGE_NAME")
EPIS_CONTAINER = os.environ.get("EPIS_CONTAINER")
EPIS_DATASTORE = os.environ.get("EPIS_DATASTORE")
STORAGE_ACCOUNT_KEY = os.environ.get("STORAGE_ACCOUNT_KEY")

SP_AUTH = ServicePrincipalAuthentication(
    tenant_id=TENANT_ID,
    service_principal_id=APP_ID,
    service_principal_password=APP_SECRET)

WORKSPACE = Workspace.get(
    WORKSPACE_NAME,
    SP_AUTH,
    SUBSCRIPTION_ID,
    RESOURCE_GROUP
)

try:
    if UPDATE_DATA:
        Datastore.register_azure_blob_container(WORKSPACE,
                                                EPIS_DATASTORE,
                                                EPIS_CONTAINER,
                                                STORAGE_NAME,
                                                sas_token=None,
                                                account_key=STORAGE_ACCOUNT_KEY,
                                                protocol=None,
                                                endpoint=None,
                                                overwrite=True,
                                                create_if_not_exists=True,
                                                subscription_id=SUBSCRIPTION_ID,
                                                resource_group=RESOURCE_GROUP)
        print("Dataset EPIS registered successfully")
except Exception as caught_error:
    print("Error while registering the dataset on datastore: " + str(caught_error))
    sys.exit(1)

ds = ws.get_default_datastore()

# Upload local "data" folder (incl. files) as "tfdata" folder
ds.upload(src_dir='./data', target_path='tfdata', overwrite=True)

#%% [markdown]
# ## Use your own blob storage
#
# You can also use your own blob storage. Set your previously generated storage account name, key, and container.

#%%
from azureml.core import Datastore

ds = Datastore.register_azure_blob_container(ws,
                                             datastore_name='myblob01',
                                             account_name='amltest01',
                                             account_key='BAYcnjJ/TK...',
                                             container_name='container01',
                                             overwrite=True)

# Upload local "data" folder (incl. files) as "tfdata" folder
ds.upload(src_dir='./data', target_path='tfdata', overwrite=True)

#%% [markdown]
# Get the generated Datastore, and upload again.

#%%
# Get your own registered datastore
ds = Datastore.get(ws, datastore_name='myblob01')

# Upload local "data" folder (incl. files) as "tfdata" folder
ds.upload(src_dir='./data', target_path='tfdata', overwrite=True)

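#%%
# A minimal sketch of the reverse operation: pulling the uploaded "tfdata"
# folder back down from the registered datastore. The local target path is
# illustrative only.
ds = Datastore.get(ws, datastore_name='myblob01')
ds.download(target_path='./downloaded', prefix='tfdata', show_progress=True)
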
blob_datastore_name = env.blob_datastore_name
# Name of Azure blob container
container_name = env.blob_container_name
# Storage account name
account_name = env.storage_account_name
# Storage account access key
account_key = env.storage_account_key

# Check whether the blob datastore is already registered
try:
    blob_datastore = Datastore.get(aml_workspace, blob_datastore_name)
    print('Found existing datastore, use it.')
except HttpOperationError:
    blob_datastore = Datastore.register_azure_blob_container(
        workspace=aml_workspace,
        datastore_name=blob_datastore_name,
        container_name=container_name,
        account_name=account_name,
        account_key=account_key)
    print("Registered blob datastore with name: %s" % blob_datastore_name)

# Register dataset without creating new version
input_datastore_paths = [DataPath(blob_datastore, env.input_dataset_name)]
input_dataset = Dataset.File.from_files(path=input_datastore_paths)
input_dataset = input_dataset.register(workspace=aml_workspace,
                                       name=env.input_dataset_name,
                                       description=env.input_dataset_name)
print("Registered dataset: %s" % input_dataset.name)

waves_datastore_paths = [DataPath(blob_datastore, env.waves_dataset_name)]
waves_dataset = Dataset.File.from_files(path=waves_datastore_paths)
waves_dataset = waves_dataset.register(workspace=aml_workspace,

    min_nodes=0,
    max_nodes=1,
)
cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
cpu_cluster.wait_for_completion(show_output=True)

old_datastore = [ds for ds in ws.datastores if ds == "telemetry"]
if old_datastore:
    old_ds = Datastore.get(ws, "telemetry")
    old_ds.unregister()

telemetry_ds = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name='telemetry',
    container_name=args.storage_container,
    account_name=args.storage_account,
    account_key=args.storage_key,
)

input_data = DataReference(
    datastore=telemetry_ds,
    data_reference_name="input_data",
    path_on_datastore=args.storage_path,
)

preprocessing_est = SKLearn(
    source_directory='010-preprocessing',
    compute_target=cpu_cluster,
    entry_script='dataprep.py',
    conda_packages=['pandas'],