def ComputeDelete():
    """Delete the compute target named in the JSON body of the current request.

    The request body must carry ``subscription_id``, ``resource_group``,
    ``workspace_name``, ``location``, ``Cluster_type`` and ``cluster_name``.
    A ``Cluster_type`` of ``'Training'`` selects an ``AmlCompute`` target;
    any other value selects an ``AksCompute`` target.

    Returns:
        ``"compute deleted"`` on success, otherwise the text of the exception
        raised while looking up or deleting the compute target.
    """
    payload = request.json
    subscription_id = payload['subscription_id']
    resource_group = payload['resource_group']
    workspace_name = payload['workspace_name']
    # Not used below, but reading it keeps the key mandatory in the payload.
    location = payload['location']
    cluster_type = payload['Cluster_type']
    cluster_name = payload['cluster_name']

    # Workspace lookup happens outside the try block, so its errors propagate.
    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)
    print("Found workspace {} at location {}".format(ws.name, ws.location))

    try:
        if cluster_type == 'Training':
            compute_class = AmlCompute
            found_message = 'Found existing AML compute context.'
        else:
            compute_class = AksCompute
            found_message = 'Found existing AKS compute context.'

        compute_target = compute_class(ws, cluster_name)
        print(found_message)
        compute_target.delete()
        print('compute deleted')
        return "compute deleted"
    except Exception as e:
        # Any failure is reported back to the caller as plain text.
        error_statement = str(e)
        print("Error statement: ", error_statement)
        return error_statement
run = experiment.submit(config=run_config)

#%%
run.wait_for_completion(show_output=True)

#%% [markdown]
# ## View logs

#%% [markdown]
# Logs are available in the [Azure Portal](https://portal.azure.com/); they can also be viewed inside the notebook with the AML run history widget.

#%%
from azureml.widgets import RunDetails

run_details = RunDetails(run_instance=run)
run_details.show()

#%% [markdown]
# You can also explore the metrics from Python code.

#%%
allmetrics = run.get_metrics()
print(allmetrics)

#%% [markdown]
# ## Remove AML compute

#%%
# Delete the cluster (nodes) and remove it from the AML workspace
mycompute = AmlCompute(workspace=ws, name='hypertest01')
mycompute.delete()

#%%
# Locate the 'inference' step of the pipeline run and take its output
inference_step = inference_pipeline_run.find_step_run('inference')[0]
data = inference_step.get_output_data('inference_output')

# Fetch the predictions into the current directory
data.download('.', show_progress=True)

# Load the downloaded results file and show the predictions
results_file = os.path.join('./', data.path_on_datastore, 'results.txt')
predictions = np.loadtxt(results_file)
print(predictions)


# ### Cleanup Resources
#
# If you are done experimenting with this quickstart, run the following two cells to clean up resources.

# In[ ]:


schedule.disable()


# In[ ]:


aml_compute.delete()


# In[ ]:
cli_auth = AzureCliAuthentication() ws = Workspace.from_config(path=workspace_config_settings["path"], auth=cli_auth, _file_name=workspace_config_settings["file_name"]) print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n') try: # Loading AMLCompute print("Loading existing AML Compute") cluster = AmlCompute(workspace=ws, name=aml_settings["name"]) # Check settings and redeploy if required settings have changed print("Found existing cluster") if cluster.vm_size.lower() != aml_settings["vm_size"].lower( ) or cluster.vm_priority.lower() != aml_settings["vm_priority"].lower(): cluster.delete() cluster.wait_for_completion(show_output=True) raise ComputeTargetException( "Cluster is of incorrect size or has incorrect priority. Deleting cluster and provisioning a new one." ) # Update AMLCompute #if cluster.provisioning_configuration.min_nodes != aml_settings["min_nodes"] or cluster.provisioning_configuration.max_nodes != aml_settings["max_nodes"] or cluster.provisioning_configuration.idle_seconds_before_scaledown != aml_settings["idle_seconds_before_scaledown"]: print("Updating settings of Cluster") cluster.update(min_nodes=aml_settings["min_nodes"], max_nodes=aml_settings["max_nodes"], idle_seconds_before_scaledown=aml_settings[ "idle_seconds_before_scaledown"]) # Wait until the operation has completed cluster.wait_for_completion(show_output=True)
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

ws = Workspace.get(name='akws',
                   subscription_id='8b3748c0-bb0b-4913-ab5b-c462062118fe',
                   resource_group='akrg')
cpu_cluster_name = 'tdsp-cluster'

# Remove the cluster if it is present in the workspace.
# Only the lookup is guarded: a ComputeTargetException raised by delete()
# must not be reported as "cluster does not exist", so deletion lives in the
# else branch and its errors propagate.
try:
    cpu_cluster = AmlCompute(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    print('Cluster does not exist in workspace.')
else:
    # Announce before issuing the request so the message precedes the work.
    print('Deleting cluster...')
    cpu_cluster.delete()
def _load_workspace(parameters, azure_credentials):
    """Return the Workspace named in *parameters*, or None after a handled auth error.

    Authenticates with the service principal described by *azure_credentials*
    (keys ``tenantId``, ``clientId``, ``clientSecret``, ``subscriptionId``).
    Each handled failure is reported with an ``::error::`` line.
    """
    sp_auth = ServicePrincipalAuthentication(
        tenant_id=azure_credentials.get("tenantId", ""),
        service_principal_id=azure_credentials.get("clientId", ""),
        service_principal_password=azure_credentials.get("clientSecret", ""))
    try:
        print("::debug::Loading existing Workspace")
        ws = Workspace.get(
            name=parameters.get("name", None),
            subscription_id=azure_credentials.get("subscriptionId", ""),
            resource_group=parameters.get("resourceGroup", None),
            auth=sp_auth)
        print("::debug::Successfully loaded existing Workspace")
        return ws
    except AuthenticationException as exception:
        print(
            "::error::Could not retrieve user token. Please paste output of "
            "`az ad sp create-for-rbac --name <your-sp-name> --role contributor "
            "--scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> "
            "--sdk-auth` as value of secret variable: AZURE_CREDENTIALS: "
            f"{exception}"
        )
    except AuthenticationError as exception:
        print(f"::error::Microsoft REST Authentication Error: {exception}")
    except AdalError as exception:
        print(
            f"::error::Active Directory Authentication Library Error: {exception}"
        )
    except ProjectSystemException as exception:
        print(f"::error::Workspace authorizationfailed: {exception}")
    return None


def _build_compute_config(parameters):
    """Build the AmlCompute provisioning configuration described by *parameters*.

    ``vm_size``, ``vm_priority``, ``min_nodes``, ``max_nodes`` and
    ``idle_seconds_before_scaledown`` are required. ``tags``, ``description``,
    the ``vnet_*``/``subnet_name`` settings and the ``admin_*`` settings are
    optional and are read with ``.get`` so that leaving them out of the
    parameters file does not raise ``KeyError``.
    """
    compute_config = AmlCompute.provisioning_configuration(
        vm_size=parameters["vm_size"],
        vm_priority=parameters["vm_priority"],
        min_nodes=parameters["min_nodes"],
        max_nodes=parameters["max_nodes"],
        idle_seconds_before_scaledown=parameters[
            "idle_seconds_before_scaledown"],
        tags=parameters.get("tags"),
        description=parameters.get("description"))

    # Deploy to VNET only when all three network settings are provided
    vnet_resource_group_name = parameters.get("vnet_resource_group_name")
    vnet_name = parameters.get("vnet_name")
    subnet_name = parameters.get("subnet_name")
    if vnet_resource_group_name and vnet_name and subnet_name:
        compute_config.vnet_resourcegroup_name = vnet_resource_group_name
        compute_config.vnet_name = vnet_name
        compute_config.subnet_name = subnet_name

    # Set credentials if provided; a password takes precedence over an SSH key
    admin_username = parameters.get("admin_username")
    admin_user_password = parameters.get("admin_user_password")
    admin_user_ssh_key = parameters.get("admin_user_ssh_key")
    if admin_username and admin_user_password:
        compute_config.admin_username = admin_username
        compute_config.admin_user_password = admin_user_password
    elif admin_username and admin_user_ssh_key:
        compute_config.admin_username = admin_username
        compute_config.admin_user_ssh_key = admin_user_ssh_key
    return compute_config


def _mask_secrets(parameters):
    """Return a copy of *parameters* that is safe to print to the action log."""
    secret_keys = ("admin_user_password",)
    return {
        key: "***" if key in secret_keys and value else value
        for key, value in parameters.items()
    }


def main():
    """Create or update the AML compute cluster described by the parameters file.

    Inputs are read from the environment: ``INPUT_PARAMETERSFILE`` (file name
    under ``.aml``, default ``workspace.json``) and ``INPUT_AZURECREDENTIALS``
    (JSON object with the service principal credentials).

    If a cluster with the configured name exists and its VM size and priority
    match, its scale settings are updated. Otherwise it is deleted and/or a
    new cluster is provisioned.

    Returns:
        None. A missing parameters file or a handled authentication failure is
        reported with an ``::error::`` line before returning early.

    Raises:
        Exception: if the cluster ends up in the ``Failed`` provisioning state.
    """
    # Loading input values
    print("::debug::Loading input values")
    parameters_file = os.environ.get("INPUT_PARAMETERSFILE",
                                     default="workspace.json")
    azure_credentials = os.environ.get("INPUT_AZURECREDENTIALS", default="{}")
    azure_credentials = json.loads(azure_credentials)

    # Loading parameters file
    print("::debug::Loading parameters file")
    parameters_file_path = os.path.join(".aml", parameters_file)
    try:
        with open(parameters_file_path) as f:
            parameters = json.load(f)
    except FileNotFoundError:
        print(
            f"::error::Could not find parameter file in {parameters_file_path}. "
            "Please provide a parameter file in your repository (e.g. .aml/workspace.json)."
        )
        return

    # Loading Workspace
    ws = _load_workspace(parameters, azure_credentials)
    if ws is None:
        return

    # TODO: Create compute if not existing.
    try:
        # Loading AMLCompute
        print("::debug::Loading existing AML Compute")
        cluster = AmlCompute(workspace=ws, name=parameters["name"])

        # Check settings and redeploy if required settings have changed
        print("::debug::Found existing cluster")
        size_changed = (
            cluster.vm_size.lower() != parameters["vm_size"].lower())
        priority_changed = (
            cluster.vm_priority.lower() != parameters["vm_priority"].lower())
        if size_changed or priority_changed:
            cluster.delete()
            cluster.wait_for_completion(show_output=True)
            # Raised on purpose: routes control to the provisioning branch.
            raise ComputeTargetException(
                "Cluster is of incorrect size or has incorrect priority. "
                "Deleting cluster and provisioning a new one."
            )

        # Update AMLCompute. The scale settings are pushed unconditionally;
        # they are not compared against the cluster's current values first.
        print("::debug::Updating settings of Cluster")
        cluster.update(
            min_nodes=parameters["min_nodes"],
            max_nodes=parameters["max_nodes"],
            idle_seconds_before_scaledown=parameters[
                "idle_seconds_before_scaledown"])

        # Wait until the operation has completed
        cluster.wait_for_completion(show_output=True)
        print("::debug::Successfully updated Cluster definition")
    except ComputeTargetException:
        print("::debug::Loading failed")
        print("::debug::Creating new AML Compute resource")
        compute_config = _build_compute_config(parameters)

        # Create Compute Target
        cluster = ComputeTarget.create(
            workspace=ws,
            name=parameters["name"],
            provisioning_configuration=compute_config)

        # Wait until the cluster is attached
        cluster.wait_for_completion(show_output=True)

    # Checking status of AMLCompute Cluster
    print("::debug::Checking status of AMLCompute Cluster")
    if cluster.provisioning_state == "Failed":
        cluster.delete()
        failure_template = (
            "::debug::Deployment of AMLCompute Cluster failed with the "
            "following status: {} and logs: \n{}"
        )
        raise Exception(
            failure_template.format(cluster.provisioning_state,
                                    cluster.provisioning_errors))

    # The raw parameters may contain the admin password; never echo it.
    print(_mask_secrets(parameters))
    print(
        "::debug::Successfully finished Azure Machine Learning Compute Action")