def get_aml_compute(workspace):
    """Return the AML compute target, provisioning it when the lookup fails.

    The target is looked up in ``workspace`` under a hard-coded name. When the
    lookup raises ``ComputeTargetException`` a new low-priority cluster is
    created and the call blocks until provisioning completes (20 minute cap).

    :param workspace: Azure ML workspace that owns the compute target.
    :returns: the existing or freshly created compute target.
    """
    # TODO: Set desired name for compute target
    target_name = "example_vm_name"

    try:
        cluster = AmlCompute(workspace, target_name)
    except ComputeTargetException:
        print("creating new compute target")
        # TODO: Configure desired VM, see:
        # https://docs.microsoft.com/nl-nl/azure/virtual-machines/sizes-general
        cluster_settings = dict(
            vm_size="STANDARD_D1_V2",
            min_nodes=0,
            max_nodes=1,
            vnet_resourcegroup_name="",
            vnet_name="-vn",
            subnet_name="default",
            idle_seconds_before_scaledown=1800,
            vm_priority='lowpriority',
        )
        cluster = ComputeTarget.create(
            workspace,
            target_name,
            AmlCompute.provisioning_configuration(**cluster_settings),
        )
        cluster.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)
    else:
        print("found existing compute target.")

    print("Azure Machine Learning Compute attached")
    return cluster
def _create_or_update_cluster(self, min_nodes, max_nodes, idle_timeout_secs):
    """Update the cluster named ``self.cluster_name`` or create it if missing.

    An existing cluster has its scale settings updated in place. When the
    lookup or the update raises ``ComputeTargetException`` a new cluster is
    created from ``self.vm_type`` with public SSH login enabled.

    :param min_nodes: minimum node count; also the count verified afterwards.
    :param max_nodes: maximum node count.
    :param idle_timeout_secs: idle seconds before the cluster scales down.
    :raises RuntimeError: when fewer than ``min_nodes`` nodes are present even
        after one 30-second grace period.
    """
    try:
        self.cluster = AmlCompute(workspace=self.workspace,
                                  name=self.cluster_name)
        highlighted = colored(self.cluster_name, "green")
        print('Updating existing cluster "{}"'.format(highlighted))
        self.cluster.update(
            min_nodes=min_nodes,
            max_nodes=max_nodes,
            idle_seconds_before_scaledown=idle_timeout_secs,
        )
    except ComputeTargetException:
        highlighted = colored(self.cluster_name, "green")
        print('Creating new cluster "{}"'.format(highlighted))
        new_cluster_config = AmlCompute.provisioning_configuration(
            vm_size=self.vm_type,
            min_nodes=min_nodes,
            max_nodes=max_nodes,
            idle_seconds_before_scaledown=idle_timeout_secs,
            admin_username=self.admin_username,
            admin_user_ssh_key=self.ssh_key,
            remote_login_port_public_access="Enabled",
        )
        self.cluster = AmlCompute.create(
            self.workspace, self.cluster_name, new_cluster_config)

    self.cluster.wait_for_completion()

    # Give slow nodes one extra chance to show up before failing.
    if len(self.cluster_nodes) < min_nodes:
        sleep(30)
        if len(self.cluster_nodes) < min_nodes:
            raise RuntimeError("Failed to provision sufficient nodes")
def ComputeDelete():
    """Delete the compute target described by the JSON body of ``request``.

    Reads ``subscription_id``, ``resource_group``, ``workspace_name``,
    ``location``, ``Cluster_type`` and ``cluster_name`` from ``request.json``.
    A ``Cluster_type`` of ``'Training'`` selects an ``AmlCompute`` target;
    anything else selects an ``AksCompute`` target.

    :returns: ``"compute deleted"`` on success, otherwise the text of the
        exception raised while looking up or deleting the target.
    """
    body = request.json
    subscription_id = body['subscription_id']
    resource_group = body['resource_group']
    workspace_name = body['workspace_name']
    # 'location' is not used below, but reading it keeps the key mandatory.
    location = body['location']
    Cluster_type = body['Cluster_type']
    cluster_name = body['cluster_name']

    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)
    print("Found workspace {} at location {}".format(ws.name, ws.location))

    try:
        is_training = Cluster_type == 'Training'
        target = (AmlCompute(ws, cluster_name) if is_training
                  else AksCompute(ws, cluster_name))
        print('Found existing AML compute context.' if is_training
              else 'Found existing AKS compute context.')
        target.delete()
        print('compute deleted')
        return "compute deleted"
    except Exception as e:
        error_statement = str(e)
        print("Error statement: ", error_statement)
        return error_statement
def _deploy_azuremlcompute_clusters(workspace, default_cpu_compute_target=None, default_gpu_compute_target=None, show_output=True): cpu_compute_object = gpu_compute_object = None # Start creation of both computes if default_cpu_compute_target: cpu_compute_object = AmlCompute.create( workspace, Workspace.DEFAULT_CPU_CLUSTER_NAME, default_cpu_compute_target) if show_output: print("Deploying Compute Target with name {}".format( cpu_compute_object.name)) if default_gpu_compute_target: gpu_compute_object = AmlCompute.create( workspace, Workspace.DEFAULT_GPU_CLUSTER_NAME, default_gpu_compute_target) if show_output: print("Deploying Compute Target with name {}".format( gpu_compute_object.name)) # Wait for both computes to finish remaining_timeout_minutes = 10 # The time when both computes started creating start_time = time.time() for compute_object in [cpu_compute_object, gpu_compute_object]: if compute_object: # The time since we've started checking this specific compute compute_start_time = time.time() compute_object.wait_for_completion( show_output=False, timeout_in_minutes=remaining_timeout_minutes) compute_time_taken = time.time() - compute_start_time time_taken = round(time.time() - start_time, 2) remaining_timeout_minutes = remaining_timeout_minutes - \ (compute_time_taken / 60) provision_status = compute_object.get_status() if not provision_status or provision_status.provisioning_state != "Succeeded": errors = getattr(provision_status, "errors", []) if remaining_timeout_minutes <= 0: errors.append("Creation has exceeded timeout") raise ValueError( "Compute creation failed for {} with errors: {}".format( compute_object.name, errors)) if show_output: print("Deployed Compute Target with name {}. Took {} seconds". format(compute_object.name, time_taken))
def choose_compute_target(workspace, name):
    """Return the compute target called ``name``, creating it when missing.

    :param workspace: Azure ML workspace to search or create the target in.
    :param name: name of the compute target.
    :returns: the existing or newly created compute target.

    Only ``ComputeTargetException`` triggers creation. Other failures, and
    ``KeyboardInterrupt``, now propagate instead of silently starting a
    provisioning request.
    """
    # Imported locally so the narrowed handler does not rely on a
    # module-level import that may not exist in this file.
    from azureml.core.compute_target import ComputeTargetException

    try:
        aml_compute = AmlCompute(workspace, name)
        print("Found existing compute target: {}".format(name))
    except ComputeTargetException:
        print("Creating new compute target: {}".format(name))
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size="STANDARD_D2_V2", min_nodes=1, max_nodes=4)
        aml_compute = ComputeTarget.create(workspace, name,
                                           provisioning_config)
        aml_compute.wait_for_completion(show_output=True)

    print(aml_compute)
    return aml_compute
def create_aml_compute(self, ws):
    """Return the AML compute cluster configured through the environment.

    Settings come from ``AML_COMPUTE_CLUSTER_NAME``,
    ``AML_COMPUTE_CLUSTER_MIN_NODES``, ``AML_COMPUTE_CLUSTER_MAX_NODES`` and
    ``AML_COMPUTE_CLUSTER_SKU``. A target that already exists under the
    configured name is returned as-is (even when it is not an ``AmlCompute``);
    otherwise a new cluster is created and awaited for up to 20 minutes.

    :param ws: Azure ML workspace.
    :returns: the existing or newly created compute target.
    :raises ValueError: when a node-count variable is not an integer.
    """
    # choose a name for your cluster
    print("Creating new AML Compute")
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
    # Environment values are always strings; the SDK expects integer counts.
    compute_min_nodes = int(
        os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 0))
    compute_max_nodes = int(
        os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))
    print(" AML Compute " + compute_name + " min nodes " +
          str(compute_min_nodes) + " compute max nodes " +
          str(compute_max_nodes))

    # This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('found compute target. just use it. ' + compute_name)
    else:
        print('creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=compute_min_nodes,
            max_nodes=compute_max_nodes)

        # create the cluster
        # NOTE(review): the message says "ACI" although an AmlCompute cluster
        # is provisioned; left unchanged in case logs are parsed.
        print("Starting to create ACI Compute cluster")
        compute_target = ComputeTarget.create(ws, compute_name,
                                              provisioning_config)

        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it will use the scale settings
        # for the cluster
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)
    return compute_target
def create_compute(ws, gpus):
    '''Creates an azure compute cluster

    The GPU count selects both the cluster name and the Azure VM size.
    Supported counts are 1, 2 and 4; any other value prints a message and
    creates nothing. The function returns None in every case.
    '''
    # (gpu count, cluster name, azure machine type)
    gpu_options = (
        (1, "gpu-cluster-NC6", 'Standard_NC6_Promo'),
        (2, "gpu-cluster-NC12", 'Standard_NC12_Promo'),
        (4, "gpu-cluster-NC24", 'Standard_NC24_Promo'),
    )
    selected = next((row for row in gpu_options if gpus == row[0]), None)
    if selected is None:
        print(gpus, 'is not a valid number of GPUs. No compute was created')
        return

    _, compute_name, vm_size = selected

    # define the cluster and the max and min number of nodes
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size=vm_size, min_nodes=0, max_nodes=10)
    # create the cluster
    ComputeTarget.create(ws, compute_name, provisioning_config)
def ComputeExist():
    """Report whether the compute target named in ``request.json`` exists.

    Reads ``subscription_id``, ``resource_group``, ``workspace_name``,
    ``location``, ``Cluster_type`` and ``cluster_name`` from ``request.json``.
    A ``Cluster_type`` of ``'Training'`` looks up an ``AmlCompute`` target;
    anything else looks up an ``AksCompute`` target.

    :returns: ``"compute exist"`` when the lookup succeeds, otherwise
        ``"compute not exist"``.
    """
    body = request.json
    subscription_id = body['subscription_id']
    resource_group = body['resource_group']
    workspace_name = body['workspace_name']
    # 'location' is not used below, but reading it keeps the key mandatory.
    location = body['location']
    Cluster_type = body['Cluster_type']
    cluster_name = body['cluster_name']

    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)
    print("Found workspace {} at location {}".format(ws.name, ws.location))
    print('Found existing Workspace.')

    try:
        # Constructing the target object raises when it does not exist.
        if Cluster_type == 'Training':
            AmlCompute(ws, cluster_name)
        else:
            AksCompute(ws, cluster_name)
        print('Found existing AML compute context.')
        return "compute exist"
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt/SystemExit must not be
        # reported as "compute not exist".
        print('need to create new Compute.')
        return "compute not exist"
def create_amlcompute_cluster(self, pet_cluster_name, min_nodes, max_nodes,
                              vm_size):
    """Return the named cluster, creating it inside the configured VNet.

    ``min_nodes`` and ``max_nodes`` are stored on the instance. The compute
    target is looked up in ``self.ws``; when the lookup raises
    ``ComputeTargetException`` a new cluster is provisioned using
    ``self.vnet_name``, ``self.rg_name`` and ``self.subnet_name`` and awaited.

    :returns: ``self.pet_compute_target``.
    """
    self.min_nodes, self.max_nodes = min_nodes, max_nodes

    # Verify that the cluster doesn't exist already
    try:
        self.pet_compute_target = ComputeTarget(workspace=self.ws,
                                                name=pet_cluster_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        network_settings = dict(
            vnet_name=self.vnet_name,
            vnet_resourcegroup_name=self.rg_name,
            subnet_name=self.subnet_name,
        )
        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=min_nodes,
            max_nodes=max_nodes,
            **network_settings)

        # create the cluster
        self.pet_compute_target = ComputeTarget.create(
            self.ws, pet_cluster_name, cluster_config)
        self.pet_compute_target.wait_for_completion(show_output=True)
    else:
        print('Found existing compute target.')

    return self.pet_compute_target
def get_compute(workspace: Workspace, compute_name: str, vm_size: str):
    """Return the compute target ``compute_name``, creating it when missing.

    Priority and node counts for a new cluster are read from
    ``AML_CLUSTER_PRIORITY``, ``AML_CLUSTER_MIN_NODES`` and
    ``AML_CLUSTER_MAX_NODES`` after ``load_dotenv()`` has run.

    :param workspace: Azure ML workspace.
    :param compute_name: name of the compute target.
    :param vm_size: VM size used when a new cluster has to be created.
    :returns: the existing or newly created compute target.
    :raises SystemExit: with status 1 when provisioning raises
        ``ComputeTargetException``.
    """
    # Load the environment variables from .env in case this script
    # is called outside an existing process
    load_dotenv()
    # Verify that cluster does not exist already
    try:
        if compute_name in workspace.compute_targets:
            compute_target = workspace.compute_targets[compute_name]
            if compute_target and type(compute_target) is AmlCompute:
                print('Found existing compute target ' + compute_name +
                      ' so using it.')
        else:
            compute_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=os.environ.get("AML_CLUSTER_PRIORITY",
                                           'dedicated'),
                min_nodes=int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)),
                max_nodes=int(os.environ.get("AML_CLUSTER_MAX_NODES", 2)),
                # The SDK documents this parameter as an int, not a string.
                idle_seconds_before_scaledown=120
                # #Uncomment the below lines for VNet support
                # vnet_resourcegroup_name=vnet_resourcegroup_name,
                # vnet_name=vnet_name,
                # subnet_name=subnet_name
            )
            compute_target = ComputeTarget.create(workspace, compute_name,
                                                  compute_config)
            compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=10)
        return compute_target
    except ComputeTargetException as e:
        print(e)
        print('An error occurred trying to provision compute.')
        # Exit with a non-zero status so callers/CI see the failure; a bare
        # exit() would have reported success.
        raise SystemExit(1)
def getComputeClusterResource(
    ws: Workspace,
    compute_name: str,
    vm_size: str = "Standard_DS1_v2",
    min_nodes=0,
    max_nodes=1,
):
    """Return the AmlCompute cluster ``compute_name``, creating it if needed.

    An existing target is reused only when it is exactly an ``AmlCompute``
    instance; in every other case a create request is issued under the same
    name and awaited for up to 20 minutes.

    :param ws: Azure ML workspace.
    :param compute_name: name of the cluster.
    :param vm_size: VM size for a new cluster.
    :param min_nodes: minimum node count for a new cluster.
    :param max_nodes: maximum node count for a new cluster.
    :returns: the existing or newly created compute target.
    """
    existing = (ws.compute_targets[compute_name]
                if compute_name in ws.compute_targets else None)
    if existing and type(existing) is AmlCompute:
        return existing

    # Create new compute resource:
    new_cluster = ComputeTarget.create(
        ws,
        compute_name,
        AmlCompute.provisioning_configuration(
            vm_size=vm_size, min_nodes=min_nodes, max_nodes=max_nodes),
    )
    new_cluster.wait_for_completion(
        show_output=True, min_node_count=None, timeout_in_minutes=20)
    return new_cluster
def exec_ComputeTargetSetup(
        self, Parameters: ComputeTargetSetupParameter) -> ExecResult:
    """Look up or create the compute target and capture everything printed.

    ``sys.stdout`` is redirected to an in-memory buffer while the target
    ``Parameters.ClusterName`` is fetched from ``self.ws.compute_targets`` or,
    when that key is missing, created as a ``Standard_NC6`` cluster. Either
    way the target is awaited for up to 20 minutes.

    :param Parameters: carries the ``ClusterName`` to look up or create.
    :returns: ``ExecResult(success, captured_output)``; ``success`` is True
        only when the target was obtained and the wait finished without an
        exception.
    """
    succeeded = False
    captured = StringIO()
    previous_stdout = sys.stdout
    sys.stdout = captured
    try:
        try:
            self.compute_target = self.ws.compute_targets[
                Parameters.ClusterName]
        except KeyError:
            print('Creating a new compute target...')
            compute_config = AmlCompute.provisioning_configuration(
                vm_size='Standard_NC6',
                idle_seconds_before_scaledown=1800,
                min_nodes=0,
                max_nodes=4)
            self.compute_target = ComputeTarget.create(
                self.ws, Parameters.ClusterName, compute_config)
        else:
            print('Found existing compute target.')

        self.compute_target.wait_for_completion(show_output=True,
                                                min_node_count=None,
                                                timeout_in_minutes=20)
        succeeded = True
    except Exception as ex:
        # Creation failures are reported here too; previously an error raised
        # inside the KeyError handler skipped this clause entirely.
        print(ex)
    finally:
        # Always undo the redirection, whatever happened above.
        sys.stdout = previous_stdout
    return ExecResult(succeeded, captured.getvalue())
def create_aml_cluster(workspace, parameters):
    """Build an AmlCompute configuration from ``parameters`` and create it.

    :param workspace: Azure ML workspace passed on to
        ``create_compute_target``.
    :param parameters: mapping of cluster settings. Recognised keys:
        ``vm_size``, ``vm_priority``, ``min_nodes``, ``max_nodes``,
        ``idle_seconds_before_scaledown``, ``remote_login_port_public_access``,
        ``vnet_resource_group_name``, ``vnet_name``, ``subnet_name``,
        ``admin_username``, ``admin_user_password``, ``admin_user_ssh_key``
        and ``name``.
    :returns: whatever ``create_compute_target`` returns.

    VNet settings are applied only when all three VNet keys are truthy.
    Credentials are applied only when a username comes with a password or,
    failing that, an SSH key.
    """
    print("::debug::Creating aml cluster configuration")
    aml_config = AmlCompute.provisioning_configuration(
        vm_size=parameters.get("vm_size", None),
        vm_priority=parameters.get("vm_priority", "dedicated"),
        min_nodes=parameters.get("min_nodes", 0),
        max_nodes=parameters.get("max_nodes", 4),
        idle_seconds_before_scaledown=parameters.get(
            "idle_seconds_before_scaledown", None),
        tags={"Created": "GitHub Action: Azure/aml-compute"},
        # Fixed typo in the user-visible description ("GitHubb").
        description="AML Cluster created by Azure/aml-compute GitHub Action",
        remote_login_port_public_access=parameters.get(
            "remote_login_port_public_access", "NotSpecified"),
    )

    print("::debug::Adding VNET settings to configuration if all required settings were provided")
    vnet_resource_group = parameters.get("vnet_resource_group_name", None)
    vnet_name = parameters.get("vnet_name", None)
    subnet_name = parameters.get("subnet_name", None)
    if vnet_resource_group and vnet_name and subnet_name:
        aml_config.vnet_resourcegroup_name = vnet_resource_group
        aml_config.vnet_name = vnet_name
        aml_config.subnet_name = subnet_name

    print("::debug::Adding credentials to configuration if all required settings were provided")
    admin_username = parameters.get("admin_username", None)
    admin_password = parameters.get("admin_user_password", None)
    admin_ssh_key = parameters.get("admin_user_ssh_key", None)
    if admin_username and admin_password:
        aml_config.admin_username = admin_username
        aml_config.admin_user_password = admin_password
    elif admin_username and admin_ssh_key:
        aml_config.admin_username = admin_username
        aml_config.admin_user_ssh_key = admin_ssh_key

    print("::debug::Creating compute target")
    aml_cluster = create_compute_target(
        workspace=workspace,
        name=parameters.get("name", None),
        config=aml_config,
    )
    return aml_cluster
def __check_compute_target(self, compute_target, use_gpu: bool):
    """Validate that ``compute_target`` matches the requested hardware kind.

    :param compute_target: an ``AmlCompute`` instance or the name of a compute
        target in the workspace.
    :param use_gpu: True when GPU compute was requested, False for CPU.
    :raises errors.TrainingComputeException: when the target's VM size is not
        among the sizes supported by the workspace, when GPUs were requested
        but the size has none, or when CPUs were requested but the size
        reports zero vCPUs.
    """
    vm_size = ''
    if isinstance(compute_target, AmlCompute):
        vm_size = compute_target.vm_size
    elif isinstance(compute_target, str):
        vm_size = ComputeTarget(workspace=self.__workspace,
                                name=compute_target).vm_size

    # The supported-size list is fetched once and cached on the instance.
    if self.__vm_size_list is None:
        self.__vm_size_list = AmlCompute.supported_vmsizes(
            self.__workspace)

    wanted = (vm_size or '').upper()
    vm_description = next(
        (entry for entry in self.__vm_size_list
         if entry['name'].upper() == wanted),
        None)
    if vm_description is None:
        # Previously an opaque IndexError from indexing an empty list.
        raise errors.TrainingComputeException(
            f'could not determine a supported VM size for the compute target: {vm_size!r}'
        )

    if use_gpu and vm_description['gpus'] == 0:
        raise errors.TrainingComputeException(
            f'gpu_compute was specified, but the target does not have GPUs: {vm_description} '
        )
    if not use_gpu and vm_description['vCPUs'] == 0:
        raise errors.TrainingComputeException(
            f'cpu_compute was specified, but the target does not have CPUs: {vm_description} '
        )
def prepare_remote_compute(ws):
    """Return the AML compute cluster configured through the environment.

    Settings come from ``AML_COMPUTE_CLUSTER_NAME``,
    ``AML_COMPUTE_CLUSTER_MIN_NODES``, ``AML_COMPUTE_CLUSTER_MAX_NODES`` and
    ``AML_COMPUTE_CLUSTER_SKU``. A target that already exists under the
    configured name is returned as-is; otherwise a new cluster is created,
    awaited for up to 20 minutes, and its status is printed.

    :param ws: Azure ML workspace.
    :returns: the existing or newly created compute target.
    :raises ValueError: when a node-count variable is not an integer.
    """
    compute_name = os.environ.get("AML_COMPUTE_CLUSTER_NAME", "cpucluster")
    # Environment values are always strings; the SDK expects integer counts.
    compute_min_nodes = int(
        os.environ.get("AML_COMPUTE_CLUSTER_MIN_NODES", 1))
    compute_max_nodes = int(
        os.environ.get("AML_COMPUTE_CLUSTER_MAX_NODES", 4))

    # This example uses CPU VM. For using GPU VM, set SKU to STANDARD_NC6
    vm_size = os.environ.get("AML_COMPUTE_CLUSTER_SKU", "STANDARD_D2_V2")

    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('found compute target. Using it. ' + compute_name)
    else:
        print('creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size,
            min_nodes=compute_min_nodes,
            max_nodes=compute_max_nodes)

        # create the cluster
        compute_target = ComputeTarget.create(ws, compute_name,
                                              provisioning_config)

        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it will use the scale settings
        # for the cluster
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)

        # For a more detailed view of current AmlCompute status, use
        # get_status()
        print(compute_target.get_status().serialize())
    return compute_target
def _create_cluster(
    workspace,
    cluster_name=_CLUSTER_NAME,
    vm_size=_CLUSTER_VM_SIZE,
    min_nodes=_CLUSTER_MIN_NODES,
    max_nodes=_CLUSTER_MAX_NODES,
):
    """Return the named compute target, provisioning it when it is missing.

    The lookup is attempted first; a ``ComputeTargetException`` triggers the
    creation of a new AmlCompute cluster, which is then awaited. The target's
    status is logged at debug level before returning.

    :returns: the existing or newly created compute target.
    """
    log = logging.getLogger(__name__)

    try:
        cluster = ComputeTarget(workspace=workspace, name=cluster_name)
    except ComputeTargetException:
        log.info("Creating a new compute target...")
        # create the cluster
        cluster = ComputeTarget.create(
            workspace,
            cluster_name,
            AmlCompute.provisioning_configuration(
                vm_size=vm_size, min_nodes=min_nodes, max_nodes=max_nodes),
        )
        cluster.wait_for_completion(show_output=True)
    else:
        log.info("Found existing compute target.")

    # use get_status() to get a detailed status for the current AmlCompute.
    log.debug(cluster.get_status().serialize())
    return cluster
def main():
    """Submit ``train.py`` as an experiment run on the 'gandalf' compute.

    The workspace is loaded from the local configuration, the 'surfrider'
    datastore is mounted and passed to the script as ``--datastore``, and the
    resulting estimator is submitted to the 'surfrider_rcnn' experiment.
    """
    # workspace
    workspace = Workspace.from_config()

    # compute
    gpu_cluster = AmlCompute(workspace=workspace, name='gandalf')

    # datasource
    surfrider_store = Datastore.get(workspace, datastore_name='surfrider')

    # packages installed on the remote run
    pip_requirements = [
        'opencv-python>=4.1',
        'tensorpack==0.9.8',
        'tensorflow-gpu>=1.3,<2.0',
        'tqdm>=4.36.1',
        'cython>=0.29.13',
        'scipy>=1.3.1',
        'ffmpeg-python',
        'wget',
    ]

    # Create and run experiment
    training_job = Estimator(
        source_directory='./',
        script_params={"--datastore": surfrider_store.as_mount()},
        compute_target=gpu_cluster,
        entry_script='train.py',
        use_gpu=True,
        pip_packages=pip_requirements,
    )
    Experiment(workspace, 'surfrider_rcnn').submit(training_job)
def createCompute(ws, args):
    """Return the compute target ``args.clusterName``, creating it if missing.

    :param ws: Azure ML workspace.
    :param args: object exposing ``clusterName``, ``minNodes``, ``maxNodes``,
        ``clusterSku`` and ``verbose``.
    :returns: the existing or newly created compute target.
    """
    cluster_name = args.clusterName

    if cluster_name not in ws.compute_targets:
        print("Compute target {0} not found.".format(cluster_name))

        min_node_total = args.minNodes
        max_node_total = args.maxNodes
        sku = args.clusterSku

        print("Creating a new compute target {0}.".format(cluster_name))
        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=sku, min_nodes=min_node_total, max_nodes=max_node_total)

        # create the cluster
        cluster = ComputeTarget.create(ws, cluster_name, cluster_config)

        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it will use the scale settings
        # for the cluster
        cluster.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)

        if args.verbose:
            # For a more detailed view of current AmlCompute status, use
            # get_status()
            print(cluster.get_status().serialize())
    else:
        cluster = ws.compute_targets[cluster_name]
        if cluster and type(cluster) is AmlCompute:
            print("Found compute target : {0}".format(cluster_name))

    return cluster
def _create_cluster(workspace, cluster_name, vm_size, min_nodes, max_nodes):
    """Return the named AzureML cluster, creating it when it does not exist.

    Args:
        workspace: Workspace in which the cluster is looked up or created.
        cluster_name (string): The name you wish to assign the cluster.
        vm_size (string): The type of sku to use for your vm.
        min_nodes (int): Minimum number of nodes in cluster.
            Use 0 if you don't want to incur costs when it isn't being used.
        max_nodes (int): Maximum number of nodes in cluster.

    Returns:
        The existing or newly created compute target.
    """
    log = logging.getLogger(__name__)

    try:
        cluster = ComputeTarget(workspace=workspace, name=cluster_name)
    except ComputeTargetException:
        log.info("Creating a new compute target...")
        # create the cluster
        cluster = ComputeTarget.create(
            workspace,
            cluster_name,
            AmlCompute.provisioning_configuration(
                vm_size=vm_size, min_nodes=min_nodes, max_nodes=max_nodes
            ),
        )
        cluster.wait_for_completion(show_output=True)
    else:
        log.info("Found existing compute target.")

    # log the serialized compute target itself (not its get_status() result)
    log.debug(cluster.serialize())
    return cluster
def get_compute(workspace: Workspace, compute_name: str, vm_size: str):
    """Return the compute target ``compute_name``, creating it when missing.

    Priority and node counts for a new cluster are read from
    ``AML_CLUSTER_PRIORITY``, ``AML_CLUSTER_MIN_NODES`` and
    ``AML_CLUSTER_MAX_NODES`` after ``load_dotenv()`` has run.

    :param workspace: Azure ML workspace.
    :param compute_name: name of the compute target.
    :param vm_size: VM size used when a new cluster has to be created.
    :returns: the existing or newly created compute target.
    :raises SystemExit: with status 1 when provisioning raises
        ``ComputeTargetException``.
    """
    load_dotenv()
    try:
        if compute_name in workspace.compute_targets:
            compute_target = workspace.compute_targets[compute_name]
            if compute_target and type(compute_target) is AmlCompute:
                print('Found existing compute target ' + compute_name +
                      ' so using it.')
        else:
            compute_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=os.environ.get("AML_CLUSTER_PRIORITY",
                                           'lowpriority'),
                min_nodes=int(os.environ.get("AML_CLUSTER_MIN_NODES", 0)),
                max_nodes=int(os.environ.get("AML_CLUSTER_MAX_NODES", 4)),
                # The SDK documents this parameter as an int, not a string.
                idle_seconds_before_scaledown=300)
            compute_target = ComputeTarget.create(workspace, compute_name,
                                                  compute_config)
            compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=10)
        return compute_target
    except ComputeTargetException as e:
        print(e)
        print('An error occurred trying to provision compute.')
        # Exit with a non-zero status so callers/CI see the failure; a bare
        # exit() would have reported success.
        raise SystemExit(1)
def getComputeAML(ws, name="amlcluster"):
    """Return the AmlCompute cluster ``name``, creating it when it is absent.

    When no target of that name exists, a cluster is created from
    ``AZML_COMPUTE_MIN_NODES``, ``AZML_COMPUTE_MAX_NODES`` and
    ``AZML_COMPUTE_VMSIZE`` and awaited for up to 20 minutes. When a target of
    that name exists but is not an ``AmlCompute``, nothing is returned.

    :param ws: Azure ML workspace.
    :param name: cluster name.
    :returns: the compute target, or None in the case described above.
    """
    # Azure ML compute configuration
    if name not in ws.compute_targets:
        min_node_total = int(os.environ.get('AZML_COMPUTE_MIN_NODES', "0"))
        max_node_total = int(os.environ.get('AZML_COMPUTE_MAX_NODES', "3"))
        sku = os.environ.get('AZML_COMPUTE_VMSIZE', "Standard_D3_v2")

        print(f"### Creating cluster '{name}' this could take time...")
        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=sku,
            min_nodes=min_node_total,
            max_nodes=max_node_total,
            idle_seconds_before_scaledown=3600)

        # create the cluster
        new_cluster = ComputeTarget.create(ws, name, cluster_config)

        # can poll for a minimum number of nodes and for a specific timeout.
        # if no min node count is provided it will use the scale settings
        # for the cluster
        new_cluster.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)

        # For a more detailed view of current AmlCompute status, use
        # get_status()
        print(new_cluster.get_status().serialize())
        return new_cluster

    existing = ws.compute_targets[name]
    if existing and type(existing) is AmlCompute:
        print(f"### Found existing cluster '{name}' so will use it")
        return existing
def get_compute(workspace: Workspace, compute_name: str, vm_size: str):
    """Return the compute target ``compute_name``, creating it when missing.

    Priority and node counts for a new cluster come from an ``Env`` instance
    (``vm_priority``, ``min_nodes``, ``max_nodes``).

    :param workspace: Azure ML workspace.
    :param compute_name: name of the compute target.
    :param vm_size: VM size used when a new cluster has to be created.
    :returns: the existing or newly created compute target.
    :raises SystemExit: with status 1 when provisioning raises
        ``ComputeTargetException``.
    """
    try:
        if compute_name in workspace.compute_targets:
            compute_target = workspace.compute_targets[compute_name]
            if compute_target and type(compute_target) is AmlCompute:
                print('Found existing compute target ' + compute_name +
                      ' so using it.')
        else:
            # Named ``env`` so it is not shadowed by the exception variable.
            env = Env()
            compute_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=env.vm_priority,
                min_nodes=env.min_nodes,
                max_nodes=env.max_nodes,
                # The SDK documents this parameter as an int, not a string.
                idle_seconds_before_scaledown=300
                # #Uncomment the below lines for VNet support
                # vnet_resourcegroup_name=vnet_resourcegroup_name,
                # vnet_name=vnet_name,
                # subnet_name=subnet_name
            )
            compute_target = ComputeTarget.create(workspace, compute_name,
                                                  compute_config)
            compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=10)
        return compute_target
    except ComputeTargetException as ex:
        print(ex)
        print('An error occurred trying to provision compute.')
        exit(1)
def supported_vm_sizes(ws):
    """
    List the VM sizes that ``AmlCompute`` reports as supported.

    :param ws: azureml Workspace instance
    :return: list
    """
    available = AmlCompute.supported_vmsizes(workspace=ws)
    return list(available)
def get_compute(workspace, cluster_name, vm_size='STANDARD_NC6', max_nodes=4):
    """
    Find a compute cluster by name, or provision one when the lookup fails.

    :param workspace: The Azure ML workspace to use.
    :param cluster_name: Name of the cluster to find or create.
    :param vm_size: VM size for the new cluster; unused when one is found.
    :param max_nodes: Node limit for the new cluster; unused when one is
        found.
    :returns: A ComputeTarget object.
    """
    try:
        cluster = ComputeTarget(workspace=workspace, name=cluster_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        cluster = ComputeTarget.create(
            workspace,
            cluster_name,
            AmlCompute.provisioning_configuration(
                vm_size=vm_size, max_nodes=max_nodes),
        )
        # Block until provisioning finishes (at most 20 minutes).
        cluster.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)
    else:
        print('Found existing compute target')

    return cluster
def create_cluster(ws, cluster_name, vm_size, max_nodes):
    """Return the named cluster, creating it when the lookup fails.

    Any of ``cluster_name``, ``vm_size`` and ``max_nodes`` may be None, in
    which case the value is asked for interactively.

    :param ws: Azure ML workspace.
    :param cluster_name: cluster name, or None to prompt.
    :param vm_size: VM size for a new cluster, or None to prompt.
    :param max_nodes: maximum node count for a new cluster, or None to prompt.
    :returns: the existing or newly created compute target.
    :raises ValueError: when the node count typed at the prompt is not an
        integer.
    """
    if cluster_name is None:
        cluster_name = input("name of the cluster : ")
    else:
        print("using cluster : ", cluster_name)

    if vm_size is None:
        vm_size = input("size of your VM : ")
    else:
        print("using vm size of : ", vm_size)

    if max_nodes is None:
        # input() returns a string; the SDK expects an integer node count.
        max_nodes = int(input("maximum amount of nodes on the cluster : "))
    else:
        print("maximum amount of nodes is : ", max_nodes)

    from azureml.core.compute import ComputeTarget, AmlCompute
    from azureml.core.compute_target import ComputeTargetException

    # Verify that cluster does not exist already
    try:
        cluster = ComputeTarget(workspace=ws, name=cluster_name)
        print("Found existing cluster")
    except ComputeTargetException:
        print("Creating new cluster")

        # Specify the configuration for the new cluster
        compute_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size, min_nodes=0, max_nodes=max_nodes)

        # Create the cluster with the specified name and configuration
        cluster = ComputeTarget.create(ws, cluster_name, compute_config)

        # Wait for the cluster to complete, show the output log
        cluster.wait_for_completion(show_output=True)

    return cluster
def get_compute_object(ws, compute_name, size="STANDARD_NC6", min_nodes=1,
                       max_nodes=4):
    """
    get_compute_object - Fetches an AMLS compute object, creating it if absent.

    :param Workspace ws: AMLS Workspace object.
    :param str compute_name: AMLS compute name.
    :param str size: VM size used when the compute has to be created.
    :param int min_nodes: minimum node count used on creation.
    :param int max_nodes: maximum node count used on creation.
    :returns: MLS compute target
    :rtype: azureml.core.compute.ComputeTarget
    """
    if compute_name not in ws.compute_targets:
        # Create the cluster
        created = ComputeTarget.create(
            ws,
            compute_name,
            AmlCompute.provisioning_configuration(
                vm_size=size, min_nodes=min_nodes, max_nodes=max_nodes),
        )
        created.wait_for_completion(show_output=True)
        return created

    return ws.compute_targets[compute_name]
def _setup_compute(self):
    """
    Prepare ``self.compute_target`` in the azureml workspace.

    A target already registered under ``self.comp_name`` is reused; otherwise
    a cluster is created from ``self.comp_vm_size``, ``self.comp_min_nodes``
    and ``self.comp_max_nodes``, awaited, and its status printed.

    :returns: compute_target
    :rtype: ComputeTarget
    """
    if self.comp_name not in self.ws.compute_targets:
        print("creating a new compute target...")
        cluster_config = AmlCompute.provisioning_configuration(
            vm_size=self.comp_vm_size,
            min_nodes=self.comp_min_nodes,
            max_nodes=self.comp_max_nodes)

        self.compute_target = ComputeTarget.create(
            self.ws, self.comp_name, cluster_config)
        self.compute_target.wait_for_completion(
            show_output=True, min_node_count=None, timeout_in_minutes=20)

        print(self.compute_target.get_status().serialize())
    else:
        self.compute_target = self.ws.compute_targets[self.comp_name]
        if self.compute_target and type(self.compute_target) is AmlCompute:
            print("Found compute target: " + self.comp_name)

    return self.compute_target
def create_aml_cluster(workspace, parameters):
    """Build an AmlCompute configuration from ``parameters`` and create it.

    Optional settings are layered on top of the base configuration:

    * VNet settings, when all three VNet keys are present in ``parameters``;
    * admin credentials from ``ADMIN_USER_NAME`` plus either
      ``ADMIN_USER_PASSWORD`` or ``ADMIN_USER_SSH_KEY`` in the environment;
    * a user-assigned identity, when ``identity_type`` is ``"UserAssigned"``
      and ``identity_id`` is present.

    The cluster name defaults to the last path segment of
    ``GITHUB_REPOSITORY``, truncated to 16 characters.

    :param workspace: workspace passed on to ``create_compute_target``.
    :param parameters: mapping of cluster settings.
    :returns: whatever ``create_compute_target`` returns.
    """
    print("::debug::Creating aml cluster configuration")
    base_settings = {
        "vm_size": parameters.get("vm_size", "Standard_DS3_v2"),
        "vm_priority": parameters.get("vm_priority", "dedicated"),
        "min_nodes": parameters.get("min_nodes", 0),
        "max_nodes": parameters.get("max_nodes", 4),
        "idle_seconds_before_scaledown": parameters.get(
            "idle_seconds_before_scaledown", None),
        "tags": {"Created": "GitHub Action: Azure/aml-compute"},
        "description": "AML Cluster created by Azure/aml-compute GitHub Action",
        "remote_login_port_public_access": parameters.get(
            "remote_login_port_public_access", "NotSpecified"),
    }
    aml_config = AmlCompute.provisioning_configuration(**base_settings)

    print(
        "::debug::Adding VNET settings to configuration if all required settings were provided"
    )
    vnet_keys = ("vnet_resource_group_name", "vnet_name", "subnet_name")
    if all(parameters.get(key, None) is not None for key in vnet_keys):
        aml_config.vnet_resourcegroup_name = parameters.get(
            "vnet_resource_group_name", None)
        aml_config.vnet_name = parameters.get("vnet_name", None)
        aml_config.subnet_name = parameters.get("subnet_name", None)

    print(
        "::debug::Adding credentials to configuration if all required settings were provided"
    )
    env_user = os.environ.get("ADMIN_USER_NAME", None)
    env_password = os.environ.get("ADMIN_USER_PASSWORD", None)
    env_ssh_key = os.environ.get("ADMIN_USER_SSH_KEY", None)
    if env_user is not None and env_password is not None:
        aml_config.admin_username = env_user
        aml_config.admin_user_password = env_password
    elif env_user is not None and env_ssh_key is not None:
        aml_config.admin_username = env_user
        aml_config.admin_user_ssh_key = env_ssh_key

    print(
        "::debug::Adding identity settings to configuration if all required settings were provided"
    )
    wants_user_identity = parameters.get("identity_type",
                                         None) == "UserAssigned"
    if wants_user_identity and parameters.get("identity_id",
                                              None) is not None:
        aml_config.identity_type = parameters.get("identity_type", None)
        aml_config.identity_id = parameters.get("identity_id", None)

    print("::debug::Creating compute target")
    # Default compute target name
    repository_slug = str(os.environ.get("GITHUB_REPOSITORY"))
    repository_name = repository_slug.rsplit("/", 1)[-1][:16]
    return create_compute_target(
        workspace=workspace,
        name=parameters.get("name", repository_name),
        config=aml_config)
def get_compute(ws: Workspace, compute_target: str) -> ComputeTarget:
    """Return the compute target called ``compute_target``.

    When the workspace has no target of that name, a ``STANDARD_NC6`` cluster
    with 1 to 4 nodes is created and awaited first. The result is always read
    back from ``ws.compute_targets``.
    """
    already_registered = compute_target in ws.compute_targets
    if not already_registered:
        new_cluster = ComputeTarget.create(
            ws,
            compute_target,
            AmlCompute.provisioning_configuration(
                vm_size='STANDARD_NC6', min_nodes=1, max_nodes=4),
        )
        new_cluster.wait_for_completion(show_output=True)

    return ws.compute_targets[compute_target]
def get_aml_ws_sizes(self, aml_ws_name):
    """Return the VM sizes ``AmlCompute`` reports for a workspace.

    The workspace is obtained from ``get_aml_ws(self.config, aml_ws_name)``.
    """
    workspace = get_aml_ws(self.config, aml_ws_name)

    # TODO: make this an xt cmd: xt list sizes
    from azureml.core.compute import ComputeTarget, AmlCompute

    return AmlCompute.supported_vmsizes(workspace=workspace)