def _deploy_azuremlcompute_clusters(workspace, default_cpu_compute_target=None, default_gpu_compute_target=None, show_output=True): cpu_compute_object = gpu_compute_object = None # Start creation of both computes if default_cpu_compute_target: cpu_compute_object = AmlCompute.create( workspace, Workspace.DEFAULT_CPU_CLUSTER_NAME, default_cpu_compute_target) if show_output: print("Deploying Compute Target with name {}".format( cpu_compute_object.name)) if default_gpu_compute_target: gpu_compute_object = AmlCompute.create( workspace, Workspace.DEFAULT_GPU_CLUSTER_NAME, default_gpu_compute_target) if show_output: print("Deploying Compute Target with name {}".format( gpu_compute_object.name)) # Wait for both computes to finish remaining_timeout_minutes = 10 # The time when both computes started creating start_time = time.time() for compute_object in [cpu_compute_object, gpu_compute_object]: if compute_object: # The time since we've started checking this specific compute compute_start_time = time.time() compute_object.wait_for_completion( show_output=False, timeout_in_minutes=remaining_timeout_minutes) compute_time_taken = time.time() - compute_start_time time_taken = round(time.time() - start_time, 2) remaining_timeout_minutes = remaining_timeout_minutes - \ (compute_time_taken / 60) provision_status = compute_object.get_status() if not provision_status or provision_status.provisioning_state != "Succeeded": errors = getattr(provision_status, "errors", []) if remaining_timeout_minutes <= 0: errors.append("Creation has exceeded timeout") raise ValueError( "Compute creation failed for {} with errors: {}".format( compute_object.name, errors)) if show_output: print("Deployed Compute Target with name {}. Took {} seconds". format(compute_object.name, time_taken))
def _create_or_update_cluster(self, min_nodes, max_nodes, idle_timeout_secs):
    """Attach to the named AmlCompute cluster and update its scale settings,
    provisioning a brand-new cluster when none exists yet.

    After creation/update completes, verifies that at least ``min_nodes``
    nodes are available, allowing one 30-second grace period before failing.

    Raises:
        RuntimeError: if the cluster still has fewer than ``min_nodes``
            nodes after the grace period.
    """
    try:
        # Attaching raises ComputeTargetException when the cluster is absent.
        self.cluster = AmlCompute(workspace=self.workspace, name=self.cluster_name)
        print('Updating existing cluster "{}"'.format(
            colored(self.cluster_name, "green")))
        self.cluster.update(
            min_nodes=min_nodes,
            max_nodes=max_nodes,
            idle_seconds_before_scaledown=idle_timeout_secs,
        )
    except ComputeTargetException:
        print('Creating new cluster "{}"'.format(
            colored(self.cluster_name, "green")))
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=self.vm_type,
            min_nodes=min_nodes,
            max_nodes=max_nodes,
            idle_seconds_before_scaledown=idle_timeout_secs,
            admin_username=self.admin_username,
            admin_user_ssh_key=self.ssh_key,
            remote_login_port_public_access="Enabled",
        )
        self.cluster = AmlCompute.create(
            self.workspace, self.cluster_name, provisioning_config)

    self.cluster.wait_for_completion()

    # Node count can lag provisioning; re-check once after a short wait.
    if len(self.cluster_nodes) < min_nodes:
        sleep(30)
        if len(self.cluster_nodes) < min_nodes:
            raise RuntimeError("Failed to provision sufficient nodes")
def ComputeCompute():
    """HTTP handler: ensure an AmlCompute cluster exists in a workspace.

    Reads the subscription/workspace identifiers and the desired cluster
    parameters from the JSON request body. Attaches to the named cluster when
    it already exists; otherwise provisions it and blocks until creation
    completes.

    Returns:
        A status string describing whether the cluster was found or created.
    """
    subscription_id = request.json['subscription_id']
    resource_group = request.json['resource_group']
    workspace_name = request.json['workspace_name']
    # 'location' is validated as present but not otherwise used: the
    # workspace's own location applies to the cluster.
    location = request.json['location']
    cluster_name = request.json['cluster_name']
    vm_size = request.json['vm_size']
    min_nodes = request.json['min_nodes']
    max_nodes = request.json['max_nodes']

    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)
    print("Found workspace {} at location {}".format(ws.name, ws.location))
    print('Found existing Workspace.')

    try:
        aml_compute = AmlCompute(ws, cluster_name)
        print('Found existing AML compute context.')
        return "Found existing AML compute context."
    # FIX: was a bare 'except:', which also swallows KeyboardInterrupt and
    # SystemExit. Ideally this would be ComputeTargetException, but that name
    # is not guaranteed to be imported here, so narrow to Exception.
    except Exception:
        print('need to create new Compute.')
        print('Creating new AML compute context.')
        aml_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size, min_nodes=min_nodes, max_nodes=max_nodes)
        aml_compute = AmlCompute.create(
            ws, name=cluster_name, provisioning_configuration=aml_config)
        aml_compute.wait_for_completion(show_output=True)
        return "Compute successfully created"
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Attach to a fixed workspace by name/subscription/resource group.
ws = Workspace.get(name='akws',
                   subscription_id='8b3748c0-bb0b-4913-ab5b-c462062118fe',
                   resource_group='akrg')

cpu_cluster_name = 'tdsp-cluster'

# Reuse the cluster when it already exists; provision only on a miss.
try:
    cpu_cluster = AmlCompute(workspace=ws, name=cpu_cluster_name)
    print('Cluster already exists.')
except ComputeTargetException:
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size='Standard_NC6', max_nodes=4)
    cpu_cluster = AmlCompute.create(ws, cpu_cluster_name, provisioning_config)

# Safe to call on an already-provisioned cluster; returns immediately then.
cpu_cluster.wait_for_completion(show_output=True)
# Show full (untruncated) column contents when displaying frames.
# FIX: the -1 sentinel for display.max_colwidth was deprecated and then
# removed in modern pandas; None is the documented way to disable truncation
# and behaves identically.
pd.set_option('display.max_colwidth', None)
outputDf = pd.DataFrame(data=output, index=[''])
outputDf.T  # notebook-style display; has no effect as a plain statement

from azureml.core.compute import AmlCompute

aml_name = 'cpu-cluster'
try:
    aml_compute = AmlCompute(ws, aml_name)
    print('Found existing AML compute context.')
# FIX: was a bare 'except:' which also traps KeyboardInterrupt/SystemExit;
# narrowed to Exception (ComputeTargetException is not imported here).
except Exception:
    print('Creating new AML compute context.')
    aml_config = AmlCompute.provisioning_configuration(
        vm_size="Standard_D2_v2", min_nodes=1, max_nodes=4)
    aml_compute = AmlCompute.create(ws, name=aml_name,
                                    provisioning_configuration=aml_config)
    aml_compute.wait_for_completion(show_output=True)

# writefile get_data.py  (originally a notebook %%writefile cell marker)
from sklearn import datasets
from sklearn.model_selection import train_test_split
from scipy import sparse
import numpy as np

# def get_data():
# Load the training data and split features from the target column.
boston = pd.read_csv(
    'C:\\Users\\datacore\\OneDrive\\Desktop\\Capstone Project\\train_values_wJZrCmI.csv'
)
X = boston.drop(columns=['poverty_probability'])
# Create Batch AI Cluster compute_target_name = 'myazbai' try: batch_ai_compute = AmlCompute(workspace=ws, name=compute_target_name) print('found existing Azure Batch AI cluster:', batch_ai_compute.name) except ComputeTargetException: print('creating new Azure Batch AI cluster...') batch_ai_config = AmlCompute.provisioning_configuration( vm_size="Standard_NC6", vm_priority="dedicated", min_nodes=0, max_nodes=4, idle_seconds_before_scaledown=300) batch_ai_compute = AmlCompute.create( ws, name=compute_target_name, provisioning_configuration=batch_ai_config) batch_ai_compute.wait_for_completion(show_output=True) project_folder = './tmp/automl-remote-batchai' if not os.path.exists(project_folder): os.makedirs(project_folder) shutil.copy('./scripts/get_data.py', project_folder) print("Training the model...") # configure Auto ML automl_config = AutoMLConfig(task='classification', debug_log='automl_errors.log', primary_metric='AUC_weighted', iteration_timeout_minutes=2,
from azureml.core.conda_dependencies import CondaDependencies

# created an env: register an environment with scikit-learn available.
my_env = Environment("My_new_env")
conda_dep = CondaDependencies.create(conda_packages=['scikit-learn'])
my_env.python.conda_dependencies = conda_dep
# NOTE(review): Environment.register normally takes a workspace argument —
# verify this call against the SDK version in use.
my_env.register()

# creating the cluster
from azureml.core.compute import AmlCompute

# FIX: the original had a misplaced ')' directly after vm_size — a
# SyntaxError; max_nodes belongs inside the provisioning call.
cluster_config = AmlCompute.provisioning_configuration(
    vm_size="STANDARD_D11_V2", max_nodes=2)
cluster = AmlCompute.create(ws, "My_cluster", cluster_config)
cluster.wait_for_completion()

# fetching the data
input_ds = ws.datasets.get("Loan Application")

# for ScriptRunning
from azureml.core import ScriptRunConfig, Experiment

script_run = ScriptRunConfig(source_directory=".",
                             script="hyperdrive_script.py",
                             arguments=["--input_data",
                                        input_ds.as_named_input("raw_data")],
                             environment=my_env,
                             # FIX: keyword was misspelled 'compute_taret',
                             # which ScriptRunConfig rejects as an unexpected
                             # keyword argument.
                             compute_target=cluster)
experiment_name = 'azureautoml'
project_folder = 'remote_automl'
# Cluster size; also reused as the AutoML concurrency limit below.
nodes = 4

dsvm_name = 'dsvmaml'
try:
    dsvm_compute = AmlCompute(ws, dsvm_name)
    print('found existing dsvm.')
# FIX: was a bare 'except:' which also swallows KeyboardInterrupt/SystemExit;
# narrowed to Exception (ComputeTargetException is not imported here).
except Exception:
    print('creating new dsvm.')
    # FIX(comment): the code provisions Standard_NC6 (GPU) nodes, not
    # Standard_D2_v2 as the original comment claimed. See Azure VM docs for
    # other SKUs.
    dsvm_config = AmlCompute.provisioning_configuration(vm_size="Standard_NC6",
                                                        max_nodes=nodes,
                                                        min_nodes=0)
    dsvm_compute = AmlCompute.create(ws, name=dsvm_name,
                                     provisioning_configuration=dsvm_config)
    dsvm_compute.wait_for_completion(show_output=True)

automl_settings = {
    "name": "AutoML_Demo_Experiment_{0}".format(time.time()),
    "iteration_timeout_minutes": 60,
    "iterations": 20,
    "n_cross_validations": 5,
    "primary_metric": 'accuracy',
    "preprocess": True,
    "verbosity": logging.INFO,
    # One concurrent iteration per cluster node.
    "max_concurrent_iterations": nodes
}
## note here that the project folder gets uploaded to our DSVM.