print("all args: ", args) with open('config.json', 'r') as f: config = json.load(f) try: svc_pr = ServicePrincipalAuthentication( tenant_id=config['tenant_id'], service_principal_id=config['service_principal_id'], service_principal_password=config['service_principal_password']) except KeyError as e: print("Getting Service Principal Authentication from Azure Devops") svr_pr = None pass ws = Workspace.from_config(auth=svc_pr) input_dir = os.path.dirname(args.input_dir) with open(os.path.join(input_dir, 'data_metrics')) as f: metrics = json.load(f) best_loss = 1.0 best_run_id = None print(metrics) for run in metrics.keys(): try: loss = metrics[run]['val_loss'][-1] if loss < best_loss: best_loss = loss
def main():
    """Create, or reuse, an Azure ML compute target for this GitHub Action.

    Reads the service principal credentials from the
    ``INPUT_AZURE_CREDENTIALS`` environment variable and the optional
    parameters file named by ``INPUT_PARAMETERS_FILE`` (default
    ``compute.json`` inside ``.cloud/.azure``), loads the workspace described
    by ``aml_arm_config.json`` and looks up the compute target by name. If no
    such target exists, a new one is created according to the
    ``compute_type`` parameter (``amlcluster`` or ``akscluster``).

    Raises:
        AMLConfigurationException: If the credentials are not valid JSON or
            the requested compute type is not supported.
        AuthenticationException, AuthenticationError, AdalError,
        ProjectSystemException: Re-raised unchanged when the workspace
            cannot be loaded.
    """
    # Loading azure credentials
    print("::debug::Loading azure credentials")
    azure_credentials = os.environ.get("INPUT_AZURE_CREDENTIALS", default="{}")
    try:
        azure_credentials = json.loads(azure_credentials)
    except JSONDecodeError:
        print(
            "::error::Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS. The JSON should include the following keys: 'tenantId', 'clientId', 'clientSecret' and 'subscriptionId'."
        )
        raise AMLConfigurationException(
            "Incorrect or poorly formed output from azure credentials saved in AZURE_CREDENTIALS secret. See setup in https://github.com/Azure/aml-workspace/blob/master/README.md"
        )

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    validate_json(data=azure_credentials,
                  schema=azure_credentials_schema,
                  input_name="AZURE_CREDENTIALS")

    # Mask values so that they never show up in the action logs
    print("::debug::Masking parameters")
    mask_parameter(parameter=azure_credentials.get("tenantId", ""))
    mask_parameter(parameter=azure_credentials.get("clientId", ""))
    mask_parameter(parameter=azure_credentials.get("clientSecret", ""))
    mask_parameter(parameter=azure_credentials.get("subscriptionId", ""))

    # Loading parameters file
    print("::debug::Loading parameters file")
    parameters_file = os.environ.get("INPUT_PARAMETERS_FILE",
                                     default="compute.json")
    parameters_file_path = os.path.join(".cloud", ".azure", parameters_file)
    try:
        with open(parameters_file_path) as f:
            parameters = json.load(f)
    except FileNotFoundError:
        print(
            f"::debug::Could not find parameter file in {parameters_file_path}. Please provide a parameter file in your repository if you do not want to use default settings (e.g. .cloud/.azure/compute.json)."
        )
        parameters = {}

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    validate_json(data=parameters,
                  schema=parameters_schema,
                  input_name="PARAMETERS_FILE")

    # Define target cloud
    resource_manager_url = azure_credentials.get("resourceManagerEndpointUrl", "")
    if resource_manager_url.startswith("https://management.usgovcloudapi.net"):
        cloud = "AzureUSGovernment"
    elif resource_manager_url.startswith("https://management.chinacloudapi.cn"):
        cloud = "AzureChinaCloud"
    else:
        cloud = "AzureCloud"

    # Loading Workspace
    print("::debug::Loading AML Workspace")
    sp_auth = ServicePrincipalAuthentication(
        tenant_id=azure_credentials.get("tenantId", ""),
        service_principal_id=azure_credentials.get("clientId", ""),
        service_principal_password=azure_credentials.get("clientSecret", ""),
        cloud=cloud)
    config_file_path = os.environ.get("GITHUB_WORKSPACE",
                                      default=".cloud/.azure")
    config_file_name = "aml_arm_config.json"
    # Each handler logs a GitHub Actions error line and then re-raises the
    # *active* exception with a bare ``raise``. Raising the exception class
    # without arguments would construct a new, message-less instance (and
    # fail outright for exception types whose constructor requires a message).
    try:
        ws = Workspace.from_config(path=config_file_path,
                                   _file_name=config_file_name,
                                   auth=sp_auth)
    except AuthenticationException as exception:
        print(
            f"::error::Could not retrieve user token. Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS: {exception}"
        )
        raise
    except AuthenticationError as exception:
        print(f"::error::Microsoft REST Authentication Error: {exception}")
        raise
    except AdalError as exception:
        print(
            f"::error::Active Directory Authentication Library Error: {exception}"
        )
        raise
    except ProjectSystemException as exception:
        print(f"::error::Workspace authorizationfailed: {exception}")
        raise

    # Default compute target name: the repository part of "owner/repo".
    # An unset GITHUB_REPOSITORY yields an empty default instead of crashing,
    # so an explicitly configured "name" still works outside GitHub Actions.
    repository_name = os.environ.get("GITHUB_REPOSITORY", "").split(
        "/")[-1][:16]  # names can be max 16 characters

    # Loading compute target
    try:
        print("::debug::Loading existing compute target")
        compute_target = ComputeTarget(workspace=ws,
                                       name=parameters.get(
                                           "name", repository_name))
        print(
            f"::debug::Found compute target with same name. Not updating the compute target: {compute_target.serialize()}"
        )
    except ComputeTargetException:
        print(
            "::debug::Could not find existing compute target with provided name"
        )

        # Checking provided parameters
        print("::debug::Checking provided parameters")
        required_parameters_provided(
            parameters=parameters,
            keys=["compute_type"],
            message=
            "Required parameter(s) not found in your parameters file for creating a compute target. Please provide a value for the following key(s): "
        )

        print("::debug::Creating new compute target")
        compute_type = parameters.get("compute_type", "")
        print(f"::debug::Compute type listed is{compute_type}")
        if compute_type == "amlcluster":
            compute_target = create_aml_cluster(workspace=ws,
                                                parameters=parameters)
            print(
                f"::debug::Successfully created AML cluster: {compute_target.serialize()}"
            )
        elif compute_type == "akscluster":
            compute_target = create_aks_cluster(workspace=ws,
                                                parameters=parameters)
            print(
                f"::debug::Successfully created AKS cluster: {compute_target.serialize()}"
            )
        else:
            print(f"::error::Compute type '{compute_type}' is not supported")
            raise AMLConfigurationException(
                f"Compute type '{compute_type}' is not supported.")
    print(
        "::debug::Successfully finished Azure Machine Learning Compute Action")
def main():
    """Deploy a registered Azure ML model as a web service (GitHub Action).

    Reads the model name/version from ``INPUT_MODEL_NAME`` and
    ``INPUT_MODEL_VERSION``, the service principal credentials from
    ``INPUT_AZURE_CREDENTIALS`` and the optional parameters file named by
    ``INPUT_PARAMETERS_FILE`` (default ``deploy.json`` inside
    ``.cloud/.azure``). The model is deployed to the configured AKS compute
    target, or to ACI when no AKS target is found. Optionally a
    user-supplied test function is run against the service and the service
    is deleted afterwards; otherwise the scoring and swagger URIs are
    emitted as action outputs.

    Raises:
        AMLConfigurationException: If the credentials are not valid JSON,
            the model cannot be loaded, or the test script/function cannot
            be loaded or called.
        AMLDeploymentException: If the deployment fails, the service is not
            healthy, or the service tests fail.
        AuthenticationException, AuthenticationError, AdalError,
        ProjectSystemException: Re-raised unchanged when the workspace
            cannot be loaded.
    """
    # Loading input values
    print("::debug::Loading input values")
    model_name = os.environ.get("INPUT_MODEL_NAME", default=None)
    model_version = os.environ.get("INPUT_MODEL_VERSION", default=None)

    # Casting input values
    print("::debug::Casting input values")
    try:
        model_version = int(model_version)
    except (TypeError, ValueError) as exception:
        # Unset (None) or non-numeric: fall back to no explicit version.
        print(f"::debug::Could not cast model version to int: {exception}")
        model_version = None

    # Loading azure credentials
    print("::debug::Loading azure credentials")
    azure_credentials = os.environ.get("INPUT_AZURE_CREDENTIALS", default="{}")
    try:
        azure_credentials = json.loads(azure_credentials)
    except JSONDecodeError:
        print(
            "::error::Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS"
        )
        raise AMLConfigurationException(
            "Incorrect or poorly formed output from azure credentials saved in AZURE_CREDENTIALS secret. See setup in https://github.com/Azure/aml-compute/blob/master/README.md"
        )

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    validate_json(data=azure_credentials,
                  schema=azure_credentials_schema,
                  input_name="AZURE_CREDENTIALS")

    # Mask values so that they never show up in the action logs
    print("::debug::Masking parameters")
    mask_parameter(parameter=azure_credentials.get("tenantId", ""))
    mask_parameter(parameter=azure_credentials.get("clientId", ""))
    mask_parameter(parameter=azure_credentials.get("clientSecret", ""))
    mask_parameter(parameter=azure_credentials.get("subscriptionId", ""))

    # Loading parameters file
    print("::debug::Loading parameters file")
    parameters_file = os.environ.get("INPUT_PARAMETERS_FILE",
                                     default="deploy.json")
    parameters_file_path = os.path.join(".cloud", ".azure", parameters_file)
    try:
        with open(parameters_file_path) as f:
            parameters = json.load(f)
    except FileNotFoundError:
        print(
            f"::debug::Could not find parameter file in {parameters_file_path}. Please provide a parameter file in your repository if you do not want to use default settings (e.g. .cloud/.azure/deploy.json)."
        )
        parameters = {}

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    validate_json(data=parameters,
                  schema=parameters_schema,
                  input_name="PARAMETERS_FILE")

    # Define target cloud
    resource_manager_url = azure_credentials.get("resourceManagerEndpointUrl", "")
    if resource_manager_url.startswith("https://management.usgovcloudapi.net"):
        cloud = "AzureUSGovernment"
    elif resource_manager_url.startswith("https://management.chinacloudapi.cn"):
        cloud = "AzureChinaCloud"
    else:
        cloud = "AzureCloud"

    # Loading Workspace
    print("::debug::Loading AML Workspace")
    sp_auth = ServicePrincipalAuthentication(
        tenant_id=azure_credentials.get("tenantId", ""),
        service_principal_id=azure_credentials.get("clientId", ""),
        service_principal_password=azure_credentials.get("clientSecret", ""),
        cloud=cloud)
    config_file_path = os.environ.get("GITHUB_WORKSPACE",
                                      default=".cloud/.azure")
    config_file_name = "aml_arm_config.json"
    # Each handler logs a GitHub Actions error line and then re-raises the
    # *active* exception with a bare ``raise``. Raising the exception class
    # without arguments would construct a new, message-less instance (and
    # fail outright for exception types whose constructor requires a message).
    try:
        ws = Workspace.from_config(path=config_file_path,
                                   _file_name=config_file_name,
                                   auth=sp_auth)
    except AuthenticationException as exception:
        print(
            f"::error::Could not retrieve user token. Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS: {exception}"
        )
        raise
    except AuthenticationError as exception:
        print(f"::error::Microsoft REST Authentication Error: {exception}")
        raise
    except AdalError as exception:
        print(
            f"::error::Active Directory Authentication Library Error: {exception}"
        )
        raise
    except ProjectSystemException as exception:
        print(f"::error::Workspace authorizationfailed: {exception}")
        raise

    # Loading deployment target; None means "no usable target" and selects
    # the ACI branch further down.
    print("::debug::Loading deployment target")
    try:
        deployment_target = ComputeTarget(workspace=ws,
                                          name=parameters.get(
                                              "deployment_compute_target", ""))
    except (ComputeTargetException, TypeError):
        deployment_target = None

    # Loading model
    print("::debug::Loading model")
    try:
        model = Model(workspace=ws, name=model_name, version=model_version)
    except WebserviceException as exception:
        print(
            f"::error::Could not load model with provided details: {exception}"
        )
        raise AMLConfigurationException(
            f"Could not load model with provided details: {exception}")

    # Creating inference config
    print("::debug::Creating inference config")
    if os.environ.get("CONTAINER_REGISTRY_ADRESS", None) is not None:
        container_registry = ContainerRegistry()
        container_registry.address = os.environ.get(
            "CONTAINER_REGISTRY_ADRESS", None)
        container_registry.username = os.environ.get(
            "CONTAINER_REGISTRY_USERNAME", None)
        container_registry.password = os.environ.get(
            "CONTAINER_REGISTRY_PASSWORD", None)
    else:
        container_registry = None

    try:
        inference_config = InferenceConfig(
            entry_script=parameters.get("inference_entry_script", "score.py"),
            runtime=parameters.get("runtime", "python"),
            conda_file=parameters.get("conda_file", "environment.yml"),
            extra_docker_file_steps=parameters.get("extra_docker_file_steps",
                                                   None),
            source_directory=parameters.get("inference_source_directory",
                                            "code/deploy/"),
            enable_gpu=parameters.get("enable_gpu", None),
            description=parameters.get("description", None),
            base_image=parameters.get("base_image", None),
            base_image_registry=container_registry,
            cuda_version=parameters.get("cuda_version", None))
    except (WebserviceException, TypeError) as exception:
        print(
            f"::debug::Failed to create InferenceConfig. Trying to create no code deployment: {exception}"
        )
        inference_config = None

    # Loading run config
    print("::debug::Loading run config")
    model_resource_config = model.resource_configuration
    cpu_cores = get_resource_config(config=parameters.get("cpu_cores", None),
                                    resource_config=model_resource_config,
                                    config_name="cpu")
    memory_gb = get_resource_config(config=parameters.get("memory_gb", None),
                                    resource_config=model_resource_config,
                                    config_name="memory_in_gb")
    gpu_cores = get_resource_config(config=parameters.get("gpu_cores", None),
                                    resource_config=model_resource_config,
                                    config_name="gpu")

    # Creating deployment config
    print("::debug::Creating deployment config")
    if isinstance(deployment_target, AksCompute):
        deployment_config = AksWebservice.deploy_configuration(
            autoscale_enabled=parameters.get("autoscale_enabled", None),
            autoscale_min_replicas=parameters.get("autoscale_min_replicas",
                                                  None),
            autoscale_max_replicas=parameters.get("autoscale_max_replicas",
                                                  None),
            autoscale_refresh_seconds=parameters.get(
                "autoscale_refresh_seconds", None),
            autoscale_target_utilization=parameters.get(
                "autoscale_target_utilization", None),
            collect_model_data=parameters.get("model_data_collection_enabled",
                                              None),
            auth_enabled=parameters.get("authentication_enabled", None),
            cpu_cores=cpu_cores,
            memory_gb=memory_gb,
            enable_app_insights=parameters.get("app_insights_enabled", None),
            scoring_timeout_ms=parameters.get("scoring_timeout_ms", None),
            replica_max_concurrent_requests=parameters.get(
                "replica_max_concurrent_requests", None),
            max_request_wait_time=parameters.get("max_request_wait_time",
                                                 None),
            num_replicas=parameters.get("num_replicas", None),
            primary_key=os.environ.get("PRIMARY_KEY", None),
            secondary_key=os.environ.get("SECONDARY_KEY", None),
            tags=parameters.get("tags", None),
            properties=parameters.get("properties", None),
            description=parameters.get("description", None),
            gpu_cores=gpu_cores,
            period_seconds=parameters.get("period_seconds", None),
            initial_delay_seconds=parameters.get("initial_delay_seconds",
                                                 None),
            timeout_seconds=parameters.get("timeout_seconds", None),
            success_threshold=parameters.get("success_threshold", None),
            failure_threshold=parameters.get("failure_threshold", None),
            namespace=parameters.get("namespace", None),
            token_auth_enabled=parameters.get("token_auth_enabled", None))
    else:
        deployment_config = AciWebservice.deploy_configuration(
            cpu_cores=cpu_cores,
            memory_gb=memory_gb,
            tags=parameters.get("tags", None),
            properties=parameters.get("properties", None),
            description=parameters.get("description", None),
            location=parameters.get("location", None),
            auth_enabled=parameters.get("authentication_enabled", None),
            ssl_enabled=parameters.get("ssl_enabled", None),
            enable_app_insights=parameters.get("app_insights_enabled", None),
            ssl_cert_pem_file=parameters.get("ssl_cert_pem_file", None),
            ssl_key_pem_file=parameters.get("ssl_key_pem_file", None),
            ssl_cname=parameters.get("ssl_cname", None),
            dns_name_label=parameters.get("dns_name_label", None),
            primary_key=os.environ.get("PRIMARY_KEY", None),
            secondary_key=os.environ.get("SECONDARY_KEY", None),
            collect_model_data=parameters.get("model_data_collection_enabled",
                                              None),
            cmk_vault_base_url=os.environ.get("CMK_VAULT_BASE_URL", None),
            cmk_key_name=os.environ.get("CMK_KEY_NAME", None),
            cmk_key_version=os.environ.get("CMK_KEY_VERSION", None))

    # Default service name: "<repository>-<branch>", lower-cased, with
    # underscores replaced and truncated to 32 characters. Unset GitHub
    # variables yield empty parts instead of crashing, so an explicitly
    # configured "name" still works outside GitHub Actions.
    repository_name = os.environ.get("GITHUB_REPOSITORY", "").split("/")[-1]
    branch_name = os.environ.get("GITHUB_REF", "").split("/")[-1]
    default_service_name = f"{repository_name}-{branch_name}".lower(
    ).replace("_", "-")[:32]

    # Deploying model
    print("::debug::Deploying model")
    service = None
    try:
        service = Model.deploy(workspace=ws,
                               name=parameters.get("name",
                                                   default_service_name),
                               models=[model],
                               inference_config=inference_config,
                               deployment_config=deployment_config,
                               deployment_target=deployment_target,
                               overwrite=True)
        service.wait_for_deployment(show_output=True)
    except WebserviceException as exception:
        print(f"::error::Model deployment failed with exception: {exception}")
        # Model.deploy itself may be what failed, in which case there is no
        # service object to ask for logs.
        service_logs = service.get_logs() if service is not None else None
        raise AMLDeploymentException(
            f"Model deployment failedlogs: {service_logs} \nexception: {exception}"
        )

    # Checking status of service
    print("::debug::Checking status of service")
    if service.state != "Healthy":
        service_logs = service.get_logs()
        print(
            f"::error::Model deployment failed with state '{service.state}': {service_logs}"
        )
        raise AMLDeploymentException(
            f"Model deployment failed with state '{service.state}': {service_logs}"
        )

    if parameters.get("test_enabled", False):
        # Testing service
        print("::debug::Testing service")
        root = os.environ.get("GITHUB_WORKSPACE", default=None)
        test_file_path = parameters.get("test_file_path", "code/test/test.py")
        test_file_function_name = parameters.get("test_file_function_name",
                                                 "main")

        print("::debug::Adding root to system path")
        sys.path.insert(1, f"{root}")

        print("::debug::Importing module")
        test_file_path = f"{test_file_path}.py" if not test_file_path.endswith(
            ".py") else test_file_path
        try:
            test_spec = importlib.util.spec_from_file_location(
                name="testmodule", location=test_file_path)
            test_module = importlib.util.module_from_spec(spec=test_spec)
            test_spec.loader.exec_module(test_module)
            # No default here: a missing function must raise AttributeError
            # so that it is reported by the handler below instead of failing
            # later as "NoneType is not callable".
            test_function = getattr(test_module, test_file_function_name)
        except ModuleNotFoundError as exception:
            print(
                f"::error::Could not load python script in your repository which defines theweb service tests (Script: /{test_file_path}, Function: {test_file_function_name}()): {exception}"
            )
            raise AMLConfigurationException(
                f"Could not load python script in your repository which defines the web service tests (Script: /{test_file_path}, Function: {test_file_function_name}()): {exception}"
            )
        except (FileNotFoundError, AttributeError) as exception:
            print(
                f"::error::Could not load python script or function in your repository which defines the web service tests (Script: /{test_file_path}, Function: {test_file_function_name}()): {exception}"
            )
            raise AMLConfigurationException(
                f"Could not load python script or function in your repository which defines the web service tests (Script: /{test_file_path}, Function: {test_file_function_name}()): {exception}"
            )

        # Run the user-supplied test function against the deployed service
        print("::debug::Loading experiment config")
        try:
            test_function(service)
        except TypeError as exception:
            print(
                f"::error::Could not load experiment config from your module (Script: /{test_file_path}, Function: {test_file_function_name}()): {exception}"
            )
            raise AMLConfigurationException(
                f"Could not load experiment config from your module (Script: /{test_file_path}, Function: {test_file_function_name}()): {exception}"
            )
        except Exception as exception:
            print(
                f"::error::The webservice tests did not complete successfully: {exception}"
            )
            raise AMLDeploymentException(
                f"The webservice tests did not complete successfully: {exception}"
            )

    # Deleting service if desired
    if parameters.get("delete_service_after_deployment", False):
        service.delete()
    else:
        # Create outputs
        print("::debug::Creating outputs")
        print(f"::set-output name=service_scoring_uri::{service.scoring_uri}")
        print(f"::set-output name=service_swagger_uri::{service.swagger_uri}")
    print(
        "::debug::Successfully finished Azure Machine Learning Deploy Action")
dest="path", required=True) args = parser.parse_args() print("Argument 1: %s" % args.aml_compute_target) print("Argument 2: %s" % args.model_name) print("Argument 3: %s" % args.build_number) print("Argument 4: %s" % args.image_name) print("Argument 5: %s" % args.path) print('creating AzureCliAuthentication...') cli_auth = AzureCliAuthentication() print('done creating AzureCliAuthentication!') print('get workspace...') ws = Workspace.from_config(path=args.path, auth=cli_auth) print('done getting workspace!') print("looking for existing compute target.") aml_compute = AmlCompute(ws, args.aml_compute_target) print("found existing compute target.") # Create a new runconfig object run_amlcompute = RunConfiguration() # Use the cpu_cluster you created above. run_amlcompute.target = args.aml_compute_target # Enable Docker run_amlcompute.environment.docker.enabled = True
# Deploy a registered model as an Azure Container Instances web service.
from azureml.core import Workspace
from azureml.core.environment import Environment
from azureml.core.model import InferenceConfig, Model
from azureml.core.webservice import AciWebservice

# Workspace settings are read from ./.azureml/config.json.
ws = Workspace.from_config(path='./.azureml', _file_name='config.json')

# The registered model (by name and version) that the service will serve.
model = Model(ws, name='digits_model', version=2)

# Environment built from the conda specification file.
env = Environment.from_conda_specification(
    name='sklearn-aml-env',
    file_path='./.azureml/sklearn-env-aml.yml',
)

# Scoring entry point plus the environment it runs in.
inference_config = InferenceConfig(
    entry_script="./src/score2.py",
    environment=env,
)

# Resources requested for the container instance.
deployment_config = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
)

# Start the deployment, wait until it finishes, then report the final state.
aci_service = Model.deploy(
    workspace=ws,
    name='digits-model-service2',
    models=[model],
    inference_config=inference_config,
    deployment_config=deployment_config,
)
aci_service.wait_for_deployment(show_output=True)
print(aci_service.state)
# tutorial/02-create-compute.py
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Workspace.from_config() automatically looks for a directory .azureml
ws = Workspace.from_config()

# Name of the CPU cluster to reuse or create
cpu_cluster_name = "cpu-cluster"

# Reuse the cluster when it already exists; otherwise provision a new one.
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=6,
        idle_seconds_before_scaledown=2400,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster has finished provisioning.
cpu_cluster.wait_for_completion(show_output=True)
} akscomputes = { "aks-cpu-deploy": { "vm_size": "STANDARD_DS3_V2", "agent_count": 3, }, "aks-gpu-deploy": { "vm_size": "STANDARD_NC6S_V3", "agent_count": 3, }, } # create or get Workspace try: ws = Workspace.from_config(args.config) except: ws = Workspace.create( args.workspace_name, subscription_id=args.subscription_id, resource_group=args.resource_group, location=args.location, create_resource_group=True, exist_ok=True, show_output=True, ) ws.write_config() # create aml compute targets for ct_name in amlcomputes: if ct_name not in ws.compute_targets:
# Step 4 (tail): echo the parsed command-line arguments in a fixed order.
for position, value in enumerate(
        (args.service_name, args.aks_name, args.aks_region, args.description),
        start=1):
    print(f"Argument {position}: {value}")
print('..4.completed')
print()
print()

# Step 5: authenticate through the Azure CLI login.
print('5. Authenticating with AzureCliAuthentication...')
clientAuthn = AzureCliAuthentication()
print('..5.completed')
print()
print()

# Step 6: load the workspace using that authentication.
print('6. Instantiate AML workspace')
amlWs = Workspace.from_config(auth=clientAuthn)
print('..6.completed')
print()
print()

# Step 7: look up the container image by its id.
print('7. Instantiate image')
containerImage = Image(amlWs, id=image_id)
print(containerImage)
print('..7.completed')
print()
print()

# Step 8: keep the AKS name and region at hand for the following steps.
print('8. Check for and delete any existing web service instance')
aksName = args.aks_name
aksRegion = args.aks_region
def __init__(self, ctx):
    """Read settings from ``ctx.config`` and prepare the Azure ML workspace.

    General experiment settings come from ``ctx.config['config']`` and the
    provider-specific ones from ``ctx.config['azure']``. The workspace is
    loaded from ``./.azureml/config.json`` or, if that fails, created (and
    its config written back). Finally the configured compute cluster is
    looked up in the workspace and provisioned when it does not exist yet.

    Args:
        ctx: Context object exposing a ``config`` mapping with ``'config'``
            and ``'azure'`` sections, each supporting ``.get(key, default)``.
    """
    self.ctx = ctx
    general_config = ctx.config['config']
    azure_config = ctx.config['azure']

    self.name = general_config.get('name', None)
    self.source = general_config.get('source', None)
    self.target = general_config.get('target', None)
    self.exclude = general_config.get('exclude', None)
    self.budget = general_config.get('budget', None)

    # per provider experiment settings
    self.metric = azure_config.get('experiment/metric',
                                   'spearman_correlation')
    self.cross_validation_folds = azure_config.get(
        'experiment/cross_validation_folds', 5)
    self.max_total_time = azure_config.get('experiment/max_total_time', 60)
    self.iteration_timeout_minutes = azure_config.get(
        'experiment/iteration_timeout_minutes', 10)
    self.max_n_trials = azure_config.get('experiment/max_n_trials', 10)
    self.use_ensemble = azure_config.get('experiment/use_ensemble', False)

    # per provider compute settings
    self.subscription_id = azure_config.get(
        'subscription_id', os.environ.get("AZURE_SUBSCRIPTION_ID"))
    self.workspace = azure_config.get('workspace', self.name + '_ws')
    self.resource_group = azure_config.get('resource_group',
                                           self.name + '_resources')

    # cluster specific options
    self.compute_cluster = azure_config.get('cluster/name', 'cpucluster')
    self.compute_region = azure_config.get('cluster/region', 'eastus2')
    self.compute_min_nodes = azure_config.get('cluster/min_nodes', 0)
    self.compute_max_nodes = azure_config.get('cluster/max_nodes', 4)
    self.compute_sku = azure_config.get('cluster/type', 'STANDARD_D2_V2')

    # check core SDK version number
    print("Azure ML SDK Version: {}".format(azureml.core.VERSION))
    print("Current directory: {}".format(os.getcwd()))

    try:
        # get the preloaded workspace definition
        self.ws = Workspace.from_config(path='./.azureml/config.json')
    except Exception:
        # or create a new one. ``Exception`` rather than a bare ``except``
        # so that KeyboardInterrupt/SystemExit are not turned into a
        # workspace creation.
        self.ws = Workspace.create(name=self.workspace,
                                   subscription_id=self.subscription_id,
                                   resource_group=self.resource_group,
                                   create_resource_group=True,
                                   location=self.compute_region)
        self.ws.write_config()

    if self.compute_cluster in self.ws.compute_targets:
        compute_target = self.ws.compute_targets[self.compute_cluster]
        if compute_target and isinstance(compute_target, AmlCompute):
            print('Found compute target. Just use it: ' +
                  self.compute_cluster)
    else:
        print('Creating new AML compute context.')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=self.compute_sku,
            min_nodes=self.compute_min_nodes,
            max_nodes=self.compute_max_nodes)
        # ComputeTarget.create needs the Workspace object (self.ws);
        # self.workspace is only the workspace *name*.
        compute_target = ComputeTarget.create(self.ws, self.compute_cluster,
                                              provisioning_config)
        compute_target.wait_for_completion(show_output=True)
def create_pipeline():
    """Build the read -> pre-process -> visualize pipeline and run it.

    Loads the workspace, makes sure the compute target exists (creating it
    when it is missing), wires three ``PythonScriptStep`` objects together
    through ``PipelineData`` and submits the resulting pipeline to the
    experiment of the current run, waiting for it to finish.
    """
    ws = Workspace.from_config(auth=authenticate())
    def_data_store = ws.get_default_datastore()
    run = Run.get_context()

    project_folder = "project"

    # Intermediate data handed from one step to the next
    read_output = PipelineData("read_output",
                               datastore=def_data_store,
                               output_name="read_output")
    process_out = PipelineData("process_out",
                               datastore=def_data_store,
                               output_name="process_out")

    # Supported chart types: hist, line, scatter
    chart_type = PipelineParameter(name="chart_type", default_value="line")

    # Make sure the compute target exists
    compute_name = "Dedicated-DS3-v2"
    vm_size = "STANDARD_D3_V2"
    if compute_name not in ws.compute_targets:
        # Not there yet: provision it and wait until it is ready
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size, min_nodes=0, max_nodes=4)
        compute_target = ComputeTarget.create(ws, compute_name,
                                              provisioning_config)
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)
    else:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('Found compute target: ' + compute_name)

    # Run configuration shared by all python steps
    dependencies = CondaDependencies(f'{project_folder}/environment.yml')
    dependencies.add_channel("conda-forge")
    dependencies.add_conda_package('curl')
    run_config = RunConfiguration(conda_dependencies=dependencies)
    run_config.environment.docker.enabled = True
    run_config.environment.spark.precache_packages = False

    # Settings common to every step
    shared_step_settings = dict(compute_target=compute_target,
                                source_directory=project_folder,
                                runconfig=run_config)

    # Create each step for our pipeline
    read_data = PythonScriptStep(
        name="read_data",
        script_name="read_data.py",
        arguments=["read-data", "--output-path", read_output],
        outputs=[read_output],
        **shared_step_settings)

    pre_process = PythonScriptStep(
        name="pre_process",
        script_name="pre_process.py",
        arguments=[
            "pre-process", "--input-path", read_output, "--output-path",
            process_out
        ],
        inputs=[read_output],
        outputs=[process_out],
        **shared_step_settings)

    visualize = PythonScriptStep(
        name="visualize",
        script_name="visualize.py",
        arguments=[
            "visualize", "--input-path", process_out, "--chart", chart_type
        ],
        inputs=[process_out],
        **shared_step_settings)

    # list of steps to run
    steps = [read_data, pre_process, visualize]

    # Build the pipeline
    test_pipeline = Pipeline(workspace=ws, steps=[steps])

    # Submit the pipeline to be run - In the same experiment
    pipeline_run = run.experiment.submit(test_pipeline)
    pipeline_run.wait_for_completion()
def analyse_with_gordo():
    """Train one model per hyper-parameter combination and log the results.

    Every combination of aggregation method, resample interval, batch size
    and epoch count gets its own Azure ML run and MLflow run; parameters and
    train/test scores are logged to both. When ``PLOTTING`` is enabled, the
    resampled sensor data and each model's smoothed anomaly curve are drawn
    as stacked sub-plots and shown at the end.
    """
    ws = Workspace.from_config()  # Azure ML

    # Get an experiment object from Azure Machine Learning
    experiment_name = "dummy_test"
    experiment = Experiment(workspace=ws, name=experiment_name)  # Azure ML
    mlflow.set_experiment(experiment_name)  # MLFlow

    resamples_for_model = ["1T", "1H"]
    aggregation_methods = ["max", "mean"]
    batch_sizes = [1, 10, 100]
    epochs = [1, 10]

    # Materialize the grid once: its size determines the number of sub-plots
    # and the same combinations are walked in the loop below.
    permutations = list(
        itertools.product(
            aggregation_methods, resamples_for_model, batch_sizes, epochs
        )
    )
    number_of_permutations = len(permutations)

    resampled_original_data = read_and_resample("2nd_test.hdf", "1S")

    if PLOTTING:
        plotnum = 0
        f, axarr = plt.subplots(number_of_permutations + 1, sharex=True)
        axarr[plotnum].plot(
            resampled_original_data, linewidth=1, label="sensor_data_1S_mean"
        )
        axarr[plotnum].legend(loc="upper left")
        plotnum += 1

    for aggregation_method, interval, batch_size, epoch in permutations:
        run = experiment.start_logging()
        with mlflow.start_run():
            run_parameters = {
                "interval": interval,
                "aggregation_method": aggregation_method,
                "batch_size": batch_size,
                "epochs": epoch,
            }
            for parameter_name, parameter_value in run_parameters.items():
                mlflow.log_param(parameter_name, parameter_value)  # MLFlow
            for parameter_name, parameter_value in run_parameters.items():
                run.log(parameter_name, parameter_value)  # Azure ML

            print(
                f"Build model for data resampled with {interval} resolution, method {aggregation_method}, batch size {batch_size} and number of epochs {epoch}"
            )
            resampled = read_and_resample(
                "2nd_test.hdf", interval, aggregation_method=aggregation_method
            )
            model_output = build_model(resampled, epoch, batch_size)
            anomalies, avg_train_anomaly, predicted_data, train_until_index = (
                model_output
            )

            r2_train, expl_train, r2_test, expl_test = calc_scores(
                resampled, predicted_data, train_until_index
            )
            scores = {
                "r2_train": r2_train,
                "explained_variance_train": expl_train,
                "r2_test": r2_test,
                "explained_variance_test": expl_test,
            }
            for score_name, score_value in scores.items():
                run.log(score_name, score_value)  # Azure ML
            for score_name, score_value in scores.items():
                mlflow.log_metric(score_name, score_value)  # MLFlow

            # Use the last of the experiment resamples as the anomaly resample
            smoothing_window = resamples_for_model[-1]
            anomalies = anomalies.rolling(smoothing_window).mean()

            if PLOTTING:
                current_axis = axarr[plotnum]
                current_axis.plot(
                    anomalies, label=interval + "-" + aggregation_method + "-model"
                )
                current_axis.axhline(avg_train_anomaly, color="r")
                current_axis.legend(loc="upper left")
                plotnum += 1
        run.complete()  # Azure ML

    if PLOTTING:
        plt.show()
def main():
    """Submit an Azure ML run from a GitHub Actions workflow.

    Steps, in order:
      1. Parse and validate the service principal JSON from the
         ``INPUT_AZURE_CREDENTIALS`` environment variable and mask its values.
      2. Load the optional parameters file from ``.cloud/.azure`` (file name
         from ``INPUT_PARAMETERS_FILE``, default ``run.json``).
      3. Load the workspace from ``aml_arm_config.json`` and create the
         experiment.
      4. Resolve a run config from, in order, a runconfig yaml, a pipeline
         yaml, or a python function, then submit it.
      5. Emit ``::set-output`` lines, optionally wait for completion,
         download artifacts and publish the pipeline.

    Raises:
        AMLConfigurationException: the credentials are not valid JSON.
        AMLExperimentConfigurationException: the experiment could not be
            created, no run config could be loaded, or submission failed.
        AuthenticationException, AuthenticationError, AdalError,
        ProjectSystemException: re-raised unchanged when loading the
            workspace fails.
    """
    # Loading azure credentials
    print("::debug::Loading azure credentials")
    azure_credentials = os.environ.get("INPUT_AZURE_CREDENTIALS", default="{}")
    try:
        azure_credentials = json.loads(azure_credentials)
    except JSONDecodeError as exception:
        print(
            "::error::Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS"
        )
        raise AMLConfigurationException(
            "Incorrect or poorly formed output from azure credentials saved in AZURE_CREDENTIALS secret. See setup in https://github.com/Azure/aml-workspace/blob/master/README.md"
        ) from exception

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    validate_json(data=azure_credentials,
                  schema=azure_credentials_schema,
                  input_name="AZURE_CREDENTIALS")

    # Mask values
    print("::debug::Masking parameters")
    for secret_key in ("tenantId", "clientId", "clientSecret", "subscriptionId"):
        mask_parameter(parameter=azure_credentials.get(secret_key, ""))

    # Loading parameters file
    print("::debug::Loading parameters file")
    parameters_file = os.environ.get("INPUT_PARAMETERS_FILE", default="run.json")
    parameters_file_path = os.path.join(".cloud", ".azure", parameters_file)
    try:
        with open(parameters_file_path) as f:
            parameters = json.load(f)
    except FileNotFoundError:
        # A missing parameters file is not an error: fall back to defaults.
        print(
            f"::debug::Could not find parameter file in {parameters_file_path}. Please provide a parameter file in your repository if you do not want to use default settings (e.g. .cloud/.azure/run.json)."
        )
        parameters = {}

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    validate_json(data=parameters,
                  schema=parameters_schema,
                  input_name="PARAMETERS_FILE")

    # Define target cloud
    resource_manager_url = azure_credentials.get("resourceManagerEndpointUrl", "")
    if resource_manager_url.startswith("https://management.usgovcloudapi.net"):
        cloud = "AzureUSGovernment"
    elif resource_manager_url.startswith("https://management.chinacloudapi.cn"):
        cloud = "AzureChinaCloud"
    else:
        cloud = "AzureCloud"

    # Loading Workspace
    print("::debug::Loading AML Workspace")
    sp_auth = ServicePrincipalAuthentication(
        tenant_id=azure_credentials.get("tenantId", ""),
        service_principal_id=azure_credentials.get("clientId", ""),
        service_principal_password=azure_credentials.get("clientSecret", ""),
        cloud=cloud)
    config_file_path = os.environ.get("GITHUB_WORKSPACE", default=".cloud/.azure")
    config_file_name = "aml_arm_config.json"
    try:
        ws = Workspace.from_config(path=config_file_path,
                                   _file_name=config_file_name,
                                   auth=sp_auth)
    except AuthenticationException as exception:
        print(
            f"::error::Could not retrieve user token. Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS: {exception}"
        )
        # Bare raise keeps the original exception instance and its message;
        # raising the class itself would construct it without arguments.
        raise
    except AuthenticationError as exception:
        print(f"::error::Microsoft REST Authentication Error: {exception}")
        raise
    except AdalError as exception:
        print(
            f"::error::Active Directory Authentication Library Error: {exception}"
        )
        raise
    except ProjectSystemException as exception:
        print(f"::error::Workspace authorizationfailed: {exception}")
        raise

    # Create experiment
    print("::debug::Creating experiment")
    try:
        # Default experiment name
        repository_name = os.environ.get("GITHUB_REPOSITORY").split("/")[-1]
        branch_name = os.environ.get("GITHUB_REF").split("/")[-1]
        default_experiment_name = f"{repository_name}-{branch_name}"
        # The requested name is cut to its first 36 characters.
        experiment = Experiment(
            workspace=ws,
            name=parameters.get("experiment_name", default_experiment_name)[:36])
    except (TypeError, UserErrorException) as exception:
        # Report the key that was actually used to pick the name.
        experiment_name = parameters.get("experiment_name", None)
        print(
            f"::error::Could not create an experiment with the specified name {experiment_name}: {exception}"
        )
        raise AMLExperimentConfigurationException(
            f"Could not create an experiment with the specified name {experiment_name}: {exception}"
        ) from exception

    # Locations of the three supported run config definitions
    runconfig_yaml_file = parameters.get("runconfig_yaml_file", "code/train/run_config.yml")
    pipeline_yaml_file = parameters.get("pipeline_yaml_file", "code/train/pipeline.yml")
    runconfig_python_file = parameters.get("runconfig_python_file", "code/train/run_config.py")
    runconfig_python_function_name = parameters.get("runconfig_python_function_name", "main")

    # Loading run config: each loader is tried only if the previous one
    # returned None.
    print("::debug::Loading run config")
    print("::debug::Loading run config from runconfig yaml file")
    run_config = load_runconfig_yaml(runconfig_yaml_file=runconfig_yaml_file)
    if run_config is None:
        print("::debug::Loading run config from pipeline yaml file")
        run_config = load_pipeline_yaml(workspace=ws,
                                        pipeline_yaml_file=pipeline_yaml_file)
    if run_config is None:
        print("::debug::Loading run config from python runconfig file")
        run_config = load_runconfig_python(
            workspace=ws,
            runconfig_python_file=runconfig_python_file,
            runconfig_python_function_name=runconfig_python_function_name)
    if run_config is None:
        print(
            f"::error::Error when loading runconfig yaml definition your repository (Path: /{runconfig_yaml_file})."
        )
        print(
            f"::error::Error when loading pipeline yaml definition your repository (Path: /{pipeline_yaml_file})."
        )
        print(
            f"::error::Error when loading python script or function in your repository which defines the experiment config (Script path: '/{runconfig_python_file}', Function: '{runconfig_python_function_name}()')."
        )
        print(
            "::error::You have to provide either a yaml definition for your run, a yaml definition of your pipeline or a python script, which returns a runconfig (Pipeline, ScriptRunConfig, AutoMlConfig, Estimator, etc.). Please read the documentation for more details."
        )
        raise AMLExperimentConfigurationException(
            "You have to provide a yaml definition for your run, a yaml definition of your pipeline or a python script, which returns a runconfig. Please read the documentation for more details."
        )

    # Submit run config
    print("::debug::Submitting experiment config")
    try:
        # Defining default tags
        print("::debug::Defining default tags")
        default_tags = {
            "GITHUB_ACTOR": os.environ.get("GITHUB_ACTOR"),
            "GITHUB_REPOSITORY": os.environ.get("GITHUB_REPOSITORY"),
            "GITHUB_SHA": os.environ.get("GITHUB_SHA"),
            "GITHUB_REF": os.environ.get("GITHUB_REF")
        }
        # Default tags override user tags with the same key.
        run = experiment.submit(config=run_config,
                                tags=dict(parameters.get("tags", {}), **default_tags))
    except (AzureMLException, TypeError) as exception:
        print(
            f"::error::Could not submit experiment config. Your script passed object of type {type(run_config)}. Object must be correctly configured and of type e.g. estimator, pipeline, etc.: {exception}"
        )
        raise AMLExperimentConfigurationException(
            f"Could not submit experiment config. Your script passed object of type {type(run_config)}. Object must be correctly configured and of type e.g. estimator, pipeline, etc.: {exception}"
        ) from exception

    # Create outputs
    print("::debug::Creating outputs")
    print(f"::set-output name=experiment_name::{run.experiment.name}")
    print(f"::set-output name=run_id::{run.id}")
    print(f"::set-output name=run_url::{run.get_portal_url()}")

    # Waiting for run to complete
    print("::debug::Waiting for run to complete")
    if parameters.get("wait_for_completion", True):
        run.wait_for_completion(show_output=True)

        # Creating additional outputs of finished run
        run_metrics = run.get_metrics(recursive=True)
        print(f"::set-output name=run_metrics::{run_metrics}")
        run_metrics_markdown = convert_to_markdown(run_metrics)
        print(
            f"::set-output name=run_metrics_markdown::{run_metrics_markdown}")

        # Download artifacts if enabled
        if parameters.get("download_artifacts", False):
            # Defining artifacts folder
            print("::debug::Defining artifacts folder")
            root_path = os.environ.get("GITHUB_WORKSPACE", default=None)
            folder_name = f"aml_artifacts_{run.id}"
            artifact_path = os.path.join(root_path, folder_name)

            # Downloading artifacts
            print("::debug::Downloading artifacts")
            run.download_files(
                output_directory=os.path.join(artifact_path, "parent"))
            children = run.get_children(recursive=True)
            for i, child in enumerate(children):
                child.download_files(
                    output_directory=os.path.join(artifact_path, f"child_{i}"))

            # Creating additional outputs
            print(f"::set-output name=artifact_path::{artifact_path}")

    # Publishing pipeline
    print("::debug::Publishing pipeline")
    publish_requested = parameters.get("publish_pipeline", False)
    if isinstance(run, PipelineRun) and publish_requested:
        # Default pipeline name
        repository_name = os.environ.get("GITHUB_REPOSITORY").split("/")[-1]
        branch_name = os.environ.get("GITHUB_REF").split("/")[-1]
        default_pipeline_name = f"{repository_name}-{branch_name}"

        published_pipeline = run.publish_pipeline(
            name=parameters.get("pipeline_name", default_pipeline_name),
            description="Pipeline registered by GitHub Run Action",
            version=parameters.get("pipeline_version", None),
            continue_on_step_failure=parameters.get(
                "pipeline_continue_on_step_failure", False))

        # Creating additional outputs
        print(
            f"::set-output name=published_pipeline_id::{published_pipeline.id}"
        )
        print(
            f"::set-output name=published_pipeline_status::{published_pipeline.status}"
        )
        print(
            f"::set-output name=published_pipeline_endpoint::{published_pipeline.endpoint}"
        )
    elif publish_requested:
        print(
            "::error::Could not register pipeline because you did not pass a pipeline to the action"
        )

    print("::debug::Successfully finished Azure Machine Learning Train Action")
def main():
    """Run the experiment for training.

    Submits ``train_keras.py`` to the "cpu-cluster" compute target with the
    "datasets/mnist" files of the default datastore mounted as input,
    streams the run to a TensorBoard instance, waits for completion and
    registers ``outputs/keras_lenet.h5`` as the model "keras_mnist", tagged
    with the last logged train/validation loss and accuracy.
    """
    interactive_auth = InteractiveLoginAuthentication(
        tenant_id=os.getenv("TENANT_ID"))
    work_space = Workspace.from_config(auth=interactive_auth)

    # Set up the dataset for training
    datastore = work_space.get_default_datastore()
    dataset = Dataset.File.from_files(path=(datastore, "datasets/mnist"))

    # Set up the experiment for training
    experiment = Experiment(workspace=work_space, name="keras-lenet-train")
    config = ScriptRunConfig(
        source_directory=".",
        script="train_keras.py",
        compute_target="cpu-cluster",
        arguments=[
            "--data_folder",
            dataset.as_named_input("input").as_mount(),
            "--log_folder",
            "./logs",
        ],
    )

    # Set up the Tensorflow/Keras environment
    environment = Environment("keras-environment")
    environment.python.conda_dependencies = CondaDependencies.create(
        python_version="3.7.7",
        pip_packages=["azureml-defaults", "numpy", "tensorflow==2.3.1"])
    config.run_config.environment = environment

    # Run the experiment for training
    run = experiment.submit(config)
    aml_url = run.get_portal_url()
    print(
        "Submitted to an Azure Machine Learning compute cluster. Click on the link below"
    )
    print("")
    print(aml_url)

    tboard = Tensorboard([run])
    # If successful, start() returns a string with the URI of the instance.
    tboard.start(start_browser=True)
    try:
        run.wait_for_completion(show_output=True)
        print("Press enter to stop")
        input()
    finally:
        # Always stop the streaming, even when the run or the prompt fails;
        # otherwise it will continue to run.
        tboard.stop()

    # Register Model
    metrics = run.get_metrics()
    run.register_model(
        model_name="keras_mnist",
        tags={
            "data": "mnist",
            "model": "classification"
        },
        model_path="outputs/keras_lenet.h5",
        model_framework=Model.Framework.TENSORFLOW,
        model_framework_version="2.3.1",
        properties={
            # Each metric is a series; register the last recorded value.
            "train_loss": metrics["train_loss"][-1],
            "train_accuracy": metrics["train_accuracy"][-1],
            "val_loss": metrics["val_loss"][-1],
            "val_accuracy": metrics["val_accuracy"][-1],
        },
    )
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ import os, json, datetime, sys from operator import attrgetter from azureml.core import Workspace from azureml.core.model import Model from azureml.core.image import Image from azureml.core.webservice import Webservice from azureml.core.webservice import AciWebservice from azureml.core.authentication import AzureCliAuthentication cli_auth = AzureCliAuthentication() # Get workspace ws = Workspace.from_config(auth=cli_auth) # Get the Image to deploy details try: with open("aml_config/image.json") as f: config = json.load(f) except: print("No new model, thus no deployment on ACI") # raise Exception('No new model to register as production model perform better') sys.exit(0) image_name = config["image_name"] image_version = config["image_version"] images = Image.list(workspace=ws) image, = (m for m in images if m.version == image_version and m.name == image_name) print(
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from ml_service.util.env_variables import Env

if __name__ == "__main__":
    # Cluster name comes from the project's environment settings.
    settings = Env()
    workspace = Workspace.from_config()  # Load config from .azureml
    cluster_name = settings.aml_compute_name

    # Reuse the cluster when the lookup succeeds; create it otherwise.
    try:
        cluster = AmlCompute(workspace=workspace, name=cluster_name)
    except ComputeTargetException:
        provisioning = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_D2_V2',
            idle_seconds_before_scaledown=2400,
            min_nodes=0,
            max_nodes=4)
        cluster = ComputeTarget.create(workspace, cluster_name, provisioning)
    else:
        print('Found existing cluster, use it.')

    cluster.wait_for_completion(show_output=True)
import azureml
from azureml.core import Workspace, Run
from azureml.core.compute import ComputeTarget, BatchAiCompute
from azureml.core.compute_target import ComputeTargetException

# Report the installed Azure ML SDK version.
print("Azure ML SDK Version: ", azureml.core.VERSION)

# Load the workspace described by the local config file.
ws = Workspace.from_config('aml_config/config.json')
print(f'Using Azure ML Workspace {ws.name} in location {ws.location}')

# Look the cluster up by name and provision it only when the lookup fails.
cluster_name = 'tgschallenge'
try:
    compute_target = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    provisioning = BatchAiCompute.provisioning_configuration(
        vm_size='STANDARD_NC6',
        vm_priority='dedicated',
        autoscale_enabled=True,
        cluster_min_nodes=0,
        cluster_max_nodes=4)
    compute_target = ComputeTarget.create(ws, cluster_name, provisioning)
else:
    print('The BatchAI cluster already exists.')

compute_target.wait_for_completion(show_output=True)
print(compute_target.get_status())
# Command-line options: which GLUE task to finetune on and which pretrained
# checkpoint to start from.
parser = argparse.ArgumentParser()
parser.add_argument("--glue_task", default="cola", help="Name of GLUE task used for finetuning.")
parser.add_argument(
    "--model_checkpoint",
    default="distilbert-base-uncased",
    help="Pretrained transformers model name.",
)
args = parser.parse_args()

print(
    f"Finetuning {args.glue_task} with model {args.model_checkpoint} on Azure ML..."
)

# Workspace is read from the local Azure ML config file.
ws: Workspace = Workspace.from_config()

# Compute target looked up by name in the workspace.
target: ComputeTarget = ws.compute_targets["gpu-K80-2"]

# Environment built by a helper defined outside this excerpt.
env: Environment = transformers_environment(use_gpu=True)

exp: Experiment = Experiment(ws, "transformers-glue-finetuning")

# Submission is delegated to a helper defined outside this excerpt.
run: Run = submit_glue_finetuning_to_aml(
    glue_task=args.glue_task,
    model_checkpoint=args.model_checkpoint,  # try: "bert-base-uncased"
    environment=env,
    target=target,
    experiment=exp,
)
plt.subplot(1,2,1) plot_image(i, predictions, test_labels, test_images) plt.subplot(1,2,2) plot_value_array(i, predictions, test_labels) _ = plt.xticks(range(10), class_names, rotation=45) plt.show() if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument('--config', default='..\\aml_config\\config.json') parser.add_argument('--spconfig', default='..\\aml_config\\spconfig.json') parser.add_argument('--modelName', default='fmnist') parser.add_argument('--modelPath', default='outputs/fmnist.h5') parser.add_argument('--modelFileName', default='fmnist.h5') parser.add_argument('--selected_item', type=int, default=0) parser.add_argument('-v', '--verbose', dest='verbose', action='store_true') parser.add_argument('--version', action='version', version='%(prog)s 1.0') args = parser.parse_args() folders = createFolders() if (args.verbose): print ("config file: {0}".format(args.config)) print ("verbose value: {0}".format(args.verbose)) print ("local script folder: {0}".format(folders.script_folder)) print ("local data folder: {0}".format(folders.data_folder)) print ("local output folder: {0}".format(folders.output_folder)) svc_pr = loadAuthCredentials(args) ws = Workspace.from_config(path=args.config, auth=svc_pr) downloadModel(ws,args,folders) testModel(args,folders)
""" Check the logs of a cloud deployed Azure ML webservice in a workspace """ from azureml.core import Workspace from azureml.core.webservice import Webservice ws = Workspace.from_config(path='./.azureml/config.json') webservices = ws.webservices for name, webservice in webservices.items(): print("Webservice: {}, scoring URI: {}".format(name, webservice.scoring_uri)) if name == "keras-yolov3-firearms": print(webservice.get_logs()) print(list(webservice.get_keys())) print(webservice.scoring_uri)
def main():
    """Build, publish and submit the single-step "Deploy" pipeline.

    Loads the workspace from the local config, obtains the 'cpu1' compute
    via ``get_compute``, wraps ``./deploy/deploy.py`` in a
    ``PythonScriptStep``, publishes the resulting pipeline as
    "aks-deployment-pipeline" and submits it to the experiment
    "compute-instance-pipeline-experiment".
    """
    ws = Workspace.from_config()
    print("get_workspace:")
    print(ws)
    # NOTE(review): the message below is printed although nothing is written
    # here; confirm whether a ws.write_config(...) call is intended.
    print("writing config.json.")

    # Get Azure machine learning cluster
    aml_compute = get_compute(ws, compute_name='cpu1', vm_size='STANDARD_D1')

    # Data stores
    # NOTE(review): output_dir is not consumed by the Deploy step; it is kept
    # so the datastore lookup still happens as before.
    data_dir = "pipelines/modelout"
    def_data_store = ws.get_default_datastore()
    output_dir = PipelineData(name="scores",
                              datastore=def_data_store,
                              output_path_on_compute=data_dir)

    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=[
            'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'
        ],
        pip_packages=[
            'azure', 'azureml-core', 'azureml-pipeline', 'azure-storage',
            'azure-storage-blob', 'azureml-dataprep'
        ]))
    run_config.environment.docker.enabled = True

    ######### DEPLOY ################
    deploy_step = PythonScriptStep(
        name="Deploy",
        source_directory="./deploy",
        script_name="deploy.py",
        compute_target=aml_compute,
        arguments=[],
        inputs=[],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Deploy Step created")

    steps = [deploy_step]
    train_pipeline = Pipeline(workspace=ws, steps=steps)
    # The experiment name is supplied at submit time below.
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(name="aks-deployment-pipeline",
                                                description="")
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')

    pipeline_parameters = {"model_name": "sklearn_regression_model.pkl"}
    published_pipeline.submit(ws, "compute-instance-pipeline-experiment",
                              pipeline_parameters)
config_json = os.path.join(base_dir, 'config.json') with open(config_json, 'r') as f: config = json.load(f) try: svc_pr = ServicePrincipalAuthentication( tenant_id=config['tenant_id'], service_principal_id=config['service_principal_id'], service_principal_password=config['service_principal_password']) except KeyError as e: print("Getting Service Principal Authentication from Azure Devops") svc_pr = None pass ws = Workspace.from_config(path=config_json, auth=svc_pr) # folder for scripts that need to be uploaded to Aml compute target script_folder = './scripts/' try: os.makedirs(script_folder) except BaseException as e: print("Deleting:", script_folder) shutil.rmtree(script_folder) os.makedirs(script_folder) cpu_compute_name = config['cpu_compute'] try: cpu_compute_target = AmlCompute(ws, cpu_compute_name) print("found existing compute target: %s" % cpu_compute_name) except: # ComputeTargetException:
def setUpClass(cls) -> None:
    """Attach the shared workspace and data directory to the test class.

    Both paths are resolved relative to the directory two levels above this
    file: ``config.json`` for the workspace, ``data`` for ``base_path``.
    """
    root_dir = Path(__file__).parent.parent
    config_file = root_dir / 'config.json'
    cls.workspace = Workspace.from_config(str(config_file))
    cls.base_path = root_dir / 'data'
parser.add_argument("--aks_name", type=str, help="aks name", dest="aks_name", required=True) parser.add_argument("--aks_region", type=str, help="aks region", dest="aks_region", required=True) parser.add_argument("--description", type=str, help="description", dest="description", required=True) args = parser.parse_args() print("Argument 1: %s" % args.service_name) print("Argument 2: %s" % args.aks_name) print("Argument 3: %s" % args.aks_region) print("Argument 4: %s" % args.description) print('creating AzureCliAuthentication...') cli_auth = AzureCliAuthentication() print('done creating AzureCliAuthentication!') print('get workspace...') ws = Workspace.from_config(auth=cli_auth) print('done getting workspace!') image = Image(ws, id = image_id) print(image) aks_name = args.aks_name aks_region = args.aks_region aks_service_name = args.service_name try: service = Webservice(name=aks_service_name, workspace=ws) print("Deleting AKS service {}".format(aks_service_name)) service.delete() except: print("No existing webservice found: ", aks_service_name)
print("Argument 1: %s" % args.aml_compute_target) print("Argument 2: %s" % args.path) print('1. Parse arguments...END') print('') print('') print('2. Authenticating...START') print('.............................................') cliAuth = AzureCliAuthentication() print('2. Authenticating...END') print('') print('') print('3. Get workspace reference...START') print('.............................................') amlWs = Workspace.from_config(path=args.path, auth=cliAuth) print('3. Get workspace reference...END') print('') print('') print('4. Get compute reference or create new...START') print('.............................................') try: amlCompute = AmlCompute(amlWs, args.aml_compute_target) print("....found existing compute target.") except ComputeTargetException: print("....creating new compute target") amlComputeProvisioningConfig = AmlCompute.provisioning_configuration( vm_size="STANDARD_D12_V2", min_nodes=0, max_nodes=1) amlCompute = ComputeTarget.create(amlWs, args.aml_compute_target,
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import Workspace
from azureml.core.authentication import AzureCliAuthentication

# load Azure ML workspace
workspace = Workspace.from_config(auth=AzureCliAuthentication())

# Create compute target if not present
# Choose a name for your CPU cluster
cpu_cluster_name = "alwaysoncluster"

# Verify that cluster does not exist already
try:
    # Bind the same name in both branches so the wait below always has a
    # cluster to work on.
    cpu_cluster = ComputeTarget(workspace=workspace, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D3_V2',
        min_nodes=2,
        max_nodes=2)
    cpu_cluster = ComputeTarget.create(workspace, cpu_cluster_name, compute_config)

cpu_cluster.wait_for_completion(show_output=True)
def main(req: func.HttpRequest) -> (func.HttpResponse):
    """HTTP-triggered entry point that launches a PyTorch training run.

    Writes a workspace ``config.json`` from environment variables, loads (or
    creates) the workspace, makes sure the GPU cluster exists, registers two
    blob datastores and submits ``pytorch_train.py`` through a ``PyTorch``
    estimator, blocking until the run completes.

    Returns a JSON string: 'ProjectSystemException' when loading the
    workspace raises that exception, otherwise 'Job complete'.
    """
    logging.info('Python HTTP trigger function processed a request.')

    # For now this can be a POST where we have <base url>/api/HttpTrigger?start=<any string>
    image_url = req.params.get('start')
    logging.info(type(image_url))

    function_dir = os.path.join(os.getcwd(), 'HttpTrigger')
    config_path = os.path.join(function_dir, 'config.json')

    # Workspace coordinates come from the environment.
    subscription_id = os.getenv('AZURE_SUB', '')
    resource_group = os.getenv('RESOURCE_GROUP', '')
    workspace_name = os.getenv('WORKSPACE_NAME', '')

    # Write a config.json (fill in template values with system vars)
    workspace_config = {
        'subscription_id': subscription_id,
        'resource_group': resource_group,
        'workspace_name': workspace_name
    }
    with open(config_path, 'w') as config_file:
        json.dump(workspace_config, config_file)

    # Get the workspace from config.json
    try:
        ws = Workspace.from_config(config_path)
    except ProjectSystemException:
        # Authentication didn't work
        return json.dumps('ProjectSystemException')
    except Exception:
        # Need to create the workspace
        ws = Workspace.create(
            name=workspace_name,
            subscription_id=subscription_id,
            resource_group=resource_group,
            create_resource_group=True,
            location='eastus2'  # Or other supported Azure region
        )

    # choose a name for your cluster
    cluster_name = "gpuclusterplease"
    try:
        compute_target = ComputeTarget(workspace=ws, name=cluster_name)
    except ComputeTargetException:
        print('Creating a new compute target...')
        # AML Compute config - if max_nodes are set, it becomes persistent storage that scales
        provisioning = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6', max_nodes=4)
        # create the cluster
        compute_target = ComputeTarget.create(ws, cluster_name, provisioning)
        compute_target.wait_for_completion(show_output=True)
    else:
        print('Found existing compute target.')

    # use get_status() to get a detailed status for the current cluster.
    print(compute_target.get_status().serialize())

    # Create a project directory and copy the training script to it
    project_folder = os.path.join(function_dir, 'project')
    os.makedirs(project_folder, exist_ok=True)
    shutil.copy(os.path.join(function_dir, 'pytorch_train.py'), project_folder)

    # Create an experiment
    experiment = Experiment(ws, name='fish-no-fish')

    # Both datastores live in the same storage account.
    storage_account = os.getenv('STORAGE_ACCOUNT_NAME', '')
    storage_key = os.getenv('STORAGE_ACCOUNT_KEY', '')

    # Use an AML Data Store for training data
    ds = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name='funcdefaultdatastore',
        container_name=os.getenv('STORAGE_CONTAINER_NAME_TRAINDATA', ''),
        account_name=storage_account,
        account_key=storage_key,
        create_if_not_exists=True)

    # Use an AML Data Store to save models back up to
    ds_models = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name='modelsdatastorage',
        container_name=os.getenv('STORAGE_CONTAINER_NAME_MODELS', ''),
        account_name=storage_account,
        account_key=storage_key,
        create_if_not_exists=True)

    # Set up for training ("trans" flag means - use transfer learning and
    # this should download a model on compute)
    script_params = {
        '--data_dir': ds.as_mount(),
        '--num_epochs': 30,
        '--learning_rate': 0.01,
        '--output_dir': './outputs',
        '--trans': 'True'
    }

    # Instantiate PyTorch estimator with upload of final model to
    # a specified blob storage container (this can be anything)
    model_upload = ds_models.as_upload(
        path_on_compute='./outputs/model_finetuned.pth')
    estimator = PyTorch(
        source_directory=project_folder,
        script_params=script_params,
        compute_target=compute_target,
        entry_script='pytorch_train.py',
        use_gpu=True,
        inputs=[model_upload])

    run = experiment.submit(estimator)
    run.wait_for_completion(show_output=True)

    return json.dumps('Job complete')
from azureml.exceptions import ComputeTargetException from azureml.core.authentication import AzureCliAuthentication # Load the JSON settings file print("Loading settings") with open(os.path.join("aml_service", "settings.json")) as f: settings = json.load(f) aml_settings = settings["compute_target"]["training"]["amlcompute"] # Get workspace print("Loading Workspace") cli_auth = AzureCliAuthentication() config_file_path = os.environ.get("GITHUB_WORKSPACE", default="aml_service") config_file_name = "aml_arm_config.json" ws = Workspace.from_config( path=config_file_path, auth=cli_auth, _file_name=config_file_name) print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n') try: # Loading AMLCompute print("Loading existing AML Compute") cluster = AmlCompute(workspace=ws, name=aml_settings["name"]) # Check settings and redeploy if required settings have changed print("Found existing cluster") if cluster.vm_size.lower() != aml_settings["vm_size"].lower() or cluster.vm_priority.lower() != aml_settings["vm_priority"].lower(): cluster.delete() cluster.wait_for_completion(show_output=True) raise ComputeTargetException("Cluster is of incorrect size or has incorrect priority. Deleting cluster and provisioning a new one.")
from azureml.core import Workspace
import requests
import pandas as pd

features = ["age", "fare", "sex", "embarked", "home.dest", "pclass", "sibsp", "parch", "cabin"]

# Export the row at position 6 of the dataset, restricted to the feature
# columns, as the JSON payload to score.
passengers = pd.read_csv("data/titanic.csv")
sample = passengers[features].iloc[6:7, :]
sample.to_json("data/records.json", orient="index")

ws = Workspace.from_config("code/config_ws.json")
service = ws.webservices['titanic-webservice-new']

with open("data/records.json") as records_file:
    test_samples = records_file.read()

# Score once through the SDK client ...
sdk_result = service.run(input_data=str(test_samples))
print(sdk_result)

# ... and once through a plain HTTP POST to the scoring URI.
http_response = requests.post(service.scoring_uri,
                              test_samples,
                              headers={'Content-Type': 'application/json'})
print(http_response.text)

# Remove the service once both calls have been made.
service.delete()
from azureml.core import Workspace
from azureml.core.webservice import Webservice

# Requires the config to be downloaded first to the current working directory
ws = Workspace.from_config()

# Set with the deployment name
# chosen deployment name in a previous stage, ie: depproj02-ht
name = "dep-p2-htamayo"

# load existing web service
service = Webservice(name=name, workspace=ws)

# enabling app insights
service.update(enable_app_insights=True)

# Print the service logs one line at a time.
log_lines = service.get_logs().split('\n')
print(*log_lines, sep='\n')
# Load the JSON settings file and relevant section print("Loading settings") with open(os.path.join("aml_service", "settings.json")) as f: settings = json.load(f) experiment_settings = settings["experiment"] compute_target_to_use = settings["compute_target"][ "compute_target_to_use_for_training"].strip().lower() compute_target_name = settings["compute_target"]["training"][ compute_target_to_use]["name"] workspace_config_settings = settings["workspace"]["config"] # Get workspace print("Loading Workspace") cli_auth = AzureCliAuthentication() ws = Workspace.from_config(path=workspace_config_settings["path"], auth=cli_auth, _file_name=workspace_config_settings["file_name"]) print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n') # Attach Experiment print("Loading Experiment") exp = Experiment(workspace=ws, name=experiment_settings["name"]) print(exp.name, exp.workspace.name, sep="\n") # Load compute target print("Loading Compute Target") compute_target = ComputeTarget(workspace=ws, name=compute_target_name) # Create image registry configuration if experiment_settings["docker"]["custom_image"]: container_registry = ContainerRegistry()