Example #1
print("all args: ", args)

with open('config.json', 'r') as f:
    config = json.load(f)

try:
    svc_pr = ServicePrincipalAuthentication(
        tenant_id=config['tenant_id'],
        service_principal_id=config['service_principal_id'],
        service_principal_password=config['service_principal_password'])
except KeyError as e:
    print("Getting Service Principal Authentication from Azure Devops")
    svr_pr = None
    pass

ws = Workspace.from_config(auth=svc_pr)

input_dir = os.path.dirname(args.input_dir)

with open(os.path.join(input_dir, 'data_metrics')) as f:
    metrics = json.load(f)

best_loss = 1.0
best_run_id = None

print(metrics)
for run in metrics.keys():
    try:
        loss = metrics[run]['val_loss'][-1]
        if loss < best_loss:
            best_loss = loss
            best_run_id = run  # assumed continuation; the source snippet is cut off here
    except (KeyError, IndexError):
        continue
Example #2
def main():
    # Loading azure credentials
    print("::debug::Loading azure credentials")
    azure_credentials = os.environ.get("INPUT_AZURE_CREDENTIALS", default="{}")
    try:
        azure_credentials = json.loads(azure_credentials)
    except JSONDecodeError:
        print(
            "::error::Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS. The JSON should include the following keys: 'tenantId', 'clientId', 'clientSecret' and 'subscriptionId'."
        )
        raise AMLConfigurationException(
            "Incorrect or poorly formed output from azure credentials saved in AZURE_CREDENTIALS secret. See setup in https://github.com/Azure/aml-workspace/blob/master/README.md"
        )

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    validate_json(data=azure_credentials,
                  schema=azure_credentials_schema,
                  input_name="AZURE_CREDENTIALS")

    # Mask values
    print("::debug::Masking parameters")
    mask_parameter(parameter=azure_credentials.get("tenantId", ""))
    mask_parameter(parameter=azure_credentials.get("clientId", ""))
    mask_parameter(parameter=azure_credentials.get("clientSecret", ""))
    mask_parameter(parameter=azure_credentials.get("subscriptionId", ""))

    # Loading parameters file
    print("::debug::Loading parameters file")
    parameters_file = os.environ.get("INPUT_PARAMETERS_FILE",
                                     default="compute.json")
    parameters_file_path = os.path.join(".cloud", ".azure", parameters_file)
    try:
        with open(parameters_file_path) as f:
            parameters = json.load(f)
    except FileNotFoundError:
        print(
            f"::debug::Could not find parameter file in {parameters_file_path}. Please provide a parameter file in your repository if you do not want to use default settings (e.g. .cloud/.azure/compute.json)."
        )
        parameters = {}

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    validate_json(data=parameters,
                  schema=parameters_schema,
                  input_name="PARAMETERS_FILE")

    # Define target cloud
    if azure_credentials.get(
            "resourceManagerEndpointUrl",
            "").startswith("https://management.usgovcloudapi.net"):
        cloud = "AzureUSGovernment"
    elif azure_credentials.get(
            "resourceManagerEndpointUrl",
            "").startswith("https://management.chinacloudapi.cn"):
        cloud = "AzureChinaCloud"
    else:
        cloud = "AzureCloud"

    # Loading Workspace
    print("::debug::Loading AML Workspace")
    sp_auth = ServicePrincipalAuthentication(
        tenant_id=azure_credentials.get("tenantId", ""),
        service_principal_id=azure_credentials.get("clientId", ""),
        service_principal_password=azure_credentials.get("clientSecret", ""),
        cloud=cloud)
    config_file_path = os.environ.get("GITHUB_WORKSPACE",
                                      default=".cloud/.azure")
    config_file_name = "aml_arm_config.json"
    try:
        ws = Workspace.from_config(path=config_file_path,
                                   _file_name=config_file_name,
                                   auth=sp_auth)
    except AuthenticationException as exception:
        print(
            f"::error::Could not retrieve user token. Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS: {exception}"
        )
        raise AuthenticationException
    except AuthenticationError as exception:
        print(f"::error::Microsoft REST Authentication Error: {exception}")
        raise AuthenticationError
    except AdalError as exception:
        print(
            f"::error::Active Directory Authentication Library Error: {exception}"
        )
        raise AdalError
    except ProjectSystemException as exception:
        print(f"::error::Workspace authorizationfailed: {exception}")
        raise ProjectSystemException

    # Loading compute target
    try:
        # Default compute target name
        repository_name = os.environ.get("GITHUB_REPOSITORY").split(
            "/")[-1][:16]  # names can be max 16 characters

        print("::debug::Loading existing compute target")
        compute_target = ComputeTarget(workspace=ws,
                                       name=parameters.get(
                                           "name", repository_name))
        print(
            f"::debug::Found compute target with same name. Not updating the compute target: {compute_target.serialize()}"
        )
    except ComputeTargetException:
        print(
            "::debug::Could not find existing compute target with provided name"
        )

        # Checking provided parameters
        print("::debug::Checking provided parameters")
        required_parameters_provided(
            parameters=parameters,
            keys=["compute_type"],
            message=
            "Required parameter(s) not found in your parameters file for creating a compute target. Please provide a value for the following key(s): "
        )

        print("::debug::Creating new compute target")
        compute_type = parameters.get("compute_type", "")
        print(f"::debug::Compute type listed is{compute_type}")
        if compute_type == "amlcluster":
            compute_target = create_aml_cluster(workspace=ws,
                                                parameters=parameters)
            print(
                f"::debug::Successfully created AML cluster: {compute_target.serialize()}"
            )
        elif compute_type == "akscluster":
            compute_target = create_aks_cluster(workspace=ws,
                                                parameters=parameters)
            print(
                f"::debug::Successfully created AKS cluster: {compute_target.serialize()}"
            )
        else:
            print(f"::error::Compute type '{compute_type}' is not supported")
            raise AMLConfigurationException(
                f"Compute type '{compute_type}' is not supported.")
    print(
        "::debug::Successfully finished Azure Machine Learning Compute Action")
def main():
    # Loading input values
    print("::debug::Loading input values")
    model_name = os.environ.get("INPUT_MODEL_NAME", default=None)
    model_version = os.environ.get("INPUT_MODEL_VERSION", default=None)

    # Casting input values
    print("::debug::Casting input values")
    try:
        model_version = int(model_version)
    except TypeError as exception:
        print(f"::debug::Could not cast model version to int: {exception}")
        model_version = None
    except ValueError as exception:
        print(f"::debug::Could not cast model version to int: {exception}")
        model_version = None

    # Loading azure credentials
    print("::debug::Loading azure credentials")
    azure_credentials = os.environ.get("INPUT_AZURE_CREDENTIALS", default="{}")
    try:
        azure_credentials = json.loads(azure_credentials)
    except JSONDecodeError:
        print(
            "::error::Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS"
        )
        raise AMLConfigurationException(
            "Incorrect or poorly formed output from azure credentials saved in AZURE_CREDENTIALS secret. See setup in https://github.com/Azure/aml-compute/blob/master/README.md"
        )

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    validate_json(data=azure_credentials,
                  schema=azure_credentials_schema,
                  input_name="AZURE_CREDENTIALS")

    # Mask values
    print("::debug::Masking parameters")
    mask_parameter(parameter=azure_credentials.get("tenantId", ""))
    mask_parameter(parameter=azure_credentials.get("clientId", ""))
    mask_parameter(parameter=azure_credentials.get("clientSecret", ""))
    mask_parameter(parameter=azure_credentials.get("subscriptionId", ""))

    # Loading parameters file
    print("::debug::Loading parameters file")
    parameters_file = os.environ.get("INPUT_PARAMETERS_FILE",
                                     default="deploy.json")
    parameters_file_path = os.path.join(".cloud", ".azure", parameters_file)
    try:
        with open(parameters_file_path) as f:
            parameters = json.load(f)
    except FileNotFoundError:
        print(
            f"::debug::Could not find parameter file in {parameters_file_path}. Please provide a parameter file in your repository  if you do not want to use default settings (e.g. .cloud/.azure/deploy.json)."
        )
        parameters = {}

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    validate_json(data=parameters,
                  schema=parameters_schema,
                  input_name="PARAMETERS_FILE")

    # Define target cloud
    if azure_credentials.get(
            "resourceManagerEndpointUrl",
            "").startswith("https://management.usgovcloudapi.net"):
        cloud = "AzureUSGovernment"
    elif azure_credentials.get(
            "resourceManagerEndpointUrl",
            "").startswith("https://management.chinacloudapi.cn"):
        cloud = "AzureChinaCloud"
    else:
        cloud = "AzureCloud"

    # Loading Workspace
    print("::debug::Loading AML Workspace")
    sp_auth = ServicePrincipalAuthentication(
        tenant_id=azure_credentials.get("tenantId", ""),
        service_principal_id=azure_credentials.get("clientId", ""),
        service_principal_password=azure_credentials.get("clientSecret", ""),
        cloud=cloud)
    config_file_path = os.environ.get("GITHUB_WORKSPACE",
                                      default=".cloud/.azure")
    config_file_name = "aml_arm_config.json"
    try:
        ws = Workspace.from_config(path=config_file_path,
                                   _file_name=config_file_name,
                                   auth=sp_auth)
    except AuthenticationException as exception:
        print(
            f"::error::Could not retrieve user token. Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS: {exception}"
        )
        raise AuthenticationException
    except AuthenticationError as exception:
        print(f"::error::Microsoft REST Authentication Error: {exception}")
        raise AuthenticationError
    except AdalError as exception:
        print(
            f"::error::Active Directory Authentication Library Error: {exception}"
        )
        raise AdalError
    except ProjectSystemException as exception:
        print(f"::error::Workspace authorizationfailed: {exception}")
        raise ProjectSystemException

    # Loading deployment target
    print("::debug::Loading deployment target")
    try:
        deployment_target = ComputeTarget(workspace=ws,
                                          name=parameters.get(
                                              "deployment_compute_target", ""))
    except ComputeTargetException:
        deployment_target = None
    except TypeError:
        deployment_target = None

    # Loading model
    print("::debug::Loading model")
    try:
        model = Model(workspace=ws, name=model_name, version=model_version)
    except WebserviceException as exception:
        print(
            f"::error::Could not load model with provided details: {exception}"
        )
        raise AMLConfigurationException(
            f"Could not load model with provided details: {exception}")

    # Creating inference config
    print("::debug::Creating inference config")
    if os.environ.get("CONTAINER_REGISTRY_ADRESS", None) is not None:
        container_registry = ContainerRegistry()
        container_registry.address = os.environ.get(
            "CONTAINER_REGISTRY_ADRESS", None)
        container_registry.username = os.environ.get(
            "CONTAINER_REGISTRY_USERNAME", None)
        container_registry.password = os.environ.get(
            "CONTAINER_REGISTRY_PASSWORD", None)
    else:
        container_registry = None

    try:
        inference_config = InferenceConfig(
            entry_script=parameters.get("inference_entry_script", "score.py"),
            runtime=parameters.get("runtime", "python"),
            conda_file=parameters.get("conda_file", "environment.yml"),
            extra_docker_file_steps=parameters.get("extra_docker_file_steps",
                                                   None),
            source_directory=parameters.get("inference_source_directory",
                                            "code/deploy/"),
            enable_gpu=parameters.get("enable_gpu", None),
            description=parameters.get("description", None),
            base_image=parameters.get("base_image", None),
            base_image_registry=container_registry,
            cuda_version=parameters.get("cuda_version", None))
    except WebserviceException as exception:
        print(
            f"::debug::Failed to create InferenceConfig. Trying to create no code deployment: {exception}"
        )
        inference_config = None
    except TypeError as exception:
        print(
            f"::debug::Failed to create InferenceConfig. Trying to create no code deployment: {exception}"
        )
        inference_config = None

    # Loading run config
    print("::debug::Loading run config")
    model_resource_config = model.resource_configuration
    cpu_cores = get_resource_config(config=parameters.get("cpu_cores", None),
                                    resource_config=model_resource_config,
                                    config_name="cpu")
    memory_gb = get_resource_config(config=parameters.get("memory_gb", None),
                                    resource_config=model_resource_config,
                                    config_name="memory_in_gb")
    gpu_cores = get_resource_config(config=parameters.get("gpu_cores", None),
                                    resource_config=model_resource_config,
                                    config_name="gpu")

    # Creating deployment config
    print("::debug::Creating deployment config")
    if type(deployment_target) is AksCompute:
        deployment_config = AksWebservice.deploy_configuration(
            autoscale_enabled=parameters.get("autoscale_enabled", None),
            autoscale_min_replicas=parameters.get("autoscale_min_replicas",
                                                  None),
            autoscale_max_replicas=parameters.get("autoscale_max_replicas",
                                                  None),
            autoscale_refresh_seconds=parameters.get(
                "autoscale_refresh_seconds", None),
            autoscale_target_utilization=parameters.get(
                "autoscale_target_utilization", None),
            collect_model_data=parameters.get("model_data_collection_enabled",
                                              None),
            auth_enabled=parameters.get("authentication_enabled", None),
            cpu_cores=cpu_cores,
            memory_gb=memory_gb,
            enable_app_insights=parameters.get("app_insights_enabled", None),
            scoring_timeout_ms=parameters.get("scoring_timeout_ms", None),
            replica_max_concurrent_requests=parameters.get(
                "replica_max_concurrent_requests", None),
            max_request_wait_time=parameters.get("max_request_wait_time",
                                                 None),
            num_replicas=parameters.get("num_replicas", None),
            primary_key=os.environ.get("PRIMARY_KEY", None),
            secondary_key=os.environ.get("SECONDARY_KEY", None),
            tags=parameters.get("tags", None),
            properties=parameters.get("properties", None),
            description=parameters.get("description", None),
            gpu_cores=gpu_cores,
            period_seconds=parameters.get("period_seconds", None),
            initial_delay_seconds=parameters.get("initial_delay_seconds",
                                                 None),
            timeout_seconds=parameters.get("timeout_seconds", None),
            success_threshold=parameters.get("success_threshold", None),
            failure_threshold=parameters.get("failure_threshold", None),
            namespace=parameters.get("namespace", None),
            token_auth_enabled=parameters.get("token_auth_enabled", None))
    else:
        deployment_config = AciWebservice.deploy_configuration(
            cpu_cores=cpu_cores,
            memory_gb=memory_gb,
            tags=parameters.get("tags", None),
            properties=parameters.get("properties", None),
            description=parameters.get("description", None),
            location=parameters.get("location", None),
            auth_enabled=parameters.get("authentication_enabled", None),
            ssl_enabled=parameters.get("ssl_enabled", None),
            enable_app_insights=parameters.get("app_insights_enabled", None),
            ssl_cert_pem_file=parameters.get("ssl_cert_pem_file", None),
            ssl_key_pem_file=parameters.get("ssl_key_pem_file", None),
            ssl_cname=parameters.get("ssl_cname", None),
            dns_name_label=parameters.get("dns_name_label", None),
            primary_key=os.environ.get("PRIMARY_KEY", None),
            secondary_key=os.environ.get("SECONDARY_KEY", None),
            collect_model_data=parameters.get("model_data_collection_enabled",
                                              None),
            cmk_vault_base_url=os.environ.get("CMK_VAULT_BASE_URL", None),
            cmk_key_name=os.environ.get("CMK_KEY_NAME", None),
            cmk_key_version=os.environ.get("CMK_KEY_VERSION", None))

    # Deploying model
    print("::debug::Deploying model")
    try:
        # Default service name
        repository_name = os.environ.get("GITHUB_REPOSITORY").split("/")[-1]
        branch_name = os.environ.get("GITHUB_REF").split("/")[-1]
        default_service_name = f"{repository_name}-{branch_name}".lower(
        ).replace("_", "-")[:32]

        service = Model.deploy(workspace=ws,
                               name=parameters.get("name",
                                                   default_service_name),
                               models=[model],
                               inference_config=inference_config,
                               deployment_config=deployment_config,
                               deployment_target=deployment_target,
                               overwrite=True)
        service.wait_for_deployment(show_output=True)
    except WebserviceException as exception:
        print(f"::error::Model deployment failed with exception: {exception}")
        service_logs = service.get_logs()
        raise AMLDeploymentException(
            f"Model deployment failedlogs: {service_logs} \nexception: {exception}"
        )

    # Checking status of service
    print("::debug::Checking status of service")
    if service.state != "Healthy":
        service_logs = service.get_logs()
        print(
            f"::error::Model deployment failed with state '{service.state}': {service_logs}"
        )
        raise AMLDeploymentException(
            f"Model deployment failed with state '{service.state}': {service_logs}"
        )

    if parameters.get("test_enabled", False):
        # Testing service
        print("::debug::Testing service")
        root = os.environ.get("GITHUB_WORKSPACE", default=None)
        test_file_path = parameters.get("test_file_path", "code/test/test.py")
        test_file_function_name = parameters.get("test_file_function_name",
                                                 "main")

        print("::debug::Adding root to system path")
        sys.path.insert(1, f"{root}")

        print("::debug::Importing module")
        test_file_path = f"{test_file_path}.py" if not test_file_path.endswith(
            ".py") else test_file_path
        try:
            test_spec = importlib.util.spec_from_file_location(
                name="testmodule", location=test_file_path)
            test_module = importlib.util.module_from_spec(spec=test_spec)
            test_spec.loader.exec_module(test_module)
            test_function = getattr(test_module, test_file_function_name, None)
        except ModuleNotFoundError as exception:
            print(
                f"::error::Could not load python script in your repository which defines theweb service tests (Script: /{test_file_path}, Function: {test_file_function_name}()): {exception}"
            )
            raise AMLConfigurationException(
                f"Could not load python script in your repository which defines the web service tests (Script: /{test_file_path}, Function: {test_file_function_name}()): {exception}"
            )
        except FileNotFoundError as exception:
            print(
                f"::error::Could not load python script or function in your repository which defines the web service tests (Script: /{test_file_path}, Function: {test_file_function_name}()): {exception}"
            )
            raise AMLConfigurationException(
                f"Could not load python script or function in your repository which defines the web service tests (Script: /{test_file_path}, Function: {test_file_function_name}()): {exception}"
            )
        except AttributeError as exception:
            print(
                f"::error::Could not load python script or function in your repository which defines the web service tests (Script: /{test_file_path}, Function: {test_file_function_name}()): {exception}"
            )
            raise AMLConfigurationException(
                f"Could not load python script or function in your repository which defines the web service tests (Script: /{test_file_path}, Function: {test_file_function_name}()): {exception}"
            )

        # Load experiment config
        print("::debug::Loading experiment config")
        try:
            test_function(service)
        except TypeError as exception:
            print(
                f"::error::Could not load experiment config from your module (Script: /{test_file_path}, Function: {test_file_function_name}()): {exception}"
            )
            raise AMLConfigurationException(
                f"Could not load experiment config from your module (Script: /{test_file_path}, Function: {test_file_function_name}()): {exception}"
            )
        except Exception as exception:
            print(
                f"::error::The webservice tests did not complete successfully: {exception}"
            )
            raise AMLDeploymentException(
                f"The webservice tests did not complete successfully: {exception}"
            )

    # Deleting service if desired
    if parameters.get("delete_service_after_deployment", False):
        service.delete()
    else:
        # Create outputs
        print("::debug::Creating outputs")
        print(f"::set-output name=service_scoring_uri::{service.scoring_uri}")
        print(f"::set-output name=service_swagger_uri::{service.swagger_uri}")
    print(
        "::debug::Successfully finished Azure Machine Learning Deploy Action")
                    dest="path",
                    required=True)
args = parser.parse_args()

print("Argument 1: %s" % args.aml_compute_target)
print("Argument 2: %s" % args.model_name)
print("Argument 3: %s" % args.build_number)
print("Argument 4: %s" % args.image_name)
print("Argument 5: %s" % args.path)

print('creating AzureCliAuthentication...')
cli_auth = AzureCliAuthentication()
print('done creating AzureCliAuthentication!')

print('get workspace...')
ws = Workspace.from_config(path=args.path, auth=cli_auth)
print('done getting workspace!')

print("looking for existing compute target.")
aml_compute = AmlCompute(ws, args.aml_compute_target)
print("found existing compute target.")

# Create a new runconfig object
run_amlcompute = RunConfiguration()

# Use the cpu_cluster you created above.
run_amlcompute.target = args.aml_compute_target

# Enable Docker
run_amlcompute.environment.docker.enabled = True
from azureml.core.model import InferenceConfig
from azureml.core.environment import Environment
from azureml.core import Workspace
from azureml.core.model import Model
from azureml.core.webservice import AciWebservice

ws = Workspace.from_config(path='./.azureml', _file_name='config.json')
model = Model(ws, name='digits_model', version=2)

env = Environment.from_conda_specification(
    name='sklearn-aml-env', file_path='./.azureml/sklearn-env-aml.yml')

inference_config = InferenceConfig(entry_script="./src/score2.py",
                                   environment=env)

deployment_config = AciWebservice.deploy_configuration(cpu_cores=1,
                                                       memory_gb=1)

aci_service = Model.deploy(workspace=ws,
                           name='digits-model-service2',
                           models=[model],
                           inference_config=inference_config,
                           deployment_config=deployment_config)

aci_service.wait_for_deployment(show_output=True)
print(aci_service.state)
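The ./.azureml/config.json read by Workspace.from_config above can be generated once with write_config; a sketch, assuming placeholder subscription and resource group values:

# Sketch (assumption): create ./.azureml/config.json so later scripts can call
# Workspace.from_config() without arguments.
from azureml.core import Workspace

ws = Workspace.get(name="my-workspace",
                   subscription_id="<subscription-id>",
                   resource_group="<resource-group>")
ws.write_config(path=".", file_name="config.json")  # written into ./.azureml/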
Example #6
# tutorial/02-create-compute.py
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

ws = Workspace.from_config()  # This automatically looks for a directory .azureml

# Choose a name for your CPU cluster
cpu_cluster_name = "cpu-cluster"

# Verify that the cluster does not exist already
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
    print('Found existing cluster, use it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D2_V2',
        max_nodes=6,
        idle_seconds_before_scaledown=2400)
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)

cpu_cluster.wait_for_completion(show_output=True)
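Once provisioned, the cluster can be referenced by name when submitting a run; a minimal sketch reusing ws and cpu_cluster_name from above, with a placeholder source directory and script:

# Sketch (assumption): submit a training script to the cluster created above.
from azureml.core import Experiment, ScriptRunConfig

src = ScriptRunConfig(source_directory="./src",       # placeholder folder
                      script="train.py",              # placeholder script
                      compute_target=cpu_cluster_name)
run = Experiment(ws, "cpu-cluster-demo").submit(src)
run.wait_for_completion(show_output=True)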
Example #7
}

akscomputes = {
    "aks-cpu-deploy": {
        "vm_size": "STANDARD_DS3_V2",
        "agent_count": 3,
    },
    "aks-gpu-deploy": {
        "vm_size": "STANDARD_NC6S_V3",
        "agent_count": 3,
    },
}

# create or get Workspace
try:
    ws = Workspace.from_config(args.config)
except:
    ws = Workspace.create(
        args.workspace_name,
        subscription_id=args.subscription_id,
        resource_group=args.resource_group,
        location=args.location,
        create_resource_group=True,
        exist_ok=True,
        show_output=True,
    )
    ws.write_config()

# create aml compute targets
for ct_name in amlcomputes:
    if ct_name not in ws.compute_targets:
Example #8
print("Argument 1: %s" % args.service_name)
print("Argument 2: %s" % args.aks_name)
print("Argument 3: %s" % args.aks_region)
print("Argument 4: %s" % args.description)
print('..4.completed')
print('')
print('')

print('5. Authenticating with AzureCliAuthentication...')
clientAuthn = AzureCliAuthentication()
print('..5.completed')
print('')
print('')

print('6. Instantiate AML workspace')
amlWs = Workspace.from_config(auth=clientAuthn)
print('..6.completed')
print('')
print('')

print('7. Instantiate image')
containerImage = Image(amlWs, id=image_id)
print(containerImage)
print('..7.completed')
print('')
print('')

print('8. Check for and delete any existing web service instance')

aksName = args.aks_name
aksRegion = args.aks_region
Example #9
def __init__(self, ctx):
    self.ctx = ctx
    self.name = ctx.config['config'].get('name', None)
    self.source = ctx.config['config'].get('source', None)
    self.target = ctx.config['config'].get('target', None)
    self.exclude = ctx.config['config'].get('exclude', None)
    self.budget = ctx.config['config'].get('budget', None)
    # per provider experiment settings
    self.metric = ctx.config['azure'].get('experiment/metric',
                                          'spearman_correlation')
    self.cross_validation_folds = ctx.config['azure'].get(
        'experiment/cross_validation_folds', 5)
    self.max_total_time = ctx.config['azure'].get(
        'experiment/max_total_time', 60)
    self.iteration_timeout_minutes = ctx.config['azure'].get(
        'experiment/iteration_timeout_minutes', 10)
    self.max_n_trials = ctx.config['azure'].get('experiment/max_n_trials',
                                                10)
    self.use_ensemble = ctx.config['azure'].get('experiment/use_ensemble',
                                                False)
    # per provider compute settings
    self.subscription_id = ctx.config['azure'].get(
        'subscription_id', os.environ.get("AZURE_SUBSCRIPTION_ID"))
    self.workspace = ctx.config['azure'].get('workspace',
                                             self.name + '_ws')
    self.resource_group = ctx.config['azure'].get('resource_group',
                                                  self.name + '_resources')
    # cluster specific options
    self.compute_cluster = ctx.config['azure'].get('cluster/name',
                                                   'cpucluster')
    self.compute_region = ctx.config['azure'].get('cluster/region',
                                                  'eastus2')
    self.compute_min_nodes = ctx.config['azure'].get('cluster/min_nodes', 0)
    self.compute_max_nodes = ctx.config['azure'].get('cluster/max_nodes', 4)
    self.compute_sku = ctx.config['azure'].get('cluster/type',
                                               'STANDARD_D2_V2')
    # check core SDK version number
    print("Azure ML SDK Version: {}".format(azureml.core.VERSION))
    print("Current directory: {}".format(os.getcwd()))
    try:  # get the preloaded workspace definition
        self.ws = Workspace.from_config(path='./.azureml/config.json')
    except Exception:  # or create a new one
        self.ws = Workspace.create(name=self.workspace,
                                   subscription_id=self.subscription_id,
                                   resource_group=self.resource_group,
                                   create_resource_group=True,
                                   location=self.compute_region)
    self.ws.write_config()
    if self.compute_cluster in self.ws.compute_targets:
        compute_target = self.ws.compute_targets[self.compute_cluster]
        if compute_target and type(compute_target) is AmlCompute:
            print('Found compute target. Just use it: ' +
                  self.compute_cluster)
    else:
        print('Creating new AML compute context.')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=self.compute_sku,
            min_nodes=self.compute_min_nodes,
            max_nodes=self.compute_max_nodes)
        # ComputeTarget.create expects the Workspace object, not the workspace name
        compute_target = ComputeTarget.create(self.ws,
                                              self.compute_cluster,
                                              provisioning_config)
        compute_target.wait_for_completion(show_output=True)
Example #10
def create_pipeline():
    ws = Workspace.from_config(auth=authenticate())
    def_data_store = ws.get_default_datastore()
    run = Run.get_context()

    project_folder = "project"

    read_output = PipelineData("read_output",
                               datastore=def_data_store,
                               output_name="read_output")
    process_out = PipelineData("process_out",
                               datastore=def_data_store,
                               output_name="process_out")

    # hist, line, scatter
    chart_type = PipelineParameter(name="chart_type", default_value="line")

    # Check if compute exist
    compute_name = "Dedicated-DS3-v2"
    vm_size = "STANDARD_D3_V2"
    if compute_name in ws.compute_targets:
        compute_target = ws.compute_targets[compute_name]
        if compute_target and type(compute_target) is AmlCompute:
            print('Found compute target: ' + compute_name)
    else:
        # create the compute target
        print('Creating a new compute target...')
        provisioning_config = AmlCompute.provisioning_configuration(
            vm_size=vm_size, min_nodes=0, max_nodes=4)
        compute_target = ComputeTarget.create(ws, compute_name,
                                              provisioning_config)
        compute_target.wait_for_completion(show_output=True,
                                           min_node_count=None,
                                           timeout_in_minutes=20)

    # create run config for our python steps
    def conda_deps():
        deps = CondaDependencies(f'{project_folder}/environment.yml')
        deps.add_channel("conda-forge")
        deps.add_conda_package('curl')
        return deps

    run_config = RunConfiguration(conda_dependencies=conda_deps())
    run_config.environment.docker.enabled = True
    run_config.environment.spark.precache_packages = False

    # Create each step for our pipeline
    read_data = PythonScriptStep(
        name="read_data",
        script_name="read_data.py",
        arguments=["read-data", "--output-path", read_output],
        outputs=[read_output],
        compute_target=compute_target,
        source_directory=project_folder,
        runconfig=run_config)

    pre_process = PythonScriptStep(name="pre_process",
                                   script_name="pre_process.py",
                                   arguments=[
                                       "pre-process", "--input-path",
                                       read_output, "--output-path",
                                       process_out
                                   ],
                                   inputs=[read_output],
                                   outputs=[process_out],
                                   compute_target=compute_target,
                                   source_directory=project_folder,
                                   runconfig=run_config)

    visualize = PythonScriptStep(name="visualize",
                                 script_name="visualize.py",
                                 arguments=[
                                     "visualize", "--input-path", process_out,
                                     "--chart", chart_type
                                 ],
                                 inputs=[process_out],
                                 compute_target=compute_target,
                                 source_directory=project_folder,
                                 runconfig=run_config)

    # list of steps to run
    steps = [read_data, pre_process, visualize]

    # Build the pipeline
    test_pipeline = Pipeline(workspace=ws, steps=[steps])

    # Submit the pipeline to be run - In the same experiment
    pipeline_run = run.experiment.submit(test_pipeline)
    pipeline_run.wait_for_completion()
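    # (Sketch, assumption) The finished run could also be published so the pipeline can be
    # re-triggered via REST; the name and version below are placeholders.
    published = pipeline_run.publish_pipeline(
        name="data-visualization-pipeline",
        description="Pipeline published from create_pipeline()",
        version="1.0")
    print(published.id, published.endpoint)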
Example #11
def analyse_with_gordo():
    ws = Workspace.from_config()  # Azure ML
    # Get an experiment object from Azure Machine Learning
    experiment_name = "dummy_test"
    experiment = Experiment(workspace=ws, name=experiment_name)  # Azure ML
    mlflow.set_experiment(experiment_name)  # MLFlow

    resamples_for_model = ["1T", "1H"]
    aggregation_methods = ["max", "mean"]
    batch_sizes = [1, 10, 100]
    epochs = [1, 10]
    number_of_permutations = len(
        list(
            itertools.product(
                aggregation_methods, resamples_for_model, batch_sizes, epochs
            )
        )
    )

    resampled_original_data = read_and_resample("2nd_test.hdf", "1S")

    if PLOTTING:
        plotnum = 0
        f, axarr = plt.subplots(number_of_permutations + 1, sharex=True)
        axarr[plotnum].plot(
            resampled_original_data, linewidth=1, label="sensor_data_1S_mean"
        )
        axarr[plotnum].legend(loc="upper left")
        plotnum += 1

    for aggregation_method, interval, batch_size, epoch in itertools.product(
        aggregation_methods, resamples_for_model, batch_sizes, epochs
    ):
        run = experiment.start_logging()
        with mlflow.start_run():
            mlflow.log_param("interval", interval)  # MLFlow
            mlflow.log_param("aggregation_method", aggregation_method)  # MLFlow
            mlflow.log_param("batch_size", batch_size)  # MLFlow
            mlflow.log_param("epochs", epoch)  # MLFlow

            run.log("interval", interval)  # Azure ML
            run.log("aggregation_method", aggregation_method)  # Azure ML
            run.log("batch_size", batch_size)  # Azure ML
            run.log("epochs", epoch)  # Azure ML

            print(
                f"Build model for data resampled with {interval} resolution,  method {aggregation_method}, batch size {batch_size} and number of epochs {epoch}"
            )
            resampled = read_and_resample(
                "2nd_test.hdf", interval, aggregation_method=aggregation_method
            )
            anomalies, avg_train_anomaly, predicted_data, train_until_index = build_model(
                resampled, epoch, batch_size
            )

            r2_train, expl_train, r2_test, expl_test = calc_scores(
                resampled, predicted_data, train_until_index
            )
            run.log("r2_train", r2_train)  # Azure ML
            run.log("explained_variance_train", expl_train)  # Azure ML
            run.log("r2_test", r2_test)  # Azure ML
            run.log("explained_variance_test", expl_test)  # Azure ML

            mlflow.log_metric("r2_train", r2_train)  # MLFlow
            mlflow.log_metric("explained_variance_train", expl_train)  # MLFlow
            mlflow.log_metric("r2_test", r2_test)  # MLFlow
            mlflow.log_metric("explained_variance_test", expl_test)  # MLFlow

            anomalies = anomalies.rolling(
                resamples_for_model[-1]
            ).mean()  # Use the last of the experiment resamples as the anomaly resample
            if PLOTTING:
                axarr[plotnum].plot(
                    anomalies, label=interval + "-" + aggregation_method + "-model"
                )
                axarr[plotnum].axhline(avg_train_anomaly, color="r")
                axarr[plotnum].legend(loc="upper left")
                plotnum += 1

        run.complete()  # Azure ML

    if PLOTTING:
        plt.show()
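    # (Sketch, assumption) The metrics logged above can later be read back from the Azure ML
    # side, one set per run of the experiment.
    for logged_run in experiment.get_runs():
        print(logged_run.id, logged_run.get_metrics())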
Example #12
def main():
    # Loading azure credentials
    print("::debug::Loading azure credentials")
    azure_credentials = os.environ.get("INPUT_AZURE_CREDENTIALS", default="{}")
    try:
        azure_credentials = json.loads(azure_credentials)
    except JSONDecodeError:
        print(
            "::error::Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS"
        )
        raise AMLConfigurationException(
            "Incorrect or poorly formed output from azure credentials saved in AZURE_CREDENTIALS secret. See setup in https://github.com/Azure/aml-workspace/blob/master/README.md"
        )

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    validate_json(data=azure_credentials,
                  schema=azure_credentials_schema,
                  input_name="AZURE_CREDENTIALS")

    # Mask values
    print("::debug::Masking parameters")
    mask_parameter(parameter=azure_credentials.get("tenantId", ""))
    mask_parameter(parameter=azure_credentials.get("clientId", ""))
    mask_parameter(parameter=azure_credentials.get("clientSecret", ""))
    mask_parameter(parameter=azure_credentials.get("subscriptionId", ""))

    # Loading parameters file
    print("::debug::Loading parameters file")
    parameters_file = os.environ.get("INPUT_PARAMETERS_FILE",
                                     default="run.json")
    parameters_file_path = os.path.join(".cloud", ".azure", parameters_file)
    try:
        with open(parameters_file_path) as f:
            parameters = json.load(f)
    except FileNotFoundError:
        print(
            f"::debug::Could not find parameter file in {parameters_file_path}. Please provide a parameter file in your repository if you do not want to use default settings (e.g. .cloud/.azure/run.json)."
        )
        parameters = {}

    # Checking provided parameters
    print("::debug::Checking provided parameters")
    validate_json(data=parameters,
                  schema=parameters_schema,
                  input_name="PARAMETERS_FILE")

    # Define target cloud
    if azure_credentials.get(
            "resourceManagerEndpointUrl",
            "").startswith("https://management.usgovcloudapi.net"):
        cloud = "AzureUSGovernment"
    elif azure_credentials.get(
            "resourceManagerEndpointUrl",
            "").startswith("https://management.chinacloudapi.cn"):
        cloud = "AzureChinaCloud"
    else:
        cloud = "AzureCloud"

    # Loading Workspace
    print("::debug::Loading AML Workspace")
    sp_auth = ServicePrincipalAuthentication(
        tenant_id=azure_credentials.get("tenantId", ""),
        service_principal_id=azure_credentials.get("clientId", ""),
        service_principal_password=azure_credentials.get("clientSecret", ""),
        cloud=cloud)
    config_file_path = os.environ.get("GITHUB_WORKSPACE",
                                      default=".cloud/.azure")
    config_file_name = "aml_arm_config.json"
    try:
        ws = Workspace.from_config(path=config_file_path,
                                   _file_name=config_file_name,
                                   auth=sp_auth)
    except AuthenticationException as exception:
        print(
            f"::error::Could not retrieve user token. Please paste output of `az ad sp create-for-rbac --name <your-sp-name> --role contributor --scopes /subscriptions/<your-subscriptionId>/resourceGroups/<your-rg> --sdk-auth` as value of secret variable: AZURE_CREDENTIALS: {exception}"
        )
        raise AuthenticationException
    except AuthenticationError as exception:
        print(f"::error::Microsoft REST Authentication Error: {exception}")
        raise AuthenticationError
    except AdalError as exception:
        print(
            f"::error::Active Directory Authentication Library Error: {exception}"
        )
        raise AdalError
    except ProjectSystemException as exception:
        print(f"::error::Workspace authorizationfailed: {exception}")
        raise ProjectSystemException

    # Create experiment
    print("::debug::Creating experiment")
    try:
        # Default experiment name
        repository_name = os.environ.get("GITHUB_REPOSITORY").split("/")[-1]
        branch_name = os.environ.get("GITHUB_REF").split("/")[-1]
        default_experiment_name = f"{repository_name}-{branch_name}"

        experiment = Experiment(
            workspace=ws,
            name=parameters.get("experiment_name",
                                default_experiment_name)[:36])
    except TypeError as exception:
        experiment_name = parameters.get("experiment_name", None)
        print(
            f"::error::Could not create an experiment with the specified name {experiment_name}: {exception}"
        )
        raise AMLExperimentConfigurationException(
            f"Could not create an experiment with the specified name {experiment_name}: {exception}"
        )
    except UserErrorException as exception:
        experiment_name = parameters.get("experiment_name", None)
        print(
            f"::error::Could not create an experiment with the specified name {experiment_name}: {exception}"
        )
        raise AMLExperimentConfigurationException(
            f"Could not create an experiment with the specified name {experiment_name}: {exception}"
        )

    # Loading run config
    print("::debug::Loading run config")
    run_config = None
    if run_config is None:
        # Loading run config from runconfig yaml file
        print("::debug::Loading run config from runconfig yaml file")
        run_config = load_runconfig_yaml(runconfig_yaml_file=parameters.get(
            "runconfig_yaml_file", "code/train/run_config.yml"))
    if run_config is None:
        # Loading run config from pipeline yaml file
        print("::debug::Loading run config from pipeline yaml file")
        run_config = load_pipeline_yaml(workspace=ws,
                                        pipeline_yaml_file=parameters.get(
                                            "pipeline_yaml_file",
                                            "code/train/pipeline.yml"))
    if run_config is None:
        # Loading run config from python runconfig file
        print("::debug::Loading run config from python runconfig file")
        run_config = load_runconfig_python(
            workspace=ws,
            runconfig_python_file=parameters.get("runconfig_python_file",
                                                 "code/train/run_config.py"),
            runconfig_python_function_name=parameters.get(
                "runconfig_python_function_name", "main"))
    if run_config is None:
        # Loading values for errors
        pipeline_yaml_file = parameters.get("pipeline_yaml_file",
                                            "code/train/pipeline.yml")
        runconfig_yaml_file = parameters.get("runconfig_yaml_file",
                                             "code/train/run_config.yml")
        runconfig_python_file = parameters.get("runconfig_python_file",
                                               "code/train/run_config.py")
        runconfig_python_function_name = parameters.get(
            "runconfig_python_function_name", "main")

        print(
            f"::error::Error when loading runconfig yaml definition your repository (Path: /{runconfig_yaml_file})."
        )
        print(
            f"::error::Error when loading pipeline yaml definition your repository (Path: /{pipeline_yaml_file})."
        )
        print(
            f"::error::Error when loading python script or function in your repository which defines the experiment config (Script path: '/{runconfig_python_file}', Function: '{runconfig_python_function_name}()')."
        )
        print(
            "::error::You have to provide either a yaml definition for your run, a yaml definition of your pipeline or a python script, which returns a runconfig (Pipeline, ScriptRunConfig, AutoMlConfig, Estimator, etc.). Please read the documentation for more details."
        )
        raise AMLExperimentConfigurationException(
            "You have to provide a yaml definition for your run, a yaml definition of your pipeline or a python script, which returns a runconfig. Please read the documentation for more details."
        )

    # Submit run config
    print("::debug::Submitting experiment config")
    try:
        # Defining default tags
        print("::debug::Defining default tags")
        default_tags = {
            "GITHUB_ACTOR": os.environ.get("GITHUB_ACTOR"),
            "GITHUB_REPOSITORY": os.environ.get("GITHUB_REPOSITORY"),
            "GITHUB_SHA": os.environ.get("GITHUB_SHA"),
            "GITHUB_REF": os.environ.get("GITHUB_REF")
        }

        run = experiment.submit(config=run_config,
                                tags=dict(parameters.get("tags", {}),
                                          **default_tags))
    except AzureMLException as exception:
        print(
            f"::error::Could not submit experiment config. Your script passed object of type {type(run_config)}. Object must be correctly configured and of type e.g. estimator, pipeline, etc.: {exception}"
        )
        raise AMLExperimentConfigurationException(
            f"Could not submit experiment config. Your script passed object of type {type(run_config)}. Object must be correctly configured and of type e.g. estimator, pipeline, etc.: {exception}"
        )
    except TypeError as exception:
        print(
            f"::error::Could not submit experiment config. Your script passed object of type {type(run_config)}. Object must be correctly configured and of type e.g. estimator, pipeline, etc.: {exception}"
        )
        raise AMLExperimentConfigurationException(
            f"Could not submit experiment config. Your script passed object of type {type(run_config)}. Object must be correctly configured and of type e.g. estimator, pipeline, etc.: {exception}"
        )

    # Create outputs
    print("::debug::Creating outputs")
    print(f"::set-output name=experiment_name::{run.experiment.name}")
    print(f"::set-output name=run_id::{run.id}")
    print(f"::set-output name=run_url::{run.get_portal_url()}")

    # Waiting for run to complete
    print("::debug::Waiting for run to complete")
    if parameters.get("wait_for_completion", True):
        run.wait_for_completion(show_output=True)

        # Creating additional outputs of finished run
        run_metrics = run.get_metrics(recursive=True)
        print(f"::set-output name=run_metrics::{run_metrics}")
        run_metrics_markdown = convert_to_markdown(run_metrics)
        print(
            f"::set-output name=run_metrics_markdown::{run_metrics_markdown}")

        # Download artifacts if enabled
        if parameters.get("download_artifacts", False):
            # Defining artifacts folder
            print("::debug::Defining artifacts folder")
            root_path = os.environ.get("GITHUB_WORKSPACE", default=None)
            folder_name = f"aml_artifacts_{run.id}"
            artifact_path = os.path.join(root_path, folder_name)

            # Downloading artifacts
            print("::debug::Downloading artifacts")
            run.download_files(
                output_directory=os.path.join(artifact_path, "parent"))
            children = run.get_children(recursive=True)
            for i, child in enumerate(children):
                child.download_files(
                    output_directory=os.path.join(artifact_path, f"child_{i}"))

            # Creating additional outputs
            print(f"::set-output name=artifact_path::{artifact_path}")

    # Publishing pipeline
    print("::debug::Publishing pipeline")
    if type(run) is PipelineRun and parameters.get("publish_pipeline", False):
        # Default pipeline name
        repository_name = os.environ.get("GITHUB_REPOSITORY").split("/")[-1]
        branch_name = os.environ.get("GITHUB_REF").split("/")[-1]
        default_pipeline_name = f"{repository_name}-{branch_name}"

        published_pipeline = run.publish_pipeline(
            name=parameters.get("pipeline_name", default_pipeline_name),
            description="Pipeline registered by GitHub Run Action",
            version=parameters.get("pipeline_version", None),
            continue_on_step_failure=parameters.get(
                "pipeline_continue_on_step_failure", False))

        # Creating additional outputs
        print(
            f"::set-output name=published_pipeline_id::{published_pipeline.id}"
        )
        print(
            f"::set-output name=published_pipeline_status::{published_pipeline.status}"
        )
        print(
            f"::set-output name=published_pipeline_endpoint::{published_pipeline.endpoint}"
        )
    elif parameters.get("publish_pipeline", False):
        print(
            "::error::Could not register pipeline because you did not pass a pipeline to the action"
        )

    print("::debug::Successfully finished Azure Machine Learning Train Action")
def main():
    """
    Run the experiment for training
    """
    interactive_auth = InteractiveLoginAuthentication(
        tenant_id=os.getenv("TENANT_ID"))
    work_space = Workspace.from_config(auth=interactive_auth)

    # Set up the dataset for training
    datastore = work_space.get_default_datastore()
    dataset = Dataset.File.from_files(path=(datastore, "datasets/mnist"))

    # Set up the experiment for training
    experiment = Experiment(workspace=work_space, name="keras-lenet-train")
    #     azureml._restclient.snapshots_client.SNAPSHOT_MAX_SIZE_BYTES = 2000000000
    config = ScriptRunConfig(
        source_directory=".",
        script="train_keras.py",
        compute_target="cpu-cluster",
        arguments=[
            "--data_folder",
            dataset.as_named_input("input").as_mount(),
            "--log_folder",
            "./logs",
        ],
    )

    # Set up the TensorFlow/Keras environment
    environment = Environment("keras-environment")

    # environment = Environment.from_conda_specification(
    #     name='keras-environment',
    #     file_path='keras-environment.yml'
    # )
    environment.python.conda_dependencies = CondaDependencies.create(
        python_version="3.7.7",
        pip_packages=["azureml-defaults", "numpy", "tensorflow==2.3.1"])
    config.run_config.environment = environment

    # Run the experiment for training
    run = experiment.submit(config)
    aml_url = run.get_portal_url()
    print(
        "Submitted to an Azure Machine Learning compute cluster. Click on the link below"
    )
    print("")
    print(aml_url)

    tboard = Tensorboard([run])
    # If successful, start() returns a string with the URI of the instance.
    tboard.start(start_browser=True)
    run.wait_for_completion(show_output=True)
    # After your job completes, be sure to stop() the streaming otherwise it will continue to run.
    print("Press enter to stop")
    input()
    tboard.stop()

    # Register Model
    metrics = run.get_metrics()
    run.register_model(
        model_name="keras_mnist",
        tags={
            "data": "mnist",
            "model": "classification"
        },
        model_path="outputs/keras_lenet.h5",
        model_framework=Model.Framework.TENSORFLOW,
        model_framework_version="2.3.1",
        properties={
            "train_loss": metrics["train_loss"][-1],
            "train_accuracy": metrics["train_accuracy"][-1],
            "val_loss": metrics["val_loss"][-1],
            "val_accuracy": metrics["val_accuracy"][-1],
        },
    )
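    # (Sketch, assumption) The registered model can be pulled back down later, e.g. from
    # another script; Model picks the latest registered version by default.
    registered = Model(work_space, name="keras_mnist")
    registered.download(target_dir="./downloaded_model", exist_ok=True)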
Example #14
IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THE SOFTWARE CODE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
"""
import os, json, datetime, sys
from operator import attrgetter
from azureml.core import Workspace
from azureml.core.model import Model
from azureml.core.image import Image
from azureml.core.webservice import Webservice
from azureml.core.webservice import AciWebservice
from azureml.core.authentication import AzureCliAuthentication

cli_auth = AzureCliAuthentication()
# Get workspace
ws = Workspace.from_config(auth=cli_auth)  # Get the Image to deploy details
try:
    with open("aml_config/image.json") as f:
        config = json.load(f)
except:
    print("No new model, thus no deployment on ACI")
    # raise Exception('No new model to register as production model perform better')
    sys.exit(0)

image_name = config["image_name"]
image_version = config["image_version"]

images = Image.list(workspace=ws)
image, = (m for m in images
          if m.version == image_version and m.name == image_name)
print(
Example #15
from azureml.core import Workspace
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from ml_service.util.env_variables import Env

if __name__ == "__main__":
    # Environment variables
    env = Env()

    ws = Workspace.from_config()  # Load config from .azureml

    # Choose a name for your CPU cluster
    cpu_cluster_name = env.aml_compute_name

    # Verify that the cluster does not exist already
    try:
        cpu_cluster = AmlCompute(workspace=ws, name=cpu_cluster_name)
        print('Found existing cluster; using it.')
    except ComputeTargetException:
        compute_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_D2_V2',
            idle_seconds_before_scaledown=2400,
            min_nodes=0,
            max_nodes=4)
        cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name,
                                           compute_config)

    cpu_cluster.wait_for_completion(show_output=True)
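
# Illustrative sketch (assumption, not the project's actual implementation):
# the Env helper imported from ml_service.util.env_variables is not shown in
# this listing. A minimal stand-in could simply read the settings it exposes
# from environment variables:
import os
from dataclasses import dataclass, field


@dataclass(frozen=True)
class Env:
    aml_compute_name: str = field(
        default_factory=lambda: os.environ.get("AML_COMPUTE_NAME", "cpu-cluster"))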
Exemplo n.º 16
0
import azureml
from azureml.core import Workspace, Run
from azureml.core.compute import ComputeTarget, BatchAiCompute
from azureml.core.compute_target import ComputeTargetException

# check Azure ML SDK version
print("Azure ML SDK Version: ", azureml.core.VERSION)

# check workspace
ws = Workspace.from_config('aml_config/config.json')
print(f'Using Azure ML Workspace {ws.name} in location {ws.location}')

# check and create the Batch AI Compute cluster
try:
    compute_target = ComputeTarget(workspace=ws, name='tgschallenge')
    print('The BatchAI cluster already exists.')
except ComputeTargetException:
    compute_config = BatchAiCompute.provisioning_configuration(
        vm_size='STANDARD_NC6',
        vm_priority='dedicated',
        autoscale_enabled=True,
        cluster_min_nodes=0,
        cluster_max_nodes=4)
    compute_target = ComputeTarget.create(ws, 'tgschallenge', compute_config)
    compute_target.wait_for_completion(show_output=True)
    print(compute_target.get_status())
Exemplo n.º 17
0
    parser = argparse.ArgumentParser()
    parser.add_argument("--glue_task",
                        default="cola",
                        help="Name of GLUE task used for finetuning.")
    parser.add_argument(
        "--model_checkpoint",
        default="distilbert-base-uncased",
        help="Pretrained transformers model name.",
    )
    args = parser.parse_args()

    print(
        f"Finetuning {args.glue_task} with model {args.model_checkpoint} on Azure ML..."
    )

    ws: Workspace = Workspace.from_config()

    target: ComputeTarget = ws.compute_targets["gpu-K80-2"]

    env: Environment = transformers_environment(use_gpu=True)

    exp: Experiment = Experiment(ws, "transformers-glue-finetuning")

    run: Run = submit_glue_finetuning_to_aml(
        glue_task=args.glue_task,
        model_checkpoint=args.model_checkpoint,  # try: "bert-base-uncased"
        environment=env,
        target=target,
        experiment=exp,
    )
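
# Illustrative sketch (assumption): transformers_environment() is a project
# helper that is not shown in this listing. One plausible implementation builds
# an AML Environment with the Hugging Face and PyTorch pip packages and a GPU
# base image when use_gpu=True; the base image name below is an assumption.
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies


def transformers_environment(use_gpu: bool = True) -> Environment:
    env = Environment("transformers-environment")
    env.python.conda_dependencies = CondaDependencies.create(
        python_version="3.8",
        pip_packages=["azureml-defaults", "transformers", "datasets", "torch"])
    if use_gpu:
        # Pick a curated GPU base image available in your region.
        env.docker.base_image = (
            "mcr.microsoft.com/azureml/openmpi4.1.0-cuda11.1-cudnn8-ubuntu18.04")
    return env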
Exemplo n.º 18
0
    plt.subplot(1,2,1)
    plot_image(i, predictions, test_labels, test_images)
    plt.subplot(1,2,2)
    plot_value_array(i, predictions,  test_labels)
    _ = plt.xticks(range(10), class_names, rotation=45)
    plt.show()

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', default='..\\aml_config\\config.json')
    parser.add_argument('--spconfig', default='..\\aml_config\\spconfig.json')
    parser.add_argument('--modelName', default='fmnist')
    parser.add_argument('--modelPath', default='outputs/fmnist.h5')
    parser.add_argument('--modelFileName', default='fmnist.h5')
    parser.add_argument('--selected_item', type=int, default=0)
    parser.add_argument('-v', '--verbose', dest='verbose', action='store_true')
    parser.add_argument('--version', action='version', version='%(prog)s 1.0')
    args = parser.parse_args()
    folders = createFolders()
    if (args.verbose):
        print ("config file: {0}".format(args.config))
        print ("verbose value: {0}".format(args.verbose))
        print ("local script folder: {0}".format(folders.script_folder))
        print ("local data folder: {0}".format(folders.data_folder))
        print ("local output folder: {0}".format(folders.output_folder))
        
    svc_pr = loadAuthCredentials(args)        
    ws = Workspace.from_config(path=args.config, auth=svc_pr)
    downloadModel(ws,args,folders)
    testModel(args,folders)
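
# Illustrative sketch (assumption): downloadModel() is a project helper that is
# not shown in this listing. A minimal version could fetch the registered model
# from the workspace into the local output folder using Model.download:
from azureml.core.model import Model


def downloadModel(ws, args, folders):
    model = Model(ws, name=args.modelName)
    # Downloads e.g. outputs/fmnist.h5 into the local output folder.
    model.download(target_dir=folders.output_folder, exist_ok=True)
    return model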
"""
Check the logs of a cloud deployed Azure ML webservice in a workspace
"""
from azureml.core import Workspace
from azureml.core.webservice import Webservice


ws = Workspace.from_config(path='./.azureml/config.json')
webservices = ws.webservices

for name, webservice in webservices.items():
    print("Webservice: {}, scoring URI: {}".format(name, webservice.scoring_uri))
    if name == "keras-yolov3-firearms":
        print(webservice.get_logs())
        print(list(webservice.get_keys()))
        print(webservice.scoring_uri)
Exemplo n.º 20
0
def main():
    # e = Env()
    # print(e.workspace_name)

    # svc_pr = ServicePrincipalAuthentication(
    # tenant_id=os.environ.get("TENANT_ID"),
    # service_principal_id=os.environ.get("AZURE_SP_ID"),
    # service_principal_password=os.environ.get("AZURE_SP_PASSWORD"))

    # # Get Azure machine learning workspace
    # ws = Workspace.get(
    #     name=os.environ.get("WORKSPACE_NAME"),
    #     subscription_id=os.environ.get("SUBSCRIPTION_ID"),
    #     resource_group=os.environ.get("AZURE_RESOURCE_GROUP")
    #     ,auth=svc_pr
    # )

    #ex = Experiment(ws, 'iris-pipeline')
    #ex.archive()

    ws = Workspace.from_config()

    print("get_workspace:")
    print(ws)
    # ws.write_config(path="", file_name="config.json")
    # print("writing config.json.")

    # Get Azure machine learning cluster
    aml_compute = get_compute(ws, compute_name='cpu1', vm_size='STANDARD_D1')

    # Data stores
    data_dir = "pipelines/modelout"
    def_data_store = ws.get_default_datastore()
    output_dir = PipelineData(name="scores",
                              datastore=def_data_store,
                              output_path_on_compute=data_dir)

    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=[
            'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'
        ],
        pip_packages=[
            'azure', 'azureml-core', 'azureml-pipeline', 'azure-storage',
            'azure-storage-blob', 'azureml-dataprep'
        ]))
    run_config.environment.docker.enabled = True

    ######### TRAIN ################

    # model_path  = "outputs/model.pkl"
    # data_dir = "./outputs/pipelines/modelout/"
    # train_step = PythonScriptStep(
    #     name="Train",
    #     source_directory="./",
    #     script_name="train.py",
    #     compute_target=aml_compute,
    #     arguments=["--model_path", model_path,
    #      "--data_dir",data_dir],
    #     outputs=[output_dir],
    #     runconfig=run_config,
    #     allow_reuse=False,
    # )
    # print("Train Step created")

    ######### REGISTER ################
    # model_path = "trained-model/model.pkl"
    # register_step = PythonScriptStep(
    #     name="Register",
    #     source_directory="./",
    #     script_name="register.py",
    #     compute_target=aml_compute,
    #     arguments=["--model_path", model_path],
    #     inputs=[output_dir],
    #     runconfig=run_config,
    #     allow_reuse=False,
    # )
    # print("Register Step created")

    ######### DEPLOY ################

    # print("Uploading entry script")
    # score_path = "./deploy/deploy.py"
    # datastore = ws.get_default_datastore()
    # datastore.upload_files(files = [model_path], target_path = 'deploy/', overwrite = True,show_progress = True)
    # print("done!")

    deploy_step = PythonScriptStep(
        name="Deploy",
        source_directory="./deploy",
        script_name="deploy.py",
        compute_target=aml_compute,
        arguments=[],
        inputs=[],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Deploy Step created")

    #evaluate_step.run_after(train_step)
    # register_step.run_after(deploy_step)
    steps = [deploy_step]
    train_pipeline = Pipeline(workspace=ws, steps=steps)
    # train_pipeline._set_experiment_name  # no-op: attribute reference, not a call
    train_pipeline.validate()

    published_pipeline = train_pipeline.publish(name="aks-deployment-pipeline",
                                                description="")
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')

    pipeline_parameters = {"model_name": "sklearn_regression_model.pkl"}
    run = published_pipeline.submit(ws, "compute-instance-pipeline-experiment",
                                    pipeline_parameters)
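
    # Illustrative follow-up sketch (not part of the original script): a
    # published pipeline can also be triggered later through its REST endpoint,
    # using an AAD token for authentication; the experiment name and parameters
    # here are simply the values used above.
    import requests
    from azureml.core.authentication import InteractiveLoginAuthentication

    auth_header = InteractiveLoginAuthentication().get_authentication_header()
    response = requests.post(
        published_pipeline.endpoint,
        headers=auth_header,
        json={
            "ExperimentName": "compute-instance-pipeline-experiment",
            "ParameterAssignments": pipeline_parameters,
        })
    print("REST-triggered run id:", response.json().get("Id"))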
Exemplo n.º 21
0
config_json = os.path.join(base_dir, 'config.json')
with open(config_json, 'r') as f:
    config = json.load(f)

try:
    svc_pr = ServicePrincipalAuthentication(
        tenant_id=config['tenant_id'],
        service_principal_id=config['service_principal_id'],
        service_principal_password=config['service_principal_password'])
except KeyError as e:
    print("Getting Service Principal Authentication from Azure Devops")
    svc_pr = None
    pass

ws = Workspace.from_config(path=config_json, auth=svc_pr)

# folder for scripts that need to be uploaded to Aml compute target
script_folder = './scripts/'
try:
    os.makedirs(script_folder)
except FileExistsError:
    print("Deleting:", script_folder)
    shutil.rmtree(script_folder)
    os.makedirs(script_folder)

cpu_compute_name = config['cpu_compute']
try:
    cpu_compute_target = AmlCompute(ws, cpu_compute_name)
    print("found existing compute target: %s" % cpu_compute_name)
except:  # ComputeTargetException:
Exemplo n.º 22
0
    def setUpClass(cls) -> None:
        cls.workspace = Workspace.from_config(
            str(Path(__file__).parent.parent / 'config.json'))
        cls.base_path = Path(__file__).parent.parent / 'data'
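
    # Illustrative sketch (assumption): the actual test methods are not shown in
    # this listing. One test using the shared workspace and data path might be:
    def test_workspace_and_data_available(self) -> None:
        self.assertTrue(self.base_path.exists())
        self.assertIsNotNone(self.workspace.get_default_datastore())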
Exemplo n.º 23
0
parser.add_argument("--aks_name", type=str, help="aks name", dest="aks_name", required=True)
parser.add_argument("--aks_region", type=str, help="aks region", dest="aks_region", required=True)
parser.add_argument("--description", type=str, help="description", dest="description", required=True)
args = parser.parse_args()

print("Argument 1: %s" % args.service_name)
print("Argument 2: %s" % args.aks_name)
print("Argument 3: %s" % args.aks_region)
print("Argument 4: %s" % args.description)

print('creating AzureCliAuthentication...')
cli_auth = AzureCliAuthentication()
print('done creating AzureCliAuthentication!')

print('get workspace...')
ws = Workspace.from_config(auth=cli_auth)
print('done getting workspace!')

image = Image(ws, id = image_id)
print(image)

aks_name = args.aks_name 
aks_region = args.aks_region
aks_service_name = args.service_name

try:
    service = Webservice(name=aks_service_name, workspace=ws)
    print("Deleting AKS service {}".format(aks_service_name))
    service.delete()
except Exception:
    print("No existing webservice found: ", aks_service_name)
print("Argument 1: %s" % args.aml_compute_target)
print("Argument 2: %s" % args.path)
print('1. Parse arguments...END')
print('')
print('')

print('2. Authenticating...START')
print('.............................................')
cliAuth = AzureCliAuthentication()
print('2. Authenticating...END')
print('')
print('')

print('3.  Get workspace reference...START')
print('.............................................')
amlWs = Workspace.from_config(path=args.path, auth=cliAuth)
print('3.  Get workspace reference...END')
print('')
print('')

print('4.  Get compute reference or create new...START')
print('.............................................')
try:
    amlCompute = AmlCompute(amlWs, args.aml_compute_target)
    print("....found existing compute target.")
except ComputeTargetException:
    print("....creating new compute target")

    amlComputeProvisioningConfig = AmlCompute.provisioning_configuration(
        vm_size="STANDARD_D12_V2", min_nodes=0, max_nodes=1)
    amlCompute = ComputeTarget.create(amlWs, args.aml_compute_target,
Exemplo n.º 25
0
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import Workspace
from azureml.core.authentication import AzureCliAuthentication

# load Azure ML workspace
workspace = Workspace.from_config(auth=AzureCliAuthentication())

# Create compute target if not present
# Choose a name for your CPU cluster
cpu_cluster_name = "alwaysoncluster"

# Verify that cluster does not exist already
try:
    cpu_cluster = ComputeTarget(workspace=workspace, name=cpu_cluster_name)
    print('Found existing cluster; using it.')
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D3_V2', min_nodes=2, max_nodes=2)
    cpu_cluster = ComputeTarget.create(workspace, cpu_cluster_name,
                                       compute_config)

cpu_cluster.wait_for_completion(show_output=True)
Exemplo n.º 26
0
def main(req: func.HttpRequest) -> func.HttpResponse:
    logging.info('Python HTTP trigger function processed a request.')

    # For now this can be a POST where we have <base url>/api/HttpTrigger?start=<any string>
    image_url = req.params.get('start')
    logging.info(type(image_url))

    # Write a config.json (fill in template values with system vars)
    config_temp = {
        'subscription_id': os.getenv('AZURE_SUB', ''),
        'resource_group': os.getenv('RESOURCE_GROUP', ''),
        'workspace_name': os.getenv('WORKSPACE_NAME', '')
    }
    with open(os.path.join(os.getcwd(), 'HttpTrigger', 'config.json'),
              'w') as f:
        json.dump(config_temp, f)

    # Get the workspace from config.json
    try:
        ws = Workspace.from_config(
            os.path.join(os.getcwd(), 'HttpTrigger', 'config.json'))
    # Authentication didn't work
    except ProjectSystemException as err:
        return func.HttpResponse(json.dumps('ProjectSystemException'))
    # Need to create the workspace
    except Exception as err:
        ws = Workspace.create(
            name=os.getenv('WORKSPACE_NAME', ''),
            subscription_id=os.getenv('AZURE_SUB', ''),
            resource_group=os.getenv('RESOURCE_GROUP', ''),
            create_resource_group=True,
            location='eastus2'  # Or other supported Azure region   
        )

    # choose a name for your cluster
    cluster_name = "gpuclusterplease"

    try:
        compute_target = ComputeTarget(workspace=ws, name=cluster_name)
        print('Found existing compute target.')
    except ComputeTargetException:
        print('Creating a new compute target...')
        # AML Compute config - if max_nodes is set, the cluster autoscales up to that many nodes (and back down to the minimum when idle)
        compute_config = AmlCompute.provisioning_configuration(
            vm_size='STANDARD_NC6', max_nodes=4)
        # create the cluster
        compute_target = ComputeTarget.create(ws, cluster_name, compute_config)
        compute_target.wait_for_completion(show_output=True)

    # use get_status() to get a detailed status for the current cluster.
    print(compute_target.get_status().serialize())

    # Create a project directory and copy the training script to it
    project_folder = os.path.join(os.getcwd(), 'HttpTrigger', 'project')
    os.makedirs(project_folder, exist_ok=True)
    shutil.copy(os.path.join(os.getcwd(), 'HttpTrigger', 'pytorch_train.py'),
                project_folder)

    # Create an experiment
    experiment_name = 'fish-no-fish'
    experiment = Experiment(ws, name=experiment_name)

    # Use an AML Data Store for training data
    ds = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name='funcdefaultdatastore',
        container_name=os.getenv('STORAGE_CONTAINER_NAME_TRAINDATA', ''),
        account_name=os.getenv('STORAGE_ACCOUNT_NAME', ''),
        account_key=os.getenv('STORAGE_ACCOUNT_KEY', ''),
        create_if_not_exists=True)

    # Use an AML Data Store to save models back up to
    ds_models = Datastore.register_azure_blob_container(
        workspace=ws,
        datastore_name='modelsdatastorage',
        container_name=os.getenv('STORAGE_CONTAINER_NAME_MODELS', ''),
        account_name=os.getenv('STORAGE_ACCOUNT_NAME', ''),
        account_key=os.getenv('STORAGE_ACCOUNT_KEY', ''),
        create_if_not_exists=True)

    # Set up for training ("trans" flag means - use transfer learning and
    # this should download a model on compute)
    script_params = {
        '--data_dir': ds.as_mount(),
        '--num_epochs': 30,
        '--learning_rate': 0.01,
        '--output_dir': './outputs',
        '--trans': 'True'
    }

    # Instantiate PyTorch estimator with upload of final model to
    # a specified blob storage container (this can be anything)
    estimator = PyTorch(
        source_directory=project_folder,
        script_params=script_params,
        compute_target=compute_target,
        entry_script='pytorch_train.py',
        use_gpu=True,
        inputs=[
            ds_models.as_upload(
                path_on_compute='./outputs/model_finetuned.pth')
        ])

    run = experiment.submit(estimator)
    run.wait_for_completion(show_output=True)

    return func.HttpResponse(json.dumps('Job complete'))
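
# Illustrative usage sketch (assumption): the function app name and route below
# are placeholders. Once deployed, the HTTP trigger above could be invoked with
# a simple request, e.g.:
#
#   import requests
#   resp = requests.post(
#       "https://<your-function-app>.azurewebsites.net/api/HttpTrigger",
#       params={"start": "https://example.com/image.jpg"})
#   print(resp.status_code, resp.text)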
Exemplo n.º 27
0
from azureml.exceptions import ComputeTargetException
from azureml.core.authentication import AzureCliAuthentication

# Load the JSON settings file
print("Loading settings")
with open(os.path.join("aml_service", "settings.json")) as f:
    settings = json.load(f)
aml_settings = settings["compute_target"]["training"]["amlcompute"]

# Get workspace
print("Loading Workspace")
cli_auth = AzureCliAuthentication()
config_file_path = os.environ.get("GITHUB_WORKSPACE", default="aml_service")
config_file_name = "aml_arm_config.json"
ws = Workspace.from_config(
    path=config_file_path,
    auth=cli_auth,
    _file_name=config_file_name)
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep = '\n')

try:
    # Loading AMLCompute
    print("Loading existing AML Compute")
    cluster = AmlCompute(workspace=ws, name=aml_settings["name"])

    # Check settings and redeploy if required settings have changed
    print("Found existing cluster")
    if cluster.vm_size.lower() != aml_settings["vm_size"].lower() or cluster.vm_priority.lower() != aml_settings["vm_priority"].lower():
        cluster.delete()
        cluster.wait_for_completion(show_output=True)
        raise ComputeTargetException("Cluster is of incorrect size or has incorrect priority. Deleting cluster and provisioning a new one.")
    
Exemplo n.º 28
0
from azureml.core import Workspace
import requests
import pandas as pd

features = ["age", "fare", "sex", "embarked", "home.dest", "pclass", "sibsp", "parch", "cabin"]
pd.read_csv("data/titanic.csv")[features].iloc[6:7, :].to_json("data/records.json", orient="index")

ws = Workspace.from_config("code/config_ws.json")
service = ws.webservices['titanic-webservice-new']

with open("data/records.json") as file:
    test_samples = file.read()

print(service.run(input_data=str(test_samples)))
print(requests.post(service.scoring_uri, test_samples, headers={'Content-Type': 'application/json'}).text)

service.delete()
Exemplo n.º 29
0
from azureml.core import Workspace
from azureml.core.webservice import Webservice

# Requires the config to be downloaded first to the current working directory
ws = Workspace.from_config()

# Set to the deployment name chosen in a previous stage, e.g. depproj02-ht
name = "dep-p2-htamayo"

# load existing web service
service = Webservice(name=name, workspace=ws)

# Enable Application Insights on the service
service.update(enable_app_insights=True)

logs = service.get_logs()

for line in logs.split('\n'):
    print(line)
Exemplo n.º 30
0
# Load the JSON settings file and relevant section
print("Loading settings")
with open(os.path.join("aml_service", "settings.json")) as f:
    settings = json.load(f)
experiment_settings = settings["experiment"]
compute_target_to_use = settings["compute_target"][
    "compute_target_to_use_for_training"].strip().lower()
compute_target_name = settings["compute_target"]["training"][
    compute_target_to_use]["name"]
workspace_config_settings = settings["workspace"]["config"]

# Get workspace
print("Loading Workspace")
cli_auth = AzureCliAuthentication()
ws = Workspace.from_config(path=workspace_config_settings["path"],
                           auth=cli_auth,
                           _file_name=workspace_config_settings["file_name"])
print(ws.name, ws.resource_group, ws.location, ws.subscription_id, sep='\n')

# Attach Experiment
print("Loading Experiment")
exp = Experiment(workspace=ws, name=experiment_settings["name"])
print(exp.name, exp.workspace.name, sep="\n")

# Load compute target
print("Loading Compute Target")
compute_target = ComputeTarget(workspace=ws, name=compute_target_name)

# Create image registry configuration
if experiment_settings["docker"]["custom_image"]:
    container_registry = ContainerRegistry()