Example #1
import sys

from azureml.core import Workspace
from azureml.core.compute import AmlCompute, ComputeTarget
from azureml.core.compute_target import ComputeTargetException

from env_variables import Env  # project helper (see Example #11 for the sys.path setup)


def get_compute(workspace: Workspace, compute_name: str, vm_size: str):
    try:
        if compute_name in workspace.compute_targets:
            compute_target = workspace.compute_targets[compute_name]
            if compute_target and isinstance(compute_target, AmlCompute):
                print('Found existing compute target ' + compute_name +
                      ' so using it.')
        else:
            e = Env()
            compute_config = AmlCompute.provisioning_configuration(
                vm_size=vm_size,
                vm_priority=e.vm_priority,
                min_nodes=e.min_nodes,
                max_nodes=e.max_nodes,
                idle_seconds_before_scaledown=300,
                # Uncomment the lines below for VNet support:
                # vnet_resourcegroup_name=vnet_resourcegroup_name,
                # vnet_name=vnet_name,
                # subnet_name=subnet_name
            )
            compute_target = ComputeTarget.create(workspace, compute_name,
                                                  compute_config)
            compute_target.wait_for_completion(show_output=True,
                                               min_node_count=None,
                                               timeout_in_minutes=10)
        return compute_target
    except ComputeTargetException as ex:
        print(ex)
        print('An error occurred trying to provision compute.')
        sys.exit(1)
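A minimal usage sketch for the helper above, assuming the Env helper exposes the workspace and compute settings used throughout these examples:

e = Env()
ws = Workspace.get(name=e.workspace_name,
                   subscription_id=e.subscription_id,
                   resource_group=e.resource_group)
aml_compute = get_compute(ws, e.compute_name, e.vm_size)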
Example #2
def main():

    parser = argparse.ArgumentParser("smoke_test_scoring_service.py")

    parser.add_argument("--type",
                        type=str,
                        choices=["AKS", "ACI", "Webapp"],
                        required=True,
                        help="type of service")
    parser.add_argument("--service",
                        type=str,
                        required=True,
                        help="Name of the image to test")
    args = parser.parse_args()

    e = Env()
    if args.type == "Webapp":
        output = call_web_app(args.service, {})
    else:
        output = call_web_service(e, args.type, args.service)
    print("Verifying service output")

    assert "result" in output
    assert len(output["result"]) == output_len
    print("Smoke test successful.")
Example #3
def main():

    run = Run.get_context()
    if run.id.startswith('OfflineRun'):
        from dotenv import load_dotenv
        sys.path.append(os.path.abspath("./code/util"))  # NOQA: E402
        from model_helper import get_model_by_tag
        # For local development, set values in this section
        load_dotenv()
        workspace_name = os.environ.get("WORKSPACE_NAME")
        experiment_name = os.environ.get("EXPERIMENT_NAME")
        resource_group = os.environ.get("RESOURCE_GROUP")
        subscription_id = os.environ.get("SUBSCRIPTION_ID")
        build_id = os.environ.get('BUILD_BUILDID')
        aml_workspace = Workspace.get(name=workspace_name,
                                      subscription_id=subscription_id,
                                      resource_group=resource_group)
        ws = aml_workspace
        exp = Experiment(ws, experiment_name)
    else:
        sys.path.append(os.path.abspath("./util"))  # NOQA: E402
        from model_helper import get_model_by_tag
        ws = run.experiment.workspace
        exp = run.experiment
        # In a pipeline run the build ID is expected via --build_id below;
        # default to None so the later reference cannot raise NameError.
        build_id = None

    e = Env()

    parser = argparse.ArgumentParser("register")
    parser.add_argument(
        "--build_id",
        type=str,
        help="The Build ID of the build triggering this pipeline run",
    )
    parser.add_argument("--output_model_version_file",
                        type=str,
                        default="model_version.txt",
                        help="Name of a file to write model version to")

    args = parser.parse_args()
    if args.build_id is not None:
        build_id = args.build_id
    model_name = e.model_name

    try:
        tag_name = 'BuildId'
        model = get_model_by_tag(model_name, tag_name, build_id, exp.workspace)
        if model is not None:
            print("Model was registered for this build.")
        else:
            print("Model was not registered for this run.")
            sys.exit(1)
    except Exception as ex:  # avoid shadowing the Env instance `e`
        print(ex)
        print("Model was not registered for this run.")
        sys.exit(1)

    # Save the Model Version for other AzDO jobs after script is complete
    if args.output_model_version_file is not None:
        with open(args.output_model_version_file, "w") as out_file:
            out_file.write(str(model.version))
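get_model_by_tag is a repo-local helper that is not shown in these excerpts. Judging from the call above, it filters the model registry by name plus a tag key/value pair; a minimal sketch under that assumption (only the signature is taken from the call site, the body is illustrative):

from azureml.core import Workspace
from azureml.core.model import Model


def get_model_by_tag(model_name: str, tag_name: str, tag_value: str,
                     aml_workspace: Workspace):
    # List registered models matching the name and tag, and return the
    # latest version (or None when nothing matches).
    models = Model.list(aml_workspace, name=model_name,
                        tags=[[tag_name, tag_value]], latest=True)
    return models[0] if models else None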
Example #4
def main():

    parser = argparse.ArgumentParser("register")
    parser.add_argument(
        "--output_pipeline_id_file",
        type=str,
        default="pipeline_id.txt",
        help="Name of a file to write pipeline ID to"
    )
    parser.add_argument(
        "--skip_train_execution",
        action="store_true",
        help=("Do not trigger the execution. "
              "Use this in Azure DevOps when using a server job to trigger")
    )
    args = parser.parse_args()

    e = Env()

    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group
    )

    # Find the pipeline that was published by the specified build ID
    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipes = []

    for p in pipelines:
        if p.name == e.pipeline_name:
            if p.version == e.build_id:
                matched_pipes.append(p)

    if len(matched_pipes) > 1:
        raise Exception(f"Multiple active pipelines are published for build {e.build_id}.")  # NOQA: E501
    elif len(matched_pipes) == 0:
        raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}")  # NOQA: E501
    else:
        published_pipeline = matched_pipes[0]
        print("published pipeline id is", published_pipeline.id)

        # Save the Pipeline ID for other AzDO jobs after script is complete
        if args.output_pipeline_id_file is not None:
            with open(args.output_pipeline_id_file, "w") as out_file:
                out_file.write(published_pipeline.id)

        if not args.skip_train_execution:
            pipeline_parameters = {"model_name": e.model_name}
            run = published_pipeline.submit(
                aml_workspace,
                e.experiment_name,
                pipeline_parameters)

            print("Pipeline run initiated ", run.id)
Example #5
def main():
    e = Env()
    service_principal = ServicePrincipalAuthentication(
        tenant_id=e.tenant_id,
        service_principal_id=e.app_id,
        service_principal_password=e.app_secret)

    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group,
                                  auth=service_principal)

    # Find the pipeline that was published by the specified build ID
    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipes = []

    for p in pipelines:
        if p.name == e.pipeline_name:
            if p.version == e.build_id:
                matched_pipes.append(p)

    if len(matched_pipes) > 1:
        raise Exception(
            f"Multiple active pipelines are published for build {e.build_id}."
        )
    elif len(matched_pipes) == 0:
        raise KeyError(
            f"Unable to find a published pipeline for this build {e.build_id}"
        )
    else:
        published_pipeline = matched_pipes[0]
        print("published pipeline id is", published_pipeline.id)

        # Save the Pipeline ID for other AzDO jobs after script is complete.
        # The child shell spawned by os.system inherits os.environ, so the
        # echo below expands $amlpipeline_id and writes the real ID into
        # tmp.sh, which a later task can source to pick up AMLPIPELINE_ID.
        os.environ['amlpipeline_id'] = published_pipeline.id
        save_pid_cmd = 'echo "export AMLPIPELINE_ID=$amlpipeline_id" >tmp.sh'
        os.system(save_pid_cmd)

        # Set this to True for local development or
        # if not using Azure DevOps pipeline execution task
        skip_train_execution = True
        if not skip_train_execution:
            pipeline_parameters = {"model_name": e.model_name}
            response = published_pipeline.submit(aml_workspace,
                                                 e.experiment_name,
                                                 pipeline_parameters)

            run_id = response.id
            print("Pipeline run initiated ", run_id)
Example #6
def main():
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group
    )
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(
        aml_workspace,
        e.compute_name,
        e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=['numpy', 'pandas',
                        'scikit-learn', 'tensorflow', 'keras'],
        pip_packages=['azure', 'azureml-core',
                      'azure-storage',
                      'azure-storage-blob'])
    )
    run_config.environment.docker.enabled = True
    run_config.environment.docker.base_image = "mcr.microsoft.com/mlops/python"

    train_step = PythonScriptStep(
        name="Train Model",
        script_name="train_with_r.py",
        compute_target=aml_compute,
        source_directory="code/training/R",
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    steps = [train_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name + "_with_R",
        description="Model training/retraining pipeline",
        version=e.build_id
    )
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
Example #7
def test_get_workspace():
    e = Env()
    workspace_name = e.workspace_name
    resource_group = e.resource_group
    subscription_id = e.subscription_id
    tenant_id = e.tenant_id
    app_id = e.app_id
    app_secret = e.app_secret

    aml_workspace = get_workspace(workspace_name, resource_group,
                                  subscription_id, tenant_id, app_id,
                                  app_secret)

    assert aml_workspace.name == workspace_name
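get_workspace is another repo-local helper that these excerpts call but do not define. Given the arguments in this test, it presumably wraps Workspace.get with a service-principal login; a minimal sketch under that assumption:

from azureml.core import Workspace
from azureml.core.authentication import ServicePrincipalAuthentication


def get_workspace(name: str, resource_group: str, subscription_id: str,
                  tenant_id: str, app_id: str, app_secret: str):
    # Authenticate as a service principal and fetch the workspace handle.
    auth = ServicePrincipalAuthentication(
        tenant_id=tenant_id,
        service_principal_id=app_id,
        service_principal_password=app_secret)
    return Workspace.get(name=name,
                         subscription_id=subscription_id,
                         resource_group=resource_group,
                         auth=auth)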
Example #8
def main():
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = get_workspace(
        e.workspace_name,
        e.resource_group,
        e.subscription_id,
        e.tenant_id,
        e.app_id,
        e.app_secret)
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(
        aml_workspace,
        e.compute_name,
        e.vm_size)
    if aml_compute is not None:
        print(aml_compute)

    train_step = DatabricksStep(
        name="DBPythonInLocalMachine",
        num_workers=1,
        python_script_name="train_with_r_on_databricks.py",
        source_directory="code/training/R",
        run_name='DB_Python_R_demo',
        existing_cluster_id=e.db_cluster_id,
        compute_target=aml_compute,
        allow_reuse=False
    )

    print("Step Train created")

    steps = [train_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name + "_with_R_on_DB",
        description="Model training/retraining pipeline",
        version=e.build_id
    )
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
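DatabricksStep requires a Databricks compute target already attached to the workspace (here the snippet obtains it via get_compute). For reference, attaching one might look like the sketch below; the resource names and access token are placeholders:

from azureml.core.compute import ComputeTarget, DatabricksCompute

# Placeholder values: substitute the real Databricks workspace details.
attach_config = DatabricksCompute.attach_configuration(
    resource_group="my-databricks-rg",
    workspace_name="my-databricks-workspace",
    access_token="my-databricks-access-token")
databricks_compute = ComputeTarget.attach(
    aml_workspace, "dbcompute", attach_config)
databricks_compute.wait_for_completion(True)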
Example #9
def main():
    e = Env()
    service_principal = ServicePrincipalAuthentication(
            tenant_id=e.tenant_id,
            service_principal_id=e.app_id,
            service_principal_password=e.app_secret)

    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
        auth=service_principal
        )

    # Find the pipeline that was published by the specified build ID
    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipes = []

    for p in pipelines:
        if p.version == e.build_id:
            matched_pipes.append(p)

    if len(matched_pipes) > 1:
        raise Exception(f"Multiple active pipelines are published for build {e.build_id}.")  # NOQA: E501
    elif len(matched_pipes) == 0:
        raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}")  # NOQA: E501
    else:
        published_pipeline = matched_pipes[0]

    pipeline_parameters = {"model_name": e.model_name}

    response = published_pipeline.submit(
        aml_workspace,
        e.experiment_name,
        pipeline_parameters)

    run_id = response.id
    print("Pipeline run initiated ", run_id)
Example #10
def get_environment(
    workspace: Workspace,
    environment_name: str,
    create_new: bool = False
):
    try:
        e = Env()
        environments = Environment.list(workspace=workspace)
        restored_environment = None
        # With the lookup loop commented out, the function always rebuilt the
        # environment; restore it so an existing environment is reused.
        for env in environments:
            if env == environment_name:
                restored_environment = environments[environment_name]

        if restored_environment is None or create_new:
            new_env = Environment.from_conda_specification(environment_name, os.path.join(e.sources_directory_train, "conda_dependencies.yml"))  # NOQA: E501
            restored_environment = new_env
            restored_environment.register(workspace)

        if restored_environment is not None:
            print(restored_environment)
        return restored_environment
    except Exception as ex:  # avoid shadowing the Env instance `e`
        print(ex)
        exit(1)
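A short usage sketch, assuming the helper is used to populate a run configuration (ws and the environment name are illustrative):

from azureml.core.runconfig import RunConfiguration

run_config = RunConfiguration()
run_config.environment = get_environment(ws, "mlops_env", create_new=False)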
Example #11
import os
import sys
from azureml.core import Workspace
from azureml.core.image import ContainerImage, Image
from azureml.core.model import Model
sys.path.append(os.path.abspath("./ml_service/util"))  # NOQA: E402
from env_variables import Env

e = Env()

# Get Azure machine learning workspace
ws = Workspace.get(
    name=e.workspace_name,
    subscription_id=e.subscription_id,
    resource_group=e.resource_group
)

model = Model(ws, name=e.model_name, version=e.model_version)
os.chdir("./code/scoring")

image_config = ContainerImage.image_configuration(
    execution_script="score.py",
    runtime="python",
    conda_file="conda_dependencies.yml",
    description="Image with ridge regression model",
    tags={"area": "diabetes", "type": "regression"},
)

image = Image.create(
    name=e.image_name, models=[model], image_config=image_config, workspace=ws
)
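Image.create starts the build asynchronously; a script like this would typically block until the build finishes (a minimal follow-up, using the image handle above):

image.wait_for_creation(show_output=True)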
Example #12
def main():
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=[
            'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'
        ],
        pip_packages=[
            'azure', 'azureml-core', 'azure-storage', 'azure-storage-blob'
        ]))
    run_config.environment.docker.enabled = True

    config_envvar = {}
    if e.collection_uri is not None and e.teamproject_name is not None:
        builduri_base = (e.collection_uri + e.teamproject_name +
                         "/_build/results?buildId=")
        config_envvar["BUILDURI_BASE"] = builduri_base
    run_config.environment.environment_variables = config_envvar

    model_name_param = PipelineParameter(name="model_name",
                                         default_value=e.model_name)
    build_id_param = PipelineParameter(name="build_id",
                                       default_value=e.build_id)
    hyperparameter_alpha_param = PipelineParameter(name="hyperparameter_alpha",
                                                   default_value=0.5)

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id",
            build_id_param,
            "--model_name",
            model_name_param,
            "--alpha",
            hyperparameter_alpha_param,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id",
            build_id_param,
            "--model_name",
            model_name_param,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name=e.register_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id",
            build_id_param,
            "--model_name",
            model_name_param,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Register created")

    evaluate_step.run_after(train_step)
    register_step.run_after(evaluate_step)
    steps = [train_step, evaluate_step, register_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
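The three PipelineParameter defaults above can be overridden at submission time; a sketch of triggering the published pipeline with custom values (the alpha value here is illustrative):

pipeline_parameters = {
    "model_name": e.model_name,
    "build_id": e.build_id,
    "hyperparameter_alpha": 0.3,
}
run = published_pipeline.submit(
    aml_workspace, e.experiment_name, pipeline_parameters)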
Example #13
def main():
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = get_workspace(e.workspace_name, e.resource_group,
                                  e.subscription_id, e.tenant_id, e.app_id,
                                  e.app_secret)
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=[
            'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'
        ],
        pip_packages=[
            'azure', 'azureml-core', 'azure-storage', 'azure-storage-blob'
        ]))
    run_config.environment.docker.enabled = True

    model_name = PipelineParameter(name="model_name",
                                   default_value=e.model_name)
    release_id = PipelineParameter(name="release_id", default_value="0")

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--release_id",
            release_id,
            "--model_name",
            model_name,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--release_id",
            release_id,
            "--model_name",
            model_name,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    evaluate_step.run_after(train_step)
    # train_step is included implicitly through the run_after dependency,
    # so only the final step needs to be listed.
    steps = [evaluate_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')
Example #14
def main():
    e = Env()
    print(e.workspace_name)

    svc_pr = ServicePrincipalAuthentication(
        tenant_id=os.environ.get("TENANT_ID"),
        service_principal_id=os.environ.get("AZURE_SP_ID"),
        service_principal_password=os.environ.get("AZURE_SP_PASSWORD"))

    # Get Azure machine learning workspace
    ws = Workspace.get(name=os.environ.get("WORKSPACE_NAME"),
                       subscription_id=os.environ.get("SUBSCRIPTION_ID"),
                       resource_group=os.environ.get("AZURE_RESOURCE_GROUP"),
                       auth=svc_pr)

    #ex = Experiment(ws, 'iris-pipeline')
    #ex.archive()

    print("get_workspace:")
    print(ws)
    ws.write_config(path="", file_name="config.json")
    print("writing config.json.")

    # Get Azure machine learning cluster
    aml_compute = get_compute(ws, "train-cluster", "STANDARD_DS2_V2")
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    run_config = RunConfiguration(conda_dependencies=CondaDependencies.create(
        conda_packages=[
            'numpy', 'pandas', 'scikit-learn', 'tensorflow', 'keras'
        ],
        pip_packages=[
            'azure', 'azureml-core', 'azureml-pipeline', 'azure-storage',
            'azure-storage-blob', 'azureml-dataprep'
        ]))
    run_config.environment.docker.enabled = True

    ######### TRAIN ################
    train_step = PythonScriptStep(
        name="Train",
        source_directory="models/python/iris/train",
        script_name="train.py",
        compute_target=aml_compute,
        arguments=[],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Train Step created")

    ######### EVALUATE ################
    evaluate_step = PythonScriptStep(
        name="Evaluate",
        source_directory="models/python/iris/evaluate",
        script_name="evaluate.py",
        compute_target=aml_compute,
        arguments=[],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Evaluate Step created")

    ######### REGISTER ################
    register_step = PythonScriptStep(
        name="Register",
        source_directory="models/python/iris/register",
        script_name="register.py",
        compute_target=aml_compute,
        arguments=[],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Register Step created")

    # evaluate_step is created above but deliberately left out of this
    # pipeline; register runs directly after train.
    # evaluate_step.run_after(train_step)
    register_step.run_after(train_step)
    steps = [train_step, register_step]
    train_pipeline = Pipeline(workspace=ws, steps=steps)
    train_pipeline.validate()

    published_pipeline = train_pipeline.publish(name="iris-pipeline",
                                                description="")
    print(f'Published pipeline: {published_pipeline.name}')
    print(f'for build {published_pipeline.version}')

    pipeline_parameters = {"model_name": "iris-pipeline-param"}
    run = published_pipeline.submit(ws, "iris-pipeline-experiment",
                                    pipeline_parameters)
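If the script should block until the submitted run finishes (useful when this is the last step of a CI job), the returned PipelineRun handle can be polled; a minimal follow-up:

    run.wait_for_completion(show_output=True)
    print("Pipeline run status:", run.get_status())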