Example #1
import os
import sys
import time

from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import DatabricksStep


# get_workspace, get_compute and get_experiment_run_url are project helpers
# rather than SDK functions; a sketch of them follows this example.
def main():
    cluster_id = os.environ.get("DATABRICKS_CLUSTER_ID", None)

    # Treat an empty DATABRICKS_CLUSTER_ID as unset so a new cluster is
    # created instead of attaching to cluster "".
    if cluster_id is not None and not cluster_id:
        cluster_id = None

    workspace_name = os.environ.get("AML_WORKSPACE_NAME", None)
    resource_group = os.environ.get("RESOURCE_GROUP", None)
    subscription_id = os.environ.get("SUBSCRIPTION_ID", None)
    tenant_id = os.environ.get("TENANT_ID", None)
    app_id = os.environ.get("SP_APP_ID", None)
    app_secret = os.environ.get("SP_APP_SECRET", None)
    experiment_subfolder = os.environ.get("EXPERIMENT_FOLDER",
                                          'aml_service/experiment')
    # SOURCES_DIR must be set: os.path.join raises if sources_directory
    # is None.
    sources_directory = os.environ.get("SOURCES_DIR", None)
    experiment_folder = os.path.join(sources_directory, experiment_subfolder)
    train_script_path = os.environ.get("TRAIN_SCRIPT_PATH", None)
    databricks_workspace_name = os.environ.get("DATABRICKS_WORKSPACE_NAME",
                                               None)
    databricks_access_token = os.environ.get("DATABRICKS_ACCESS_TOKEN", None)
    databricks_compute_name_aml = os.environ.get("DATABRICKS_COMPUTE_NAME_AML",
                                                 None)
    model_dir = os.environ.get("MODEL_DIR", 'dbfs:/model')
    model_name = os.environ.get("MODEL_NAME", 'torchcnn')

    # Rewrite the dbfs:/ URI as the /dbfs/ mount path visible on the
    # cluster, e.g. 'dbfs:/model' -> '/dbfs/model/torchcnn.pth'.
    path_components = model_dir.split("/", 1)
    model_path = "/dbfs/" + path_components[1] + "/" + model_name + ".pth"

    print("The model path will be %s" % (model_path))

    aml_workspace = get_workspace(workspace_name, resource_group,
                                  subscription_id, tenant_id, app_id,
                                  app_secret)
    print(aml_workspace)

    databricks_compute = get_compute(aml_workspace,
                                     databricks_compute_name_aml,
                                     resource_group, databricks_workspace_name,
                                     databricks_access_token)
    print(databricks_compute)

    step1 = DatabricksStep(name="DBPythonInLocalMachine",
                           num_workers=1,
                           python_script_name=train_script_path,
                           source_directory=sources_directory,
                           run_name='DB_Python_Local_demo',
                           existing_cluster_id=cluster_id,
                           compute_target=databricks_compute,
                           allow_reuse=False,
                           python_script_params=['--MODEL_PATH', model_path])

    step2 = DatabricksStep(name="RegisterModel",
                           num_workers=1,
                           python_script_name="register_model.py",
                           source_directory=experiment_folder,
                           run_name='Register_model',
                           existing_cluster_id=cluster_id,
                           compute_target=databricks_compute,
                           allow_reuse=False,
                           python_script_params=[
                               '--MODEL_PATH', model_path, '--TENANT_ID',
                               tenant_id, '--APP_ID', app_id, '--APP_SECRET',
                               app_secret, '--MODEL_NAME', model_name
                           ])

    step2.run_after(step1)
    print("Step lists created")

    pipeline = Pipeline(workspace=aml_workspace, steps=[step1, step2])
    print("Pipeline is built")

    pipeline.validate()
    print("Pipeline validation complete")

    pipeline_run = pipeline.submit(experiment_name="pipetest")

    print("Pipeline is submitted for execution")

    pipeline_details = pipeline_run.get_details()

    pipeline_run_id = pipeline_details['runId']

    azure_run_url = get_experiment_run_url(subscription_id, resource_group,
                                           workspace_name, pipeline_run_id)

    print("To check details of the Pipeline run, go to " + azure_run_url)

    pipeline_status = pipeline_run.get_status()

    timer_mod = 0

    while pipeline_status in ('Running', 'NotStarted'):
        timer_mod = timer_mod + 10
        time.sleep(10)
        if (timer_mod % 30) == 0:
            print("Status: %s. %s seconds have passed." %
                  (pipeline_status, timer_mod))
        pipeline_status = pipeline_run.get_status()

    if pipeline_status == 'Failed':
        print("AML Pipelne failed. Check %s for details." % (azure_run_url))
        sys.exit(1)
    else:
        print(pipeline_status)

    print("Pipeline completed")
Example #2
import os
import sys
import time

from azureml.pipeline.core import Pipeline
from azureml.pipeline.steps import DatabricksStep


# Uses the same project helpers as Example #1; here all configuration is
# passed in as arguments rather than read from the environment.
def main(cluster_id, workspace_name, resource_group, subscription_id,
         tenant_id, app_id, app_secret, experiment_folder, project_folder,
         train_script_path, databricks_workspace, databricks_access_token,
         databricks_compute_name, mdl_dir, mdl_name):
    mdl_file_name = "%s.pth" % (mdl_name)
    mdl_path = os.path.join(mdl_dir, mdl_file_name)

    print("The model path will be %s" % (mdl_path))

    ws = get_workspace(workspace_name, resource_group, subscription_id,
                       tenant_id, app_id, app_secret)
    print(ws)

    databricks_compute = get_compute(ws, databricks_compute_name,
                                     resource_group, databricks_workspace,
                                     databricks_access_token)
    print(databricks_compute)

    step1 = DatabricksStep(name="DBPythonInLocalMachine",
                           num_workers=1,
                           python_script_name=train_script_path,
                           source_directory=project_folder,
                           run_name='DB_Python_Local_demo',
                           existing_cluster_id=cluster_id,
                           compute_target=databricks_compute,
                           allow_reuse=False,
                           python_script_params=['--MODEL_PATH', mdl_path])

    step2 = DatabricksStep(name="RegisterModel",
                           num_workers=1,
                           python_script_name="Register.py",
                           source_directory=experiment_folder,
                           run_name='Register_model',
                           existing_cluster_id=cluster_id,
                           compute_target=databricks_compute,
                           allow_reuse=False,
                           python_script_params=[
                               '--MODEL_PATH', mdl_path, '--TENANT_ID',
                               tenant_id, '--APP_ID', app_id, '--APP_SECRET',
                               app_secret, '--MODEL_NAME', mdl_name
                           ])

    step2.run_after(step1)
    print("Step lists created")

    pipeline = Pipeline(workspace=ws, steps=[step1, step2])
    print("Pipeline is built")

    pipeline.validate()
    print("Pipeline validation complete")

    pipeline_run = pipeline.submit(experiment_name="pipetest")

    print("Pipeline is submitted for execution")

    pipeline_details = pipeline_run.get_details()

    pipeline_run_id = pipeline_details['runId']

    azure_run_url = get_experiment_run_url(subscription_id, resource_group,
                                           workspace_name, pipeline_run_id)

    print("To check details of the Pipeline run, go to " + azure_run_url)

    pipeline_status = pipeline_run.get_status()

    timer_mod = 0

    # Poll until the run leaves the Running and NotStarted states; a run
    # that has not started yet would otherwise end the loop immediately.
    while pipeline_status in ('Running', 'NotStarted'):
        timer_mod = timer_mod + 10
        time.sleep(10)
        if (timer_mod % 30) == 0:
            print("Status: %s. %s seconds have passed." %
                  (pipeline_status, timer_mod))
        pipeline_status = pipeline_run.get_status()

    if pipeline_status == 'Failed':
        sys.exit("AML Pipeline failed")
    else:
        print(pipeline_status)

    print("Pipeline completed")