def main(): load_dotenv() workspace_name = os.environ.get("BASE_NAME")+"-AML-WS" resource_group = os.environ.get("BASE_NAME")+"-AML-RG" subscription_id = os.environ.get("SUBSCRIPTION_ID") tenant_id = os.environ.get("TENANT_ID") experiment_name = os.environ.get("EXPERIMENT_NAME") model_name = os.environ.get("MODEL_NAME") app_id = os.environ.get('SP_APP_ID') app_secret = os.environ.get('SP_APP_SECRET') release_id = os.environ.get('RELEASE_RELEASEID') build_id = os.environ.get('BUILD_BUILDID') storageacctname = os.environ.get('STORAGE_ACCT_NAME') storageacctkey = os.environ.get('STORAGE_ACCT_KEY') containername = os.environ.get('STORAGE_BLOB_NAME') service_principal = ServicePrincipalAuthentication( tenant_id=tenant_id, service_principal_id=app_id, service_principal_password=app_secret) aml_workspace = Workspace.get( name=workspace_name, subscription_id=subscription_id, resource_group=resource_group, auth=service_principal ) # Find the pipeline that was published by the specified build ID pipelines = PublishedPipeline.list(aml_workspace) matched_pipes = [] for p in pipelines: if p.version == build_id: matched_pipes.append(p) if(len(matched_pipes) > 1): published_pipeline = None raise Exception(f"Multiple active pipelines are published for build {build_id}.") # NOQA: E501 elif(len(matched_pipes) == 0): published_pipeline = None raise KeyError(f"Unable to find a published pipeline for this build {build_id}") # NOQA: E501 else: published_pipeline = matched_pipes[0] pipeline_parameters = { "model_name": model_name, "release_id": release_id, "storageacctname": storageacctname, "storageacctkey": storageacctkey, "containername": containername } response = published_pipeline.submit( aml_workspace, experiment_name, pipeline_parameters) run_id = response.id print("Pipeline run initiated ", run_id)
def main(): parser = argparse.ArgumentParser("register") parser.add_argument("--output_pipeline_id_file", type=str, default="pipeline_id.txt", help="Name of a file to write pipeline ID to") parser.add_argument( "--skip_train_execution", action="store_true", help=("Do not trigger the execution. " "Use this in Azure DevOps when using a server job to trigger")) args = parser.parse_args() e = Env() aml_workspace = Workspace.get(name=e.workspace_name, subscription_id=e.subscription_id, resource_group=e.resource_group) # Find the pipeline that was published by the specified build ID pipelines = PublishedPipeline.list(aml_workspace) matched_pipes = [] for p in pipelines: if p.name == e.pipeline_name: if p.version == e.build_id: matched_pipes.append(p) if (len(matched_pipes) > 1): published_pipeline = None raise Exception( f"Multiple active pipelines are published for build {e.build_id}." ) # NOQA: E501 elif (len(matched_pipes) == 0): published_pipeline = None raise KeyError( f"Unable to find a published pipeline for this build {e.build_id}" ) # NOQA: E501 else: published_pipeline = matched_pipes[0] print("published pipeline id is", published_pipeline.id) # Save the Pipeline ID for other AzDO jobs after script is complete if args.output_pipeline_id_file is not None: with open(args.output_pipeline_id_file, "w") as out_file: out_file.write(published_pipeline.id) if (args.skip_train_execution is False): pipeline_parameters = {"model_name": e.model_name} tags = {"BuildId": e.build_id} if (e.build_uri is not None): tags["BuildUri"] = e.build_uri experiment = Experiment(workspace=aml_workspace, name=e.experiment_name) run = experiment.submit(published_pipeline, tags=tags, pipeline_parameters=pipeline_parameters) print("Pipeline run initiated ", run.id)
def main(): load_dotenv() workspace_name = os.environ.get("WORKSPACE_NAME") resource_group = os.environ.get("RESOURCE_GROUP_NAME") subscription_id = os.environ.get("SUBSCRIPTION_ID") tenant_id = os.environ.get("TENANT_ID") experiment_name = os.environ.get("EXPERIMENT_NAME") model_name = os.environ.get("MODEL_NAME") ckpt_path = os.environ.get("MODEL_CHECKPOINT_PATH") app_id = os.environ.get('SP_APP_ID') app_secret = os.environ.get('SP_APP_SECRET') build_id = os.environ.get('BUILD_BUILDID') datastore = os.environ.get('EPIS_DATASTORE') container_name = os.environ.get('EPIS_CONTAINER') service_principal = ServicePrincipalAuthentication( tenant_id=tenant_id, service_principal_id=app_id, service_principal_password=app_secret) aml_workspace = Workspace.get(name=workspace_name, subscription_id=subscription_id, resource_group=resource_group, auth=service_principal) pipelines = PublishedPipeline.list(aml_workspace) matched_pipes = [] for p in pipelines: if p.version == build_id: matched_pipes.append(p) if (len(matched_pipes) > 1): published_pipeline = None raise Exception( f"Multiple active pipelines are published for build {build_id}.") elif (len(matched_pipes) == 0): published_pipeline = None raise KeyError( f"Unable to find a published pipeline for this build {build_id}") else: published_pipeline = matched_pipes[0] pipeline_parameters = { "model_name": model_name, "ckpt_path": ckpt_path, "datastore": datastore, "storage_container": container_name } response = published_pipeline.submit(aml_workspace, experiment_name, pipeline_parameters) run_id = response.id print("Pipeline run initiated ", run_id)
def main():
    e = Env()

    service_principal = ServicePrincipalAuthentication(
        tenant_id=e.tenant_id,
        service_principal_id=e.app_id,
        service_principal_password=e.app_secret)

    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
        auth=service_principal)

    # Find the pipeline that was published by the specified build ID
    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipes = []
    for p in pipelines:
        if p.name == e.pipeline_name:
            if p.version == e.build_id:
                matched_pipes.append(p)

    if len(matched_pipes) > 1:
        published_pipeline = None
        raise Exception(
            f"Multiple active pipelines are published for build {e.build_id}."
        )  # NOQA: E501
    elif len(matched_pipes) == 0:
        published_pipeline = None
        raise KeyError(
            f"Unable to find a published pipeline for this build {e.build_id}"
        )  # NOQA: E501
    else:
        published_pipeline = matched_pipes[0]
        print("published pipeline id is", published_pipeline.id)

        # Save the Pipeline ID for other AzDO jobs after script is complete
        os.environ['amlpipeline_id'] = published_pipeline.id
        savePIDcmd = 'echo "export AMLPIPELINE_ID=$amlpipeline_id" >tmp.sh'
        os.system(savePIDcmd)

        # Set this to True for local development or
        # if not using Azure DevOps pipeline execution task
        skip_train_execution = True

        if skip_train_execution is False:
            pipeline_parameters = {"model_name": e.model_name}
            response = published_pipeline.submit(
                aml_workspace, e.experiment_name, pipeline_parameters)
            run_id = response.id
            print("Pipeline run initiated ", run_id)
def get_pipeline(workspace, env, pipeline_id=None):
    if pipeline_id is not None:
        scoring_pipeline = PublishedPipeline.get(workspace, id=pipeline_id)
    else:
        pipeline_list = PublishedPipeline.list(workspace)
        scoring_pipeline = [pl for pl in pipeline_list
                            if pl.name == env.scoring_pipeline_name]
        if len(scoring_pipeline) == 0:
            raise ValueError('no available pipeline to download!')
        else:
            scoring_pipeline = scoring_pipeline[0]

    return scoring_pipeline
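# A minimal usage sketch for get_pipeline above, not part of the original snippet:
# it assumes a local config.json for Workspace.from_config() and stands in a
# SimpleNamespace for the env object; "scoring-pipeline" is a placeholder name.
from types import SimpleNamespace

from azureml.core import Workspace

if __name__ == "__main__":
    ws = Workspace.from_config()
    env = SimpleNamespace(scoring_pipeline_name="scoring-pipeline")  # hypothetical
    # Pass pipeline_id to fetch a specific pipeline, or leave it None to look up by name
    pipeline = get_pipeline(ws, env, pipeline_id=None)
    print("Using scoring pipeline:", pipeline.id)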
def main():
    parser = argparse.ArgumentParser('trigger_pipeline')
    arg = parser.add_argument
    arg('--output-write-file',
        type=str,
        default='pipeline_id_recorder',
        help='the text file to write the pipeline id to')
    arg('--skip-train-exc',
        action='store_true',
        help='option to skip train execution')
    args = parser.parse_args()

    e = ENV()
    print(e.build_id)

    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)

    published_pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipes = []
    for pipe in published_pipelines:
        if pipe.name == e.pipeline_name:
            if pipe.version == e.build_id:
                matched_pipes.append(pipe)

    if len(matched_pipes) > 1:
        published_pipeline = None
        raise Exception('there should be only one published pipeline')
    elif len(matched_pipes) == 0:
        published_pipeline = None
        raise Exception('no pipeline is published on the provided workspace!')
    else:
        published_pipeline = matched_pipes[0]

    print(f'published pipeline id is {published_pipeline.id}')

    if args.output_write_file is not None:
        with open(args.output_write_file, 'w') as output_file:
            output_file.write(published_pipeline.id)

    if args.skip_train_exc is False:
        pipeline_parameters = {'model_name': e.model_name}
        tags = {'build_id': e.build_id}
        if e.build_uri is not None:
            # add the build URI instead of overwriting the build_id tag
            tags['build_uri'] = e.build_uri
        exp = Experiment(workspace=aml_workspace, name=e.experiment_name)
        run = exp.submit(published_pipeline,
                         tags=tags,
                         pipeline_parameters=pipeline_parameters)
        print(f'pipeline {published_pipeline.id} initiated, run id: {run.id}')
def startup():
    import azureml.core
    from azureml.core import Run, Workspace, Experiment, Datastore, Dataset
    from azureml.pipeline.core import PublishedPipeline
    import pandas as pd

    print("SDK version:", azureml.core.VERSION)
    pd.set_option('display.max_colwidth', 120)

    workspace = Workspace.from_config()
    ds = workspace.get_default_datastore()

    experiment_name = 'azure-stackoverflow-classifier'
    experiment = Experiment(workspace, name=experiment_name)

    train = pd.read_csv('./data/train.csv', names=['ID', 'IssueTitle', 'Label'])

    try:
        run = Run(experiment, 'azure-stackoverflow-classifier_1592684426_3767f390')
        hd_run = Run(experiment, 'HD_ddfd3027-4b17-4afd-a42f-cec512ec544b')
        aks_service = workspace.webservices['stackoverflow-classifier']
        pipelines = PublishedPipeline.list(workspace)
        published_pipeline = pipelines[0]
    except Exception:
        print(
            "demo not initialized ... to speed up the demo, after you have run "
            "through the demo script all the way, set the values for the Run, "
            "HD Run and AKS Service to fetch from existing entities instead of "
            "running realtime")
        run = ""
        hd_run = ""
        aks_service = ""
        published_pipeline = ""

    (stackoverflow_dataset, raw_dataset, azure_support_volume_timeseries_train,
     azure_support_volume_timeseries_test) = register_data()

    return (ds, run, hd_run, aks_service, published_pipeline,
            stackoverflow_dataset, raw_dataset, train,
            azure_support_volume_timeseries_train,
            azure_support_volume_timeseries_test)
def find_pipeline_by_name(
        aml_workspace: Workspace,
        pipeline_name: str) -> Union[PublishedPipeline, None]:
    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipelines = list(
        filter(lambda p: p.name == pipeline_name, pipelines))

    date_matched_pipelines = [
        dt.strptime(pipeline.version, "%Y-%m-%dT%H:%M:%S.%f")
        for pipeline in matched_pipelines
    ]
    matched_pipelines = [
        matched_pipelines[idx] for idx in np.argsort(date_matched_pipelines)
    ]

    if matched_pipelines:
        return matched_pipelines[-1]
    return None
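# Hedged usage sketch for find_pipeline_by_name above; "training-pipeline" is an
# illustrative name, and it assumes the pipelines were published with an ISO
# timestamp in their version field, as the function's date parsing expects.
from azureml.core import Workspace

if __name__ == "__main__":
    ws = Workspace.from_config()
    latest = find_pipeline_by_name(ws, "training-pipeline")
    if latest is None:
        print("No published pipeline with that name")
    else:
        print("Latest published version:", latest.version, latest.id)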
def disable_pipeline(pipeline_name="", dry_run=True): from azureml.pipeline.core import PublishedPipeline from azureml.pipeline.core.schedule import Schedule if dry_run: print("Dry run: only printing what would be done") else: print("Disabling pipelines") ws = get_workspace() # Get all published pipeline objects in the workspace all_pub_pipelines = PublishedPipeline.list(ws) # We will iterate through the list of published pipelines and # use the last ID in the list for Schedule operations: print("Published pipelines found in the workspace:") for pub_pipeline in all_pub_pipelines: if (pub_pipeline.name.startswith("prednet") and pub_pipeline.name == pipeline_name or pipeline_name == ""): print("Found pipeline:", pub_pipeline.name, pub_pipeline.id) pub_pipeline_id = pub_pipeline.id schedules = Schedule.list(ws, pipeline_id=pub_pipeline_id) # We will iterate through the list of schedules and # use the last ID in the list for further operations: print("Found these schedules for the pipeline id {}:".format( pub_pipeline_id)) for schedule in schedules: print(schedule.name, schedule.id) if not dry_run: schedule_id = schedule.id print("Schedule id to be used for schedule " "operations: {}".format(schedule_id)) fetched_schedule = Schedule.get(ws, schedule_id) print("Using schedule with id: {}".format( fetched_schedule.id)) fetched_schedule.disable(wait_for_provisioning=True) fetched_schedule = Schedule.get(ws, schedule_id) print("Disabled schedule {}. New status is: {}".format( fetched_schedule.id, fetched_schedule.status)) if not dry_run: print("Disabling pipeline") pub_pipeline.disable()
def get_pipeline(pipeline_id, ws: Workspace, env: Env):
    if pipeline_id is not None:
        scoringpipeline = PublishedPipeline.get(ws, pipeline_id)
    else:
        pipelines = PublishedPipeline.list(ws)
        scoringpipelinelist = [
            pl for pl in pipelines if pl.name == env.scoring_pipeline_name
        ]  # noqa: E501

        if len(scoringpipelinelist) == 0:
            raise Exception("No pipeline found matching name:{}".format(
                env.scoring_pipeline_name))  # NOQA: E501
        else:
            # latest published
            scoringpipeline = scoringpipelinelist[0]

    return scoringpipeline
def main():
    e = Env()

    service_principal = ServicePrincipalAuthentication(
        tenant_id=e.tenant_id,
        service_principal_id=e.app_id,
        service_principal_password=e.app_secret)

    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group,
        auth=service_principal
    )

    # Find the pipeline that was published by the specified build ID
    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipes = []
    for p in pipelines:
        if p.version == e.build_id:
            matched_pipes.append(p)

    if len(matched_pipes) > 1:
        published_pipeline = None
        raise Exception(f"Multiple active pipelines are published for build {e.build_id}.")  # NOQA: E501
    elif len(matched_pipes) == 0:
        published_pipeline = None
        raise KeyError(f"Unable to find a published pipeline for this build {e.build_id}")  # NOQA: E501
    else:
        published_pipeline = matched_pipes[0]

    pipeline_parameters = {"model_name": e.model_name}

    response = published_pipeline.submit(
        aml_workspace,
        e.experiment_name,
        pipeline_parameters)

    run_id = response.id
    print("Pipeline run initiated ", run_id)
def getExistingPipeline(workspace, pipeline_name):
    '''
    Look for and return an existing azureml.pipeline.core.PublishedPipeline
    instance based on name.

    PARAMS:
        workspace     : azureml.core.Workspace : Existing AMLS Workspace
        pipeline_name : string                 : Name of the published pipeline to find.

    RETURNS:
        azureml.pipeline.core.PublishedPipeline if found, None otherwise
    '''
    return_pipeline = None

    pipelines = PublishedPipeline.list(workspace)
    if len(pipelines) > 0:
        for pipe in pipelines:
            if pipe.name == pipeline_name:
                return_pipeline = pipe
                break

    return return_pipeline
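# Illustrative caller for getExistingPipeline above, assuming a local workspace
# config; "batch-scoring-pipeline" is a placeholder name, not from the original.
from azureml.core import Workspace

if __name__ == "__main__":
    ws = Workspace.from_config()
    existing = getExistingPipeline(ws, "batch-scoring-pipeline")
    if existing is not None:
        print("Reusing published pipeline:", existing.id)
    else:
        print("No published pipeline with that name; publish a new one.")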
def clean_azml_workspace(ctx):
    """
    [WARNING] Only use in a test-only workspace.

    Remove or disable all compute clusters, published pipelines, published
    pipeline endpoints and schedules from the Azure ML workspace.
    """
    ws = Workspace.from_config()

    # remove compute clusters
    for _, compute in ws.compute_targets.items():
        if not compute.provisioning_state == "Deleting":
            compute.delete()

    # deactivate schedules
    for s in Schedule.list(ws):
        s.disable()

    # disable pipeline endpoints
    for pe in PipelineEndpoint.list(ws):
        pe.disable()

    # disable published pipelines
    for p in PublishedPipeline.list(ws):
        p.disable()
from azureml.core import Experiment, Workspace
from azureml.pipeline.core import PublishedPipeline, Schedule, ScheduleRecurrence

ws = Workspace.from_config()
exp = Experiment(ws, "MaxFreezerTemperatureExceededPipeline", _create_in_cloud=True)

# Schedule.create expects the pipeline ID string, not the PublishedPipeline object
pipeline_id = PublishedPipeline.list(ws)[0].id

schedule = Schedule.create(
    ws,
    name="four_updates_per_day",
    description="runs the pipeline every 6 hours",
    pipeline_id=pipeline_id,
    recurrence=ScheduleRecurrence(
        frequency="Hour",
        interval=6,
        start_time=None,  # run instantly
        time_zone=None,   # default UTC
    ),
    experiment_name=exp.name,
)

# Schedule.list(ws)
# schedule = Schedule.list(ws)[0]
# schedule.get_last_pipeline_run()
# create a list of datasets stored in blob
print("Checking for new datasets")
blob_service = BlockBlobService(def_blob_store.account_name,
                                def_blob_store.account_key)
generator = blob_service.list_blobs(def_blob_store.container_name,
                                    prefix="prednet/data/raw_data")
datasets = []
for blob in generator:
    dataset = blob.name.split("/")[3]
    if (dataset not in datasets and dataset.startswith("UCSD")
            and not dataset.endswith("txt")):
        datasets.append(dataset)
        print("Found dataset:", dataset)

# Get all published pipeline objects in the workspace
all_pub_pipelines = PublishedPipeline.list(ws)

# Create a list of datasets for which we have (old) and don't have (new) a
# published pipeline
old_datasets = []
new_datasets = []
for dataset in datasets:
    for pub_pipeline in all_pub_pipelines:
        if pub_pipeline.name.endswith(dataset):
            old_datasets.append(dataset)
    if dataset not in old_datasets:
        new_datasets.append(dataset)

for dataset in new_datasets:
    print("Creating pipeline for dataset", dataset)
    build_prednet_pipeline(dataset, ws)
import azureml.core
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.train.estimator import Estimator

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

from azureml.core import Workspace

ws = Workspace.from_config()
print(ws.get_details())

from azureml.pipeline.core import PublishedPipeline, Schedule

# Disable any previously published pipelines together with their schedules
old_pipes = PublishedPipeline.list(ws)
for old_pipe in old_pipes:
    old_schedules = Schedule.list(ws, pipeline_id=old_pipe.id)
    for schedule in old_schedules:
        schedule.disable(wait_for_provisioning=True)
    old_pipe.disable()

ds = ws.get_default_datastore()

params = {
    '--data_path': ws.get_default_datastore().path('data'),
    '--analyze': '',
    '--load_open': '',
    '--load_closed': '',