from azureml.core import Experiment, Workspace
from azureml.pipeline.core import PublishedPipeline, Schedule, ScheduleRecurrence

ws = Workspace.from_config()
exp = Experiment(ws, "MaxFreezerTemperatureExceededPipeline", _create_in_cloud=True)

# Schedule.create expects a pipeline ID, so take the ID of the first
# published pipeline in the workspace
pipeline_id = PublishedPipeline.list(ws)[0].id

schedule = Schedule.create(
    ws,
    name="four_updates_per_day",
    description="runs the pipeline every 6 hours",
    pipeline_id=pipeline_id,
    recurrence=ScheduleRecurrence(
        frequency="Hour",
        interval=6,
        start_time=None,  # run instantly
        time_zone=None,   # default UTC
    ),
    experiment_name=exp.name,
)

# Schedule.list(ws)
# schedule = Schedule.list(ws)[0]
# schedule.get_last_pipeline_run()
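# A minimal follow-up sketch (not part of the snippet above): once the schedule
# has fired at least once, the most recent triggered run can be inspected.
# Assumes the newly created schedule is the first entry returned by Schedule.list.
created_schedule = Schedule.list(ws)[0]
last_run = created_schedule.get_last_pipeline_run()
if last_run is not None:
    print("Last scheduled run:", last_run.id, last_run.get_status())
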
import argparse

from azureml.core import Experiment, Workspace
from azureml.pipeline.core import PublishedPipeline

# `Env` is the project's environment-variable helper; its import path depends
# on the repository layout.


def main():
    parser = argparse.ArgumentParser("register")
    parser.add_argument(
        "--aml_pipeline_name",
        type=str,
        help="Name of the AML pipeline to retrieve the ID from")
    parser.add_argument(
        "--output_pipeline_id_file",
        type=str,
        default="preprocessing_pipeline_id.txt",
        help="Name of a file to write the pipeline ID to")
    parser.add_argument(
        "--skip_preprocessing_execution",
        action="store_true",
        help=("Do not trigger the execution. "
              "Use this in Azure DevOps when using a server job to trigger"))
    args = parser.parse_args()

    e = Env()
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group)

    # Find the pipeline that was published by the specified build ID
    pipelines = PublishedPipeline.list(aml_workspace)
    matched_pipes = []

    # TODO: delete latest_version logic
    latest_version = 0
    latest_pipe = None
    for p in pipelines:
        if p.name == args.aml_pipeline_name:
            if p.version == e.build_id:
                matched_pipes.append(p)
            elif int(p.version) > latest_version:
                latest_version = int(p.version)
                latest_pipe = p

    # Fall back to the latest published version if no pipeline matched the build ID
    if len(matched_pipes) == 0 and latest_version > 0:
        matched_pipes.append(latest_pipe)

    if len(matched_pipes) > 1:
        published_pipeline = None
        raise Exception(
            f"Multiple active pipelines are published for build {e.build_id}."
        )  # NOQA: E501
    elif len(matched_pipes) == 0:
        published_pipeline = None
        raise KeyError(
            f"Unable to find a published pipeline for this build {e.build_id}"
        )  # NOQA: E501
    else:
        published_pipeline = matched_pipes[0]
        print("published pipeline id is", published_pipeline.id)

        # Save the pipeline ID for other AzDO jobs after the script completes
        if args.output_pipeline_id_file is not None:
            with open(args.output_pipeline_id_file, "w") as out_file:
                out_file.write(published_pipeline.id)

        if args.skip_preprocessing_execution is False:
            tags = {"BuildId": e.build_id}
            if e.build_uri is not None:
                tags["BuildUri"] = e.build_uri

            experiment = Experiment(
                workspace=aml_workspace,
                name=e.experiment_name + "_preprocess")
            run = experiment.submit(published_pipeline, tags=tags)
            print("Pipeline run initiated ", run.id)
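# A hedged sketch of how a downstream Azure DevOps job could consume the ID
# file written above. The file name matches the default argument; the
# experiment name "downstream_trigger" is an illustrative assumption.
from azureml.core import Experiment, Workspace
from azureml.pipeline.core import PublishedPipeline

ws = Workspace.from_config()
with open("preprocessing_pipeline_id.txt") as id_file:
    saved_pipeline_id = id_file.read().strip()

published = PublishedPipeline.get(workspace=ws, id=saved_pipeline_id)
run = Experiment(ws, "downstream_trigger").submit(published)
print("Triggered run:", run.id)
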
print("Blobstore's name: {}".format(def_blob_store.name))

# create a list of datasets stored in blob
print("Checking for new datasets")
blob_service = BlockBlobService(def_blob_store.account_name,
                                def_blob_store.account_key)
generator = blob_service.list_blobs(def_blob_store.container_name,
                                    prefix="prednet/data/video")
datasets = []
for blob in generator:
    dataset = blob.name.split('/')[3]
    if (dataset not in datasets
            and dataset.startswith("UCSD")
            and not dataset.endswith("txt")):
        datasets.append(dataset)
        print("Found dataset:", dataset)

# Get all published pipeline objects in the workspace
# (PublishedPipeline.get_all is deprecated in favor of PublishedPipeline.list)
all_pub_pipelines = PublishedPipeline.list(ws)

# Create a list of datasets for which we have (old) and don't have (new)
# a published pipeline
old_datasets = []
new_datasets = []
for dataset in datasets:
    for pub_pipeline in all_pub_pipelines:
        if pub_pipeline.name.endswith(dataset):
            old_datasets.append(dataset)
    if dataset not in old_datasets:
        new_datasets.append(dataset)

for dataset in new_datasets:
    print("Creating pipeline for dataset", dataset)
    build_pipeline(dataset, ws, config)
from azureml.pipeline.core import PublishedPipeline
from azureml.core.experiment import Experiment
from azureml.core import Workspace

workspace = Workspace.from_config()

published_pipeline_id = ""
is_debug = True
debug_relay_connection_name = "test"

if published_pipeline_id is None or published_pipeline_id == "":
    raise ValueError("Initialize published_pipeline_id")

pipeline_parameters = {"is_debug": is_debug}
if is_debug:
    if debug_relay_connection_name == "":
        raise ValueError("Hybrid connection name cannot be empty!")
    pipeline_parameters.update(
        {"debug_relay_connection_name": debug_relay_connection_name})

experiment = Experiment(workspace, "Pipeline_debug_experiment")
published_pipeline = PublishedPipeline.get(workspace=workspace,
                                           id=published_pipeline_id)
experiment.submit(published_pipeline,
                  pipeline_parameters=pipeline_parameters)
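# A hedged sketch (an assumption, not part of the snippet above) of how the
# "is_debug" and "debug_relay_connection_name" values would typically be
# exposed on the pipeline side, using azureml PipelineParameter objects that
# are then passed as arguments to the pipeline's steps.
from azureml.pipeline.core import PipelineParameter

is_debug_param = PipelineParameter(name="is_debug", default_value=False)
relay_connection_param = PipelineParameter(
    name="debug_relay_connection_name", default_value="")
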
import requests

from azureml.core import Workspace
from azureml.core.authentication import AzureCliAuthentication
from azureml.pipeline.core import PublishedPipeline

cli_auth = AzureCliAuthentication()

##------------- Get Workspace
subscriptionId = "<your subscription id>"   # make this a parameter
resourceGroup = "<your resource group>"     # make this a parameter
workspaceName = "<your ml workspace name>"  # make this a parameter

ws = Workspace(subscriptionId, resourceGroup, workspaceName, auth=cli_auth)

##------------- Run Published pipeline using REST endpoint
aad_token = cli_auth.get_authentication_header()

# Replace this with the published pipeline id
published_pipeline_id = "ab0691a9-438f-416b-a146-5c7660d1be11"
published_pipeline = PublishedPipeline.get(ws, published_pipeline_id)
rest_endpoint = published_pipeline.endpoint
print("Rest endpoint: " + rest_endpoint)

response = requests.post(
    rest_endpoint,
    headers=aad_token,
    json={
        "ExperimentName": "quality_prediction_gb",
        "RunSource": "SDK",
        "ParameterAssignments": {
            "modelName": "quality_gbm_model.pkl",
            "datasetName": "qualitydataset",
            "datasetStorePath": "/inputdata/train.csv"
        }
    })
print(response)
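# A hedged follow-up sketch: the REST endpoint responds with JSON that includes
# the submitted run's "Id". Wrapping it in a PipelineRun for monitoring is an
# addition on top of the original snippet, not part of it.
from azureml.core import Experiment
from azureml.pipeline.core.run import PipelineRun

run_id = response.json().get("Id")
if run_id:
    pipeline_run = PipelineRun(Experiment(ws, "quality_prediction_gb"), run_id)
    print("Run status:", pipeline_run.get_status())
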
def disable_pipeline(pipeline_name="", dry_run=True):
    from azureml.pipeline.core import PublishedPipeline
    from azureml.pipeline.core.schedule import Schedule

    if dry_run:
        print("Dry run: only printing what would be done")
    else:
        print("Disabling pipelines")

    ws = get_workspace()

    # Get all published pipeline objects in the workspace
    all_pub_pipelines = PublishedPipeline.list(ws)

    # Iterate through the published pipelines, matching either the given name
    # or, when no name is given, any "prednet" pipeline:
    print("Published pipelines found in the workspace:")
    for pub_pipeline in all_pub_pipelines:
        if pub_pipeline.name.startswith("prednet") and (
            pub_pipeline.name == pipeline_name or pipeline_name == ""
        ):
            print("Found pipeline:", pub_pipeline.name, pub_pipeline.id)

            pub_pipeline_id = pub_pipeline.id
            schedules = Schedule.list(ws, pipeline_id=pub_pipeline_id)

            # Iterate through the schedules attached to this pipeline:
            print(
                "Found these schedules for the pipeline id {}:".format(
                    pub_pipeline_id
                )
            )
            for schedule in schedules:
                print(schedule.name, schedule.id)
                if not dry_run:
                    schedule_id = schedule.id
                    print(
                        "Schedule id to be used for schedule "
                        "operations: {}".format(schedule_id)
                    )
                    fetched_schedule = Schedule.get(ws, schedule_id)
                    print(
                        "Using schedule with id: {}".format(fetched_schedule.id)
                    )
                    fetched_schedule.disable(wait_for_provisioning=True)
                    fetched_schedule = Schedule.get(ws, schedule_id)
                    print(
                        "Disabled schedule {}. New status is: {}".format(
                            fetched_schedule.id, fetched_schedule.status
                        )
                    )

            if not dry_run:
                print("Disabling pipeline")
                pub_pipeline.disable()
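# A minimal usage sketch (the pipeline name below is illustrative): preview
# what would be disabled first, then disable the schedules and the pipeline.
disable_pipeline(pipeline_name="prednet_UCSDped1", dry_run=True)
disable_pipeline(pipeline_name="prednet_UCSDped1", dry_run=False)
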
def pipeline():
    return PublishedPipeline.get(workspace=ws, id=pipeline_id)
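# A hedged sketch: if the function above is intended as a pytest fixture (an
# assumption, not stated in the snippet), it would be decorated and consumed
# by a test along these lines.
import pytest


@pytest.fixture
def published_pipeline():
    return PublishedPipeline.get(workspace=ws, id=pipeline_id)


def test_published_pipeline_is_active(published_pipeline):
    assert published_pipeline.status == "Active"
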
# create a list of datasets stored in blob
print("Checking for new datasets")
blob_service = BlockBlobService(def_blob_store.account_name,
                                def_blob_store.account_key)
generator = blob_service.list_blobs(def_blob_store.container_name,
                                    prefix="prednet/data/raw_data")
datasets = []
for blob in generator:
    dataset = blob.name.split("/")[3]
    if (dataset not in datasets
            and dataset.startswith("UCSD")
            and not dataset.endswith("txt")):
        datasets.append(dataset)
        print("Found dataset:", dataset)

# Get all published pipeline objects in the workspace
all_pub_pipelines = PublishedPipeline.list(ws)

# Create a list of datasets for which we have (old) and don't have (new) a
# published pipeline
old_datasets = []
new_datasets = []
for dataset in datasets:
    for pub_pipeline in all_pub_pipelines:
        if pub_pipeline.name.endswith(dataset):
            old_datasets.append(dataset)
    if dataset not in old_datasets:
        new_datasets.append(dataset)

for dataset in new_datasets:
    print("Creating pipeline for dataset", dataset)
    build_prednet_pipeline(dataset, ws)
import azureml.core
from azureml.core import Workspace
from azureml.data.data_reference import DataReference
from azureml.pipeline.core import (Pipeline, PipelineData, PublishedPipeline,
                                   Schedule)
from azureml.pipeline.steps import PythonScriptStep, EstimatorStep
from azureml.train.estimator import Estimator

# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

ws = Workspace.from_config()
print(ws.get_details())

# Disable previously published pipelines and any schedules attached to them
old_pipes = PublishedPipeline.list(ws)
for old_pipe in old_pipes:
    old_schedules = Schedule.list(ws, pipeline_id=old_pipe.id)
    for schedule in old_schedules:
        schedule.disable(wait_for_provisioning=True)
    old_pipe.disable()

ds = ws.get_default_datastore()

params = {
    '--data_path': ws.get_default_datastore().path('data'),
    '--analyze': '',
    '--load_open': '',
    '--load_closed': '',