def create_and_submit_experiment(
        azure_config: AzureConfig,
        source_config: SourceConfig,
        model_config_overrides: str,
        azure_dataset_id: str) -> Run:
    """
    Creates an AzureML experiment in the workspace and submits it for execution.

    :param azure_config: azure related configurations to setup valid workspace
    :param source_config: The information about which code should be submitted, and which arguments should be used.
    :param model_config_overrides: A string that describes which model parameters were overwritten by commandline
        arguments in the present run. This is only used for diagnostic purposes (it is set as a Tag on the run).
    :param azure_dataset_id: The name of the dataset in blob storage to be used for this run.
    :returns: Run object for the submitted AzureML run
    """
    aml_workspace = azure_config.get_workspace()
    friendly_name = azure_util.to_azure_friendly_string(create_experiment_name(azure_config))
    experiment = Experiment(workspace=aml_workspace, name=friendly_name)
    script_run_config = create_run_config(azure_config, source_config, azure_dataset_id)
    # Submit a training/testing run associated with the experiment.
    run: Run = experiment.submit(script_run_config)
    # Attach diagnostic metadata (tags) to the freshly created run.
    set_run_tags(run, azure_config, model_config_overrides)
    print("\n==============================================================================")
    print(f"Successfully queued new run {run.id} in experiment: {experiment.name}")
    if azure_config.run_recovery_id:
        print(f"\nRecovered from: {azure_config.run_recovery_id}")
    recovery_id = azure_util.create_run_recovery_id(run)
    # Persist the recovery ID to a well-known file so a later invocation can resume this run.
    recovery_file = Path(RUN_RECOVERY_FILE)
    if recovery_file.exists():
        recovery_file.unlink()
    recovery_file.write_text(recovery_id)
    print(f"Experiment URL: {experiment.get_portal_url()}")
    print(f"Run URL: {run.get_portal_url()}")
    print("If this run fails, re-start runner.py and supply these additional arguments: "
          f"--run_recovery_id={recovery_id}")
    print(f"The run recovery ID has been written to this file: {recovery_file}")
    print("==============================================================================")
    if azure_config.tensorboard and azure_config.azureml:
        # Only start a TensorBoard monitor for genuine AzureML submissions.
        print("Starting TensorBoard now because you specified --tensorboard")
        monitor(monitor_config=AMLTensorBoardMonitorConfig(run_ids=[run.id]), azure_config=azure_config)
    else:
        print(f"To monitor this run locally using TensorBoard, run the script: "
              f"InnerEye/Azure/tensorboard_monitor.py --run_ids={run.id}")
    print("==============================================================================")
    return run
def after_submission_hook(azure_run: Run) -> None:
    """
    A function that will be called right after job submission.
    """
    # NOTE: `self` is a free variable here — this function is defined inside a method
    # and closes over the enclosing object's azure_config.
    config = self.azure_config
    # Set the default display name to what was provided as the "tag". This will affect single runs
    # and Hyperdrive parent runs
    if config.tag:
        azure_run.display_name = config.tag
    # Add an extra tag that depends on the run that was actually submitted. This is used for later filtering
    # run in cross validation analysis
    recovery_id = create_run_recovery_id(azure_run)
    azure_run.tag(RUN_RECOVERY_ID_KEY_NAME, recovery_id)
    print("If this run fails, re-start runner.py and supply these additional arguments: "
          f"--run_recovery_id={recovery_id}")
    if config.tensorboard:
        print("Starting TensorBoard now because you specified --tensorboard")
        monitor(monitor_config=AMLTensorBoardMonitorConfig(run_ids=[azure_run.id]),
                azure_config=config)
    else:
        print(f"To monitor this run locally using TensorBoard, run the script: "
              f"InnerEye/Azure/tensorboard_monitor.py --run_ids={azure_run.id}")
    if config.wait_for_completion:
        # We want the job output to be visible on the console. Do not exit yet if the job fails, because we
        # may need to download the pytest result file.
        azure_run.wait_for_completion(show_output=True, raise_on_error=False)
        if config.pytest_mark:
            # The AzureML job can optionally run pytest. Attempt to download it to the current directory.
            # A build step will pick up that file and publish it to Azure DevOps.
            # If pytest_mark is set, this file must exist.
            logging.info("Downloading pytest result file.")
            download_pytest_result(azure_run)
        if azure_run.status == RunStatus.FAILED:
            raise ValueError(f"The AzureML run failed. Please check this URL for details: "
                             f"{azure_run.get_portal_url()}")
def patch_and_parse(args: List[str]) -> AMLTensorBoardMonitorConfig:
    """
    Returns a MonitorArguments object created using the mock arguments.
    """
    # Prepend a dummy program name so the patched argv mimics a real command line.
    fake_argv = [""] + args
    with mock.patch("sys.argv", fake_argv):
        return AMLTensorBoardMonitorConfig.parse_args()
def create_and_submit_experiment(azure_config: AzureConfig,
                                 script_run_config: ScriptRunConfig,
                                 commandline_args: str) -> Run:
    """
    Creates an AzureML experiment in the workspace and submits it for execution.

    :param azure_config: azure related configurations to setup a valid workspace.
    :param script_run_config: The configuration for the script that should be run inside of AzureML.
    :param commandline_args: A string with all commandline arguments that were provided to the runner. These are only
        used to set a tag on the submitted AzureML run.
    :returns: Run object for the submitted AzureML run
    """
    aml_workspace = azure_config.get_workspace()
    friendly_name = azure_util.to_azure_friendly_string(create_experiment_name(azure_config))
    experiment = Experiment(workspace=aml_workspace, name=friendly_name)
    # Submit a training/testing run associated with the experiment.
    run: Run = experiment.submit(script_run_config)
    if is_offline_run_context(run):
        # This codepath will only be executed in unit tests, when exp.submit is mocked.
        return run
    # Set metadata for the run.
    set_run_tags(run, azure_config, commandline_args=commandline_args)
    print("\n==============================================================================")
    print(f"Successfully queued new run {run.id} in experiment: {experiment.name}")
    if azure_config.run_recovery_id:
        print(f"\nRecovered from: {azure_config.run_recovery_id}")
    recovery_id = azure_util.create_run_recovery_id(run)
    # Persist the recovery ID to a well-known file so a later invocation can resume this run.
    recovery_file = Path(RUN_RECOVERY_FILE)
    if recovery_file.exists():
        recovery_file.unlink()
    recovery_file.write_text(recovery_id)
    print(f"Experiment URL: {experiment.get_portal_url()}")
    print(f"Run URL: {run.get_portal_url()}")
    print("If this run fails, re-start runner.py and supply these additional arguments: "
          f"--run_recovery_id={recovery_id}")
    print(f"The run recovery ID has been written to this file: {recovery_file}")
    print("==============================================================================")
    if azure_config.tensorboard and azure_config.azureml:
        # Only start a TensorBoard monitor for genuine AzureML submissions.
        print("Starting TensorBoard now because you specified --tensorboard")
        monitor(monitor_config=AMLTensorBoardMonitorConfig(run_ids=[run.id]), azure_config=azure_config)
    else:
        print(f"To monitor this run locally using TensorBoard, run the script: "
              f"InnerEye/Azure/tensorboard_monitor.py --run_ids={run.id}")
    print("==============================================================================")
    return run