def get_comparison_baselines(outputs_folder: Path,
                             azure_config: AzureConfig,
                             comparison_blob_storage_paths: List[Tuple[str, str]]) -> List[ComparisonBaseline]:
    """
    Downloads and reads the dataset.csv and metrics.csv files of the given comparison runs.
    :param outputs_folder: Folder into which the comparison data is downloaded.
    :param azure_config: Azure configuration used to fetch the comparison runs.
    :param comparison_blob_storage_paths: A list of (name, path) pairs, where each path consists of a run
        recovery ID (optionally prefixed with an experiment name and a colon) followed by a blob path inside
        that run's outputs.
    :return: A list of ComparisonBaseline objects, one per comparison run.
    """
    comparison_baselines: List[ComparisonBaseline] = []
    for (comparison_name, comparison_path) in comparison_blob_storage_paths:
        # Discard the experiment part of the run recovery ID, if any.
        comparison_path = comparison_path.split(":")[-1]
        run_rec_id, blob_path_str = comparison_path.split("/", 1)
        run_rec_id = strip_prefix(run_rec_id, AZUREML_RUN_FOLDER_PREFIX)
        blob_path = Path(strip_prefix(blob_path_str, DEFAULT_AML_UPLOAD_DIR + "/"))
        run = azure_config.fetch_run(run_rec_id)
        (comparison_dataset_path, comparison_metrics_path) = get_comparison_baseline_paths(
            outputs_folder, blob_path, run, DATASET_CSV_FILE_NAME)
        # If both dataset.csv and metrics.csv were downloaded successfully, read their contents and
        # add a ComparisonBaseline to the result.
        if comparison_dataset_path is not None and comparison_metrics_path is not None and \
                comparison_dataset_path.exists() and comparison_metrics_path.exists():
            comparison_baselines.append(ComparisonBaseline(comparison_name,
                                                           pd.read_csv(comparison_dataset_path),
                                                           pd.read_csv(comparison_metrics_path),
                                                           run_rec_id))
        else:
            raise ValueError(f"Could not find comparison data for run {run_rec_id}")
    return comparison_baselines
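

# Illustrative usage sketch, not part of the library API: it shows the expected shape of the
# comparison_blob_storage_paths argument of get_comparison_baselines. The experiment name, run
# recovery ID and blob path below are hypothetical placeholders; azure_config is assumed to be a
# fully populated AzureConfig for the workspace that contains the comparison runs.
def _example_get_comparison_baselines(outputs_folder: Path,
                                      azure_config: AzureConfig) -> List[ComparisonBaseline]:
    # Each entry is (display_name, "<experiment>:<run_recovery_id>/<path_inside_run_outputs>").
    # The "<experiment>:" prefix is optional and is discarded by get_comparison_baselines.
    comparison_paths = [
        ("baseline_model", "my_experiment:run_recovery_id_1234/outputs/epoch_100/Test"),
    ]
    return get_comparison_baselines(outputs_folder, azure_config, comparison_paths)

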
def get_first_child_run(azure_config: AzureConfig) -> Run:
    """
    Returns the first completed child run of the HyperDrive run identified by azure_config.run_recovery_id,
    so that data can be downloaded from it.
    :param azure_config: Azure configuration holding the run recovery ID of the parent run.
    :return: The first completed child run.
    """
    if not azure_config.run_recovery_id:
        raise ValueError("azure_config.run_recovery_id is not provided.")
    hyperdrive_run = azure_config.fetch_run(azure_config.run_recovery_id)
    child_runs = fetch_child_runs(hyperdrive_run, status=RunStatus.COMPLETED)
    return child_runs[0]
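

# Illustrative usage sketch, not part of the library API: downloads a single file from the first
# completed child of the HyperDrive run referenced by azure_config.run_recovery_id. The destination
# folder is a hypothetical parameter, and the file is assumed to have been stored under the exact
# name DATASET_CSV_FILE_NAME; Run.download_file is the standard AzureML SDK method for fetching
# one file from a run's stored outputs.
def _example_download_from_first_child(azure_config: AzureConfig, destination: Path) -> Path:
    child_run = get_first_child_run(azure_config)
    target = destination / DATASET_CSV_FILE_NAME
    child_run.download_file(name=DATASET_CSV_FILE_NAME, output_file_path=str(target))
    return target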