def test_source_config_set_params() -> None:
    """
    Check that commandline arguments are set correctly when submitting the script to AzureML.
    In particular, the azureml flag should be omitted, irrespective of how the argument is written.
    """
    s = SourceConfig(root_folder=Path(""),
                     entry_script=Path("something.py"),
                     conda_dependencies_files=[])

    def assert_has_params(expected_args: str) -> None:
        assert s.script_params is not None
        # Arguments are in the keys of the dictionary only, and should have been added in the right order
        assert " ".join(s.script_params.keys()) == expected_args

    with mock.patch("sys.argv", [
            "", "some", "--param", "1",
            f"--{AZURECONFIG_SUBMIT_TO_AZUREML}=True", "more"
    ]):
        s.set_script_params_except_submit_flag()
    assert_has_params("some --param 1 more")
    with mock.patch("sys.argv", [
            "", "some", "--param", "1", f"--{AZURECONFIG_SUBMIT_TO_AZUREML}",
            "False", "more"
    ]):
        s.set_script_params_except_submit_flag()
    assert_has_params("some --param 1 more")
    # Arguments where azureml is just the prefix should not be removed.
    with mock.patch("sys.argv", [
            "", "some", f"--{AZURECONFIG_SUBMIT_TO_AZUREML}foo", "False", "more"
    ]):
        s.set_script_params_except_submit_flag()
    assert_has_params(f"some --{AZURECONFIG_SUBMIT_TO_AZUREML}foo False more")
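# The test above asserts that "--azureml=True" and "--azureml False" are stripped while
# "--azuremlfoo" is kept. A minimal, hypothetical helper that is consistent with those
# assertions might look as follows; it assumes AZURECONFIG_SUBMIT_TO_AZUREML == "azureml"
# and is not the SourceConfig implementation itself.
from typing import List

AZURECONFIG_SUBMIT_TO_AZUREML = "azureml"  # assumed value of the flag name constant


def drop_submit_flag(args: List[str]) -> List[str]:
    """Remove '--azureml' (with or without a value) but keep flags that merely start with it."""
    flag = f"--{AZURECONFIG_SUBMIT_TO_AZUREML}"
    retained: List[str] = []
    skip_next = False
    for arg in args:
        if skip_next:
            # Drop the value that followed a bare "--azureml".
            skip_next = False
            continue
        if arg == flag:
            skip_next = True
            continue
        if arg.startswith(flag + "="):
            # Written as "--azureml=True": drop the single token.
            continue
        retained.append(arg)
    return retained


assert drop_submit_flag(["some", "--param", "1", "--azureml=True", "more"]) == ["some", "--param", "1", "more"]
assert drop_submit_flag(["some", "--azuremlfoo", "False", "more"]) == ["some", "--azuremlfoo", "False", "more"]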
Example #2
def test_create_python_env() -> None:
    """
    Checks if environment variables in the SourceConfig are correctly passed through to the Python environment.
    Environment variables in SourceConfig are only used in the internal InnerEye repo.
    """
    foo = "foo"
    bar = "bar"
    entry_script = Path("something.py")
    conda_file = get_environment_yaml_file()
    s = SourceConfig(root_folder=Path(""),
                     entry_script=entry_script,
                     conda_dependencies_files=[conda_file],
                     environment_variables={foo: bar})
    env = get_or_create_python_environment(
        source_config=s,
        azure_config=get_default_azure_config(),
        register_environment=False)
    assert foo in env.environment_variables
    assert env.environment_variables[foo] == bar

    # Check that some of the basic packages that we expect to always exist are picked up correctly in the Conda env
    def remove_version_number(items: Iterator[str]) -> Set[str]:
        return set(c.split("=")[0] for c in items)

    assert "pytorch" in remove_version_number(
        env.python.conda_dependencies.conda_packages)
    assert "pytorch-lightning" in remove_version_number(
        env.python.conda_dependencies.pip_packages)
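# A rough sketch, using plain azureml-core calls, of the kind of environment that
# get_or_create_python_environment is expected to produce: custom environment variables plus
# Conda/pip dependencies. The environment name is a placeholder, and any registration
# controlled by register_environment above is skipped here.
from azureml.core import Environment
from azureml.core.conda_dependencies import CondaDependencies

env = Environment(name="example-innereye-env")
env.environment_variables = {"foo": "bar"}
env.python.conda_dependencies = CondaDependencies.create(
    conda_packages=["pytorch"],
    pip_packages=["pytorch-lightning"],
)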
Example #3
def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      all_azure_dataset_ids: List[str],
                      all_dataset_mountpoints: List[str],
                      environment_name: str = "") -> ScriptRunConfig:
    """
    Creates a configuration to run the InnerEye training script in AzureML.
    :param azure_config: Azure-related configuration to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param all_azure_dataset_ids: The name of all datasets on blob storage that will be used for this run.
    :param all_dataset_mountpoints: When using the datasets in AzureML, these are the per-dataset mount points.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
    is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
    when running inference for an existing model.
    :return: The configured script run.
    """
    dataset_consumptions = create_dataset_consumptions(
        azure_config, all_azure_dataset_ids, all_dataset_mountpoints)
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = source_config.entry_script.relative_to(
        source_config.root_folder).as_posix()
    logging.info(
        f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
        f"source directory {source_config.root_folder})")
    max_run_duration = None
    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(
            azure_config.max_run_duration)
    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(
        script=entry_script_relative_path,
        arguments=source_config.script_params,
    )
    run_config.environment = get_or_create_python_environment(
        azure_config, source_config, environment_name=environment_name)
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration
    if azure_config.num_nodes > 1:
        distributed_job_config = MpiConfiguration(
            node_count=azure_config.num_nodes)
        run_config.mpi = distributed_job_config
        run_config.framework = "Python"
        run_config.communicator = "IntelMpi"
        run_config.node_count = distributed_job_config.node_count
    if len(dataset_consumptions) > 0:
        run_config.data = {
            dataset.name: dataset
            for dataset in dataset_consumptions
        }
    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    run_config.source_directory_data_store = workspace.datastores.get(
        WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
    script_run_config = ScriptRunConfig(
        source_directory=str(source_config.root_folder),
        run_config=run_config,
    )
    if azure_config.hyperdrive:
        script_run_config = source_config.hyperdrive_config_func(
            script_run_config)  # type: ignore
    return script_run_config
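# A hedged usage sketch for create_run_config as defined above: build the ScriptRunConfig and
# submit it to an experiment. Paths, the experiment name and the empty dataset lists are
# placeholders; get_default_azure_config is the helper used in the tests earlier.
from pathlib import Path
from azureml.core import Experiment

azure_config = get_default_azure_config()
source_config = SourceConfig(root_folder=Path.cwd(),
                             entry_script=Path.cwd() / "something.py",
                             conda_dependencies_files=[get_environment_yaml_file()])
source_config.set_script_params_except_submit_flag()
script_run_config = create_run_config(azure_config,
                                      source_config,
                                      all_azure_dataset_ids=[],
                                      all_dataset_mountpoints=[])
experiment = Experiment(workspace=azure_config.get_workspace(), name="example-experiment")
run = experiment.submit(script_run_config)
print(f"Submitted run {run.id}: {run.get_portal_url()}")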
Example #4
 def submit_to_azureml(self) -> Run:
     """
     Submit a job to AzureML, returning the resulting Run object, or exiting if we were asked to wait for
     completion and the Run did not succeed.
     """
     # The adal package creates a logging.info line each time it gets an authentication token, avoid that.
     logging.getLogger('adal-python').setLevel(logging.WARNING)
     if not self.model_config.azure_dataset_id:
         raise ValueError(
             "When running on AzureML, the 'azure_dataset_id' property must be set."
         )
     model_config_overrides = str(self.model_config.overrides)
     source_config = SourceConfig(
         root_folder=self.project_root,
         entry_script=Path(sys.argv[0]).resolve(),
         conda_dependencies_files=[
             get_environment_yaml_file(),
             self.project_root / fixed_paths.ENVIRONMENT_YAML_FILE_NAME
         ],
         hyperdrive_config_func=lambda estimator: self.model_config.
         get_hyperdrive_config(estimator),
         # For large jobs, upload of results times out frequently because of large checkpoint files. Default is 600
         upload_timeout_seconds=86400,
     )
     source_config.set_script_params_except_submit_flag()
     assert self.model_config.azure_dataset_id is not None  # to stop mypy complaining about next line
     azure_run = submit_to_azureml(self.azure_config, source_config,
                                   model_config_overrides,
                                   self.model_config.azure_dataset_id)
     logging.info("Job submission to AzureML done.")
     if self.azure_config.pytest_mark:
         # The AzureML job can optionally run pytest. Attempt to download it to the current directory.
         # A build step will pick up that file and publish it to Azure DevOps.
         # If pytest_mark is set, this file must exist.
         logging.info("Downloading pytest result file.")
         download_pytest_result(azure_run)
     else:
         logging.info(
             "No pytest_mark present, hence not downloading the pytest result file."
         )
     status = azure_run.get_status()
     # For PR builds where we wait for job completion, the job must have ended in a COMPLETED state.
     # If a pytest failed, the runner has exited with code -1 (see below)
     if self.azure_config.wait_for_completion and status != RunStatus.COMPLETED:
         logging.error(f"Job completed with status {status}. Exiting.")
         exit(-1)
     return azure_run
def submit_for_inference(args: SubmitForInferenceConfig,
                         azure_config: AzureConfig) -> Optional[Path]:
    """
    Create and submit an inference to AzureML, and optionally download the resulting segmentation.
    :param azure_config: An object with all necessary information for accessing Azure.
    :param args: configuration, see SubmitForInferenceConfig
    :return: path to downloaded segmentation on local disc, or None if none.
    """
    logging.info(f"Building Azure configuration from {args.settings}")
    logging.info("Getting workspace")
    workspace = azure_config.get_workspace()
    logging.info("Identifying model")
    model = Model(workspace=workspace, id=args.model_id)
    model_id = model.id
    logging.info(f"Identified model {model_id}")
    source_directory = tempfile.TemporaryDirectory()
    source_directory_name = source_directory.name
    logging.info(
        f"Building inference run submission in {source_directory_name}")
    source_directory_path = Path(source_directory_name)
    copy_image_file(args.image_file,
                    source_directory_path / DEFAULT_DATA_FOLDER)
    # We also copy over run_scoring.py and score.py, in case the model we're using
    # does not have sufficiently recent versions of those files.
    for base in ["run_scoring.py", "score.py"]:
        shutil.copyfile(base, str(source_directory_path / base))
    source_config = SourceConfig(
        root_folder=source_directory_name,
        entry_script=str(source_directory_path / "run_scoring.py"),
        script_params={
            "--data-folder": ".",
            "--spawnprocess": "python",
            "--model-id": model_id,
            "score.py": ""
        },
        conda_dependencies_files=download_conda_dependency_files(
            model, source_directory_path))
    estimator = create_estimator_from_configs(workspace, azure_config,
                                              source_config, [])
    exp = Experiment(workspace=workspace, name=args.experiment_name)
    run = exp.submit(estimator)
    logging.info(f"Submitted run {run.id} in experiment {run.experiment.name}")
    logging.info(f"Run URL: {run.get_portal_url()}")
    if not args.keep_upload_folder:
        source_directory.cleanup()
        logging.info(f"Deleted submission directory {source_directory_name}")
    if args.download_folder is None:
        return None
    logging.info("Awaiting run completion")
    run.wait_for_completion()
    logging.info(f"Run has completed with status {run.get_status()}")
    download_path = choose_download_path(args.download_folder)
    logging.info(f"Attempting to download segmentation to {download_path}")
    run.download_file(DEFAULT_RESULT_IMAGE_NAME, str(download_path))
    if download_path.exists():
        logging.info(f"Downloaded segmentation to {download_path}")
    else:
        logging.warning("Segmentation NOT downloaded")
    return download_path
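# A hedged usage sketch for the submit_for_inference function above. Constructing
# SubmitForInferenceConfig via keyword arguments is an assumption; the field names mirror how
# `args` is used in the function body, and all values are placeholders.
from pathlib import Path

inference_args = SubmitForInferenceConfig(
    model_id="MyModel:1",                      # registered AzureML model, "name:version"
    image_file=Path("scans/patient1.nii.gz"),  # placeholder input image
    experiment_name="example-inference",
    download_folder=Path("segmentations"),     # or None to skip the download
    keep_upload_folder=False,
)
segmentation = submit_for_inference(inference_args, get_default_azure_config())
if segmentation is not None:
    print(f"Segmentation downloaded to {segmentation}")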
Example #6
def create_run_config(azure_config: AzureConfig,
                      source_config: SourceConfig,
                      azure_dataset_id: str = "",
                      environment_name: str = "") -> ScriptRunConfig:
    """
    Creates a configuration to run the InnerEye training script in AzureML.
    :param azure_config: Azure-related configuration to use for model scale-out behaviour
    :param source_config: configurations for model execution, such as name and execution mode
    :param azure_dataset_id: The name of the dataset in blob storage to be used for this run. This can be an empty
    string to not use any datasets.
    :param environment_name: If specified, try to retrieve the existing Python environment with this name. If that
    is not found, create one from the Conda files provided in `source_config`. This parameter is meant to be used
    when running inference for an existing model.
    :return: The configured script run.
    """
    if azure_dataset_id:
        azureml_dataset = get_or_create_dataset(azure_config, azure_dataset_id=azure_dataset_id)
        if not azureml_dataset:
            raise ValueError(f"AzureML dataset {azure_dataset_id} could not be found or created.")
        named_input = azureml_dataset.as_named_input(INPUT_DATA_KEY)
        dataset_consumption = named_input.as_mount() if azure_config.use_dataset_mount else named_input.as_download()
    else:
        dataset_consumption = None
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = source_config.entry_script.relative_to(source_config.root_folder).as_posix()
    logging.info(f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
                 f"source directory {source_config.root_folder})")
    max_run_duration = None
    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(azure_config.max_run_duration)
    workspace = azure_config.get_workspace()
    run_config = RunConfiguration(
        script=entry_script_relative_path,
        arguments=source_config.script_params,
    )
    run_config.environment = get_or_create_python_environment(azure_config, source_config,
                                                              environment_name=environment_name)
    run_config.target = azure_config.cluster
    run_config.max_run_duration_seconds = max_run_duration
    if azure_config.num_nodes > 1:
        distributed_job_config = MpiConfiguration(node_count=azure_config.num_nodes)
        run_config.mpi = distributed_job_config
        run_config.framework = "Python"
        run_config.communicator = "IntelMpi"
        run_config.node_count = distributed_job_config.node_count
    if dataset_consumption:
        run_config.data = {dataset_consumption.name: dataset_consumption}
    # Use blob storage for storing the source, rather than the FileShares section of the storage account.
    run_config.source_directory_data_store = workspace.datastores.get(WORKSPACE_DEFAULT_BLOB_STORE_NAME).name
    script_run_config = ScriptRunConfig(
        source_directory=str(source_config.root_folder),
        run_config=run_config,
    )
    if azure_config.hyperdrive:
        script_run_config = source_config.hyperdrive_config_func(script_run_config)  # type: ignore
    return script_run_config
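# A short sketch of the dataset wiring performed in the branch above, written with plain
# azureml-core calls for a file dataset. The workspace config, dataset name and input key are
# placeholders.
from azureml.core import Dataset, Workspace
from azureml.core.runconfig import RunConfiguration

workspace = Workspace.from_config()  # expects a local config.json describing the workspace
dataset = Dataset.get_by_name(workspace, name="example_azure_dataset")
named_input = dataset.as_named_input("input_data")
# Mounting streams the data from blob storage; as_download() would copy it to the node first.
consumption = named_input.as_mount()
run_config = RunConfiguration()
run_config.data = {consumption.name: consumption}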
Example #7
def test_get_hyperdrive_config(
        number_of_cross_validation_splits: int,
        number_of_cross_validation_splits_per_fold: int,
        test_output_dirs: TestOutputDirectories) -> None:
    """
    Test that the HyperDrive config (sampler and maximum number of runs) is set up correctly for cross validation.
    """
    if number_of_cross_validation_splits_per_fold > 0:
        config = HyperDriveTestModelScalar()
        config.number_of_cross_validation_splits_per_fold = number_of_cross_validation_splits_per_fold

    else:
        config = HyperDriveTestModelSegmentation()

    config.number_of_cross_validation_splits = number_of_cross_validation_splits
    # create HyperDrive config with dummy estimator for testing
    source_config = SourceConfig(root_folder=test_output_dirs.root_dir,
                                 entry_script="something.py",
                                 conda_dependencies_files=[])
    estimator = Estimator(source_directory=source_config.root_folder,
                          entry_script=source_config.entry_script,
                          compute_target="Local")

    hd_config = config.get_hyperdrive_config(estimator=estimator)

    assert hd_config.estimator.source_directory == source_config.root_folder
    assert hd_config.estimator.run_config.script == source_config.entry_script
    assert hd_config.estimator._script_params == source_config.script_params

    if number_of_cross_validation_splits > 0 and number_of_cross_validation_splits_per_fold > 0:
        assert hd_config._max_total_runs == number_of_cross_validation_splits * \
               number_of_cross_validation_splits_per_fold
    elif number_of_cross_validation_splits > 0:
        assert hd_config._max_total_runs == number_of_cross_validation_splits
    else:
        assert hd_config._max_total_runs == HYPERDRIVE_TOTAL_RUNS

    if config.perform_cross_validation:
        # check sampler is as expected
        sampler = config.get_cross_validation_hyperdrive_sampler()

        expected_sampler_dict = {
            CROSS_VALIDATION_SPLIT_INDEX_TAG_KEY:
            choice(list(range(number_of_cross_validation_splits)))
        }

        if number_of_cross_validation_splits_per_fold > 0:
            expected_sampler_dict[
                CROSS_VALIDATION_SUB_FOLD_SPLIT_INDEX_TAG_KEY] = choice(
                    list(range(number_of_cross_validation_splits_per_fold)))

        assert sampler._parameter_space == expected_sampler_dict
    else:
        assert vars(config.get_hyperdrive_config(estimator)) \
               == vars(_create_dummy_hyperdrive_param_search_config(estimator))
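# A hedged sketch of the kind of HyperDrive configuration that the test above exercises:
# a grid sweep over the cross validation split index. The parameter name, metric name and
# script are placeholders; InnerEye derives them from its own constants and configs.
from azureml.core import ScriptRunConfig
from azureml.train.hyperdrive import (GridParameterSampling, HyperDriveConfig,
                                      PrimaryMetricGoal, choice)

number_of_cross_validation_splits = 5
script_run_config = ScriptRunConfig(source_directory=".", script="something.py")
sampler = GridParameterSampling({
    "cross_validation_split_index": choice(list(range(number_of_cross_validation_splits))),
})
hyperdrive_config = HyperDriveConfig(
    run_config=script_run_config,
    hyperparameter_sampling=sampler,
    primary_metric_name="val_loss",  # placeholder metric
    primary_metric_goal=PrimaryMetricGoal.MINIMIZE,
    max_total_runs=number_of_cross_validation_splits,
)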
Example #8
 def submit_to_azureml(self) -> Run:
     """
     Submit a job to AzureML, returning the resulting Run object, or exiting if we were asked to wait for
     completion and the Run did not succeed.
     """
     # The adal package creates a logging.info line each time it gets an authentication token, avoid that.
     logging.getLogger('adal-python').setLevel(logging.WARNING)
     # Azure core prints full HTTP requests even in INFO mode
     logging.getLogger('azure').setLevel(logging.WARNING)
     # PyJWT prints out warnings that are beyond our control
     warnings.filterwarnings("ignore", category=DeprecationWarning)
     if isinstance(self.model_config, DeepLearningConfig) and not self.lightning_container.azure_dataset_id:
         raise ValueError("When running an InnerEye built-in model in AzureML, the 'azure_dataset_id' "
                          "property must be set.")
     hyperdrive_func = lambda run_config: self.model_config.get_hyperdrive_config(run_config)  # type: ignore
     source_config = SourceConfig(
         root_folder=self.project_root,
         entry_script=Path(sys.argv[0]).resolve(),
         conda_dependencies_files=get_all_environment_files(self.project_root),
         hyperdrive_config_func=hyperdrive_func,
         # For large jobs, upload of results can time out because of large checkpoint files. Default is 600
         upload_timeout_seconds=86400,
     )
     source_config.set_script_params_except_submit_flag()
     azure_run = submit_to_azureml(self.azure_config, source_config,
                                   self.lightning_container.all_azure_dataset_ids(),
                                   self.lightning_container.all_dataset_mountpoints())
     logging.info("Job submission to AzureML done.")
     if self.azure_config.pytest_mark and self.azure_config.wait_for_completion:
         # The AzureML job can optionally run pytest. Attempt to download it to the current directory.
         # A build step will pick up that file and publish it to Azure DevOps.
         # If pytest_mark is set, this file must exist.
         logging.info("Downloading pytest result file.")
         download_pytest_result(azure_run)
     else:
         logging.info("No pytest_mark present, hence not downloading the pytest result file.")
     # For PR builds where we wait for job completion, the job must have ended in a COMPLETED state.
     if self.azure_config.wait_for_completion and not is_run_and_child_runs_completed(azure_run):
         raise ValueError(f"Run {azure_run.id} in experiment {azure_run.experiment.name} or one of its child "
                          "runs failed.")
     return azure_run
def create_estimator_from_configs(
        azure_config: AzureConfig, source_config: SourceConfig,
        estimator_inputs: List[DatasetConsumptionConfig]) -> PyTorch:
    """
    Create and return a PyTorch estimator from the provided configuration information.
    :param azure_config: Azure configuration, used to store various values for the job to be submitted
    :param source_config: source configuration, for other needed values
    :param estimator_inputs: value for the "inputs" field of the estimator.
    :return:
    """
    # AzureML seems to sometimes expect the entry script path in Linux format, hence convert to posix path
    entry_script_relative_path = Path(source_config.entry_script).relative_to(
        source_config.root_folder).as_posix()
    logging.info(
        f"Entry script {entry_script_relative_path} ({source_config.entry_script} relative to "
        f"source directory {source_config.root_folder})")
    environment_variables = {
        "AZUREML_OUTPUT_UPLOAD_TIMEOUT_SEC":
        str(source_config.upload_timeout_seconds),
        "MKL_SERVICE_FORCE_INTEL":
        "1",
        **(source_config.environment_variables or {})
    }
    # Merge the project-specific dependencies with the packages that InnerEye itself needs. This should not be
    # necessary if the innereye package is installed. It is necessary when working with an outer project and
    # InnerEye as a git submodule and submitting jobs from the local machine.
    # In case of version conflicts, the package version in the outer project is given priority.
    conda_dependencies = merge_conda_dependencies(
        source_config.conda_dependencies_files)  # type: ignore
    if azure_config.pip_extra_index_url:
        # When an extra-index-url is supplied, swap the order in which packages are searched for.
        # This is necessary if we need to consume packages from extra-index that clash with names of packages on
        # pypi
        conda_dependencies.set_pip_option(
            f"--index-url {azure_config.pip_extra_index_url}")
        conda_dependencies.set_pip_option(
            "--extra-index-url https://pypi.org/simple")
    # create Estimator environment
    framework_version = pytorch_version_from_conda_dependencies(
        conda_dependencies)
    logging.info(f"PyTorch framework version: {framework_version}")
    max_run_duration = None
    if azure_config.max_run_duration:
        max_run_duration = run_duration_string_to_seconds(
            azure_config.max_run_duration)
    workspace = azure_config.get_workspace()
    estimator = PyTorch(
        source_directory=source_config.root_folder,
        entry_script=entry_script_relative_path,
        script_params=source_config.script_params,
        compute_target=azure_config.cluster,
        # Use blob storage for storing the source, rather than the FileShares section of the storage account.
        source_directory_data_store=workspace.datastores.get(
            WORKSPACE_DEFAULT_BLOB_STORE_NAME),
        inputs=estimator_inputs,
        environment_variables=environment_variables,
        shm_size=azure_config.docker_shm_size,
        use_docker=True,
        use_gpu=True,
        framework_version=framework_version,
        max_run_duration_seconds=max_run_duration)
    estimator.run_config.environment.python.conda_dependencies = conda_dependencies
    # We'd like to log the estimator config, but conversion to string fails when the Estimator has some inputs.
    # logging.info(azure_util.estimator_to_string(estimator))
    if azure_config.hyperdrive:
        estimator = source_config.hyperdrive_config_func(
            estimator)  # type: ignore
    return estimator
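# A small sketch of the pip index reordering done above when pip_extra_index_url is set: the
# private feed becomes the primary index and pypi.org is kept as a fallback. The URL and
# package name are placeholders.
from azureml.core.conda_dependencies import CondaDependencies

conda_dependencies = CondaDependencies.create(pip_packages=["example-private-package"])
conda_dependencies.set_pip_option("--index-url https://my.private.feed/simple")
conda_dependencies.set_pip_option("--extra-index-url https://pypi.org/simple")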
    def submit_to_azureml_if_needed(self) -> AzureRunInfo:
        """
        Submit a job to AzureML, returning the resulting AzureRunInfo object, or exiting if we were asked to wait for
        completion and the Run did not succeed.
        """
        if self.azure_config.azureml and isinstance(self.model_config, DeepLearningConfig) \
                and not self.lightning_container.azure_dataset_id:
            raise ValueError(
                "When running an InnerEye built-in model in AzureML, the 'azure_dataset_id' "
                "property must be set.")
        # https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility
        env_variables = {
            "CUBLAS_WORKSPACE_CONFIG": ":4096:8"
        } if self.lightning_container.pl_deterministic else {}
        source_config = SourceConfig(
            root_folder=self.project_root,
            entry_script=Path(sys.argv[0]).resolve(),
            script_params=sys.argv[1:],
            conda_dependencies_files=get_all_environment_files(
                self.project_root),
            hyperdrive_config_func=(
                self.model_config.get_hyperdrive_config if self.model_config
                else self.lightning_container.get_hyperdrive_config),
            # For large jobs, upload of results can time out because of large checkpoint files. Default is 600
            upload_timeout_seconds=86400,
            environment_variables=env_variables)
        # Reduce the size of the snapshot by adding unused folders to amlignore. The Test* subfolders are only needed
        # when running pytest.
        ignored_folders = []
        if not self.azure_config.pytest_mark:
            ignored_folders.extend(["Tests", "TestsOutsidePackage"])
        if not self.lightning_container.regression_test_folder:
            ignored_folders.append("RegressionTestResults")

        all_local_datasets = self.lightning_container.all_local_dataset_paths()
        input_datasets = \
            create_dataset_configs(self.azure_config,
                                   all_azure_dataset_ids=self.lightning_container.all_azure_dataset_ids(),
                                   all_dataset_mountpoints=self.lightning_container.all_dataset_mountpoints(),
                                   all_local_datasets=all_local_datasets)  # type: ignore

        def after_submission_hook(azure_run: Run) -> None:
            """
            A function that will be called right after job submission.
            """
            # Set the default display name to what was provided as the "tag". This will affect single runs
            # and Hyperdrive parent runs
            if self.azure_config.tag:
                azure_run.display_name = self.azure_config.tag
            # Add an extra tag that depends on the run that was actually submitted. This is used for later filtering
            # runs in cross validation analysis
            recovery_id = create_run_recovery_id(azure_run)
            azure_run.tag(RUN_RECOVERY_ID_KEY_NAME, recovery_id)
            print(
                "If this run fails, re-start runner.py and supply these additional arguments: "
                f"--run_recovery_id={recovery_id}")
            if self.azure_config.tensorboard:
                print(
                    "Starting TensorBoard now because you specified --tensorboard"
                )
                monitor(monitor_config=AMLTensorBoardMonitorConfig(
                    run_ids=[azure_run.id]),
                        azure_config=self.azure_config)
            else:
                print(
                    f"To monitor this run locally using TensorBoard, run the script: "
                    f"InnerEye/Azure/tensorboard_monitor.py --run_ids={azure_run.id}"
                )

            if self.azure_config.wait_for_completion:
                # We want the job output to be visible on the console. Do not exit yet if the job fails, because we
                # may need to download the pytest result file.
                azure_run.wait_for_completion(show_output=True,
                                              raise_on_error=False)
                if self.azure_config.pytest_mark:
                    # The AzureML job can optionally run pytest. Attempt to download it to the current directory.
                    # A build step will pick up that file and publish it to Azure DevOps.
                    # If pytest_mark is set, this file must exist.
                    logging.info("Downloading pytest result file.")
                    download_pytest_result(azure_run)
                if azure_run.status == RunStatus.FAILED:
                    raise ValueError(
                        f"The AzureML run failed. Please check this URL for details: "
                        f"{azure_run.get_portal_url()}")

        hyperdrive_config = None
        if self.azure_config.hyperdrive:
            hyperdrive_config = self.lightning_container.get_hyperdrive_config(
                ScriptRunConfig(source_directory=""))

        # Create a temporary file for the merged conda file, that will be removed after submission of the job.
        temp_conda: Optional[Path] = None
        try:
            if len(source_config.conda_dependencies_files) > 1:
                temp_conda = source_config.root_folder / f"temp_environment-{uuid.uuid4().hex[:8]}.yml"
                # Merge the project-specific dependencies with the packages that InnerEye itself needs. This should not
                # be necessary if the innereye package is installed. It is necessary when working with an outer project
                # and InnerEye as a git submodule and submitting jobs from the local machine.
                # In case of version conflicts, the package version in the outer project is given priority.
                merge_conda_files(source_config.conda_dependencies_files,
                                  temp_conda)

            # Calls like `self.azure_config.get_workspace()` will fail if we have no AzureML credentials set up, and so
            # we should only attempt them if we intend to elevate this to AzureML
            if self.azure_config.azureml:
                if not self.azure_config.cluster:
                    raise ValueError(
                        "self.azure_config.cluster not set, but we need a compute_cluster_name to submit"
                        "the script to run in AzureML")
                azure_run_info = submit_to_azure_if_needed(
                    entry_script=source_config.entry_script,
                    snapshot_root_directory=source_config.root_folder,
                    script_params=source_config.script_params,
                    conda_environment_file=temp_conda
                    or source_config.conda_dependencies_files[0],
                    aml_workspace=self.azure_config.get_workspace(),
                    compute_cluster_name=self.azure_config.cluster,
                    environment_variables=source_config.environment_variables,
                    default_datastore=self.azure_config.azureml_datastore,
                    experiment_name=to_azure_friendly_string(
                        create_experiment_name(self.azure_config)),
                    max_run_duration=self.azure_config.max_run_duration,
                    input_datasets=input_datasets,
                    num_nodes=self.azure_config.num_nodes,
                    wait_for_completion=False,
                    ignored_folders=ignored_folders,
                    pip_extra_index_url=self.azure_config.pip_extra_index_url,
                    submit_to_azureml=self.azure_config.azureml,
                    docker_base_image=DEFAULT_DOCKER_BASE_IMAGE,
                    docker_shm_size=self.azure_config.docker_shm_size,
                    tags=additional_run_tags(azure_config=self.azure_config,
                                             commandline_args=" ".join(
                                                 source_config.script_params)),
                    after_submission=after_submission_hook,
                    hyperdrive_config=hyperdrive_config)
                if self.azure_config.tag and azure_run_info.run:
                    if self.lightning_container.perform_cross_validation:
                        # This code is only reached inside Azure. Set display name again - this will now affect
                        # Hyperdrive child runs (for other jobs, this has already been done after submission)
                        cv_index = self.lightning_container.cross_validation_split_index
                        full_display_name = f"{self.azure_config.tag} {cv_index}"
                        azure_run_info.run.display_name = full_display_name
            else:
                azure_run_info = submit_to_azure_if_needed(
                    input_datasets=input_datasets, submit_to_azureml=False)
        finally:
            if temp_conda:
                temp_conda.unlink()
        # submit_to_azure_if_needed calls sys.exit after submitting to AzureML. We only reach this when running
        # the script locally or in AzureML.
        return azure_run_info
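# A hedged, minimal call of submit_to_azure_if_needed outside the runner, using only parameters
# that appear in the method above. The import path assumes the hi-ml (health_azure) package;
# the entry script, environment file and cluster name are placeholders.
from pathlib import Path
from health_azure import submit_to_azure_if_needed

run_info = submit_to_azure_if_needed(
    entry_script=Path("something.py"),
    snapshot_root_directory=Path.cwd(),
    conda_environment_file=Path("environment.yml"),
    compute_cluster_name="example-cluster",
    submit_to_azureml=True,
    wait_for_completion=False,
)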
def submit_for_inference(args: SubmitForInferenceConfig,
                         azure_config: AzureConfig) -> Optional[Path]:
    """
    Create and submit an inference to AzureML, and optionally download the resulting segmentation.
    :param azure_config: An object with all necessary information for accessing Azure.
    :param args: configuration, see SubmitForInferenceConfig
    :return: path to downloaded segmentation on local disc, or None if none.
    """
    logging.info(f"Building Azure configuration from {args.settings}")
    logging.info("Getting workspace")
    workspace = azure_config.get_workspace()
    logging.info("Identifying model")
    model = Model(workspace=workspace, id=args.model_id)
    model_id = model.id
    logging.info(f"Identified model {model_id}")
    source_directory = tempfile.TemporaryDirectory()
    source_directory_path = Path(source_directory.name)
    logging.info(
        f"Building inference run submission in {source_directory_path}")
    image_folder = source_directory_path / DEFAULT_DATA_FOLDER
    image = copy_image_file(args.image_file, image_folder, args.use_dicom)
    model_sas_urls = model.get_sas_urls()
    # Identifies all the files with basename "environment.yml" in the model and downloads them.
    # These downloads should go into a temp folder that will most likely not be included in the model itself,
    # because the AzureML run will later download the model into the same folder structure, and the file names might
    # clash.
    temp_folder = source_directory_path / "temp_for_scoring"
    conda_files = download_files_from_model(model_sas_urls,
                                            ENVIRONMENT_YAML_FILE_NAME,
                                            dir_path=temp_folder)
    if not conda_files:
        raise ValueError(
            "At least 1 Conda environment definition must exist in the model.")
    # Retrieve the name of the Python environment that the training run used. This environment should have been
    # registered. If no such environment exists, it will be re-created from the Conda files provided.
    python_environment_name = model.tags.get(PYTHON_ENVIRONMENT_NAME, "")
    # Copy the scoring script from the repository. This will start the model download from Azure, and invoke the
    # scoring script.
    entry_script = source_directory_path / Path(RUN_SCORING_SCRIPT).name
    shutil.copyfile(str(repository_root_directory(RUN_SCORING_SCRIPT)),
                    str(entry_script))
    source_config = SourceConfig(
        root_folder=source_directory_path,
        entry_script=entry_script,
        script_params=[
            "--model-folder",
            ".",
            "--model-id",
            model_id,
            SCORE_SCRIPT,
            # The data folder must be relative to the root folder of the AzureML job. test_image_files
            # is then just the file relative to the data_folder
            "--data_folder",
            image.parent.name,
            "--image_files",
            image.name,
            "--use_dicom",
            str(args.use_dicom)
        ],
        conda_dependencies_files=conda_files,
    )
    run_config = create_run_config(azure_config,
                                   source_config,
                                   environment_name=python_environment_name)
    exp = Experiment(workspace=workspace, name=args.experiment_name)
    run = exp.submit(run_config)
    logging.info(f"Submitted run {run.id} in experiment {run.experiment.name}")
    logging.info(f"Run URL: {run.get_portal_url()}")
    if not args.keep_upload_folder:
        source_directory.cleanup()
        logging.info(f"Deleted submission directory {source_directory_path}")
    if args.download_folder is None:
        return None
    logging.info("Awaiting run completion")
    run.wait_for_completion()
    logging.info(f"Run has completed with status {run.get_status()}")
    download_path = choose_download_path(args.download_folder)
    logging.info(f"Attempting to download segmentation to {download_path}")
    run.download_file(DEFAULT_RESULT_IMAGE_NAME, str(download_path))
    if download_path.exists():
        logging.info(f"Downloaded segmentation to {download_path}")
    else:
        logging.warning("Segmentation NOT downloaded")
    return download_path
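# A hedged sketch for retrieving the segmentation later when the submitting process did not wait
# for completion. Experiment name, run id and output path are placeholders; DEFAULT_RESULT_IMAGE_NAME
# is the constant used in the function above.
from pathlib import Path
from azureml.core import Experiment, Run, Workspace

workspace = Workspace.from_config()
experiment = Experiment(workspace=workspace, name="example-inference")
run = Run(experiment=experiment, run_id="example_run_id")
run.wait_for_completion()
download_path = Path("segmentations") / "segmentation.nii.gz"
run.download_file(DEFAULT_RESULT_IMAGE_NAME, str(download_path))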