Example #1
    def update_io(inputs, outputs):
        for key, value in inputs.items():
            if isinstance(value, _Dataset):
                raise UserErrorException(
                    "Dataset cannot be used without providing a name for the run. Please provide "
                    "a name by calling the as_named_input instance method on dataset."
                )
            elif isinstance(value, DatasetConsumptionConfig):
                value.dataset._ensure_saved(workspace)
                inputs[key] = Data.create(value)
                input_data.append(value)

                # Record the dataset name in an environment variable so that
                # mount validation is skipped for datasets consuming the latest version
                if value.dataset._consume_latest:
                    env_vars = run_config.environment.environment_variables
                    if _SKIP_VALIDATE_DATASETS not in env_vars:
                        env_vars[_SKIP_VALIDATE_DATASETS] = value.name
                    else:
                        env_vars[_SKIP_VALIDATE_DATASETS] = ",".join(
                            [env_vars[_SKIP_VALIDATE_DATASETS], value.name])
            elif isinstance(value, Data):
                input_data.append(value)
            else:
                raise UserErrorException("{} cannot be used as input.".format(
                    type(value).__name__))
        for key, value in outputs.items():
            if isinstance(value, OutputDatasetConfig):
                outputs[key] = output_data[key] = value._to_output_data()
            elif isinstance(value, OutputData):
                output_data[key] = value
            else:
                raise UserErrorException("{} cannot be used as output.".format(
                    type(value).__name__))
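
A standalone sketch of how the skip-validation environment variable accumulates comma-separated dataset names; the key value below is a hypothetical stand-in for the real _SKIP_VALIDATE_DATASETS constant.

_SKIP_VALIDATE_DATASETS = "AZUREML_SKIP_VALIDATE_DATASETS"  # hypothetical value
env_vars = {}
for name in ["train", "test"]:
    if _SKIP_VALIDATE_DATASETS not in env_vars:
        env_vars[_SKIP_VALIDATE_DATASETS] = name
    else:
        env_vars[_SKIP_VALIDATE_DATASETS] = ",".join(
            [env_vars[_SKIP_VALIDATE_DATASETS], name])
print(env_vars)  # {'AZUREML_SKIP_VALIDATE_DATASETS': 'train,test'}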
Example #2
def _get_upload_from_files(file_paths, target_path, relative_root,
                           skip_root_check):
    paths_to_upload = []
    target_path = _sanitize_target_path(target_path)
    for file_path in file_paths:
        if not skip_root_check and relative_root not in file_path and relative_root != "/":
            raise UserErrorException(
                "relative_root: '{}' is not part of the file_path: '{}'.".
                format(relative_root, file_path))
        if not os.path.isfile(file_path):
            err_msg = "'{}' does not point to a file. " + \
                "Please upload the file to cloud first if running in a cloud notebook."
            raise UserErrorException(err_msg.format(file_path))

        target_file_path = to_unix_path(file_path)
        if relative_root != "/":
            # needed because os.path.relpath fails on Windows when the paths are on different drives
            target_file_path = os.path.relpath(target_file_path,
                                               to_unix_path(relative_root))
        else:
            # strip away / otherwise we will create a folder in the container with no name
            target_file_path = target_file_path.lstrip("/")

        if target_path:
            target_file_path = os.path.join(target_path, target_file_path)

        paths_to_upload.append((file_path, target_file_path))

    return paths_to_upload
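
A runnable sketch of the target-path computation above, assuming POSIX-style inputs; to_unix_path here is a simplified stand-in for the real helper.

import os

def to_unix_path(path):  # simplified stand-in for the real helper
    return path.replace("\\", "/")

for file_path, relative_root in [("/data/raw/a.csv", "/data"),
                                 ("/data/raw/b.csv", "/")]:
    target = to_unix_path(file_path)
    if relative_root != "/":
        target = os.path.relpath(target, to_unix_path(relative_root))
    else:
        target = target.lstrip("/")
    print(file_path, "->", target)
# /data/raw/a.csv -> raw/a.csv
# /data/raw/b.csv -> data/raw/b.csv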
Example #3
def reactivate_dataset(workspace=None,
                       dataset_name=None,
                       dataset_id=None,
                       logger=None):
    if _check_python() is False:
        raise UserErrorException(
            'The dataset command subgroup is only supported with Python 3.5 or later'
        )
    dataset = Dataset.get(workspace, dataset_name, dataset_id)
    dataset_state = dataset.state
    if dataset_state == 'active':
        raise UserErrorException("Dataset '{}' ({}) is already active".format(
            dataset.name, dataset.id))
    dataset.reactivate()
    dataset = Dataset.get(workspace, name=dataset.name)
    if dataset.state == 'active':
        logger.info("Dataset '{}' ({}) was reactivated successfully".format(
            dataset.name, dataset.id))
        return dataset._get_base_info_dict_show()
    else:
        logger.debug(
            "dataset reactivate error. name: {} id: {} state: {}".format(
                dataset.name, dataset.id, dataset.state))
        raise Exception("Error, Dataset '{}' ({}) was not reactivated".format(
            dataset.name, dataset.id))
Example #4
    def _create(cls,
                definition,
                properties=None,
                registration=None,
                telemetry_info=None):
        if registration is not None and not isinstance(registration,
                                                       _DatasetRegistration):
            raise UserErrorException(
                'registration must be instance of `_DatasetRegistration`')
        if telemetry_info is not None and not isinstance(
                telemetry_info, _DatasetTelemetryInfo):
            raise UserErrorException(
                'telemetry_info must be instance of `_DatasetTelemetryInfo`')
        dataset = cls()
        dataset._definition = definition  # definition is either str or Dataflow which is immutable
        dataset._properties = deepcopy(properties) if properties else {}
        dataset._registration = registration
        dataset._telemetry_info = telemetry_info

        from azureml.data._partition_format import parse_partition_format

        steps = dataset._dataflow._get_steps()
        partition_keys = []
        for step in steps:
            if step.step_type == 'Microsoft.DPrep.AddColumnsFromPartitionFormatBlock' and \
                    step.arguments['partitionFormat']:
                parsed_result = parse_partition_format(
                    step.arguments['partitionFormat'])
                if len(parsed_result) == 3 and parsed_result[2]:
                    partition_keys = parsed_result[2]
                break
        dataset._properties[_PARTITION_KEYS] = partition_keys
        return dataset
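
A rough, illustrative equivalent of the partition-key extraction above; the real parsing lives in azureml.data._partition_format.parse_partition_format, and this regex only sketches the idea.

import re

def extract_partition_keys(partition_format):
    # '{country}/{state}/{partition_date:yyyy/MM/dd}/data.parquet' -> names inside {}
    return [m.group(1) for m in re.finditer(r"\{(\w+)(?::[^}]*)?\}", partition_format)]

print(extract_partition_keys("{country}/{state}/{partition_date:yyyy/MM/dd}/data.parquet"))
# ['country', 'state', 'partition_date']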
Example #5
    def save(self, path=None):
        """Save the conda dependencies object to file.

        :param path: The fully qualified path of the file you want to save to.
        :type path: str
        :return: The normalized conda path.
        :rtype: str
        :raises azureml.exceptions.UserErrorException: Raised for issues saving the dependencies.
        """
        if os.path.isdir(path):
            raise UserErrorException("Cannot save a conda environment specification file to a directory. "
                                     "Please specify a fully qualified path along with the "
                                     "file name to save the file.")

        parent_dir = os.path.dirname(path)

        if parent_dir == "" or os.path.exists(parent_dir) and os.path.isdir(parent_dir):
            normalized_conda_path = normalize_windows_paths(path)
        else:
            raise UserErrorException(
                "Cannot save the conda environment specification file to an invalid path.")

        self._validate()

        with open(normalized_conda_path, 'w') as outfile:
            ruamel.yaml.round_trip_dump(self._conda_dependencies, outfile)
        return normalized_conda_path
Example #6
 def _validate_yaml(ruamel_yaml_object):
     if not isinstance(ruamel_yaml_object, dict):
         raise UserErrorException("Environment error: not a valid YAML structure")
     for key in ruamel_yaml_object.keys():
         if str(key) not in CondaDependencies._VALID_YML_KEYS:
             msg = "Environment error: unknown {} key in environment specification".format(str(key))
             raise UserErrorException(msg)
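
A standalone sketch of the key validation, assuming the valid keys are the standard conda specification keys (the real list lives on CondaDependencies._VALID_YML_KEYS).

_VALID_YML_KEYS = ["name", "channels", "dependencies", "prefix"]  # assumed values
spec = {"name": "myenv", "dependencies": ["python=3.8"], "pin_packages": True}
unknown = [key for key in spec if str(key) not in _VALID_YML_KEYS]
print(unknown)  # ['pin_packages'] -> _validate_yaml would raise UserErrorException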
Example #7
    def get_partition_key_values(self, partition_keys=None):
        """Return unique key values of partition_keys.

        validate if partition_keys is a valid subset of full set of partition keys, return unique key values of
        partition_keys, default to return the unique key combinations by taking the full set of partition keys of this
        dataset if partition_keys is None

        .. code-block:: python

            # get all partition key value pairs
            partitions = ds.get_partition_key_values()
            # Return [{'country': 'US', 'state': 'WA', 'partition_date': datetime('2020-1-1')}]

            partitions = ds.get_partition_key_values(['country'])
            # Return [{'country': 'US'}]

        :param partition_keys: partition keys
        :type partition_keys: builtin.list[str]
        """
        import time
        starting_time = time.process_time()

        if not self.partition_keys or len(self.partition_keys) == 0:
            raise UserErrorException(
                "get_partition_key_values is not available to a dataset that has no "
                "partition keys")

        if not partition_keys:
            partition_keys = self.partition_keys

        invalid_keys = []
        for key in partition_keys:
            if key not in self.partition_keys:
                invalid_keys.append(key)
        if len(invalid_keys) != 0:
            raise UserErrorException(
                "{0} are invalid partition keys".format(invalid_keys))

        dataflow = self._dataflow.keep_columns(partition_keys)
        # iterate over a copy: removing items from a list while iterating it skips elements
        for step in list(dataflow._steps):
            if step.step_type in ('Microsoft.DPrep.ReadParquetFileBlock',
                                  'Microsoft.DPrep.ParseDelimitedBlock',
                                  'Microsoft.DPrep.ParseJsonLinesBlock'):
                dataflow._steps.remove(step)
        dataflow = dataflow.distinct_rows()
        df = dataflow.to_pandas_dataframe()
        partition_key_values = df[partition_keys].to_dict(
            orient='records') if df.shape[0] != 0 else []

        if self._registration and self._registration.workspace:
            collect_datasets_usage(
                _get_logger(), _PATITION_KEY_VALUES_ACTIVITY, [self],
                self._registration.workspace, "{}", {
                    "execution_time": time.process_time() - starting_time,
                    "number_of_partition_keys": len(partition_keys)
                })
        return partition_key_values
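
A minimal sketch of the subset validation performed above.

partition_keys = ["country", "state", "partition_date"]  # dataset's full key set
requested = ["country", "region"]                        # caller-supplied subset
invalid_keys = [key for key in requested if key not in partition_keys]
print(invalid_keys)  # ['region'] -> get_partition_key_values would raise UserErrorException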
Example #8
 def _validate_config(self, data_reference, key):
     from azureml.exceptions import UserErrorException
     if not data_reference.data_store_name:
         raise UserErrorException(
             "DataReference {} is missing the datastore name".format(key))
     if self._is_upload(
             data_reference) and not data_reference.path_on_compute:
         raise UserErrorException(
             "DataReference {} is missing the relative path on the compute".
             format(key))
Example #9
def _text_input(prompt_message, allow_empty=False):
    text_1 = input(prompt_message)
    if len(text_1) <= 0 and not allow_empty:
        raise UserErrorException("Empty value not allowed. Please try again.")

    text_2 = input("Re-enter the value for confirmation:")
    if text_1 == text_2:
        return text_1
    else:
        raise UserErrorException(
            "Entered values don't match. Please try again.")
Example #10
def _password_input(prompt_message, allow_empty=False):
    password_1 = getpass.getpass(prompt_message)
    if len(password_1) <= 0 and not allow_empty:
        raise UserErrorException(
            "Empty password not allowed. Please try again.")

    password_2 = getpass.getpass("Re-enter the password for confirmation:")
    if password_1 == password_2:
        return password_1
    else:
        raise UserErrorException(
            "Entered passwords don't match. Please try again.")
Example #11
def submit_pipeline(
        workspace=None,  # Auto populated args + object
        pipeline_id=None,
        experiment_name=None,
        pipeline_yaml=None,
        pipeline_params=None,
        datapath_params=None,
        output_file=None,
        # We enforce a logger
        logger=None):
    """
    Submit a pipeline run based on a published pipeline ID
    """

    if pipeline_id is None and pipeline_yaml is None:
        raise UserErrorException("Please specify a pipeline ID or a pipeline YAML file")

    published_pipeline = None
    pipeline = None

    if pipeline_id is not None:
        from azureml.pipeline.core import PublishedPipeline
        published_pipeline = PublishedPipeline.get(workspace, pipeline_id)
        if experiment_name is None or experiment_name == '':
            # Use the pipeline name as the experiment name
            experiment_name = published_pipeline._sanitize_name()

    else:
        from azureml.pipeline.core import Pipeline
        pipeline = Pipeline.load_yaml(workspace, pipeline_yaml)

    if experiment_name is None:
        raise UserErrorException("Please specify an experiment name")

    assigned_params = _parse_key_values(pipeline_params, 'Parameter assignment')

    datapaths = _parse_key_values(datapath_params, 'Datapath assignment')
    for datapath_param_name in datapaths:
        datastore_with_path = datapaths[datapath_param_name]
        if '/' not in datastore_with_path:
            raise UserErrorException("Datapath value %s should have format datastore/path" % datastore_with_path)
        path_tokens = datastore_with_path.split('/', 1)
        from azureml.core import Datastore
        from azureml.data.datapath import DataPath
        datastore = Datastore(workspace, path_tokens[0])
        assigned_params[datapath_param_name] = DataPath(datastore=datastore, path_on_datastore=path_tokens[1])

    dict_output = _pipeline_run_submit(experiment_name, assigned_params, published_pipeline, pipeline,
                                       workspace, output_file, logger)

    return dict_output
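
A minimal sketch of the datastore/path parsing used for datapath parameters above.

datastore_with_path = "workspaceblobstore/raw/2020/data.csv"
if '/' not in datastore_with_path:
    raise ValueError("Datapath value %s should have format datastore/path" % datastore_with_path)
datastore_name, path_on_datastore = datastore_with_path.split('/', 1)
print(datastore_name, path_on_datastore)  # workspaceblobstore raw/2020/data.csv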
Example #12
    def _validate_inputs(dataset, compute_target):

        if not isinstance(dataset, TabularDataset):
            raise UserErrorException(
                'Invalid type. dataset should be of type '
                'azureml.data.tabular_dataset.TabularDataset but was found to be '
                'of type {0}.'.format(type(dataset)))

        if not isinstance(compute_target, (ComputeTarget, str)):
            raise UserErrorException(
                'Invalid type. compute_target should be either of type ComputeTarget or '
                'string but was found to be of type {0}.'.format(
                    type(compute_target)))
Example #13
def convert_seconds_to_duration(duration_in_seconds):
    """
    Convert duration in seconds into ISO-8601 formatted seconds string.

    """
    try:
        duration_in_seconds = int(duration_in_seconds)
    except:
        raise UserErrorException(
            'Invalid input, provide an integer duration in seconds')

    if duration_in_seconds < 0:
        raise UserErrorException('Invalid input, provide duration in seconds')

    return "PT{}S".format(duration_in_seconds)
Example #14
    def add_pip_package(self, pip_package):
        r"""Add a pip package.

        .. note::

            Adding a dependency of an already referenced package will remove the previous reference and add a new \
            reference to the end of the dependencies list. This may change the order of the dependencies.

        :param pip_package: The pip package to be added.
        :type pip_package: str
        """
        if self._is_option(pip_package):
            raise UserErrorException(
                "Invalid package name {}".format(
                    pip_package
                ))

        self.remove_pip_package(pip_package)

        if not self._has_pip_package():
            pip_obj = {PIP: [pip_package]}
            if PACKAGES in self._conda_dependencies:
                self._conda_dependencies[PACKAGES].append(pip_obj)
            else:
                self._conda_dependencies[PACKAGES] = [pip_obj]
        elif pip_package not in self.pip_packages:
            for pitem in self._conda_dependencies[PACKAGES]:
                if PIP in pitem and isinstance(pitem, dict):
                    pitem[PIP].append(pip_package)
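
A sketch of the conda-spec structure this method mutates, assuming PACKAGES == "dependencies" and PIP == "pip" (assumed constant values).

PACKAGES, PIP = "dependencies", "pip"  # assumed values of the module constants
conda = {PACKAGES: ["python=3.8", {PIP: ["azureml-core"]}]}
# add_pip_package("numpy") ends up appending into the nested pip list:
for pitem in conda[PACKAGES]:
    if isinstance(pitem, dict) and PIP in pitem:
        pitem[PIP].append("numpy")
print(conda)  # {'dependencies': ['python=3.8', {'pip': ['azureml-core', 'numpy']}]}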
Example #15
    def set_pip_option(self, pip_option):
        """Add a pip option.

        :param pip_option: The pip option to add.
        :type pip_option: str
        """
        if not self._is_option(pip_option):
            raise UserErrorException(
                "Invalid pip option {}".format(
                    pip_option
                ))

        if not self._has_pip_package():
            pip_obj = {PIP: [pip_option]}
            if PACKAGES in self._conda_dependencies:
                self._conda_dependencies[PACKAGES].append(pip_obj)
            else:
                self._conda_dependencies[PACKAGES] = [pip_obj]

        else:
            options = [x.split()[0] for x in self.pip_options]
            option_to_add = pip_option.split()[0]
            for pitem in self._conda_dependencies[PACKAGES]:
                if PIP in pitem and isinstance(pitem, dict):
                    if option_to_add not in options:
                        pitem[PIP].append(pip_option)
                    else:
                        for i in range(len(pitem[PIP])):
                            if pitem[PIP][i].split()[0] == option_to_add:
                                pitem[PIP][i] = pip_option
Example #16
 def upload_dir(self,
                dir_path,
                origin,
                container,
                path_to_name_fn=None,
                datastore_name=None):
     """
      Upload all files in path.

      :rtype: list[BatchArtifactContentInformationDto]
     """
     if not os.path.isdir(dir_path):
         raise UserErrorException(
             "Cannot upload path: {} since it is not a valid directory.".
             format(dir_path))
     paths_to_upload = []
     names = []
     for pathl, _subdirs, files in os.walk(dir_path):
         for _file in files:
             fpath = os.path.join(pathl, _file)
             paths_to_upload.append(fpath)
             if path_to_name_fn is not None:
                 name = path_to_name_fn(fpath)
             else:
                 name = fpath
             names.append(name)
     self._logger.debug("Uploading {}".format(names))
     result = self.upload_files(paths_to_upload,
                                origin,
                                container,
                                names,
                                datastore_name=datastore_name)
     return result
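
A hypothetical path_to_name_fn that uploads files under names relative to the walked directory instead of their full local paths; passing make_relative_name_fn(dir_path) as path_to_name_fn would strip the local prefix from every artifact name.

import os

def make_relative_name_fn(root):
    def path_to_name_fn(fpath):
        # e.g. /tmp/outputs/plots/a.png -> plots/a.png
        return os.path.relpath(fpath, root).replace(os.sep, "/")
    return path_to_name_fn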
Example #17
 def reactivate_experiment(self,
                           experiment_id,
                           new_name=None,
                           caller=None,
                           custom_headers=None,
                           is_async=False):
     """
     Reactivate an archived experiment
     :param experiment_id: experiment id (required)
     :type experiment_id: str
     :param new_name: new experiment name (optional)
     :type new_name: str
     :param is_async: execute request asynchronously
     :type is_async: bool
     :param caller: caller function name (optional)
     :type caller: optional[string]
     :param custom_headers: headers that will be added to the request (optional)
     :type custom_headers: optional[dict]
     :return:
         the return type is based on is_async parameter.
         If is_async parameter is True,
         the request is called asynchronously.
      :rtype: ~_restclient.models.ExperimentDto (is_async is False) or
         azureml._async.AsyncTask (is_async is True)
     """
     if new_name is not None:
         raise UserErrorException(
             "Cannot rename an experiment. If the archived experiment name conflicts"
             " with an active experiment name, you can delete the active experiment"
             " before unarchiving this experiment.")
     modify_experiment_dto = ModifyExperimentDto(archive=False)
     return self.update_experiment(experiment_id, modify_experiment_dto,
                                   caller, custom_headers, is_async)
Example #18
def get_workspace_or_default_name(workspace_name,
                                  throw_error=False,
                                  subscription_id=None,
                                  auth=None,
                                  project_path=None):
    """
    Order is
    1) Get workspace name from the specified parameter,
    2) From project context,
    3) Using az configure defaults.
    :param auth:
    :type auth: azureml.core.authentication.AbstractAuthentication
    :param workspace_name:
    :type workspace_name: str
    :param throw_error: When True, raise an error if no workspace name can be resolved.
    :type throw_error: bool
    :return: Returns the provided or default value of the workspace name.
    """
    if workspace_name:
        return workspace_name

    project_object = _get_project_object(subscription_id=subscription_id,
                                         auth=auth,
                                         project_path=project_path)
    if project_object:
        return project_object.workspace.name

    if throw_error:
        raise UserErrorException(
            'Error, default workspace not set and workspace name parameter not provided.'
            '\nPlease set a default workspace using "az ml folder attach -w myworkspace -g '
            'myresourcegroup" or provide a value for the workspace name parameter.'
        )
    else:
        return workspace_name
Example #19
 def _handle_http_operation_error(self,
                                  operation_error,
                                  origin,
                                  container,
                                  path,
                                  prefix=False):
     """
     Handles HttpOperationError received from Artifact Service
     :param operation_error: the error received
     :type operation_error: HttpOperationError
     :param origin: origin component of the artifactId
     :type origin: str
     :param container: container component of the artifactId
     :type container: str
     :param path: path component of the artifactId
     :type path: str
     :param prefix: boolean, true if the path represents a directory, false if a single file
     :type prefix: bool
     """
     if operation_error.response.status_code == 404:
         existing_files = self.get_file_paths(origin, container)
         type_string = "Prefix" if prefix else "File"
         raise UserErrorException("{0} with path {1} was not found,\n"
                                  "available files include: "
                                  "{2}.".format(type_string, path,
                                                ",".join(existing_files)))
     else:
         raise operation_error
Example #20
 def handle_error(error):
     if error.response.status_code == 404:
         return UserErrorException(
             'Cannot find dataset registered with name "{}"{} in the workspace.'
             .format(
                 name, '' if version == 'latest' else
                 ' (version: {})'.format(version)))
Example #21
    def remove_tags(self, tags=None):
        """Remove the specified keys from tags dictionary of this dataset.

        :param tags: The list of keys to remove.
        :type tags: builtin.list[str]
        :return: The updated dataset object.
        :rtype: typing.Union[azureml.data.TabularDataset, azureml.data.FileDataset]
        """
        if not self._registration or not self._registration.workspace or not self._registration.registered_id:
            raise UserErrorException(
                'To remove tags from this dataset it must be registered.')
        workspace = self._registration.workspace

        def request():
            updatedTags = deepcopy(self._registration.tags)
            for item in set(tags).intersection(updatedTags):
                del updatedTags[item]

            return _restclient(workspace).dataset.update_dataset(
                workspace.subscription_id,
                workspace.resource_group,
                workspace.name,
                dataset_id=self._registration.registered_id,
                new_dataset_dto=_dataset_to_dto(
                    self, self.name, self.description, updatedTags,
                    self._registration.registered_id),
                custom_headers=self._get_telemetry_headers())

        success, result = _make_request(request)
        if not success:
            raise result
        result_dto = _dto_to_dataset(workspace, result)
        self._registration.tags = result_dto.tags
        return result_dto
Example #22
    def _validate_mode(self, mode):
        from azureml.core.runconfig import SUPPORTED_DATAREF_MODES
        from azureml.exceptions import UserErrorException

        message = "Invalid mode {0}. Only mount, download, upload are supported"
        if mode not in SUPPORTED_DATAREF_MODES:
            raise UserErrorException(message.format(mode))
Example #23
    def upload_dir(self,
                   dir_path,
                   path_to_name_fn=None,
                   skip_first_level=False):
        """
        Upload all files in path.

        :rtype: list[BatchArtifactContentInformationDto]
        """
        if self._run_id is None:
            raise UserErrorException("Cannot upload when run_id is None")
        paths_to_upload = []
        names = []
        for pathl, _subdirs, files in os.walk(dir_path):
            for _file in files:
                fpath = os.path.join(pathl, _file)
                paths_to_upload.append(fpath)
                if path_to_name_fn is not None:
                    name = path_to_name_fn(fpath)
                elif skip_first_level:
                    # drop the first path segment; guard against paths with no separator
                    path_parts = pathl.split(os.sep, 1)
                    if len(path_parts) < 2:
                        raise UserErrorException(
                            "Cannot skip the first level of path '{}'".format(pathl))
                    name = os.path.join(path_parts[1], _file)
                else:
                    name = fpath
                names.append(name)
        self._logger.debug("Uploading {}".format(names))
        result = self.upload_files(paths_to_upload, names)
        return result
Example #24
def attach_folder_to_workspace_and_experiment(
        workspace=None,
        experiment_name=None,
        path=None,
        # We should enforce a logger
        logger=None):

    path = os.path.abspath(path)
    if os.path.exists(path) and not os.path.isdir(path):
        raise UserErrorException("The provided path [{}] must be a directory".format(path))
    elif not os.path.exists(path):
        logger.info("Creating non-existent path %s", path)
        os.makedirs(path, exist_ok=True)

    logger.debug("Workspace to attach is %s", workspace._workspace_id)

    if experiment_name is None:
        path = path.rstrip('\\/')
        experiment_to_attach = os.path.basename(path)
        logger.debug("No experiment name was provided")
    else:
        experiment_to_attach = experiment_name

    logger.debug("Attaching folder %s to experiment %s", path, experiment_to_attach)
    project = workspace._initialize_folder(experiment_to_attach, directory=path)

    return project._serialize_to_dict()
Example #25
 def _ensure_workspace(self, workspace):
     if workspace is not None:
         return workspace
     if self._registration is None or self._registration.workspace is None:
         raise UserErrorException(
             'The dataset does not belong to a workspace. Please pass in the workspace '
             'as an argument.')
     return self._registration.workspace
Example #26
 def wrapped(self, *args, **kwargs):
     if self._id is None:
         raise UserErrorException(
             "{} doesn't have an id set therefore, the {} cannot "
             "modify the experiment. Please call the Experiment "
             "constructor by setting _create_in_cloud to True".format(
                 self, self.__class__.__name__))
     return func(self, *args, **kwargs)
Example #27
def _check_paramiko():
    try:
        import paramiko
        return paramiko.AuthenticationException
    except ImportError:
        raise UserErrorException(
            "Please install paramiko to use deprecated legacy compute target methods."
        )
Example #28
    def wait_for_completion(self, show_output=False):
        """Wait for the model evaluation process to finish.

        :param show_output: Boolean option to print more verbose output. Defaults to False.
        :type show_output: bool
        """
        if not (self.workspace and self.create_operation_id):
            raise UserErrorException('wait_for_completion operation cannot be performed on this object. '
                                     'Make sure the object was created via the appropriate method '
                                     'in the Model class.')
        operation_state, error, request_id = self._get_operation_state()
        self.parent_request_id = request_id
        current_state = operation_state
        if show_output:
            sys.stdout.write('{}'.format(current_state))
            sys.stdout.flush()
        while operation_state not in ['Cancelled', 'Succeeded', 'Failed', 'TimedOut']:
            time.sleep(5)
            operation_state, error, _ = self._get_operation_state()
            if show_output:
                sys.stdout.write('.')
                if operation_state != current_state:
                    sys.stdout.write('\n{}'.format(operation_state))
                    current_state = operation_state
                sys.stdout.flush()
        sys.stdout.write('\n')
        sys.stdout.flush()
        module_logger.info(
            'Model {} operation with name {} finished operation {}\n'.format(
                self.__class__._model_eval_type, self.name, operation_state
            )
        )
        if operation_state == 'Failed':
            if error and 'statusCode' in error and 'message' in error:
                module_logger.info(
                    'Model {} failed with\n'
                    'StatusCode: {}\n'
                    'Message: {}\n'
                    'Operation ID: {}\n'
                    'Request ID: {}\n'.format(
                        self.__class__._model_eval_type,
                        error['statusCode'],
                        error['message'],
                        self.create_operation_id,
                        self.parent_request_id
                    )
                )
            else:
                module_logger.info(
                    'Model profiling failed, unexpected error response:\n'
                    '{}\n'
                    'Operation ID: {}\n'
                    'Request ID: {}\n'.format(
                        error,
                        self.create_operation_id,
                        self.parent_request_id)
                )
        self._update_creation_state()
Example #29
    def update_args_and_io(args, inputs, outputs):

        if isinstance(args, str):
            return

        for index in range(len(args)):
            if isinstance(args[index], _Dataset):
                raise UserErrorException(
                    "Dataset cannot be used directly in a run. If you are using a FileDataset and "
                    "would like to mount or download the dataset, please call the as_mount or the as_download "
                    "methods on the dataset object. If you would like to use the direct mode, please call the "
                    "as_named_input method on the dataset object to convert the dataset into a "
                    "DatasetConsumptionConfig. Please visit our public documentation for more information on these "
                    "methods and classes at https://docs.microsoft.com/en-us/python/api/azureml-core/azureml.data"
                    ".output_dataset_config.outputfiledatasetconfig?view=azure-ml-py."
                )
            elif isinstance(args[index], Data):
                raise UserErrorException(
                    "azureml.core.runconfig.Data is not supported in arguments. Only "
                    "DatasetConsumptionConfig is supported. It can be created by calling "
                    "dataset.as_named_input('my_dataset')")
            elif isinstance(args[index], DatasetConsumptionConfig):
                dataset = args[index]
                if dataset.name in inputs:
                    module_logger.warning((
                        "Dataset with the name {} is already defined in the data section of the "
                        "RunConfiguration. The DatasetConsumptionConfig in the data section will "
                        "be used to materialized the data").format(
                            dataset.name))
                else:
                    inputs[dataset.name] = dataset
                args[index] = _DATASET_ARGUMENT_TEMPLATE.format(dataset.name)
            elif isinstance(args[index], OutputDatasetConfig):
                output = args[index]
                args[index] = _DATASET_OUTPUT_ARGUMENT_TEMPLATE.format(
                    output.name)
                outputs[output.name] = output
            elif isinstance(args[index], OutputData):
                raise UserErrorException(
                    "Arguments does not support OutputData. You need to pass the placeholder "
                    "into arguments which will be replaced with the output directory where "
                    "your script should write the output to. The placeholder has the following "
                    "format: {}:name where name is the key of the OutputData in the "
                    "output_data section of the run "
                    "configuration.".format(_DATASET_OUTPUT_ARGUMENT_TEMPLATE))
Example #30
 def _verify_prefix(prefix):
     if not prefix:
         return
     prefix = prefix.lstrip("./\\")
     prefix_segments = re.split(r'[/\\]+', prefix)
     if len(prefix_segments) > 1:
         raise UserErrorException(
             "Nested prefix '{}' for Azure File Share is currently not supported."
         )