Example #1
def _parse_kubernetes_config(backend_config):
    """Creates build context tarfile containing Dockerfile and project code,
    returning path to tarfile."""
    if not backend_config:
        raise ExecutionException('backend_config is required but was not '
                                 'provided.')
    kube_config = backend_config.copy()
    if 'kube-job-template-path' not in backend_config.keys():
        raise ExecutionException(
            "'kube-job-template-path' attribute must be specified in "
            'backend_config.')
    kube_job_template = backend_config['kube-job-template-path']
    if os.path.exists(kube_job_template):
        with open(kube_job_template, 'r') as job_template:
            yaml_obj = yaml.safe_load(job_template.read())
        kube_job_template = yaml_obj
        kube_config['kube-job-template'] = kube_job_template
    else:
        raise ExecutionException(
            "Could not find 'kube-job-template-path': {}".format(
                kube_job_template))
    if 'kube-context' not in backend_config.keys():
        _logger.debug('Could not find kube-context in backend_config.'
                      ' Using current context or in-cluster config.')
    if 'repository-uri' not in backend_config.keys():
        raise ExecutionException(
            "Could not find 'repository-uri' in backend_config.")
    return kube_config
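For reference, a minimal backend_config that would pass this validation (the values below are placeholders, not taken from the source):

backend_config = {
    'kube-context': 'my-cluster',  # optional; current context is used if absent
    'kube-job-template-path': 'kubernetes_job_template.yaml',  # must exist on disk
    'repository-uri': 'registry.example.com/my-project',
}
kube_config = _parse_kubernetes_config(backend_config)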
Example #2
def _fetch_git_repo(uri, version, dst_dir):
    """Clone the git repo at ``uri`` into ``dst_dir``, checking out commit
    ``version`` (or defaulting to the head commit of the repository's master
    branch if version is unspecified).

    Assumes authentication parameters are specified by the environment, e.g. by
    a Git credential helper.
    """
    # We defer importing git until the last moment, because the import
    # requires that the git executable is available on the PATH, so we only
    # want to fail if we actually need it.
    import git
    repo = git.Repo.init(dst_dir)
    origin = repo.create_remote('origin', uri)
    origin.fetch()
    if version is not None:
        try:
            repo.git.checkout(version)
        except git.exc.GitCommandError as e:
            raise ExecutionException(
                "Unable to checkout version '%s' of git repo %s"
                '- please ensure that the version exists in the repo. '
                'Error: %s' % (version, uri, e))
    else:
        repo.create_head('master', origin.refs.master)
        repo.heads.master.checkout()
    repo.submodule_update(init=True, recursive=True)
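The same GitPython flow in isolation, as a rough sketch; it assumes the git executable is on the PATH and uses a placeholder repository URL:

import tempfile

import git

dst_dir = tempfile.mkdtemp()
repo = git.Repo.init(dst_dir)
origin = repo.create_remote('origin', 'https://github.com/user/repo.git')
origin.fetch()
repo.git.checkout('main')  # a branch, tag, or commit SHA all work here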
Example #3
 def _validate_parameters(self, user_parameters):
     missing_params = []
     for name in self.parameters:
         if (name not in user_parameters
                 and self.parameters[name].default is None):
             missing_params.append(name)
     if missing_params:
         raise ExecutionException(
             'No value given for missing parameters: %s' %
             ', '.join(["'%s'" % name for name in missing_params]))
Example #4
def _parse_subdirectory(uri):
    # Parses a uri and returns the uri and subdirectory as separate values.
    # Uses '#' as a delimiter.
    subdirectory = ''
    parsed_uri = uri
    if '#' in uri:
        subdirectory = uri[uri.find('#') + 1:]
        parsed_uri = uri[:uri.find('#')]
    if subdirectory and '.' in subdirectory:
        raise ExecutionException(
            "'.' is not allowed in project subdirectory paths.")
    return parsed_uri, subdirectory
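A quick illustration of the '#' delimiter (the URI is a placeholder):

parsed_uri, subdirectory = _parse_subdirectory(
    'https://github.com/user/repo.git#examples/sklearn')
# parsed_uri   -> 'https://github.com/user/repo.git'
# subdirectory -> 'examples/sklearn'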
Example #5
def _fetch_project(uri, force_tempdir, version=None):
    """Fetch a project into a local directory, returning the path to the local
    project directory.

    :param force_tempdir: If True, fetch the project into a temporary
        directory. Otherwise, fetch ZIP or Git projects into a temporary
        directory but simply return the path of local projects (i.e. perform
        a no-op for local projects).
    """
    parsed_uri, subdirectory = _parse_subdirectory(uri)
    use_temp_dst_dir = (force_tempdir or _is_zip_uri(parsed_uri)
                        or not _is_local_uri(parsed_uri))
    dst_dir = tempfile.mkdtemp() if use_temp_dst_dir else parsed_uri
    if use_temp_dst_dir:
        _logger.info('=== Fetching project from %s into %s ===', uri, dst_dir)
    if _is_zip_uri(parsed_uri):
        if _is_file_uri(parsed_uri):
            parsed_file_uri = urllib.parse.urlparse(
                urllib.parse.unquote(parsed_uri))
            parsed_uri = os.path.join(parsed_file_uri.netloc,
                                      parsed_file_uri.path)
        _unzip_repo(zip_file=(parsed_uri if _is_local_uri(parsed_uri) else
                              _fetch_zip_repo(parsed_uri)),
                    dst_dir=dst_dir)
    elif _is_local_uri(uri):
        if version is not None:
            raise ExecutionException(
                'Setting a version is only supported for Git project URIs')
        if use_temp_dst_dir:
            dir_util.copy_tree(src=parsed_uri, dst=dst_dir)
    else:
        assert _GIT_URI_REGEX.match(parsed_uri), (
            'Non-local URI %s should be a Git URI' % parsed_uri)
        _fetch_git_repo(parsed_uri, version, dst_dir)
    res = os.path.abspath(os.path.join(dst_dir, subdirectory))
    if not os.path.exists(res):
        raise ExecutionException('Could not find subdirectory %s of %s' %
                                 (subdirectory, dst_dir))
    return res
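A hypothetical call, assuming a Git URI with a subdirectory suffix (both placeholders):

work_dir = _fetch_project('https://github.com/user/repo.git#model',
                          force_tempdir=False, version='main')
# -> absolute path to the 'model' subdirectory of a fresh temporary clone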
Example #6
 def _compute_path_value(self, user_param_value, storage_dir):
     local_path = get_local_path_or_none(user_param_value)
     if local_path:
         if not os.path.exists(local_path):
             raise ExecutionException(
                 'Got value %s for parameter %s, but no such file or '
                 'directory was found.' % (user_param_value, self.name))
         return os.path.abspath(local_path)
     basename = os.path.basename(user_param_value)
     dest_path = os.path.join(storage_dir, basename)
     if dest_path != user_param_value:
         data.download_uri(uri=user_param_value, output_path=dest_path)
     return os.path.abspath(dest_path)
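The destination logic of the remote-URI branch in isolation (the URI and storage directory are placeholders):

import os

user_param_value = 's3://bucket/data/train.csv'
storage_dir = '/tmp/storage'
dest_path = os.path.join(storage_dir, os.path.basename(user_param_value))
print(dest_path)  # /tmp/storage/train.csv -> where download_uri would write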
Example #7
def _fetch_zip_repo(uri):
    import requests
    from io import BytesIO

    # TODO (dbczumar): Replace HTTP resolution via ``requests.get`` with an
    # invocation of ``segmind_track.data.download_uri()`` when the API
    # supports the same set of available stores as the artifact repository
    # (Azure, FTP, etc). See the following issue:
    # https://github.com/mlflow/mlflow/issues/763.
    response = requests.get(uri)
    try:
        response.raise_for_status()
    except requests.HTTPError as error:
        raise ExecutionException('Unable to retrieve ZIP file. Reason: %s' %
                                 str(error))
    return BytesIO(response.content)
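The returned BytesIO is a seekable in-memory ZIP, so it can be handed straight to zipfile (a sketch with a placeholder URL and destination):

import zipfile

buf = _fetch_zip_repo('https://example.com/project.zip')
with zipfile.ZipFile(buf) as zf:
    zf.extractall('/tmp/project')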
Example #8
    def get_entry_point(self, entry_point):
        if entry_point in self._entry_points:
            return self._entry_points[entry_point]
        _, file_extension = os.path.splitext(entry_point)
        ext_to_cmd = {'.py': 'python', '.sh': os.environ.get('SHELL', 'bash')}
        if file_extension in ext_to_cmd:
            command = '%s %s' % (ext_to_cmd[file_extension],
                                 shlex_quote(entry_point))
            if not is_string_type(command):
                command = command.encode('utf-8')
            return EntryPoint(name=entry_point, parameters={}, command=command)

        raise ExecutionException(
            'Could not find {0} among entry points {1} or interpret {0} as a '
            'runnable script. Supported script file extensions: '
            '{2}'.format(entry_point, list(self._entry_points.keys()),
                         list(ext_to_cmd.keys())))
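The extension fallback in isolation: an undeclared script still resolves to an interpreter command if its extension is recognized.

import os
from shlex import quote as shlex_quote

ext_to_cmd = {'.py': 'python', '.sh': os.environ.get('SHELL', 'bash')}
entry_point = 'train.py'
_, ext = os.path.splitext(entry_point)
print('%s %s' % (ext_to_cmd[ext], shlex_quote(entry_point)))  # python train.py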
Example #9
def _wait_for(submitted_run_obj):
    """Wait on the passed-in submitted run, reporting its status to the
    tracking server."""
    run_id = submitted_run_obj.run_id
    active_run = None
    # Note: there's a small chance we fail to report the run's status to the
    # tracking server if we're interrupted before we reach the try block
    # below.
    try:
        active_run = (MlflowClient().get_run(run_id)
                      if run_id is not None else None)
        if submitted_run_obj.wait():
            _logger.info("=== Run (ID '%s') succeeded ===", run_id)
            _maybe_set_run_terminated(active_run, 'FINISHED')
        else:
            _maybe_set_run_terminated(active_run, 'FAILED')
            raise ExecutionException("Run (ID '%s') failed" % run_id)
    except KeyboardInterrupt:
        _logger.error("=== Run (ID '%s') interrupted, cancelling run ===",
                      run_id)
        submitted_run_obj.cancel()
        _maybe_set_run_terminated(active_run, 'FAILED')
        raise
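A hypothetical call site, pairing this with _run from Example #12 below (placeholder URI and experiment id):

submitted = _run(uri='https://github.com/user/repo.git', experiment_id='0',
                 synchronous=False)
_wait_for(submitted)  # blocks, then marks the run FINISHED or FAILED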
Example #10
def load_project(directory):
    mlproject_path = _find_mlproject(directory)

    # TODO: Validate structure of YAML loaded from the file
    yaml_obj = {}
    if mlproject_path is not None:
        with open(mlproject_path) as mlproject_file:
            yaml_obj = yaml.safe_load(mlproject_file)

    project_name = yaml_obj.get('name')

    # Validate config if docker_env parameter is present
    docker_env = yaml_obj.get('docker_env')
    if docker_env:
        if not docker_env.get('image'):
            raise ExecutionException(
                'Project configuration (MLproject file) was invalid: Docker '
                'environment specified but no image attribute found.')
        if docker_env.get('volumes'):
            if not (isinstance(docker_env['volumes'], list) and
                    all(isinstance(i, str) for i in docker_env['volumes'])):
                raise ExecutionException(
                    'Project configuration (MLproject file) was invalid: '
                    'Docker volumes must be a list of strings, '
                    'e.g.: ["/path1/:/path1", "/path2/:/path2"]')
        if docker_env.get('environment'):
            if not (isinstance(docker_env['environment'], list) and
                    all(isinstance(i, (list, str))
                        for i in docker_env['environment'])):
                raise ExecutionException(
                    'Project configuration (MLproject file) was invalid: '
                    'environment must be a list containing either strings '
                    '(to copy env variables from the host system) or lists '
                    'of string pairs (to define new environment variables).')

    # Validate config if conda_env parameter is present
    conda_path = yaml_obj.get('conda_env')
    if conda_path and docker_env:
        raise ExecutionException('Project cannot contain both a docker and '
                                 'conda environment.')

    # Parse entry points
    entry_points = {}
    for name, entry_point_yaml in yaml_obj.get('entry_points', {}).items():
        parameters = entry_point_yaml.get('parameters', {})
        command = entry_point_yaml.get('command')
        entry_points[name] = EntryPoint(name, parameters, command)

    if conda_path:
        conda_env_path = os.path.join(directory, conda_path)
        if not os.path.exists(conda_env_path):
            raise ExecutionException(
                'Project specified conda environment file %s, but no such '
                'file was found.' % conda_env_path)
        return Project(
            conda_env_path=conda_env_path,
            entry_points=entry_points,
            docker_env=docker_env,
            name=project_name,
        )

    default_conda_path = os.path.join(directory, DEFAULT_CONDA_FILE_NAME)
    if os.path.exists(default_conda_path):
        return Project(conda_env_path=default_conda_path,
                       entry_points=entry_points,
                       docker_env=docker_env,
                       name=project_name)

    return Project(conda_env_path=None,
                   entry_points=entry_points,
                   docker_env=docker_env,
                   name=project_name)
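For reference, a minimal MLproject document this loader accepts, parsed here the same way the loader parses the file (the names and command are placeholders):

import yaml

yaml_obj = yaml.safe_load('''
name: my-project
conda_env: conda.yaml
entry_points:
  main:
    parameters:
      alpha: {type: float, default: 0.5}
    command: "python train.py --alpha {alpha}"
''')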
Example #11
 def _compute_uri_value(self, user_param_value):
     if not data.is_uri(user_param_value):
         raise ExecutionException('Expected URI for parameter %s but got '
                                  '%s' % (self.name, user_param_value))
     return user_param_value
Example #12
def _run(uri,
         experiment_id,
         entry_point='main',
         version=None,
         parameters=None,
         backend=None,
         backend_config=None,
         use_conda=True,
         storage_dir=None,
         synchronous=True,
         run_id=None):
    """Helper that delegates to the project-running method corresponding to the
    passed-in backend.

    Returns a ``SubmittedRun`` corresponding to the project run.
    """

    parameters = parameters or {}
    work_dir = _fetch_project(uri=uri, force_tempdir=False, version=version)
    project = _project_spec.load_project(work_dir)
    _validate_execution_environment(project, backend)  # noqa
    project.get_entry_point(entry_point)._validate_parameters(parameters)
    if run_id:
        active_run = MlflowClient().get_run(run_id)
    else:
        active_run = _create_run(uri, experiment_id, work_dir, entry_point)

    # Consolidate parameters for logging.
    # `storage_dir` is `None` since we want to log the actual path, not the
    # downloaded local path.
    entry_point_obj = project.get_entry_point(entry_point)
    final_params, extra_params = entry_point_obj.compute_parameters(
        parameters, storage_dir=None)
    for key, value in (list(final_params.items()) +
                       list(extra_params.items())):
        MlflowClient().log_param(active_run.info.run_id, key, value)

    repo_url = _get_git_repo_url(work_dir)
    if repo_url is not None:
        for tag in [MLFLOW_GIT_REPO_URL, LEGACY_MLFLOW_GIT_REPO_URL]:
            MlflowClient().set_tag(active_run.info.run_id, tag, repo_url)

    # Add branch name tag if a branch is specified through --version
    if _is_valid_branch_name(work_dir, version):
        for tag in [MLFLOW_GIT_BRANCH, LEGACY_MLFLOW_GIT_BRANCH_NAME]:
            MlflowClient().set_tag(active_run.info.run_id, tag, version)

    if backend == 'local' or backend is None:
        MlflowClient().set_tag(active_run.info.run_id, MLFLOW_PROJECT_BACKEND,
                               'local')
        command_args = []
        command_separator = ' '
        # If a docker_env attribute is defined in MLproject then it takes
        # precedence over conda yaml environments, so the project will be
        # executed inside a docker container.
        if project.docker_env:
            # Docker-based execution is disabled in this build; the original
            # MLflow logic is preserved in the commented-out block below.
            pass
        # MlflowClient().set_tag(active_run.info.run_id,
        #                                 MLFLOW_PROJECT_ENV, 'docker')
        # _validate_docker_env(project)
        # _validate_docker_installation()
        # image = _build_docker_image(
        #     work_dir=work_dir,
        #     repository_uri=project.name,
        #     base_image=project.docker_env.get('image'),
        #     run_id=active_run.info.run_id)
        # command_args += _get_docker_command(
        #     image=image,
        #     active_run=active_run,
        #     volumes=project.docker_env.get('volumes'),
        #     user_env_vars=project.docker_env.get('environment'))
        # Synchronously create a conda environment (even though this may take
        # some time) to avoid failures due to multiple concurrent attempts to
        # create the same conda env.
        # elif use_conda:
        #     MlflowClient().set_tag(active_run.info.run_id,
        #                                     MLFLOW_PROJECT_ENV, 'conda')
        #     command_separator = ' && '
        #     conda_env_name = _get_or_create_conda_env(project.conda_env_path)
        #     command_args += _get_conda_command(conda_env_name)
        # In synchronous mode, run the entry point command in a blocking
        # fashion, sending status updates to the tracking server when
        # finished. Note that the run state may not be persisted to the
        # tracking server if interrupted.
        if synchronous:
            command_args += _get_entry_point_command(project, entry_point,
                                                     parameters, storage_dir)
            command_str = command_separator.join(command_args)
            return _run_entry_point(command_str,
                                    work_dir,
                                    experiment_id,
                                    run_id=active_run.info.run_id)
        # Otherwise, invoke `mlflow run` in a subprocess
        return _invoke_mlflow_run_subprocess(work_dir=work_dir,
                                             entry_point=entry_point,
                                             parameters=parameters,
                                             experiment_id=experiment_id,
                                             use_conda=use_conda,
                                             storage_dir=storage_dir,
                                             run_id=active_run.info.run_id)
    # elif backend == 'kubernetes':
    # from segmind.projects import kubernetes as kb
    # MlflowClient().set_tag(active_run.info.run_id,
    #                                 MLFLOW_PROJECT_ENV, 'docker')
    # MlflowClient().set_tag(active_run.info.run_id,
    #                                 MLFLOW_PROJECT_BACKEND, 'kubernetes')
    # _validate_docker_env(project)
    # _validate_docker_installation()
    # kube_config = _parse_kubernetes_config(backend_config)
    # image = _build_docker_image(
    #     work_dir=work_dir,
    #     repository_uri=kube_config['repository-uri'],
    #     base_image=project.docker_env.get('image'),
    #     run_id=active_run.info.run_id)
    # image_digest = kb.push_image_to_registry(image.tags[0])
    # submitted_run = kb.run_kubernetes_job(
    #     project.name, active_run, image.tags[0], image_digest,
    #     _get_entry_point_command(project, entry_point, parameters,
    #                              storage_dir),
    #     _get_run_env_vars(
    #         run_id=active_run.info.run_uuid,
    #         experiment_id=active_run.info.experiment_id),
    #     kube_config.get('kube-context', None),
    #     kube_config['kube-job-template'])
    # return submitted_run

    supported_backends = ['local', 'kubernetes']
    raise ExecutionException('Got unsupported execution mode %s. Supported '
                             'values: %s' % (backend, supported_backends))
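A hypothetical top-level invocation (placeholder URI and experiment id; only the 'local' backend is active in this listing):

submitted_run = _run(
    uri='https://github.com/user/repo.git',
    experiment_id='0',
    entry_point='main',
    parameters={'alpha': '0.4'},
    backend='local',
    synchronous=True)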