Exemplo n.º 1
0
def install_model(args):
    """Install a model package.

    The package is located first: a bare model name is looked up through
    ``utils.get_model_info_from_repo`` and anything that is not an archive
    is treated as a GitHub reference and turned into a zipball URL plus an
    MLHUB.yaml URL.  The archive is then downloaded (if it is a URL) and
    extracted into a temporary directory, and finally installed under the
    directory returned by ``utils.get_package_dir``.  When that directory
    already exists the user is asked to confirm the downgrade / replace /
    upgrade and the existing directory is removed before installing.

    Args:
        args: Command line args parsed by argparse.
        args.model (str): mlm/zip path, mlm/zip url, model name, GitHub repo,
                          like mlhubber/mlhub, or MLHUB.yaml on github repo,
                          like mlhubber/audit:doc/MLHUB.yaml.
        args.mlhub: Passed to ``utils.get_model_info_from_repo`` when
                    ``args.model`` is a bare model name.
        args.quiet: When true, progress and summary messages are not printed.
        args.workding_dir: When not None, passed to
                           ``utils.update_working_dir`` for the model.

    Raises:
        SystemExit: With status 0 when the user declines to replace an
            already installed package.
        utils.ModePkgInstallationFileNotFoundException: Re-raised from
            ``utils.install_file_deps`` after the partially created install
            directory has been removed.
    """

    logger = logging.getLogger(__name__)
    logger.info('Install a model.')
    logger.debug('args: %s', args)

    model = args.model  # model pkg name
    location = args.model  # pkg file path or URL
    version = None  # model pkg version
    mlhubyaml = None  # MLHUB.yaml path or URL

    # Obtain the model URL if not a local file.

    if not (utils.is_archive(model) or utils.is_url(model) or '/' in model):

        # Model package name, which can be found in mlhub repo.
        # Like:
        #     $ ml install audit
        #
        # We assume the URL got from mlhub repo is a link to a mlm/zip/tar file
        # or a GitHub repo reference or MLHUB.yaml.

        # Correct model name if possible.

        matched_model = utils.get_misspelled_pkg(model)
        if matched_model is not None:
            model = matched_model

        # Get model pkg meta data from mlhub repo.

        location, version, meta_list = utils.get_model_info_from_repo(
            model, args.mlhub)

        # Update bash completion list.

        utils.update_model_completion({e['meta']['name'] for e in meta_list})

    if not utils.is_archive(location):

        # Model from GitHub.
        # Like:
        #     $ ml install mlhubber/audit
        #     $ ml install mlhubber/audit:doc/MLHUB.yaml
        #     $ ml install https://github.com/mlhubber/audit/...

        mlhubyaml = utils.get_pkgyaml_github_url(location)  # URL to MLHUB.yaml
        location = utils.get_githubrepo_zip_url(location)
        logger.debug("location: %s", location)
        logger.debug("mlhubyaml: %s", mlhubyaml)

    # Determine the path of downloaded/existing model package file

    pkgfile = None
    if utils.is_archive(location):
        pkgfile = os.path.basename(location)  # pkg file name
    elif utils.is_url(location):
        pkgfile = utils.get_url_filename(location)

    # Query archive type if not available from file name per se.
    # Keep asking until the user supplies a name recognised as an archive.

    while pkgfile is None or not utils.is_archive(pkgfile):
        print(
            "The file type cannot be determined.\n"
            "Please give it a file name with explicit valid archive extension: ",
            end='')
        pkgfile = input()

    # Dir where pkg file is extracted: the file name without its last
    # extension (made absolute inside the temporary directory below).

    uncompressdir = pkgfile[:pkgfile.rfind('.')]

    # Installation.

    entry = None  # Meta info read from MLHUB.yaml
    with tempfile.TemporaryDirectory() as mlhubtmpdir:

        # Determine the local path of the model package

        if utils.is_url(location):
            local = os.path.join(mlhubtmpdir, pkgfile)  # downloaded
        else:
            local = location  # local file path

        uncompressdir = os.path.join(mlhubtmpdir, uncompressdir)

        # Obtain model version.

        if version is None:
            if utils.ends_with_mlm(pkgfile):

                # Get version number from MLM file name.

                model, version = utils.interpret_mlm_name(pkgfile)

            elif not utils.is_github_url(location):

                # Get MLHUB.yaml inside the archive file.  Download the
                # package file first because it is not from GitHub.

                if utils.is_url(location):
                    utils.download_model_pkg(location, local, pkgfile,
                                             args.quiet)

                if not args.quiet:
                    print("Extracting '{}' ...\n".format(pkgfile))

                utils.unpack_with_promote(local,
                                          uncompressdir,
                                          valid_name=pkgfile)
                mlhubyaml = utils.get_available_pkgyaml(
                    uncompressdir)  # Path to MLHUB.yaml

            if mlhubyaml is not None:  # Get version number from MLHUB.yaml
                entry = utils.read_mlhubyaml(mlhubyaml)
                meta = entry["meta"]
                model = meta["name"]
                version = meta["version"]

            utils.update_model_completion({model})  # Update bash completion list.

        # Check if model is already installed.

        install_path = utils.get_package_dir(model)  # Installation path
        if os.path.exists(install_path):

            # Ensure version numbers are strings before comparing them.

            installed_version = str(
                utils.load_description(model)['meta']['version'])
            version = str(version)

            installed = StrictVersion(installed_version)
            candidate = StrictVersion(version)
            if installed > candidate:
                question = "Downgrade '{}' from version '{}' to version '{}'"
            elif installed == candidate:
                question = "Replace '{}' version '{}' with version '{}'"
            else:
                question = "Upgrade '{}' from version '{}' to version '{}'"

            yes = utils.yes_or_no(question,
                                  model,
                                  installed_version,
                                  version,
                                  yes=True)
            if not yes:
                sys.exit(0)

            print()
            shutil.rmtree(install_path)

        # Uncompress package file.

        if not os.path.exists(uncompressdir):

            # Model pkg mlm or GitHub pkg has not unzipped yet.

            if utils.is_url(location):  # Download the package file if needed.
                utils.download_model_pkg(location, local, pkgfile, args.quiet)

            if not args.quiet:
                print("Extracting '{}' ...\n".format(pkgfile))

            utils.unpack_with_promote(local, uncompressdir, valid_name=pkgfile)

        # Install package files.
        #
        # Because it is time-consuming to download all package files one-by-one , we
        # download the whole zipball from the repo first, then re-arrange the files
        # according to `dependencies` -> `files` in MLHUB.yaml if any.

        # Find if any files specified in MLHUB.yaml

        if mlhubyaml is None:  # MLM file which can obtain version number from it name.
            mlhubyaml = utils.get_available_pkgyaml(uncompressdir)

        # The YAML may already be located but not yet read: that happens when
        # the version came from the mlhub repo index and the location was a
        # GitHub reference, so the version block above was skipped.  Read it
        # here so the lookups below never run against None.

        if entry is None:
            entry = utils.read_mlhubyaml(mlhubyaml)

        depspec = None
        if 'dependencies' in entry:
            depspec = entry['dependencies']
        elif 'dependencies' in entry['meta']:
            depspec = entry['meta']['dependencies']

        file_spec = None
        if depspec is not None and 'files' in depspec:
            file_spec = {'files': depspec['files']}
        elif 'files' in entry:
            file_spec = {'files': entry['files']}

        if file_spec is not None:  # install package files if they are specified in MLHUB.yaml

            # MLHUB.yaml should always be at the package root.

            os.mkdir(install_path)
            yaml_dest = os.path.join(install_path, MLHUB_YAML)
            if utils.is_url(mlhubyaml):

                # We currently only support MLHUB.yaml specified on GitHub.
                # An "https://api" URL returns JSON whose 'download_url'
                # field points at the actual file.

                if mlhubyaml.startswith("https://api"):
                    urllib.request.urlretrieve(
                        json.loads(urllib.request.urlopen(
                            mlhubyaml).read())['download_url'],
                        yaml_dest)
                else:
                    urllib.request.urlretrieve(mlhubyaml, yaml_dest)
            else:
                shutil.move(mlhubyaml, install_path)

            # All package files except MLHUB.yaml should be specified in 'files' of MLHUB.yaml

            try:
                utils.install_file_deps(
                    utils.flatten_mlhubyaml_deps(file_spec)[0][1],
                    model,
                    downloadir=uncompressdir)
            # NOTE(review): exception name is spelled 'ModePkg...' here —
            # confirm it matches the definition in utils.
            except utils.ModePkgInstallationFileNotFoundException:

                # Do not leave a half-installed package behind.

                if os.path.exists(install_path):
                    shutil.rmtree(install_path)

                raise

        else:
            # Otherwise, put all files under package dir.
            # **Note** Here we must make sure <install_path> does not exist.
            # Otherwise, <unzipdir> will be inside <install_path>
            shutil.move(uncompressdir, install_path)

        # Update bash completion list.

        utils.update_command_completion(
            set(utils.load_description(model)['commands']))

        # Update working dir if any.
        # NOTE(review): attribute is spelled 'workding_dir' — confirm it
        # matches the argparse definition before renaming.

        if args.workding_dir is not None:
            utils.update_working_dir(model, args.workding_dir)

        if not args.quiet:

            # Informative message about the size of the installed model.

            print("Installed '{}' into '{}' ({:,} bytes).".format(
                model, install_path, utils.dir_size(install_path)))

            # Suggest next step. README or DOWNLOAD

            utils.print_next_step('install', model=model)
Exemplo n.º 2
0
def install_model(args):
    """Install a model package.

    The package is located first: a bare model name is looked up through
    ``utils.get_model_info_from_repo`` and anything that is not an archive
    file is interpreted through ``utils.RepoTypeURL`` to obtain a zipball
    URL and an MLHUB.yaml URL.  If the MLHUB.yaml cannot be found the repo
    is assumed to be private and is cloned over SSH instead.  The package is
    then downloaded/extracted (or cloned) into a temporary directory and
    installed under the directory returned by ``utils.get_package_dir``.
    When that directory already exists the user is asked to confirm the
    downgrade / replace / upgrade and the existing directory is removed
    before installing.

    Calling this function installs a global ``urllib.request`` opener that
    sends a ``User-agent`` header.

    Args:
        args: Command line args parsed by argparse.
        args.model (str): mlm/zip path, mlm/zip url, model name, GitHub repo,
                          like mlhubber/mlhub, or MLHUB.yaml on github repo,
                          like mlhubber/audit:doc/MLHUB.yaml.
        args.i: SSH key passed to ``ssh -i`` when cloning a (maybe) private
                repo; ignored when empty.
        args.mlhub: Passed to ``utils.get_model_info_from_repo`` when
                    ``args.model`` is a bare model name.
        args.quiet: When true, progress and summary messages are not printed.
        args.working_dir: When not None, passed to
                          ``utils.update_working_dir`` for the model.

    Raises:
        SystemExit: With status 0 when the user declines to replace an
            already installed package.
        utils.InstallFailedException: When ``git clone`` or ``git checkout``
            of a private repo exits with a non-zero status.
        utils.ModelPkgInstallationFileNotFoundException: Re-raised from
            ``utils.install_file_deps`` after the partially created install
            directory has been removed.
    """

    # Local import: only needed for quoting the SSH key path below.
    import shlex

    logger = logging.getLogger(__name__)
    logger.info("Install a model.")
    logger.debug("args: %s", args)

    # Avoid 403 errors which result when the header identifies itself
    # as python urllib or is empty and thus the web site assumes it is
    # a robot. We are not a robot but a user downloading a file. This
    # will ensure gitlab is okay with retrieving from a URL by adding
    # a header rather than no header. TODO move to using Requests.

    opener = urllib.request.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    urllib.request.install_opener(opener)

    model = args.model  # model pkg name
    location = args.model  # pkg file path or URL
    key = args.i  # SSH key
    version = None  # model pkg version
    mlhubyaml = None  # MLHUB.yaml path or URL
    repo_obj = None  # RepoTypeURL object for related URL interpretation
    maybe_private = False  # Maybe private repo

    # Obtain the model URL if not a local file.

    if not (
        utils.is_archive_file(model)
        or utils.is_url(model)
        or "/" in model
    ):

        # Model package name, which can be found in mlhub repo.
        # Like:
        #     $ ml install audit
        #
        # We assume the URL got from mlhub repo is a link to a mlm/zip/tar file
        # or a GitHub repo reference or MLHUB.yaml.

        # Correct model name if possible.

        matched_model = utils.get_misspelled_pkg(model)
        if matched_model is not None:
            model = matched_model

        # Get model pkg meta data from mlhub repo.

        location, version, meta_list = utils.get_model_info_from_repo(
            model, args.mlhub
        )

        # Update bash completion list.

        utils.update_model_completion({e["meta"]["name"] for e in meta_list})

    if not utils.is_archive_file(location):

        # Model from a repo such as GitHub, GitLab, Bitbucket etc.
        #
        # Possible options are:
        #   $ ml install mlhubber/audit            # latest commit on the master branch of GitHub repo mlhubber/audit
        #   $ ml install mlhubber/audit@dev        # latest commit on the dev branch of GitHub repo mlhubber/audit
        #   $ ml install mlhubber/audit@0001ea4    # commit 0001ea4 of mlhubber/audit
        #   $ ml install mlhubber/audit:doc/MLHUB.yaml            # latest commit on master, but a specified YAML file
        #   $ ml install https://github.com/mlhubber/audit/...    # Arbitrary GitHub link address
        #
        #   $ ml install github:mlhubber/audit    # GitHub repo, the same as ml install mlhubber/audit
        #
        #   $ ml install gitlab:mlhubber/audit@2fe89kh:doc/MLHUB.yaml    # GitLab repo
        #   $ ml install https://gitlab.com/mlhubber/audit/...           # GitLab repo
        #
        #   $ ml install bitbucket:mlhubber/audit                        # BitBucket repo
        #   $ ml install https://bitbucket.org/mlhubber/audit/...        # BitBucket repo

        repo_obj = utils.RepoTypeURL.get_repo_obj(location)
        try:
            mlhubyaml = repo_obj.get_pkg_yaml_url()
            location = repo_obj.compose_repo_zip_url()
            logger.debug("location: %s", location)
            logger.debug("mlhubyaml: %s", mlhubyaml)
        except utils.DescriptionYAMLNotFoundException:

            # The YAML is not publicly reachable: maybe a private repo,
            # which is cloned over SSH further down.

            maybe_private = True

    # Determine the path of downloaded/existing model package file

    pkgfile = None
    if maybe_private:  # Maybe private repo
        pkgfile = repo_obj.repo
    elif utils.is_archive_file(location):
        pkgfile = os.path.basename(location)  # pkg file name
    elif utils.is_url(location):
        pkgfile = utils.get_url_filename(location)

    if maybe_private:

        # A cloned repo is not an archive: the clone directory is named
        # after the repo itself.

        uncompressdir = pkgfile
    else:

        # Query archive type if not available from file name per se.
        # Keep asking until the user supplies a name recognised as an archive.

        while pkgfile is None or not utils.is_archive_file(pkgfile):
            print(
                "The file type cannot be determined.\n"
                "Please give it a file name with explicit valid archive extension: ",
                end="",
            )
            pkgfile = input()

        # Dir where pkg file is extracted: file name without last extension.

        uncompressdir = pkgfile[: pkgfile.rfind(".")]

    # Installation.

    entry = None  # Meta info read from MLHUB.yaml
    with tempfile.TemporaryDirectory() as mlhubtmpdir:

        # Determine the local path of the model package

        if maybe_private:
            local = None
        elif utils.is_url(location):
            local = os.path.join(mlhubtmpdir, pkgfile)  # downloaded
        else:
            local = location  # local file path

        uncompressdir = os.path.join(mlhubtmpdir, uncompressdir)

        # Obtain model version.

        if version is None:
            if utils.ends_with_mlm(pkgfile):

                # Get version number from MLM file name.

                model, version = utils.interpret_mlm_name(pkgfile)

            elif not repo_obj:

                # Get MLHUB.yaml inside the archive file.  Download the
                # package file first because it is not from a repo.

                if utils.is_url(location):
                    utils.download_model_pkg(
                        location, local, pkgfile, args.quiet
                    )

                if not args.quiet:
                    print("Extracting '{}' ...\n".format(pkgfile))

                utils.unpack_with_promote(
                    local, uncompressdir, valid_name=pkgfile
                )
                mlhubyaml = utils.get_available_pkgyaml(
                    uncompressdir
                )  # Path to MLHUB.yaml

            elif maybe_private:

                # Clone the repo over SSH into the temporary directory and
                # check out the requested ref.  The commands are run as
                # argument lists (no shell) so that the key path, clone URL
                # and ref, which originate from the command line, cannot be
                # interpreted as shell syntax.

                git_env = os.environ.copy()
                if key:
                    # Git hands GIT_SSH_COMMAND to a shell, so the key path
                    # is quoted to survive spaces and metacharacters.
                    git_env["GIT_SSH_COMMAND"] = "ssh -i {}".format(
                        shlex.quote(str(key))
                    )

                git_steps = [
                    (["git", "clone", repo_obj.get_ssh_clone_url()], mlhubtmpdir)
                ]
                if repo_obj.ref:
                    # Assumes the clone directory is named after
                    # repo_obj.repo — TODO confirm against RepoTypeURL.
                    git_steps.append(
                        (
                            ["git", "checkout", str(repo_obj.ref)],
                            os.path.join(mlhubtmpdir, repo_obj.repo),
                        )
                    )

                for git_cmd, workdir in git_steps:
                    proc = subprocess.run(
                        git_cmd,
                        cwd=workdir,
                        env=git_env,
                        stderr=subprocess.PIPE,
                    )
                    if proc.returncode != 0:
                        raise utils.InstallFailedException(
                            proc.stderr.decode("utf-8", errors="replace")
                        )

                if repo_obj.path:
                    mlhubyaml = os.path.join(uncompressdir, repo_obj.path)
                else:
                    mlhubyaml = utils.get_available_pkgyaml(
                        uncompressdir
                    )  # Path to MLHUB.yaml

            if mlhubyaml is not None:  # Get version number from MLHUB.yaml
                entry = utils.read_mlhubyaml(mlhubyaml)
                meta = entry["meta"]
                model = meta["name"]
                version = meta["version"]

            utils.update_model_completion(
                {model}
            )  # Update bash completion list.

        # Check if model is already installed.

        install_path = utils.get_package_dir(model)  # Installation path
        if os.path.exists(install_path):

            # Ensure version numbers are strings before comparing them.

            installed_version = str(
                utils.load_description(model)["meta"]["version"]
            )
            version = str(version)

            installed = StrictVersion(installed_version)
            candidate = StrictVersion(version)
            if installed > candidate:
                question = "Downgrade '{}' from version '{}' to version '{}'"
            elif installed == candidate:
                question = "Replace '{}' version '{}' with version '{}'"
            else:
                question = "Upgrade '{}' from version '{}' to version '{}'"

            yes = utils.yes_or_no(
                question,
                model,
                installed_version,
                version,
                yes=True,
            )
            if not yes:
                # Suggest next step before exiting, as if an install has just happened.
                utils.print_next_step("install", model=model)
                sys.exit(0)

            print()
            shutil.rmtree(install_path)

        # Uncompress package file.

        if not os.path.exists(uncompressdir):

            # Model pkg mlm or GitHub pkg has not unzipped yet.

            if utils.is_url(location):  # Download the package file if needed.
                utils.download_model_pkg(location, local, pkgfile, args.quiet)

            if not args.quiet:
                print("Extracting '{}' ...\n".format(pkgfile))

            utils.unpack_with_promote(local, uncompressdir, valid_name=pkgfile)

        # Install package files.
        #
        # Because it is time-consuming to download all package files one-by-one , we
        # download the whole zipball from the repo first, then re-arrange the files
        # according to `dependencies` -> `files` in MLHUB.yaml if any.

        # Find if any files specified in MLHUB.yaml

        if mlhubyaml is None:

            # MLM file which can obtain version number from it name.

            mlhubyaml = utils.get_available_pkgyaml(uncompressdir)

        # The YAML may already be located but not yet read: that happens when
        # the version came from the mlhub repo index and the location was a
        # repo reference, so the version block above was skipped.  Read it
        # here so the lookups below never run against None.

        if entry is None:
            entry = utils.read_mlhubyaml(mlhubyaml)

        depspec = None
        if "dependencies" in entry:
            depspec = entry["dependencies"]
        elif "dependencies" in entry["meta"]:
            depspec = entry["meta"]["dependencies"]

        file_spec = None
        if depspec is not None and "files" in depspec:
            file_spec = {"files": depspec["files"]}
        elif "files" in entry:
            file_spec = {"files": entry["files"]}

        if file_spec is not None:

            # Install package files if they are specified in MLHUB.yaml.
            # MLHUB.yaml should always be at the package root.

            os.mkdir(install_path)
            yaml_dest = os.path.join(install_path, MLHUB_YAML)
            if utils.is_url(mlhubyaml):

                # We currently only support MLHUB.yaml specified on GitHub.
                # An "https://api" URL returns JSON whose 'download_url'
                # field points at the actual file.

                if mlhubyaml.startswith("https://api"):
                    urllib.request.urlretrieve(
                        json.loads(urllib.request.urlopen(mlhubyaml).read())[
                            "download_url"
                        ],
                        yaml_dest,
                    )
                else:
                    urllib.request.urlretrieve(mlhubyaml, yaml_dest)
            else:
                shutil.move(mlhubyaml, install_path)

            # All package files except MLHUB.yaml should be specified in 'files' of MLHUB.yaml

            try:
                utils.install_file_deps(
                    utils.flatten_mlhubyaml_deps(file_spec)[0][1],
                    model,
                    downloadir=uncompressdir,
                    yes=True,
                )
            except utils.ModelPkgInstallationFileNotFoundException:

                # Do not leave a half-installed package behind.

                if os.path.exists(install_path):
                    shutil.rmtree(install_path)

                raise

        else:
            # Otherwise, put all files under package dir.
            # **Note** Here we must make sure <install_path> does not exist.
            # Otherwise, <unzipdir> will be inside <install_path>
            shutil.move(uncompressdir, install_path)

        # Update bash completion list.

        utils.update_command_completion(
            set(utils.load_description(model)["commands"])
        )

        # Update working dir if any.

        if args.working_dir is not None:
            utils.update_working_dir(model, args.working_dir)

        if not args.quiet:

            # Informative message about the size of the installed model.

            print(
                f"Found '{model}' version {version}.\n\nInstalled '{model}' "
                f"into '{install_path}/' ({utils.dir_size(install_path):,} bytes)."
            )

            # Suggest next step. README or DOWNLOAD

            utils.print_next_step("install", model=model)