def _verify_archive(self):
    """
    Verify the archive containing the git repository.

    The archive is unpacked into a temporary directory and ``git fsck`` is run inside the
    extracted ``app`` directory.

    :raises CachitoError: if the archive is missing or is not a tar archive, if it cannot be
        extracted, or if 'git fsck' fails for the extracted sources
    """
    archive_path = self.sources_dir.archive_path
    log.debug("Verifying the archive at %s", archive_path)
    if not os.path.exists(archive_path) or not tarfile.is_tarfile(archive_path):
        err_msg = f"No valid archive found at {archive_path}"
        # No exception is being handled at this point, so log.error is used; log.exception
        # would only append an empty "NoneType: None" traceback to the message
        log.error(err_msg)
        raise CachitoError(err_msg)

    err_msg = {
        "log": "Cachito found an error when verifying the generated archive at %s. %s",
        "exception": f"Invalid archive at {archive_path!s}",
    }
    with tempfile.TemporaryDirectory(prefix="cachito-") as temp_dir:
        cmd = ["git", "fsck"]
        repo_path = os.path.join(temp_dir, "app")
        try:
            with tarfile.open(archive_path, mode="r:gz") as tar:
                # NOTE(review): newer Python versions accept an extraction ``filter``
                # argument here; confirm the minimum supported version before adding it
                tar.extractall(temp_dir)
        # tarfile.TarError is the base class of ExtractError and also covers ReadError;
        # EOFError is what the gzip layer raises for a truncated stream
        except (tarfile.TarError, zlib.error, EOFError, OSError) as exc:
            log.error(err_msg["log"], archive_path, exc)
            raise CachitoError(err_msg["exception"]) from exc

        try:
            # "check": True is passed through so a failing 'git fsck' surfaces as
            # CalledProcessError, which carries the command's stderr
            run_cmd(cmd, {"cwd": repo_path, "check": True})
        except subprocess.CalledProcessError as exc:
            msg = f"{err_msg['log']}. STDERR: %s"
            log.error(msg, archive_path, exc, exc.stderr)
            raise CachitoError(err_msg["exception"]) from exc
def test_run_cmd_with_timeout(
    mock_run, input_params: Dict[str, Any], expected_run_params: Dict[str, Any]
):
    """
    Check that ``run_cmd`` forwards the expected keyword arguments to the mocked runner.

    :param mock_run: the mock that receives the command (presumably a patch of
        ``subprocess.run``; the decorator is not visible here)
    :param input_params: the params dictionary handed to ``run_cmd``
    :param expected_run_params: the keyword arguments ``mock_run`` must have been called with
    """
    mock_run.return_value.returncode = 0
    # The command itself is never executed because the runner is mocked; it only has to be
    # passed through unchanged
    cmd = ["git", "fsck"]
    run_cmd(cmd, input_params)
    mock_run.assert_called_once_with(cmd, **expected_run_params)
def fetch_pip_source(request_id, package_configs=None):
    """
    Resolve and fetch pip dependencies for a given request.

    The work happens in three phases: the Nexus repositories are prepared, every configured
    package path is resolved, and finally the Nexus credentials are generated and used to
    build the environment variables and configuration files attached to the request.

    :param int request_id: the Cachito request ID this is for
    :param list package_configs: the list of optional package configurations submitted by the
        user
    """
    pip_version = run_cmd(["pip", "--version"], {}).strip()
    log.info(f"pip version: {pip_version}")

    validate_pip_config()
    bundle_dir: RequestBundleDir = RequestBundleDir(request_id)

    log.info("Configuring Nexus for pip for the request %d", request_id)
    set_request_state(request_id, "in_progress", "Configuring Nexus for pip")
    pip_repo_name = get_pypi_hosted_repo_name(request_id)
    raw_repo_name = get_raw_hosted_repo_name(request_id)
    prepare_nexus_for_pip_request(pip_repo_name, raw_repo_name)

    log.info("Fetching dependencies for request %d", request_id)
    # With no explicit configuration, a single default package at the root is processed
    package_configs = package_configs or [{}]
    resolved_packages = []
    pending_requirement_files = []
    for config in package_configs:
        relative_path = config.get("path", ".")
        package_source_dir = bundle_dir.app_subpath(relative_path).source_dir
        set_request_state(
            request_id,
            "in_progress",
            f"Fetching dependencies at the {relative_path!r} directory",
        )
        request = get_request(request_id)
        resolved = resolve_pip(
            package_source_dir,
            request,
            requirement_files=config.get("requirements_files"),
            build_requirement_files=config.get("requirements_build_files"),
        )
        # The custom requirement files embed the Nexus password in their URLs, so they can
        # only be generated once the credentials exist
        pending_requirement_files.extend(resolved.pop("requirements"))
        # Likewise, recording the packages is postponed until the env vars are known
        resolved_packages.append(resolved)

    log.info("Finalizing the Nexus configuration for pip for the request %d", request_id)
    set_request_state(request_id, "in_progress", "Finalizing the Nexus configuration for pip")
    username = get_hosted_repositories_username(request_id)
    password = finalize_nexus_for_pip_request(pip_repo_name, raw_repo_name, username)

    # Set environment variables and config files
    candidate_config_files = [
        _get_custom_requirement_config_file(
            path, bundle_dir.source_root_dir, raw_repo_name, username, password
        )
        for path in pending_requirement_files
    ]
    # Falsy results are left out of the final list
    pip_config_files = [config_file for config_file in candidate_config_files if config_file]

    hosted_repo_url = get_pypi_hosted_repo_url(request_id)
    index_url = get_index_url(hosted_repo_url, username, password)
    env_vars = {"PIP_INDEX_URL": {"value": index_url, "kind": "literal"}}

    ca_cert = nexus.get_ca_cert()
    if ca_cert:
        ca_cert_path = os.path.join("app", "package-index-ca.pem")
        env_vars["PIP_CERT"] = {"value": ca_cert_path, "kind": "path"}
        pip_config_files.append(make_base64_config_file(ca_cert, ca_cert_path))

    default_env_vars = get_worker_config().cachito_default_environment_variables
    env_vars.update(default_env_vars.get("pip", {}))
    update_request_env_vars(request_id, env_vars)

    packages_json_data = PackagesData()
    for config, resolved in zip(package_configs, resolved_packages):
        packages_json_data.add_package(
            resolved["package"],
            os.path.normpath(config.get("path", ".")),
            resolved["dependencies"],
        )
    packages_json_data.write_to_file(bundle_dir.pip_packages_data)

    if pip_config_files:
        update_request_with_config_files(request_id, pip_config_files)
def fetch_gomod_source(request_id, dep_replacements=None, package_configs=None):
    """
    Resolve and fetch gomod dependencies for a given request.

    :param int request_id: the Cachito request ID this is for
    :param list dep_replacements: dependency replacements with the keys "name" and "version";
        only supported with a single path
    :param list package_configs: the list of optional package configurations submitted by the
        user
    :raises CachitoError: if the dependencies could not be retrieved
    """
    go_version = run_cmd(["go", "version"], {}).strip()
    log.info(f"Go version: {go_version}")

    config = get_worker_config()
    configs = [] if package_configs is None else package_configs

    bundle_dir: RequestBundleDir = RequestBundleDir(request_id)
    # Fall back to the root of the application source when no path was configured
    subpaths = [
        os.path.normpath(entry["path"]) for entry in configs if entry.get("path")
    ] or [os.curdir]

    missing_gomod_files = _find_missing_gomod_files(bundle_dir, subpaths)
    if missing_gomod_files:
        missing_joined = "; ".join(missing_gomod_files)

        # A missing go.mod file is tolerated only when a single path is referenced
        if config.cachito_gomod_ignore_missing_gomod_file and len(subpaths) == 1:
            log.warning("go.mod file missing for request at %s", missing_joined)
            return

        plural = "s" if len(missing_gomod_files) > 1 else ""
        raise CachitoError(
            "The {} file{} must be present for the gomod package manager".format(
                missing_joined.strip(), plural
            )
        )

    if dep_replacements and len(subpaths) > 1:
        raise CachitoError(
            "Dependency replacements are only supported for a single go module path."
        )

    env_vars = {
        "GOCACHE": {"value": "deps/gomod", "kind": "path"},
        "GOPATH": {"value": "deps/gomod", "kind": "path"},
        "GOMODCACHE": {"value": "deps/gomod/pkg/mod", "kind": "path"},
    }
    env_vars.update(config.cachito_default_environment_variables.get("gomod", {}))
    update_request_env_vars(request_id, env_vars)

    packages_json_data = PackagesData()
    for subpath in subpaths:
        log.info(
            "Fetching the gomod dependencies for request %d in subpath %s", request_id, subpath
        )
        set_request_state(
            request_id,
            "in_progress",
            f'Fetching the gomod dependencies at the "{subpath}" directory',
        )
        request = get_request(request_id)
        module_source_path = str(bundle_dir.app_subpath(subpath).source_dir)
        try:
            resolved = resolve_gomod(
                module_source_path, request, dep_replacements, bundle_dir.source_dir
            )
        except CachitoError:
            log.exception("Failed to fetch gomod dependencies for request %d", request_id)
            raise

        module = resolved["module"]
        packages_json_data.add_package(module, subpath, resolved["module_deps"])

        # Each Go package of the module is recorded under its own subpath
        for entry in resolved["packages"]:
            go_package = entry["pkg"]
            go_package_subpath = _package_subpath(module["name"], go_package["name"], subpath)
            packages_json_data.add_package(
                go_package, go_package_subpath, entry.get("pkg_deps", [])
            )

    packages_json_data.write_to_file(bundle_dir.gomod_packages_data)
def upload_non_registry_dependency(
    dep_identifier, version_suffix, verify_scripts=False, checksum_info=None
):
    """
    Upload the non-registry npm dependency to the Nexus hosted repository with a custom version.

    The dependency is fetched with ``npm pack``, its archive is copied member by member into a
    new archive whose ``package.json`` carries the suffixed version, and that new archive is
    uploaded.

    :param str dep_identifier: the identifier of the dependency to download
    :param str version_suffix: the suffix to append to the dependency's version in its
        package.json file
    :param bool verify_scripts: if ``True``, raise an exception if dangerous scripts are present
        in the ``package.json`` file and would have been executed by ``npm pack`` if
        ``ignore-scripts`` was set to ``false``
    :param ChecksumInfo checksum_info: if not ``None``, the checksum of the downloaded artifact
        will be verified.
    :raise CachitoError: if the dependency cannot be download, uploaded, or is invalid
    """
    # Scripts that must be absent from package.json when verify_scripts is True
    dangerous_scripts = {"prepare", "prepack"}

    with tempfile.TemporaryDirectory(prefix="cachito-") as temp_dir:
        run_params = {
            "env": {
                # HOME has to be provided explicitly: the fallback of looking the user up in
                # /etc/passwd does not always work, since some deployments (e.g. OpenShift)
                # have no entry there for the running user.
                "HOME": os.environ.get("HOME", ""),
                "NPM_CONFIG_CACHE": os.path.join(temp_dir, "cache"),
                # Prevents any dangerous script from running if this is a Git dependency
                "NPM_CONFIG_IGNORE_SCRIPTS": "true",
                "PATH": os.environ.get("PATH", ""),
                # Makes `npm pack` fail instead of prompting when the SSH key of a protected
                # source, such as a private GitHub repo, is not trusted
                "GIT_SSH_COMMAND": "ssh -o StrictHostKeyChecking=yes",
            },
            "cwd": temp_dir,
        }

        log.info("Downloading the npm dependency %s to be uploaded to Nexus", dep_identifier)
        # The command prints the name of the created tarball, for example:
        # "reactivex-rxjs-6.5.5.tgz\n"
        pack_output = run_cmd(
            ["npm", "pack", dep_identifier],
            run_params,
            f"Failed to download the npm dependency {dep_identifier}",
        )

        dep_archive = os.path.join(temp_dir, pack_output.strip())
        if checksum_info:
            verify_checksum(dep_archive, checksum_info)

        package_json_rel_path = find_package_json(dep_archive)
        if not package_json_rel_path:
            msg = f"The dependency {dep_identifier} does not have a package.json file"
            log.error(msg)
            raise CachitoError(msg)

        archive_dir, archive_name = os.path.split(dep_archive)
        modified_dep_archive = os.path.join(archive_dir, f"modified-{archive_name}")

        with tarfile.open(dep_archive, mode="r:*") as source_tar, tarfile.open(
            modified_dep_archive, mode="x:gz"
        ) as target_tar:
            for member in source_tar.getmembers():
                if member.path == package_json_rel_path:
                    # Only package.json is rewritten; its version gets the suffix appended
                    try:
                        package_json = json.load(source_tar.extractfile(member))
                    except json.JSONDecodeError:
                        msg = (
                            f"The dependency {dep_identifier} does not have a valid "
                            "package.json file"
                        )
                        log.exception(msg)
                        raise CachitoError(msg)

                    if verify_scripts:
                        log.info(
                            "Checking for dangerous scripts in the package.json of %s",
                            dep_identifier,
                        )
                        declared_scripts = package_json.get("scripts", {})
                        if not dangerous_scripts.isdisjoint(declared_scripts.keys()):
                            msg = (
                                f"The dependency {dep_identifier} is not supported because Cachito "
                                "cannot execute the following required scripts of Git "
                                f"dependencies: {', '.join(sorted(dangerous_scripts))}"
                            )
                            log.error(msg)
                            raise CachitoError(msg)

                    old_version = package_json["version"]
                    new_version = f"{old_version}{version_suffix}"
                    log.debug(
                        "Modifying the version of %s from %s to %s",
                        dep_identifier,
                        old_version,
                        new_version,
                    )
                    package_json["version"] = new_version

                    serialized = json.dumps(package_json, indent=2).encode("utf-8")
                    # The tar header must advertise the size of the rewritten content
                    member.size = len(serialized)
                    target_tar.addfile(member, io.BytesIO(serialized))
                else:
                    # Every other member is copied over without any modification
                    target_tar.addfile(member, source_tar.extractfile(member))

        # The upload has to happen while the temporary directory still exists
        repo_name = get_js_hosted_repo_name()
        nexus.upload_asset_only_component(repo_name, "npm", modified_dep_archive)
def download_dependencies(
    download_dir: Path,
    deps: List[Dict[str, Any]],
    proxy_repo_url: str,
    skip_deps: Optional[Set[str]] = None,
    pkg_manager: str = "npm",
) -> Set[str]:
    """
    Download the list of npm dependencies using npm pack to the deps bundle directory.

    By downloading the dependencies, this stages the content in the request specific npm proxy.

    Any dependency that has the key "bundled" set to ``True`` will not be downloaded. This is
    because the dependency is bundled as part of another dependency, and thus already present in
    the tarball of the dependency that bundles it.

    :param download_dir: the downloaded tarball of each dependency will be stored under this
        directory with necessary parent directory components created. For example, the tarball
        of a dependency foo is stored under <download_dir>/github/repo_namespace/foo.tar.gz
    :type download_dir: pathlib.Path
    :param deps: a list of dependencies where each dependency has the keys: bundled, name,
        version, and version_in_nexus
    :type deps: list[dict[str, any]]
    :param str proxy_repo_url: the Nexus proxy repository URL to use as the registry
    :param set[str] skip_deps: a set of dependency identifiers to not download because they've
        already been downloaded for this request.
    :param str pkg_manager: the name of the package manager to download dependencies for,
        affects destination directory and logging output (npm is used to do the actual download
        regardless)
    :return: a set of dependency identifiers that were downloaded
    :rtype: set[str]
    :raises CachitoError: if any of the downloads fail
    """
    assert pkg_manager in ("npm", "yarn")  # nosec

    if skip_deps is None:
        skip_deps = set()

    # Matches versions written as <git-host>:<namespace>/<repo>#<commit>
    known_git_host_regex = re.compile(
        r"^(?P<host>.+)(?::)(?!//)(?P<repo_path>.+)(?:#.+)$"
    )

    conf = get_worker_config()
    with tempfile.TemporaryDirectory(prefix="cachito-") as temp_dir:
        npm_rc_file = os.path.join(temp_dir, ".npmrc")
        configured_ca = conf.cachito_nexus_ca_cert
        # The CA is only handed over when it is configured and the file is really there
        nexus_ca = configured_ca if configured_ca and os.path.exists(configured_ca) else None

        # The token must be privileged so that it has access to the cachito-js repository
        generate_and_write_npmrc_file(
            npm_rc_file,
            proxy_repo_url,
            conf.cachito_nexus_username,
            conf.cachito_nexus_password,
            custom_ca_path=nexus_ca,
        )

        run_params = {
            "env": {
                # HOME has to be provided explicitly: the fallback of looking the user up in
                # /etc/passwd does not always work, since some deployments (e.g. OpenShift)
                # have no entry there for the running user.
                "HOME": os.environ.get("HOME", ""),
                "NPM_CONFIG_CACHE": os.path.join(temp_dir, "cache"),
                # Everything comes from Nexus, so this should be unnecessary; it is kept as an
                # extra precaution
                "NPM_CONFIG_IGNORE_SCRIPTS": "true",
                "NPM_CONFIG_USERCONFIG": npm_rc_file,
                "PATH": os.environ.get("PATH", ""),
            },
            # The tarballs land directly in the bundle directory
            "cwd": str(download_dir),
        }

        log.info("Processing %d %s dependencies to stage in Nexus", len(deps), pkg_manager)
        downloaded_deps = set()
        # Batching keeps Nexus from erroring with "Header is too large"
        deps_batches: List[List] = []
        queued = 0
        batch_size = get_worker_config().cachito_js_download_batch_size
        for dep in deps:
            nexus_version = dep.get("version_in_nexus")
            if nexus_version:
                # The original version is kept to work out the target directory later
                version, external_dep_version = nexus_version, dep["version"]
            else:
                version, external_dep_version = dep["version"], None
            dep_identifier = f"{dep['name']}@{version}"

            if dep["bundled"]:
                log.debug("Not downloading %s since it is a bundled dependency", dep_identifier)
                continue
            if dep["version"].startswith("file:"):
                log.debug("Not downloading %s since it is a file dependency", dep_identifier)
                continue
            if dep_identifier in skip_deps:
                log.debug(
                    "Not downloading %s since it was already downloaded previously", dep_identifier
                )
                continue

            # A new batch is opened every batch_size queued dependencies
            if queued % batch_size == 0:
                deps_batches.append([])
            deps_batches[-1].append((dep_identifier, external_dep_version))
            downloaded_deps.add(dep_identifier)
            queued += 1

        for dep_batch in deps_batches:
            # Only the identifiers are passed to npm; the external versions are left out
            dep_identifiers = [identifier for identifier, _ in dep_batch]
            log.debug(
                "Downloading the following %s dependencies: %s",
                pkg_manager,
                ", ".join(dep_identifiers),
            )
            output = run_cmd(
                ["npm", "pack", *dep_identifiers],
                run_params,
                f"Failed to download the {pkg_manager} dependencies",
            )

            # Pair each output line (a tarball name) with the dependency at the same position,
            # e.g. 'ab-2.10.2-external-sha512-ab.tar.gz' with
            # ('ab@2.10.2-external-sha512-ab', 'https://github.com/ab/2.10.2.tar.gz'),
            # and move every tarball into its own folder
            for tarball, (dep_identifier, external_dep_version) in zip(
                output.split("\n"), dep_batch
            ):
                # Everything before the last "@" is the package name, e.g. ab
                dir_path, _, _ = dep_identifier.rpartition("@")

                # External dependencies get an extra intermediate parent, either
                # github/<org>/<repo> or external-<repo>
                if external_dep_version:
                    git_host_match = known_git_host_regex.match(external_dep_version)
                    if git_host_match:
                        dir_path = os.path.join(
                            git_host_match.group("host"),
                            *git_host_match.group("repo_path").split("/"),
                        )
                    else:
                        dir_path = f"external-{dir_path}"

                # Create the target directory for the dependency
                dep_dir = download_dir.joinpath(*dir_path.split("/", 1))
                dep_dir.mkdir(exist_ok=True, parents=True)
                # Move the tarball from the download directory into the target directory
                shutil.move(str(download_dir.joinpath(tarball)), str(dep_dir.joinpath(tarball)))

        return downloaded_deps
def fetch_yarn_source(request_id: int, package_configs: List[dict] = None):
    """
    Resolve and fetch yarn dependencies for a given request.

    Paths are built with the Python ``os.path`` library, so the format of the paths to the
    configuration files depends on the system the Cachito worker is deployed on (i.e. Linux
    vs Windows).

    :param int request_id: the Cachito request ID this is for
    :param list package_configs: the list of optional package configurations submitted by the
        user
    :raise CachitoError: if the task fails
    """
    node_version = run_cmd(["node", "--version"], {}).strip()
    log.info(f"Node.js version: {node_version}")

    configs = [] if package_configs is None else package_configs

    validate_yarn_config()

    bundle_dir: RequestBundleDir = RequestBundleDir(request_id)
    # Fall back to the root of the application source when no path was configured
    subpaths = [
        os.path.normpath(entry["path"]) for entry in configs if entry.get("path")
    ] or [os.curdir]

    _verify_yarn_files(bundle_dir, subpaths)

    log.info("Configuring Nexus for yarn for the request %d", request_id)
    set_request_state(request_id, "in_progress", "Configuring Nexus for yarn")
    repo_name = get_yarn_proxy_repo_name(request_id)
    prepare_nexus_for_js_request(repo_name)

    yarn_config_files = []
    downloaded_deps: Set[str] = set()
    packages_json_data = PackagesData()

    for position, subpath in enumerate(subpaths):
        log.info("Fetching the yarn dependencies for request %d in subpath %s", request_id, subpath)
        set_request_state(
            request_id,
            "in_progress",
            f'Fetching the yarn dependencies at the "{subpath}" directory',
        )
        request = get_request(request_id)
        local_source_path = str(bundle_dir.app_subpath(subpath).source_dir)
        try:
            resolved = resolve_yarn(local_source_path, request, skip_deps=downloaded_deps)
        except CachitoError:
            log.exception("Failed to fetch yarn dependencies for request %d", request_id)
            raise

        # A new set is built so dependencies fetched for one subpath are skipped for the next
        downloaded_deps = downloaded_deps | resolved["downloaded_deps"]

        log.info(
            "Generating the yarn configuration files for request %d in subpath %s",
            request_id,
            subpath,
        )
        remote_source_path = os.path.normpath(os.path.join("app", subpath))

        package_json = resolved["package.json"]
        if package_json:
            yarn_config_files.append(
                make_base64_config_file(
                    json.dumps(package_json, indent=2),
                    os.path.join(remote_source_path, "package.json"),
                )
            )

        lock_file = resolved["lock_file"]
        if lock_file:
            yarn_config_files.append(
                make_base64_config_file(
                    _yarn_lock_to_str(lock_file),
                    os.path.join(remote_source_path, "yarn.lock"),
                )
            )

        # The environment variables are published once, while handling the first subpath
        if position == 0:
            default_env = get_worker_config().cachito_default_environment_variables
            env_vars = dict(default_env.get("npm", {}))
            # yarn specific defaults take precedence over the npm ones
            env_vars.update(default_env.get("yarn", {}))
            update_request_env_vars(request_id, env_vars)

        packages_json_data.add_package(resolved["package"], subpath, resolved["deps"])

    packages_json_data.write_to_file(bundle_dir.yarn_packages_data)

    log.info("Finalizing the Nexus configuration for yarn for the request %d", request_id)
    set_request_state(request_id, "in_progress", "Finalizing the Nexus configuration for yarn")
    username = get_yarn_proxy_repo_username(request_id)
    password = finalize_nexus_for_js_request(username, repo_name)

    log.info("Generating the .npmrc file(s)")
    proxy_repo_url = get_yarn_proxy_repo_url(request_id)
    npmrc_files = generate_npmrc_config_files(proxy_repo_url, username, password, subpaths)
    yarn_config_files.extend(npmrc_files)

    log.info("Adding empty .yarnrc file(s)")
    for subpath in subpaths:
        yarnrc_path = os.path.normpath(os.path.join("app", subpath, ".yarnrc"))
        yarn_config_files.append(make_base64_config_file("", yarnrc_path))

    update_request_with_config_files(request_id, yarn_config_files)