def get_denylisted_srpms(self):
    """Return the list of denylisted SRPM names from the configured denylist.

    Looks up 'denylist_srpms' in the source-container reactor config; when it
    is not configured, an empty list is returned. Otherwise the denylist JSON
    is fetched from the configured URL and validated to hold a list of
    strings under the configured key.

    :return: list of str, denylisted SRPM names (possibly empty)
    :raises RuntimeError: when the key is missing from the fetched JSON or
                          the value under it is not a list of strings
    """
    settings = get_source_container(self.workflow, fallback={}).get('denylist_srpms')
    if not settings:
        self.log.debug('denylist_srpms is not defined in reactor_config_map')
        return []

    url = settings['denylist_url']
    key = settings['denylist_key']

    session = get_retrying_requests_session()
    resp = session.get(url)
    resp.raise_for_status()
    payload = resp.json()

    if key not in payload:
        self.log.debug('deny list json : %s', payload)
        raise RuntimeError('Denylist key: {} missing in denylist json from : {}'
                           .format(key, url))

    entries = payload[key]
    if not isinstance(entries, list):
        self.log.error('Wrong denylist: %s', repr(entries))
        raise RuntimeError('Denylist value in key: {} is not list: {}'
                           .format(key, type(entries)))

    if any(not isinstance(entry, str) for entry in entries):
        self.log.error('Wrong types in denylist, should be str: %s', repr(entries))
        raise RuntimeError('Values in denylist has to be all strings')

    self.log.debug('denylisted srpms: %s', entries)
    return entries
def download_files(self, downloads: Sequence[DownloadRequest], build_dir: BuildDir) -> Iterator[Path]:
    """Download maven artifacts to a build dir.

    :param downloads: download requests (url, destination, checksums) to fetch
    :param build_dir: build directory; files land under its DOWNLOAD_DIR subdir
    :return: iterator of destination paths, yielded as each download completes
    """
    artifacts_path = build_dir.path / self.DOWNLOAD_DIR

    koji_config = self.workflow.conf.koji
    insecure = koji_config.get('insecure_download', False)

    self.log.debug('%d files to download', len(downloads))
    session = util.get_retrying_requests_session()

    for index, download in enumerate(downloads):
        dest_path = artifacts_path / download.dest
        dest_dir = dest_path.parent
        # exist_ok avoids the check-then-create race of the previous
        # `if not dest_dir.exists(): dest_dir.mkdir(...)` pattern
        dest_dir.mkdir(parents=True, exist_ok=True)
        self.log.debug('%d/%d downloading %s', index + 1, len(downloads),
                       download.url)
        download_url(url=download.url, dest_dir=dest_dir, insecure=insecure,
                     session=session, dest_filename=dest_path.name,
                     expected_checksums=download.checksums)
        yield dest_path
def download_files(self, downloads):
    """Download files into DOWNLOAD_DIR under the plugin workdir.

    :param downloads: download requests (url, dest, checksums) to fetch
    """
    artifacts_path = os.path.join(self.workdir, self.DOWNLOAD_DIR)

    koji_config = get_koji(self.workflow)
    insecure = koji_config.get('insecure_download', False)

    self.log.debug('%d files to download', len(downloads))
    session = util.get_retrying_requests_session()

    for index, download in enumerate(downloads):
        dest_path = os.path.join(artifacts_path, download.dest)
        # os.path.split is the portable idiom; splitting on '/' by hand
        # breaks on platforms with a different separator
        dest_dir, dest_filename = os.path.split(dest_path)
        # exist_ok avoids the check-then-create race
        os.makedirs(dest_dir, exist_ok=True)
        self.log.debug('%d/%d downloading %s', index + 1, len(downloads),
                       download.url)
        download_url(url=download.url, dest_dir=dest_dir, insecure=insecure,
                     session=session, dest_filename=dest_filename,
                     expected_checksums=download.checksums)
def download_sources(self, sources, insecure=False, download_dir=SRPMS_DOWNLOAD_DIR):
    """Download sources content

    Fetch every URL in *sources* into a freshly created temporary
    directory and hand back the directory path.

    :param sources: list, dicts with URLs to download
    :param insecure: bool, whether to perform TLS checks of urls
    :param download_dir: str, directory where to download content
    :return: str, paths to directory with downloaded sources
    """
    dest_dir = os.path.join(tempfile.mkdtemp(), download_dir)
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    session = get_retrying_requests_session()
    for entry in sources:
        download_url(entry['url'], dest_dir, insecure=insecure,
                     session=session, dest_filename=entry.get('dest'))

    return dest_dir
def download_sources(self, sources, insecure=False, download_dir=SRPMS_DOWNLOAD_DIR):
    """Download sources content

    Fetch every URL in *sources* into the source-container sources dir of
    the workflow build dir (under *download_dir*, optionally in a per-source
    subdir) and return the directory path.

    :param sources: list, dicts with URLs to download
    :param insecure: bool, whether to perform TLS checks of urls
    :param download_dir: str, directory where to download content
    :return: str, paths to directory with downloaded sources
    """
    dest_dir: Path = self.workflow.build_dir.source_container_sources_dir / download_dir
    dest_dir.mkdir(parents=True, exist_ok=True)

    session = get_retrying_requests_session()
    for entry in sources:
        # each source may request its own subdirectory and checksums
        target: Path = dest_dir / entry.get('subdir', '')
        target.mkdir(parents=True, exist_ok=True)
        download_url(entry['url'], target, insecure=insecure,
                     session=session, dest_filename=entry.get('dest'),
                     expected_checksums=entry.get('checksums', {}))

    return str(dest_dir)
def download_file(self, url, dest_filename, insecure=False):
    """Downloads file specified by URL

    :param url: file url
    :param dest_filename: filename to be used for downloaded content
    :param insecure: download file without cert validation
    :return: file path of downloaded content
    """
    self.log.debug('Downloading file: %s', url)
    workdir = self.workflow.source.get_build_file_path()[1]
    dest_dir = os.path.join(workdir, self.DOWNLOAD_DIR)
    dest_path = os.path.join(dest_dir, dest_filename)

    # exist_ok avoids the check-then-create race
    os.makedirs(dest_dir, exist_ok=True)

    req_session = get_retrying_requests_session()
    # use the response as a context manager so the streamed connection is
    # released even when writing a chunk raises
    with req_session.get(url, stream=True, verify=not insecure) as request:
        request.raise_for_status()
        with open(dest_path, 'wb') as f:
            for chunk in request.iter_content(
                    chunk_size=DEFAULT_DOWNLOAD_BLOCK_SIZE):
                f.write(chunk)

    self.log.debug('Download finished: %s', dest_path)
    return dest_path
def _make_session(self, insecure, cert):
    """Build the retrying requests session used for API calls.

    :param insecure: bool, disable TLS verification when true
    :param cert: optional client certificate for the session
    :return: configured requests session
    """
    # method_whitelist=False allows retrying non-idempotent methods like POST
    retrying = get_retrying_requests_session(method_whitelist=False)
    retrying.verify = not insecure
    if cert:
        retrying.cert = cert
    return retrying
def download_files(self, downloads):
    """Download files into DOWNLOAD_DIR and verify their checksums.

    :param downloads: download requests; each has url, dest and a mapping of
                      checksum algorithm name -> expected hex digest
    :raises ValueError: when a computed digest does not match the expected one
    """
    artifacts_path = os.path.join(self.workdir, self.DOWNLOAD_DIR)
    self.log.debug('%d files to download', len(downloads))
    session = util.get_retrying_requests_session()
    for index, download in enumerate(downloads):
        dest_path = os.path.join(artifacts_path, download.dest)
        dest_dir = dest_path.rsplit('/', 1)[0]
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        self.log.debug('%d/%d downloading %s', index + 1, len(downloads),
                       download.url)
        # one hasher per expected algorithm, fed as chunks stream in so the
        # file is never re-read for verification
        checksums = {algo: hashlib.new(algo) for algo in download.checksums}
        request = session.get(download.url, stream=True)
        request.raise_for_status()
        with open(dest_path, 'wb') as f:
            for chunk in request.iter_content(chunk_size=DEFAULT_DOWNLOAD_BLOCK_SIZE):
                f.write(chunk)
                for checksum in checksums.values():
                    checksum.update(chunk)
        for algo, checksum in checksums.items():
            if checksum.hexdigest() != download.checksums[algo]:
                raise ValueError(
                    'Computed {} checksum, {}, does not match expected checksum, {}'
                    .format(algo, checksum.hexdigest(), download.checksums[algo]))
def download_files(self, downloads):
    """Download files into DOWNLOAD_DIR and verify their checksums.

    :param downloads: download requests; each has url, dest and a mapping of
                      checksum algorithm name -> expected hex digest
    :raises ValueError: when a computed digest does not match the expected one
    """
    artifacts_path = os.path.join(self.workdir, self.DOWNLOAD_DIR)
    self.log.debug('%d files to download', len(downloads))
    session = util.get_retrying_requests_session()
    for index, download in enumerate(downloads):
        dest_path = os.path.join(artifacts_path, download.dest)
        dest_dir = dest_path.rsplit('/', 1)[0]
        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        self.log.debug('%d/%d downloading %s', index + 1, len(downloads),
                       download.url)
        # one hasher per expected algorithm, fed as chunks stream in so the
        # file is never re-read for verification
        checksums = {
            algo: hashlib.new(algo) for algo in download.checksums
        }
        request = session.get(download.url, stream=True)
        request.raise_for_status()
        with open(dest_path, 'wb') as f:
            for chunk in request.iter_content(
                    chunk_size=DEFAULT_DOWNLOAD_BLOCK_SIZE):
                f.write(chunk)
                for checksum in checksums.values():
                    checksum.update(chunk)
        for algo, checksum in checksums.items():
            if checksum.hexdigest() != download.checksums[algo]:
                raise ValueError(
                    'Computed {} checksum, {}, does not match expected checksum, {}'
                    .format(algo, checksum.hexdigest(), download.checksums[algo]))
def test_connection_failure(self):
    """download_url must propagate RetryError raised by the session."""
    url = 'https://example.com/path/file'
    dest_dir = tempfile.mkdtemp()

    session = get_retrying_requests_session()
    # make every GET fail as if connection retries were exhausted
    (flexmock(session).should_receive('get').and_raise(
        requests.exceptions.RetryError))

    with pytest.raises(requests.exceptions.RetryError):
        download_url(url, dest_dir, session=session)
def get_remote_source_files(
        self, download_queue: Sequence[DownloadRequest]) -> List[Dict[str, Any]]:
    """Download url source files and build Koji remote-source-file metadata.

    :param download_queue: download requests (url, dest, checksums) to fetch
    :return: list of dicts with 'file' (local path) and Koji 'metadata' for
             each downloaded file
    """
    remote_source_files = []
    downloads_path = self.workflow.build_dir.any_platform.path / self.DOWNLOAD_DIR

    session = util.get_retrying_requests_session()

    self.log.debug('%d url source files to download', len(download_queue))

    koji_config = self.workflow.conf.koji
    insecure = koji_config.get('insecure_download', False)

    for index, download in enumerate(download_queue):
        dest_filename = download.dest
        # when dest is not a plain filename, fall back to the server-supplied
        # name from a HEAD request
        if not re.fullmatch(r'^[\w\-.]+$', dest_filename):
            # NOTE(review): assumes the HEAD response always carries a
            # Content-disposition header containing "filename=" — if the
            # header is absent, .split() is called on None and raises
            # AttributeError; confirm upstream guarantees the header
            dest_filename = session.head(download.url).headers.get(
                "Content-disposition").split("filename=")[1].replace(
                    '"', '')

        dest_path = os.path.join(downloads_path, dest_filename)
        dest_dir = os.path.dirname(dest_path)

        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)

        self.log.debug('%d/%d downloading %s', index + 1, len(download_queue),
                       download.url)

        download_url(url=download.url, dest_dir=dest_dir, insecure=insecure,
                     session=session, dest_filename=dest_filename,
                     expected_checksums=download.checksums)

        # first (and presumably only) checksum algorithm is reported to Koji
        checksum_type = list(download.checksums.keys())[0]

        remote_source_files.append({
            'file': dest_path,
            'metadata': {
                'type': KOJI_BTYPE_REMOTE_SOURCE_FILE,
                'checksum_type': checksum_type,
                'checksum': download.checksums[checksum_type],
                'filename': dest_filename,
                'filesize': os.path.getsize(dest_path),
                'extra': {
                    'source-url': download.url,
                    'artifacts': self.source_url_to_artifacts[download.url],
                    'typeinfo': {
                        KOJI_BTYPE_REMOTE_SOURCE_FILE: {}
                    },
                },
            }
        })

    return remote_source_files
def __init__(self, pnc_map, session=None):
    """Set up the PNC helper.

    :param pnc_map: dict with 'base_api_url' and 'get_scm_archive_path'
    :param session: optional requests session; a retrying one is created
                    when not supplied
    """
    self.session = session if session else get_retrying_requests_session()
    self.base_api_url = pnc_map['base_api_url']
    # using urljoin here causes the API path in the base_api_url to be
    # removed, so the URL is concatenated by hand instead
    self.get_scm_archive_request_url = (
        self.base_api_url + '/' + pnc_map['get_scm_archive_path'])
def extract_base_url(self, repo_url):
    """Fetch a repo file and collect the baseurl of every section.

    :param repo_url: str, URL of the .repo file to parse
    :return: list of str, baseurl values of all sections that define one
    """
    session = get_retrying_requests_session()
    response = session.get(repo_url)
    response.raise_for_status()
    repo = ConfigParser()
    # ConfigParser.readfp() was deprecated since Python 3.2 and removed in
    # Python 3.12; read_string() is the supported equivalent
    repo.read_string(response.text)
    return [repo.get(section, 'baseurl') for section in repo.sections()
            if repo.has_option(section, 'baseurl')]
def __init__(self, pnc_map, session=None):
    """Set up the PNC helper.

    :param pnc_map: dict with PNC configuration; must contain 'base_api_url'
    :param session: optional requests session; a retrying one is created
                    when not supplied
    """
    self.session = session or get_retrying_requests_session()
    self.pnc_map = pnc_map
    self.base_api_url = self.pnc_map['base_api_url']
    # request-URL caches; presumably filled in lazily elsewhere in the class
    self._artifact_request_url = None
    self._scm_archive_request_url = None
def exclude_files_from_remote_sources(self, remote_sources_map, remote_sources_dir):
    """
    Remove denylisted content from downloaded cachito source archives.

    Each archive is unpacked, denylisted packages/files are removed, and the
    archive is re-created in place without the excluded content.

    :param remote_sources_map: dict, keys are filenames of sources from cachito,
        values are url with json from cachito
    :param remote_sources_dir: str, dir with downloaded sources from cachito
    """
    src_config = self.workflow.conf.source_container
    denylist_sources_url = src_config.get('denylist_sources')

    if not denylist_sources_url:
        self.log.debug('no "denylist_sources" defined, not excluding any '
                       'files from remote sources')
        return

    request_session = get_retrying_requests_session()

    denylist_sources = self._get_denylist_sources(request_session, denylist_sources_url)

    # key: full path to source archive, value: cachito json
    full_remote_sources_map = self._create_full_remote_sources_map(
        request_session, remote_sources_map, remote_sources_dir)
    for remote_archive, remote_json in full_remote_sources_map.items():
        unpack_dir = remote_archive + '_unpacked'

        # NOTE(review): extractall() does not guard against path traversal
        # from crafted member names — confirm archives only ever come from a
        # trusted cachito instance
        with tarfile.open(remote_archive) as tf:
            tf.extractall(unpack_dir)

        delete_app = self._check_if_package_excluded(remote_json['packages'],
                                                     denylist_sources,
                                                     remote_archive)

        # if any package in cachito json matched excluded entry,
        # remove 'app' from sources, except 'app/vendor' when exists
        if delete_app and os.path.exists(os.path.join(unpack_dir, 'app')):
            self._delete_app_directory(remote_sources_dir, unpack_dir,
                                       remote_archive)

        # search for excluded matches
        matches = self._get_excluded_matches(unpack_dir, denylist_sources)

        self._remove_excluded_matches(matches)

        # delete former archive
        os.unlink(remote_archive)

        # re-create new archive without excluded content
        with tarfile.open(remote_archive, "w:gz") as tar:
            for add_file in os.listdir(unpack_dir):
                tar.add(os.path.join(unpack_dir, add_file), arcname=add_file)

        # cleanup unpacked dir
        shutil.rmtree(unpack_dir)
def extract_base_url(self, repo_url):
    """Fetch a repo file and collect the baseurl of every section.

    :param repo_url: str, URL of the .repo file to parse
    :return: list of str, baseurl values of all sections that define one
    """
    session = get_retrying_requests_session()
    response = session.get(repo_url)
    response.raise_for_status()
    repo = ConfigParser()
    # ConfigParser.readfp() was deprecated since Python 3.2 and removed in
    # Python 3.12; read_string() is the supported equivalent
    repo.read_string(response.text)
    return [
        repo.get(section, 'baseurl') for section in repo.sections()
        if repo.has_option(section, 'baseurl')
    ]
def test_connection_failure(self):
    """PNCUtil must propagate RetryError from the underlying session."""
    build_id = '1234'
    session = get_retrying_requests_session()

    # every GET raises as if connection retries were exhausted
    (flexmock(session)
     .should_receive('get')
     .and_raise(requests.exceptions.RetryError))

    pnc_util = PNCUtil(mock_pnc_map(), session)

    with pytest.raises(requests.exceptions.RetryError):
        pnc_util.get_scm_archive_from_build_id(build_id)
def _setup_session(self, insecure, token, cert):
    """Build and store the retrying session used for API calls.

    :param insecure: bool, disable TLS verification when true
    :param token: optional OIDC token added as an auth header
    :param cert: optional client certificate for the session
    """
    # method_whitelist=False allows retrying non-idempotent methods like POST
    sess = get_retrying_requests_session(method_whitelist=False)
    sess.verify = not insecure
    if cert:
        sess.cert = cert
    if token:
        sess.headers[self.OIDC_TOKEN_HEADER] = '%s %s' % (self.OIDC_TOKEN_TYPE, token)
    self.session = sess
def __init__(self, url, organization, token, insecure=False):
    """
    :param url: URL of OMPS service
    :param organization: organization to be used for manifests
    :param token: secret auth token
    :param insecure: don't validate OMPS server cert
    """
    self._url, self._organization = url, organization
    self._token, self._insecure = token, insecure
    self.log = logging.getLogger(self.__class__.__name__)
    # this class handle status errors itself
    self.req_session = get_retrying_requests_session(raise_on_status=False)
def test_streaming_failure(self):
    """download_url must re-raise when streaming the body keeps failing."""
    url = 'https://example.com/path/file'
    dest_dir = tempfile.mkdtemp()

    session = get_retrying_requests_session()
    # get response shows successful connection
    response = flexmock()
    (response.should_receive('raise_for_status'))
    # but streaming from the response fails
    (response.should_receive('iter_content').and_raise(
        requests.exceptions.RequestException))
    # get on the session should return our mock response
    (flexmock(session).should_receive('get').and_return(response))
    # Speed through the retries
    (flexmock(time).should_receive('sleep'))

    with pytest.raises(requests.exceptions.RequestException):
        download_url(url, dest_dir, session=session)
def _get_cache_allowlist(self) -> List[Dict[str, Any]]:
    """Fetch the lookaside-cache allowlist, or return [] when not configured.

    :return: parsed YAML fetched from the configured allowlist URL
    """
    src_config = self.workflow.conf.source_container
    allowlist_cache_url = src_config.get('lookaside_cache_allowlist')

    if not allowlist_cache_url:
        self.log.debug('no "lookaside_cache_allowlist" defined, '
                       'not allowing any lookaside cache usage')
        return []

    self.log.debug('"lookaside_cache_allowlist" defined, might allow lookaside cache usage')
    response = get_retrying_requests_session().get(allowlist_cache_url)
    response.raise_for_status()
    return yaml.safe_load(response.text)
def download_url(url, dest_dir, insecure=False, session=None, dest_filename=None):
    """Download file from URL, handling retries

    To download to a temporary directory, use:
      f = download_url(url, tempfile.mkdtemp())

    :param url: URL to download from
    :param dest_dir: existing directory to create file in
    :param insecure: bool, whether to perform TLS checks
    :param session: optional existing requests session to use
    :param dest_filename: optional filename for downloaded file
    :return: str, path of downloaded file
    """
    if session is None:
        session = get_retrying_requests_session()

    parsed_url = urlparse(url)
    if not dest_filename:
        # default to the last path component of the URL
        dest_filename = os.path.basename(parsed_url.path)
    dest_path = os.path.join(dest_dir, dest_filename)
    logger.debug('downloading %s', url)

    # the session already retries failed connections; this loop additionally
    # retries (with exponential backoff) when streaming the response body
    # fails part-way through, re-issuing the GET and rewriting the file
    for attempt in range(HTTP_MAX_RETRIES + 1):
        response = session.get(url, stream=True, verify=not insecure)
        response.raise_for_status()
        try:
            with open(dest_path, 'wb') as f:
                for chunk in response.iter_content(
                        chunk_size=DEFAULT_DOWNLOAD_BLOCK_SIZE):
                    f.write(chunk)
            break
        except requests.exceptions.RequestException:
            if attempt < HTTP_MAX_RETRIES:
                time.sleep(HTTP_BACKOFF_FACTOR * (2**attempt))
            else:
                raise

    logger.debug('download finished: %s', dest_path)
    return dest_path
def download_sources(self, urls, insecure=False):
    """Download sources content

    Fetch every given URL into a freshly created temporary directory and
    return the directory path.

    :param urls: list of str, URLs of the sources to download
    :param insecure: bool, whether to perform TLS checks of urls
    :return: str, paths to directory with downloaded sources
    """
    dest_dir = os.path.join(tempfile.mkdtemp(), self.DOWNLOAD_DIR)
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)

    session = get_retrying_requests_session()
    for url in urls:
        download_url(url, dest_dir, insecure=insecure, session=session)

    return dest_dir
def icm(self):
    """
    Get and validate the ICM from the Cachito API `content-manifest` endpoint.

    :return: dict, the ICM as a Python dict
    """
    if self._icm is None:
        if self.icm_url is None:
            # no content-manifest URL configured: use the minimal stub
            self._icm = deepcopy(self.minimal_icm)
        else:
            session = get_retrying_requests_session()
            session.verify = self.cachito_verify
            self.log.debug('Making request to "%s"', self.icm_url)
            response = session.get(self.icm_url)
            response.raise_for_status()
            self._icm = response.json()  # Returns dict
    # Validate; `json.dumps()` converts `icm` to str. Confusingly, `read_yaml`
    # *will* validate JSON
    read_yaml(json.dumps(self._icm), 'schemas/content_manifest.json')
    return self._icm
def request_delete(self, url, manifest, insecure, auth):
    """Issue a DELETE for a manifest; return True only when it was deleted.

    404 (not found) and 405 (deletion disabled) are logged and tolerated.

    :param url: str, registry URL to DELETE
    :param manifest: manifest identifier, used for log messages
    :param insecure: bool, disable TLS verification when true
    :param auth: auth object passed to requests
    :return: bool, True when the manifest was deleted, False otherwise
    :raises PluginFailedException: for any error other than 404/405
    """
    session = get_retrying_requests_session()
    try:
        response = session.delete(url, verify=not insecure, auth=auth)
        response.raise_for_status()
        self.log.info("deleted manifest %s", manifest)
        return True
    except (HTTPError, RetryError) as ex:
        # NOTE(review): RetryError can carry ex.response == None, in which
        # case the attribute accesses below raise AttributeError instead of
        # the intended handling — confirm whether RetryError ever reaches
        # here without a response attached
        if ex.response.status_code == requests.codes.NOT_FOUND:
            self.log.warning("cannot delete %s: not found", manifest)
        elif ex.response.status_code == requests.codes.METHOD_NOT_ALLOWED:
            self.log.warning("cannot delete %s: image deletion disabled on registry",
                             manifest)
        else:
            msg = "failed to delete %s: %s" % (manifest, ex.response.reason)
            self.log.error("%s\n%s", msg, ex.response.text)
            raise PluginFailedException(msg)
    return False
def get_srpm_urls(self, sigkeys=None, insecure=False):
    """Fetch SRPM download URLs for each image generated by a build

    Build each possible SRPM URL and check if the URL is available,
    respecting the signing intent preference order.

    :param sigkeys: list, strings for keys which signed the srpms to be fetched
    :param insecure: bool, skip TLS verification of the availability HEAD checks
    :return: list, strings with URLs pointing to SRPM files
    :raises RuntimeError: for external RPMs, missing SOURCERPM headers, or
        SRPMs not signed by any of the given keys
    """
    if not sigkeys:
        # empty sigkey means "unsigned" path candidates
        sigkeys = ['']

    self.log.debug('get srpm_urls: %s', self.koji_build_id)
    archives = self.session.listArchives(self.koji_build_id, type='image')
    self.log.debug('archives: %s', archives)
    rpms = [rpm for archive in archives
            for rpm in self.session.listRPMs(imageID=archive['id'])]

    denylist_srpms = self.get_denylisted_srpms()

    srpm_build_paths = {}
    for rpm in rpms:
        rpm_id = rpm['id']
        self.log.debug('Resolving SRPM for RPM ID: %s', rpm_id)

        if rpm['external_repo_name'] != 'INTERNAL':
            msg = ('RPM comes from an external repo (RPM ID: {}). '
                   'External RPMs are currently not supported.').format(rpm_id)
            raise RuntimeError(msg)

        rpm_hdr = self.session.getRPMHeaders(rpm_id, headers=['SOURCERPM'])
        if 'SOURCERPM' not in rpm_hdr:
            raise RuntimeError('Missing SOURCERPM header (RPM ID: {})'.format(rpm_id))

        # strip "-<version>-<release>" to get the bare package name
        srpm_name = rpm_hdr['SOURCERPM'].rsplit('-', 2)[0]

        if any(denied == srpm_name for denied in denylist_srpms):
            self.log.debug('skipping denylisted srpm %s', rpm_hdr['SOURCERPM'])
            continue

        srpm_filename = rpm_hdr['SOURCERPM']
        if srpm_filename in srpm_build_paths:
            continue

        rpm_build = self.session.getBuild(rpm['build_id'], strict=True)
        base_url = self.pathinfo.build(rpm_build)
        srpm_build_paths[srpm_filename] = base_url

    srpm_urls = []
    missing_srpms = []
    req_session = get_retrying_requests_session()
    for srpm_filename, base_url in srpm_build_paths.items():
        # try sigkeys in preference order; first available candidate wins
        for sigkey in sigkeys:
            # koji uses lowercase for paths. We make sure the sigkey is in lower case
            url_candidate = self.assemble_srpm_url(base_url, srpm_filename,
                                                   sigkey.lower())
            # allow redirects, head call doesn't do it by default
            request = req_session.head(url_candidate, verify=not insecure,
                                       allow_redirects=True)
            if request.ok:
                srpm_urls.append({'url': url_candidate})
                self.log.debug('%s is available for signing key "%s"',
                               srpm_filename, sigkey)
                break
        else:
            # for/else: no sigkey produced an available URL
            self.log.error('%s not found for the given signing intent: %s"',
                           srpm_filename, self.signing_intent)
            missing_srpms.append(srpm_filename)

    if missing_srpms:
        raise RuntimeError('Could not find files signed by any of {} for these SRPMS: {}'.
                           format(sigkeys, missing_srpms))

    return srpm_urls
def fetch(self):
    """Retrieve the repo file from self.repourl and store it on self.content."""
    response = get_retrying_requests_session().get(self.repourl)
    response.raise_for_status()
    self.content = response.content
def run(self):
    """
    Run the plugin.

    Downloads the remote source archive, unpacks it into REMOTE_SOURCE under
    the container build workdir, injects cachito-provided config files, sets
    build args, and writes the cachito.env file.

    :return: str, path of the downloaded archive (None when no url is set)
    """
    if not self.url:
        self.log.info('No remote source url to download, skipping plugin')
        return

    session = get_retrying_requests_session()

    # Download the source code archive
    cachito_config = get_cachito(self.workflow)
    insecure_ssl_conn = cachito_config.get('insecure', False)
    archive = download_url(self.url, self.workflow.source.workdir,
                           session=session, insecure=insecure_ssl_conn)

    # Unpack the source code archive into a dedicated dir in container build workdir
    dest_dir = os.path.join(self.workflow.builder.df_dir, self.REMOTE_SOURCE)
    if not os.path.exists(dest_dir):
        os.makedirs(dest_dir)
    else:
        raise RuntimeError('Conflicting path {} already exists in the dist-git repository'
                           .format(self.REMOTE_SOURCE))

    # NOTE(review): extractall() does not guard against path traversal from
    # crafted member names — confirm the archive always comes from a trusted
    # cachito instance
    with tarfile.open(archive) as tf:
        tf.extractall(dest_dir)

    config_files = (self.get_remote_source_config(session,
                                                  self.remote_source_conf_url,
                                                  insecure_ssl_conn)
                    if self.remote_source_conf_url else [])

    # Inject cachito provided configuration files
    for config in config_files:
        config_path = os.path.join(dest_dir, config['path'])
        if config['type'] == CFG_TYPE_B64:
            data = base64.b64decode(config['content'])
            with open(config_path, 'wb') as f:
                f.write(data)
        else:
            err_msg = "Unknown cachito configuration file data type '{}'".format(config['type'])
            raise ValueError(err_msg)

        # config files are made read-only
        os.chmod(config_path, 0o444)

    # Set build args
    self.workflow.builder.buildargs.update(self.buildargs)

    # Create cachito.env file with environment variables received from cachito request
    self.generate_cachito_env_file()

    # To copy the sources into the build image, Dockerfile should contain
    # COPY $REMOTE_SOURCE $REMOTE_SOURCE_DIR
    args_for_dockerfile_to_add = {
        'REMOTE_SOURCE': self.REMOTE_SOURCE,
        'REMOTE_SOURCE_DIR': REMOTE_SOURCE_DIR,
    }
    self.workflow.builder.buildargs.update(args_for_dockerfile_to_add)

    return archive
def run(self):
    """
    Run the plugin.

    Downloads every configured remote source archive, unpacks each into its
    own directory under REMOTE_SOURCE in the container build workdir, injects
    cachito-provided config files, sets build args, and writes per-source
    cachito.env files.

    :return: list of str, paths of the downloaded archives (None when no
             remote sources are configured)
    """
    if not self.remote_sources:
        self.log.info('Missing remote_sources parameters, skipping plugin')
        return

    session = get_retrying_requests_session()

    archives = []

    cachito_config = get_cachito(self.workflow)
    insecure_ssl_conn = cachito_config.get('insecure', False)

    for remote_source in self.remote_sources:
        parsed_url = urlparse(remote_source['url'])
        dest_filename = os.path.basename(parsed_url.path)

        # prepend remote source name to destination filename, so multiple source archives
        # don't have name collision
        if self.multiple_remote_sources:
            dest_filename = "{}_{}".format(remote_source['name'], dest_filename)

        # Download the source code archive
        archive = download_url(remote_source['url'],
                               self.workflow.source.workdir,
                               session=session,
                               insecure=insecure_ssl_conn,
                               dest_filename=dest_filename)
        archives.append(archive)

        # Unpack the source code archive into a dedicated dir in container build workdir
        dest_dir = os.path.join(self.workflow.builder.df_dir, self.REMOTE_SOURCE)
        sub_path = self.REMOTE_SOURCE

        if self.multiple_remote_sources:
            dest_dir = os.path.join(dest_dir, remote_source['name'])
            sub_path = os.path.join(sub_path, remote_source['name'])

        if not os.path.exists(dest_dir):
            os.makedirs(dest_dir)
        else:
            raise RuntimeError('Conflicting path {} already exists in the dist-git repository'
                               .format(sub_path))

        # NOTE(review): extractall() does not guard against path traversal
        # from crafted member names — confirm archives always come from a
        # trusted cachito instance
        with tarfile.open(archive) as tf:
            tf.extractall(dest_dir)

        config_files = (self.get_remote_source_config(session,
                                                      remote_source["configs"],
                                                      insecure_ssl_conn))

        self.generate_cachito_config_files(dest_dir, config_files)

        # Set build args
        if not self.multiple_remote_sources:
            self.workflow.builder.buildargs.update(remote_source['build_args'])

        # Create cachito.env file with environment variables received from cachito request
        self.generate_cachito_env_file(dest_dir, remote_source['build_args'])

    self.add_general_buildargs()

    return archives