def _verify(self, file_location, target, all_chksums=True, handlers=None): """Internal function for derivatives. Digs through chksums, and either returns None, or throws an errors.FetchFailed exception. - -2: file doesn't exist. - -1: if (size chksum is available, and file is smaller than stated chksum) - 0: if all chksums match - 1: if file is too large (if size chksums are available) or else size is right but a chksum didn't match. if all_chksums is True, all chksums must be verified; if false, all a handler can be found for are used. """ nondefault_handlers = handlers if handlers is None: try: handlers = get_handlers(target.chksums) except MissingChksumHandler as e: raise errors.MissingChksumHandler( f'missing required checksum handler: {e}') if all_chksums: missing = set(target.chksums).difference(handlers) if missing: raise errors.RequiredChksumDataMissing(target, *sorted(missing)) if "size" in handlers: val = handlers["size"](file_location) if val == -1: raise errors.MissingDistfile(file_location) if val != target.chksums["size"]: if val < target.chksums["size"]: raise errors.FetchFailed( file_location, 'file is too small', resumable=True) raise errors.ChksumFailure( file_location, chksum='size', expected=target.chksums["size"], value=val) elif not os.path.exists(file_location): raise errors.MissingDistfile(file_location) elif not os.stat(file_location).st_size: raise errors.FetchFailed( file_location, 'file is empty', resumable=False) chfs = set(target.chksums).intersection(handlers) chfs.discard("size") chfs = list(chfs) if nondefault_handlers: for x in chfs: val = handlers[x](file_location) if val != target.chksums[x]: raise errors.ChksumFailure( file_location, chksum=x, expected=target.chksums[x], value=val) else: desired_vals = [target.chksums[x] for x in chfs] calced = get_chksums(file_location, *chfs) for desired, got, chf in zip(desired_vals, calced, chfs): if desired != got: raise errors.ChksumFailure( file_location, chksum=chf, expected=desired, value=got)
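# For context: a minimal sketch of the handler protocol _verify relies on,
# assuming handlers are plain callables mapping a file path to a chksum value
# and that get_handlers() returns a name -> callable mapping. The handler
# names and the -1 "missing file" convention below are inferred from the code
# above, not taken from the real API.
import hashlib
import os

def _size_handler(path):
    # return the file size, or -1 if the file is missing (matches the
    # val == -1 check in _verify above)
    try:
        return os.stat(path).st_size
    except FileNotFoundError:
        return -1

def _sha256_handler(path):
    # hex SHA256 digest of the file contents, read in chunks
    h = hashlib.sha256()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            h.update(chunk)
    return h.hexdigest()

# get_handlers(target.chksums) is assumed to produce a mapping like this,
# keyed by the chksum names present on the target:
_example_handlers = {'size': _size_handler, 'sha256': _sha256_handler}
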
def __init__(self, fname=None, **chksums):
    # if given a filename, calculate checksums from that file
    if fname is not None:
        if not os.path.exists(fname):
            raise ChksumException('File not found: %s' % fname)
        values = get_chksums(fname, *self.algorithms)
        chksums = zip(self.algorithms, values)
    # otherwise use the explicitly provided checksums
    else:
        chksums = chksums.items()

    # validate the checksums and set attributes
    tmp_algorithms = list(self.algorithms)
    for algorithm, chksum in chksums:
        if algorithm not in tmp_algorithms:
            raise ChksumException(
                'Invalid checksum algorithm: %s' % algorithm)
        setattr(self, algorithm, ChksumValue(algorithm, chksum))
        tmp_algorithms.remove(algorithm)
    if tmp_algorithms:
        raise ChksumException(
            'Missing checksums: %s' % ', '.join(tmp_algorithms))

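# Usage sketch for the constructor above, assuming it belongs to a class
# whose subclasses define `algorithms` (FileChksums is an assumed name for
# that class; the checksum values shown are the well-known empty-input
# digests, used purely for illustration).
class Md5Sha1Chksums(FileChksums):
    algorithms = ('md5', 'sha1')

# path-based construction: checksums are computed from the file's contents
c1 = Md5Sha1Chksums(fname='/path/to/distfile.tar.gz')

# value-based construction: every algorithm must be supplied, otherwise
# ChksumException('Missing checksums: ...') is raised
c2 = Md5Sha1Chksums(
    md5='d41d8cd98f00b204e9800998ecf8427e',
    sha1='da39a3ee5e6b4b0d3255bfef95601890afd80709')
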
def update_from_xpak(self, pkg, xpak):
    # invert the lookups here; if you do .items() on an xpak,
    # it'll load up the contents in full.
    new_dict = {k: xpak[k] for k in self._known_keys if k in xpak}
    new_dict['_chf_'] = xpak._chf_
    chfs = [x for x in self._stored_chfs if x != 'mtime']
    for key, value in zip(chfs, get_chksums(pkg.path, *chfs)):
        if key != 'size':
            # hash digests are integers here; store them as lowercase hex
            value = "%x" % (value,)
        new_dict[key.upper()] = value
    self[pkg.cpvstr] = new_dict
    return new_dict

def validate(distfile):
    # validate format
    if not distfile.endswith(tuple(_supported_formats)):
        raise DiffUnsupported('Invalid distfile type: %s' % distfile)

    # validate size (XXX: improve this; make the max size a config opt)
    distfile_path = os.path.join(portage.settings['DISTDIR'], distfile)
    size = get_chksums(distfile_path, 'size')[0]
    if size > (300 * 1024 * 1024):  # 300MB
        raise DiffUnsupported(
            'Distfile too large: %s (%d bytes)' % (distfile, size))

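# Usage sketch for validate(), assuming _supported_formats holds archive
# suffixes; the values below are illustrative, not taken from the source.
_supported_formats = ('.tar.gz', '.tar.bz2', '.tar.xz')

validate('foo-1.0.tar.gz')  # accepted if the distfile is under the 300MB cap
validate('foo-1.0.zip')     # raises DiffUnsupported: invalid distfile type
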
def test_init(self):
    base.test_init(self)
    mkobj = self.make_obj
    o = mkobj(__file__)
    with open(__file__) as f:
        raw_data = f.read()
    self.assertEqual(o.data.text_fileobj().read(), raw_data)
    o = mkobj("/bin/this-file-should-not-exist-nor-be-read",
              data=data_source(raw_data))
    self.assertEqual(o.data.text_fileobj().read(), raw_data)
    keys = list(o.chksums.keys())
    self.assertEqual(
        [o.chksums[x] for x in keys],
        list(get_chksums(data_source(raw_data), *keys)))
    chksums = dict(o.chksums.items())
    self.assertEqual(
        sorted(mkobj(chksums=chksums).chksums.items()),
        sorted(chksums.items()))

def _assemble_pkg_dict(cls, pkg):
    d = {}
    sequences = cls._pkg_attr_sequences
    for key in cls._stored_attrs:
        value = getattr(pkg, key)
        if key in sequences:
            value = ' '.join(sorted(value))
        else:
            value = str(value).strip()
        key = key.upper()
        d[cls._serialize_map.get(key, key)] = value

    for key, value in zip(cls._stored_chfs,
                          get_chksums(pkg.path, *cls._stored_chfs)):
        if key != 'size':
            value = "%x" % (value,)
        d[key.upper()] = value
    d["MTIME"] = str(os.stat(pkg.path).st_mtime)
    return d

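# The "%x" conversion above assumes get_chksums returns integer digests for
# hash functions; a quick round-trip illustration (the digest value is made up):
digest = 0x9f86d081884c7d65       # hypothetical integer digest
stored = "%x" % digest            # -> '9f86d081884c7d65' (lowercase hex)
assert int(stored, 16) == digest  # parsing the hex string recovers the integer
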
def feed(self, pkgset):
    pkg = pkgset[0]
    pkg_path = pjoin(
        self.options.target_repo.location, pkg.category, pkg.package)
    ebuild_ext = '.ebuild'
    mismatched = []
    invalid = []
    unknown = []
    # note: os.walk isn't used for the top-level scan since size info is
    # needed as well
    for filename in listdir(pkg_path):
        path = pjoin(pkg_path, filename)
        if self.gitignored(path):
            continue
        if os.path.isfile(path) and os.stat(path).st_mode & 0o111:
            yield ExecutableFile(filename, pkg=pkg)
        banned_chars = set(filename) - allowed_filename_chars_set
        if banned_chars:
            yield BannedCharacter(filename, sorted(banned_chars), pkg=pkg)
        if filename.endswith(ebuild_ext):
            try:
                with open(path, mode='rb') as f:
                    f.read(8192).decode()
            except UnicodeDecodeError as e:
                yield InvalidUTF8(filename, str(e), pkg=pkg)
            pkg_name = os.path.basename(filename[:-len(ebuild_ext)])
            try:
                pkg_atom = atom_cls(f'={pkg.category}/{pkg_name}')
                if pkg_atom.package != os.path.basename(pkg_path):
                    mismatched.append(pkg_name)
            except MalformedAtom:
                invalid.append(pkg_name)
        elif (self.options.gentoo_repo and
                filename not in ('Manifest', 'metadata.xml', 'files')):
            unknown.append(filename)
    if mismatched:
        yield MismatchedPN(sorted(mismatched), pkg=pkg)
    if invalid:
        yield InvalidPN(sorted(invalid), pkg=pkg)
    if unknown:
        yield UnknownPkgDirEntry(sorted(unknown), pkg=pkg)

    files_by_size = defaultdict(list)
    pkg_path_len = len(pkg_path) + 1
    for root, dirs, files in os.walk(pjoin(pkg_path, 'files')):
        # don't visit any ignored directories
        for d in self.ignore_dirs.intersection(dirs):
            dirs.remove(d)
        base_dir = root[pkg_path_len:]
        for filename in files:
            path = pjoin(root, filename)
            if self.gitignored(path):
                continue
            file_stat = os.lstat(path)
            if stat.S_ISREG(file_stat.st_mode):
                if file_stat.st_mode & 0o111:
                    yield ExecutableFile(pjoin(base_dir, filename), pkg=pkg)
                if file_stat.st_size == 0:
                    yield EmptyFile(pjoin(base_dir, filename), pkg=pkg)
                else:
                    files_by_size[file_stat.st_size].append(
                        pjoin(base_dir, filename))
                    if file_stat.st_size > 20480:
                        yield SizeViolation(
                            pjoin(base_dir, filename),
                            file_stat.st_size, pkg=pkg)
                banned_chars = set(filename) - allowed_filename_chars_set
                if banned_chars:
                    yield BannedCharacter(
                        pjoin(base_dir, filename), sorted(banned_chars),
                        pkg=pkg)

    # files sharing a size are duplicate candidates; only those get hashed
    files_by_digest = defaultdict(list)
    for size, files in files_by_size.items():
        if len(files) > 1:
            for f in files:
                digest = get_chksums(pjoin(pkg_path, f), self.digest_algo)[0]
                files_by_digest[digest].append(f)
    for digest, files in files_by_digest.items():
        if len(files) > 1:
            yield DuplicateFiles(sorted(files), pkg=pkg)

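# The duplicate-file pass above follows a common two-stage pattern: bucket
# files by size first, then checksum only the buckets with collisions. A
# standalone sketch of the same idea, using plain hashlib instead of this
# codebase's get_chksums:
import hashlib
import os
from collections import defaultdict

def find_duplicate_files(paths):
    by_size = defaultdict(list)
    for path in paths:
        by_size[os.path.getsize(path)].append(path)
    duplicates = []
    for files in by_size.values():
        if len(files) < 2:
            continue  # a unique size can't be a duplicate; skip hashing it
        by_digest = defaultdict(list)
        for path in files:
            with open(path, 'rb') as f:
                by_digest[hashlib.sha1(f.read()).hexdigest()].append(path)
        duplicates.extend(g for g in by_digest.values() if len(g) > 1)
    return duplicates
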
def _chksum_callback(self, chfs):
    return list(zip(chfs, get_chksums(self.data, *chfs)))

def _verify(self, file_location, target, all_chksums=True, handlers=None):
    """Internal function for derivatives.

    Digs through chksums and either returns None or raises an
    errors.FetchFailed exception:

    - the file doesn't exist: errors.MissingDistfile
    - a size chksum is available and the file is smaller than stated:
      FetchFailed, marked resumable
    - the file is too big, or the size is right but a chksum didn't match:
      FetchFailed, not resumable

    if all_chksums is True, all chksums must be verified; if False, only
    those chksums a handler can be found for are used.
    """
    nondefault_handlers = handlers
    if handlers is None:
        try:
            handlers = get_handlers(target.chksums)
        except KeyError as e:
            raise errors.FetchFailed(
                file_location,
                "Couldn't find a required checksum handler") from e
    if all_chksums:
        missing = set(target.chksums).difference(handlers)
        if missing:
            raise errors.RequiredChksumDataMissing(target, *sorted(missing))

    if "size" in handlers:
        val = handlers["size"](file_location)
        if val == -1:
            raise errors.MissingDistfile(file_location)
        if val != target.chksums["size"]:
            resumable = val < target.chksums["size"]
            msg = "File is too small." if resumable else "File is too big."
            raise errors.FetchFailed(
                file_location, msg, resumable=resumable)
    elif not os.path.exists(file_location):
        raise errors.MissingDistfile(file_location)
    elif not os.stat(file_location).st_size:
        raise errors.FetchFailed(
            file_location, 'file is empty', resumable=False)

    chfs = set(target.chksums).intersection(handlers)
    chfs.discard("size")
    chfs = list(chfs)
    if nondefault_handlers:
        for x in chfs:
            val = handlers[x](file_location)
            if val != target.chksums[x]:
                raise errors.FetchFailed(
                    file_location,
                    "Validation handler %s: expected %s, got %s" %
                    (x, target.chksums[x], val))
    else:
        desired_vals = [target.chksums[x] for x in chfs]
        calced = get_chksums(file_location, *chfs)
        for desired, got, chf in zip(desired_vals, calced, chfs):
            if desired != got:
                raise errors.FetchFailed(
                    file_location,
                    "Validation handler %s: expected %s, got %s" %
                    (chf, desired, got))

def _cmd_implementation_digests(self, domain, matches, observer,
                                mirrors=False, force=False):
    manifest_config = self.repo.config.manifests
    if manifest_config.disabled:
        observer.info(f"repo {self.repo.repo_id} has manifests disabled")
        return
    required_chksums = set(manifest_config.required_hashes)
    write_chksums = manifest_config.hashes
    distdir = domain.fetcher.distdir
    ret = set()

    for key_query in sorted(set(match.unversioned_atom for match in matches)):
        pkgs = self.repo.match(key_query)

        # check for pkgs masked by bad metadata
        bad_metadata = self.repo._bad_masked.match(key_query)
        if bad_metadata:
            for pkg in bad_metadata:
                e = pkg.data
                error_str = f"{pkg.cpvstr}: {e.msg(verbosity=observer.verbosity)}"
                observer.error(error_str)
                ret.add(key_query)
            continue

        # Check for bad ebuilds -- mismatched or invalid PNs won't be
        # matched by regular restrictions so they will otherwise be
        # ignored.
        ebuilds = {
            x for x in listdir_files(pjoin(self.repo.location, str(key_query)))
            if x.endswith('.ebuild')
        }
        unknown_ebuilds = ebuilds.difference(
            os.path.basename(x.path) for x in pkgs)
        if unknown_ebuilds:
            error_str = (
                f"{key_query}: invalid ebuild{_pl(unknown_ebuilds)}: "
                f"{', '.join(unknown_ebuilds)}")
            observer.error(error_str)
            ret.add(key_query)
            continue

        # empty package dir
        if not pkgs:
            continue

        manifest = pkgs[0].manifest

        # all pkgdir fetchables
        pkgdir_fetchables = {}
        for pkg in pkgs:
            pkgdir_fetchables.update({
                fetchable.filename: fetchable
                for fetchable in iflatten_instance(
                    pkg._get_attr['fetchables'](
                        pkg, allow_missing_checksums=True,
                        skip_default_mirrors=(not mirrors)),
                    fetch.fetchable)
            })

        # fetchables targeted for (re-)manifest generation
        fetchables = {}
        chksum_set = set(write_chksums)
        for filename, fetchable in pkgdir_fetchables.items():
            if force or not required_chksums.issubset(fetchable.chksums):
                fetchable.chksums = {
                    k: v for k, v in fetchable.chksums.items()
                    if k in chksum_set}
                fetchables[filename] = fetchable

        # Manifest files aren't necessary with thin manifests and no distfiles
        if manifest_config.thin and not pkgdir_fetchables:
            if os.path.exists(manifest.path):
                try:
                    os.remove(manifest.path)
                except EnvironmentError as e:
                    observer.error(
                        'failed removing old manifest: '
                        f'{key_query}::{self.repo.repo_id}: {e}')
                    ret.add(key_query)
            continue

        # Manifest file is current and not forcing a refresh
        if not force and manifest.distfiles.keys() == pkgdir_fetchables.keys():
            continue

        pkg_ops = domain.pkg_operations(pkgs[0], observer=observer)
        if not pkg_ops.supports("fetch"):
            observer.error(
                f"pkg {pkgs[0]} doesn't support fetching, "
                "can't generate manifest")
            ret.add(key_query)
            continue

        # fetch distfiles
        if not pkg_ops.fetch(list(fetchables.values()), observer):
            ret.add(key_query)
            continue

        # calculate checksums for fetched distfiles
        try:
            for fetchable in fetchables.values():
                chksums = chksum.get_chksums(
                    pjoin(distdir, fetchable.filename), *write_chksums)
                fetchable.chksums = dict(zip(write_chksums, chksums))
        except chksum.MissingChksumHandler as e:
            observer.error(f'failed generating chksum: {e}')
            ret.add(key_query)
            break

        if key_query not in ret:
            fetchables.update(pkgdir_fetchables)
            observer.info(
                f"generating manifest: {key_query}::{self.repo.repo_id}")
            manifest.update(sorted(fetchables.values()), chfs=write_chksums)
    return ret

def _cmd_implementation_manifest(self, domain, restriction, observer,
                                 mirrors=False, force=False, distdir=None):
    manifest_config = self.repo.config.manifests
    if manifest_config.disabled:
        observer.info(f'{self.repo.repo_id} repo has manifests disabled')
        return
    required_chksums = set(manifest_config.required_hashes)
    write_chksums = manifest_config.hashes
    if distdir is None:
        distdir = domain.distdir
    ret = set()

    matches = self.repo.itermatch(restriction, sorter=sorted)
    for pkgs in map(list, pkgutils.groupby_pkg(matches)):
        key = pkgs[0].key
        manifest = pkgs[0].manifest

        # check for pkgs masked by bad metadata
        if bad_metadata := self.repo._bad_masked.match(pkgs[0].unversioned_atom):
            for pkg in bad_metadata:
                e = pkg.data
                error_str = f"{pkg.cpvstr}: {e.msg(verbosity=observer.verbosity)}"
                observer.error(error_str)
                ret.add(pkg.key)
            continue

        # all pkgdir fetchables
        pkgdir_fetchables = {}
        for pkg in pkgs:
            pkgdir_fetchables.update({
                fetchable.filename: fetchable
                for fetchable in iflatten_instance(
                    pkg.generate_fetchables(
                        allow_missing_checksums=True,
                        skip_default_mirrors=(not mirrors)),
                    fetch.fetchable)
            })

        # fetchables targeted for (re-)manifest generation
        fetchables = {}
        chksum_set = set(write_chksums)
        for filename, fetchable in pkgdir_fetchables.items():
            if force or not required_chksums.issubset(fetchable.chksums):
                fetchable.chksums = {
                    k: v for k, v in fetchable.chksums.items()
                    if k in chksum_set}
                fetchables[filename] = fetchable

        # Manifest files aren't necessary with thin manifests and no distfiles
        if manifest_config.thin and not pkgdir_fetchables:
            if os.path.exists(manifest.path):
                try:
                    os.remove(manifest.path)
                except EnvironmentError as e:
                    observer.error(
                        'failed removing old manifest: '
                        f'{key}::{self.repo.repo_id}: {e}')
                    ret.add(key)
            continue

        # Manifest file is current and not forcing a refresh
        if not force and manifest.distfiles.keys() == pkgdir_fetchables.keys():
            continue

        # fetch distfiles
        pkg_ops = domain.pkg_operations(pkg, observer=observer)
        if not pkg_ops.fetch(list(fetchables.values()), observer, distdir=distdir):
            ret.add(key)
            continue

        # calculate checksums for fetched distfiles
        try:
            for fetchable in fetchables.values():
                chksums = chksum.get_chksums(
                    pjoin(distdir, fetchable.filename), *write_chksums)
                fetchable.chksums = dict(zip(write_chksums, chksums))
        except chksum.MissingChksumHandler as e:
            observer.error(f'failed generating chksum: {e}')
            ret.add(key)
            break

        if key not in ret:
            fetchables.update(pkgdir_fetchables)
            observer.info(f"generating manifest: {key}::{self.repo.repo_id}")
            manifest.update(sorted(fetchables.values()), chfs=write_chksums)
    return ret
