Пример #1
0
    def _verify(self, file_location, target, all_chksums=True, handlers=None):
        """Validate the file at ``file_location`` against ``target``'s chksums.

        Returns None when every check passes; otherwise raises one of the
        ``errors`` exceptions: MissingChksumHandler (no handler available),
        RequiredChksumDataMissing (a required chksum has no handler),
        MissingDistfile (file absent), FetchFailed (too small / empty), or
        ChksumFailure (size or digest mismatch).

        :param file_location: path of the file to verify
        :param target: fetchable-like object carrying the expected ``chksums``
        :param all_chksums: when True, every chksum on ``target`` must have a
            usable handler; when False, only those with handlers are checked
        :param handlers: optional explicit chksum-name -> handler mapping;
            when None the default handlers are resolved
        """
        custom_handlers = handlers
        if handlers is None:
            try:
                handlers = get_handlers(target.chksums)
            except MissingChksumHandler as e:
                raise errors.MissingChksumHandler(
                    f'missing required checksum handler: {e}')

        if all_chksums:
            unhandled = set(target.chksums) - set(handlers)
            if unhandled:
                raise errors.RequiredChksumDataMissing(target, *sorted(unhandled))

        if "size" in handlers:
            observed = handlers["size"](file_location)
            # the size handler reports -1 for a nonexistent file
            if observed == -1:
                raise errors.MissingDistfile(file_location)
            expected_size = target.chksums["size"]
            if observed < expected_size:
                # short file is likely a partial download, so it's resumable
                raise errors.FetchFailed(
                    file_location, 'file is too small', resumable=True)
            if observed > expected_size:
                raise errors.ChksumFailure(
                    file_location, chksum='size', expected=expected_size, value=observed)
        elif not os.path.exists(file_location):
            raise errors.MissingDistfile(file_location)
        elif not os.stat(file_location).st_size:
            raise errors.FetchFailed(
                file_location, 'file is empty', resumable=False)

        # remaining (non-size) chksum types that are both requested and handled
        hash_names = [name for name in set(target.chksums).intersection(handlers)
                      if name != "size"]
        if custom_handlers:
            # caller-provided handlers: apply each one individually
            for name in hash_names:
                observed = handlers[name](file_location)
                if observed != target.chksums[name]:
                    raise errors.ChksumFailure(
                        file_location, chksum=name,
                        expected=target.chksums[name], value=observed)
        else:
            # default path: compute every digest in a single pass over the file
            wanted = [target.chksums[name] for name in hash_names]
            computed = get_chksums(file_location, *hash_names)
            for name, expected, got in zip(hash_names, wanted, computed):
                if expected != got:
                    raise errors.ChksumFailure(
                        file_location, chksum=name, expected=expected, value=got)
Пример #2
0
    def __init__(self, fname=None, **chksums):
        """Populate checksum attributes from a file or from explicit values.

        :param fname: optional file path; when given, all supported checksums
            are computed from the file and keyword values are ignored
        :param chksums: explicit algorithm=value pairs, used when fname is None
        :raises ChksumException: if the file is missing, an unsupported
            algorithm is supplied, or a required algorithm is absent
        """
        if fname is not None:
            # compute every supported checksum directly from the file
            if not os.path.exists(fname):
                raise ChksumException('File not found: %s' % fname)
            pairs = zip(self.algorithms, get_chksums(fname, *self.algorithms))
        else:
            # use the caller-supplied checksum values
            pairs = chksums.items()

        # validate checksums, and set attributes
        remaining = list(self.algorithms)
        for algorithm, value in pairs:
            if algorithm not in remaining:
                raise ChksumException('Invalid checksum algorithm: %s' % algorithm)
            setattr(self, algorithm, ChksumValue(algorithm, value))
            remaining.remove(algorithm)
        if remaining:
            raise ChksumException('Missing checksums: %s' % ', '.join(remaining))
Пример #3
0
 def update_from_xpak(self, pkg, xpak):
     """Rebuild and store the cache entry for ``pkg`` from its xpak segment.

     Uses membership tests against the xpak rather than .items(), since
     iterating items forces the xpak to load its full contents.
     """
     entry = {key: xpak[key] for key in self._known_keys if key in xpak}
     entry['_chf_'] = xpak._chf_
     wanted = [chf for chf in self._stored_chfs if chf != 'mtime']
     for chf, checksum in zip(wanted, get_chksums(pkg.path, *wanted)):
         # non-size chksums are serialized as lowercase hex strings
         entry[chf.upper()] = checksum if chf == 'size' else "%x" % (checksum, )
     self[pkg.cpvstr] = entry
     return entry
Пример #4
0
        def validate(distfile):
            """Reject distfiles with unsupported formats or excessive size.

            :param distfile: distfile name relative to DISTDIR
            :raises DiffUnsupported: if the extension isn't a supported
                format or the file exceeds the 300MB cap
            """
            # validate format; str.endswith accepts a tuple of suffixes, which
            # replaces the manual found-flag loop (that also lacked a break)
            if not distfile.endswith(tuple(_supported_formats)):
                raise DiffUnsupported('Invalid distfile type: %s' % distfile)

            # validate size (XXX: Improve this. Make the max size a config opt)
            distfile_path = os.path.join(portage.settings['DISTDIR'],
                                         distfile)
            size = get_chksums(distfile_path, 'size')[0]
            if size > (300 * 1024 * 1024):  # 300MB
                raise DiffUnsupported('Invalid distfile size: %s' % distfile)
Пример #5
0
    def test_init(self):
        """Check chksums are computed from data sources and accepted via kwargs."""
        base.test_init(self)
        mkobj = self.make_obj
        o = mkobj(__file__)
        # context manager closes the handle; the bare open().read() leaked it
        with open(__file__) as f:
            raw_data = f.read()
        self.assertEqual(o.data.text_fileobj().read(), raw_data)
        o = mkobj("/bin/this-file-should-not-exist-nor-be-read",
            data=data_source(raw_data))
        self.assertEqual(o.data.text_fileobj().read(), raw_data)
        keys = list(o.chksums.keys())
        self.assertEqual([o.chksums[x] for x in keys],
            list(get_chksums(data_source(raw_data), *keys)))

        # items() yields the same pairs on py2 and py3; iteritems was py2-only
        chksums = dict(o.chksums.items())
        self.assertEqual(sorted(mkobj(chksums=chksums).chksums.items()),
            sorted(chksums.items()))
Пример #6
0
    def test_init(self):
        """Check chksums are computed from data sources and accepted via kwargs."""
        base.test_init(self)
        mkobj = self.make_obj
        o = mkobj(__file__)
        with open(__file__) as f:
            raw_data = f.read()
        self.assertEqual(o.data.text_fileobj().read(), raw_data)
        o = mkobj("/bin/this-file-should-not-exist-nor-be-read",
            data=data_source(raw_data))
        self.assertEqual(o.data.text_fileobj().read(), raw_data)
        keys = list(o.chksums.keys())
        self.assertEqual([o.chksums[x] for x in keys],
            list(get_chksums(data_source(raw_data), *keys)))

        # dict() consumes the items view directly; wrapping it in iter() was redundant
        chksums = dict(o.chksums.items())
        self.assertEqual(sorted(mkobj(chksums=chksums).chksums.items()),
            sorted(chksums.items()))
Пример #7
0
    def _assemble_pkg_dict(cls, pkg):
        """Serialize ``pkg`` into a flat dict of uppercase cache keys.

        Sequence attributes are sorted and space-joined; other attributes are
        stringified and stripped.  Stored chksums (except size) are rendered
        as hex strings, and the package file's mtime is recorded under MTIME.
        """
        d = {}
        sequences = cls._pkg_attr_sequences
        for key in cls._stored_attrs:
            value = getattr(pkg, key)
            if key in sequences:
                value = ' '.join(sorted(value))
            else:
                # reuse the already-fetched value rather than a second getattr
                value = str(value).strip()
            key = key.upper()
            d[cls._serialize_map.get(key, key)] = value

        for key, value in zip(cls._stored_chfs,
                              get_chksums(pkg.path, *cls._stored_chfs)):
            if key != 'size':
                value = "%x" % (value, )
            d[key.upper()] = value
        d["MTIME"] = str(os.stat(pkg.path).st_mtime)
        return d
Пример #8
0
    def feed(self, pkgset):
        """Scan a package directory for file-level QA violations.

        Yields report objects for: executable files, banned filename
        characters, non-UTF-8 ebuilds, mismatched or malformed ebuild
        names, unexpected pkgdir entries, and (under files/) empty,
        oversized, or duplicate files.
        """
        pkg = pkgset[0]
        pkg_path = pjoin(self.options.target_repo.location, pkg.category, pkg.package)
        ebuild_ext = '.ebuild'
        mismatched = []
        invalid = []
        unknown = []
        # note we don't use os.walk, we need size info also
        for filename in listdir(pkg_path):
            path = pjoin(pkg_path, filename)

            if self.gitignored(path):
                continue

            # any executable bit set on a regular file is a violation
            if os.path.isfile(path) and  os.stat(path).st_mode & 0o111:
                yield ExecutableFile(filename, pkg=pkg)

            # While this may seem odd, written this way such that the filtering
            # happens all in the genexp. If the result was being handed to any,
            # it's a frame switch each char, which adds up.
            banned_chars = set(filename) - allowed_filename_chars_set
            if banned_chars:
                yield BannedCharacter(filename, sorted(banned_chars), pkg=pkg)

            if filename.endswith(ebuild_ext):
                # decodes only the first 8KiB -- presumably enough to catch
                # encoding problems; TODO confirm full-file coverage is not needed
                try:
                    with open(path, mode='rb') as f:
                        f.read(8192).decode()
                except UnicodeDecodeError as e:
                    yield InvalidUTF8(filename, str(e), pkg=pkg)

                pkg_name = os.path.basename(filename[:-len(ebuild_ext)])
                try:
                    pkg_atom = atom_cls(f'={pkg.category}/{pkg_name}')
                    if pkg_atom.package != os.path.basename(pkg_path):
                        mismatched.append(pkg_name)
                except MalformedAtom:
                    invalid.append(pkg_name)
            elif (self.options.gentoo_repo and
                    filename not in ('Manifest', 'metadata.xml', 'files')):
                unknown.append(filename)

        if mismatched:
            yield MismatchedPN(sorted(mismatched), pkg=pkg)
        if invalid:
            yield InvalidPN(sorted(invalid), pkg=pkg)
        if unknown:
            yield UnknownPkgDirEntry(sorted(unknown), pkg=pkg)

        files_by_size = defaultdict(list)
        pkg_path_len = len(pkg_path) + 1
        for root, dirs, files in os.walk(pjoin(pkg_path, 'files')):
            # don't visit any ignored directories
            for d in self.ignore_dirs.intersection(dirs):
                dirs.remove(d)
            base_dir = root[pkg_path_len:]
            for filename in files:
                path = pjoin(root, filename)
                if self.gitignored(path):
                    continue
                # lstat so symlinks themselves fail the S_ISREG check below
                file_stat = os.lstat(path)
                if stat.S_ISREG(file_stat.st_mode):
                    if file_stat.st_mode & 0o111:
                        yield ExecutableFile(pjoin(base_dir, filename), pkg=pkg)
                    if file_stat.st_size == 0:
                        yield EmptyFile(pjoin(base_dir, filename), pkg=pkg)
                    else:
                        # group by size first; only same-size files can be dupes
                        files_by_size[file_stat.st_size].append(pjoin(base_dir, filename))
                        if file_stat.st_size > 20480:
                            yield SizeViolation(
                                pjoin(base_dir, filename), file_stat.st_size, pkg=pkg)
                    banned_chars = set(filename) - allowed_filename_chars_set
                    if banned_chars:
                        yield BannedCharacter(
                            pjoin(base_dir, filename), sorted(banned_chars), pkg=pkg)

        files_by_digest = defaultdict(list)
        for size, files in files_by_size.items():
            if len(files) > 1:
                # only checksum size-colliding candidates to limit I/O
                for f in files:
                    digest = get_chksums(pjoin(pkg_path, f), self.digest_algo)[0]
                    files_by_digest[digest].append(f)

        for digest, files in files_by_digest.items():
            if len(files) > 1:
                yield DuplicateFiles(sorted(files), pkg=pkg)
Пример #9
0
 def _chksum_callback(self, chfs):
     """Return a list of (chf, value) pairs for the requested chksum types."""
     return [(chf, value)
             for chf, value in zip(chfs, get_chksums(self.data, *chfs))]
Пример #10
0
    def _verify(self, file_location, target, all_chksums=True, handlers=None):
        """
        Internal function for derivatives.

        Digs through chksums, and either returns None (everything matched)
        or raises an errors.* exception: MissingDistfile when the file is
        absent, FetchFailed on size or digest problems, or
        RequiredChksumDataMissing when a required chksum has no handler.

        NOTE(review): the -2/-1/0/1 return codes formerly listed here are
        stale -- all outcomes are signalled via exceptions now.

        if all_chksums is True, all chksums must be verified; if false, all
        a handler can be found for are used.
        """

        # remember whether the caller supplied explicit handlers; those are
        # applied individually below instead of via a single get_chksums pass
        nondefault_handlers = handlers
        if handlers is None:
            try:
                handlers = get_handlers(target.chksums)
            except KeyError:
                # py2/py3 compat helper; raises the new exception with context
                compatibility.raise_from(errors.FetchFailed(
                    file_location, "Couldn't find a required checksum handler"))
        if all_chksums:
            missing = set(target.chksums).difference(handlers)
            if missing:
                raise errors.RequiredChksumDataMissing(target, *sorted(missing))

        if "size" in handlers:
            val = handlers["size"](file_location)
            # the size handler reports -1 for a nonexistent file
            if val == -1:
                raise errors.MissingDistfile(file_location)
            # cmp() is py2-only: negative when the file is smaller than expected
            c = cmp(val, target.chksums["size"])
            if c:
                # a short file may be a partial download, hence resumable
                resumable = (c < 0)
                if resumable:
                    msg = "File is too small."
                else:
                    msg = "File is too big."
                raise errors.FetchFailed(
                    file_location, msg, resumable=resumable)
        elif not os.path.exists(file_location):
            raise errors.MissingDistfile(file_location)
        elif not os.stat(file_location).st_size:
            raise errors.FetchFailed(
                file_location, 'file is empty', resumable=False)

        # remaining (non-size) chksum types both requested and handled
        chfs = set(target.chksums).intersection(handlers)
        chfs.discard("size")
        chfs = list(chfs)
        if nondefault_handlers:
            for x in chfs:
                val = handlers[x](file_location)
                if val != target.chksums[x]:
                    raise errors.FetchFailed(
                        file_location,
                        "Validation handler %s: expected %s, got %s" %
                        (x, target.chksums[x], val))
        else:
            # default path: compute all digests in one pass over the file
            desired_vals = [target.chksums[x] for x in chfs]
            calced = get_chksums(file_location, *chfs)
            for desired, got, chf in zip(desired_vals, calced, chfs):
                if desired != got:
                    raise errors.FetchFailed(
                        file_location,
                        "Validation handler %s: expected %s, got %s" %
                        (chf, desired, got))
Пример #11
0
 def _chksum_callback(self, chfs):
     """Return a list of (chf, value) pairs for the requested chksum types.

     Materialized with list() so the result can be iterated more than once;
     on py3 a bare zip() would be a single-use iterator.
     """
     return list(zip(chfs, get_chksums(self.data, *chfs)))
Пример #12
0
    def _verify(self, file_location, target, all_chksums=True, handlers=None):
        """
        Internal function for derivatives.

        Digs through chksums, and either returns None (everything matched)
        or raises an errors.* exception: MissingDistfile when the file is
        absent, FetchFailed on size or digest problems, or
        RequiredChksumDataMissing when a required chksum has no handler.

        NOTE(review): the -2/-1/0/1 return codes formerly listed here are
        stale -- all outcomes are signalled via exceptions now.

        if all_chksums is True, all chksums must be verified; if false, all
        a handler can be found for are used.
        """

        # remember whether the caller supplied explicit handlers; those are
        # applied individually below instead of via a single get_chksums pass
        nondefault_handlers = handlers
        if handlers is None:
            try:
                handlers = get_handlers(target.chksums)
            except KeyError:
                # py2/py3 compat helper; raises the new exception with context
                compatibility.raise_from(
                    errors.FetchFailed(
                        file_location,
                        "Couldn't find a required checksum handler"))
        if all_chksums:
            missing = set(target.chksums).difference(handlers)
            if missing:
                raise errors.RequiredChksumDataMissing(target,
                                                       *sorted(missing))

        if "size" in handlers:
            val = handlers["size"](file_location)
            # the size handler reports -1 for a nonexistent file
            if val == -1:
                raise errors.MissingDistfile(file_location)
            # cmp() is py2-only: negative when the file is smaller than expected
            c = cmp(val, target.chksums["size"])
            if c:
                # a short file may be a partial download, hence resumable
                resumable = (c < 0)
                if resumable:
                    msg = "File is too small."
                else:
                    msg = "File is too big."
                raise errors.FetchFailed(file_location,
                                         msg,
                                         resumable=resumable)
        elif not os.path.exists(file_location):
            raise errors.MissingDistfile(file_location)
        elif not os.stat(file_location).st_size:
            raise errors.FetchFailed(file_location,
                                     'file is empty',
                                     resumable=False)

        # remaining (non-size) chksum types both requested and handled
        chfs = set(target.chksums).intersection(handlers)
        chfs.discard("size")
        chfs = list(chfs)
        if nondefault_handlers:
            for x in chfs:
                val = handlers[x](file_location)
                if val != target.chksums[x]:
                    raise errors.FetchFailed(
                        file_location,
                        "Validation handler %s: expected %s, got %s" %
                        (x, target.chksums[x], val))
        else:
            # default path: compute all digests in one pass over the file
            desired_vals = [target.chksums[x] for x in chfs]
            calced = get_chksums(file_location, *chfs)
            for desired, got, chf in zip(desired_vals, calced, chfs):
                if desired != got:
                    raise errors.FetchFailed(
                        file_location,
                        "Validation handler %s: expected %s, got %s" %
                        (chf, desired, got))
Пример #13
0
    def _cmd_implementation_digests(self,
                                    domain,
                                    matches,
                                    observer,
                                    mirrors=False,
                                    force=False):
        """Regenerate Manifest digests for every package key in ``matches``.

        :param domain: domain supplying the fetcher's distdir and pkg operations
        :param matches: package matches whose keys get (re-)manifested
        :param observer: reporter used for info/error output
        :param mirrors: when True, include default mirrors in fetchable URIs
        :param force: when True, refresh checksums even if already present
        :return: set of package keys that failed, or None when manifests are
            disabled for this repo
        """
        manifest_config = self.repo.config.manifests
        if manifest_config.disabled:
            observer.info(f"repo {self.repo.repo_id} has manifests disabled")
            return
        required_chksums = set(manifest_config.required_hashes)
        write_chksums = manifest_config.hashes
        distdir = domain.fetcher.distdir
        ret = set()

        # iterate one unversioned key (package dir) at a time
        for key_query in sorted(
                set(match.unversioned_atom for match in matches)):
            pkgs = self.repo.match(key_query)

            # check for pkgs masked by bad metadata
            bad_metadata = self.repo._bad_masked.match(key_query)
            if bad_metadata:
                for pkg in bad_metadata:
                    e = pkg.data
                    error_str = f"{pkg.cpvstr}: {e.msg(verbosity=observer.verbosity)}"
                    observer.error(error_str)
                    ret.add(key_query)
                continue

            # Check for bad ebuilds -- mismatched or invalid PNs won't be
            # matched by regular restrictions so they will otherwise be
            # ignored.
            ebuilds = {
                x
                for x in listdir_files(
                    pjoin(self.repo.location, str(key_query)))
                if x.endswith('.ebuild')
            }
            unknown_ebuilds = ebuilds.difference(
                os.path.basename(x.path) for x in pkgs)
            if unknown_ebuilds:
                error_str = (
                    f"{key_query}: invalid ebuild{_pl(unknown_ebuilds)}: "
                    f"{', '.join(unknown_ebuilds)}")
                observer.error(error_str)
                ret.add(key_query)
                continue

            # empty package dir
            if not pkgs:
                continue

            manifest = pkgs[0].manifest

            # all pkgdir fetchables
            pkgdir_fetchables = {}
            for pkg in pkgs:
                pkgdir_fetchables.update({
                    fetchable.filename: fetchable
                    for fetchable in iflatten_instance(
                        pkg._get_attr['fetchables']
                        (pkg,
                         allow_missing_checksums=True,
                         skip_default_mirrors=(not mirrors)), fetch.fetchable)
                })

            # fetchables targeted for (re-)manifest generation
            fetchables = {}
            chksum_set = set(write_chksums)
            for filename, fetchable in pkgdir_fetchables.items():
                # refresh when forced or when any required chksum is missing
                if force or not required_chksums.issubset(fetchable.chksums):
                    fetchable.chksums = {
                        k: v
                        for k, v in fetchable.chksums.items()
                        if k in chksum_set
                    }
                    fetchables[filename] = fetchable

            # Manifest files aren't necessary with thin manifests and no distfiles
            if manifest_config.thin and not pkgdir_fetchables:
                if os.path.exists(manifest.path):
                    try:
                        os.remove(manifest.path)
                    except EnvironmentError as e:
                        observer.error(
                            'failed removing old manifest: '
                            f'{key_query}::{self.repo.repo_id}: {e}')
                        ret.add(key_query)
                continue

            # Manifest file is current and not forcing a refresh
            if not force and manifest.distfiles.keys(
            ) == pkgdir_fetchables.keys():
                continue

            pkg_ops = domain.pkg_operations(pkgs[0], observer=observer)
            if not pkg_ops.supports("fetch"):
                observer.error(
                    f"pkg {pkg} doesn't support fetching, can't generate manifest"
                )
                ret.add(key_query)
                continue

            # fetch distfiles
            if not pkg_ops.fetch(list(fetchables.values()), observer):
                ret.add(key_query)
                continue

            # calculate checksums for fetched distfiles
            try:
                for fetchable in fetchables.values():
                    chksums = chksum.get_chksums(
                        pjoin(distdir, fetchable.filename), *write_chksums)
                    fetchable.chksums = dict(zip(write_chksums, chksums))
            except chksum.MissingChksumHandler as e:
                # no handler means no later key can succeed either -- bail out
                observer.error(f'failed generating chksum: {e}')
                ret.add(key_query)
                break

            if key_query not in ret:
                fetchables.update(pkgdir_fetchables)
                observer.info(
                    f"generating manifest: {key_query}::{self.repo.repo_id}")
                manifest.update(sorted(fetchables.values()),
                                chfs=write_chksums)

        return ret
Пример #14
0
    def _cmd_implementation_manifest(self, domain, restriction, observer,
                                     mirrors=False, force=False, distdir=None):
        """Regenerate Manifest files for all packages matching ``restriction``.

        :param domain: domain supplying the distdir and package operations
        :param restriction: restriction selecting the packages to manifest
        :param observer: reporter used for info/error output
        :param mirrors: when True, include default mirrors in fetchable URIs
        :param force: when True, refresh checksums even if already present
        :param distdir: optional override for the domain's distfiles dir
        :return: set of package keys that failed, or None when manifests are
            disabled for this repo
        """
        manifest_config = self.repo.config.manifests
        if manifest_config.disabled:
            observer.info(f'{self.repo.repo_id} repo has manifests disabled')
            return
        required_chksums = set(manifest_config.required_hashes)
        write_chksums = manifest_config.hashes

        if distdir is None:
            distdir = domain.distdir
        ret = set()

        matches = self.repo.itermatch(restriction, sorter=sorted)
        for pkgs in map(list, pkgutils.groupby_pkg(matches)):
            key = pkgs[0].key
            manifest = pkgs[0].manifest

            # check for pkgs masked by bad metadata
            if bad_metadata := self.repo._bad_masked.match(pkgs[0].unversioned_atom):
                for pkg in bad_metadata:
                    e = pkg.data
                    error_str = f"{pkg.cpvstr}: {e.msg(verbosity=observer.verbosity)}"
                    observer.error(error_str)
                    ret.add(pkg.key)
                continue

            # all pkgdir fetchables
            pkgdir_fetchables = {}
            for pkg in pkgs:
                pkgdir_fetchables.update({
                    fetchable.filename: fetchable for fetchable in
                    iflatten_instance(pkg.generate_fetchables(
                        allow_missing_checksums=True,
                        skip_default_mirrors=(not mirrors)),
                        fetch.fetchable)
                })

            # fetchables targeted for (re-)manifest generation
            fetchables = {}
            chksum_set = set(write_chksums)
            for filename, fetchable in pkgdir_fetchables.items():
                # refresh when forced or when any required chksum is missing
                if force or not required_chksums.issubset(fetchable.chksums):
                    fetchable.chksums = {
                        k: v for k, v in fetchable.chksums.items() if k in chksum_set}
                    fetchables[filename] = fetchable

            # Manifest files aren't necessary with thin manifests and no distfiles
            if manifest_config.thin and not pkgdir_fetchables:
                if os.path.exists(manifest.path):
                    try:
                        os.remove(manifest.path)
                    except EnvironmentError as e:
                        observer.error(
                            'failed removing old manifest: '
                            f'{key}::{self.repo.repo_id}: {e}')
                        ret.add(key)
                continue

            # Manifest file is current and not forcing a refresh
            if not force and manifest.distfiles.keys() == pkgdir_fetchables.keys():
                continue

            # fetch distfiles; use the group's first pkg explicitly rather
            # than the leftover inner-loop variable `pkg`, matching the
            # sibling digests implementation
            pkg_ops = domain.pkg_operations(pkgs[0], observer=observer)
            if not pkg_ops.fetch(list(fetchables.values()), observer, distdir=distdir):
                ret.add(key)
                continue

            # calculate checksums for fetched distfiles
            try:
                for fetchable in fetchables.values():
                    chksums = chksum.get_chksums(
                        pjoin(distdir, fetchable.filename), *write_chksums)
                    fetchable.chksums = dict(zip(write_chksums, chksums))
            except chksum.MissingChksumHandler as e:
                # no handler means no later key can succeed either -- bail out
                observer.error(f'failed generating chksum: {e}')
                ret.add(key)
                break

            if key not in ret:
                fetchables.update(pkgdir_fetchables)
                observer.info(f"generating manifest: {key}::{self.repo.repo_id}")
                manifest.update(sorted(fetchables.values()), chfs=write_chksums)

        # report failed keys, mirroring the digests implementation which
        # previously collected `ret` here but never returned it
        return ret
Пример #15
0
    def _verify(self, file_location, target, all_chksums=True, handlers=None):
        """Internal function for derivatives.

        Digs through chksums, and either returns None (everything matched)
        or raises an errors.* exception: MissingChksumHandler,
        RequiredChksumDataMissing, MissingDistfile, FetchFailed (too
        small / empty file), or ChksumFailure (size or digest mismatch).

        NOTE(review): the -2/-1/0/1 return codes formerly listed here are
        stale -- all outcomes are signalled via exceptions now.

        if all_chksums is True, all chksums must be verified; if false, all
        a handler can be found for are used.
        """

        # remember whether the caller supplied explicit handlers; those are
        # applied individually below instead of via a single get_chksums pass
        nondefault_handlers = handlers
        if handlers is None:
            try:
                handlers = get_handlers(target.chksums)
            except MissingChksumHandler as e:
                raise errors.MissingChksumHandler(
                    f'missing required checksum handler: {e}')
        if all_chksums:
            missing = set(target.chksums).difference(handlers)
            if missing:
                raise errors.RequiredChksumDataMissing(target,
                                                       *sorted(missing))

        if "size" in handlers:
            val = handlers["size"](file_location)
            # the size handler reports -1 for a nonexistent file
            if val == -1:
                raise errors.MissingDistfile(file_location)
            if val != target.chksums["size"]:
                # a short file may be a partial download, hence resumable
                if val < target.chksums["size"]:
                    raise errors.FetchFailed(file_location,
                                             'file is too small',
                                             resumable=True)
                raise errors.ChksumFailure(file_location,
                                           chksum='size',
                                           expected=target.chksums["size"],
                                           value=val)
        elif not os.path.exists(file_location):
            raise errors.MissingDistfile(file_location)
        elif not os.stat(file_location).st_size:
            raise errors.FetchFailed(file_location,
                                     'file is empty',
                                     resumable=False)

        # remaining (non-size) chksum types both requested and handled
        chfs = set(target.chksums).intersection(handlers)
        chfs.discard("size")
        chfs = list(chfs)
        if nondefault_handlers:
            for x in chfs:
                val = handlers[x](file_location)
                if val != target.chksums[x]:
                    raise errors.ChksumFailure(file_location,
                                               chksum=x,
                                               expected=target.chksums[x],
                                               value=val)
        else:
            # default path: compute all digests in one pass over the file
            desired_vals = [target.chksums[x] for x in chfs]
            calced = get_chksums(file_location, *chfs)
            for desired, got, chf in zip(desired_vals, calced, chfs):
                if desired != got:
                    raise errors.ChksumFailure(file_location,
                                               chksum=chf,
                                               expected=desired,
                                               value=got)