Пример #1
0
    def iter_parse(self, path: str, factory: PackageFactory) -> Iterable[PackageMaker]:
        """Parse an OpenPKG XML/RDF index and yield one package per description.

        Args:
            path: location of the RDF index file to parse.
            factory: used to create a package maker for every
                ``rdf:Description`` element found under the repository.

        Yields:
            Package makers filled with name, version, license, summary,
            category, homepage and download information.

        Raises:
            RuntimeError: if the index has no ``Repository`` element.
        """
        root = xml.etree.ElementTree.parse(path)

        repository = root.find('{http://www.openpkg.org/xml-rdf-index/0.9}Repository')

        # explicit check instead of assert: asserts are stripped under -O,
        # which would turn a malformed index into an obscure AttributeError
        if repository is None:
            raise RuntimeError('Cannot find <Repository> element')

        for item in repository.findall('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description'):
            with factory.begin() as pkg:
                pkg.add_name(safe_findtext(item, '{http://www.openpkg.org/xml-rdf-index/0.9}Name'), NameType.SRCRPM_NAME)
                pkg.set_version(safe_findtext(item, '{http://www.openpkg.org/xml-rdf-index/0.9}Version'))
                pkg.add_licenses(item.findtext('{http://www.openpkg.org/xml-rdf-index/0.9}License'))
                pkg.set_summary(item.findtext('{http://www.openpkg.org/xml-rdf-index/0.9}Summary'))
                pkg.add_categories(item.findtext('{http://www.openpkg.org/xml-rdf-index/0.9}Group'))
                pkg.add_homepages(item.findtext('{http://www.openpkg.org/xml-rdf-index/0.9}URL'))

                # only remote sources which do not mention openpkg.org are
                # recorded as downloads
                for source in safe_findalltexts(item, './{http://www.openpkg.org/xml-rdf-index/0.9}Source/{http://www.w3.org/1999/02/22-rdf-syntax-ns#}bag/{http://www.w3.org/1999/02/22-rdf-syntax-ns#}li'):
                    if source.startswith(('https://', 'http://', 'ftp://')) and 'openpkg.org' not in source:
                        pkg.add_downloads(source)

                # packages whose version string ends with the release string
                # are flagged as untrusted
                release = safe_findtext(item, '{http://www.openpkg.org/xml-rdf-index/0.9}Release')
                if pkg.version.endswith(release):
                    pkg.set_flags(PackageFlags.UNTRUSTED)

                yield pkg
Пример #2
0
    def iter_parse(self, path: str, factory: PackageFactory) -> Iterable[PackageMaker]:
        """Parse a repository XML made of license, package and version entries.

        ``license`` and ``package`` entries are only collected into lookup
        tables; a package is yielded for each ``version`` entry, combining
        its own data with the tables built so far.

        Args:
            path: location of the XML file to parse.
            factory: used to create a package maker per ``version`` entry.

        Yields:
            One package maker per ``version`` entry.
        """
        license_titles: dict[str, str] = {}
        package_infos: dict[str, PackageData] = {}

        for element in iter_xml_elements_at_level(path, 1, ['license', 'package', 'version']):
            tag = element.tag

            if tag == 'license':
                license_title = safe_findtext(element, 'title')
                license_titles[safe_getattr(element, 'name')] = license_title
                continue

            if tag == 'package':
                info = PackageData(
                    safe_findtext(element, 'title'),
                    safe_findalltexts(element, 'license'),
                    safe_findalltexts(element, 'category'),
                    safe_findalltexts(element, 'url'),
                )
                package_infos[safe_getattr(element, 'name')] = info
                continue

            if tag != 'version':
                continue

            fullname = safe_getattr(element, 'package')
            version = safe_getattr(element, 'name')

            with factory.begin(fullname + ' ' + version) as pkg:
                # data from the previously parsed <package> entry
                known = package_infos[fullname]

                # XXX: package naming is inconsistent (either plain name like kdenlive or
                # domain prefixed like com.abisource.abiword), but it's assumed that
                # everything up to the last dot may be stripped (#863)
                pkg.add_name(known.title, NameType.NPACKD_TITLE)
                pkg.add_name(fullname, NameType.NPACKD_FULLNAME)
                pkg.add_name(fullname.split('.')[-1], NameType.NPACKD_LASTNAME)
                pkg.set_version(version)

                pkg.add_downloads((url_elt.text for url_elt in element.findall('url')))

                # license names are translated into titles via previously
                # parsed <license> entries
                pkg.add_licenses(license_titles[license_name] for license_name in known.licenses)
                pkg.add_categories(_filter_categories(known.categories))
                pkg.add_homepages(known.urls)

            yield pkg
Пример #3
0
    def iter_parse(self, path: str, factory: PackageFactory,
                   transformer: PackageTransformer) -> Iterable[PackageMaker]:
        """Parse an application index XML and yield a package per app version.

        Data common to an application is collected once, then cloned for
        every ``package`` child which carries a non-empty version.

        Args:
            path: location of the XML index to parse.
            factory: used to create the per-application package maker.
            transformer: not used by this method.

        Yields:
            One package maker per application version; versions whose
            version code exceeds the application's ``marketvercode`` are
            flagged as devel.
        """
        tree = xml.etree.ElementTree.parse(path)

        for app_elt in tree.findall('application'):
            with factory.begin() as app:
                app.add_name(safe_findtext(app_elt, 'id'), NameType.FDROID_ID)

                # newlines are dropped from names,
                # e.g. org.primftpd: name="primiti\nve ftpd"
                display_name = safe_findtext(app_elt, 'name').replace('\n', '')
                app.add_name(display_name, NameType.FDROID_NAME)

                app.add_licenses(app_elt.findtext('license'))
                app.add_categories(app_elt.findtext('category'))
                app.add_homepages(app_elt.findtext('web'))
                app.set_summary(app_elt.findtext('summary'))

                market_code = int(safe_findtext(app_elt, 'marketvercode'))

                for release_elt in app_elt.findall('package'):
                    release_code = int(safe_findtext(release_elt, 'versioncode'))
                    version = release_elt.findtext('version')

                    # entries without a version are skipped
                    if not version:
                        continue

                    pkg = app.clone()
                    pkg.set_version(version)

                    if release_code > market_code:
                        pkg.set_flags(PackageFlags.DEVEL)
                    else:
                        pkg.set_flags(0)

                    yield pkg
Пример #4
0
    def iter_parse(self, path: str,
                   factory: PackageFactory) -> Iterable[PackageMaker]:
        """Parse a directory of Atom feed pages and yield a package per entry.

        Args:
            path: directory holding the feed pages; only files with the
                ``.xml`` suffix are read.
            factory: used to create a package maker per feed entry.

        Yields:
            One package maker per ``entry`` element of every page.
        """
        atom = '{http://www.w3.org/2005/Atom}'
        ds = '{http://schemas.microsoft.com/ado/2007/08/dataservices}'
        md = '{http://schemas.microsoft.com/ado/2007/08/dataservices/metadata}'

        # link kinds and the properties they are read from, in processing order
        link_properties = (
            (LinkType.UPSTREAM_HOMEPAGE, f'{md}properties/{ds}ProjectUrl'),
            (LinkType.UPSTREAM_REPOSITORY, f'{md}properties/{ds}ProjectSourceUrl'),
            (LinkType.PACKAGE_SOURCES, f'{md}properties/{ds}PackageSourceUrl'),
            (LinkType.UPSTREAM_DOCUMENTATION, f'{md}properties/{ds}DocsUrl'),
            (LinkType.UPSTREAM_ISSUE_TRACKER, f'{md}properties/{ds}BugTrackerUrl'),
            (LinkType.UPSTREAM_DISCUSSION, f'{md}properties/{ds}MailingListUrl'),
        )

        xml_pages = (page for page in os.listdir(path) if page.endswith('.xml'))

        for page in xml_pages:
            tree = xml.etree.ElementTree.parse(os.path.join(path, page))

            for entry in tree.findall(f'{atom}entry'):
                with factory.begin() as pkg:
                    pkg.add_name(safe_findtext(entry, f'{atom}title'), NameType.CHOCOLATEY_TITLE)
                    pkg.set_version(safe_findtext(entry, f'{md}properties/{ds}Version'))

                    for link_type, property_path in link_properties:
                        pkg.add_links(link_type, safe_findtext_empty(entry, property_path))

                    metadata_title = safe_findtext_empty(entry, f'{md}properties/{ds}Title')
                    pkg.add_name(metadata_title, NameType.CHOCOLATEY_METADATA_TITLE)

                    # the prerelease marker is still looked up, although
                    # nothing acts on its value yet
                    if safe_findtext(entry, f'{md}properties/{ds}IsPrerelease') == 'true':
                        # XXX: need testing
                        # pkg.set_flags(PackageFlags.WEAK_DEVEL)
                        pass

                    summary_elt = entry.find(f'{atom}summary')
                    if summary_elt is not None:
                        pkg.set_summary(summary_elt.text)

                    yield pkg
Пример #5
0
    def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTransformer) -> Iterable[PackageMaker]:
        """Parse RPM repository metadata and yield one package per entry.

        Args:
            path: location of the metadata XML to parse.
            factory: used to create package makers and to log the summary
                of skipped architectures.
            transformer: not used by this method.

        Yields:
            Package makers for entries whose architecture is allowed (when
            ``self.allowed_archs`` is non-empty) and whose name contains no
            unexpanded ``%{`` substitution.

        Raises:
            RuntimeError: if an entry has no ``version`` element.
        """
        normalize_version = VersionStripper().strip_right_greedy('+')

        skipped_archs: Dict[str, int] = {}

        for entry in iter_xml_elements_at_level(path, 1, ['{http://linux.duke.edu/metadata/common}package']):
            with factory.begin() as pkg:
                arch = safe_findtext(entry, '{http://linux.duke.edu/metadata/common}arch')

                if self.allowed_archs and arch not in self.allowed_archs:
                    skipped_archs[arch] = skipped_archs.get(arch, 0) + 1
                    continue

                name = safe_findtext(entry, '{http://linux.duke.edu/metadata/common}name')
                if '%{' in name:
                    pkg.log('incorrect package name (unexpanded substitution)', severity=Logger.ERROR)
                    continue

                pkg.add_name(name, NameType.GENERIC_PKGNAME)

                version_elt = entry.find('{http://linux.duke.edu/metadata/common}version')
                if version_elt is None:
                    raise RuntimeError('Cannot find <version> element')

                epoch = version_elt.attrib['epoch']
                version = version_elt.attrib['ver']
                release = version_elt.attrib['rel']

                # the pre-release suffix goes into a separate variable so
                # that the raw version below is built from the unmodified
                # version attribute
                fixed_version = version

                match = re.match('0\\.[0-9]+\\.((?:alpha|beta|rc)[0-9]+)\\.', release)
                if match:
                    # known pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Prerelease_versions
                    fixed_version += '-' + match.group(1)
                elif release < '1':
                    # unknown pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Some_definitions
                    # most likely a snapshot
                    # (note that this is a string comparison)
                    pkg.set_flags(PackageFlags.IGNORE)

                pkg.set_version(fixed_version, normalize_version)
                pkg.set_rawversion(nevra_construct(None, epoch, version, release))

                pkg.set_summary(entry.findtext('{http://linux.duke.edu/metadata/common}summary'))
                pkg.add_homepages(entry.findtext('{http://linux.duke.edu/metadata/common}url'))
                pkg.add_categories(entry.findtext('{http://linux.duke.edu/metadata/common}format/'
                                                  '{http://linux.duke.edu/metadata/rpm}group'))
                pkg.add_licenses(entry.findtext('{http://linux.duke.edu/metadata/common}format/'
                                                '{http://linux.duke.edu/metadata/rpm}license'))
                pkg.set_arch(entry.findtext('{http://linux.duke.edu/metadata/common}arch'))

                packager = entry.findtext('{http://linux.duke.edu/metadata/common}packager')
                if packager:
                    pkg.add_maintainers(extract_maintainers(packager))

                yield pkg

        # report how many packages were dropped per disallowed architecture
        for arch, numpackages in sorted(skipped_archs.items()):
            factory.log('skipped {} packages(s) with disallowed architecture {}'.format(numpackages, arch))
Пример #6
0
    def iter_parse(self, path: str,
                   factory: PackageFactory) -> Iterable[PackageMaker]:
        """Parse every ``pspec.xml`` under a tree and yield one package each.

        Args:
            path: root directory which is searched for ``pspec.xml`` files.
            factory: used to create a package maker per spec file,
                identified by the file path relative to ``path``.

        Yields:
            One package maker per spec file that parses as XML; the version
            is taken from the history update with the highest release
            number.
        """
        for pspec_path in walk_tree(path, suffix='pspec.xml'):
            relpath = os.path.relpath(pspec_path, path)

            with factory.begin(relpath) as pkg:
                try:
                    root = xml.etree.ElementTree.parse(pspec_path).getroot()
                except xml.etree.ElementTree.ParseError as e:
                    # unparsable specs are logged and skipped
                    pkg.log('Cannot parse XML: ' + str(e), Logger.ERROR)
                    continue

                name = safe_findtext(root, './Source/Name')
                pkgdir = os.path.dirname(relpath)

                # name of the directory which directly contains the spec
                pathname = relpath.split(os.sep)[-2]
                if name != pathname:
                    # there's only one exception ATOW
                    pkg.log(f'name "{name}" != package directory "{pathname}"', Logger.ERROR)

                pkg.add_name(name, NameType.PISI_NAME)
                pkg.add_name(pkgdir, NameType.PISI_PKGDIR)
                pkg.set_summary(safe_findtext(root, './Source/Summary'))
                pkg.add_homepages(elt.text for elt in root.findall('./Source/Homepage'))
                pkg.add_downloads(elt.text for elt in root.findall('./Source/Archive'))
                pkg.add_licenses(elt.text for elt in root.findall('./Source/License'))
                pkg.add_categories(elt.text for elt in root.findall('./Source/IsA'))
                pkg.add_maintainers(elt.text for elt in root.findall('./Source/Packager/Email'))

                updates = root.findall('./History/Update')
                latest = max(updates, key=lambda update: int(update.attrib['release']))
                pkg.set_version(safe_findtext(latest, './Version'))

                yield pkg
Пример #7
0
    def iter_parse(self, path: str, factory: PackageFactory,
                   transformer: PackageTransformer) -> Iterable[PackageMaker]:
        """Parse a repository XML made of license, package and version entries.

        ``license`` and ``package`` entries are only collected into lookup
        tables; a package is yielded for each ``version`` entry, combining
        its own data with the tables built so far.

        Args:
            path: location of the XML file to parse.
            factory: used to create a package maker per ``version`` entry.
            transformer: not used by this method.

        Yields:
            One package maker per ``version`` entry.
        """
        licenses: Dict[str, str] = {}
        # package name -> (title, license names, categories, urls)
        packages: Dict[str, Tuple[str, List[str], List[str], List[str]]] = {}

        for entry in iter_xml_elements_at_level(
                path, 1, ['license', 'package', 'version']):
            if entry.tag == 'license':
                licenses[safe_getattr(entry,
                                      'name')] = safe_findtext(entry, 'title')
            elif entry.tag == 'package':
                packages[safe_getattr(entry, 'name')] = (
                    safe_findtext(entry, 'title'),
                    safe_findalltexts(entry, 'license'),
                    safe_findalltexts(entry, 'category'),
                    safe_findalltexts(entry, 'url'),
                )
            elif entry.tag == 'version':
                pkgname = safe_getattr(entry, 'package')
                version = safe_getattr(entry, 'name')

                with factory.begin(pkgname + ' ' + version) as pkg:
                    # XXX: package naming is inconsistent (either plain name like kdenlive or
                    # domain prefixed like com.abisource.abiword), but it's assumed that
                    # everything up to the last dot may be stripped (#863)
                    pkg.set_name(pkgname)
                    pkg.set_basename(pkgname.split('.')[-1])
                    pkg.set_version(version)

                    pkg.add_downloads((e.text for e in entry.findall('url')))

                    # from previously parsed <license> and <package> entries
                    pkg.set_summary(packages[pkgname][0])
                    pkg.add_licenses(
                        licenses[license_] for license_ in packages[pkgname][1])
                    pkg.add_categories(_filter_categories(
                        packages[pkgname][2]))
                    # homepage urls are passed as is: the category filter
                    # is meant for categories, not for urls
                    pkg.add_homepages(packages[pkgname][3])

                yield pkg
Пример #8
0
    def iter_parse(self, path: str, factory: PackageFactory,
                   transformer: PackageTransformer) -> Iterable[PackageMaker]:
        """Parse RPM repository metadata and yield one package per entry.

        Source packages are processed only when ``self._src`` is set and
        binary packages only when ``self._binary`` is set. With
        ``self._arch_from_filename`` the architecture used for that decision
        is parsed from the ``location`` href instead of the ``arch`` element.

        Args:
            path: location of the metadata XML to parse.
            factory: used to create package makers and for logging.
            transformer: not used by this method.

        Yields:
            Package makers for the accepted entries whose name contains no
            unexpanded ``%{`` substitution.

        Raises:
            RuntimeError: if a required ``location`` or ``version`` element
                is missing.
        """
        strip_version = VersionStripper().strip_right_greedy('+')

        skipped_archs: Dict[str, int] = Counter()

        if self._arch_from_filename:
            factory.log('mitigation for incorrect <arch></arch> enabled', severity=Logger.WARNING)

        for entry in iter_xml_elements_at_level(path, 1, ['{http://linux.duke.edu/metadata/common}package']):
            if not self._arch_from_filename:
                arch = safe_findtext(entry, '{http://linux.duke.edu/metadata/common}arch')
            else:
                # XXX: openmandriva 3 hack, to be removed when it EoLs
                location_elt = entry.find('{http://linux.duke.edu/metadata/common}location')
                if location_elt is None:
                    raise RuntimeError('Cannot find <location> element')
                arch = nevra_parse(safe_getattr(location_elt, 'href'))[4]

            is_src = arch == 'src'

            # pick the switch which governs this kind of package
            wanted = self._src if is_src else self._binary
            if not wanted:
                skipped_archs[arch] += 1
                continue

            with factory.begin() as pkg:
                name = safe_findtext(entry, '{http://linux.duke.edu/metadata/common}name')
                if '%{' in name:
                    pkg.log('incorrect package name (unexpanded substitution)', severity=Logger.ERROR)
                    continue

                if is_src:
                    pkg.add_name(name, NameType.SRCRPM_NAME)
                else:
                    pkg.add_name(name, NameType.BINRPM_NAME)
                    # binary packages also get the name of their source rpm
                    sourcerpm = safe_findtext(
                        entry,
                        '{http://linux.duke.edu/metadata/common}format/'
                        '{http://linux.duke.edu/metadata/rpm}sourcerpm')
                    source_name = nevra_parse(sourcerpm)[0]
                    pkg.add_name(source_name, NameType.BINRPM_SRCNAME)

                version_elt = entry.find('{http://linux.duke.edu/metadata/common}version')
                if version_elt is None:
                    raise RuntimeError('Cannot find <version> element')

                attrs = version_elt.attrib
                epoch = attrs['epoch']
                version = attrs['ver']
                release = attrs['rel']

                prerelease = re.match('0\\.[0-9]+\\.((?:alpha|beta|rc)[0-9]+)\\.', release)
                if prerelease:
                    # known pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Prerelease_versions
                    fixed_version = version + '-' + prerelease.group(1)
                else:
                    fixed_version = version
                    if release < '1':
                        # unknown pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Some_definitions
                        # most likely a snapshot
                        pkg.set_flags(PackageFlags.IGNORE)

                pkg.set_version(fixed_version, strip_version)
                # the raw version is built from the unmodified version
                pkg.set_rawversion(nevra_construct(None, epoch, version, release))

                pkg.set_summary(entry.findtext('{http://linux.duke.edu/metadata/common}summary'))
                pkg.add_homepages(entry.findtext('{http://linux.duke.edu/metadata/common}url'))
                pkg.add_categories(entry.findtext(
                    '{http://linux.duke.edu/metadata/common}format/'
                    '{http://linux.duke.edu/metadata/rpm}group'))
                pkg.add_licenses(entry.findtext(
                    '{http://linux.duke.edu/metadata/common}format/'
                    '{http://linux.duke.edu/metadata/rpm}license'))
                pkg.set_arch(entry.findtext('{http://linux.duke.edu/metadata/common}arch'))

                packager = entry.findtext('{http://linux.duke.edu/metadata/common}packager')
                if packager:
                    pkg.add_maintainers(extract_maintainers(packager))

                yield pkg

        # report how many packages were dropped per architecture
        for skipped_arch, count in sorted(skipped_archs.items()):
            factory.log('skipped {} packages(s) with disallowed architecture {}'.format(count, skipped_arch))
Пример #9
0
    def iter_parse(self, path: str,
                   factory: PackageFactory) -> Iterable[PackageMaker]:
        """Parse an RDF export of directory.fsf.org subjects into packages.

        Entries are dropped when they lack a name or version, are not in
        English, were imported from Debian, are marked decommissioned or
        obsolete, or (with ``self._high_priority`` set) are not high
        priority projects. Statistics on dropped and accepted entries are
        logged through the factory once all entries are processed.

        Args:
            path: location of the RDF/XML file to parse.
            factory: used to create package makers and to log statistics.

        Yields:
            One package maker per accepted entry.
        """
        # All encountered version_status values:
        # alpha, beta, developmental, historical, mature, planning, rolling, stable, testing, unknown, unstable
        unstable_statuses = {
            'alpha', 'beta', 'developmental', 'planning', 'testing', 'unstable'
        }

        rdf_resource = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource'

        total = 0
        without_version = 0
        non_english = 0
        from_debian = 0
        obsolete = 0

        accepted = 0
        unstable = 0

        for entry in iter_xml_elements_at_level(path, 1, ['{http://semantic-mediawiki.org/swivt/1.0#}Subject']):
            pages = _get_attrs(entry, '{http://semantic-mediawiki.org/swivt/1.0#}page', rdf_resource)
            if not pages:
                continue

            # last path component of the first page resource
            page = _unescape(pages[0].split('/')[-1])

            with factory.begin(page) as pkg:
                label = safe_findtext(entry, '{http://www.w3.org/2000/01/rdf-schema#}label')
                name = entry.findtext('{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Name')
                version = entry.findtext('{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Version_identifier')

                if name is None:
                    continue

                total += 1

                if version is None:
                    without_version += 1
                    continue

                language = entry.findtext('{http://semantic-mediawiki.org/swivt/1.0#}wikiPageContentLanguage')
                if language != 'en':
                    non_english += 1
                    continue

                import_source = entry.findtext('{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Import_source')
                if import_source == 'Debian':  # 'Debian import' seems OK though
                    from_debian += 1
                    continue

                decommissioned = entry.findtext('{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Decommissioned_or_Obsolete')
                if decommissioned == 'Yes':
                    obsolete += 1
                    continue

                # the priority property is only consulted in high priority mode
                if self._high_priority and entry.findtext('{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Is_High_Priority_Project') != 'true':
                    continue

                status = entry.findtext('{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Version_status')

                if status in unstable_statuses:
                    unstable += 1
                    pkg.set_flags(PackageFlags.DEVEL)
                elif status == 'rolling':
                    pkg.set_flags(PackageFlags.ROLLING)

                accepted += 1

                pkg.add_name(page, NameType.GENERIC_GEN_NAME)
                pkg.set_version(version)
                pkg.set_summary(entry.findtext('{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Short_description'))

                homepages = _get_attrs(entry, '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Homepage_URL', rdf_resource)
                pkg.add_homepages(homepages)

                downloads = _get_attrs(entry, '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Version_download', rdf_resource)
                pkg.add_downloads(downloads)

                pkg.set_extra_field('page', page)
                pkg.set_extra_field('name', name)
                pkg.set_extra_field('label', label)

                yield pkg

        factory.log('Total software entries (with Name and Version): {}'.format(total))
        factory.log('Dropped entries with no version defined: {}'.format(without_version))
        factory.log('Dropped non-english pages: {}'.format(non_english))
        factory.log('Dropped entries marked as Import_source=Debian: {}'.format(from_debian))
        factory.log('Dropped entries marked as Decommissioned_or_Obsolete: {}'.format(obsolete))
        factory.log('Accepted entries: {} ({} unstable)'.format(accepted, unstable))