def iter_parse(self, path: str, factory: PackageFactory) -> Iterable[PackageMaker]:
    """Yield one package per ``rdf:Description`` of an OpenPKG XML/RDF index.

    Args:
        path: location of the index file to parse.
        factory: provider of package makers; ``factory.begin()`` is entered
            once for every description found in the repository element.

    Yields:
        The package maker of each description after its name, version,
        license, summary, category, homepage and downloads were filled in.

    Raises:
        RuntimeError: if the index contains no ``Repository`` element.
    """
    openpkg = '{http://www.openpkg.org/xml-rdf-index/0.9}'
    rdf = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}'

    root = xml.etree.ElementTree.parse(path)

    repository = root.find(openpkg + 'Repository')
    if repository is None:
        # an explicit raise, unlike an assert, is still enforced under `python -O`
        raise RuntimeError('Cannot find <Repository> element')

    for item in repository.findall(rdf + 'Description'):
        with factory.begin() as pkg:
            pkg.add_name(safe_findtext(item, openpkg + 'Name'), NameType.SRCRPM_NAME)
            pkg.set_version(safe_findtext(item, openpkg + 'Version'))
            pkg.add_licenses(item.findtext(openpkg + 'License'))
            pkg.set_summary(item.findtext(openpkg + 'Summary'))
            pkg.add_categories(item.findtext(openpkg + 'Group'))
            pkg.add_homepages(item.findtext(openpkg + 'URL'))

            # only keep real remote URLs which do not point at openpkg.org itself
            for source in safe_findalltexts(item, './' + openpkg + 'Source/' + rdf + 'bag/' + rdf + 'li'):
                if source.startswith(('https://', 'http://', 'ftp://')) and 'openpkg.org' not in source:
                    pkg.add_downloads(source)

            # a version which ends with the release string gets flagged as untrusted
            release = safe_findtext(item, openpkg + 'Release')
            if pkg.version.endswith(release):
                pkg.set_flags(PackageFlags.UNTRUSTED)

            yield pkg
def iter_parse(self, path: str, factory: PackageFactory) -> Iterable[PackageMaker]:
    """Yield one package per ``<version>`` element of the input XML.

    ``<license>`` and ``<package>`` elements are only remembered; every
    ``<version>`` element is combined with the data remembered for its
    package, so those have to appear in the input before the versions
    which refer to them.

    Args:
        path: location of the XML file to parse.
        factory: provider of package makers; ``factory.begin()`` is entered
            with ``"<package> <version>"`` as its argument.

    Yields:
        A filled-in package maker for each ``<version>`` element.
    """
    license_titles: dict[str, str] = {}
    package_info: dict[str, PackageData] = {}

    for entry in iter_xml_elements_at_level(path, 1, ['license', 'package', 'version']):
        tag = entry.tag

        if tag == 'license':
            license_titles[safe_getattr(entry, 'name')] = safe_findtext(entry, 'title')
            continue

        if tag == 'package':
            package_info[safe_getattr(entry, 'name')] = PackageData(
                safe_findtext(entry, 'title'),
                safe_findalltexts(entry, 'license'),
                safe_findalltexts(entry, 'category'),
                safe_findalltexts(entry, 'url'),
            )
            continue

        if tag != 'version':
            continue

        full_name = safe_getattr(entry, 'package')
        version = safe_getattr(entry, 'name')

        with factory.begin(f'{full_name} {version}') as pkg:
            # metadata collected from the previously parsed <package> entry
            info = package_info[full_name]

            # XXX: package naming is inconsistent (either plain name like kdenlive or
            # domain prefixed like com.abisource.abiword), but it's assumed that
            # everything up to the last dot may be stripped (#863)
            pkg.add_name(info.title, NameType.NPACKD_TITLE)
            pkg.add_name(full_name, NameType.NPACKD_FULLNAME)
            pkg.add_name(full_name.rsplit('.', 1)[-1], NameType.NPACKD_LASTNAME)

            pkg.set_version(version)
            pkg.add_downloads(url_elt.text for url_elt in entry.findall('url'))

            # license names are resolved through the previously parsed <license> entries
            pkg.add_licenses(license_titles[key] for key in info.licenses)
            pkg.add_categories(_filter_categories(info.categories))
            pkg.add_homepages(info.urls)

            yield pkg
def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTransformer) -> Iterable[PackageMaker]:
    """Yield one package per versioned ``<package>`` of every ``<application>``.

    Data shared by all packages of an application is filled into a
    template maker once; each ``<package>`` child with a non-empty
    ``<version>`` then yields a clone of that template.

    Args:
        path: location of the XML index to parse.
        factory: provider of package makers; ``factory.begin()`` is entered
            once per application.
        transformer: not used by this method.

    Yields:
        A clone of the application template with version and flags set.
    """
    tree = xml.etree.ElementTree.parse(path)

    for app_elt in tree.findall('application'):
        with factory.begin() as app:
            app_id = safe_findtext(app_elt, 'id')
            app.add_name(app_id, NameType.FDROID_ID)

            # newlines are dropped from names, e.g. org.primftpd: name="primiti\nve ftpd"
            app_name = safe_findtext(app_elt, 'name').replace('\n', '')
            app.add_name(app_name, NameType.FDROID_NAME)

            app.add_licenses(app_elt.findtext('license'))
            app.add_categories(app_elt.findtext('category'))
            app.add_homepages(app_elt.findtext('web'))
            app.set_summary(app_elt.findtext('summary'))

            market_code = int(safe_findtext(app_elt, 'marketvercode'))

            for pkg_elt in app_elt.findall('package'):
                code = int(safe_findtext(pkg_elt, 'versioncode'))
                version = pkg_elt.findtext('version')

                if not version:
                    continue

                pkg = app.clone()
                pkg.set_version(version)

                # version codes above the marketvercode value are flagged as devel
                if code > market_code:
                    pkg.set_flags(PackageFlags.DEVEL)
                else:
                    pkg.set_flags(0)

                yield pkg
def iter_parse(self, path: str, factory: PackageFactory) -> Iterable[PackageMaker]:
    """Yield one package per Atom ``<entry>`` of every ``*.xml`` file in a directory.

    Args:
        path: directory holding the feed pages; files whose name does not
            end with ``.xml`` are ignored.
        factory: provider of package makers; ``factory.begin()`` is entered
            once per entry.

    Yields:
        A package maker with names, version, links and (if present)
        summary filled in.
    """
    atom = '{http://www.w3.org/2005/Atom}'
    ds = '{http://schemas.microsoft.com/ado/2007/08/dataservices}'
    md = '{http://schemas.microsoft.com/ado/2007/08/dataservices/metadata}'

    # common prefix of all lookups below the <properties> element
    props = f'{md}properties/{ds}'

    # link type -> name of the property which holds the URL, in the order they are added
    link_sources = (
        (LinkType.UPSTREAM_HOMEPAGE, 'ProjectUrl'),
        (LinkType.UPSTREAM_REPOSITORY, 'ProjectSourceUrl'),
        (LinkType.PACKAGE_SOURCES, 'PackageSourceUrl'),
        (LinkType.UPSTREAM_DOCUMENTATION, 'DocsUrl'),
        (LinkType.UPSTREAM_ISSUE_TRACKER, 'BugTrackerUrl'),
        (LinkType.UPSTREAM_DISCUSSION, 'MailingListUrl'),
    )

    for filename in os.listdir(path):
        if not filename.endswith('.xml'):
            continue

        page = xml.etree.ElementTree.parse(os.path.join(path, filename))

        for entry in page.findall(atom + 'entry'):
            with factory.begin() as pkg:
                pkg.add_name(safe_findtext(entry, atom + 'title'), NameType.CHOCOLATEY_TITLE)
                pkg.set_version(safe_findtext(entry, props + 'Version'))

                for link_type, property_name in link_sources:
                    pkg.add_links(link_type, safe_findtext_empty(entry, props + property_name))

                pkg.add_name(safe_findtext_empty(entry, props + 'Title'), NameType.CHOCOLATEY_METADATA_TITLE)

                # the property is still looked up, but no flag is set for prereleases yet
                if safe_findtext(entry, props + 'IsPrerelease') == 'true':
                    pass  # XXX: need testing
                    # pkg.set_flags(PackageFlags.WEAK_DEVEL)

                summary_elt = entry.find(atom + 'summary')
                if summary_elt is not None:
                    pkg.set_summary(summary_elt.text)

                yield pkg
def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTransformer) -> Iterable[PackageMaker]:
    """Yield one package per ``<package>`` element of RPM repository metadata.

    Packages whose architecture is not in ``self.allowed_archs`` (when that
    is non-empty) and packages whose name contains an unexpanded ``%{``
    substitution are skipped. Once the input is exhausted, the number of
    packages skipped per architecture is logged through ``factory``.

    Args:
        path: location of the metadata XML file.
        factory: provider of package makers and sink for the final log lines.
        transformer: not used by this method.

    Yields:
        A package maker with name, versions, summary, homepage, category,
        license, architecture and maintainers filled in.

    Raises:
        RuntimeError: if a package entry has no ``<version>`` element.
    """
    normalize_version = VersionStripper().strip_right_greedy('+')

    skipped_archs: Dict[str, int] = {}

    for entry in iter_xml_elements_at_level(path, 1, ['{http://linux.duke.edu/metadata/common}package']):
        with factory.begin() as pkg:
            arch = safe_findtext(entry, '{http://linux.duke.edu/metadata/common}arch')

            if self.allowed_archs and arch not in self.allowed_archs:
                skipped_archs[arch] = skipped_archs.get(arch, 0) + 1
                continue

            name = safe_findtext(entry, '{http://linux.duke.edu/metadata/common}name')
            if '%{' in name:
                pkg.log('incorrect package name (unexpanded substitution)', severity=Logger.ERROR)
                continue

            pkg.add_name(name, NameType.GENERIC_PKGNAME)

            version_elt = entry.find('{http://linux.duke.edu/metadata/common}version')
            if version_elt is None:
                raise RuntimeError('Cannot find <version> element')

            epoch = version_elt.attrib['epoch']
            version = version_elt.attrib['ver']
            release = version_elt.attrib['rel']

            # `version` itself must stay untouched, as the raw version below is
            # built from it; the pre-release suffix only goes into a copy
            fixed_version = version

            match = re.match('0\\.[0-9]+\\.((?:alpha|beta|rc)[0-9]+)\\.', release)
            if match:
                # known pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Prerelease_versions
                fixed_version += '-' + match.group(1)
            elif release < '1':
                # unknown pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Some_definitions
                # most likely a snapshot
                pkg.set_flags(PackageFlags.IGNORE)

            pkg.set_version(fixed_version, normalize_version)
            pkg.set_rawversion(nevra_construct(None, epoch, version, release))

            pkg.set_summary(entry.findtext('{http://linux.duke.edu/metadata/common}summary'))
            pkg.add_homepages(entry.findtext('{http://linux.duke.edu/metadata/common}url'))
            pkg.add_categories(entry.findtext('{http://linux.duke.edu/metadata/common}format/'
                                              '{http://linux.duke.edu/metadata/rpm}group'))
            pkg.add_licenses(entry.findtext('{http://linux.duke.edu/metadata/common}format/'
                                            '{http://linux.duke.edu/metadata/rpm}license'))
            pkg.set_arch(entry.findtext('{http://linux.duke.edu/metadata/common}arch'))

            packager = entry.findtext('{http://linux.duke.edu/metadata/common}packager')
            if packager:
                pkg.add_maintainers(extract_maintainers(packager))

            yield pkg

    for arch, numpackages in sorted(skipped_archs.items()):
        factory.log('skipped {} packages(s) with disallowed architecture {}'.format(numpackages, arch))
def iter_parse(self, path: str, factory: PackageFactory) -> Iterable[PackageMaker]:
    """Yield one package per ``pspec.xml`` file found below ``path``.

    Files which cannot be parsed as XML are logged as errors and skipped.
    The version is taken from the ``History/Update`` element with the
    greatest integer ``release`` attribute.

    Args:
        path: root of the tree to search for ``pspec.xml`` files.
        factory: provider of package makers; ``factory.begin()`` is entered
            with the file path relative to ``path``.

    Yields:
        A filled-in package maker for each parseable spec file.
    """
    def release_number(update_elt):
        # ordering key used to pick the latest <Update> element
        return int(update_elt.attrib['release'])

    for spec_path in walk_tree(path, suffix='pspec.xml'):
        relpath = os.path.relpath(spec_path, path)

        with factory.begin(relpath) as pkg:
            try:
                root = xml.etree.ElementTree.parse(spec_path).getroot()
            except xml.etree.ElementTree.ParseError as error:
                pkg.log(f'Cannot parse XML: {error}', Logger.ERROR)
                continue

            name = safe_findtext(root, './Source/Name')
            pkgdir = os.path.dirname(relpath)

            # name of the directory which directly contains the spec file
            pathname = relpath.split(os.sep)[-2]

            if name != pathname:
                # there's only one exception ATOW
                pkg.log(f'name "{name}" != package directory "{pathname}"', Logger.ERROR)

            pkg.add_name(name, NameType.PISI_NAME)
            pkg.add_name(pkgdir, NameType.PISI_PKGDIR)
            pkg.set_summary(safe_findtext(root, './Source/Summary'))

            pkg.add_homepages(elt.text for elt in root.findall('./Source/Homepage'))
            pkg.add_downloads(elt.text for elt in root.findall('./Source/Archive'))
            pkg.add_licenses(elt.text for elt in root.findall('./Source/License'))
            pkg.add_categories(elt.text for elt in root.findall('./Source/IsA'))
            pkg.add_maintainers(elt.text for elt in root.findall('./Source/Packager/Email'))

            latest_update = max(root.findall('./History/Update'), key=release_number)
            pkg.set_version(safe_findtext(latest_update, './Version'))

            yield pkg
def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTransformer) -> Iterable[PackageMaker]:
    """Yield one package per ``<version>`` element of the input XML.

    ``<license>`` and ``<package>`` elements are only remembered; every
    ``<version>`` element is combined with the data remembered for its
    package, so those have to appear in the input before the versions
    which refer to them.

    Args:
        path: location of the XML file to parse.
        factory: provider of package makers; ``factory.begin()`` is entered
            with ``"<package> <version>"`` as its argument.
        transformer: not used by this method.

    Yields:
        A filled-in package maker for each ``<version>`` element.
    """
    # license name -> license title
    licenses: Dict[str, str] = {}
    # package name -> (title, license names, categories, urls)
    packages: Dict[str, Tuple[str, List[str], List[str], List[str]]] = {}

    for entry in iter_xml_elements_at_level(path, 1, ['license', 'package', 'version']):
        if entry.tag == 'license':
            licenses[safe_getattr(entry, 'name')] = safe_findtext(entry, 'title')
        elif entry.tag == 'package':
            packages[safe_getattr(entry, 'name')] = (
                safe_findtext(entry, 'title'),
                safe_findalltexts(entry, 'license'),
                safe_findalltexts(entry, 'category'),
                safe_findalltexts(entry, 'url'),
            )
        elif entry.tag == 'version':
            pkgname = safe_getattr(entry, 'package')
            version = safe_getattr(entry, 'name')

            with factory.begin(pkgname + ' ' + version) as pkg:
                # XXX: package naming is inconsistent (either plain name like kdenlive or
                # domain prefixed like com.abisource.abiword), but it's assumed that
                # everything up to the last dot may be stripped (#863)
                pkg.set_name(pkgname)
                pkg.set_basename(pkgname.split('.')[-1])
                pkg.set_version(version)
                pkg.add_downloads(e.text for e in entry.findall('url'))

                # from previously parsed <license> and <package> entries
                title, license_names, categories, urls = packages[pkgname]

                pkg.set_summary(title)
                pkg.add_licenses(licenses[license_name] for license_name in license_names)
                pkg.add_categories(_filter_categories(categories))
                # homepages are URLs, not categories, so they are passed on unfiltered
                pkg.add_homepages(urls)

                yield pkg
def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTransformer) -> Iterable[PackageMaker]:
    """Yield one package per accepted ``<package>`` element of RPM repository metadata.

    Source packages (architecture ``src``) are accepted only if ``self._src``
    is set, all others only if ``self._binary`` is set. When
    ``self._arch_from_filename`` is set, the architecture used for this
    decision comes from the ``href`` of the ``<location>`` element instead
    of the ``<arch>`` element. Once the input is exhausted, the number of
    packages skipped per architecture is logged through ``factory``.

    Args:
        path: location of the metadata XML file.
        factory: provider of package makers and sink for log lines.
        transformer: not used by this method.

    Yields:
        A package maker with names, versions, summary, homepage, category,
        license, architecture and maintainers filled in.

    Raises:
        RuntimeError: if a required ``<location>`` or ``<version>`` element
            is missing from an entry.
    """
    common = '{http://linux.duke.edu/metadata/common}'
    rpm = '{http://linux.duke.edu/metadata/rpm}'

    normalize_version = VersionStripper().strip_right_greedy('+')

    skipped_archs: Dict[str, int] = Counter()

    if self._arch_from_filename:
        factory.log('mitigation for incorrect <arch></arch> enabled', severity=Logger.WARNING)

    for entry in iter_xml_elements_at_level(path, 1, [common + 'package']):
        if not self._arch_from_filename:
            arch = safe_findtext(entry, common + 'arch')
        else:
            # XXX: openmandriva 3 hack, to be removed when it EoLs
            location_elt = entry.find(common + 'location')
            if location_elt is None:
                raise RuntimeError('Cannot find <location> element')
            arch = nevra_parse(safe_getattr(location_elt, 'href'))[4]

        is_src = arch == 'src'

        # source and binary packages are enabled by separate switches
        enabled = self._src if is_src else self._binary
        if not enabled:
            skipped_archs[arch] += 1
            continue

        with factory.begin() as pkg:
            name = safe_findtext(entry, common + 'name')
            if '%{' in name:
                pkg.log('incorrect package name (unexpanded substitution)', severity=Logger.ERROR)
                continue

            if not is_src:
                pkg.add_name(name, NameType.BINRPM_NAME)
                # binary packages additionally get the name parsed out of their source rpm
                sourcerpm = safe_findtext(entry, common + 'format/' + rpm + 'sourcerpm')
                pkg.add_name(nevra_parse(sourcerpm)[0], NameType.BINRPM_SRCNAME)
            else:
                pkg.add_name(name, NameType.SRCRPM_NAME)

            version_elt = entry.find(common + 'version')
            if version_elt is None:
                raise RuntimeError('Cannot find <version> element')

            version_attrs = version_elt.attrib
            epoch = version_attrs['epoch']
            version = version_attrs['ver']
            release = version_attrs['rel']

            prerelease = re.match('0\\.[0-9]+\\.((?:alpha|beta|rc)[0-9]+)\\.', release)
            if prerelease:
                # known pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Prerelease_versions
                fixed_version = version + '-' + prerelease.group(1)
            else:
                fixed_version = version
                if release < '1':
                    # unknown pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Some_definitions
                    # most likely a snapshot
                    pkg.set_flags(PackageFlags.IGNORE)

            pkg.set_version(fixed_version, normalize_version)
            # the raw version is built from the unmodified version attribute
            pkg.set_rawversion(nevra_construct(None, epoch, version, release))

            pkg.set_summary(entry.findtext(common + 'summary'))
            pkg.add_homepages(entry.findtext(common + 'url'))
            pkg.add_categories(entry.findtext(common + 'format/' + rpm + 'group'))
            pkg.add_licenses(entry.findtext(common + 'format/' + rpm + 'license'))
            pkg.set_arch(entry.findtext(common + 'arch'))

            packager = entry.findtext(common + 'packager')
            if packager:
                pkg.add_maintainers(extract_maintainers(packager))

            yield pkg

    for arch, numpackages in sorted(skipped_archs.items()):
        factory.log('skipped {} packages(s) with disallowed architecture {}'.format(numpackages, arch))
def iter_parse(self, path: str, factory: PackageFactory) -> Iterable[PackageMaker]:
    """Yield one package per accepted ``Subject`` element of a semantic wiki export.

    Entries are dropped when they have no page, no name, no version, a
    content language other than ``en``, ``Import_source`` equal to
    ``Debian`` or are marked obsolete; if ``self._high_priority`` is set,
    entries not marked as high priority projects are dropped as well.
    Once the input is exhausted, counters of seen, dropped and accepted
    entries are logged through ``factory``.

    Args:
        path: location of the XML export to parse.
        factory: provider of package makers and sink for the final log lines.

    Yields:
        A package maker with name, version, summary, homepages, downloads
        and the ``page``/``name``/``label`` extra fields filled in.
    """
    swivt = '{http://semantic-mediawiki.org/swivt/1.0#}'
    rdf = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}'
    rdfs = '{http://www.w3.org/2000/01/rdf-schema#}'
    prop = '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}'

    # All encountered version_status values:
    # alpha, beta, developmental, historical, mature, planning, rolling, stable, testing, unknown, unstable
    unstable_statuses = {
        'alpha',
        'beta',
        'developmental',
        'planning',
        'testing',
        'unstable',
    }

    total = 0
    without_version = 0
    non_english = 0
    from_debian = 0
    obsolete = 0
    accepted = 0
    unstable = 0

    for entry in iter_xml_elements_at_level(path, 1, [swivt + 'Subject']):
        pages = _get_attrs(entry, swivt + 'page', rdf + 'resource')
        if not pages:
            continue

        # the page name is the last path component of the first page URL
        page = _unescape(pages[0].split('/')[-1])

        with factory.begin(page) as pkg:
            label = safe_findtext(entry, rdfs + 'label')
            name = entry.findtext(prop + 'Name')
            version = entry.findtext(prop + 'Version_identifier')

            if name is None:
                continue

            total += 1

            if version is None:
                without_version += 1
                continue

            if entry.findtext(swivt + 'wikiPageContentLanguage') != 'en':
                non_english += 1
                continue

            if entry.findtext(prop + 'Import_source') == 'Debian':
                # 'Debian import' seems OK though
                from_debian += 1
                continue

            if entry.findtext(prop + 'Decommissioned_or_Obsolete') == 'Yes':
                obsolete += 1
                continue

            # entries dropped here are not accounted for by any of the "dropped" counters
            if self._high_priority and entry.findtext(prop + 'Is_High_Priority_Project') != 'true':
                continue

            status = entry.findtext(prop + 'Version_status')
            if status in unstable_statuses:
                unstable += 1
                pkg.set_flags(PackageFlags.DEVEL)
            elif status == 'rolling':
                pkg.set_flags(PackageFlags.ROLLING)

            accepted += 1

            pkg.add_name(page, NameType.GENERIC_GEN_NAME)
            pkg.set_version(version)
            pkg.set_summary(entry.findtext(prop + 'Short_description'))

            pkg.add_homepages(_get_attrs(entry, prop + 'Homepage_URL', rdf + 'resource'))
            pkg.add_downloads(_get_attrs(entry, prop + 'Version_download', rdf + 'resource'))

            pkg.set_extra_field('page', page)
            pkg.set_extra_field('name', name)
            pkg.set_extra_field('label', label)

            yield pkg

    factory.log('Total software entries (with Name and Version): {}'.format(total))
    factory.log('Dropped entries with no version defined: {}'.format(without_version))
    factory.log('Dropped non-english pages: {}'.format(non_english))
    factory.log('Dropped entries marked as Import_source=Debian: {}'.format(from_debian))
    factory.log('Dropped entries marked as Decommissioned_or_Obsolete: {}'.format(obsolete))
    factory.log('Accepted entries: {} ({} unstable)'.format(accepted, unstable))