Exemplo n.º 1
0
    def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTransformer) -> Iterable[PackageMaker]:
        """Yield one package per non-hidden entry of the directory ``path``.

        For every entry, the greatest version subdirectory (as ordered by
        ``version_compare``) is selected.  Download URLs are taken from the
        ``url=`` lines of its ``Recipe`` file; summary, license and homepage
        are taken from the ``[Tag] value`` sections of its
        ``Resources/Description`` file.  Either file may be absent.
        """
        for recipe_name in os.listdir(path):
            # entries starting with a dot are not treated as recipes
            if recipe_name.startswith('.'):
                continue

            pkg = factory.begin()
            pkg.add_name(recipe_name, NameType.GOBOLINUX_RECIPE)

            package_path = os.path.join(path, recipe_name)

            # select the greatest of the available version directories
            newest: Optional[str] = None
            for candidate in os.listdir(package_path):
                if newest is None or version_compare(candidate, newest) > 0:
                    newest = candidate

            if newest is None:
                pkg.log('no usable versions found', severity=Logger.ERROR)
                continue

            pkg.set_version(newest)

            version_path = os.path.join(package_path, newest)
            recipe_path = os.path.join(version_path, 'Recipe')
            description_path = os.path.join(version_path, 'Resources', 'Description')

            if os.path.isfile(recipe_path):
                with open(recipe_path, 'r', encoding='utf-8', errors='ignore') as recipe:
                    for raw_line in recipe:
                        text = raw_line.strip()
                        if not text.startswith('url='):
                            continue

                        download = _expand_mirrors(text[4:])
                        if '$' in download:
                            # a shell variable is still present in the URL, so it cannot be used as is
                            factory.log('Recipe for {}/{} skipped, unhandled URL substitute found'.format(recipe_name, newest), severity=Logger.ERROR)
                        else:
                            pkg.add_downloads(download.strip('"'))

            if os.path.isfile(description_path):
                with open(description_path, 'r', encoding='utf-8', errors='ignore') as description:
                    fields = {}
                    tag = None
                    for raw_line in description:
                        text = raw_line.strip()
                        header = re.match('^\\[([A-Z][a-z]+)\\] *(.*?)$', text)

                        if header:
                            # "[Tag] value" opens a new section
                            tag = header.group(1)
                            fields[tag] = header.group(2)
                            continue

                        if tag is None:
                            # content before the first section header
                            factory.log('Description for {}/{} skipped, dumb format'.format(recipe_name, newest), severity=Logger.ERROR)
                            break

                        if text:
                            # continuation line: append to the current section, space-separated
                            fields[tag] = fields[tag] + ' ' + text if fields[tag] else text

                    pkg.set_summary(fields.get('Summary'))
                    pkg.add_licenses(fields.get('License'))
                    pkg.add_homepages(fields.get('Homepage', '').strip('"'))

            yield pkg
Exemplo n.º 2
0
    def iter_parse(self, path: str, factory: PackageFactory,
                   transformer: PackageTransformer) -> Iterable[PackageMaker]:
        """Yield packages built from the ``set`` actions of each manifest.

        Each action string is tokenized with ``shlex.split``; ``name=`` and
        ``value=`` tokens of ``set`` actions are collected into a mapping of
        variable name to list of values.  Entries lacking
        ``com.oracle.info.name``, ``com.oracle.info.version`` or
        ``pkg.summary`` are dropped.
        """
        for fmri, pkgdata in _iter_packages(path):
            pkg = factory.begin('{} {}'.format(fmri, pkgdata['version']))

            pkg.set_extra_field('fmri', fmri)

            variables = {}
            for action in pkgdata['actions']:
                words = shlex.split(action)

                # only "set ..." actions are understood
                if not (words and words[0] == 'set'):
                    factory.log('unrecognized action ' + action,
                                severity=Logger.ERROR)
                    continue

                attr_name = None
                attr_values = []

                for word in words[1:]:
                    if word.startswith('name='):
                        attr_name = word[5:]
                    elif word.startswith('value='):
                        attr_values.append(word[6:])
                    elif not word.startswith('last-fmri='):
                        # last-fmri= is known but deliberately ignored
                        factory.log('unrecognized token ' + word,
                                    severity=Logger.ERROR)

                if attr_name and attr_values:
                    variables[attr_name] = attr_values

            # entries without a name are likely not really packages; skip
            # them early to avoid parsing further and polluting logs
            has_name = 'com.oracle.info.name' in variables
            has_version = 'com.oracle.info.version' in variables
            if not (has_name and has_version):
                continue

            # some packages lack a summary, but all of them have counterparts
            # which do have it along with additional fields (category,
            # homepage, downloads); the ones without look like legacy, so it
            # is fine and desirable to drop them here
            if 'pkg.summary' not in variables:
                continue

            pkg.add_name(variables['com.oracle.info.name'][0],
                         NameType.GENERIC_PKGNAME)
            pkg.set_version(variables['com.oracle.info.version'][0])
            pkg.set_summary(variables['pkg.summary'][0])

            for classification in variables.get('info.classification', []):
                if classification.startswith('org.opensolaris.category.2008:'):
                    # keep only the part after the scheme prefix
                    pkg.add_categories(classification.split(':', 1)[1])

            pkg.add_homepages(variables.get('info.upstream-url'))
            pkg.add_downloads(variables.get('info.source-url'))

            yield pkg
Exemplo n.º 3
0
    def iter_parse(self, path: str, factory: PackageFactory,
                   transformer: PackageTransformer) -> Iterable[PackageMaker]:
        """Yield a package for every (version, project name) pair of each entry.

        List-valued fields of an entry (``licenses``, ``websites``,
        ``repology_projects``, ``versions``) are ``', '``-separated strings.
        Each version may carry ``|``-separated flags; versions flagged ``o``
        without ``O``, or ``p`` without ``P``, are skipped, and ``U`` marks
        the version as devel.  The number of processed entries is logged at
        the end.
        """
        total_entries = 0

        for packagedata in _iter_packages(path):
            # 'project' is a URL; only its last component (the ID) is needed
            entity = packagedata['project'].rsplit('/', 1)[-1]
            label = packagedata['projectLabel']

            with factory.begin('{} ({})'.format(entity, label)) as pkg:
                pkg.set_extra_field('entity', entity)
                pkg.set_name(label)
                pkg.set_summary(packagedata.get('projectDescription'))
                pkg.add_licenses(packagedata.get('licenses', '').split(', '))
                pkg.add_homepages(packagedata.get('websites', '').split(', '))

                project_names = packagedata['repology_projects']
                names = set(project_names.split(', ')) if project_names else set()

                if len(names) > 1:
                    pkg.log('multiple Repology project names: {}'.format(
                        ','.join(sorted(names))),
                            severity=Logger.WARNING)

                total_entries += 1

                # one package per version ...
                for version_spec in sorted(packagedata['versions'].split(', ')):
                    version, *flags = version_spec.split('|')

                    verpkg = pkg.clone()

                    if 'o' in flags and 'O' not in flags:
                        verpkg.log(
                            'version {} skipped due to bad OS'.format(version),
                            severity=Logger.NOTICE)
                        continue

                    if 'p' in flags and 'P' not in flags:
                        verpkg.log(
                            'version {} skipped due to bad Platform'.format(
                                version),
                            severity=Logger.NOTICE)
                        continue

                    verpkg.set_flags(PackageFlags.DEVEL, 'U' in flags)
                    verpkg.set_version(version)

                    # ... and per guessed name; in most cases these will be
                    # merged anyway
                    for name in names:
                        namepkg = verpkg.clone()
                        namepkg.set_basename(name)
                        yield namepkg

        factory.log('{} total entries'.format(total_entries))
Exemplo n.º 4
0
    def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTransformer) -> Iterable[PackageMaker]:
        """Yield one package per ``<package>`` element of the XML file at ``path``.

        Packages whose architecture is not in ``self.allowed_archs`` (when
        that is non-empty) are skipped and counted; the per-architecture
        totals are logged after the whole file has been processed.

        Raises:
            RuntimeError: if a package element has no ``<version>`` child.
        """
        normalize_version = VersionStripper().strip_right_greedy('+')

        skipped_archs: Dict[str, int] = {}

        for entry in iter_xml_elements_at_level(path, 1, ['{http://linux.duke.edu/metadata/common}package']):
            with factory.begin() as pkg:
                arch = safe_findtext(entry, '{http://linux.duke.edu/metadata/common}arch')

                if self.allowed_archs and arch not in self.allowed_archs:
                    skipped_archs[arch] = skipped_archs.get(arch, 0) + 1
                    continue

                name = safe_findtext(entry, '{http://linux.duke.edu/metadata/common}name')
                if '%{' in name:
                    pkg.log('incorrect package name (unexpanded substitution)', severity=Logger.ERROR)
                    continue

                pkg.add_name(name, NameType.GENERIC_PKGNAME)

                version_elt = entry.find('{http://linux.duke.edu/metadata/common}version')
                if version_elt is None:
                    raise RuntimeError('Cannot find <version> element')

                epoch = version_elt.attrib['epoch']
                version = version_elt.attrib['ver']
                release = version_elt.attrib['rel']

                # The version used for comparison may get a pre-release suffix
                # appended; keep it separate from ``version`` so that the raw
                # version below is built from the unmodified attributes.
                fixed_version = version

                match = re.match('0\\.[0-9]+\\.((?:alpha|beta|rc)[0-9]+)\\.', release)
                if match:
                    # known pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Prerelease_versions
                    fixed_version += '-' + match.group(1)
                elif release < '1':
                    # unknown pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Some_definitions
                    # most likely a snapshot
                    pkg.set_flags(PackageFlags.IGNORE)

                pkg.set_version(fixed_version, normalize_version)
                pkg.set_rawversion(nevra_construct(None, epoch, version, release))

                pkg.set_summary(entry.findtext('{http://linux.duke.edu/metadata/common}summary'))
                pkg.add_homepages(entry.findtext('{http://linux.duke.edu/metadata/common}url'))
                pkg.add_categories(entry.findtext('{http://linux.duke.edu/metadata/common}format/'
                                                  '{http://linux.duke.edu/metadata/rpm}group'))
                pkg.add_licenses(entry.findtext('{http://linux.duke.edu/metadata/common}format/'
                                                '{http://linux.duke.edu/metadata/rpm}license'))
                pkg.set_arch(entry.findtext('{http://linux.duke.edu/metadata/common}arch'))

                packager = entry.findtext('{http://linux.duke.edu/metadata/common}packager')
                if packager:
                    pkg.add_maintainers(extract_maintainers(packager))

                yield pkg

        for arch, numpackages in sorted(skipped_archs.items()):
            factory.log('skipped {} packages(s) with disallowed architecture {}'.format(numpackages, arch))
Exemplo n.º 5
0
    def iter_parse(self, path: str, factory: PackageFactory) -> Iterable[PackageMaker]:
        """Yield one package per YAML manifest located by ``_iter_packages``.

        Manifests which cannot be decoded or parsed, or whose top-level value
        is not a mapping, are logged as errors and skipped.  Manifests without
        a ``PackageName`` key are skipped and counted; the count is logged as
        a warning once all manifests have been processed.
        """
        num_manifests_without_packagename = 0

        for pkgloc in _iter_packages(path):
            with factory.begin(pkgloc.yamlpath_rel) as pkg:
                try:
                    # decode explicitly as UTF-8 instead of relying on the
                    # locale-dependent default encoding
                    with open(pkgloc.yamlpath_abs, 'r', encoding='utf-8') as fd:
                        pkgdata = yaml.safe_load(fd)
                except UnicodeDecodeError:
                    pkg.log('Cannot read file, probably UTF-16 garbage', Logger.ERROR)
                    continue
                except yaml.MarkedYAMLError as e:
                    if e.problem_mark:
                        # problem_mark.line is zero-based, report it one-based
                        pkg.log(f'YAML error at line {e.problem_mark.line + 1}: {e.problem}', Logger.ERROR)
                    else:
                        pkg.log(f'YAML error: {e.problem}', Logger.ERROR)
                    continue

                # an empty document loads as None and a scalar document as a
                # plain value; neither can be used as a manifest
                if not isinstance(pkgdata, dict):
                    pkg.log('Manifest is not a YAML mapping', Logger.ERROR)
                    continue

                if 'PackageName' not in pkgdata:
                    num_manifests_without_packagename += 1
                    continue

                pkg.add_name(pkgdata['PackageIdentifier'], NameType.WINGET_ID)
                pkg.add_name(pkgdata['PackageIdentifier'].split('.', 1)[-1], NameType.WINGET_ID_NAME)
                pkg.add_name(pkgdata['PackageName'], NameType.WINGET_NAME)
                pkg.add_name(pkgloc.relevant_path, NameType.WINGET_PATH)
                # Moniker field is optional and mostly useless

                version = pkgdata['PackageVersion']
                if isinstance(version, float):
                    # an unquoted value such as 1.10 is loaded as a float by YAML
                    pkg.log(f'PackageVersion "{version}" is a floating point, should be quoted in YAML', Logger.WARNING)

                pkg.set_version(str(version))
                pkg.add_homepages(pkgdata.get('PackageUrl'))

                # pkg.set_summary(pkgdata.get('Description'))  # may be long
                # pkg.add_licenses(pkgdata['License'])  # long garbage

                pkg.add_categories(map(str, pkgdata.get('Tags', [])))

                if 'Installers' in pkgdata:
                    pkg.add_downloads(installer['InstallerUrl'] for installer in pkgdata['Installers'])

                pkg.set_extra_field('yamlpath', pkgloc.yamlpath_rel)

                yield pkg

        factory.log(f'Skipped manifests without PackageName: {num_manifests_without_packagename}', Logger.WARNING)
Exemplo n.º 6
0
    def iter_parse(self, path: str, factory: PackageFactory, transformer: PackageTransformer) -> Iterable[PackageMaker]:
        """Yield a package for every 'ruby'-platform entry of the file at ``path``.

        The file is read with ``rubymarshal.reader.load`` and is expected to
        produce (name, version, platform) triples.  Entries for any other
        platform are counted, and the per-platform totals are logged once the
        file has been consumed.
        """
        skipped_gemplats: Dict[str, int] = Counter()

        with open(path, 'rb') as fd:
            for raw_name, raw_version, platform in rubymarshal.reader.load(fd):
                name = str(raw_name)

                with factory.begin(name) as pkg:
                    if platform != 'ruby':
                        skipped_gemplats[platform] += 1
                    else:
                        # the first element of the marshalled version dump is
                        # used as the version
                        version = str(raw_version.marshal_dump()[0])

                        pkg.add_name(name, NameType.RUBYGEMS_NAME)
                        pkg.set_version(version)

                        yield pkg

        for gemplat, count in sorted(skipped_gemplats.items()):
            factory.log(f'skipped {count} occurrence(s) of gemplat {gemplat}')
Exemplo n.º 7
0
    def iter_parse(self, path: str,
                   factory: PackageFactory) -> Iterable[PackageMaker]:
        """Yield one package per entry of the ``ports`` directory under ``path``.

        Package metadata is read from a new-style ``vcpkg.json`` manifest when
        present, otherwise from an old-style ``CONTROL`` file; entries with
        neither are logged and skipped.  If no ``CONTROL`` file was used at
        all, a note suggesting removal of the legacy code path is logged.

        Raises:
            RuntimeError: if the name stored in the metadata differs from the
                directory name, or if none of the known version keys is
                present in the metadata.
        """
        has_control_files = False

        for pkgdir in os.listdir(os.path.join(path, 'ports')):
            with factory.begin(pkgdir) as pkg:
                package_path_abs = os.path.join(path, 'ports', pkgdir)
                controlpath = os.path.join(package_path_abs, 'CONTROL')
                manifestpath = os.path.join(package_path_abs, 'vcpkg.json')

                # read either of old-style control (CONTROL) or new-style manifest (vcpkg.json) file
                if os.path.exists(manifestpath):
                    pkgdata = _read_manifest_file(manifestpath)
                elif os.path.exists(controlpath):
                    has_control_files = True
                    pkgdata = _read_control_file(controlpath)
                else:
                    pkg.log('neither control nor manifest file found',
                            Logger.ERROR)
                    continue

                if pkgdata['name'] != pkgdir:
                    raise RuntimeError(
                        f'sanity check failed: source {pkgdata["name"]} != directory {pkgdir}'
                    )

                pkg.add_name(pkgdata['name'], NameType.VCPKG_SOURCE)

                # the first version key present wins
                for key in [
                        'version', 'version-string', 'version-semver',
                        'version-date'
                ]:
                    if key in pkgdata:
                        version = pkgdata[key]
                        break
                else:
                    raise RuntimeError(
                        'none of expected version schemes found')

                # versions which start with a date are not normalized and are
                # marked as untrusted
                if re.match('[0-9]{4}[.-][0-9]{1,2}[.-][0-9]{1,2}', version):
                    pkg.set_version(version)
                    pkg.set_flags(PackageFlags.UNTRUSTED)
                else:
                    pkg.set_version(version, _normalize_version)

                # handle description which may be either a string or a list of strings
                description = pkgdata.get('description')

                if isinstance(description, str):
                    pkg.set_summary(description)
                elif isinstance(description, list) and description:
                    # only the first item is used; an empty list carries no summary
                    pkg.set_summary(description[0])

                pkg.add_homepages(pkgdata.get('homepage'))

                # maintainers, like description, may be either a single string
                # or a list of strings; wrap a bare string so that it is not
                # iterated character by character
                maintainers = pkgdata.get('maintainers', [])
                if isinstance(maintainers, str):
                    maintainers = [maintainers]

                for maintainer in maintainers:
                    pkg.add_maintainers(extract_maintainers(maintainer))

                # pretty much a hack to shut a bunch of fake versions up
                portfilepath = os.path.join(package_path_abs, 'portfile.cmake')
                if _grep_file(portfilepath, 'libimobiledevice-win32'):
                    pkg.log(
                        'marking as untrusted, https://github.com/libimobiledevice-win32 accused of version faking',
                        severity=Logger.WARNING)
                    pkg.set_flags(PackageFlags.UNTRUSTED)

                add_patch_files(pkg, package_path_abs, '*.patch')

                yield pkg

        if not has_control_files:
            factory.log(
                "No CONTROL files seen in the repository, seems like it's time to refactor vcpkg parser and remove legacy bits"
            )
Exemplo n.º 8
0
    def iter_parse(self, path: str, factory: PackageFactory,
                   transformer: PackageTransformer) -> Iterable[PackageMaker]:
        """Yield a package for every (version, project name) pair of each entry.

        List-valued fields of an entry are strings separated by ``␞``.  Each
        version may be followed by ``|``-separated flags: single-letter flags
        describe the version (``S`` stable, ``U`` unstable, ``X`` bad,
        ``o``/``O`` and ``p``/``P`` OS and platform markers, ``R`` preferred),
        while longer flags are recorded as download links of that version.

        When at least one version of an entry is flagged as preferred, only
        the preferred versions are yielded; otherwise all accepted versions
        are.  In both cases diagnostics about missing or outdated preferred
        flags are logged on the entry's package.  The number of processed
        entries is logged at the end.
        """
        total_entries = 0

        for packagedata in _iter_packages(path):
            entity = packagedata['project'].rsplit(
                '/', 1)[-1]  # this is URL, take only the ID from it
            label = packagedata['projectLabel']

            with factory.begin('{} ({})'.format(entity, label)) as pkg:
                pkg.add_name(entity, NameType.WIKIDATA_ENTITY)
                pkg.add_name(label, NameType.WIKIDATA_LABEL)
                pkg.set_summary(packagedata.get('projectDescription'))
                pkg.add_licenses(
                    sorted(packagedata.get('licenses', '').split('␞')))
                pkg.add_links(
                    LinkType.UPSTREAM_HOMEPAGE,
                    sorted(packagedata.get('websites', '').split('␞')))
                pkg.add_links(
                    LinkType.UPSTREAM_REPOSITORY,
                    sorted(packagedata.get('repositories', '').split('␞')))

                names = sorted(packagedata['repology_projects'].split('␞'))

                if len(names) > 1:
                    pkg.log('multiple Repology project names: {}'.format(
                        ','.join(names)),
                            severity=Logger.WARNING)

                total_entries += 1

                pkgs_preferred = []
                pkgs_normal = []

                # generate a package for each version
                for version in sorted(packagedata['versions'].split('␞')):
                    version, *flags = version.split('|')

                    verpkg = pkg.clone()

                    is_version_stable = 'S' in flags
                    is_version_unstable = 'U' in flags
                    is_version_bad = 'X' in flags
                    is_foreign_os_release = 'o' in flags and 'O' not in flags
                    is_foreign_platform_release = 'p' in flags and 'P' not in flags
                    is_preferred = 'R' in flags

                    if is_foreign_os_release:
                        verpkg.log(
                            'version {} skipped due to bad OS'.format(version),
                            severity=Logger.NOTICE)
                        continue

                    if is_foreign_platform_release:
                        verpkg.log(
                            'version {} skipped due to bad Platform'.format(
                                version),
                            severity=Logger.NOTICE)
                        continue

                    if is_version_bad:
                        verpkg.log(
                            'version {} skipped due to bad Version Type'.
                            format(version),
                            severity=Logger.ERROR)
                        continue

                    verpkg.set_flags(
                        PackageFlags.DEVEL, is_version_unstable
                        and not is_version_stable)
                    verpkg.set_version(version)
                    # Download links belong to this particular version, so
                    # they go to the version's clone; adding them to the
                    # shared base package would leave this version without
                    # its links and leak them into all later versions.
                    verpkg.add_links(
                        LinkType.UPSTREAM_DOWNLOAD,
                        sorted(flag for flag in flags if len(flag) > 1))

                    # generate package for each repology project
                    for name in names:
                        namepkg = verpkg.clone()
                        namepkg.add_name(
                            name, NameType.WIKIDATA_REPOLOGY_PROJECT_NAME)
                        if is_preferred:
                            pkgs_preferred.append(namepkg)
                        else:
                            pkgs_normal.append(namepkg)

                # NOTE: the loops below deliberately use names other than
                # ``pkg`` so that diagnostics are always logged on the entry's
                # base package rather than on whichever clone came last.
                if pkgs_preferred:
                    # temporary diagnostics until Wikidata usage in Wikipedia gains more momentum
                    # before that, we can/have to help keeping it up to date
                    max_preferred_version = max(
                        (item.version for item in pkgs_preferred),
                        key=cmp_to_key(version_compare))
                    max_normal_version = max(
                        (item.version for item in pkgs_normal
                         if not item.flags & PackageFlags.DEVEL),
                        key=cmp_to_key(version_compare),
                        default=None)
                    max_devel_version = max(
                        (item.version for item in pkgs_normal
                         if item.flags & PackageFlags.DEVEL),
                        key=cmp_to_key(version_compare),
                        default=None)

                    if max_normal_version and version_compare(
                            max_preferred_version, max_normal_version) < 0:
                        pkg.log(
                            'preferred version {} < normal {} (may need rank update)'
                            .format(max_preferred_version, max_normal_version),
                            severity=Logger.WARNING)

                    if max_devel_version and version_compare(
                            max_devel_version, max_preferred_version) > 0:
                        # the message states the relation actually tested
                        pkg.log(
                            'devel version {} > preferred {} (may need preferred rank for devel version)'
                            .format(max_devel_version, max_preferred_version),
                            severity=Logger.WARNING)

                    yield from pkgs_preferred
                else:
                    num_devel = sum(
                        1 for item in pkgs_normal
                        if item.flags & PackageFlags.DEVEL)
                    num_stable = len(pkgs_normal) - num_devel

                    # every version produces len(names) packages, so more
                    # packages than names means more than one version
                    if num_devel > len(names):
                        pkg.log(
                            'no preferred devel version(s) set (out of {})'.
                            format(num_devel // len(names)),
                            severity=Logger.WARNING)

                    if num_stable > len(names):
                        pkg.log(
                            'no preferred stable version(s) set (out of {})'.
                            format(num_stable // len(names)),
                            severity=Logger.WARNING)

                    yield from pkgs_normal

        factory.log('{} total entries'.format(total_entries))
Exemplo n.º 9
0
    def iter_parse(self, path: str, factory: PackageFactory,
                   transformer: PackageTransformer) -> Iterable[PackageMaker]:
        """Yield one package per accepted ``<package>`` element of the XML at ``path``.

        The architecture of an element is read from its ``<arch>`` child, or,
        when ``self._arch_from_filename`` is set, parsed from the ``href`` of
        its ``<location>`` child.  Source packages are accepted only when
        ``self._src`` is set and all other packages only when
        ``self._binary`` is set; the rest are counted per architecture and
        the totals are logged after the whole file has been processed.

        Raises:
            RuntimeError: if a required ``<location>`` or ``<version>``
                element is missing.
        """
        normalize_version = VersionStripper().strip_right_greedy('+')

        skipped_archs: Dict[str, int] = Counter()

        if self._arch_from_filename:
            factory.log('mitigation for incorrect <arch></arch> enabled',
                        severity=Logger.WARNING)

        elements = iter_xml_elements_at_level(
            path, 1, ['{http://linux.duke.edu/metadata/common}package'])

        for entry in elements:
            if not self._arch_from_filename:
                arch = safe_findtext(
                    entry, '{http://linux.duke.edu/metadata/common}arch')
            else:
                # XXX: openmandriva 3 hack, to be removed when it EoLs
                location_elt = entry.find(
                    '{http://linux.duke.edu/metadata/common}location')
                if location_elt is None:
                    raise RuntimeError('Cannot find <location> element')
                arch = nevra_parse(safe_getattr(location_elt, 'href'))[4]

            is_src = arch == 'src'

            # source and binary packages are enabled independently
            enabled = self._src if is_src else self._binary
            if not enabled:
                skipped_archs[arch] += 1
                continue

            with factory.begin() as pkg:
                name = safe_findtext(
                    entry, '{http://linux.duke.edu/metadata/common}name')
                if '%{' in name:
                    pkg.log('incorrect package name (unexpanded substitution)',
                            severity=Logger.ERROR)
                    continue

                if not is_src:
                    pkg.add_name(name, NameType.BINRPM_NAME)
                    sourcerpm = safe_findtext(
                        entry, '{http://linux.duke.edu/metadata/common}format/'
                        '{http://linux.duke.edu/metadata/rpm}sourcerpm')
                    srcname = nevra_parse(sourcerpm)[0]
                    pkg.add_name(srcname, NameType.BINRPM_SRCNAME)
                else:
                    pkg.add_name(name, NameType.SRCRPM_NAME)

                version_elt = entry.find(
                    '{http://linux.duke.edu/metadata/common}version')
                if version_elt is None:
                    raise RuntimeError('Cannot find <version> element')

                attrs = version_elt.attrib
                epoch = attrs['epoch']
                version = attrs['ver']
                release = attrs['rel']

                # the version used for comparison may get a pre-release
                # suffix, while the raw version is built from unmodified parts
                fixed_version = version

                prerelease = re.match(
                    '0\\.[0-9]+\\.((?:alpha|beta|rc)[0-9]+)\\.', release)
                if prerelease:
                    # known pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Prerelease_versions
                    fixed_version = fixed_version + '-' + prerelease.group(1)
                elif release < '1':
                    # unknown pre-release schema: https://fedoraproject.org/wiki/Packaging:Versioning#Some_definitions
                    # most likely a snapshot
                    pkg.set_flags(PackageFlags.IGNORE)

                pkg.set_version(fixed_version, normalize_version)
                rawversion = nevra_construct(None, epoch, version, release)
                pkg.set_rawversion(rawversion)

                summary = entry.findtext(
                    '{http://linux.duke.edu/metadata/common}summary')
                pkg.set_summary(summary)

                homepage = entry.findtext(
                    '{http://linux.duke.edu/metadata/common}url')
                pkg.add_homepages(homepage)

                group = entry.findtext(
                    '{http://linux.duke.edu/metadata/common}format/'
                    '{http://linux.duke.edu/metadata/rpm}group')
                pkg.add_categories(group)

                license_text = entry.findtext(
                    '{http://linux.duke.edu/metadata/common}format/'
                    '{http://linux.duke.edu/metadata/rpm}license')
                pkg.add_licenses(license_text)

                declared_arch = entry.findtext(
                    '{http://linux.duke.edu/metadata/common}arch')
                pkg.set_arch(declared_arch)

                packager = entry.findtext(
                    '{http://linux.duke.edu/metadata/common}packager')
                if packager:
                    pkg.add_maintainers(extract_maintainers(packager))

                yield pkg

        for arch, numpackages in sorted(skipped_archs.items()):
            factory.log(
                'skipped {} packages(s) with disallowed architecture {}'.
                format(numpackages, arch))
Exemplo n.º 10
0
    def iter_parse(
        self, path: str, factory: PackageFactory,
        transformer: PackageTransformer
    ) -> Generator[PackageMaker, None, None]:
        """Yield one package per item of the ``packages`` map in the JSON at ``path``.

        The ``name`` of each item is split into package name and version at
        the first dash which is followed by a non-letter; items for which
        this fails are logged and skipped.  A few hard-coded exceptions then
        adjust the split, and versions which look like snapshots or commit
        hashes are flagged to be ignored.  The remaining fields come from the
        item's ``meta`` mapping.
        """
        with open(path, 'rb') as jsonfile:
            entries = JsonSlicer(jsonfile, ('packages', None),
                                 encoding='utf-8',
                                 path_mode='map_keys')

            for key, packagedata in entries:
                pkg = factory.begin(key)

                # see how Nix parses 'derivative' names in
                # https://github.com/NixOS src/libexpr/names.cc, DrvName::DrvName
                # it just splits on dash followed by non-letter
                #
                # this doesn't work well on 100% cases, it's an upstream problem
                derivation = packagedata['name']
                parsed = re.match('(.+?)-([^a-zA-Z].*)$', derivation)
                if parsed is None:
                    factory.log('cannot extract version: {}/{}'.format(
                        key, derivation),
                                severity=Logger.ERROR)
                    continue

                pkg.set_name(parsed.group(1))
                pkg.set_version(parsed.group(2))

                # some exceptions: these prefixes belong to the name, not to
                # the version
                for prefix in ('75dpi', '100dpi'):
                    if not pkg.version.startswith(prefix):
                        continue
                    pkg.set_name(pkg.name + '-' + prefix)
                    pkg.set_version(pkg.version[len(prefix) + 1:])

                # names which themselves contain a dash followed by a digit
                # get split in the wrong place, so re-split them explicitly
                merged = pkg.name + '-' + pkg.version
                known_names = [
                    'liblqr-1', 'python2.7-3to2', 'python3.6-3to2',
                    'libretro-4do', 'polkit-qt-1-qt5', 'polkit-qt-1-qt4'
                ]
                for pkgname in known_names:
                    if not merged.startswith(pkgname):
                        continue
                    pkg.set_name(pkgname)
                    pkg.set_version(merged[len(pkgname) + 1:])

                # the first component of a dotted key is used as a category
                key_components = key.split('.')
                if len(key_components) > 1:
                    pkg.add_categories(key_components[0])

                if pkg.name.endswith('-git'):
                    pkg.set_name(pkg.name[:-4])
                    pkg.set_flags(PackageFlags.ignore)

                # versions containing a 20YY-MM-DD date are ignored
                if re.match('.*20[0-9]{2}-[0-9]{2}-[0-9]{2}', pkg.version):
                    pkg.set_flags(PackageFlags.ignore)

                # so are hexadecimal strings of 7+ characters with at least
                # one letter
                if re.match('[0-9a-f]*[a-f][0-9a-f]*$',
                            pkg.version) and len(pkg.version) >= 7:
                    pkg.log(
                        'ignoring version which looks like commit hash: {}'.
                        format(pkg.version),
                        severity=Logger.ERROR)
                    pkg.set_flags(PackageFlags.ignore)

                meta = packagedata['meta']

                pkg.add_homepages(meta.get('homepage'))

                if 'description' in meta:
                    summary = meta['description'].replace('\n', ' ')
                    pkg.set_summary(summary)

                if 'maintainers' in meta:
                    maintainers = meta['maintainers']
                    if isinstance(maintainers, list):
                        pkg.add_maintainers(
                            extract_nix_maintainers(maintainers))
                    else:
                        pkg.log('maintainers is not a list: {}'.format(
                            maintainers),
                                severity=Logger.ERROR)

                if 'license' in meta:
                    pkg.add_licenses(extract_nix_licenses(meta['license']))

                if 'position' in meta:
                    # position has the form "<file>:<line>"
                    posfile, posline = meta['position'].rsplit(':', 1)
                    pkg.set_extra_field('posfile', posfile)
                    pkg.set_extra_field('posline', posline)

                    # XXX: haskell modules are autogenerated in nix: https://github.com/NixOS/nixpkgs/commits/master/pkgs/development/haskell-modules/hackage-packages.nix
                    if posfile.startswith('pkgs/development/haskell-modules'):
                        pkg.set_flags(PackageFlags.rolling)

                yield pkg
Exemplo n.º 11
0
    def iter_parse(self, path: str, factory: PackageFactory) -> Iterable[PackageMaker]:
        """Yield a package for every accepted ``Subject`` element found in *path*.

        Each level-1 ``Subject`` element is inspected in turn. Entries are
        dropped (and, except for the high-priority filter, counted) when they
        have no page reference, no ``Name``, no ``Version_identifier``, a
        content language other than ``en``, ``Import_source`` equal to
        ``Debian``, or ``Decommissioned_or_Obsolete`` equal to ``Yes``. When
        ``self._high_priority`` is truthy, entries whose
        ``Is_High_Priority_Project`` is not ``true`` are dropped as well.

        Once the input is exhausted, the collected counters are reported
        through ``factory.log``.

        Args:
            path: Location of the XML data, passed to
                ``iter_xml_elements_at_level``.
            factory: Source of package makers (``factory.begin``) and sink for
                the summary log lines.

        Yields:
            The package maker filled in for each accepted entry.
        """
        # Qualified element / attribute names used while reading an entry.
        subject_tag = '{http://semantic-mediawiki.org/swivt/1.0#}Subject'
        page_tag = '{http://semantic-mediawiki.org/swivt/1.0#}page'
        language_tag = '{http://semantic-mediawiki.org/swivt/1.0#}wikiPageContentLanguage'
        resource_attr = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource'
        label_tag = '{http://www.w3.org/2000/01/rdf-schema#}label'
        name_tag = '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Name'
        version_tag = '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Version_identifier'
        import_source_tag = '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Import_source'
        obsolete_tag = '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Decommissioned_or_Obsolete'
        high_priority_tag = '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Is_High_Priority_Project'
        status_tag = '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Version_status'
        summary_tag = '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Short_description'
        homepage_tag = '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Homepage_URL'
        download_tag = '{http://directory.fsf.org/wiki/Special:URIResolver/Property-3A}Version_download'

        # All encountered version_status values:
        # alpha, beta, developmental, historical, mature, planning, rolling,
        # stable, testing, unknown, unstable
        devel_statuses = {
            'alpha', 'beta', 'developmental', 'planning', 'testing', 'unstable'
        }

        # Statistics reported once parsing has finished.
        seen = 0
        without_version = 0
        non_english = 0
        from_debian = 0
        obsolete = 0
        accepted = 0
        unstable = 0

        for entry in iter_xml_elements_at_level(path, 1, [subject_tag]):
            page_refs = _get_attrs(entry, page_tag, resource_attr)
            if not page_refs:
                continue

            # Only the last path component of the first reference is used.
            last_component = pages_tail = page_refs[0].split('/')[-1]
            page = _unescape(last_component)

            with factory.begin(page) as pkg:
                label = safe_findtext(entry, label_tag)
                name = entry.findtext(name_tag)
                version = entry.findtext(version_tag)

                if name is None:
                    continue

                seen += 1

                if version is None:
                    without_version += 1
                    continue

                language = entry.findtext(language_tag)
                if language != 'en':
                    non_english += 1
                    continue

                # 'Debian import' seems OK though
                import_source = entry.findtext(import_source_tag)
                if import_source == 'Debian':
                    from_debian += 1
                    continue

                obsolete_mark = entry.findtext(obsolete_tag)
                if obsolete_mark == 'Yes':
                    obsolete += 1
                    continue

                # The priority property is only consulted when the filter is
                # enabled; entries dropped here are not counted.
                if self._high_priority:
                    if entry.findtext(high_priority_tag) != 'true':
                        continue

                status = entry.findtext(status_tag)
                if status in devel_statuses:
                    unstable += 1
                    pkg.set_flags(PackageFlags.DEVEL)
                elif status == 'rolling':
                    pkg.set_flags(PackageFlags.ROLLING)

                accepted += 1

                pkg.add_name(page, NameType.GENERIC_GEN_NAME)
                pkg.set_version(version)

                summary = entry.findtext(summary_tag)
                pkg.set_summary(summary)

                homepages = _get_attrs(entry, homepage_tag, resource_attr)
                pkg.add_homepages(homepages)

                downloads = _get_attrs(entry, download_tag, resource_attr)
                pkg.add_downloads(downloads)

                for field, value in (('page', page), ('name', name), ('label', label)):
                    pkg.set_extra_field(field, value)

                yield pkg

        summary_lines = (
            'Total software entries (with Name and Version): {}'.format(seen),
            'Dropped entries with no version defined: {}'.format(without_version),
            'Dropped non-english pages: {}'.format(non_english),
            'Dropped entries marked as Import_source=Debian: {}'.format(from_debian),
            'Dropped entries marked as Decommissioned_or_Obsolete: {}'.format(obsolete),
            'Accepted entries: {} ({} unstable)'.format(accepted, unstable),
        )
        for message in summary_lines:
            factory.log(message)