示例#1
0
    def Parse(self, path):
        """Build Package objects from the entries of a repository XML file.

        Iterates over the elements yielded by
        self.ParsePackagesEntriesFromXml(path) and copies name, version,
        summary, url, group, license and packager into a new Package.

        When self.allowed_archs is non-empty, entries whose arch is not
        listed there are skipped and counted; after the loop one warning
        per skipped architecture is printed to stderr.

        Returns the list of packages in the order the entries were yielded.
        """
        common = '{http://linux.duke.edu/metadata/common}'
        rpm = '{http://linux.duke.edu/metadata/rpm}'

        packages = []
        skipped = {}  # architecture -> number of entries dropped

        for entry in self.ParsePackagesEntriesFromXml(path):
            pkg = Package()

            arch = entry.find(common + 'arch').text
            if self.allowed_archs and arch not in self.allowed_archs:
                if arch in skipped:
                    skipped[arch] += 1
                else:
                    skipped[arch] = 1
                continue

            pkg.name = entry.find(common + 'name').text

            # the version lives in the 'ver' attribute, not in the text
            raw_version = entry.find(common + 'version').attrib['ver']
            pkg.version, pkg.origversion = SanitizeVersion(raw_version)

            pkg.comment = entry.find(common + 'summary').text
            pkg.homepage = entry.find(common + 'url').text
            pkg.category = entry.find(
                common + 'format/' + rpm + 'group').text

            license_node = entry.find(common + 'format/' + rpm + 'license')
            pkg.licenses.append(license_node.text)

            packager = entry.find(common + 'packager').text
            if packager:
                pkg.maintainers = GetMaintainers(packager)

            packages.append(pkg)

        for arch, count in sorted(skipped.items()):
            message = (
                'WARNING: skipping {} packages(s) with disallowed architecture {}'
                .format(count, arch))
            print(message, file=sys.stderr)

        return packages
示例#2
0
    def Parse(self, path):
        """Run the helper program on path and parse its '|'-separated output.

        The executable self.helperpath is started with path as its only
        argument. Each line of its stdout is split on '|' and the first
        five columns are read as name, version, maintainer, category and
        comment.

        Returns the list of Package objects, one per output line.
        """
        packages = []

        helper = subprocess.Popen([self.helperpath, path],
                                  errors='ignore',
                                  stdout=subprocess.PIPE,
                                  universal_newlines=True)
        with helper:
            for output_line in helper.stdout:
                columns = output_line.strip().split('|')

                pkg = Package()
                pkg.name = columns[0]
                pkg.version = columns[1]
                # XXX: may have multiple maintainers
                pkg.maintainers = GetMaintainers(columns[2])
                pkg.category = columns[3]
                pkg.comment = columns[4]

                packages.append(pkg)

        return packages
示例#3
0
    def Parse(self, path):
        """Parse a '|'-separated INDEX file with 12 fields per line.

        Lines that do not have exactly 12 fields, or whose first field
        (the combined name-version string) is empty, are reported on
        stderr and skipped.

        Returns the list of Package objects built from the valid lines.
        """
        packages = []

        with open(path, encoding='utf-8') as index:
            for raw_line in index:
                stripped = raw_line.strip()
                fields = stripped.split('|')

                if len(fields) != 12:
                    message = (
                        'WARNING: package {} skipped, incorrect number of fields in INDEX'
                        .format(fields[0]))
                    print(message, file=sys.stderr)
                    continue

                (pkgname, origin, _, comment, _, maintainer, categories,
                 _, _, _, _, homepage) = fields

                if not pkgname:
                    message = 'WARNING: line {} bogus, critical fields are empty'.format(
                        stripped)
                    print(message, file=sys.stderr)
                    continue

                pkg = Package()

                pkg.name, version = SplitPackageNameVersion(pkgname)
                pkg.version, pkg.origversion = SanitizeVersion(version)
                pkg.comment = comment
                if homepage:
                    pkg.homepage = homepage

                # NOTE: some ports use the OWNER variable instead of
                # MAINTAINER; OWNER does not make it into INDEX
                pkg.maintainers = GetMaintainers(maintainer)

                # only the first of the space-separated categories is kept
                pkg.category = categories.partition(' ')[0]

                # origin is a '/'-separated path; its last component is
                # stored as the port name
                pkg.extrafields['portname'] = origin.rpartition('/')[2]
                pkg.extrafields['origin'] = origin

                packages.append(pkg)

        return packages
示例#4
0
    def Parse(self, path):
        """Parse a '|'-delimited index file read through csv.reader.

        Column 0 holds the combined name-version string, column 3 the
        comment, column 5 the maintainer and column 6 a space-separated
        category list, of which only the first entry is used.

        Returns the list of Package objects, one per row.
        """
        packages = []

        with open(path, encoding='utf-8') as index:
            for row in csv.reader(index, delimiter='|'):
                pkg = Package()

                fullname = row[0]

                # cut away string suffixes which come after version
                suffixed = re.match('(.*?)(-[a-z_]+[0-9]*)+$', fullname)
                if suffixed is not None:
                    fullname = suffixed.group(1)

                pkg.name, version = SplitPackageNameVersion(fullname)
                pkg.version, pkg.origversion = SanitizeVersion(version)
                pkg.comment = row[3]
                pkg.maintainers = GetMaintainers(row[5])

                first_category = row[6].partition(' ')[0]
                pkg.category = first_category.strip()

                packages.append(pkg)

        return packages
示例#5
0
    def Parse(self, path):
        """Parse a '|'-separated INDEX file with 13 fields per line.

        Lines with a different number of fields are reported on stderr
        and skipped.

        Returns the list of Package objects built from the valid lines.
        """
        packages = []

        with open(path, encoding='utf-8') as index:
            for raw_line in index:
                fields = raw_line.strip().split('|')

                if len(fields) != 13:
                    message = 'WARNING: package {} skipped, incorrect number of fields in INDEX'.format(fields[0])
                    print(message, file=sys.stderr)
                    continue

                (pkgname, _, _, comment, _, maintainer, categories,
                 _, _, homepage, _, _, _) = fields

                pkg = Package()

                pkg.name, version = SplitPackageNameVersion(pkgname)
                pkg.version, pkg.origversion = SanitizeVersion(version)
                pkg.comment = comment
                pkg.maintainers = GetMaintainers(maintainer)

                # only the first of the space-separated categories is kept
                pkg.category = categories.partition(' ')[0]

                if homepage:
                    pkg.homepage = homepage

                packages.append(pkg)

        return packages
示例#6
0
    def Parse(self, path):
        """Parse a JSON package dump into a list of Package objects.

        The file at path must contain a JSON object with a top-level
        'packages' mapping. Every value in it needs a 'name' (a combined
        "<name>-<version>" string) and a 'meta' mapping, from which the
        optional 'homepage', 'description', 'maintainers' and 'license'
        keys are read.

        Entries whose name cannot be split into name and version are
        reported on stderr and skipped. Versions that look like git
        snapshots, dates or commit hashes are kept but the package gets
        ignoreversion = True.

        Returns the list of packages, ordered by their key in 'packages'.
        """
        result = []

        with open(path, 'r', encoding='utf-8') as jsonfile:
            packages = json.load(jsonfile)['packages']

        for key, packagedata in sorted(packages.items()):
            # used to identify the entry in every diagnostic below
            ident = '{}/{}'.format(key, packagedata['name'])

            # see how Nix parses 'derivative' names in
            # https://github.com/NixOS src/libexpr/names.cc, DrvName::DrvName
            # it just splits on dash followed by non-letter
            #
            # this doesn't work well on 100% cases, it's an upstream problem
            match = re.match('(.+?)-([^a-zA-Z].*)$', packagedata['name'])
            if not match:
                print('cannot extract version: ' + ident, file=sys.stderr)
                continue

            pkg = Package()
            pkg.name = match.group(1)
            pkg.version = match.group(2)

            # some exceptions: these prefixes start with a digit, so the
            # split above puts them into the version; move them back
            for prefix in ('75dpi', '100dpi'):
                if pkg.version.startswith(prefix):
                    pkg.name += '-' + prefix
                    pkg.version = pkg.version[len(prefix) + 1:]

            # for these packages the first dash-separated component of the
            # version belongs to the name
            for pkgname in ('liblqr', ):
                if pkg.name == pkgname:
                    head, dash, tail = pkg.version.partition('-')
                    # without a dash there is nothing to move; slicing on
                    # a failed find() would corrupt both name and version
                    if dash:
                        pkg.name = pkg.name + '-' + head
                        pkg.version = tail

            if pkg.name.endswith('-git'):
                pkg.name = pkg.name[:-4]
                print('ignoring version for git snapshot: ' + ident, file=sys.stderr)
                pkg.ignoreversion = True

            if re.match('.*20[0-9]{2}-[0-9]{2}-[0-9]{2}', pkg.version):
                print('ignoring version which is a date: ' + ident, file=sys.stderr)
                pkg.ignoreversion = True

            # at least 7 hex characters, one of which must be a letter
            if re.match('[0-9a-f]*[a-f][0-9a-f]*$', pkg.version) and len(pkg.version) >= 7:
                print('ignoring version which looks like commit hash: ' + ident, file=sys.stderr)
                pkg.ignoreversion = True

            meta = packagedata['meta']

            if 'homepage' in meta:
                homepage = meta['homepage']
                if isinstance(homepage, list):  # XXX: remove after adding support for homepages array
                    # an empty list carries no homepage at all
                    homepage = homepage[0] if homepage else None
                pkg.homepage = homepage

            if 'description' in meta:
                pkg.comment = meta['description']

            if 'maintainers' in meta:
                maintainers = meta['maintainers']
                if not isinstance(maintainers, list):
                    print('maintainers is not a list: ' + ident, file=sys.stderr)
                else:
                    maintainers = ', '.join(maintainers)
                pkg.maintainers = GetMaintainers(maintainers)

            if 'license' in meta:
                pkg.licenses = ExtractLicenses(meta['license'])

            result.append(pkg)

        return result
示例#7
0
    def Parse(self, path):
        """Parse a <category>/<package>/<package>.info directory tree.

        Every non-hidden subdirectory of path is treated as a category
        and every subdirectory of a category as a package. The package's
        .info file is read as a list of KEY="value" assignments, where a
        value may continue over several lines ending in a backslash.

        PRGNAM and VERSION are required: if either could not be parsed,
        a warning is printed to stderr and the package is skipped.
        HOMEPAGE, EMAIL and DOWNLOAD are used when present.

        Returns the list of Package objects that were parsed.
        """
        result = []

        for category in os.listdir(path):
            if category.startswith('.'):
                continue

            category_path = os.path.join(path, category)
            if not os.path.isdir(category_path):
                continue

            for package in os.listdir(category_path):
                package_path = os.path.join(category_path, package)
                if not os.path.isdir(package_path):
                    continue

                info_path = os.path.join(package_path, package + '.info')
                if not os.path.isfile(info_path):
                    print('WARNING: {} does not exist, package skipped'.format(
                        info_path),
                          file=sys.stderr)
                    continue

                variables = {}

                with open(info_path, encoding='utf-8',
                          errors='ignore') as infofile:
                    key = None  # variable currently being collected
                    total_value = []  # its value pieces, one per line

                    for line in infofile:
                        line = line.strip()
                        if not line:
                            continue

                        if key:  # continued
                            value = line
                        else:  # new variable
                            if '=' not in line:
                                # not an assignment; ignore it instead of
                                # aborting the whole parse on the unpack
                                continue
                            key, value = line.split('=', 1)
                            value = value.lstrip('"').lstrip()

                        if value.endswith('\\'):  # will continue
                            total_value.append(value.rstrip('\\').rstrip())
                        elif value.endswith('"'):
                            # closing quote: the variable is complete
                            total_value.append(value.rstrip('"').rstrip())
                            variables[key] = ' '.join(total_value)
                            key = None
                            total_value = []

                # Check the required variables with .get() so that a
                # missing one produces the warning below; indexing the
                # dict directly would raise KeyError before any check.
                name = variables.get('PRGNAM')
                version = variables.get('VERSION')
                if name is None or version is None:
                    print(
                        'WARNING: {} skipped, likely due to parsing problems'
                        .format(info_path),
                        file=sys.stderr)
                    continue

                pkg = Package()
                pkg.category = category
                pkg.name = name
                pkg.version = version

                if 'HOMEPAGE' in variables:
                    pkg.homepage = variables['HOMEPAGE']
                if 'EMAIL' in variables:
                    pkg.maintainers = GetMaintainers(variables['EMAIL'])
                pkg.downloads = variables.get('DOWNLOAD', '').split()

                result.append(pkg)

        return result
示例#8
0
文件: cpan.py 项目: mojca/repology
    def Parse(self, path):
        """Parse a CPAN 02packages.details.txt index into Package objects.

        The data we get from CPAN is somewhat lacking, so the parsing is
        somewhat involved. Example lines of the index:

        Acme::constant                 0.001003  G/GL/GLITCHMR/Acme-constant-0.1.3.tar.gz
        Acme::Constructor::Pythonic       0.002  T/TO/TOBYINK/Acme-Constructor-Pythonic-0.002.tar.gz
        Acme::Continent                   undef  P/PE/PERIGRIN/XML-Toolkit-0.15.tar.gz

        1. The module version (second column) does not always correspond
           to the package version (which we need), so the package
           filename is parsed instead. The version may also be undefined.
        2. All package modules are listed, and we don't need them
           (which is not a problem as the CPAN repo is shadow anyway).

        So we do our best to parse the filename into package name and
        actual version, and keep only entries where the module name is
        equal to the package name. Some entries are lost, some are not
        even in 02packages.details.txt, some are unparsable (no version,
        or garbage in version), but these are negligible.
        """
        packages = []
        archive_suffixes = ('.tar.gz', '.tar.bz2', '.zip', '.tgz')

        with open(path) as index:
            in_body = False
            for raw_line in index:
                entry = raw_line.strip()

                if not in_body:
                    # everything up to the first empty line is skipped
                    in_body = entry == ''
                    continue

                module, _module_version, package = re.split(r'[ \t]+', entry)

                package_path, package_file = package.rsplit('/', 1)

                # strip the archive extension, if it is a known one
                dist = None
                for suffix in archive_suffixes:
                    if package_file.endswith(suffix):
                        dist = package_file[:-len(suffix)]
                        break

                if dist is None or '-' not in dist:
                    # Bad package name; XXX: log?
                    continue

                package_name, package_version = SplitPackageNameVersion(dist)
                if package_version.startswith(('v', 'V')):
                    package_version = package_version[1:]

                if not re.match('[0-9]', package_version):
                    # Bad version; XXX: log?
                    continue

                if module.replace('::', '-') != package_name:
                    # Submodules not really needed
                    continue

                pkg = Package()
                pkg.name = package_name
                pkg.version = package_version

                # third path component, e.g. GLITCHMR in G/GL/GLITCHMR
                author = package_path.split('/')[2].lower()
                pkg.maintainers = GetMaintainers(author + '@cpan')
                pkg.homepage = 'http://search.cpan.org/dist/' + package_name + '/'

                packages.append(pkg)

        return packages
示例#9
0
    def Parse(self, path):
        """Create one Package per *.ebuild file in a <category>/<package> tree.

        Directories named 'virtual' and 'metadata' directly under path
        are not treated as categories. Maintainers are read from the
        package's metadata.xml; description, homepage, licenses and
        downloads come from the matching file under
        <path>/metadata/md5-cache/<category>/, when it exists.

        Returns the list of Package objects.
        """
        packages = []

        for category in os.listdir(path):
            category_dir = os.path.join(path, category)
            if not os.path.isdir(category_dir):
                continue
            if category in ('virtual', 'metadata'):
                continue

            for package in os.listdir(category_dir):
                package_dir = os.path.join(category_dir, package)
                if not os.path.isdir(package_dir):
                    continue

                # maintainers come from metadata.xml and are the same
                # for all ebuilds of the current package
                maintainers = []
                xml_path = os.path.join(package_dir, 'metadata.xml')
                if os.path.isfile(xml_path):
                    with open(xml_path, 'r', encoding='utf-8') as xml_file:
                        tree = xml.etree.ElementTree.parse(xml_file)

                    for maintainer_node in tree.findall('maintainer'):
                        email = maintainer_node.find('email')
                        if email is None or email.text is None:
                            continue
                        maintainers.extend(extract_maintainers(email.text))

                for ebuild in os.listdir(package_dir):
                    if not ebuild.endswith('.ebuild'):
                        continue

                    pkg = Package()

                    pkg.name = package
                    pkg.category = category
                    pkg.maintainers = maintainers

                    # file name is '<package>-<version>.ebuild'
                    raw_version = ebuild[len(package) + 1:-7]
                    pkg.version, pkg.origversion = SanitizeVersion(raw_version)

                    if pkg.version.endswith('9999'):
                        # ignore versions for snapshots
                        pkg.SetFlag(PackageFlags.rolling)

                    cache_name = package + '-' + (pkg.origversion
                                                  or pkg.version)
                    cache_path = os.path.join(path, 'metadata', 'md5-cache',
                                              category, cache_name)
                    if os.path.isfile(cache_path):
                        with open(cache_path, 'r',
                                  encoding='utf-8') as cache_file:
                            for cache_line in cache_file:
                                key, value = cache_line.strip().split('=', 1)

                                if key == 'DESCRIPTION':
                                    pkg.comment = value
                                elif key == 'HOMEPAGE':
                                    # XXX: save all urls
                                    pkg.homepage = value.split(' ')[0]
                                elif key == 'LICENSE':
                                    if '(' in value:
                                        # XXX: conditionals and OR's: need more
                                        # complex parsing and backend support
                                        pkg.licenses.append(value)
                                    else:
                                        pkg.licenses += value.split(' ')
                                elif key == 'SRC_URI':
                                    pkg.downloads += ParseConditionalExpr(
                                        value)

                    packages.append(pkg)

        return packages
示例#10
0
    def Parse(self, path):
        """Create one Package per package directory, using its best ebuild.

        Directories named 'virtual' and 'metadata' directly under path
        are not treated as categories. Among the *.ebuild files of a
        package, the version preferred by IsBetterVersion() is kept;
        packages without any ebuild produce no result. Maintainers come
        from metadata.xml (with a fallback value when none are listed),
        and description, homepage, licenses and downloads come from the
        matching file under <path>/metadata/md5-cache/<category>/.

        Returns the list of Package objects.
        """
        packages = []

        for category in os.listdir(path):
            category_dir = os.path.join(path, category)
            if not os.path.isdir(category_dir):
                continue
            if category in ('virtual', 'metadata'):
                continue

            for package in os.listdir(category_dir):
                package_dir = os.path.join(category_dir, package)
                if not os.path.isdir(package_dir):
                    continue

                xml_path = os.path.join(package_dir, 'metadata.xml')

                pkg = Package()

                if os.path.isfile(xml_path):
                    with open(xml_path, 'r', encoding='utf-8') as xml_file:
                        tree = xml.etree.ElementTree.parse(xml_file)

                    for maintainer_node in tree.findall('maintainer'):
                        email = maintainer_node.find('email')
                        if email is None or email.text is None:
                            continue
                        pkg.maintainers += GetMaintainers(email.text)

                # pick the best version among all ebuilds of the package
                best_version = None
                best_origversion = None
                for ebuild in os.listdir(package_dir):
                    if not ebuild.endswith('.ebuild'):
                        continue

                    # file name is '<package>-<version>.ebuild'
                    version, origversion = SanitizeVersion(
                        ebuild[len(package) + 1:-7])

                    if IsBetterVersion(version, best_version):
                        best_origversion = origversion
                        best_version = version

                if best_version is None:
                    # no ebuilds at all, nothing to report
                    continue

                pkg.name = package
                pkg.version = best_version
                pkg.origversion = best_origversion
                pkg.category = category

                if not pkg.maintainers:
                    # If we have no maintainer, assign Gentoo's default maintainer value
                    # See https://wiki.gentoo.org/wiki/GLEP:67#Bug_assignment
                    pkg.maintainers = ['*****@*****.**']

                cache_name = package + '-' + (best_origversion
                                              or best_version)
                cache_path = os.path.join(path, 'metadata', 'md5-cache',
                                          category, cache_name)
                if os.path.isfile(cache_path):
                    with open(cache_path, 'r',
                              encoding='utf-8') as cache_file:
                        for cache_line in cache_file:
                            key, value = cache_line.strip().split('=', 1)

                            if key == 'DESCRIPTION':
                                pkg.comment = value
                            elif key == 'HOMEPAGE':
                                # XXX: save all urls
                                pkg.homepage = value.split(' ')[0]
                            elif key == 'LICENSE':
                                if '(' in value:
                                    # XXX: conditionals and OR's: need more
                                    # complex parsing and backend support
                                    pkg.licenses.append(value)
                                else:
                                    pkg.licenses += value.split(' ')
                            elif key == 'SRC_URI':
                                pkg.downloads += ParseConditionalExpr(value)

                packages.append(pkg)

        return packages