def closed_mispackaged(ctx):
    """List mispackaged packages whose Bugzilla report is closed.

    Exits with error code 1 if such packages are found.
    Use the --verbose flag to get the output pretty-printed for humans.
    """
    data = get_data(*ctx.obj['datadirs'])
    # Collect mispackaged packages that link to a CLOSED Bugzilla bug
    results = []
    for package in data['packages'].values():
        if package['status'] == 'mispackaged':
            for link in package['links']:
                if link['type'] == 'bug' and link['note'].startswith('CLOSED'):
                    results.append(package)
    if ctx.obj['verbose'] > 0:
        if results:
            print("\nThe following packages are both 'mispackaged' and "
                  "their associated Bugzilla report is CLOSED:\n")
            for p in results:
                print("\t{}".format(p['name']))
            print()
        else:
            print("\nThere are no packages both 'mispackaged' and "
                  "having the associated Bugzilla report CLOSED.\n")
    else:
        # Terse, machine-friendly output: one package name per line
        for p in results:
            print("{}".format(p['name']))
    if results:
        exit(1)
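
# A hedged, self-contained sketch of the per-package test closed_mispackaged
# applies. The package shape (``status``, plus ``links`` entries with
# ``type``/``note``) is inferred from the fields used above, not the full
# portingdb schema; the helper name is illustrative, not part of this module.
def _example_has_closed_bug(package):
    """Return True if a package is mispackaged with a CLOSED bug link.

    >>> _example_has_closed_bug({
    ...     'status': 'mispackaged',
    ...     'links': [{'type': 'bug', 'note': 'CLOSED NOTABUG'}],
    ... })
    True
    """
    return (package['status'] == 'mispackaged'
            and any(link['type'] == 'bug' and link['note'].startswith('CLOSED')
                    for link in package['links']))
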
def naming(ctx, category):
    """List packages with the selected naming scheme issue."""
    data = get_data(*ctx.obj['datadirs'])
    for package in data['packages'].values():
        if category == 'misnamed-subpackage' and package['is_misnamed']:
            print(package['name'])
        if category == 'ambiguous-requires' and package['unversioned_requires']:
            print(package['name'])
        if category == 'blocked' and package['blocked_requires']:
            print(package['name'])
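
# Hedged sketch: each category above maps one-to-one to a package flag, so a
# table-driven equivalent is possible. The mapping below is read directly
# from the loop above; the constant and helper names are illustrative.
_NAMING_CATEGORY_FLAGS = {
    'misnamed-subpackage': 'is_misnamed',
    'ambiguous-requires': 'unversioned_requires',
    'blocked': 'blocked_requires',
}

def _example_naming_matches(package, category):
    """Return True if ``package`` has the issue selected by ``category``.

    >>> _example_naming_matches({'is_misnamed': True}, 'misnamed-subpackage')
    True
    >>> _example_naming_matches({'is_misnamed': False}, 'misnamed-subpackage')
    False
    """
    return bool(package.get(_NAMING_CATEGORY_FLAGS[category]))
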
def check_fti(ctx, repo, arch, results, open_bug_reports):
    """Check whether all Python 2 packages install (FTI check)."""
    data = get_data(*ctx.obj['datadirs'])
    rpms_srpms = pkgs_srpm(data)
    results = pathlib.Path(results)
    filtered = {}
    print('Querying Bugzilla...')
    bugz = bugzillas()
    sources_reported_now = set()
    print('Running installcheck...')
    for name, info in installcheck(repo, arch).items():
        # Skip RPMs we cannot map to a known source package
        if name not in rpms_srpms:
            continue
        source = rpms_srpms[name]
        filtered[name] = info
        filtered[name]['source'] = source
        if source.startswith(('sugar-', 'gr-')):
            # too many broken sugars and radios to file separately
            continue
        will_file = False
        if source not in bugz:
            print(f'{source}: {name} has no bug')
            will_file = True
        elif (bugz[source].status == 'CLOSED'
                and bugz[source].resolution != 'EOL'):
            print(f'{source}: {name} has CLOSED bug: {bugz[source].id}')
            will_file = True
        if source in sources_reported_now:
            # Only file one report per source package per run
            will_file = False
        if will_file and open_bug_reports:
            sources_reported_now.add(source)
            open_bz(name, **filtered[name])
    results.write_text(json.dumps(filtered, indent=2))
    print(f'\nResults in {results}\n'
          f'There are {len(filtered)} noninstallable Python 2 packages.')
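
# Hedged sketch of check_fti's per-source "should we file a bug?" decision.
# ``bugz`` maps source names to bug objects with ``status`` and ``resolution``
# attributes, mirroring how the loop above uses bugzillas(); the helper name
# and the SimpleNamespace stand-in are illustrative, not part of this module.
def _example_will_file(source, bugz, already_reported):
    """Return True if a new/reopened FTI bug should be filed for ``source``.

    >>> import types
    >>> bug = types.SimpleNamespace(status='CLOSED', resolution='NOTABUG')
    >>> _example_will_file('python-foo', {'python-foo': bug}, set())
    True
    >>> _example_will_file('python-foo', {'python-foo': bug}, {'python-foo'})
    False
    >>> _example_will_file('python-bar', {}, set())  # no bug at all
    True
    """
    if source in already_reported:
        return False
    if source not in bugz:
        return True
    bug = bugz[source]
    return bug.status == 'CLOSED' and bug.resolution != 'EOL'
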
def main(update, naming):
    excluded = set(BAD_COMMITS)
    tmpdir = tempfile.mkdtemp()
    writer = csv.DictWriter(
        sys.stdout, ['commit', 'date', 'status', 'num_packages'],
        lineterminator='\n')
    writer.writeheader()
    prev_date = None
    prev_commit = None
    if update:
        with open(update) as f:
            for row in csv.DictReader(f):
                excluded.add(row['commit'])
                prev_date = row['date']
                prev_commit = row['commit']
                writer.writerow(row)
    try:
        tmpclone = os.path.join(tmpdir, 'tmp_clone')
        tmpdata = os.path.join(tmpclone, 'data')
        run(['git', 'clone', '.', tmpclone])
        prev_data_hash = None
        prev_batch = []
        end_commit = HISTORY_NAMING_END_COMMIT if naming else HISTORY_END_COMMIT
        for commit in reversed(git_history(end=end_commit)):
            date = run(['git', 'log', '-n1', '--pretty=%ci', commit]).strip()
            if prev_date and prev_date > date:
                continue
            data_hash = run(['git', 'rev-parse', commit + ':' + 'data'])
            if (commit in excluded) or (data_hash == prev_data_hash):
                prev_data_hash = data_hash
                continue
            if prev_date and prev_date[:11] != date[:11]:
                prev_date = date
                prev_commit = commit
                for row in prev_batch:
                    writer.writerow(row)
            elif not prev_date:
                prev_date = date
            else:
                prev_commit = commit
                print('{},{} - skipping'.format(prev_commit, prev_date),
                      file=sys.stderr)
                continue
            prev_batch = []
            # Note: we don't remove files that didn't exist in the old
            # version.
            run(['git', 'checkout', commit, '--', 'data'], cwd=tmpclone)
            data = get_data(tmpdata)
            if naming:
                prev_batch = get_history_naming_package_numbers(
                    data, commit, date)
            else:
                prev_batch = get_history_package_numbers(data, commit, date)
            prev_data_hash = data_hash
        for row in prev_batch:
            writer.writerow(row)
    finally:
        shutil.rmtree(tmpdir)
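
# Hedged sketch of the day comparison main() relies on. ``git log
# --pretty=%ci`` dates look like '2018-06-01 12:34:56 +0200', so the first
# 11 characters ('YYYY-MM-DD ') identify the calendar day; main() compares
# ``prev_date[:11] != date[:11]`` to emit at most one batch of rows per day.
# The helper name and the dates in the doctest are illustrative.
def _example_same_day(date_a, date_b):
    """Return True if two ``%ci``-formatted dates fall on the same day.

    >>> _example_same_day('2018-06-01 09:00:00 +0200',
    ...                   '2018-06-01 18:00:00 +0200')
    True
    >>> _example_same_day('2018-06-01 09:00:00 +0200',
    ...                   '2018-06-02 09:00:00 +0200')
    False
    """
    return date_a[:11] == date_b[:11]
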
def check_drops(ctx, filelist, primary, cache_sax, cache_rpms):
    """Check packages that should be dropped from the distribution."""
    data = get_data(*ctx.obj['datadirs'])
    cache_dir.mkdir(exist_ok=True)

    # Analyze filelists.xml.gz and primary.xml.gz
    cache_path = cache_dir / 'sax_results.json'
    if cache_sax and cache_path.exists():
        with cache_path.open('r') as f:
            results, sources = json.load(f)
    else:
        filelist = gzip.GzipFile(fileobj=filelist, mode='r')
        handler = SaxFilesHandler()
        xml.sax.parse(filelist, handler)
        results = handler.results

        primary = gzip.GzipFile(fileobj=primary, mode='r')
        handler = SaxPrimaryHandler()
        xml.sax.parse(primary, handler)
        sources = handler.sources

        with cache_path.open('w') as f:
            json.dump([results, sources], f)

    log('Packages considered: ', len(results))

    # For packages with entrypoints, download the corresponding RPM
    entrypoint_packages = []
    for name, result in results.items():
        entrypoints = result.get('entrypoints')
        if entrypoints and not result.get('keep'):
            entrypoint_packages.append(name)

    log('Packages with interesting entrypoints: ', len(entrypoint_packages))

    rpm_dl_path = cache_dir / 'rpm_cache'
    if rpm_dl_path.exists() and not cache_rpms:
        shutil.rmtree(rpm_dl_path)
    rpm_dl_path.mkdir(exist_ok=True)
    while entrypoint_packages:
        cp = subprocess.run(
            ['dnf', 'download', '--repo=rawhide', '--', *entrypoint_packages],
            cwd=rpm_dl_path, stdout=sys.stderr, stderr=subprocess.PIPE,
            universal_newlines=True, env={**os.environ, 'LANG': 'C.utf-8'})
        if cp.returncode == 0:
            break
        log(cp.stderr, end='')
        # Error: No package python2-foo available.
        package = cp.stderr.splitlines()[-1].split(' ')[-2]
        entrypoint_packages.remove(package)

    # Analyze entrypoints from downloaded RPMs
    for rpm_path in rpm_dl_path.iterdir():
        proc = subprocess.run(
            ['rpm', '-q', '--qf', '%{name}', '-p', rpm_path],
            stdout=subprocess.PIPE, check=True)
        name = proc.stdout.decode('utf-8')
        result = results.get(name)
        if result:
            for entrypoint in result.get('entrypoints'):
                rpm2cpio_proc = subprocess.Popen(
                    ['rpm2cpio', rpm_path], stdout=subprocess.PIPE)
                cpio_proc = subprocess.run(
                    ['cpio', '-i', '--to-stdout', '.' + entrypoint],
                    stdout=subprocess.PIPE, stdin=rpm2cpio_proc.stdout,
                    check=True)
                if rpm2cpio_proc.wait() != 0:
                    raise Exception(f'rpm2cpio failed for {rpm_path}')
                config = configparser.ConfigParser()
                if not cpio_proc.stdout:
                    result.setdefault(
                        'empty_entrypoints', []).append(entrypoint)
                    result['needs_investigation'] = True
                    result['keep'] = True
                    continue
                try:
                    config.read_string(cpio_proc.stdout.decode('utf-8'))
                except configparser.Error as e:
                    result.setdefault(
                        'bad_entrypoints', {})[entrypoint] = str(e)
                    result['needs_investigation'] = True
                    result['keep'] = True
                    continue
                handle_entrypoints(result, config)
                result['entrypoints_handled'] = True

    # Adjust "needs_investigation" for unknown files and unhandled entrypoints
    for name, result in results.items():
        if not result.get('keep'):
            if result.get('entrypoints'):
                if not result.pop('entrypoints_handled', False):
                    result['notes'].append('Entrypoints not handled')
                    result['needs_investigation'] = True
            if result.get('filename_unknown'):
                result['needs_investigation'] = True

    # Set legacy_leaf flags
    for pkg in data['packages'].values():
        for rpm_name, rpm in pkg['rpms'].items():
            # TODO: better way to match portingdb entry to package name
            name = rpm_name.rsplit('-', 2)[0]
            result = results.get(name)
            if result:
                result['legacy_leaf'] = rpm['legacy_leaf']

    # hardcoded packages
    # catfish is seriously mispackaged,
    # see https://src.fedoraproject.org/rpms/catfish/pull-request/1
    if 'catfish' in results:
        results['catfish']['needs_investigation'] = True

    def keep_manually(name, reason):
        if name in results:
            results[name]['keep'] = True
            results[name]['notes'].append(reason)

    # rpkg needs to stay for 3rd party consumers
    keep_manually('python2-rpkg', 'rhpkg+rfpkg dependency')
    # https://bugzilla.redhat.com/show_bug.cgi?id=1629435
    keep_manually('python2-q', 'debugging tool')

    for result in results.values():
        if result.get('needs_investigation'):
            result['verdict'] = 'investigate'
        elif result.get('keep'):
            result['verdict'] = 'keep'
        elif result.get('legacy_leaf'):
            result['verdict'] = 'drop_now'
        else:
            result['verdict'] = 'drop_later'

    # Set sources and determine retirement action
    for name, result in results.items():
        result['source'], *_ = (s for s, p in sources.items() if name in p)

    for source, pkgs in sources.items():
        local_results = [r for r in results.values() if r['name'] in pkgs]
        if len(local_results) < len(pkgs):
            # subpackages we know nothing about
            source_verdict = 'keep'
        elif all(r['verdict'] == 'drop_now' for r in local_results):
            source_verdict = 'retire_now'
        elif all(r['verdict'].startswith('drop_') for r in local_results):
            source_verdict = 'retire_later'
        else:
            source_verdict = 'keep'

        for result in local_results:
            result['source_verdict'] = source_verdict

    # Output it all
    print(json.dumps(results, indent=2))

    with open(cache_dir / 'results.json', 'w') as f:
        json.dump(results, f, indent=2)
    with open(cache_dir / 'results-sources.json', 'w') as f:
        json.dump(sources, f, indent=2)

    log('\nBinary packages:')
    stats_counter = collections.Counter(r['verdict']
                                        for r in results.values())
    for verdict, count in stats_counter.most_common():
        log('{}: {}'.format(count, verdict))

    for verdict in stats_counter:
        filtered = {n: r for n, r in results.items()
                    if r['verdict'] == verdict}
        with open(cache_dir / ('results-' + verdict + '.json'), 'w') as f:
            json.dump(filtered, f, indent=2)
        with open(cache_dir / ('results-' + verdict + '.txt'), 'w') as f:
            for name in filtered:
                print(name, file=f)

    log('\nSource packages:')
    # we will lose some information here, but that is OK for stats
    source_results = {result['source']: result
                      for result in results.values()}
    stats_counter = collections.Counter(r['source_verdict']
                                        for r in source_results.values())
    for verdict, count in stats_counter.most_common():
        log('{}: {}'.format(count, verdict))

    for verdict in stats_counter:
        if verdict == 'keep':
            continue
        filtered = {n: r for n, r in results.items()
                    if r['source_verdict'] == verdict}
        with open(cache_dir / ('results-' + verdict + '-srpms.json'),
                  'w') as f:
            json.dump(filtered, f, indent=2)
        with open(cache_dir / ('results-' + verdict + '-srpms.txt'),
                  'w') as f:
            for name in set(r['source'] for r in filtered.values()):
                print(name, file=f)
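
# Hedged sketch of how check_drops aggregates per-binary verdicts into a
# per-source (SRPM) retirement verdict, mirroring the loop over ``sources``
# above. The helper name and arguments are illustrative: ``binary_verdicts``
# are the verdicts of the subpackages we have results for, and
# ``total_subpackages`` is how many binary packages the SRPM builds.
def _example_source_verdict(binary_verdicts, total_subpackages):
    """Return the retirement verdict for one source package.

    >>> _example_source_verdict(['drop_now', 'drop_now'], 2)
    'retire_now'
    >>> _example_source_verdict(['drop_now', 'drop_later'], 2)
    'retire_later'
    >>> _example_source_verdict(['drop_now'], 2)  # unknown subpackage
    'keep'
    """
    if len(binary_verdicts) < total_subpackages:
        # Subpackages we know nothing about: keep the source
        return 'keep'
    if all(v == 'drop_now' for v in binary_verdicts):
        return 'retire_now'
    if all(v.startswith('drop_') for v in binary_verdicts):
        return 'retire_later'
    return 'keep'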