def closed_mispackaged(ctx):
    """List mispackaged packages whose Bugzilla report is closed.

    Exits with error code 1 if such packages are found.
    Use the --verbose flag to get the output pretty-printed for humans.
    """
    data = get_data(*ctx.obj['datadirs'])
    # Collect mispackaged packages that link to a CLOSED Bugzilla bug
    results = []
    for package in data['packages'].values():
        if package['status'] == 'mispackaged':
            for link in package['links']:
                if link['type'] == 'bug' and link['note'].startswith('CLOSED'):
                    results.append(package)
    if ctx.obj['verbose'] > 0:
        if results:
            print("\nThe following packages are both 'mispackaged' and "
                  "their associated Bugzilla report is CLOSED:\n")
            for p in results:
                print("\t{}".format(p['name']))
            print()
        else:
            print("\nThere are no packages both 'mispackaged' and "
                  "having the associated Bugzilla report CLOSED.\n")
    else:
        # Terse, machine-friendly output: one package name per line
        for p in results:
            print("{}".format(p['name']))
    if results:
        exit(1)
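
# A hedged, self-contained sketch of the per-package test closed_mispackaged
# applies. The package shape (``status``, plus ``links`` entries with
# ``type``/``note``) is inferred from the fields used above, not the full
# portingdb schema; the helper name is illustrative, not part of this module.
def _example_has_closed_bug(package):
    """Return True if a package is mispackaged with a CLOSED bug link.

    >>> _example_has_closed_bug({
    ...     'status': 'mispackaged',
    ...     'links': [{'type': 'bug', 'note': 'CLOSED NOTABUG'}],
    ... })
    True
    """
    return (package['status'] == 'mispackaged'
            and any(link['type'] == 'bug' and link['note'].startswith('CLOSED')
                    for link in package['links']))
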
def naming(ctx, category):
    """List packages with the selected naming scheme issue."""
    data = get_data(*ctx.obj['datadirs'])
    for package in data['packages'].values():
        if category == 'misnamed-subpackage' and package['is_misnamed']:
            print(package['name'])
        if category == 'ambiguous-requires' and package['unversioned_requires']:
            print(package['name'])
        if category == 'blocked' and package['blocked_requires']:
            print(package['name'])
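
# Hedged sketch: each category above maps one-to-one to a package flag, so a
# table-driven equivalent is possible. The mapping below is read directly
# from the loop above; the constant and helper names are illustrative.
_NAMING_CATEGORY_FLAGS = {
    'misnamed-subpackage': 'is_misnamed',
    'ambiguous-requires': 'unversioned_requires',
    'blocked': 'blocked_requires',
}

def _example_naming_matches(package, category):
    """Return True if ``package`` has the issue selected by ``category``.

    >>> _example_naming_matches({'is_misnamed': True}, 'misnamed-subpackage')
    True
    >>> _example_naming_matches({'is_misnamed': False}, 'misnamed-subpackage')
    False
    """
    return bool(package.get(_NAMING_CATEGORY_FLAGS[category]))
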
def check_fti(ctx, repo, arch, results, open_bug_reports):
    """Check whether all Python 2 packages install (FTI check)."""
    data = get_data(*ctx.obj['datadirs'])
    rpms_srpms = pkgs_srpm(data)
    results = pathlib.Path(results)
    filtered = {}
    print('Querying Bugzilla...')
    bugz = bugzillas()
    sources_reported_now = set()
    print('Running installcheck...')
    for name, info in installcheck(repo, arch).items():
        # Skip RPMs we cannot map to a known source package
        if name not in rpms_srpms:
            continue
        source = rpms_srpms[name]
        filtered[name] = info
        filtered[name]['source'] = source
        if source.startswith(('sugar-', 'gr-')):
            # too many broken sugars and radios to file separately
            continue
        will_file = False
        if source not in bugz:
            print(f'{source}: {name} has no bug')
            will_file = True
        elif (bugz[source].status == 'CLOSED'
                and bugz[source].resolution != 'EOL'):
            print(f'{source}: {name} has CLOSED bug: {bugz[source].id}')
            will_file = True
        if source in sources_reported_now:
            # Only file one report per source package per run
            will_file = False
        if will_file and open_bug_reports:
            sources_reported_now.add(source)
            open_bz(name, **filtered[name])
    results.write_text(json.dumps(filtered, indent=2))
    print(f'\nResults in {results}\n'
          f'There are {len(filtered)} noninstallable Python 2 packages.')
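
# Hedged sketch of check_fti's per-source "should we file a bug?" decision.
# ``bugz`` maps source names to bug objects with ``status`` and ``resolution``
# attributes, mirroring how the loop above uses bugzillas(); the helper name
# and the SimpleNamespace stand-in are illustrative, not part of this module.
def _example_will_file(source, bugz, already_reported):
    """Return True if a new/reopened FTI bug should be filed for ``source``.

    >>> import types
    >>> bug = types.SimpleNamespace(status='CLOSED', resolution='NOTABUG')
    >>> _example_will_file('python-foo', {'python-foo': bug}, set())
    True
    >>> _example_will_file('python-foo', {'python-foo': bug}, {'python-foo'})
    False
    >>> _example_will_file('python-bar', {}, set())  # no bug at all
    True
    """
    if source in already_reported:
        return False
    if source not in bugz:
        return True
    bug = bugz[source]
    return bug.status == 'CLOSED' and bug.resolution != 'EOL'
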
def main(update, naming):
    excluded = set(BAD_COMMITS)
    tmpdir = tempfile.mkdtemp()
    writer = csv.DictWriter(
        sys.stdout, ['commit', 'date', 'status', 'num_packages'],
        lineterminator='\n')
    writer.writeheader()
    prev_date = None
    prev_commit = None
    if update:
        with open(update) as f:
            for row in csv.DictReader(f):
                excluded.add(row['commit'])
                prev_date = row['date']
                prev_commit = row['commit']
                writer.writerow(row)
    try:
        tmpclone = os.path.join(tmpdir, 'tmp_clone')
        tmpdata = os.path.join(tmpclone, 'data')
        run(['git', 'clone', '.', tmpclone])
        prev_data_hash = None
        prev_batch = []
        end_commit = HISTORY_NAMING_END_COMMIT if naming else HISTORY_END_COMMIT
        for commit in reversed(git_history(end=end_commit)):
            date = run(['git', 'log', '-n1', '--pretty=%ci', commit]).strip()
            if prev_date and prev_date > date:
                continue
            data_hash = run(['git', 'rev-parse', commit + ':' + 'data'])
            if (commit in excluded) or (data_hash == prev_data_hash):
                prev_data_hash = data_hash
                continue
            if prev_date and prev_date[:11] != date[:11]:
                prev_date = date
                prev_commit = commit
                for row in prev_batch:
                    writer.writerow(row)
            elif not prev_date:
                prev_date = date
            else:
                prev_commit = commit
                print('{},{} - skipping'.format(prev_commit, prev_date),
                      file=sys.stderr)
                continue
            prev_batch = []
            # Note: we don't remove files that didn't exist in the old
            # version.
            run(['git', 'checkout', commit, '--', 'data'], cwd=tmpclone)
            data = get_data(tmpdata)
            if naming:
                prev_batch = get_history_naming_package_numbers(
                    data, commit, date)
            else:
                prev_batch = get_history_package_numbers(data, commit, date)
            prev_data_hash = data_hash
        for row in prev_batch:
            writer.writerow(row)
    finally:
        shutil.rmtree(tmpdir)
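
# Hedged sketch of the day comparison main() relies on. ``git log
# --pretty=%ci`` dates look like '2018-06-01 12:34:56 +0200', so the first
# 11 characters ('YYYY-MM-DD ') identify the calendar day; main() compares
# ``prev_date[:11] != date[:11]`` to emit at most one batch of rows per day.
# The helper name and the dates in the doctest are illustrative.
def _example_same_day(date_a, date_b):
    """Return True if two ``%ci``-formatted dates fall on the same day.

    >>> _example_same_day('2018-06-01 09:00:00 +0200',
    ...                   '2018-06-01 18:00:00 +0200')
    True
    >>> _example_same_day('2018-06-01 09:00:00 +0200',
    ...                   '2018-06-02 09:00:00 +0200')
    False
    """
    return date_a[:11] == date_b[:11]
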
def check_drops(ctx, filelist, primary, cache_sax, cache_rpms):
    """Check packages that should be dropped from the distribution."""
    data = get_data(*ctx.obj['datadirs'])
    cache_dir.mkdir(exist_ok=True)

    # Analyze filelists.xml.gz and primary.xml.gz
    cache_path = cache_dir / 'sax_results.json'
    if cache_sax and cache_path.exists():
        with cache_path.open('r') as f:
            results, sources = json.load(f)
    else:
        filelist = gzip.GzipFile(fileobj=filelist, mode='r')
        handler = SaxFilesHandler()
        xml.sax.parse(filelist, handler)
        results = handler.results

        primary = gzip.GzipFile(fileobj=primary, mode='r')
        handler = SaxPrimaryHandler()
        xml.sax.parse(primary, handler)
        sources = handler.sources

        with cache_path.open('w') as f:
            json.dump([results, sources], f)

    log('Packages considered: ', len(results))

    # For packages with entrypoints, download the corresponding RPM
    entrypoint_packages = []
    for name, result in results.items():
        entrypoints = result.get('entrypoints')
        if entrypoints and not result.get('keep'):
            entrypoint_packages.append(name)

    log('Packages with interesting entrypoints: ', len(entrypoint_packages))

    rpm_dl_path = cache_dir / 'rpm_cache'
    if rpm_dl_path.exists() and not cache_rpms:
        shutil.rmtree(rpm_dl_path)
    rpm_dl_path.mkdir(exist_ok=True)
    while entrypoint_packages:
        cp = subprocess.run(
            ['dnf', 'download', '--repo=rawhide', '--', *entrypoint_packages],
            cwd=rpm_dl_path, stdout=sys.stderr, stderr=subprocess.PIPE,
            universal_newlines=True, env={**os.environ, 'LANG': 'C.utf-8'})
        if cp.returncode == 0:
            break
        log(cp.stderr, end='')
        # Error: No package python2-foo available.
        package = cp.stderr.splitlines()[-1].split(' ')[-2]
        entrypoint_packages.remove(package)

    # Analyze entrypoints from downloaded RPMs
    for rpm_path in rpm_dl_path.iterdir():
        proc = subprocess.run(
            ['rpm', '-q', '--qf', '%{name}', '-p', rpm_path],
            stdout=subprocess.PIPE, check=True)
        name = proc.stdout.decode('utf-8')
        result = results.get(name)
        if result:
            for entrypoint in result.get('entrypoints'):
                rpm2cpio_proc = subprocess.Popen(
                    ['rpm2cpio', rpm_path], stdout=subprocess.PIPE)
                cpio_proc = subprocess.run(
                    ['cpio', '-i', '--to-stdout', '.' + entrypoint],
                    stdout=subprocess.PIPE, stdin=rpm2cpio_proc.stdout,
                    check=True)
                if rpm2cpio_proc.wait() != 0:
                    raise Exception(f'rpm2cpio failed for {rpm_path}')
                config = configparser.ConfigParser()
                if not cpio_proc.stdout:
                    result.setdefault(
                        'empty_entrypoints', []).append(entrypoint)
                    result['needs_investigation'] = True
                    result['keep'] = True
                    continue
                try:
                    config.read_string(cpio_proc.stdout.decode('utf-8'))
                except configparser.Error as e:
                    result.setdefault(
                        'bad_entrypoints', {})[entrypoint] = str(e)
                    result['needs_investigation'] = True
                    result['keep'] = True
                    continue
                handle_entrypoints(result, config)
                result['entrypoints_handled'] = True

    # Adjust "needs_investigation" for unknown files and unhandled entrypoints
    for name, result in results.items():
        if not result.get('keep'):
            if result.get('entrypoints'):
                if not result.pop('entrypoints_handled', False):
                    result['notes'].append('Entrypoints not handled')
                    result['needs_investigation'] = True
            if result.get('filename_unknown'):
                result['needs_investigation'] = True

    # Set legacy_leaf flags
    for pkg in data['packages'].values():
        for rpm_name, rpm in pkg['rpms'].items():
            # TODO: better way to match portingdb entry to package name
            name = rpm_name.rsplit('-', 2)[0]
            result = results.get(name)
            if result:
                result['legacy_leaf'] = rpm['legacy_leaf']

    # hardcoded packages
    # catfish is seriously mispackaged,
    # see https://src.fedoraproject.org/rpms/catfish/pull-request/1
    if 'catfish' in results:
        results['catfish']['needs_investigation'] = True

    def keep_manually(name, reason):
        if name in results:
            results[name]['keep'] = True
            results[name]['notes'].append(reason)

    # rpkg needs to stay for 3rd party consumers
    keep_manually('python2-rpkg', 'rhpkg+rfpkg dependency')
    # https://bugzilla.redhat.com/show_bug.cgi?id=1629435
    keep_manually('python2-q', 'debugging tool')

    for result in results.values():
        if result.get('needs_investigation'):
            result['verdict'] = 'investigate'
        elif result.get('keep'):
            result['verdict'] = 'keep'
        elif result.get('legacy_leaf'):
            result['verdict'] = 'drop_now'
        else:
            result['verdict'] = 'drop_later'

    # Set sources and determine retirement action
    for name, result in results.items():
        result['source'], *_ = (s for s, p in sources.items() if name in p)

    for source, pkgs in sources.items():
        local_results = [r for r in results.values() if r['name'] in pkgs]
        if len(local_results) < len(pkgs):
            # subpackages we know nothing about
            source_verdict = 'keep'
        elif all(r['verdict'] == 'drop_now' for r in local_results):
            source_verdict = 'retire_now'
        elif all(r['verdict'].startswith('drop_') for r in local_results):
            source_verdict = 'retire_later'
        else:
            source_verdict = 'keep'

        for result in local_results:
            result['source_verdict'] = source_verdict

    # Output it all
    print(json.dumps(results, indent=2))

    with open(cache_dir / 'results.json', 'w') as f:
        json.dump(results, f, indent=2)
    with open(cache_dir / 'results-sources.json', 'w') as f:
        json.dump(sources, f, indent=2)

    log('\nBinary packages:')
    stats_counter = collections.Counter(r['verdict']
                                        for r in results.values())
    for verdict, count in stats_counter.most_common():
        log('{}: {}'.format(count, verdict))

    for verdict in stats_counter:
        filtered = {n: r for n, r in results.items()
                    if r['verdict'] == verdict}
        with open(cache_dir / ('results-' + verdict + '.json'), 'w') as f:
            json.dump(filtered, f, indent=2)
        with open(cache_dir / ('results-' + verdict + '.txt'), 'w') as f:
            for name in filtered:
                print(name, file=f)

    log('\nSource packages:')
    # we will lose some information here, but that is OK for stats
    source_results = {result['source']: result
                      for result in results.values()}
    stats_counter = collections.Counter(r['source_verdict']
                                        for r in source_results.values())
    for verdict, count in stats_counter.most_common():
        log('{}: {}'.format(count, verdict))

    for verdict in stats_counter:
        if verdict == 'keep':
            continue
        filtered = {n: r for n, r in results.items()
                    if r['source_verdict'] == verdict}
        with open(cache_dir / ('results-' + verdict + '-srpms.json'),
                  'w') as f:
            json.dump(filtered, f, indent=2)
        with open(cache_dir / ('results-' + verdict + '-srpms.txt'),
                  'w') as f:
            for name in set(r['source'] for r in filtered.values()):
                print(name, file=f)
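
# Hedged sketch of how check_drops aggregates per-binary verdicts into a
# per-source (SRPM) retirement verdict, mirroring the loop over ``sources``
# above. The helper name and arguments are illustrative: ``binary_verdicts``
# are the verdicts of the subpackages we have results for, and
# ``total_subpackages`` is how many binary packages the SRPM builds.
def _example_source_verdict(binary_verdicts, total_subpackages):
    """Return the retirement verdict for one source package.

    >>> _example_source_verdict(['drop_now', 'drop_now'], 2)
    'retire_now'
    >>> _example_source_verdict(['drop_now', 'drop_later'], 2)
    'retire_later'
    >>> _example_source_verdict(['drop_now'], 2)  # unknown subpackage
    'keep'
    """
    if len(binary_verdicts) < total_subpackages:
        # Subpackages we know nothing about: keep the source
        return 'keep'
    if all(v == 'drop_now' for v in binary_verdicts):
        return 'retire_now'
    if all(v.startswith('drop_') for v in binary_verdicts):
        return 'retire_later'
    return 'keep'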