def garbageCollects(self): GC_PACKAGE = ("ocaml-curses", "1.0.3-1") PKG_SUITE = "squeeze" PKG_AREA = "main" # make fresh copies of sources/ and mirror dir orig_sources = TEST_DATA_DIR / "sources" orig_mirror = TEST_DATA_DIR / "mirror" new_sources = self.tmpdir / "sources2" new_mirror = self.tmpdir / "mirror2" shutil.copytree(orig_sources, new_sources) shutil.copytree(orig_mirror, new_mirror) self.conf["mirror_dir"] = new_mirror self.conf["sources_dir"] = new_sources pkgdir = (new_sources / PKG_AREA / GC_PACKAGE[0][0] / GC_PACKAGE[0] / GC_PACKAGE[1]) src_index = (new_mirror / "dists" / PKG_SUITE / PKG_AREA / "source" / "Sources.gz") # rm package to be GC'd from mirror (actually, remove everything...) with open(src_index, "w") as f: f.truncate() # update run that should not GC, due to timestamp os.utime(pkgdir, None) self.conf["expire_days"] = 3 self.do_update() self.assertTrue( pkgdir.exists(), "young gone package %s/%s disappeared from FS storage" % GC_PACKAGE, ) self.assertTrue( db_storage.lookup_package(self.session, *GC_PACKAGE), "young gone package %s/%s disappeared from DB storage" % GC_PACKAGE, ) # another update run without grace period, package should go self.conf["expire_days"] = 0 self.do_update() self.assertFalse( pkgdir.exists(), "gone package %s/%s persisted in FS storage" % GC_PACKAGE) self.assertFalse( db_storage.lookup_package(self.session, *GC_PACKAGE), "gone package %s/%s persisted in DB storage" % GC_PACKAGE, )
def add_package(session, pkg, pkgdir, file_table):
    global conf
    logging.debug('add-package %s' % pkg)

    metric_type = 'size'
    metric_value = None

    metricsfile = metricsfile_path(pkgdir)
    metricsfile_tmp = metricsfile + '.new'

    if 'hooks.fs' in conf['backends']:
        if not os.path.exists(metricsfile):  # run du only if needed
            cmd = ['du', '--summarize', pkgdir]
            metric_value = int(subprocess.check_output(cmd).split()[0])
            with open(metricsfile_tmp, 'w') as out:
                out.write('%s\t%d\n' % (metric_type, metric_value))
            os.rename(metricsfile_tmp, metricsfile)

    if 'hooks.db' in conf['backends']:
        if metric_value is None:
            # hooks.db is enabled but hooks.fs is not, so we don't have a
            # metric_value handy. Parse it from metrics file, hoping it exists
            # from previous runs...
            metric_value = parse_metrics(metricsfile)[metric_type]

        db_package = db_storage.lookup_package(session, pkg['package'],
                                               pkg['version'])
        metric = session.query(Metric) \
                        .filter_by(package_id=db_package.id,
                                   metric=metric_type,
                                   value=metric_value) \
                        .first()
        if not metric:
            metric = Metric(db_package, metric_type, metric_value)
            session.add(metric)
def _rm_package(pkg, conf, session, db_package=None):
    """remove package `pkg` from both FS and DB storage, and notify plugins

    handles and logs exceptions
    """
    logging.info("remove %s..." % pkg)
    pkgdir = pkg.extraction_dir(conf['sources_dir'])
    if not db_package:
        db_package = db_storage.lookup_package(session, pkg['package'],
                                               pkg['version'])
        if not db_package:
            logging.warn('cannot find package %s, not removing' % pkg)
            return
    try:
        if not conf['dry_run'] and 'hooks' in conf['backends']:
            notify(conf, 'rm-package', session, pkg, pkgdir)
        if not conf['dry_run'] and 'fs' in conf['backends']:
            fs_storage.remove_package(pkg, pkgdir)
        if not conf['dry_run'] and 'db' in conf['backends']:
            if not conf['single_transaction']:
                with session.begin():
                    db_storage.rm_package(session, pkg, db_package)
            else:
                with session.begin_nested():
                    db_storage.rm_package(session, pkg, db_package)
    except:
        logging.exception('failed to remove %s' % pkg)
def add_package(session, pkg, pkgdir, file_table):
    global conf
    logging.debug('add-package %s' % pkg)

    sumsfile = sums_path(pkgdir)
    sumsfile_tmp = sumsfile + '.new'

    def emit_checksum(out, relpath, abspath):
        if os.path.islink(abspath) or not os.path.isfile(abspath):
            # Do not checksum symlinks, if they are not dangling / external we
            # will checksum their target anyhow. Do not check special files
            # either; they shouldn't be there per policy, but they might be
            # (and they are in old releases)
            return
        sha256 = hashutil.sha256sum(abspath)
        out.write('%s %s\n' % (sha256, relpath))

    if 'hooks.fs' in conf['backends']:
        if not os.path.exists(sumsfile):  # compute checksums only if needed
            with open(sumsfile_tmp, 'w') as out:
                for (relpath, abspath) in \
                        fs_storage.walk_pkg_files(pkgdir, file_table):
                    emit_checksum(out, relpath, abspath)
            os.rename(sumsfile_tmp, sumsfile)

    if 'hooks.db' in conf['backends']:
        db_package = db_storage.lookup_package(session, pkg['package'],
                                               pkg['version'])
        insert_q = sql.insert(Checksum.__table__)
        insert_params = []
        if not session.query(Checksum) \
                      .filter_by(package_id=db_package.id) \
                      .first():
            # ASSUMPTION: if *a* checksum of this package has already
            # been added to the db in the past, then *all* of them have,
            # as additions are part of the same transaction
            for (sha256, relpath) in parse_checksums(sumsfile):
                params = {'package_id': db_package.id, 'sha256': sha256}
                if file_table:
                    try:
                        file_id = file_table[relpath]
                        params['file_id'] = file_id
                    except KeyError:
                        continue
                else:
                    file_ = session.query(File) \
                                   .filter_by(package_id=db_package.id,
                                              path=relpath) \
                                   .first()
                    if not file_:
                        continue
                    params['file_id'] = file_.id
                insert_params.append(params)
                if len(insert_params) >= BULK_FLUSH_THRESHOLD:
                    session.execute(insert_q, insert_params)
                    session.flush()
                    insert_params = []
            if insert_params:  # source packages shouldn't be empty but...
                session.execute(insert_q, insert_params)
                session.flush()
def add_package(session, pkg, pkgdir, file_table): global conf logging.debug("add-package %s" % pkg) metric_type = "size" metric_value = None metricsfile = metricsfile_path(pkgdir) metricsfile_tmp = Path(str(metricsfile) + ".new") if "hooks.fs" in conf["backends"]: if not metricsfile.exists(): # run du only if needed cmd = ["du", "--summarize", pkgdir] metric_value = int(subprocess.check_output(cmd).split()[0]) with open(metricsfile_tmp, "w") as out: out.write("%s\t%d\n" % (metric_type, metric_value)) os.rename(metricsfile_tmp, metricsfile) if "hooks.db" in conf["backends"]: if metric_value is None: # hooks.db is enabled but hooks.fs is not, so we don't have a # metric_value handy. Parse it from metrics file, hoping it exists # from previous runs... metric_value = parse_metrics(metricsfile)[metric_type] db_package = db_storage.lookup_package(session, pkg["package"], pkg["version"]) metric = ( session.query(Metric) .filter_by(package_id=db_package.id, metric=metric_type, value=metric_value) .first() ) if not metric: metric = Metric(db_package, metric_type, metric_value) session.add(metric)
def add_package(session, pkg, pkgdir, file_table):
    global conf
    logging.debug('add-package %s' % pkg)

    slocfile = slocfile_path(pkgdir)
    slocfile_tmp = slocfile + '.new'

    if 'hooks.fs' in conf['backends']:
        if not os.path.exists(slocfile):  # run sloccount only if needed
            try:
                cmd = ['sloccount'] + SLOCCOUNT_FLAGS + [pkgdir]
                with open(slocfile_tmp, 'w') as out:
                    subprocess.check_call(cmd, stdout=out,
                                          stderr=subprocess.STDOUT)
            except subprocess.CalledProcessError:
                if not grep(['^SLOC total is zero,', slocfile_tmp]):
                    # rationale: sloccount fails when it can't find source code
                    raise
            finally:
                os.rename(slocfile_tmp, slocfile)

    if 'hooks.db' in conf['backends']:
        slocs = parse_sloccount(slocfile)
        db_package = db_storage.lookup_package(session, pkg['package'],
                                               pkg['version'])
        if not session.query(SlocCount).filter_by(package_id=db_package.id)\
                      .first():
            # ASSUMPTION: if *a* loc count of this package has already been
            # added to the db in the past, then *all* of them have, as
            # additions are part of the same transaction
            for (lang, locs) in six.iteritems(slocs):
                sloccount = SlocCount(db_package, lang, locs)
                session.add(sloccount)
def _rm_package(pkg, conf, session, db_package=None):
    """remove package `pkg` from both FS and DB storage, and notify plugins

    handles and logs exceptions
    """
    logging.info("remove %s..." % pkg)
    pkgdir = pkg.extraction_dir(conf["sources_dir"])
    if not db_package:
        db_package = db_storage.lookup_package(session, pkg["package"],
                                               pkg["version"])
        if not db_package:
            logging.warn("cannot find package %s, not removing" % pkg)
            return
    try:
        if not conf["dry_run"] and "hooks" in conf["backends"]:
            notify(conf, "rm-package", session, pkg, pkgdir)
        if not conf["dry_run"] and "fs" in conf["backends"]:
            fs_storage.remove_package(pkg, pkgdir)
        if not conf["dry_run"] and "db" in conf["backends"]:
            if not conf["single_transaction"]:
                with session.begin():
                    db_storage.rm_package(session, pkg, db_package)
            else:
                with session.begin_nested():
                    db_storage.rm_package(session, pkg, db_package)
    except Exception:
        logging.exception("failed to remove %s" % pkg)
def add_package(pkg):
    if is_excluded_package(pkg, conf['exclude']):
        logging.info('skipping excluded package %s' % pkg)
        return
    if not db_storage.lookup_package(session, pkg['package'], pkg['version']):
        # use DB as completion marker: if the package has been inserted, it
        # means everything went fine last time we tried. If not, we redo
        # everything, just to be safe
        _add_package(pkg, conf, session)
    pkgdir = pkg.extraction_dir(conf['sources_dir'])
    if conf['force_triggers']:
        try:
            notify_plugins(conf['observers'], 'add-package',
                           session, pkg, pkgdir,
                           triggers=conf['force_triggers'],
                           dry=conf['dry_run'])
        except:
            logging.exception('trigger failure on %s' % pkg)
    # add entry for sources.txt, temporarily with no suite associated
    pkg_id = (pkg['package'], pkg['version'])
    dsc_rel = os.path.relpath(pkg.dsc_path(), conf['mirror_dir'])
    pkgdir_rel = os.path.relpath(pkg.extraction_dir(conf['sources_dir']),
                                 conf['sources_dir'])
    status.sources[pkg_id] = pkg.archive_area(), dsc_rel, pkgdir_rel, []
def add_package(pkg): if is_excluded_package(pkg, conf["exclude"]): logging.info("skipping excluded package %s" % pkg) return if not db_storage.lookup_package(session, pkg["package"], pkg["version"]): # use DB as completion marker: if the package has been inserted, it # means everything went fine last time we tried. If not, we redo # everything, just to be safe _add_package(pkg, conf, session) pkgdir = pkg.extraction_dir(conf["sources_dir"]) if conf["force_triggers"]: try: notify_plugins( conf["observers"], "add-package", session, pkg, pkgdir, triggers=conf["force_triggers"], dry=conf["dry_run"], ) except Exception: logging.exception("trigger failure on %s" % pkg) # add entry for sources.txt, temporarily with no suite associated pkg_id = (pkg["package"], pkg["version"]) dsc_rel = pkg.dsc_path().relative_to(conf["mirror_dir"]) pkgdir_rel = pkg.extraction_dir(conf["sources_dir"]).relative_to( conf["sources_dir"]) status.sources[pkg_id] = pkg.archive_area(), dsc_rel, pkgdir_rel, []
def add_package(session, pkg, pkgdir, file_table): global conf logging.debug("add-package %s" % pkg) slocfile = slocfile_path(pkgdir) slocfile_tmp = Path(str(slocfile) + ".new") if "hooks.fs" in conf["backends"]: if not slocfile.exists(): # run sloccount only if needed try: cmd = ["sloccount"] + SLOCCOUNT_FLAGS + [pkgdir] with open(slocfile_tmp, "w") as out: subprocess.check_call(cmd, stdout=out, stderr=subprocess.STDOUT) except subprocess.CalledProcessError: if not grep(["^SLOC total is zero,", slocfile_tmp]): # rationale: sloccount fails when it can't find source code raise finally: os.rename(slocfile_tmp, slocfile) if "hooks.db" in conf["backends"]: slocs = parse_sloccount(slocfile) db_package = db_storage.lookup_package(session, pkg["package"], pkg["version"]) if not session.query(SlocCount).filter_by(package_id=db_package.id).first(): # ASSUMPTION: if *a* loc count of this package has already been # added to the db in the past, then *all* of them have, as # additions are part of the same transaction for (lang, locs) in slocs.items(): sloccount = SlocCount(db_package, lang, locs) session.add(sloccount)
def update_suites(status, conf, session, mirror):
    """update stage: sweep and recreate suite mappings
    """
    logging.info('update suites mappings...')

    insert_q = sql.insert(Suite.__table__)
    insert_params = []

    # load suites aliases
    suites_aliases = mirror.ls_suites_with_aliases()
    if not conf['dry_run'] and 'db' in conf['backends']:
        session.query(SuiteAlias).delete()

    for (suite, pkgs) in six.iteritems(mirror.suites):
        if not conf['dry_run'] and 'db' in conf['backends']:
            session.query(Suite).filter_by(suite=suite).delete()
        for pkg_id in pkgs:
            (pkg, version) = pkg_id
            db_package = db_storage.lookup_package(session, pkg, version)
            if not db_package:
                logging.warn('package %s/%s not found in suite %s, skipping'
                             % (pkg, version, suite))
            else:
                logging.debug('add suite mapping: %s/%s -> %s'
                              % (pkg, version, suite))
                params = {'package_id': db_package.id,
                          'suite': suite}
                insert_params.append(params)
                if pkg_id in status.sources:
                    # fill-in incomplete suite information in status
                    status.sources[pkg_id][-1].append(suite)
                else:
                    # defensive measure to make update_suites() more reusable
                    logging.warn('cannot find %s/%s during suite update'
                                 % (pkg, version))
            if not conf['dry_run'] and 'db' in conf['backends'] \
               and len(insert_params) >= BULK_FLUSH_THRESHOLD:
                session.execute(insert_q, insert_params)
                session.flush()
                insert_params = []

        if not conf['dry_run'] and 'db' in conf['backends']:
            session.query(SuiteInfo).filter_by(name=suite).delete()
            _add_suite(conf, session, suite, aliases=suites_aliases[suite])

    if not conf['dry_run'] and 'db' in conf['backends'] \
       and insert_params:
        session.execute(insert_q, insert_params)
        session.flush()

    # update sources.txt, now that we know the suite mappings
    src_list_path = os.path.join(conf['cache_dir'], 'sources.txt')
    with open(src_list_path + '.new', 'w') as src_list:
        for pkg_id, src_entry in six.iteritems(status.sources):
            fields = list(pkg_id)
            fields.extend(src_entry[:-1])  # all except suites
            fields.append(string.join(src_entry[-1], ','))
            src_list.write(string.join(fields, '\t') + '\n')
    os.rename(src_list_path + '.new', src_list_path)
def garbageCollects(self):
    GC_PACKAGE = ('ocaml-curses', '1.0.3-1')
    PKG_SUITE = 'squeeze'
    PKG_AREA = 'main'

    # make fresh copies of sources/ and mirror dir
    orig_sources = os.path.join(TEST_DATA_DIR, 'sources')
    orig_mirror = os.path.join(TEST_DATA_DIR, 'mirror')
    new_sources = os.path.join(self.tmpdir, 'sources2')
    new_mirror = os.path.join(self.tmpdir, 'mirror2')
    shutil.copytree(orig_sources, new_sources)
    shutil.copytree(orig_mirror, new_mirror)
    self.conf['mirror_dir'] = new_mirror
    self.conf['sources_dir'] = new_sources

    pkgdir = os.path.join(new_sources, PKG_AREA, GC_PACKAGE[0][0],
                          GC_PACKAGE[0], GC_PACKAGE[1])
    src_index = os.path.join(new_mirror, 'dists', PKG_SUITE, PKG_AREA,
                             'source', 'Sources.gz')

    # rm package to be GC'd from mirror (actually, remove everything...)
    with open(src_index, 'w') as f:
        f.truncate()

    # update run that should not GC, due to timestamp
    os.utime(pkgdir, None)
    self.conf['expire_days'] = 3
    self.do_update()
    self.assertTrue(
        os.path.exists(pkgdir),
        'young gone package %s/%s disappeared from FS storage' % GC_PACKAGE)
    self.assertTrue(
        db_storage.lookup_package(self.session, *GC_PACKAGE),
        'young gone package %s/%s disappeared from DB storage' % GC_PACKAGE)

    # another update run without grace period, package should go
    self.conf['expire_days'] = 0
    self.do_update()
    self.assertFalse(
        os.path.exists(pkgdir),
        'gone package %s/%s persisted in FS storage' % GC_PACKAGE)
    self.assertFalse(
        db_storage.lookup_package(self.session, *GC_PACKAGE),
        'gone package %s/%s persisted in DB storage' % GC_PACKAGE)
def add_suite(conf, session, suite, archive):
    logging.info('add sticky suite %s to the archive...' % suite)

    db_suite = db_storage.lookup_db_suite(session, suite, sticky=True)
    if not db_suite:
        if updater.STAGE_EXTRACT in conf['stages']:
            updater._add_suite(conf, session, suite, sticky=True)
    else:
        logging.warn('sticky suite %s already exists, looking for new packages'
                     % suite)

    if updater.STAGE_EXTRACT in conf['stages']:
        for pkg in archive.ls(suite):
            db_package = db_storage.lookup_package(session, pkg['package'],
                                                   pkg['version'])
            if db_package:  # avoid GC upon removal from a non-sticky suite
                if not db_package.sticky and not conf['dry_run']:
                    logging.debug('setting sticky bit on %s' % pkg)
                    db_package.sticky = True
            else:
                if not conf['single_transaction']:
                    with session.begin():
                        updater._add_package(pkg, conf, session, sticky=True)
                else:
                    updater._add_package(pkg, conf, session, sticky=True)
        session.flush()  # to fill Package.id-s

    if updater.STAGE_SUITES in conf['stages']:
        suitemap_q = sql.insert(Suite.__table__)
        suitemaps = []
        for (pkg, version) in archive.suites[suite]:
            db_package = db_storage.lookup_package(session, pkg, version)
            if not db_package:
                logging.warn('package %s/%s not found in sticky suite'
                             ' %s, skipping' % (pkg, version, suite))
                continue
            if not db_storage.lookup_suitemapping(session, db_package, suite):
                suitemaps.append({'package_id': db_package.id,
                                  'suite': suite})
        if suitemaps and not conf['dry_run']:
            session.execute(suitemap_q, suitemaps)

    _add_stats_for(conf, session, suite)

    logging.info('sticky suite %s added to the archive.' % suite)
def update_suites(status, conf, session, mirror):
    """update stage: sweep and recreate suite mappings"""
    logging.info("update suites mappings...")

    insert_q = sql.insert(Suite.__table__)
    insert_params = []

    # load suites aliases
    suites_aliases = mirror.ls_suites_with_aliases()
    if not conf["dry_run"] and "db" in conf["backends"]:
        session.query(SuiteAlias).delete()

    for (suite, pkgs) in mirror.suites.items():
        if not conf["dry_run"] and "db" in conf["backends"]:
            session.query(Suite).filter_by(suite=suite).delete()
        for pkg_id in pkgs:
            (pkg, version) = pkg_id
            db_package = db_storage.lookup_package(session, pkg, version)
            if not db_package:
                logging.warn("package %s/%s not found in suite %s, skipping"
                             % (pkg, version, suite))
            else:
                logging.debug("add suite mapping: %s/%s -> %s"
                              % (pkg, version, suite))
                params = {"package_id": db_package.id, "suite": suite}
                insert_params.append(params)
                if pkg_id in status.sources:
                    # fill-in incomplete suite information in status
                    status.sources[pkg_id][-1].append(suite)
                else:
                    # defensive measure to make update_suites() more reusable
                    logging.warn("cannot find %s/%s during suite update"
                                 % (pkg, version))
            if (not conf["dry_run"] and "db" in conf["backends"]
                    and len(insert_params) >= BULK_FLUSH_THRESHOLD):
                session.execute(insert_q, insert_params)
                session.flush()
                insert_params = []

        if not conf["dry_run"] and "db" in conf["backends"]:
            session.query(SuiteInfo).filter_by(name=suite).delete()
            _add_suite(conf, session, suite, aliases=suites_aliases[suite])

    if not conf["dry_run"] and "db" in conf["backends"] and insert_params:
        session.execute(insert_q, insert_params)
        session.flush()

    # update sources.txt, now that we know the suite mappings
    src_list_path = conf["cache_dir"] / "sources.txt"
    src_list_path_new = Path(str(src_list_path) + ".new")
    with src_list_path_new.open("w") as src_list:
        for pkg_id, src_entry in status.sources.items():
            fields = list(pkg_id)
            fields.extend(str(x) for x in src_entry[:-1])  # all except suites
            # suites are alphabetically sorted, more determinism
            fields.append(",".join(sorted(src_entry[-1])))
            src_list.write("\t".join(fields) + "\n")
    src_list_path_new.rename(src_list_path)
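# Both update_suites() variants above end by rewriting the sources.txt cache.
# Each entry is one tab-separated line: package, version, archive area, .dsc
# path relative to the mirror, extraction dir relative to sources_dir, and a
# comma-separated suite list. A minimal reader for that layout follows; the
# function name, the dict keys, and the example entry are illustrative
# assumptions, only the field order comes from the writer above.
def read_sources_txt(path):
    """Yield one dict per sources.txt entry."""
    with open(path) as f:
        for line in f:
            package, version, area, dsc, pkgdir, suites = \
                line.rstrip("\n").split("\t")
            yield {
                "package": package,
                "version": version,
                "area": area,
                "dsc": dsc,
                "pkgdir": pkgdir,
                "suites": suites.split(",") if suites else [],
            }

# hypothetical example line (fields joined by tabs):
#   ocaml-curses  1.0.3-1  main  pool/main/o/ocaml-curses/ocaml-curses_1.0.3-1.dsc  main/o/ocaml-curses/1.0.3-1  squeeze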
def add_package(session, pkg, pkgdir, file_table):
    global conf
    logging.debug('add-package %s' % pkg)

    license_file = license_path(pkgdir)
    license_file_tmp = license_file + '.new'

    def emit_license(out, session, package, version, relpath, pkgdir):
        """ Retrieve license of the file. We use `relpath` as we want the path
            inside the package directory which is used in the d/copyright
            files paragraphs
        """
        # join path for debian/copyright file as we are already in the sources
        # directory.
        synopsis = helper.get_license(session, package, version, relpath,
                                      os.path.join(pkgdir,
                                                   'debian/copyright'))
        if synopsis is not None:
            s = '%s\t%s\n' % (synopsis, relpath.decode('utf-8'))
            out.write(s)

    if 'hooks.fs' in conf['backends']:
        if not os.path.exists(license_file):  # run license only if needed
            with io.open(license_file_tmp, 'w', encoding='utf-8') as out:
                for (relpath, abspath) in \
                        fs_storage.walk_pkg_files(pkgdir, file_table):
                    emit_license(out, session, pkg['package'], pkg['version'],
                                 relpath, pkgdir)
            os.rename(license_file_tmp, license_file)

    if 'hooks.db' in conf['backends']:
        licenses = parse_license_file(license_file)
        db_package = db_storage.lookup_package(session, pkg['package'],
                                               pkg['version'])
        session.query(FileCopyright) \
               .join(File) \
               .filter(File.package_id == db_package.id)
        if not session.query(FileCopyright).join(File) \
                      .filter(File.package_id == db_package.id).first():
            # ASSUMPTION: if *a* license of this package has already been
            # added to the db in the past, then *all* of them have, as
            # additions are part of the same transaction
            for (synopsis, path) in licenses:
                if file_table:
                    try:
                        file_id = file_table[path]
                    except KeyError:
                        continue
                else:
                    file_ = session.query(File) \
                                   .filter_by(package_id=db_package.id,
                                              path=path) \
                                   .first()
                    if not file_:
                        continue
                    file_id = file_.id
                license = FileCopyright(file_id, 'debian', synopsis)
                session.add(license)
def add_package(session, pkg, pkgdir, file_table): global conf logging.debug("add-package %s" % pkg) sumsfile = sums_path(pkgdir) sumsfile_tmp = Path(str(sumsfile) + ".new") def emit_checksum(out, relpath, abspath): if abspath.is_symlink() or not abspath.is_file(): # Do not checksum symlinks, if they are not dangling / external we # will checksum their target anyhow. Do not check special files # either; they shouldn't be there per policy, but they might be # (and they are in old releases) return sha256 = hashutil.sha256sum(bytes(abspath)) out.write(sha256.encode("ascii") + b" " + bytes(relpath) + b"\n") if "hooks.fs" in conf["backends"]: if not sumsfile.exists(): # compute checksums only if needed with open(sumsfile_tmp, "wb") as out: for relpath in file_table: abspath = pkgdir / relpath emit_checksum(out, relpath, abspath) os.rename(sumsfile_tmp, sumsfile) if "hooks.db" in conf["backends"]: db_package = db_storage.lookup_package(session, pkg["package"], pkg["version"]) insert_q = sql.insert(Checksum.__table__) insert_params = [] if not session.query(Checksum).filter_by( package_id=db_package.id).first(): # ASSUMPTION: if *a* checksum of this package has already # been added to the db in the past, then *all* of them have, # as additions are part of the same transaction for (sha256, relpath) in parse_checksums(sumsfile): params = {"package_id": db_package.id, "sha256": sha256} if file_table: try: file_id = file_table[relpath] params["file_id"] = file_id except KeyError: continue else: file_ = (session.query(File).filter_by( package_id=db_package.id, path=relpath).first()) if not file_: continue params["file_id"] = file_.id insert_params.append(params) if len(insert_params) >= BULK_FLUSH_THRESHOLD: session.execute(insert_q, insert_params) session.flush() insert_params = [] if insert_params: # source packages shouldn't be empty but... session.execute(insert_q, insert_params) session.flush()
def garbageCollects(self):
    GC_PACKAGE = ('ocaml-curses', '1.0.3-1')
    PKG_SUITE = 'squeeze'
    PKG_AREA = 'main'

    # make fresh copies of sources/ and mirror dir
    orig_sources = os.path.join(TEST_DATA_DIR, 'sources')
    orig_mirror = os.path.join(TEST_DATA_DIR, 'mirror')
    new_sources = os.path.join(self.tmpdir, 'sources2')
    new_mirror = os.path.join(self.tmpdir, 'mirror2')
    shutil.copytree(orig_sources, new_sources)
    shutil.copytree(orig_mirror, new_mirror)
    self.conf['mirror_dir'] = new_mirror
    self.conf['sources_dir'] = new_sources

    pkgdir = os.path.join(new_sources, PKG_AREA, GC_PACKAGE[0][0],
                          GC_PACKAGE[0], GC_PACKAGE[1])
    src_index = os.path.join(new_mirror, 'dists', PKG_SUITE, PKG_AREA,
                             'source', 'Sources.gz')

    # rm package to be GC'd from mirror (actually, remove everything...)
    with open(src_index, 'w') as f:
        f.truncate()

    # update run that should not GC, due to timestamp
    os.utime(pkgdir, None)
    self.conf['expire_days'] = 3
    self.do_update()
    self.assertTrue(os.path.exists(pkgdir),
                    'young gone package %s/%s disappeared from FS storage'
                    % GC_PACKAGE)
    self.assertTrue(db_storage.lookup_package(self.session, *GC_PACKAGE),
                    'young gone package %s/%s disappeared from DB storage'
                    % GC_PACKAGE)

    # another update run without grace period, package should go
    self.conf['expire_days'] = 0
    self.do_update()
    self.assertFalse(os.path.exists(pkgdir),
                     'gone package %s/%s persisted in FS storage'
                     % GC_PACKAGE)
    self.assertFalse(db_storage.lookup_package(self.session, *GC_PACKAGE),
                     'gone package %s/%s persisted in DB storage'
                     % GC_PACKAGE)
def add_package(session, pkg, pkgdir, file_table): global conf logging.debug("add-package %s" % pkg) license_file = license_path(pkgdir) license_file_tmp = Path(str(license_file) + ".new") try: c = helper.parse_license(pkgdir / "debian/copyright") except copyright.NotMachineReadableError: return def emit_license(out, package, version, relpath, copyright): """Retrieve license of the file. We use `relpath` as we want the path inside the package directory which is used in the d/copyright files paragraphs """ synopsis = helper.get_license(package, version, relpath, copyright) if synopsis is not None: s = b"%s\t%s\n" % (synopsis.encode("utf8"), relpath) out.write(s) if "hooks.fs" in conf["backends"]: if not license_file.exists(): # run license only if needed with io.open(license_file_tmp, "wb") as out: for relpath in file_table: emit_license(out, pkg["package"], pkg["version"], relpath, c) os.rename(license_file_tmp, license_file) if "hooks.db" in conf["backends"]: db_package = db_storage.lookup_package(session, pkg["package"], pkg["version"]) if (not session.query(FileCopyright).join( File, File.id == FileCopyright.file_id).filter( File.package_id == db_package.id).first()): # ASSUMPTION: if *a* license of this package has already been # added to the db in the past, then *all* of them have, as # additions are part of the same transaction licenses = parse_license_file(license_file) for (synopsis, path) in licenses: if file_table: try: file_id = file_table[path] except KeyError: continue else: file_ = (session.query(File).filter_by( package_id=db_package.id, path=path).first()) if not file_: continue file_id = file_.id license = FileCopyright(file_id, "debian", synopsis) session.add(license)
def add_package(session, pkg, pkgdir, file_table): global conf logging.debug("add-package %s" % pkg) sumsfile = sums_path(pkgdir) sumsfile_tmp = sumsfile + ".new" def emit_checksum(out, relpath, abspath): if os.path.islink(abspath) or not os.path.isfile(abspath): # Do not checksum symlinks, if they are not dangling / external we # will checksum their target anyhow. Do not check special files # either; they shouldn't be there per policy, but they might be # (and they are in old releases) return sha256 = hashutil.sha256sum(abspath) out.write("%s %s\n" % (sha256, relpath)) if "hooks.fs" in conf["backends"]: if not os.path.exists(sumsfile): # compute checksums only if needed with open(sumsfile_tmp, "w") as out: for (relpath, abspath) in fs_storage.walk_pkg_files(pkgdir, file_table): emit_checksum(out, relpath, abspath) os.rename(sumsfile_tmp, sumsfile) if "hooks.db" in conf["backends"]: db_package = db_storage.lookup_package(session, pkg["package"], pkg["version"]) insert_q = sql.insert(Checksum.__table__) insert_params = [] if not session.query(Checksum).filter_by(package_id=db_package.id).first(): # ASSUMPTION: if *a* checksum of this package has already # been added to the db in the past, then *all* of them have, # as additions are part of the same transaction for (sha256, relpath) in parse_checksums(sumsfile): params = {"package_id": db_package.id, "sha256": sha256} if file_table: try: file_id = file_table[relpath] params["file_id"] = file_id except KeyError: continue else: file_ = session.query(File).filter_by(package_id=db_package.id, path=relpath).first() if not file_: continue params["file_id"] = file_.id insert_params.append(params) if len(insert_params) >= BULK_FLUSH_THRESHOLD: session.execute(insert_q, insert_params) session.flush() insert_params = [] if insert_params: # source packages shouldn't be empty but... session.execute(insert_q, insert_params) session.flush()
def rm_package(session, pkg, pkgdir, file_table): global conf logging.debug("rm-package %s" % pkg) if "hooks.fs" in conf["backends"]: sumsfile = sums_path(pkgdir) if os.path.exists(sumsfile): os.unlink(sumsfile) if "hooks.db" in conf["backends"]: db_package = db_storage.lookup_package(session, pkg["package"], pkg["version"]) session.query(Checksum).filter_by(package_id=db_package.id).delete()
def rm_package(session, pkg, pkgdir, file_table): global conf logging.debug("rm-package %s" % pkg) if "hooks.fs" in conf["backends"]: ctagsfile = ctags_path(pkgdir) if ctagsfile.exists(): ctagsfile.unlink() if "hooks.db" in conf["backends"]: db_package = db_storage.lookup_package(session, pkg["package"], pkg["version"]) session.query(Ctag).filter_by(package_id=db_package.id).delete()
def rm_package(session, pkg, pkgdir, file_table):
    global conf
    logging.debug('rm-package %s' % pkg)

    if 'hooks.fs' in conf['backends']:
        metricsfile = metricsfile_path(pkgdir)
        if os.path.exists(metricsfile):
            os.unlink(metricsfile)

    if 'hooks.db' in conf['backends']:
        db_package = db_storage.lookup_package(session, pkg['package'],
                                               pkg['version'])
        session.query(Metric) \
               .filter_by(package_id=db_package.id) \
               .delete()
def rm_package(session, pkg, pkgdir, file_table):
    global conf
    logging.debug('rm-package %s' % pkg)

    if 'hooks.fs' in conf['backends']:
        slocfile = slocfile_path(pkgdir)
        if os.path.exists(slocfile):
            os.unlink(slocfile)

    if 'hooks.db' in conf['backends']:
        db_package = db_storage.lookup_package(session, pkg['package'],
                                               pkg['version'])
        session.query(SlocCount) \
               .filter_by(package_id=db_package.id) \
               .delete()
def rm_package(session, pkg, pkgdir, file_table): global conf logging.debug('rm-package %s' % pkg) if 'hooks.fs' in conf['backends']: licensefile = license_path(pkgdir) if os.path.exists(licensefile): os.unlink(licensefile) if 'hooks.db' in conf['backends']: db_package = db_storage.lookup_package(session, pkg['package'], pkg['version']) files = (session.query(FileCopyright.id).join(File).filter( File.package_id == db_package.id)).all() for f in files: session.query(FileCopyright) \ .filter(FileCopyright.id == f).delete()
def rm_package(session, pkg, pkgdir, file_table):
    global conf
    logging.debug('rm-package %s' % pkg)

    if 'hooks.fs' in conf['backends']:
        licensefile = license_path(pkgdir)
        if os.path.exists(licensefile):
            os.unlink(licensefile)

    if 'hooks.db' in conf['backends']:
        db_package = db_storage.lookup_package(session, pkg['package'],
                                               pkg['version'])
        session.execute(
            'DELETE FROM copyright c '
            'USING files f '
            'WHERE f.package_id = :package '
            'AND c.file_id = f.id',
            {"package": db_package.id})
def rm_package(session, pkg, pkgdir, file_table):
    global conf
    logging.debug('rm-package %s' % pkg)

    if 'hooks.fs' in conf['backends']:
        licensefile = license_path(pkgdir)
        if os.path.exists(licensefile):
            os.unlink(licensefile)

    if 'hooks.db' in conf['backends']:
        db_package = db_storage.lookup_package(session, pkg['package'],
                                               pkg['version'])
        files = (session.query(FileCopyright.id)
                 .join(File)
                 .filter(File.package_id == db_package.id)).all()
        for f in files:
            session.query(FileCopyright) \
                   .filter(FileCopyright.id == f).delete()
def rm_package(session, pkg, pkgdir, file_table):
    global conf
    logging.debug('rm-package %s' % pkg)

    if 'hooks.fs' in conf['backends']:
        licensefile = license_path(pkgdir)
        if os.path.exists(licensefile):
            os.unlink(licensefile)

    if 'hooks.db' in conf['backends']:
        db_package = db_storage.lookup_package(session, pkg['package'],
                                               pkg['version'])
        session.execute(
            'DELETE FROM copyright c '
            'USING files f '
            'WHERE f.package_id = :package '
            'AND c.file_id = f.id',
            {"package": db_package.id}
        )
def rm_package(session, pkg, pkgdir, file_table): global conf logging.debug("rm-package %s" % pkg) if "hooks.fs" in conf["backends"]: licensefile = license_path(pkgdir) if licensefile.exists(): licensefile.unlink() if "hooks.db" in conf["backends"]: db_package = db_storage.lookup_package(session, pkg["package"], pkg["version"]) session.execute( "DELETE FROM copyright c " "Using files f " "WHERE f.package_id = :package " "AND c.file_id = f.id", {"package": db_package.id}, )
def assertLacksStickyPackage(self, package, version):
    p = db_storage.lookup_package(self.session, package, version)
    self.assertIsNone(p, msg='sticky package %s/%s unexpectedly present'
                      % (package, version))
def assertHasPackage(self, package, version):
    p = db_storage.lookup_package(self.session, package, version)
    self.assertIsNotNone(p, msg='missing package %s/%s'
                         % (package, version))
    return p
def guessAreaForSectionlessPkgs(self):
    sectionless_pkg = ('tripwire', '1.2-15')
    archiver.add_suite(self.conf, self.session, 'slink', self.archive)
    p = db_storage.lookup_package(self.session, *sectionless_pkg)
    self.assertEqual('non-free', p.area)
def guessAreaForSectionlessPkgs(self): sectionless_pkg = ("tripwire", "1.2-15") archiver.add_suite(self.conf, self.session, "slink", self.archive) p = db_storage.lookup_package(self.session, *sectionless_pkg) self.assertEqual("non-free", p.area)
def add_package(session, pkg, pkgdir, file_table): global conf logging.debug("add-package %s" % pkg) ctagsfile = ctags_path(pkgdir) ctagsfile_tmp = Path(str(ctagsfile) + ".new") if "hooks.fs" in conf["backends"]: if not ctagsfile.exists(): # extract tags only if needed cmd = ["ctags"] + CTAGS_FLAGS + ["-o", ctagsfile_tmp] # ASSUMPTION: will be run under pkgdir as CWD, which is needed to # get relative paths right. The assumption is enforced by the # updater with open(os.devnull, "w") as null: subprocess.check_call(cmd, stderr=null) os.rename(ctagsfile_tmp, ctagsfile) if "hooks.db" in conf["backends"]: db_package = db_storage.lookup_package(session, pkg["package"], pkg["version"]) # poor man's cache for last <relpath, file_id>; # rely on the fact that ctags file are path-sorted curfile = {None: None} insert_q = sql.insert(Ctag.__table__) insert_params = [] if not session.query(Ctag).filter_by(package_id=db_package.id).first(): # ASSUMPTION: if *a* ctag of this package has already been added to # the db in the past, then *all* of them have, as additions are # part of the same transaction for tag in parse_ctags(ctagsfile): params = { "package_id": db_package.id, "tag": tag["tag"], # 'file_id': # will be filled below "line": tag["line"], "kind": tag["kind"], "language": tag["language"], } relpath = tag["path"] if file_table: try: params["file_id"] = file_table[relpath] except KeyError: continue else: try: params["file_id"] = curfile[relpath] except KeyError: file_ = ( session.query(File) .filter_by(package_id=db_package.id, path=relpath) .first() ) if not file_: continue curfile = {relpath: file_.id} params["file_id"] = file_.id insert_params.append(params) if len(insert_params) >= BULK_FLUSH_THRESHOLD: session.execute(insert_q, insert_params) session.flush() insert_params = [] if insert_params: # might be empty if there are no ctags at all! session.execute(insert_q, insert_params) session.flush()
def add_package(session, pkg, pkgdir, file_table):
    global conf
    logging.debug('add-package %s' % pkg)

    ctagsfile = ctags_path(pkgdir)
    ctagsfile_tmp = ctagsfile + '.new'

    if 'hooks.fs' in conf['backends']:
        if not os.path.exists(ctagsfile):  # extract tags only if needed
            cmd = ['ctags'] + CTAGS_FLAGS + ['-o', ctagsfile_tmp]
            # ASSUMPTION: will be run under pkgdir as CWD, which is needed to
            # get relative paths right. The assumption is enforced by the
            # updater
            with open(os.devnull, 'w') as null:
                subprocess.check_call(cmd, stderr=null)
            os.rename(ctagsfile_tmp, ctagsfile)

    if 'hooks.db' in conf['backends']:
        db_package = db_storage.lookup_package(session, pkg['package'],
                                               pkg['version'])
        # poor man's cache for last <relpath, file_id>;
        # rely on the fact that ctags file are path-sorted
        curfile = {None: None}
        insert_q = sql.insert(Ctag.__table__)
        insert_params = []
        if not session.query(Ctag).filter_by(package_id=db_package.id).first():
            # ASSUMPTION: if *a* ctag of this package has already been added to
            # the db in the past, then *all* of them have, as additions are
            # part of the same transaction
            for tag in parse_ctags(ctagsfile):
                params = ({'package_id': db_package.id,
                           'tag': tag['tag'],
                           # 'file_id':  # will be filled below
                           'line': tag['line'],
                           'kind': tag['kind'],
                           'language': tag['language']})
                relpath = tag['path']
                if file_table:
                    try:
                        params['file_id'] = file_table[relpath]
                    except KeyError:
                        continue
                else:
                    try:
                        params['file_id'] = curfile[relpath]
                    except KeyError:
                        file_ = session.query(File) \
                                       .filter_by(package_id=db_package.id,
                                                  path=relpath) \
                                       .first()
                        if not file_:
                            continue
                        curfile = {relpath: file_.id}
                        params['file_id'] = file_.id
                insert_params.append(params)
                if len(insert_params) >= BULK_FLUSH_THRESHOLD:
                    session.execute(insert_q, insert_params)
                    session.flush()
                    insert_params = []
            if insert_params:  # might be empty if there are no ctags at all!
                session.execute(insert_q, insert_params)
                session.flush()
def add_package(session, pkg, pkgdir, file_table):
    global conf
    logging.debug('add-package %s' % pkg)

    ctagsfile = ctags_path(pkgdir)
    ctagsfile_tmp = ctagsfile + '.new'

    if 'hooks.fs' in conf['backends']:
        if not os.path.exists(ctagsfile):  # extract tags only if needed
            cmd = ['ctags'] + CTAGS_FLAGS + ['-o', ctagsfile_tmp]
            # ASSUMPTION: will be run under pkgdir as CWD, which is needed to
            # get relative paths right. The assumption is enforced by the
            # updater
            with open(os.devnull, 'w') as null:
                subprocess.check_call(cmd, stderr=null)
            os.rename(ctagsfile_tmp, ctagsfile)

    if 'hooks.db' in conf['backends']:
        db_package = db_storage.lookup_package(session, pkg['package'],
                                               pkg['version'])
        # poor man's cache for last <relpath, file_id>;
        # rely on the fact that ctags file are path-sorted
        curfile = {None: None}
        insert_q = sql.insert(Ctag.__table__)
        insert_params = []
        if not session.query(Ctag).filter_by(package_id=db_package.id).first():
            # ASSUMPTION: if *a* ctag of this package has already been added to
            # the db in the past, then *all* of them have, as additions are
            # part of the same transaction
            for tag in parse_ctags(ctagsfile):
                params = ({
                    'package_id': db_package.id,
                    'tag': tag['tag'],
                    # 'file_id':  # will be filled below
                    'line': tag['line'],
                    'kind': tag['kind'],
                    'language': tag['language']
                })
                relpath = tag['path']
                if file_table:
                    try:
                        params['file_id'] = file_table[relpath]
                    except KeyError:
                        continue
                else:
                    try:
                        params['file_id'] = curfile[relpath]
                    except KeyError:
                        file_ = session.query(File) \
                                       .filter_by(package_id=db_package.id,
                                                  path=relpath) \
                                       .first()
                        if not file_:
                            continue
                        curfile = {relpath: file_.id}
                        params['file_id'] = file_.id
                insert_params.append(params)
                if len(insert_params) >= BULK_FLUSH_THRESHOLD:
                    session.execute(insert_q, insert_params)
                    session.flush()
                    insert_params = []
            if insert_params:  # might be empty if there are no ctags at all!
                session.execute(insert_q, insert_params)
                session.flush()