def test_diff_manifest_missing_files(test_data, testname, monkeypatch):
    """Diff two fileinfo lists having one file's name changed.
    """
    monkeypatch.chdir(test_data)
    with Archive().open(Path("archive.tar")) as archive:
        manifest_ref = archive.manifest
        base = Path("base")
        # Remember the directory mtime so we can restore it after the
        # rename; only the file entries should differ in the diff.
        base_mtime = os.stat(base).st_mtime
        old_name = base / "rnd.dat"
        new_name = base / "a.dat"
        old_name.rename(new_name)
        os.utime(base, times=(base_mtime, base_mtime))
        diff = list(filter(non_match,
                           diff_manifest(get_fileinfos(base), manifest_ref)))
        assert len(diff) == 2
        # The new name sorts first and is present only in the first list.
        status, fi_a, fi_b = diff[0]
        assert status == DiffStatus.MISSING_B
        assert fi_a.type == 'f'
        assert fi_a.path == new_name
        assert fi_b is None
        # The old name is present only in the reference manifest.
        status, fi_a, fi_b = diff[1]
        assert status == DiffStatus.MISSING_A
        assert fi_b.type == 'f'
        assert fi_b.path == old_name
        assert fi_a is None
def setup(self):
    # Fresh fixtures for every test: an archive backed by a Hive store
    # and a partitioned external table the test cases register with it.
    store = Hive()
    self.archive = Archive('tests', store)
    self.events = ExternalTable('atomic', 'events', partitioned=True)
def test_diff_manifest_add_file_last(test_data, testname, monkeypatch):
    """Diff two fileinfo lists, one having an additional file as last item.

    The implementation of the corresponding command line tool used to
    have a flaw in this particular case, ref. #55.
    """
    monkeypatch.chdir(test_data)
    with Archive().open(Path("archive.tar")) as archive:
        manifest_ref = archive.manifest
        base = Path("base")
        base_mtime = os.stat(base).st_mtime
        # "zzz" sorts after every existing entry in the base directory.
        extra = base / "zzz.dat"
        shutil.copy(gettestdata("rnd2.dat"), extra)
        os.utime(base, times=(base_mtime, base_mtime))
        fileinfos = get_fileinfos(base)
        # The extra file is missing from the second argument ...
        diff = list(filter(non_match, diff_manifest(fileinfos, manifest_ref)))
        assert len(diff) == 1
        status, fi_a, fi_b = diff[0]
        assert status == DiffStatus.MISSING_B
        assert fi_a.type == 'f'
        assert fi_a.path == extra
        assert fi_b is None
        # ... and missing from the first one with arguments swapped.
        diff = list(filter(non_match, diff_manifest(manifest_ref, fileinfos)))
        assert len(diff) == 1
        status, fi_a, fi_b = diff[0]
        assert status == DiffStatus.MISSING_A
        assert fi_b.type == 'f'
        assert fi_b.path == extra
        assert fi_a is None
def test_diff_manifest_mult(test_data, testname, monkeypatch):
    """Diff two fileinfo lists having multiple differences.
    """
    monkeypatch.chdir(test_data)
    with Archive().open(Path("archive.tar")) as archive:
        manifest_ref = archive.manifest
        base = Path("base")
        base_mtime = os.stat(base).st_mtime
        data_mtime = os.stat(base / "data").st_mtime
        # Modify the content of one file ...
        modified = base / "data" / "rnd.dat"
        shutil.copy(gettestdata("rnd2.dat"), modified)
        # ... and rename another one.
        old_name = base / "msg.txt"
        new_name = base / "o.txt"
        old_name.rename(new_name)
        # Restore directory mtimes so they do not show up in the diff.
        os.utime(base, times=(base_mtime, base_mtime))
        os.utime(base / "data", times=(data_mtime, data_mtime))
        diff = list(filter(non_match,
                           diff_manifest(get_fileinfos(base), manifest_ref)))
        assert len(diff) == 3
        status, fi_a, fi_b = diff[0]
        assert status == DiffStatus.CONTENT
        assert fi_a.type == fi_b.type == 'f'
        assert fi_a.path == fi_b.path == modified
        status, fi_a, fi_b = diff[1]
        assert status == DiffStatus.MISSING_A
        assert fi_b.type == 'f'
        assert fi_b.path == old_name
        assert fi_a is None
        status, fi_a, fi_b = diff[2]
        assert status == DiffStatus.MISSING_B
        assert fi_a.type == 'f'
        assert fi_a.path == new_name
        assert fi_b is None
def create(args, config):
    """Create a backup archive for the schedule derived from config.

    Returns 0, also when there is nothing to do.
    """
    schedule = get_schedule(config)
    if schedule is None:
        return 0
    config['schedule'] = schedule.name
    fileinfos = get_fileinfos(config, schedule)
    # Materialize lazy iterables so we can test for emptiness.
    if not isinstance(fileinfos, Sequence):
        fileinfos = list(fileinfos)
    if not fileinfos:
        log.debug("nothing to archive")
        return 0
    log.debug("creating archive %s", config.path)
    tags = [
        "host:%s" % config.host,
        "policy:%s" % config.policy,
        "schedule:%s" % schedule.name,
        "type:%s" % schedule.ClsName,
    ]
    if config.user:
        tags.append("user:%s" % config.user)
    # Restrict the permissions of the newly written archive file.
    with tmp_umask(0o277):
        arch = Archive().create(config.path, fileinfos=fileinfos,
                                tags=tags, dedup=config.dedup)
        if config.user:
            chown(arch.path, config.user)
    return 0
def check(args):
    """Check files on disk against the entries of an archive.

    The files to consider are taken from the command line, from stdin
    (``--stdin``), or default to the archive's base directory.  By
    default, paths not matching the archive are printed; with
    ``--present``, matching paths are printed instead.

    :raise ArgError: if both ``--stdin`` and file arguments are given.
    :return: 0
    """
    if args.stdin:
        if args.files:
            raise ArgError("can't accept both, --stdin and the files argument")
        files = [Path(l.strip()) for l in sys.stdin]
    else:
        if args.files:
            files = args.files
        else:
            files = None
    with Archive().open(args.archive) as archive:
        if files is None:
            files = [archive.basedir]
        # Paths of the archive's own metadata entries; these always
        # count as present.
        metadata = {Path(md) for md in archive.manifest.metadata}
        FileInfo.Checksums = archive.manifest.checksums
        file_iter = FileInfo.iterpaths(files, set())
        # iterpaths() is a generator that accepts a flag via send():
        # sending True skips descending into the directory just yielded.
        skip = None
        while True:
            try:
                fi = file_iter.send(skip)
            except StopIteration:
                break
            skip = False
            entry = archive.manifest.find(args.prefix / fi.path)
            if (args.prefix / fi.path in metadata or
                entry and _matches(args.prefix, fi, entry)):
                # Entry found in the archive.  Directories are never
                # printed as "present", only their mismatching content.
                if args.present and not fi.is_dir():
                    print(fi.path)
            else:
                if not args.present:
                    print(fi.path)
                # A missing directory is reported once; do not descend
                # into it to report every child as well.
                if fi.is_dir():
                    skip = True
    return 0
def main():
    """Copy an input archive to an output archive, dropping all entries
    that are already present in one of the given base archives."""
    argparser = argparse.ArgumentParser()
    argparser.add_argument('base', type=Path, nargs='+',
                           help=("basis archives"))
    argparser.add_argument('input', type=Path,
                           help=("input archive"))
    argparser.add_argument('output', type=Path,
                           help=("output archive"))
    args = argparser.parse_args()
    inp_archive = Archive().open(args.input)
    fileinfos = inp_archive.manifest
    # Use the first checksum algorithm of the input manifest to decide
    # whether two entries have equal content.
    algorithm = fileinfos.checksums[0]
    for base_path in args.base:
        with Archive().open(base_path) as base:
            fileinfos = filter_fileinfos(base.manifest, fileinfos, algorithm)
    archive = CopyArchive(inp_archive).create(args.output,
                                              fileinfos=fileinfos)
def test_cli_create(test_dir, monkeypatch, testcase):
    """Create an archive with the command line tool and verify it."""
    dedup = testcase
    monkeypatch.chdir(str(test_dir))
    archive_path = archive_name(tags=[dedup.value])
    basedir = "base"
    callscript("archive-tool.py",
               ["create", "--deduplicate", dedup.value,
                archive_path, basedir])
    with Archive().open(Path(archive_path)) as archive:
        assert str(archive.basedir) == basedir
        check_manifest(archive.manifest, testdata)
def test_diff_manifest_equal(test_data, testname, monkeypatch):
    """Diff two fileinfo lists having equal content.
    """
    monkeypatch.chdir(test_data)
    with Archive().open(Path("archive.tar")) as archive:
        manifest_ref = archive.manifest
        # Nothing was modified, so the diff must be empty.
        fileinfos = get_fileinfos(Path("base"))
        diff = [d for d in diff_manifest(fileinfos, manifest_ref)
                if non_match(d)]
        assert diff == []
def create(args):
    """Create a new archive from the parsed command line arguments."""
    # An explicit 'none' means: do not compress at all.
    if args.compression == 'none':
        args.compression = ''
    archive = Archive().create(args.archive, args.compression, args.files,
                               basedir=args.basedir,
                               workdir=args.directory,
                               excludes=args.exclude,
                               dedup=DedupMode(args.deduplicate),
                               tags=args.tag)
    return 0
def get_fileinfos(config, schedule):
    """Collect the file infos for a new archive, filtering out entries
    that are unchanged with respect to the schedule's base archives.

    :raise ArchiveCreateError: if no previous full backup exists.
    """
    fileinfos = Manifest(paths=config.dirs, excludes=config.excludes)
    try:
        base_archives = schedule.get_base_archives(get_prev_backups(config))
    except NoFullBackupError:
        raise ArchiveCreateError("No previous full backup found, can not "
                                 "create %s archive" % schedule.name)
    for path in (item.path for item in base_archives):
        log.debug("considering %s to create differential archive", path)
        with Archive().open(path) as base:
            fileinfos = filter_fileinfos(base.manifest, fileinfos)
    return fileinfos
def add_archives(self, paths, prune=False):
    """Add the archives at the given paths to the index.

    Already indexed archives are kept as is.  With prune set, index
    entries not listed in paths are dropped afterwards.
    """
    seen = set()
    for path in paths:
        path = path.resolve()
        seen.add(path)
        if self.find(path):
            # Already in the index, nothing to do.
            continue
        with Archive().open(path) as archive:
            self.append(IndexItem(archive=archive))
    if prune:
        self.items = [item for item in self if item.path in seen]
def fingerprint(file, io=None):
    """Return the fingerprint of the archive file.

    The reader is chosen from the file name suffix.  If *io* is not
    given, the file is opened by this function.

    :param file: name of the archive file.
    :param io: optional already opened file object to read from.
    :raise NotImplementedError: for unsupported archive types.
    """
    if not io:
        io = open(file, "r")
    # Bug fixes relative to the original:
    # - Python 2 'except Exception, ex' syntax replaced; the blanket
    #   try/except that blindly re-raised has been dropped entirely.
    # - the misspelled 'NotImplemtedError' (itself a NameError at
    #   runtime) is now the real NotImplementedError.
    if file.endswith(".jar"):
        return Archive(JarReader(io)).fingerprint()
    elif file.endswith(".gem"):
        return Archive(GemReader(io)).fingerprint()
    elif file.endswith(".egg"):
        return Archive(EggReader(io)).fingerprint()
    raise NotImplementedError("No support for %s files." % file)
def create(args):
    """Create a new archive, deducing the compression mode from the
    archive file name suffix when not given explicitly."""
    if args.compression is None:
        # Last ressort default is gzip if the suffix is unknown.
        args.compression = suffix_map.get("".join(args.archive.suffixes),
                                          'gz')
    if args.compression == 'none':
        args.compression = ''
    archive = Archive().create(args.archive, args.compression, args.files,
                               basedir=args.basedir,
                               excludes=args.exclude,
                               dedup=DedupMode(args.deduplicate),
                               tags=args.tag)
    return 0
class TestExternalTable:

    def setup(self):
        # A fresh archive backed by a Hive store for every test.
        self.archive = Archive('tests', Hive())

    @raises(TemplateNotFound)
    def test_missing_hql(self):
        # A table without a matching hql template cannot render.
        table = self.archive.add(ExternalTable('atomic', 'misnamed'))
        table.hql()

    def test_graph(self):
        table = self.archive.add(
            ExternalTable('atomic', 'events', partitioned=True))
        assert_equal('ExternalTable(atomic.events)', table.graph())

    def test_partitioning(self):
        # Only partitioned tables emit a RECOVER PARTITIONS clause.
        pattern = re.compile('.*RECOVER PARTITIONS', re.DOTALL)
        partitioned = self.archive.add(
            ExternalTable('atomic', 'events', partitioned=True))
        assert_true(pattern.match(partitioned.create_hql()))
        # Reset Archive
        self.setup()
        plain = self.archive.add(ExternalTable('atomic', 'events'))
        assert_is_none(pattern.match(plain.create_hql()))
def ls(args):
    """List the content of the archive in the requested format.

    :raise ArchiveReadError: if the requested checksum algorithm is
        not stored in the archive's manifest.
    :raise ValueError: on an unknown format.
    :return: 0
    """
    with Archive().open(args.archive) as archive:
        if args.format == 'ls':
            ls_ls_format(archive)
        elif args.format == 'checksum':
            if not args.checksum:
                # Default to the first algorithm in the manifest.
                args.checksum = archive.manifest.checksums[0]
            elif args.checksum not in archive.manifest.checksums:
                raise ArchiveReadError("Checksums using '%s' hashes "
                                       "not available" % args.checksum)
            ls_checksum_format(archive, args.checksum)
        else:
            raise ValueError("invalid format '%s'" % args.format)
        return 0
def test_diff_manifest_metadata(test_data, testname, monkeypatch):
    """Diff two fileinfo lists having one file's metadata modified.
    """
    monkeypatch.chdir(test_data)
    with Archive().open(Path("archive.tar")) as archive:
        manifest_ref = archive.manifest
        base = Path("base")
        target = base / "rnd.dat"
        # Changing the permission bits alters only the metadata.
        target.chmod(0o0444)
        diff = list(filter(non_match,
                           diff_manifest(get_fileinfos(base), manifest_ref)))
        assert len(diff) == 1
        status, fi_a, fi_b = diff[0]
        assert status == DiffStatus.META
        assert fi_a.type == fi_b.type == 'f'
        assert fi_a.path == fi_b.path == target
def test_diff_manifest_modified_file(test_data, testname, monkeypatch):
    """Diff two fileinfo lists having one file's content modified.
    """
    monkeypatch.chdir(test_data)
    with Archive().open(Path("archive.tar")) as archive:
        manifest_ref = archive.manifest
        base = Path("base")
        base_mtime = os.stat(base).st_mtime
        target = base / "rnd.dat"
        # Overwrite the file's content, keeping the directory mtime.
        shutil.copy(gettestdata("rnd2.dat"), target)
        os.utime(base, times=(base_mtime, base_mtime))
        diff = list(filter(non_match,
                           diff_manifest(get_fileinfos(base), manifest_ref)))
        assert len(diff) == 1
        status, fi_a, fi_b = diff[0]
        assert status == DiffStatus.CONTENT
        assert fi_a.type == fi_b.type == 'f'
        assert fi_a.path == fi_b.path == target
def test_diff_manifest_symlink_target(test_data, testname, monkeypatch):
    """Diff two fileinfo lists having one symlink's target modified.
    """
    monkeypatch.chdir(test_data)
    with Archive().open(Path("archive.tar")) as archive:
        manifest_ref = archive.manifest
        base = Path("base")
        base_mtime = os.stat(base).st_mtime
        link = base / "s.dat"
        # Recreate the symlink pointing somewhere else.
        link.unlink()
        link.symlink_to(Path("msg.txt"))
        os.utime(base, times=(base_mtime, base_mtime))
        diff = list(filter(non_match,
                           diff_manifest(get_fileinfos(base), manifest_ref)))
        assert len(diff) == 1
        status, fi_a, fi_b = diff[0]
        assert status == DiffStatus.SYMLNK_TARGET
        assert fi_a.type == fi_b.type == 'l'
        assert fi_a.path == fi_b.path == link
def test_verify(test_dir, dep_testcase):
    """Verify how duplicate files were stored for each dedup mode."""
    dedup = dep_testcase
    archive_path = test_dir / archive_name(tags=[dedup.value])
    with Archive().open(archive_path) as archive:
        member_lnk = archive._file.getmember(str(dest_lnk))
        member_cp = archive._file.getmember(str(dest_cp))
        if dedup == DedupMode.NEVER:
            # No deduplication: both duplicates are regular files.
            assert member_lnk.isfile()
            assert member_cp.isfile()
        elif dedup == DedupMode.LINK:
            # Only the hard linked duplicate is stored as a link.
            assert member_lnk.islnk()
            assert member_lnk.linkname == str(src)
            assert member_cp.isfile()
        elif dedup == DedupMode.CONTENT:
            # Equal content suffices: both duplicates become links.
            assert member_lnk.islnk()
            assert member_lnk.linkname == str(src)
            assert member_cp.islnk()
            assert member_cp.linkname == str(src)
        else:
            assert False, "invalid dedup mode"
def test_diff_manifest_wrong_type(test_data, testname, monkeypatch):
    """Diff two fileinfo lists with one entry having a wrong type.
    """
    monkeypatch.chdir(test_data)
    with Archive().open(Path("archive.tar")) as archive:
        manifest_ref = archive.manifest
        base = Path("base")
        base_mtime = os.stat(base).st_mtime
        target = base / "rnd.dat"
        # Replace the regular file by a symlink of the same name.
        target.unlink()
        target.symlink_to(Path("data", "rnd.dat"))
        os.utime(base, times=(base_mtime, base_mtime))
        diff = list(filter(non_match,
                           diff_manifest(get_fileinfos(base), manifest_ref)))
        assert len(diff) == 1
        status, fi_a, fi_b = diff[0]
        assert status == DiffStatus.TYPE
        assert fi_a.type == 'l'
        assert fi_b.type == 'f'
        assert fi_a.path == fi_b.path == target
def info(args):
    """Print detailed metadata of a single entry in the archive.

    :raise ArchiveReadError: if the entry is not in the archive.
    :return: 0
    """
    typename = {"f": "file", "d": "directory", "l": "symbolic link"}
    with Archive().open(args.archive) as archive:
        fi = archive.manifest.find(args.entry)
        if not fi:
            raise ArchiveReadError("%s: not found in archive" % args.entry)
        mtime = datetime.datetime.fromtimestamp(fi.mtime)
        infolines = [
            "Path: %s" % fi.path,
            "Type: %s" % typename[fi.type],
            "Mode: %s" % stat.filemode(fi.st_mode),
            "Owner: %s:%s (%d:%d)" % (fi.uname, fi.gname,
                                      fi.uid, fi.gid),
            "Mtime: %s" % mtime.strftime("%Y-%m-%d %H:%M:%S"),
        ]
        # Size only makes sense for files, a target only for symlinks.
        if fi.is_file():
            infolines.append("Size: %d" % fi.size)
        if fi.is_symlink():
            infolines.append("Target: %s" % fi.target)
        print(*infolines, sep="\n")
        return 0
def __init__(self):
    """Load all '*.a' archives found in $BOI_RESOURCES_PATH."""
    try:
        self.resources_path = os.environ['BOI_RESOURCES_PATH']
        # Bug fix: the original tested `self.resources_path[-1:] is "/"`,
        # which compares object identity, not string equality.
        if not self.resources_path.endswith("/"):
            self.resources_path = self.resources_path + "/"
    except KeyError:
        # Bug fix: the message misspelled the variable name as
        # $BOI_RESOUCES_PATH, contradicting the key read above.
        errprint("You must set $BOI_RESOURCES_PATH")
        exit(1)
    else:
        dprint(" - Using", self.resources_path, "as resources path")
    dprint("Loading archives...")
    self.archives = {}
    archive_paths = glob.glob(self.resources_path + "*.a")
    archive_paths.sort()
    if archive_paths:
        dprint(" - Found", str(len(archive_paths)), "archives")
    # Index the archives by their own name attribute, not the file name.
    for archive_path in archive_paths:
        a = Archive(archive_path)
        self.archives[a.name] = a
def diff(args):
    """Compare the manifests of two archives and report differences.

    The exit status encodes the most severe kind of difference found:
    0 for none, 100 for metadata only (with ``--report-meta``),
    101 for differing content or symlink targets, 102 for missing
    entries or entries of different type.
    """
    archive1 = Archive().open(args.archive1)
    manifest1 = archive1.manifest
    archive1.close()
    archive2 = Archive().open(args.archive2)
    manifest2 = archive2.manifest
    archive2.close()
    # Use a checksum algorithm present in both manifests.
    algorithm = _common_checksum(manifest1, manifest2)
    diff = diff_manifest(manifest1, manifest2, algorithm)
    if args.skip_dir_content:
        diff = _skip_dir_filter(diff)
    status = 0
    for diff_stat, fi1, fi2 in diff:
        if diff_stat == DiffStatus.MISSING_A:
            print("Only in %s: %s" % (args.archive2, fi2.path))
            status = max(status, 102)
        elif diff_stat == DiffStatus.MISSING_B:
            print("Only in %s: %s" % (args.archive1, fi1.path))
            status = max(status, 102)
        elif diff_stat == DiffStatus.TYPE:
            print("Entries %s:%s and %s:%s have different type"
                  % (args.archive1, fi1.path, args.archive2, fi2.path))
            status = max(status, 102)
        elif diff_stat == DiffStatus.SYMLNK_TARGET:
            print("Symbol links %s:%s and %s:%s have different target"
                  % (args.archive1, fi1.path, args.archive2, fi2.path))
            status = max(status, 101)
        elif diff_stat == DiffStatus.CONTENT:
            print("Files %s:%s and %s:%s differ"
                  % (args.archive1, fi1.path, args.archive2, fi2.path))
            status = max(status, 101)
        elif diff_stat == DiffStatus.META and args.report_meta:
            # Metadata differences are only reported on request.
            print("File system metadata for %s:%s and %s:%s differ"
                  % (args.archive1, fi1.path, args.archive2, fi2.path))
            status = max(status, 100)
    return status
def test_diff_manifest_dircontent(test_data, testname, monkeypatch):
    """Diff two fileinfo lists with one subdirectory missing.
    """
    monkeypatch.chdir(test_data)
    with Archive().open(Path("archive.tar")) as archive:
        manifest_ref = archive.manifest
        base = Path("base")
        base_mtime = os.stat(base).st_mtime
        # Remove a whole subdirectory including its content.
        subdir = base / "data"
        shutil.rmtree(subdir)
        os.utime(base, times=(base_mtime, base_mtime))
        diff = list(filter(non_match,
                           diff_manifest(get_fileinfos(base), manifest_ref)))
        assert len(diff) == 2
        # Both the directory itself and the file inside it are missing.
        status, fi_a, fi_b = diff[0]
        assert status == DiffStatus.MISSING_A
        assert fi_b.type == 'd'
        assert fi_b.path == subdir
        assert fi_a is None
        status, fi_a, fi_b = diff[1]
        assert status == DiffStatus.MISSING_A
        assert fi_b.type == 'f'
        assert fi_b.path == subdir / "rnd.dat"
        assert fi_a is None
def test_create(test_dir, monkeypatch, testcase):
    """Create an archive with the given deduplication mode."""
    dedup = testcase
    monkeypatch.chdir(str(test_dir))
    archive_path = Path(archive_name(tags=[dedup.value]))
    Archive().create(archive_path, '', [Path("base")], dedup=dedup)
def find(args):
    """Search all given archives for entries matching the filter and
    print each hit as 'archive:path'."""
    searchfilter = SearchFilter(args)
    for path in args.archives:
        with Archive().open(path) as archive:
            for fi in archive.manifest:
                if searchfilter(fi):
                    print("%s:%s" % (path, fi.path))
def setup(self):
    # Each test starts from a pristine archive backed by a Hive store.
    store = Hive()
    self.archive = Archive('tests', store)
def test_check_manifest(test_dir, dep_testcase):
    """The manifest must match the test data regardless of dedup mode."""
    dedup = dep_testcase
    path = test_dir / archive_name(tags=[dedup.value])
    with Archive().open(path) as archive:
        check_manifest(archive.manifest, testdata)
class TestArchive:

    def setup(self):
        # A fresh archive plus a canonical table for every test.
        self.archive = Archive('tests', Hive())
        self.events = ExternalTable('atomic', 'events', partitioned=True)

    def test_lookup(self):
        self.archive.add(self.events)
        found = self.archive.lookup('events')
        assert_is_not_none(found)
        assert_equal('events', found.name)
        assert_equal(self.events, found)

    @raises(KeyError)
    def test_missing_lookup(self):
        self.archive.lookup('doesnotexist')

    @raises(RuntimeError)
    def test_unique_names(self):
        # Registering the same query twice must be rejected.
        self.archive.add(self.events)
        self.archive.add(self.events)

    @raises(RuntimeError)
    def test_missing_input(self):
        # A view may only depend on queries already in the archive.
        self.archive.add(self.events)
        unregistered = View('atomic', 'doesnotexist', self.events)
        self.archive.add(View('atomic', 'missing_input', unregistered))

    def test_graph(self):
        self.archive.add(self.events)
        assert_equal(
            'Archive: tests\nExternalTable(atomic.events)',
            self.archive.graph()
        )

    def test_stats(self):
        self.archive.add(self.events)
        stats = self.archive.optimize()
        assert_equal(1, stats['archive']['databases'])
        assert_equal(1, stats['archive']['depth'])
        assert_equal(1, stats['archive']['queries'])
        assert_equal(1, stats['databases']['references']['atomic'])
        assert_equal(set(['atomic']), stats['databases']['unique_databases'])
        assert_equal(1, stats['queries']['references']['events'])
        assert_equal(set(['events']), stats['queries']['unique_queries'])
def test_dir(tmpdir):
    # Build the source tree once and pack it into the reference archive.
    setup_testdata(tmpdir, testdata)
    Archive().create(Path("archive.tar"), "", [Path("base")],
                     workdir=tmpdir)
    return tmpdir
def diff(args):
    """Compare two archives by walking both sorted manifests in lockstep.

    The exit status encodes the most severe difference found: 0 for
    none, 100 for file system metadata only (with ``--report-meta``),
    101 for differing content or symlink targets, 102 for missing
    entries or entries of different type.

    NOTE(review): both archives are closed immediately after opening,
    yet their manifests and basedirs are used afterwards — presumably
    the manifest is fully read on open; confirm against Archive.open().
    """
    archive1 = Archive().open(args.archive1)
    archive1.close()
    archive2 = Archive().open(args.archive2)
    archive2.close()
    # Checksum algorithm available in both manifests.
    algorithm = _common_checksum(archive1.manifest, archive2.manifest)
    # In principle, we might rely on the fact that the manifest of an
    # archive is always sorted at creation time. On the other hand,
    # as we depend on this, we sort them again to be on the safe side.
    archive1.manifest.sort()
    archive2.manifest.sort()
    it1 = iter(archive1.manifest)
    it2 = iter(archive2.manifest)
    fi1 = _next(it1)
    fi2 = _next(it2)
    status = 0
    while True:
        # _next() returning an exhausted marker yields path None here.
        path1 = _relpath(fi1, archive1.basedir)
        path2 = _relpath(fi2, archive2.basedir)
        if path1 is None and path2 is None:
            # Both manifests exhausted.
            break
        elif path1 is None or path1 > path2:
            # Entry only present in the second archive.
            print("Only in %s: %s" % (archive2.path, fi2.path))
            if args.skip_dir_content and fi2.is_dir():
                fi2 = _next(it2, skip=fi2.path)
            else:
                fi2 = _next(it2)
            status = max(status, 102)
        elif path2 is None or path2 > path1:
            # Entry only present in the first archive.
            print("Only in %s: %s" % (archive1.path, fi1.path))
            if args.skip_dir_content and fi1.is_dir():
                fi1 = _next(it1, skip=fi1.path)
            else:
                fi1 = _next(it1)
            status = max(status, 102)
        else:
            assert path1 == path2
            if fi1.type != fi2.type:
                print("Entries %s:%s and %s:%s have different type"
                      % (archive1.path, fi1.path, archive2.path, fi2.path))
                status = max(status, 102)
            elif fi1.type == "l":
                if fi1.target != fi2.target:
                    print(
                        "Symbol links %s:%s and %s:%s have different target"
                        % (archive1.path, fi1.path, archive2.path, fi2.path))
                    status = max(status, 101)
            elif fi1.type == "f":
                # Note: we don't need to compare the size, because if
                # the size differs, it's mostly certain that also the
                # checksums do.
                if fi1.checksum[algorithm] != fi2.checksum[algorithm]:
                    print("Files %s:%s and %s:%s differ"
                          % (archive1.path, fi1.path,
                             archive2.path, fi2.path))
                    status = max(status, 101)
                # NOTE(review): the attachment of this elif is ambiguous
                # in the collapsed source; as written, metadata is only
                # reported for files with matching checksums — confirm
                # against the tool's documented behavior.
                elif args.report_meta and (
                        fi1.uid != fi2.uid or fi1.uname != fi2.uname or
                        fi1.gid != fi2.gid or fi1.gname != fi2.gname or
                        fi1.mode != fi2.mode or
                        int(fi1.mtime) != int(fi2.mtime)):
                    print("File system metadata for %s:%s and %s:%s differ"
                          % (archive1.path, fi1.path,
                             archive2.path, fi2.path))
                    status = max(status, 100)
            fi1 = _next(it1)
            fi2 = _next(it2)
    return status
def verify(args):
    """Verify the integrity of the archive's content."""
    with Archive().open(args.archive) as archive:
        archive.verify()
        return 0
def test_data(request, test_dir):
    # Start every test from a clean extraction of the reference archive.
    shutil.rmtree(test_dir / "base", ignore_errors=True)
    with Archive().open(test_dir / "archive.tar") as archive:
        archive.extract(test_dir)
        return test_dir