def check_midx(name):
    """Check the midx file at name against the idx files it lists.

    For every idx named in the midx's idxnames (opened relative to the
    directory containing name), each object the idx yields must be
    found by both that idx and the midx.  Afterwards the midx itself
    is iterated to make sure no entry sorts before its predecessor.

    Nothing is returned; every problem found is recorded via
    add_error().  If the midx cannot be opened (git.GitError), that
    is recorded and the check stops.
    """
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % path_msg(nicename))
    try:
        ix = git.open_idx(name)
    except git.GitError as ex:
        add_error('%s: %s' % (path_msg(name), ex))
        return

    idx_dir = os.path.dirname(name)
    idx_total = len(ix.idxnames)
    for count, subname in enumerate(ix.idxnames):
        sub = git.open_idx(os.path.join(idx_dir, subname))
        # Loop-invariant, so compute them once per idx rather than
        # once per object.
        short_sub = git.shorten_hash(subname).decode('ascii')
        sub_total = len(sub)
        for ecount, oid in enumerate(sub):
            if not (ecount % 1234):
                qprogress(' %d/%d: %s %d/%d\r'
                          % (count, idx_total, short_sub, ecount, sub_total))
            if not sub.exists(oid):
                add_error("%s: %s: %s missing from idx"
                          % (path_msg(nicename), short_sub, hexstr(oid)))
            if not ix.exists(oid):
                add_error("%s: %s: %s missing from midx"
                          % (path_msg(nicename), short_sub, hexstr(oid)))

    prev = None
    for ecount, oid in enumerate(ix):
        if not (ecount % 1234):
            qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
        if oid and prev and not oid >= prev:
            # Route nicename through path_msg() like every other
            # message in this function, rather than formatting it
            # directly with %s.
            add_error('%s: ordering error: %s < %s'
                      % (path_msg(nicename), hexstr(oid), hexstr(prev)))
        prev = oid
def log_item(name, type, opt, tree=None, commit=None, tag=None):
    """Report one item according to the flags set on opt.

    The tag, tree and commit ids (in that order) are printed in hex
    when both the id is given and the matching opt.print_tags,
    opt.print_trees or opt.print_commits flag is set.  When
    opt.verbose is set the item's name is also logged, with a
    trailing '/' added for the container-like types if the name does
    not already end with one.
    """
    printable = (
        (tag, 'print_tags'),
        (tree, 'print_trees'),
        (commit, 'print_commits'),
    )
    for oid, flag in printable:
        # The flag is only consulted when an id was actually supplied.
        if oid and getattr(opt, flag):
            print(hexstr(oid))

    if not opt.verbose:
        return

    container = type in ('root', 'branch', 'save', 'commit', 'tree')
    suffix = '/' if container and not name.endswith(b'/') else ''
    log('%s%s\n' % (path_msg(name), suffix))
def bup_rm(repo, paths, compression=6, verbosity=None):
    """Remove the branches and saves that dead_items() selects for paths.

    Branches to drop are scheduled for ref deletion.  Saves to drop
    are handed, one branch at a time, to rm_saves() together with a
    PackWriter created with the given compression level; its result
    is recorded as the new value for that branch's ref.  The refs are
    only touched once all of that has finished, and a failure to
    update or delete one ref is recorded with add_error() without
    preventing attempts on the remaining refs.  When verbosity is
    truthy, each successful ref update is logged.
    """
    dead_branches, dead_saves = dead_items(repo, paths)
    die_if_errors('not proceeding with any removals\n')

    # ref_name -> (original_ref, tip_commit(bin))
    updated_refs = {}

    for branch_name, branch_item in dead_branches.items():
        head = b'refs/heads/' + branch_name
        assert head not in updated_refs
        updated_refs[head] = (branch_item.oid, None)

    if dead_saves:
        writer = git.PackWriter(compression_level=compression)
        try:
            for branch_name, saves in dead_saves.items():
                assert saves
                head = b'refs/heads/' + branch_name
                updated_refs[head] = rm_saves(saves, writer)
        except BaseException as ex:
            with pending_raise(ex):
                writer.abort()
        finally:
            writer.close()

    # Only update the refs here, at the very end, so that if something
    # goes wrong above, the old refs will be undisturbed.  Make an attempt
    # to update each ref.
    for ref_name, (orig_ref, new_ref) in updated_refs.items():
        try:
            if new_ref:
                git.update_ref(ref_name, new_ref, orig_ref)
                if verbosity:
                    log('updated %s (%s%s)\n'
                        % (path_msg(ref_name),
                           (hexstr(orig_ref) + ' -> ') if orig_ref else '',
                           hexstr(new_ref)))
            else:
                # No new tip means the whole branch is going away.
                git.delete_ref(ref_name, hexlify(orig_ref))
        except (git.GitError, ClientError) as ex:
            if new_ref:
                add_error('while trying to update %s (%s%s): %s'
                          % (path_msg(ref_name),
                             (hexstr(orig_ref) + ' -> ') if orig_ref else '',
                             hexstr(new_ref),
                             ex))
            else:
                add_error('while trying to delete %r (%s): %s'
                          % (ref_name, hexstr(orig_ref), ex))
def bup_gc(threshold=10, compression=1, verbosity=0):
    """Collect unreachable data from the repository's pack directory.

    Counts the existing objects, and if there are any, finds the live
    ones, clears the midx files, the bloom filter and the reflog, and
    then calls sweep() with the given threshold and compression.
    Progress messages are logged when verbosity is truthy.

    If find_live_objects() raises MissingObject, the missing oid is
    logged and the process exits with status 1.
    """
    def announce(message):
        # Progress chatter is only wanted when verbosity is truthy.
        if verbosity:
            log(message)

    cat_pipe = git.cp()
    existing_count = count_objects(git.repo(b'objects/pack'), verbosity)
    if verbosity:
        log('found %d objects\n' % existing_count)

    if not existing_count:
        announce('nothing to collect\n')
        return

    try:
        live_objects = find_live_objects(existing_count, cat_pipe,
                                         verbosity=verbosity)
    except MissingObject as ex:
        log('bup: missing object %r \n' % hexstr(ex.oid))
        sys.exit(1)

    try:
        # FIXME: just rename midxes and bloom, and restore them at the end if
        # we didn't change any packs?
        packdir = git.repo(b'objects/pack')
        announce('clearing midx files\n')
        midx.clear_midxes(packdir)
        announce('clearing bloom filter\n')
        bloom.clear_bloom(packdir)
        announce('clearing reflog\n')
        expirelog_cmd = [b'git', b'reflog', b'expire', b'--all',
                         b'--expire=all']
        expirelog = subprocess.Popen(expirelog_cmd, env=git._gitenv())
        git._git_wait(b' '.join(expirelog_cmd), expirelog)
        announce('removing unreachable data\n')
        sweep(live_objects, existing_count, cat_pipe,
              threshold, compression,
              verbosity)
    finally:
        live_objects.close()
def report_live_item(n, total, ref_name, ref_id, item, verbosity):
    """Report progress for one item seen while scanning live objects.

    Each message starts with the percentage n/total and the hex form
    of ref_id, followed by the item's path (with a trailing '/' for
    trees).  How much is reported depends on verbosity:

      - items with a chunk_path are only logged at verbosity 4 or more;
      - items whose last path element demangles to a non-empty name
        are shown via qprogress() at verbosity 1, and logged at
        verbosity 2+ for trees or 3+ for blobs;
      - anything else is logged, with its raw path, above verbosity 3.

    ref_name is accepted but not used here.
    """
    status = 'scanned %02.2f%%' % (n * 100.0 / total)
    hex_id = hexstr(ref_id)
    dirslash = b'/' if item.type == b'tree' else b''
    chunk_path = item.chunk_path

    if chunk_path:
        if verbosity >= 4:
            whole = b'/'.join(item.path)
            chunk = b'/'.join(chunk_path)
            log('%s %s:%s/%s%s\n' % (status, hex_id, path_msg(whole),
                                     path_msg(chunk), path_msg(dirslash)))
        return

    # Top commit, for example has none.
    demangled = None
    if item.path:
        demangled = git.demangle_name(item.path[-1], item.mode)[0]

    # Don't print mangled paths unless the verbosity is over 3.
    if not demangled:
        if verbosity > 3:
            raw = b'/'.join(item.path)
            log('%s %s:%s%s\n' % (status, hex_id, path_msg(raw),
                                  path_msg(dirslash)))
        return

    shown = b'/'.join(item.path[:-1] + [demangled])
    if verbosity == 1:
        qprogress('%s %s:%s%s\r' % (status, hex_id, path_msg(shown),
                                    path_msg(dirslash)))
    elif ((verbosity > 1 and item.type == b'tree')
          or (verbosity > 2 and item.type == b'blob')):
        log('%s %s:%s%s\n' % (status, hex_id, path_msg(shown),
                              path_msg(dirslash)))
def receive_objects_v2(conn, junk):
    """Read a stream of objects from conn and write them to a pack.

    A writer left in the module-level suspended_w by an earlier
    suspended call is resumed; otherwise a new git.PackWriter is
    created.  Each record on the wire starts with a 4-byte
    network-order unsigned length:

      - 0 ends the transfer: the writer is closed, the resulting idx
        name (if any) is written back, and conn.ok() is sent;
      - 0xffffffff suspends the transfer: the writer is stored in
        suspended_w and conn.ok() is sent;
      - anything else is followed by a 20-byte sha, a 4-byte
        network-order crc, and the remaining length - 24 bytes of
        object data.

    Unless dumb_server_mode is set, an object the writer reports as
    already existing is not written; instead the idx that contains it
    is suggested to the client (once per idx per call).

    Raises Exception if the stream ends where a length header was
    expected; the writer is aborted first.  junk is not used.
    """
    global suspended_w
    _init_session()
    suggested = set()

    if suspended_w:
        w, suspended_w = suspended_w, None
    elif dumb_server_mode:
        w = git.PackWriter(objcache_maker=None)
    else:
        w = git.PackWriter()

    while True:
        header = conn.read(4)
        if not header:
            w.abort()
            raise Exception('object read: expected length header, got EOF\n')
        n = struct.unpack('!I', header)[0]

        if not n:
            debug1('bup server: received %d object%s.\n'
                   % (w.count, '' if w.count == 1 else 's'))
            fullpath = w.close(run_midx=not dumb_server_mode)
            if fullpath:
                conn.write(b'%s.idx\n' % os.path.basename(fullpath))
            conn.ok()
            return

        if n == 0xffffffff:
            debug2('bup server: receive-objects suspended.\n')
            suspended_w = w
            conn.ok()
            return

        shar = conn.read(20)
        crcr = struct.unpack('!I', conn.read(4))[0]
        n -= 20 + 4
        # object sizes in bup are reasonably small
        buf = conn.read(n)
        _check(w, n, len(buf), 'object read: expected %d bytes, got %d\n')

        if not dumb_server_mode:
            oldpack = w.exists(shar, want_source=True)
            if oldpack:
                assert not (oldpack == True)
                assert oldpack.endswith(b'.idx')
                name = os.path.basename(oldpack)
                if name not in suggested:
                    debug1("bup server: suggesting index %s\n"
                           % git.shorten_hash(name).decode('ascii'))
                    debug1("bup server: because of object %s\n"
                           % hexstr(shar))
                    conn.write(b'index %s\n' % name)
                    suggested.add(name)
                continue

        _, crc = w._raw_write((buf,), sha=shar)
        _check(w, crcr, crc, 'object read: expected crc %d, got %d\n')
def check_bloom(path, bloomfilename, idx):
    """Check that every object in idx is present in the bloom filter.

    Logs and returns early if bloomfilename does not exist or if the
    filter does not list the idx's base name; records an error and
    returns if the filter reports itself invalid.  Otherwise each
    object yielded by the idx is looked up in the filter, and each
    miss is recorded with add_error().

    An idx given as a bare file name is looked up inside path.
    """
    rel_bloom = git.repo_rel(bloomfilename)
    rel_idx = git.repo_rel(idx)

    if not os.path.exists(bloomfilename):
        log('bloom: %s: does not exist.\n' % path_msg(rel_bloom))
        return

    shabloom = bloom.ShaBloom(bloomfilename)
    if not shabloom.valid():
        add_error('bloom: %r is invalid.\n' % path_msg(rel_bloom))
        return

    idx_base = os.path.basename(idx)
    if idx_base not in shabloom.idxnames:
        log('bloom: %s does not contain the idx.\n' % path_msg(rel_bloom))
        return

    # A bare file name (no directory part) is resolved against path.
    idx_file = os.path.join(path, idx) if idx_base == idx else idx

    log('bloom: bloom file: %s\n' % path_msg(rel_bloom))
    log('bloom: checking %s\n' % path_msg(rel_idx))
    for objsha in git.open_idx(idx_file):
        if shabloom.exists(objsha):
            continue
        add_error('bloom: ERROR: object %s missing' % hexstr(objsha))
def bup_gc(repo, threshold=10, compression=1, verbosity=0):
    """Collect unreachable data from repo's pack directory.

    Counts the existing objects, and if there are any, finds the live
    ones, clears the midx files, the bloom filter and the reflog, and
    then calls sweep() with the given threshold and compression.
    Progress messages are logged when verbosity is truthy.

    If find_live_objects() raises MissingObject, the missing oid is
    logged and the process exits with status 1.
    """
    def announce(message):
        # Progress chatter is only wanted when verbosity is truthy.
        if verbosity:
            log(message)

    # Yes - this is a hack. We should use repo.cat() instead of cat_pipe.get(),
    # but the repo abstraction right now can't properly deal with the fact that
    # we modify the repository underneath.
    repodir = os.path.join(repo.packdir(), b'..', b'..')
    cat_pipe = git.cp(repodir)
    existing_count = count_objects(repo.packdir(), verbosity)
    if verbosity:
        log('found %d objects\n' % existing_count)

    if not existing_count:
        announce('nothing to collect\n')
        return

    try:
        live_objects = find_live_objects(repo, existing_count, cat_pipe,
                                         verbosity=verbosity)
    except MissingObject as ex:
        log('bup: missing object %r \n' % hexstr(ex.oid))
        sys.exit(1)

    try:
        # FIXME: just rename midxes and bloom, and restore them at the end if
        # we didn't change any packs?
        packdir = repo.packdir()
        announce('clearing midx files\n')
        midx.clear_midxes(packdir)
        announce('clearing bloom filter\n')
        bloom.clear_bloom(packdir)
        announce('clearing reflog\n')
        expirelog_cmd = [b'git', b'reflog', b'expire', b'--all',
                         b'--expire=all']
        expirelog = subprocess.Popen(expirelog_cmd,
                                     env=git._gitenv(repo_dir=repodir))
        git._git_wait(b' '.join(expirelog_cmd), expirelog)
        announce('removing unreachable data\n')
        sweep(repo, live_objects, existing_count, cat_pipe,
              threshold, compression,
              verbosity)
    finally:
        live_objects.close()
def tree_data_and_bupm(repo, oid):
    """Return (tree_bytes, bupm_oid) for the tree named by oid.

    oid must be a 20-byte binary id naming either a tree or a commit;
    for a commit, the commit's tree is used.  bupm_oid is the id of
    the tree's b'.bupm' entry, or None if the tree has no metadata
    (i.e. older bup save, or non-bup tree).

    Raises Exception if oid names something other than a tree or
    commit.
    """
    assert len(oid) == 20

    def fetch(ref):
        # The first thing repo.cat() yields is a header triple; the
        # rest of the iterator is the object's content.
        stream = repo.cat(ref)
        _, kind, _size = next(stream)
        return kind, b''.join(stream)

    item_t, data = fetch(hexlify(oid))
    if item_t == b'commit':
        item_t, data = fetch(parse_commit(data).tree)
        assert item_t == b'tree'
    elif item_t != b'tree':
        raise Exception('%s is not a tree or commit' % hexstr(oid))

    for _, mangled_name, sub_oid in tree_decode(data):
        # Stop at the first name that is not less than b'.bupm':
        # either it is the metadata entry itself, or (presumably
        # because entries come back in sorted order) there is none.
        if mangled_name >= b'.bupm':
            return data, (sub_oid if mangled_name == b'.bupm' else None)
    return data, None
def receive_objects_v2(self, junk):
    """Read a stream of objects from self.conn and write them to a pack.

    If a previous call was suspended (self.suspended), writing simply
    continues; otherwise the repo is asked to make sure a pack writer
    exists.  Each record on the wire starts with a 4-byte
    network-order unsigned length:

      - 0 ends the transfer: writing is finished, the resulting idx
        name (if any) is written back, and conn.ok() is sent;
      - 0xffffffff suspends the transfer: self.suspended is set and
        conn.ok() is sent;
      - anything else is followed by a 20-byte sha, a 4-byte
        network-order crc, and the remaining length - 24 bytes of
        object data.

    Unless self.dumb_server_mode is set, an object the repo reports
    as already existing is not written; instead the idx that contains
    it is suggested to the client (once per idx per call).

    Raises Exception if the stream ends where a length header was
    expected; writing is aborted first.  junk is not used.
    """
    self.init_session()
    suggested = set()
    if self.suspended:
        self.suspended = False
    else:
        # NOTE(review): an objcache_maker used to be chosen here based
        # on dumb_server_mode, but it was never passed to anything, so
        # the dead local has been dropped.  If dumb mode is meant to
        # change how the pack writer is created, that still needs to
        # be wired through the repo -- confirm.
        # FIXME: this goes together with the direct accesses below
        self.repo._ensure_packwriter()

    while True:
        header = self.conn.read(4)
        if not header:
            self.repo.abort_writing()
            raise Exception(
                'object read: expected length header, got EOF\n')
        n = struct.unpack('!I', header)[0]

        if not n:
            # FIXME: don't be lazy and count ourselves, or something, at least
            # don't access self.repo internals
            count = self.repo._packwriter.count
            debug1('bup server: received %d object%s.\n'
                   % (count, '' if count == 1 else 's'))
            fullpath = self.repo.finish_writing(
                run_midx=not self.dumb_server_mode)
            if fullpath:
                self.conn.write(b'%s.idx\n' % os.path.basename(fullpath))
            self.conn.ok()
            return

        if n == 0xffffffff:
            debug2('bup server: receive-objects suspended.\n')
            self.suspended = True
            self.conn.ok()
            return

        shar = self.conn.read(20)
        crcr = struct.unpack('!I', self.conn.read(4))[0]
        n -= 20 + 4
        # object sizes in bup are reasonably small
        buf = self.conn.read(n)
        self._check(n, len(buf), 'object read: expected %d bytes, got %d\n')

        if not self.dumb_server_mode:
            result = self.repo.exists(shar, want_source=True)
            if result:
                oldpack = result.pack
                assert oldpack.endswith(b'.idx')
                name = os.path.basename(oldpack)
                if name not in suggested:
                    debug1("bup server: suggesting index %s\n"
                           % git.shorten_hash(name).decode('ascii'))
                    debug1("bup server: because of object %s\n"
                           % hexstr(shar))
                    self.conn.write(b'index %s\n' % name)
                    suggested.add(name)
                continue

        # FIXME: figure out the right abstraction for this; or better yet,
        # make the protocol aware of the object type
        _, crc = self.repo._packwriter._raw_write((buf,), sha=shar)
        self._check(crcr, crc, 'object read: expected crc %d, got %d\n')