def check_midx(name):
    """Cross-check the midx file at 'name' against its component .idx files.

    Every object listed in each component idx must be found both in that
    idx and in the midx, and the midx's own entries must be in
    non-decreasing SHA order.  Problems are reported via add_error()
    rather than raised.
    """
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % path_msg(nicename))
    try:
        ix = git.open_idx(name)
    except git.GitError as ex:
        add_error('%s: %s' % (path_msg(name), ex))
        return
    midx_dir = os.path.dirname(name)
    for count, subname in enumerate(ix.idxnames):
        sub = git.open_idx(os.path.join(midx_dir, subname))
        for ecount, e in enumerate(sub):
            if ecount % 1234 == 0:  # throttle progress output
                qprogress(' %d/%d: %s %d/%d\r'
                          % (count, len(ix.idxnames),
                             git.shorten_hash(subname).decode('ascii'),
                             ecount, len(sub)))
            if not sub.exists(e):
                add_error("%s: %s: %s missing from idx"
                          % (path_msg(nicename),
                             git.shorten_hash(subname).decode('ascii'),
                             hexstr(e)))
            if not ix.exists(e):
                add_error("%s: %s: %s missing from midx"
                          % (path_msg(nicename),
                             git.shorten_hash(subname).decode('ascii'),
                             hexstr(e)))
    # Verify the midx entries are sorted.
    prev = None
    for ecount, e in enumerate(ix):
        if ecount % 1234 == 0:
            qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
        if e and prev and not e >= prev:
            add_error('%s: ordering error: %s < %s'
                      % (nicename, hexstr(e), hexstr(prev)))
        prev = e
def test_midx_refreshing(tmpdir):
    """Verify PackIdxList.refresh() honors midx files and skip_midx.

    Creates two packs via a client packwriter, runs 'bup midx -f' to
    merge their indexes, and checks that refresh() sees one midx (or the
    two idx files when skip_midx=True).
    """
    environ[b'BUP_DIR'] = bupdir = tmpdir
    git.init_repo(bupdir)
    c = client.Client(bupdir, create=True)
    rw = c.new_packwriter()
    rw.new_blob(s1)
    p1base = rw.breakpoint()  # finish pack 1, keep writer open
    p1name = os.path.join(c.cachedir, p1base)
    s1sha = rw.new_blob(s1)  # should not be written; it's already in p1
    s2sha = rw.new_blob(s2)
    p2base = rw.close()
    p2name = os.path.join(c.cachedir, p2base)
    del rw
    pi = git.PackIdxList(bupdir + b'/objects/pack')
    assert len(pi.packs) == 2
    pi.refresh()
    assert len(pi.packs) == 2
    assert sorted([os.path.basename(i.name) for i in pi.packs]) \
        == sorted([p1base, p2base])
    p1 = git.open_idx(p1name)
    assert p1.exists(s1sha)
    p2 = git.open_idx(p2name)
    # s1 was already in p1, so it must not have been rewritten into p2.
    assert not p2.exists(s1sha)
    assert p2.exists(s2sha)
    subprocess.call([path.exe(), b'midx', b'-f'])
    # After midx -f, the two idx files collapse into one midx.
    pi.refresh()
    assert len(pi.packs) == 1
    pi.refresh(skip_midx=True)
    assert len(pi.packs) == 2
    pi.refresh(skip_midx=False)
    assert len(pi.packs) == 1
def check_midx(name):
    """Cross-check the midx file 'name' against its component .idx files.

    Reports (via add_error()) objects missing from a component idx or
    from the midx, and ordering violations among the midx entries.
    NOTE: Python 2 only (str.encode('hex')).
    """
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % nicename)
    try:
        ix = git.open_idx(name)
    except git.GitError as e:
        add_error('%s: %s' % (name, e))
        return
    for count, subname in enumerate(ix.idxnames):
        sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
        for ecount, e in enumerate(sub):
            if not (ecount % 1234):  # throttle progress output
                qprogress(' %d/%d: %s %d/%d\r'
                          % (count, len(ix.idxnames),
                             git.shorten_hash(subname), ecount, len(sub)))
            if not sub.exists(e):
                add_error("%s: %s: %s missing from idx"
                          % (nicename, git.shorten_hash(subname),
                             str(e).encode('hex')))
            if not ix.exists(e):
                add_error("%s: %s: %s missing from midx"
                          % (nicename, git.shorten_hash(subname),
                             str(e).encode('hex')))
    # Verify the midx entries are sorted (py2: anything >= None is True,
    # so the first iteration with prev=None passes).
    prev = None
    for ecount, e in enumerate(ix):
        if not (ecount % 1234):
            qprogress(' Ordering: %d/%d\r' % (ecount, len(ix)))
        if not e >= prev:
            add_error('%s: ordering error: %s < %s'
                      % (nicename, str(e).encode('hex'),
                         str(prev).encode('hex')))
        prev = e
def do_midx_dir(path):
    """Consolidate the .idx/.midx files in pack directory 'path'.

    First drops redundant midx files (fully covered by bigger/newer
    ones), then repeatedly merges groups of indexes until no more than
    DESIRED_HWM remain.  With --print, prints newly created files.
    NOTE: Python 2 only (print statement).
    """
    already = {}  # index files already covered (or deleted)
    sizes = {}    # filename -> object count
    if opt.force and not opt.auto:
        midxs = []  # don't use existing midx files
    else:
        midxs = glob.glob('%s/*.midx' % path)
        contents = {}
        for mname in midxs:
            m = git.open_idx(mname)
            contents[mname] = [('%s/%s' % (path, i)) for i in m.idxnames]
            sizes[mname] = len(m)
        # sort the biggest+newest midxes first, so that we can eliminate
        # smaller (or older) redundant ones that come later in the list
        midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))
        for mname in midxs:
            any = 0
            for iname in contents[mname]:
                if not already.get(iname):
                    already[iname] = 1
                    any = 1
            if not any:
                # every idx this midx covers was already covered elsewhere
                debug1('%r is redundant\n' % mname)
                unlink(mname)
                already[mname] = 1
    midxs = [k for k in midxs if not already.get(k)]
    idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]
    for iname in idxs:
        i = git.open_idx(iname)
        sizes[iname] = len(i)
    all = [(sizes[n], n) for n in (midxs + idxs)]
    # FIXME: what are the optimal values? Does this make sense?
    DESIRED_HWM = opt.force and 1 or 5  # stop merging at/below this many
    DESIRED_LWM = opt.force and 1 or 2  # how many to leave unmerged per pass
    existed = dict((name, 1) for sz, name in all)
    debug1('midx: %d indexes; want no more than %d.\n'
           % (len(all), DESIRED_HWM))
    if len(all) <= DESIRED_HWM:
        debug1('midx: nothing to do.\n')
    while len(all) > DESIRED_HWM:
        all.sort()  # smallest first, so we merge the small ones
        part1 = [name for sz, name in all[:len(all) - DESIRED_LWM + 1]]
        part2 = all[len(all) - DESIRED_LWM + 1:]
        all = list(do_midx_group(path, part1)) + part2
        if len(all) > DESIRED_HWM:
            debug1('\nStill too many indexes (%d > %d). Merging again.\n'
                   % (len(all), DESIRED_HWM))
    if opt['print']:
        for sz, name in all:
            if not existed.get(name):
                print name
def do_midx_dir(path):
    """Consolidate the .idx/.midx files in pack directory 'path'.

    Drops redundant midx files, then repeatedly merges index groups
    until no more than DESIRED_HWM remain.  With --print, prints newly
    created files.  NOTE: Python 2 only (print statement); this variant
    uses os.stat rather than xstat.stat for mtimes.
    """
    already = {}  # index files already covered (or deleted)
    sizes = {}    # filename -> object count
    if opt.force and not opt.auto:
        midxs = []  # don't use existing midx files
    else:
        midxs = glob.glob('%s/*.midx' % path)
        contents = {}
        for mname in midxs:
            m = git.open_idx(mname)
            contents[mname] = [('%s/%s' % (path, i)) for i in m.idxnames]
            sizes[mname] = len(m)
        # sort the biggest+newest midxes first, so that we can eliminate
        # smaller (or older) redundant ones that come later in the list
        midxs.sort(key=lambda ix: (-sizes[ix], -os.stat(ix).st_mtime))
        for mname in midxs:
            any = 0
            for iname in contents[mname]:
                if not already.get(iname):
                    already[iname] = 1
                    any = 1
            if not any:
                # every idx this midx covers was already covered elsewhere
                debug1('%r is redundant\n' % mname)
                unlink(mname)
                already[mname] = 1
    midxs = [k for k in midxs if not already.get(k)]
    idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]
    for iname in idxs:
        i = git.open_idx(iname)
        sizes[iname] = len(i)
    all = [(sizes[n], n) for n in (midxs + idxs)]
    # FIXME: what are the optimal values? Does this make sense?
    DESIRED_HWM = opt.force and 1 or 5  # stop merging at/below this many
    DESIRED_LWM = opt.force and 1 or 2  # how many to leave unmerged per pass
    existed = dict((name, 1) for sz, name in all)
    debug1('midx: %d indexes; want no more than %d.\n'
           % (len(all), DESIRED_HWM))
    if len(all) <= DESIRED_HWM:
        debug1('midx: nothing to do.\n')
    while len(all) > DESIRED_HWM:
        all.sort()  # smallest first, so we merge the small ones
        part1 = [name for sz, name in all[:len(all) - DESIRED_LWM + 1]]
        part2 = all[len(all) - DESIRED_LWM + 1:]
        all = list(do_midx_group(path, part1)) + part2
        if len(all) > DESIRED_HWM:
            debug1('\nStill too many indexes (%d > %d). Merging again.\n'
                   % (len(all), DESIRED_HWM))
    if opt['print']:
        for sz, name in all:
            if not existed.get(name):
                print name
def send_index(conn, name):
    """Send the raw contents of pack index 'name' over 'conn'.

    The payload is preceded by its length as a network-order uint32,
    and the exchange ends with conn.ok().
    """
    _init_session()
    # Only a bare "*.idx" basename is acceptable -- no path components.
    assert name.find("/") < 0
    assert name.endswith(".idx")
    pack_idx = git.open_idx(git.repo("objects/pack/%s" % name))
    payload = pack_idx.map
    conn.write(struct.pack("!I", len(payload)))
    conn.write(payload)
    conn.ok()
def send_index(conn, name):
    """Send the raw contents of pack index 'name' over 'conn'.

    Writes the payload length as a network-order uint32, then the
    mapped index bytes, then conn.ok().
    """
    _init_session()
    # Reject anything that is not a plain "*.idx" basename.
    assert name.find('/') < 0
    assert name.endswith('.idx')
    pack_idx = git.open_idx(git.repo('objects/pack/%s' % name))
    payload = pack_idx.map
    conn.write(struct.pack('!I', len(payload)))
    conn.write(payload)
    conn.ok()
def check_midx(name):
    """Open the midx file 'name', recording failures via add_error().

    NOTE: Python 2 only ('except X, e' syntax).
    NOTE(review): 'ix' is never used after assignment -- this looks like
    a truncated copy of the fuller check_midx; confirm against upstream.
    """
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % nicename)
    try:
        ix = git.open_idx(name)
    except git.GitError, e:
        add_error('%s: %s' % (name, e))
        return
def send_index(conn, name):
    """Send the raw contents of pack index 'name' over 'conn'.

    Verifies the repository first (check_repo_or_die), then writes the
    payload length as a network-order uint32 followed by the index
    bytes, and finishes with conn.ok().
    """
    git.check_repo_or_die()
    # Reject anything that is not a plain "*.idx" basename.
    assert name.find('/') < 0
    assert name.endswith('.idx')
    pack_idx = git.open_idx(git.repo('objects/pack/%s' % name))
    payload = pack_idx.map
    conn.write(struct.pack('!I', len(payload)))
    conn.write(payload)
    conn.ok()
def send_index(conn, name):
    """Send the raw contents of pack index 'name' (bytes) over 'conn'.

    The mapped index bytes are preceded by their length as a
    network-order uint32; the index is closed before conn.ok().
    """
    _init_session()
    # Only a bare b"*.idx" basename is acceptable -- no path components.
    assert name.find(b'/') < 0
    assert name.endswith(b'.idx')
    idx_path = git.repo(b'objects/pack/%s' % name)
    with git.open_idx(idx_path) as idx:
        conn.write(struct.pack('!I', len(idx.map)))
        conn.write(idx.map)
    conn.ok()
def count_objects(dir, verbosity):
    """Return the total object count across all *.idx files in 'dir'.

    For now we'll just use open_idx(), but we could probably be much
    more efficient since all we need is a single integer (the last
    fanout entry) from each index.
    """
    total = 0
    index_paths = glob.glob(os.path.join(dir, '*.idx'))
    for pos, idx_path in enumerate(index_paths):
        if verbosity:
            log('found %d objects (%d/%d %s)\r'
                % (total, pos + 1, len(index_paths), basename(idx_path)))
        total += len(git.open_idx(idx_path))
    return total
def test_midx_refreshing():
    """Check PackIdxList.refresh() vs midx files (wvtest, Python 2).

    The tmpdir is only removed when no new wvtest failures occurred, so
    failing runs leave their state behind for inspection.
    """
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tclient-')
    os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = tmpdir
    git.init_repo(bupdir)
    c = client.Client(bupdir, create=True)
    rw = c.new_packwriter()
    rw.new_blob(s1)
    p1base = rw.breakpoint()  # finish pack 1, keep writer open
    p1name = os.path.join(c.cachedir, p1base)
    s1sha = rw.new_blob(s1)  # should not be written; it's already in p1
    s2sha = rw.new_blob(s2)
    p2base = rw.close()
    p2name = os.path.join(c.cachedir, p2base)
    del rw
    pi = git.PackIdxList(bupdir + '/objects/pack')
    WVPASSEQ(len(pi.packs), 2)
    pi.refresh()
    WVPASSEQ(len(pi.packs), 2)
    WVPASSEQ(sorted([os.path.basename(i.name) for i in pi.packs]),
             sorted([p1base, p2base]))
    p1 = git.open_idx(p1name)
    WVPASS(p1.exists(s1sha))
    p2 = git.open_idx(p2name)
    # s1 was already in p1, so it must not have been rewritten into p2.
    WVFAIL(p2.exists(s1sha))
    WVPASS(p2.exists(s2sha))
    subprocess.call([bupmain, 'midx', '-f'])
    # After midx -f the two idx files collapse into one midx.
    pi.refresh()
    WVPASSEQ(len(pi.packs), 1)
    pi.refresh(skip_midx=True)
    WVPASSEQ(len(pi.packs), 2)
    pi.refresh(skip_midx=False)
    WVPASSEQ(len(pi.packs), 1)
    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
def test_midx_refreshing():
    """Check PackIdxList.refresh() vs midx files (wvtest, Python 2).

    Uses no_lingering_errors()/test_tempdir() context managers for
    error checking and temp-dir cleanup.
    """
    with no_lingering_errors():
        with test_tempdir('bup-tclient-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
            os.environ['BUP_DIR'] = bupdir = tmpdir
            git.init_repo(bupdir)
            c = client.Client(bupdir, create=True)
            rw = c.new_packwriter()
            rw.new_blob(s1)
            p1base = rw.breakpoint()  # finish pack 1, keep writer open
            p1name = os.path.join(c.cachedir, p1base)
            s1sha = rw.new_blob(s1)  # should not be written; it's already in p1
            s2sha = rw.new_blob(s2)
            p2base = rw.close()
            p2name = os.path.join(c.cachedir, p2base)
            del rw
            pi = git.PackIdxList(bupdir + '/objects/pack')
            WVPASSEQ(len(pi.packs), 2)
            pi.refresh()
            WVPASSEQ(len(pi.packs), 2)
            WVPASSEQ(sorted([os.path.basename(i.name) for i in pi.packs]),
                     sorted([p1base, p2base]))
            p1 = git.open_idx(p1name)
            WVPASS(p1.exists(s1sha))
            p2 = git.open_idx(p2name)
            # s1 was already in p1; it must not have been written into p2.
            WVFAIL(p2.exists(s1sha))
            WVPASS(p2.exists(s2sha))
            subprocess.call([bupmain, 'midx', '-f'])
            # After midx -f the two idx files collapse into one midx.
            pi.refresh()
            WVPASSEQ(len(pi.packs), 1)
            pi.refresh(skip_midx=True)
            WVPASSEQ(len(pi.packs), 2)
            pi.refresh(skip_midx=False)
            WVPASSEQ(len(pi.packs), 1)
def main(argv):
    """List idx files, optionally searching each for a --find SHA prefix.

    With a full 40-char --find, does a direct existence check per idx;
    with a shorter prefix, does a slow exhaustive scan.  Matches are
    written to stdout as "<idxfile> <hex>".
    """
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    handle_ctrl_c()
    opt.find = argv_bytes(opt.find) if opt.find else b''
    if not extra:
        o.fatal('you must provide at least one filename')
    if len(opt.find) > 40:
        o.fatal('--find parameter must be <= 40 chars long')
    else:
        # Pad odd-length prefixes so unhexlify() can validate them.
        if len(opt.find) % 2:
            s = opt.find + b'0'
        else:
            s = opt.find
        try:
            bin = unhexlify(s)
        except TypeError:
            o.fatal('--find parameter is not a valid hex string')
    sys.stdout.flush()
    out = byte_stream(sys.stdout)
    find = opt.find.lower()
    count = 0
    idxfiles = [argv_bytes(x) for x in extra]
    for name in idxfiles:
        try:
            ix = git.open_idx(name)
        except git.GitError as e:
            add_error('%r: %s' % (name, e))
            continue
        if len(opt.find) == 40:
            # Exact-length prefix: a direct lookup suffices.
            if ix.exists(bin):
                out.write(b'%s %s\n' % (name, find))
        else:
            # slow, exhaustive search
            for _i in ix:
                i = hexlify(_i)
                if i.startswith(find):
                    out.write(b'%s %s\n' % (name, i))
                qprogress('Searching: %d\r' % count)
                count += 1
    if saved_errors:
        # NOTE(review): "while saving" reads like a copy-paste from a
        # save command; this routine only searches -- confirm upstream.
        log('WARNING: %d errors encountered while saving.\n'
            % len(saved_errors))
        sys.exit(1)
def test_midx_refreshing():
    """Check PackIdxList.refresh() vs midx files (wvtest, Python 2).

    Same scenario as the other variants: two packs, then 'bup midx -f',
    then refresh() with and without skip_midx.
    """
    with no_lingering_errors():
        with test_tempdir('bup-tclient-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
            os.environ['BUP_DIR'] = bupdir = tmpdir
            git.init_repo(bupdir)
            c = client.Client(bupdir, create=True)
            rw = c.new_packwriter()
            rw.new_blob(s1)
            p1base = rw.breakpoint()  # finish pack 1, keep writer open
            p1name = os.path.join(c.cachedir, p1base)
            s1sha = rw.new_blob(s1)  # should not be written; it's already in p1
            s2sha = rw.new_blob(s2)
            p2base = rw.close()
            p2name = os.path.join(c.cachedir, p2base)
            del rw
            pi = git.PackIdxList(bupdir + '/objects/pack')
            WVPASSEQ(len(pi.packs), 2)
            pi.refresh()
            WVPASSEQ(len(pi.packs), 2)
            WVPASSEQ(sorted([os.path.basename(i.name) for i in pi.packs]),
                     sorted([p1base, p2base]))
            p1 = git.open_idx(p1name)
            WVPASS(p1.exists(s1sha))
            p2 = git.open_idx(p2name)
            # s1 was already in p1; it must not have been written into p2.
            WVFAIL(p2.exists(s1sha))
            WVPASS(p2.exists(s2sha))
            subprocess.call([bupmain, 'midx', '-f'])
            # After midx -f the two idx files collapse into one midx.
            pi.refresh()
            WVPASSEQ(len(pi.packs), 1)
            pi.refresh(skip_midx=True)
            WVPASSEQ(len(pi.packs), 2)
            pi.refresh(skip_midx=False)
            WVPASSEQ(len(pi.packs), 1)
def testpacks():
    """Exercise PackWriter/open_idx/PackIdxList (wvtest, Python 2).

    Writes 1000 blobs, then checks idx lookups, iteration order, and
    PackIdxList existence checks.  The tmpdir is only removed when no
    new wvtest failures occurred.
    """
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tgit-')
    os.environ['BUP_MAIN_EXE'] = bup_exe
    os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
    git.init_repo(bupdir)
    git.verbose = 1
    # An aborted writer must leave nothing behind.
    w = git.PackWriter()
    w.new_blob(os.urandom(100))
    w.new_blob(os.urandom(100))
    w.abort()
    w = git.PackWriter()
    hashes = []
    nobj = 1000
    for i in range(nobj):
        hashes.append(w.new_blob(str(i)))
    log('\n')
    nameprefix = w.close()
    print repr(nameprefix)
    WVPASS(os.path.exists(nameprefix + '.pack'))
    WVPASS(os.path.exists(nameprefix + '.idx'))
    r = git.open_idx(nameprefix + '.idx')
    print repr(r.fanout)
    for i in range(nobj):
        WVPASS(r.find_offset(hashes[i]) > 0)
    WVPASS(r.exists(hashes[99]))
    WVFAIL(r.exists('\0' * 20))
    # Iteration must yield the hashes in sorted order.
    pi = iter(r)
    for h in sorted(hashes):
        WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))
    WVFAIL(r.find_offset('\0' * 20))
    r = git.PackIdxList(bupdir + '/objects/pack')
    WVPASS(r.exists(hashes[5]))
    WVPASS(r.exists(hashes[6]))
    WVFAIL(r.exists('\0' * 20))
    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
def testpacks():
    """Exercise PackWriter/open_idx/PackIdxList (wvtest, Python 2).

    Identical scenario to the sibling variant: 1000 blobs, idx lookups,
    sorted iteration, PackIdxList checks; tmpdir kept on failure.
    """
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tgit-')
    os.environ['BUP_MAIN_EXE'] = bup_exe
    os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
    git.init_repo(bupdir)
    git.verbose = 1
    # An aborted writer must leave nothing behind.
    w = git.PackWriter()
    w.new_blob(os.urandom(100))
    w.new_blob(os.urandom(100))
    w.abort()
    w = git.PackWriter()
    hashes = []
    nobj = 1000
    for i in range(nobj):
        hashes.append(w.new_blob(str(i)))
    log('\n')
    nameprefix = w.close()
    print repr(nameprefix)
    WVPASS(os.path.exists(nameprefix + '.pack'))
    WVPASS(os.path.exists(nameprefix + '.idx'))
    r = git.open_idx(nameprefix + '.idx')
    print repr(r.fanout)
    for i in range(nobj):
        WVPASS(r.find_offset(hashes[i]) > 0)
    WVPASS(r.exists(hashes[99]))
    WVFAIL(r.exists('\0'*20))
    # Iteration must yield the hashes in sorted order.
    pi = iter(r)
    for h in sorted(hashes):
        WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))
    WVFAIL(r.find_offset('\0'*20))
    r = git.PackIdxList(bupdir + '/objects/pack')
    WVPASS(r.exists(hashes[5]))
    WVPASS(r.exists(hashes[6]))
    WVFAIL(r.exists('\0'*20))
    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
def testpacks():
    """Exercise PackWriter/open_idx/PackIdxList (wvtest, Python 2).

    Uses no_lingering_errors()/test_tempdir() context managers instead
    of manual tmpdir handling.
    """
    with no_lingering_errors():
        with test_tempdir('bup-tgit-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = bup_exe
            os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
            git.init_repo(bupdir)
            git.verbose = 1
            # An aborted writer must leave nothing behind.
            w = git.PackWriter()
            w.new_blob(os.urandom(100))
            w.new_blob(os.urandom(100))
            w.abort()
            w = git.PackWriter()
            hashes = []
            nobj = 1000
            for i in range(nobj):
                hashes.append(w.new_blob(str(i)))
            log('\n')
            nameprefix = w.close()
            print repr(nameprefix)
            WVPASS(os.path.exists(nameprefix + '.pack'))
            WVPASS(os.path.exists(nameprefix + '.idx'))
            r = git.open_idx(nameprefix + '.idx')
            print repr(r.fanout)
            for i in range(nobj):
                WVPASS(r.find_offset(hashes[i]) > 0)
            WVPASS(r.exists(hashes[99]))
            WVFAIL(r.exists('\0'*20))
            # Iteration must yield the hashes in sorted order.
            pi = iter(r)
            for h in sorted(hashes):
                WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))
            WVFAIL(r.find_offset('\0'*20))
            r = git.PackIdxList(bupdir + '/objects/pack')
            WVPASS(r.exists(hashes[5]))
            WVPASS(r.exists(hashes[6]))
            WVFAIL(r.exists('\0'*20))
def testpacks():
    """Exercise PackWriter/open_idx/PackIdxList (wvtest, Python 2).

    Same scenario as the sibling variants, using the
    no_lingering_errors()/test_tempdir() context managers.
    """
    with no_lingering_errors():
        with test_tempdir('bup-tgit-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = bup_exe
            os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
            git.init_repo(bupdir)
            git.verbose = 1
            # An aborted writer must leave nothing behind.
            w = git.PackWriter()
            w.new_blob(os.urandom(100))
            w.new_blob(os.urandom(100))
            w.abort()
            w = git.PackWriter()
            hashes = []
            nobj = 1000
            for i in range(nobj):
                hashes.append(w.new_blob(str(i)))
            log('\n')
            nameprefix = w.close()
            print repr(nameprefix)
            WVPASS(os.path.exists(nameprefix + '.pack'))
            WVPASS(os.path.exists(nameprefix + '.idx'))
            r = git.open_idx(nameprefix + '.idx')
            print repr(r.fanout)
            for i in range(nobj):
                WVPASS(r.find_offset(hashes[i]) > 0)
            WVPASS(r.exists(hashes[99]))
            WVFAIL(r.exists('\0' * 20))
            # Iteration must yield the hashes in sorted order.
            pi = iter(r)
            for h in sorted(hashes):
                WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))
            WVFAIL(r.find_offset('\0' * 20))
            r = git.PackIdxList(bupdir + '/objects/pack')
            WVPASS(r.exists(hashes[5]))
            WVPASS(r.exists(hashes[6]))
            WVFAIL(r.exists('\0' * 20))
def testpacks():
    """Exercise PackWriter/open_idx/PackIdxList (wvtest, Python 3).

    Bytes-path variant of the older tests: 1000 blobs, idx lookups,
    sorted iteration via hexlify, PackIdxList checks.
    """
    with no_lingering_errors():
        with test_tempdir(b'bup-tgit-') as tmpdir:
            environ[b'BUP_DIR'] = bupdir = tmpdir + b'/bup'
            git.init_repo(bupdir)
            git.verbose = 1
            # An aborted writer must leave nothing behind.
            w = git.PackWriter()
            w.new_blob(os.urandom(100))
            w.new_blob(os.urandom(100))
            w.abort()
            w = git.PackWriter()
            hashes = []
            nobj = 1000
            for i in range(nobj):
                hashes.append(w.new_blob(b'%d' % i))
            log('\n')
            nameprefix = w.close()
            print(repr(nameprefix))
            WVPASS(os.path.exists(nameprefix + b'.pack'))
            WVPASS(os.path.exists(nameprefix + b'.idx'))
            r = git.open_idx(nameprefix + b'.idx')
            print(repr(r.fanout))
            for i in range(nobj):
                WVPASS(r.find_offset(hashes[i]) > 0)
            WVPASS(r.exists(hashes[99]))
            WVFAIL(r.exists(b'\0'*20))
            # Iteration must yield the hashes in sorted order.
            pi = iter(r)
            for h in sorted(hashes):
                WVPASSEQ(hexlify(next(pi)), hexlify(h))
            WVFAIL(r.find_offset(b'\0'*20))
            r = git.PackIdxList(bupdir + b'/objects/pack')
            WVPASS(r.exists(hashes[5]))
            WVPASS(r.exists(hashes[6]))
            WVFAIL(r.exists(b'\0'*20))
def testpacks():
    """Exercise PackWriter/open_idx/PackIdxList (wvtest, Python 2).

    Oldest-style variant: uses a fixed 'pybuptest.tmp' repo dir, wiped
    at the start of each run rather than cleaned up afterwards.
    """
    os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = 'pybuptest.tmp'
    subprocess.call(['rm', '-rf', bupdir])
    git.init_repo(bupdir)
    git.verbose = 1
    # An aborted writer must leave nothing behind.
    w = git.PackWriter()
    w.new_blob(os.urandom(100))
    w.new_blob(os.urandom(100))
    w.abort()
    w = git.PackWriter()
    hashes = []
    nobj = 1000
    for i in range(nobj):
        hashes.append(w.new_blob(str(i)))
    log('\n')
    nameprefix = w.close()
    print repr(nameprefix)
    WVPASS(os.path.exists(nameprefix + '.pack'))
    WVPASS(os.path.exists(nameprefix + '.idx'))
    r = git.open_idx(nameprefix + '.idx')
    print repr(r.fanout)
    for i in range(nobj):
        WVPASS(r.find_offset(hashes[i]) > 0)
    WVPASS(r.exists(hashes[99]))
    WVFAIL(r.exists('\0' * 20))
    # Iteration must yield the hashes in sorted order.
    pi = iter(r)
    for h in sorted(hashes):
        WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))
    WVFAIL(r.find_offset('\0' * 20))
    r = git.PackIdxList('pybuptest.tmp/objects/pack')
    WVPASS(r.exists(hashes[5]))
    WVPASS(r.exists(hashes[6]))
    WVFAIL(r.exists('\0' * 20))
def testpacks():
    """Exercise PackWriter/open_idx/PackIdxList (wvtest, Python 2).

    Same as the sibling fixed-dir variant: 'pybuptest.tmp' is wiped at
    the start, 1000 blobs are written, then idx lookups, sorted
    iteration, and PackIdxList checks are verified.
    """
    os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = 'pybuptest.tmp'
    subprocess.call(['rm','-rf', bupdir])
    git.init_repo(bupdir)
    git.verbose = 1
    # An aborted writer must leave nothing behind.
    w = git.PackWriter()
    w.new_blob(os.urandom(100))
    w.new_blob(os.urandom(100))
    w.abort()
    w = git.PackWriter()
    hashes = []
    nobj = 1000
    for i in range(nobj):
        hashes.append(w.new_blob(str(i)))
    log('\n')
    nameprefix = w.close()
    print repr(nameprefix)
    WVPASS(os.path.exists(nameprefix + '.pack'))
    WVPASS(os.path.exists(nameprefix + '.idx'))
    r = git.open_idx(nameprefix + '.idx')
    print repr(r.fanout)
    for i in range(nobj):
        WVPASS(r.find_offset(hashes[i]) > 0)
    WVPASS(r.exists(hashes[99]))
    WVFAIL(r.exists('\0'*20))
    # Iteration must yield the hashes in sorted order.
    pi = iter(r)
    for h in sorted(hashes):
        WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))
    WVFAIL(r.find_offset('\0'*20))
    r = git.PackIdxList('pybuptest.tmp/objects/pack')
    WVPASS(r.exists(hashes[5]))
    WVPASS(r.exists(hashes[6]))
    WVFAIL(r.exists('\0'*20))
def check_bloom(path, bloomfilename, idx):
    """Verify every object in 'idx' is present in the bloom filter.

    Missing bloom file or idx-not-covered are only logged; an invalid
    bloom or a missing object is recorded via add_error().
    NOTE: Python 2 only (str.encode("hex")).
    """
    rbloomfilename = git.repo_rel(bloomfilename)
    ridx = git.repo_rel(idx)
    if not os.path.exists(bloomfilename):
        log("bloom: %s: does not exist.\n" % rbloomfilename)
        return
    b = bloom.ShaBloom(bloomfilename)
    if not b.valid():
        add_error("bloom: %r is invalid.\n" % rbloomfilename)
        return
    base = os.path.basename(idx)
    if base not in b.idxnames:
        log("bloom: %s does not contain the idx.\n" % rbloomfilename)
        return
    # If 'idx' was given as a bare basename, resolve it against 'path'.
    if base == idx:
        idx = os.path.join(path, idx)
    log("bloom: bloom file: %s\n" % rbloomfilename)
    log("bloom: checking %s\n" % ridx)
    for objsha in git.open_idx(idx):
        if not b.exists(objsha):
            add_error("bloom: ERROR: object %s missing"
                      % str(objsha).encode("hex"))
def check_bloom(path, bloomfilename, idx):
    """Verify every object in 'idx' is present in the bloom filter.

    A missing bloom file or an idx not covered by it is merely logged;
    an invalid bloom or a missing object is recorded via add_error().
    """
    rbloomfilename = git.repo_rel(bloomfilename)
    ridx = git.repo_rel(idx)
    if not os.path.exists(bloomfilename):
        log('bloom: %s: does not exist.\n' % path_msg(rbloomfilename))
        return
    shabloom = bloom.ShaBloom(bloomfilename)
    if not shabloom.valid():
        add_error('bloom: %r is invalid.\n' % path_msg(rbloomfilename))
        return
    base = os.path.basename(idx)
    if base not in shabloom.idxnames:
        log('bloom: %s does not contain the idx.\n' % path_msg(rbloomfilename))
        return
    # If 'idx' was given as a bare basename, resolve it against 'path'.
    if base == idx:
        idx = os.path.join(path, idx)
    log('bloom: bloom file: %s\n' % path_msg(rbloomfilename))
    log('bloom: checking %s\n' % path_msg(ridx))
    for objsha in git.open_idx(idx):
        if shabloom.exists(objsha):
            continue
        add_error('bloom: ERROR: object %s missing' % hexstr(objsha))
def testpacks():
    """Exercise PackWriter.write()/open_idx/PackIdxList (wvtest, Python 2).

    Earliest variant: uses w.write('blob', ...) rather than new_blob(),
    and a timestamp string as a blob unlikely to exist in any pack yet.
    """
    git.init_repo('pybuptest.tmp')
    git.verbose = 1
    now = str(time.time())  # hopefully not in any packs yet
    # An aborted writer must leave nothing behind.
    w = git.PackWriter()
    w.write('blob', now)
    w.write('blob', now)
    w.abort()
    w = git.PackWriter()
    hashes = []
    nobj = 1000
    for i in range(nobj):
        hashes.append(w.write('blob', str(i)))
    log('\n')
    nameprefix = w.close()
    print repr(nameprefix)
    WVPASS(os.path.exists(nameprefix + '.pack'))
    WVPASS(os.path.exists(nameprefix + '.idx'))
    r = git.open_idx(nameprefix + '.idx')
    print repr(r.fanout)
    for i in range(nobj):
        WVPASS(r.find_offset(hashes[i]) > 0)
    WVPASS(r.exists(hashes[99]))
    WVFAIL(r.exists('\0'*20))
    # Iteration must yield the hashes in sorted order.
    pi = iter(r)
    for h in sorted(hashes):
        WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))
    WVFAIL(r.find_offset('\0'*20))
    r = git.PackIdxList('pybuptest.tmp/objects/pack')
    WVPASS(r.exists(hashes[5]))
    WVPASS(r.exists(hashes[6]))
    WVFAIL(r.exists('\0'*20))
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    """Merge the given idx/midx files into a single .midx file.

    Writes the merged table to outfilename + '.tmp' and renames it into
    place.  Returns (total_object_count, outfilename), or None when
    opt.auto/opt.force decide there is nothing worth merging.
    NOTE: Python 2 only (cmp-based sort, print statement).
    """
    global _first
    if not outfilename:
        assert (outdir)
        # Derive a stable output name from the input file list.
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)
    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            # (map, object count, sha offset, which_ofs for midx inputs,
            #  index of this file's first name in allfilenames)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        # Sort inputs so the one with the largest first sha comes first.
        inp.sort(lambda x, y: cmp(str(y[0][y[2]:y[2] + 20]),
                                  str(x[0][x[2]:x[2] + 20])))
        if not _first:
            _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir) + ': ' or ''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
           or ((opt.auto or opt.force) and len(infilenames) < 2) \
           or (opt.force and not total):
            debug1('midx: nothing to do.\n')
            return
        pages = int(total / SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries * 4, bits))
        unlink(outfilename)
        f = open(outfilename + '.tmp', 'w+b')
        f.write('MIDX')
        f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
        assert (f.tell() == 12)
        # Preallocate: fanout table + shas + which-file indexes.
        f.truncate(12 + 4 * entries + 20 * total + 4 * total)
        f.flush()
        fdatasync(f.fileno())
        fmap = mmap_readwrite(f, close=False)
        count = merge_into(fmap, bits, total, inp)
        del fmap  # Assume this calls msync() now.
    finally:
        # Close any midx inputs even on failure; plain idx maps are
        # released via garbage collection.
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None
    # Append the component filename list, then atomically publish.
    f.seek(0, os.SEEK_END)
    f.write('\0'.join(allfilenames))
    f.close()
    os.rename(outfilename + '.tmp', outfilename)
    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert (len(p.idxnames) == len(infilenames))
        print p.idxnames
        assert (len(p) == total)
        for pe, e in p, git.idxmerge(inp, final_progress=False):
            pin = pi.next()
            assert (i == pin)
            assert (p.exists(i))
    return total, outfilename
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    """Merge the given idx/midx files into a single .midx file.

    Uses atomically_replaced_file() so the output only appears once
    fully written.  Returns (total_object_count, outfilename), or None
    when opt.auto/opt.force decide there is nothing worth merging.
    NOTE: Python 2 only (print statement in the disabled debug block).
    """
    global _first
    if not outfilename:
        assert(outdir)
        # Derive a stable output name from the input file list.
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)
    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            # (map, object count, sha offset, which_ofs for midx inputs,
            #  index of this file's first name in allfilenames)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        # Sort inputs so the one with the largest first sha comes first.
        inp.sort(reverse=True, key=lambda x: str(x[0][x[2]:x[2]+20]))
        if not _first:
            _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
           or ((opt.auto or opt.force) and len(infilenames) < 2) \
           or (opt.force and not total):
            debug1('midx: nothing to do.\n')
            return
        pages = int(total/SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'wb') as f:
            f.write('MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert(f.tell() == 12)
            # Preallocate: fanout table + shas + which-file indexes.
            f.truncate(12 + 4*entries + 20*total + 4*total)
            f.flush()
            fdatasync(f.fileno())
            fmap = mmap_readwrite(f, close=False)
            count = merge_into(fmap, bits, total, inp)
            del fmap  # Assume this calls msync() now.
            # Append the component filename list.
            f.seek(0, os.SEEK_END)
            f.write('\0'.join(allfilenames))
    finally:
        # Close any midx inputs even on failure; plain idx maps are
        # released via garbage collection.
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None
    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        print p.idxnames
        assert(len(p) == total)
        for pe, e in p, git.idxmerge(inp, final_progress=False):
            pin = next(pi)
            assert(i == pin)
            assert(p.exists(i))
    return total, outfilename
def _do_midx(outdir, outfilename, infilenames, prefixstr,
             auto=False, force=False):
    """Merge the given idx/midx files (bytes paths) into one .midx file.

    Python 3 variant: input indexes are closed via ExitStack, and the
    output is written through atomically_replaced_file().  Returns
    (total_object_count, outfilename), or None when auto/force decide
    there is nothing worth merging.
    """
    global _first
    if not outfilename:
        assert (outdir)
        # Derive a stable output name from the input file list.
        sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
        outfilename = b'%s/midx-%s.midx' % (outdir, sum)
    inp = []
    total = 0
    allfilenames = []
    with ExitStack() as contexts:
        for name in infilenames:
            ix = git.open_idx(name)
            contexts.enter_context(ix)  # ensure every input is closed
            # (map, object count, sha offset, which_ofs for midx inputs,
            #  index of this file's first name in allfilenames)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        # Sort inputs so the one with the largest first sha comes first.
        inp.sort(reverse=True, key=lambda x: x[0][x[2]:x[2] + 20])
        if not _first:
            _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (auto and (total < 1024 and len(infilenames) < 3)) \
           or ((auto or force) and len(infilenames) < 2) \
           or (force and not total):
            debug1('midx: nothing to do.\n')
            return None
        pages = int(total / SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries * 4, bits))
        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'w+b') as f:
            f.write(b'MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert (f.tell() == 12)
            # Preallocate: fanout table + shas + which-file indexes.
            f.truncate(12 + 4 * entries + 20 * total + 4 * total)
            f.flush()
            fdatasync(f.fileno())
            with mmap_readwrite(f, close=False) as fmap:
                count = merge_into(fmap, bits, total, inp)
            # Append the component filename list.
            f.seek(0, os.SEEK_END)
            f.write(b'\0'.join(allfilenames))
    # This is just for testing (if you enable this, don't clear inp above)
    # if 0:
    #     p = midx.PackMidx(outfilename)
    #     assert(len(p.idxnames) == len(infilenames))
    #     log(repr(p.idxnames) + '\n')
    #     assert(len(p) == total)
    #     for pe, e in p, git.idxmerge(inp, final_progress=False):
    #         pin = next(pi)
    #         assert(i == pin)
    #         assert(p.exists(i))
    return total, outfilename
mf -= 20 # just a safety margin else: mf -= 6 # minimum safety margin return mf def check_midx(name): nicename = git.repo_rel(name) log('Checking %s.\n' % nicename) try: ix = git.open_idx(name) except git.GitError, e: add_error('%s: %s' % (name, e)) return for count, subname in enumerate(ix.idxnames): sub = git.open_idx(os.path.join(os.path.dirname(name), subname)) for ecount, e in enumerate(sub): if not (ecount % 1234): qprogress(' %d/%d: %s %d/%d\r' % (count, len( ix.idxnames), git.shorten_hash(subname), ecount, len(sub))) if not sub.exists(e): add_error("%s: %s: %s missing from idx" % (nicename, git.shorten_hash(subname), str(e).encode('hex'))) if not ix.exists(e): add_error("%s: %s: %s missing from midx" % (nicename, git.shorten_hash(subname), str(e).encode('hex'))) prev = None for ecount, e in enumerate(ix): if not (ecount % 1234):
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    """Merge the given idx/midx files into a single .midx file.

    Writes the merged table to outfilename + '.tmp' and renames it into
    place.  Returns (total_object_count, outfilename), or None when
    opt.auto/opt.force decide there is nothing worth merging.
    """
    global _first
    if not outfilename:
        assert(outdir)
        # Derive a stable output name from the input file list.
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)
    inp = []
    total = 0
    allfilenames = []
    for name in infilenames:
        ix = git.open_idx(name)
        # (map, object count, sha offset, which_ofs for midx inputs,
        #  index of this file's first name in allfilenames)
        inp.append((
            ix.map,
            len(ix),
            ix.sha_ofs,
            isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
            len(allfilenames),
        ))
        for n in ix.idxnames:
            allfilenames.append(os.path.basename(n))
        total += len(ix)
    # Sort inputs so the one with the largest first sha comes first.
    inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20])))
    if not _first:
        _first = outdir
    dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
    log('midx: %s%screating from %d files (%d objects).\n'
        % (dirprefix, prefixstr, len(infilenames), total))
    if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
       or ((opt.auto or opt.force) and len(infilenames) < 2) \
       or (opt.force and not total):
        debug1('midx: nothing to do.\n')
        return
    pages = int(total/SHA_PER_PAGE) or 1
    bits = int(math.ceil(math.log(pages, 2)))
    entries = 2**bits
    debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
    unlink(outfilename)
    f = open(outfilename + '.tmp', 'w+b')
    f.write('MIDX')
    f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
    assert(f.tell() == 12)
    # Preallocate: fanout table + shas + which-file indexes.
    f.truncate(12 + 4*entries + 20*total + 4*total)
    fmap = mmap_readwrite(f, close=False)
    count = merge_into(fmap, bits, total, inp)
    del fmap
    # Append the component filename list.  Fix: SEEK_END lives in the
    # os module, not bup's git module (compare the sibling _do_midx
    # versions, which all use os.SEEK_END).
    f.seek(0, os.SEEK_END)
    f.write('\0'.join(allfilenames))
    f.close()
    os.rename(outfilename + '.tmp', outfilename)
    # this is just for testing (dead code; print() form so the file
    # also parses under Python 3)
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        print(p.idxnames)
        assert(len(p) == total)
        for pe, e in p, git.idxmerge(inp, final_progress=False):
            assert(i == pi.next())
            assert(p.exists(i))
    return total, outfilename
def send_index(self, name, conn, send_size):
    """Send the mapped bytes of pack index 'name' over 'conn'.

    Invokes send_size() with the byte count before streaming the data.
    """
    idx_path = git.repo(b'objects/pack/%s' % name, repo_dir=self.repo_dir)
    idx = git.open_idx(idx_path)
    payload = idx.map
    send_size(len(payload))
    conn.write(payload)
def do_bloom(path, outfilename):
    """Create or update the bloom filter 'outfilename' for the idx files in 'path'.

    Reuses an existing valid filter when possible, only adding the idx
    files it doesn't already cover; otherwise regenerates from scratch
    (into a temp file renamed into place at the end).
    """
    global _first
    b = None
    if os.path.exists(outfilename) and not opt.force:
        b = bloom.ShaBloom(outfilename)
        if not b.valid():
            debug1("bloom: Existing invalid bloom found, regenerating.\n")
            b = None
    # Partition idx files into those the existing filter already covers
    # ('rest') and those that still need adding ('add').
    add = []
    rest = []
    add_count = 0
    rest_count = 0
    for i, name in enumerate(glob.glob('%s/*.idx' % path)):
        progress('bloom: counting: %d\r' % i)
        ix = git.open_idx(name)
        ixbase = os.path.basename(name)
        if b and (ixbase in b.idxnames):
            rest.append(name)
            rest_count += len(ix)
        else:
            add.append(name)
            add_count += len(ix)
    total = add_count + rest_count
    if not add:
        debug1("bloom: nothing to do.\n")
        return
    # Decide whether the existing filter is still usable.
    if b:
        if len(b) != rest_count:
            debug1("bloom: size %d != idx total %d, regenerating\n"
                   % (len(b), rest_count))
            b = None
        elif (b.bits < bloom.MAX_BLOOM_BITS and
              b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
            debug1("bloom: regenerating: adding %d entries gives "
                   "%.2f%% false positives.\n"
                   % (add_count, b.pfalse_positive(add_count)))
            b = None
        else:
            b = bloom.ShaBloom(outfilename, readwrite=True,
                               expected=add_count)
    if not b:  # Need all idxs to build from scratch
        add += rest
        add_count += rest_count
    del rest
    del rest_count
    msg = b is None and 'creating from' or 'adding'
    if not _first:
        _first = path
    dirprefix = (_first != path) and git.repo_rel(path) + ': ' or ''
    progress('bloom: %s%s %d file%s (%d object%s).\n'
             % (dirprefix, msg, len(add),
                len(add) != 1 and 's' or '',
                add_count, add_count != 1 and 's' or ''))
    tfname = None
    if b is None:
        tfname = os.path.join(path, 'bup.tmp.bloom')
        # Fix: open in binary mode -- the bloom file holds binary data
        # ('w+' was text mode, which corrupts it on Python 3/Windows).
        tf = open(tfname, 'w+b')
        b = bloom.create(tfname, f=tf, expected=add_count, k=opt.k)
    count = 0
    icount = 0
    for name in add:
        ix = git.open_idx(name)
        qprogress('bloom: writing %.2f%% (%d/%d objects)\r'
                  % (icount * 100.0 / add_count, icount, add_count))
        b.add_idx(ix)
        count += 1
        icount += len(ix)
    # Publish the freshly built filter (no-op when updating in place).
    if tfname:
        os.rename(tfname, outfilename)
def sweep(live_objects, existing_count, cat_pipe, opt):
    """Repack the repository, keeping only (probably) live objects.

    live_objects is queried via .exists(sha); existing_count is the
    total object count used for progress percentages; cat_pipe fetches
    object contents for rewriting; opt supplies verbose, compress and
    threshold settings.

    NOTE(review): Python-2-only code (xrange, str.encode('hex'),
    iterator .next()).
    """
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    # idx/pack paths scheduled for deletion once the replacement pack
    # has been finished.
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        # Callback run when a new pack is finished; deletes everything
        # queued in ns.stale_files.
        # NOTE(review): does not skip paths starting with
        # new_pack_prefix — confirm the new pack can never appear in
        # the stale list.
        if opt.verbose and new_pack_prefix:
            log('created ' + basename(new_pack_prefix) + '\n')
        for p in ns.stale_files:
            if opt.verbose:
                log('removing ' + basename(p) + '\n')
            os.unlink(p)
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=opt.compress,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)

    # FIXME: sanity check .idx names vs .pack names?
    collect_count = 0
    for idx_name in glob.glob(os.path.join(git.repo('objects/pack'),
                                           '*.idx')):
        if opt.verbose:
            qprogress('preserving live data (%d%% complete)\r'
                      % ((float(collect_count) / existing_count) * 100))
        idx = git.open_idx(idx_name)

        # First pass: count how many of this idx's objects are live.
        idx_live_count = 0
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20:(i + 1) * 20]  # 20-byte binary sha
            if live_objects.exists(sha):
                idx_live_count += 1

        collect_count += idx_live_count
        if idx_live_count == 0:
            # Nothing live: delete the pack (and idx) outright.
            if opt.verbose:
                log('deleting %s\n' % git.repo_rel(basename(idx_name)))
            ns.stale_files.append(idx_name)
            ns.stale_files.append(idx_name[:-3] + 'pack')
            continue

        live_frac = idx_live_count / float(len(idx))
        if live_frac > ((100 - opt.threshold) / 100.0):
            # Mostly live: not worth rewriting; keep as-is.
            if opt.verbose:
                log('keeping %s (%d%% live)\n'
                    % (git.repo_rel(basename(idx_name)), live_frac * 100))
            continue

        # Second pass: copy the live objects into the new pack, then
        # schedule the old pack for deletion.
        if opt.verbose:
            log('rewriting %s (%.2f%% live)\n'
                % (basename(idx_name), live_frac * 100))
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20:(i + 1) * 20]
            if live_objects.exists(sha):
                item_it = cat_pipe.get(sha.encode('hex'))
                type = item_it.next()  # first item: object type
                writer.write(sha, type, ''.join(item_it))

        ns.stale_files.append(idx_name)
        ns.stale_files.append(idx_name[:-3] + 'pack')

    if opt.verbose:
        progress('preserving live data (%d%% complete)\n'
                 % ((float(collect_count) / existing_count) * 100))

    # Nothing should have recreated midx/bloom yet.
    pack_dir = git.repo('objects/pack')
    assert (not os.path.exists(os.path.join(pack_dir, 'bup.bloom')))
    assert (not glob.glob(os.path.join(pack_dir, '*.midx')))

    # try/catch should call writer.abort()?
    # This will finally run midx.
    writer.close()  # Can only change refs (if needed) after this.

    remove_stale_files(None)  # In case we didn't write to the writer.

    if opt.verbose:
        log('discarded %d%% of objects\n'
            % ((existing_count - count_objects(pack_dir))
               / float(existing_count) * 100))
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    """Merge the given .idx files into a single .midx file.

    If outfilename is falsy, a name derived from a SHA-1 of the input
    file names is used under outdir.  Returns (total, outfilename) when
    a midx was written, or None when there was nothing worth merging.

    NOTE(review): Python-2-only code (print statement, .next()).
    """
    if not outfilename:
        assert(outdir)
        # Same inputs always map to the same output name.
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)

    # Open every input, collecting the underlying idx names (an input
    # may itself be a midx covering several idxs) and total object count.
    inp = []
    total = 0
    allfilenames = {}
    for name in infilenames:
        ix = git.open_idx(name)
        for n in ix.idxnames:
            allfilenames[n] = 1
        inp.append(ix)
        total += len(ix)

    log('midx: %screating from %d files (%d objects).\n'
        % (prefixstr, len(infilenames), total))
    # Skip when the merge wouldn't pay off (few, small inputs) unless
    # forced; a forced run with zero objects is still a no-op.
    if (not opt.force and (total < 1024 and len(infilenames) < 3)) \
       or len(infilenames) < 2 \
       or (opt.force and not total):
        debug1('midx: nothing to do.\n')
        return

    # Size the lookup table: one SHA_PER_PAGE chunk per entry, rounded
    # up to a power of two.
    pages = int(total/SHA_PER_PAGE) or 1
    bits = int(math.ceil(math.log(pages, 2)))
    entries = 2**bits
    debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

    table = [0]*entries

    try:
        os.unlink(outfilename)
    except OSError:
        pass
    # Write to a .tmp file first; renamed into place once complete.
    f = open(outfilename + '.tmp', 'w+')
    f.write('MIDX\0\0\0\2')  # magic plus format version 2
    f.write(struct.pack('!I', bits))
    assert(f.tell() == 12)
    f.write('\0'*4*entries)  # placeholder table, rewritten below

    for e in merge(inp, bits, table):
        f.write(e)

    # Trailing NUL-separated list of the source idx basenames.
    f.write('\0'.join(os.path.basename(p) for p in allfilenames.keys()))

    # Seek back and fill in the now-populated lookup table.
    f.seek(12)
    f.write(struct.pack('!%dI' % entries, *table))
    f.close()
    os.rename(outfilename + '.tmp', outfilename)

    # this is just for testing
    # NOTE(review): dead code (if 0); it calls merge() with a different
    # arity (inp, total, bits, table) than the live call above — confirm
    # before ever re-enabling.
    if 0:
        p = git.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        print p.idxnames
        assert(len(p) == total)
        pi = iter(p)
        for i in merge(inp, total, bits, table):
            assert(i == pi.next())
            assert(p.exists(i))

    return total,outfilename
mf -= 20 # just a safety margin else: mf -= 6 # minimum safety margin return mf def check_midx(name): nicename = git.repo_rel(name) log('Checking %s.\n' % nicename) try: ix = git.open_idx(name) except git.GitError, e: add_error('%s: %s' % (name, e)) return for count,subname in enumerate(ix.idxnames): sub = git.open_idx(os.path.join(os.path.dirname(name), subname)) for ecount,e in enumerate(sub): if not (ecount % 1234): qprogress(' %d/%d: %s %d/%d\r' % (count, len(ix.idxnames), git.shorten_hash(subname), ecount, len(sub))) if not sub.exists(e): add_error("%s: %s: %s missing from idx" % (nicename, git.shorten_hash(subname), str(e).encode('hex'))) if not ix.exists(e): add_error("%s: %s: %s missing from midx" % (nicename, git.shorten_hash(subname), str(e).encode('hex'))) prev = None for ecount,e in enumerate(ix):
def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
          verbosity):
    """Repack the repository, keeping only (probably) live objects.

    live_objects is queried via .exists(sha); existing_count is the
    total object count used for progress percentages; cat_pipe fetches
    object contents for rewriting; threshold is the keep-vs-rewrite
    liveness cutoff in percent.

    NOTE(review): Python-2-only code (xrange, str.encode('hex'),
    iterator .next()).
    """
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    # idx/pack paths scheduled for deletion once the replacement pack
    # has been finished.
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        # Callback run when a new pack is finished; deletes everything
        # queued in ns.stale_files except the new pack itself.
        if verbosity and new_pack_prefix:
            log('created ' + basename(new_pack_prefix) + '\n')
        for p in ns.stale_files:
            if new_pack_prefix and p.startswith(new_pack_prefix):
                continue  # Don't remove the new pack file
            if verbosity:
                log('removing ' + basename(p) + '\n')
            os.unlink(p)
        if ns.stale_files:  # So git cat-pipe will close them
            cat_pipe.restart()
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=compression,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)

    # FIXME: sanity check .idx names vs .pack names?
    collect_count = 0
    for idx_name in glob.glob(os.path.join(git.repo('objects/pack'),
                                           '*.idx')):
        if verbosity:
            qprogress('preserving live data (%d%% complete)\r'
                      % ((float(collect_count) / existing_count) * 100))
        idx = git.open_idx(idx_name)

        # First pass: count how many of this idx's objects are live.
        idx_live_count = 0
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20 : (i + 1) * 20]  # 20-byte binary sha
            if live_objects.exists(sha):
                idx_live_count += 1

        collect_count += idx_live_count
        if idx_live_count == 0:
            # Nothing live: delete the pack (and idx) outright.
            if verbosity:
                log('deleting %s\n' % git.repo_rel(basename(idx_name)))
            ns.stale_files.append(idx_name)
            ns.stale_files.append(idx_name[:-3] + 'pack')
            continue

        live_frac = idx_live_count / float(len(idx))
        if live_frac > ((100 - threshold) / 100.0):
            # Mostly live: not worth rewriting; keep as-is.
            if verbosity:
                log('keeping %s (%d%% live)\n'
                    % (git.repo_rel(basename(idx_name)), live_frac * 100))
            continue

        # Second pass: copy the live objects into the new pack, then
        # schedule the old pack for deletion.
        if verbosity:
            log('rewriting %s (%.2f%% live)\n'
                % (basename(idx_name), live_frac * 100))
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20 : (i + 1) * 20]
            if live_objects.exists(sha):
                item_it = cat_pipe.get(sha.encode('hex'))
                type = item_it.next()  # first item: object type
                writer.just_write(sha, type, ''.join(item_it))

        ns.stale_files.append(idx_name)
        ns.stale_files.append(idx_name[:-3] + 'pack')

    if verbosity:
        progress('preserving live data (%d%% complete)\n'
                 % ((float(collect_count) / existing_count) * 100))

    # Nothing should have recreated midx/bloom yet.
    pack_dir = git.repo('objects/pack')
    assert(not os.path.exists(os.path.join(pack_dir, 'bup.bloom')))
    assert(not glob.glob(os.path.join(pack_dir, '*.midx')))

    # try/catch should call writer.abort()?
    # This will finally run midx.
    writer.close()  # Can only change refs (if needed) after this.

    remove_stale_files(None)  # In case we didn't write to the writer.

    if verbosity:
        log('discarded %d%% of objects\n'
            % ((existing_count - count_objects(pack_dir, verbosity))
               / float(existing_count) * 100))
def do_bloom(path, outfilename, k, force):
    """Create or update the bloom filter at outfilename for the .idx
    files under path.

    path and outfilename are bytes paths.  k is the filter's hash
    count (must be None, 4, or 5; None keeps/chooses the default) and
    force discards any existing filter.  Uses the module-global _first
    to decide when to prefix progress output with the repo path.
    """
    global _first
    assert k in (None, 4, 5)
    b = None
    if os.path.exists(outfilename) and not force:
        b = bloom.ShaBloom(outfilename)
        if not b.valid():
            debug1("bloom: Existing invalid bloom found, regenerating.\n")
            b = None

    # Partition the idx files into those already covered by the existing
    # bloom (rest) and those that still need to be added (add).
    add = []
    rest = []
    add_count = 0
    rest_count = 0
    for i, name in enumerate(glob.glob(b'%s/*.idx' % path)):
        progress('bloom: counting: %d\r' % i)
        ix = git.open_idx(name)
        ixbase = os.path.basename(name)
        if b and (ixbase in b.idxnames):
            rest.append(name)
            rest_count += len(ix)
        else:
            add.append(name)
            add_count += len(ix)

    if not add:
        debug1("bloom: nothing to do.\n")
        return

    # Decide whether the existing filter can be extended in place, or
    # whether it must be regenerated from scratch.
    if b:
        if len(b) != rest_count:
            debug1("bloom: size %d != idx total %d, regenerating\n"
                   % (len(b), rest_count))
            b = None
        elif k is not None and k != b.k:
            debug1("bloom: new k %d != existing k %d, regenerating\n"
                   % (k, b.k))
            b = None
        elif (b.bits < bloom.MAX_BLOOM_BITS[b.k] and
              b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
            debug1("bloom: regenerating: adding %d entries gives "
                   "%.2f%% false positives.\n"
                   % (add_count, b.pfalse_positive(add_count)))
            b = None
        else:
            b = bloom.ShaBloom(outfilename, readwrite=True,
                               expected=add_count)
    if not b:  # Need all idxs to build from scratch
        add += rest
        add_count += rest_count
    del rest
    del rest_count

    msg = b is None and 'creating from' or 'adding'
    if not _first:
        _first = path
    dirprefix = (_first != path) and git.repo_rel(path) + b': ' or b''
    progress('bloom: %s%s %d file%s (%d object%s).\r'
             % (path_msg(dirprefix), msg,
                len(add), len(add) != 1 and 's' or '',
                add_count, add_count != 1 and 's' or ''))

    tfname = None
    if b is None:
        # Build into a temp file; renamed into place below.
        tfname = os.path.join(path, b'bup.tmp.bloom')
        b = bloom.create(tfname, expected=add_count, k=k)
    count = 0
    icount = 0
    for name in add:
        ix = git.open_idx(name)
        qprogress('bloom: writing %.2f%% (%d/%d objects)\r'
                  % (icount * 100.0 / add_count, icount, add_count))
        b.add_idx(ix)
        count += 1
        icount += len(ix)

    # Currently, there's an open file object for tfname inside b.
    # Make sure it's closed before rename.
    b.close()
    if tfname:
        os.rename(tfname, outfilename)
def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
          verbosity):
    """Repack the repository, keeping only (probably) live objects.

    live_objects is queried via .exists(sha); existing_count is the
    total object count used for progress percentages; cat_pipe fetches
    object contents for rewriting; threshold is the keep-vs-rewrite
    liveness cutoff in percent.  On any failure the partially-written
    pack is aborted before the exception propagates.
    """
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    # idx/pack paths scheduled for deletion once the replacement pack
    # has been finished.
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        # Callback run when a new pack is finished; deletes everything
        # queued in ns.stale_files except the new pack itself.
        if verbosity and new_pack_prefix:
            log('created ' + path_msg(basename(new_pack_prefix)) + '\n')
        for p in ns.stale_files:
            if new_pack_prefix and p.startswith(new_pack_prefix):
                continue  # Don't remove the new pack file
            if verbosity:
                log('removing ' + path_msg(basename(p)) + '\n')
            os.unlink(p)
        if ns.stale_files:  # So git cat-pipe will close them
            cat_pipe.restart()
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=compression,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)
    try:
        # FIXME: sanity check .idx names vs .pack names?
        collect_count = 0
        for idx_name in glob.glob(
                os.path.join(git.repo(b'objects/pack'), b'*.idx')):
            if verbosity:
                qprogress('preserving live data (%d%% complete)\r'
                          % ((float(collect_count) / existing_count) * 100))
            with git.open_idx(idx_name) as idx:
                # First pass: count how many of this idx's objects are
                # live.
                idx_live_count = 0
                for sha in idx:
                    if live_objects.exists(sha):
                        idx_live_count += 1

                collect_count += idx_live_count
                if idx_live_count == 0:
                    # Nothing live: delete the pack (and idx) outright.
                    if verbosity:
                        log('deleting %s\n'
                            % path_msg(git.repo_rel(basename(idx_name))))
                    ns.stale_files.append(idx_name)
                    ns.stale_files.append(idx_name[:-3] + b'pack')
                    continue

                live_frac = idx_live_count / float(len(idx))
                if live_frac > ((100 - threshold) / 100.0):
                    # Mostly live: not worth rewriting; keep as-is.
                    if verbosity:
                        log('keeping %s (%d%% live)\n' % (git.repo_rel(
                            basename(idx_name)), live_frac * 100))
                    continue

                # Second pass: copy the live objects into the new pack,
                # then schedule the old pack for deletion.
                if verbosity:
                    log('rewriting %s (%.2f%% live)\n'
                        % (basename(idx_name), live_frac * 100))
                for sha in idx:
                    if live_objects.exists(sha):
                        item_it = cat_pipe.get(hexlify(sha))
                        # First item is a 3-tuple whose middle element
                        # is the object type; the rest is the data.
                        _, typ, _ = next(item_it)
                        writer.just_write(sha, typ, b''.join(item_it))

                ns.stale_files.append(idx_name)
                ns.stale_files.append(idx_name[:-3] + b'pack')

        if verbosity:
            progress('preserving live data (%d%% complete)\n'
                     % ((float(collect_count) / existing_count) * 100))

        # Nothing should have recreated midx/bloom yet.
        pack_dir = git.repo(b'objects/pack')
        assert (not os.path.exists(os.path.join(pack_dir, b'bup.bloom')))
        assert (not glob.glob(os.path.join(pack_dir, b'*.midx')))
    except BaseException as ex:
        # Abort the partially-written pack, then re-raise.
        with pending_raise(ex):
            writer.abort()

    # This will finally run midx.
    # Can only change refs (if needed) after this.
    writer.close()

    remove_stale_files(None)  # In case we didn't write to the writer.

    if verbosity:
        log('discarded %d%% of objects\n'
            % ((existing_count - count_objects(pack_dir, verbosity))
               / float(existing_count) * 100))
else: if len(opt.find) % 2: s = opt.find + '0' else: s = opt.find try: bin = s.decode('hex') except TypeError: o.fatal('--find parameter is not a valid hex string') find = opt.find.lower() count = 0 for name in extra: try: ix = git.open_idx(name) except git.GitError as e: add_error('%s: %s' % (name, e)) continue if len(opt.find) == 40: if ix.exists(bin): print name, find else: # slow, exhaustive search for _i in ix: i = str(_i).encode('hex') if i.startswith(find): print name, i qprogress('Searching: %d\r' % count) count += 1
def do_bloom(path, outfilename):
    """Create or update the bloom filter at outfilename for the .idx
    files under path.

    Respects opt.force (regenerate from scratch) and opt.k (hash count
    for a newly created filter).  Uses the module-global _first to
    decide when to prefix progress output with the repo-relative path.
    """
    global _first
    b = None
    if os.path.exists(outfilename) and not opt.force:
        b = bloom.ShaBloom(outfilename)
        if not b.valid():
            debug1("bloom: Existing invalid bloom found, regenerating.\n")
            b = None

    # Partition the idx files into those already covered by the existing
    # bloom (rest) and those that still need to be added (add).
    add = []
    rest = []
    add_count = 0
    rest_count = 0
    for i,name in enumerate(glob.glob('%s/*.idx' % path)):
        progress('bloom: counting: %d\r' % i)
        ix = git.open_idx(name)
        ixbase = os.path.basename(name)
        if b and (ixbase in b.idxnames):
            rest.append(name)
            rest_count += len(ix)
        else:
            add.append(name)
            add_count += len(ix)

    if not add:
        debug1("bloom: nothing to do.\n")
        return

    # Decide whether the existing filter can be extended in place, or
    # whether it must be regenerated from scratch.
    if b:
        if len(b) != rest_count:
            debug1("bloom: size %d != idx total %d, regenerating\n"
                   % (len(b), rest_count))
            b = None
        elif (b.bits < bloom.MAX_BLOOM_BITS and
              b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
            debug1("bloom: regenerating: adding %d entries gives "
                   "%.2f%% false positives.\n"
                   % (add_count, b.pfalse_positive(add_count)))
            b = None
        else:
            b = bloom.ShaBloom(outfilename, readwrite=True,
                               expected=add_count)
    if not b:  # Need all idxs to build from scratch
        add += rest
        add_count += rest_count
    del rest
    del rest_count

    msg = b is None and 'creating from' or 'adding'
    if not _first:
        _first = path
    dirprefix = (_first != path) and git.repo_rel(path)+': ' or ''
    progress('bloom: %s%s %d file%s (%d object%s).\n'
             % (dirprefix, msg,
                len(add), len(add)!=1 and 's' or '',
                add_count, add_count!=1 and 's' or ''))

    tfname = None
    if b is None:
        # Build into a temp file; renamed into place below.
        tfname = os.path.join(path, 'bup.tmp.bloom')
        b = bloom.create(tfname, expected=add_count, k=opt.k)
    icount = 0
    for name in add:
        ix = git.open_idx(name)
        qprogress('bloom: writing %.2f%% (%d/%d objects)\r'
                  % (icount*100.0/add_count, icount, add_count))
        b.add_idx(ix)
        icount += len(ix)

    # Currently, there's an open file object for tfname inside b.
    # Make sure it's closed before rename.
    b.close()
    if tfname:
        os.rename(tfname, outfilename)