示例#1
0
文件: midx.py 项目: bup/bup
def check_midx(name):
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % path_msg(nicename))
    try:
        ix = git.open_idx(name)
    except git.GitError as e:
        add_error('%s: %s' % (path_msg(name), e))
        return
    for count,subname in enumerate(ix.idxnames):
        sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
        for ecount,e in enumerate(sub):
            if not (ecount % 1234):
                qprogress('  %d/%d: %s %d/%d\r' 
                          % (count, len(ix.idxnames),
                             git.shorten_hash(subname).decode('ascii'),
                             ecount, len(sub)))
            if not sub.exists(e):
                add_error("%s: %s: %s missing from idx"
                          % (path_msg(nicename),
                             git.shorten_hash(subname).decode('ascii'),
                             hexstr(e)))
            if not ix.exists(e):
                add_error("%s: %s: %s missing from midx"
                          % (path_msg(nicename),
                             git.shorten_hash(subname).decode('ascii'),
                             hexstr(e)))
    prev = None
    for ecount,e in enumerate(ix):
        if not (ecount % 1234):
            qprogress('  Ordering: %d/%d\r' % (ecount, len(ix)))
        if e and prev and not e >= prev:
            add_error('%s: ordering error: %s < %s'
                      % (nicename, hexstr(e), hexstr(prev)))
        prev = e
示例#2
0
def check_midx(name):
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % nicename)
    try:
        ix = git.open_idx(name)
    except git.GitError as e:
        add_error('%s: %s' % (name, e))
        return
    for count,subname in enumerate(ix.idxnames):
        sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
        for ecount,e in enumerate(sub):
            if not (ecount % 1234):
                qprogress('  %d/%d: %s %d/%d\r' 
                          % (count, len(ix.idxnames),
                             git.shorten_hash(subname), ecount, len(sub)))
            if not sub.exists(e):
                add_error("%s: %s: %s missing from idx"
                          % (nicename, git.shorten_hash(subname),
                             str(e).encode('hex')))
            if not ix.exists(e):
                add_error("%s: %s: %s missing from midx"
                          % (nicename, git.shorten_hash(subname),
                             str(e).encode('hex')))
    prev = None
    for ecount,e in enumerate(ix):
        if not (ecount % 1234):
            qprogress('  Ordering: %d/%d\r' % (ecount, len(ix)))
        if not e >= prev:
            add_error('%s: ordering error: %s < %s'
                      % (nicename,
                         str(e).encode('hex'), str(prev).encode('hex')))
        prev = e
示例#3
0
文件: bloom.py 项目: jmberg/bup
def ruin_bloom(bloomfilename):
    rbloomfilename = git.repo_rel(bloomfilename)
    if not os.path.exists(bloomfilename):
        log(path_msg(bloomfilename) + '\n')
        add_error('bloom: %s not found to ruin\n' % path_msg(rbloomfilename))
        return
    b = bloom.ShaBloom(bloomfilename, readwrite=True, expected=1)
    b.map[16:16 + 2**b.bits] = b'\0' * 2**b.bits
示例#4
0
def ruin_bloom(bloomfilename):
    rbloomfilename = git.repo_rel(bloomfilename)
    if not os.path.exists(bloomfilename):
        log("%s\n" % bloomfilename)
        add_error("bloom: %s not found to ruin\n" % rbloomfilename)
        return
    b = bloom.ShaBloom(bloomfilename, readwrite=True, expected=1)
    b.map[16:16+2**b.bits] = '\0' * 2**b.bits
示例#5
0
def ruin_bloom(bloomfilename):
    rbloomfilename = git.repo_rel(bloomfilename)
    if not os.path.exists(bloomfilename):
        log("%s\n" % bloomfilename)
        add_error("bloom: %s not found to ruin\n" % rbloomfilename)
        return
    b = bloom.ShaBloom(bloomfilename, readwrite=True, expected=1)
    b.map[16:16+2**b.bits] = '\0' * 2**b.bits
示例#6
0
文件: midx-cmd.py 项目: huerlisi/bup
def check_midx(name):
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % nicename)
    try:
        ix = git.open_idx(name)
    except git.GitError, e:
        add_error('%s: %s' % (name, e))
        return
示例#7
0
def check_midx(name):
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % nicename)
    try:
        ix = git.open_idx(name)
    except git.GitError, e:
        add_error('%s: %s' % (name, e))
        return
示例#8
0
文件: bloom.py 项目: jmberg/bup
def check_bloom(path, bloomfilename, idx):
    rbloomfilename = git.repo_rel(bloomfilename)
    ridx = git.repo_rel(idx)
    if not os.path.exists(bloomfilename):
        log('bloom: %s: does not exist.\n' % path_msg(rbloomfilename))
        return
    b = bloom.ShaBloom(bloomfilename)
    if not b.valid():
        add_error('bloom: %r is invalid.\n' % path_msg(rbloomfilename))
        return
    base = os.path.basename(idx)
    if base not in b.idxnames:
        log('bloom: %s does not contain the idx.\n' % path_msg(rbloomfilename))
        return
    if base == idx:
        idx = os.path.join(path, idx)
    log('bloom: bloom file: %s\n' % path_msg(rbloomfilename))
    log('bloom:   checking %s\n' % path_msg(ridx))
    for objsha in git.open_idx(idx):
        if not b.exists(objsha):
            add_error('bloom: ERROR: object %s missing' % hexstr(objsha))
示例#9
0
def check_bloom(path, bloomfilename, idx):
    rbloomfilename = git.repo_rel(bloomfilename)
    ridx = git.repo_rel(idx)
    if not os.path.exists(bloomfilename):
        log("bloom: %s: does not exist.\n" % rbloomfilename)
        return
    b = bloom.ShaBloom(bloomfilename)
    if not b.valid():
        add_error("bloom: %r is invalid.\n" % rbloomfilename)
        return
    base = os.path.basename(idx)
    if base not in b.idxnames:
        log("bloom: %s does not contain the idx.\n" % rbloomfilename)
        return
    if base == idx:
        idx = os.path.join(path, idx)
    log("bloom: bloom file: %s\n" % rbloomfilename)
    log("bloom:   checking %s\n" % ridx)
    for objsha in git.open_idx(idx):
        if not b.exists(objsha):
            add_error("bloom: ERROR: object %s missing" % str(objsha).encode("hex"))
示例#10
0
文件: midx-cmd.py 项目: huerlisi/bup
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    global _first
    if not outfilename:
        assert(outdir)
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)
    
    inp = []
    total = 0
    allfilenames = []
    for name in infilenames:
        ix = git.open_idx(name)
        inp.append((
            ix.map,
            len(ix),
            ix.sha_ofs,
            isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
            len(allfilenames),
        ))
        for n in ix.idxnames:
            allfilenames.append(os.path.basename(n))
        total += len(ix)
    inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20])))

    if not _first: _first = outdir
    dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
    log('midx: %s%screating from %d files (%d objects).\n'
        % (dirprefix, prefixstr, len(infilenames), total))
    if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
       or ((opt.auto or opt.force) and len(infilenames) < 2) \
       or (opt.force and not total):
        debug1('midx: nothing to do.\n')
        return

    pages = int(total/SHA_PER_PAGE) or 1
    bits = int(math.ceil(math.log(pages, 2)))
    entries = 2**bits
    debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

    unlink(outfilename)
    f = open(outfilename + '.tmp', 'w+b')
    f.write('MIDX')
    f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
    assert(f.tell() == 12)

    f.truncate(12 + 4*entries + 20*total + 4*total)

    fmap = mmap_readwrite(f, close=False)

    count = merge_into(fmap, bits, total, inp)
    del fmap

    f.seek(0, git.SEEK_END)
    f.write('\0'.join(allfilenames))
    f.close()
    os.rename(outfilename + '.tmp', outfilename)

    # this is just for testing
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        print p.idxnames
        assert(len(p) == total)
        for pe, e in p, git.idxmerge(inp, final_progress=False):
            assert(i == pi.next())
            assert(p.exists(i))

    return total, outfilename
示例#11
0
文件: gc.py 项目: xx4h/bup
def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
          verbosity):
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []
    def remove_stale_files(new_pack_prefix):
        if verbosity and new_pack_prefix:
            log('created ' + basename(new_pack_prefix) + '\n')
        for p in ns.stale_files:
            if new_pack_prefix and p.startswith(new_pack_prefix):
                continue  # Don't remove the new pack file
            if verbosity:
                log('removing ' + basename(p) + '\n')
            os.unlink(p)
        if ns.stale_files:  # So git cat-pipe will close them
            cat_pipe.restart()
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=compression,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)

    # FIXME: sanity check .idx names vs .pack names?
    collect_count = 0
    for idx_name in glob.glob(os.path.join(git.repo('objects/pack'), '*.idx')):
        if verbosity:
            qprogress('preserving live data (%d%% complete)\r'
                      % ((float(collect_count) / existing_count) * 100))
        idx = git.open_idx(idx_name)

        idx_live_count = 0
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20 : (i + 1) * 20]
            if live_objects.exists(sha):
                idx_live_count += 1

        collect_count += idx_live_count
        if idx_live_count == 0:
            if verbosity:
                log('deleting %s\n'
                    % git.repo_rel(basename(idx_name)))
            ns.stale_files.append(idx_name)
            ns.stale_files.append(idx_name[:-3] + 'pack')
            continue

        live_frac = idx_live_count / float(len(idx))
        if live_frac > ((100 - threshold) / 100.0):
            if verbosity:
                log('keeping %s (%d%% live)\n' % (git.repo_rel(basename(idx_name)),
                                                  live_frac * 100))
            continue

        if verbosity:
            log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
                                                  live_frac * 100))
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20 : (i + 1) * 20]
            if live_objects.exists(sha):
                item_it = cat_pipe.get(sha.encode('hex'))
                type = item_it.next()
                writer.just_write(sha, type, ''.join(item_it))

        ns.stale_files.append(idx_name)
        ns.stale_files.append(idx_name[:-3] + 'pack')

    if verbosity:
        progress('preserving live data (%d%% complete)\n'
                 % ((float(collect_count) / existing_count) * 100))

    # Nothing should have recreated midx/bloom yet.
    pack_dir = git.repo('objects/pack')
    assert(not os.path.exists(os.path.join(pack_dir, 'bup.bloom')))
    assert(not glob.glob(os.path.join(pack_dir, '*.midx')))

    # try/catch should call writer.abort()?
    # This will finally run midx.
    writer.close()  # Can only change refs (if needed) after this.
    remove_stale_files(None)  # In case we didn't write to the writer.

    if verbosity:
        log('discarded %d%% of objects\n'
            % ((existing_count - count_objects(pack_dir, verbosity))
               / float(existing_count) * 100))
示例#12
0
def _do_midx(outdir,
             outfilename,
             infilenames,
             prefixstr,
             auto=False,
             force=False):
    global _first
    if not outfilename:
        assert (outdir)
        sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
        outfilename = b'%s/midx-%s.midx' % (outdir, sum)

    inp = []
    total = 0
    allfilenames = []
    with ExitStack() as contexts:
        for name in infilenames:
            ix = git.open_idx(name)
            contexts.enter_context(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(reverse=True, key=lambda x: x[0][x[2]:x[2] + 20])

        if not _first: _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
        debug1('midx: %s%screating from %d files (%d objects).\n' %
               (dirprefix, prefixstr, len(infilenames), total))
        if (auto and (total < 1024 and len(infilenames) < 3)) \
           or ((auto or force) and len(infilenames) < 2) \
           or (force and not total):
            debug1('midx: nothing to do.\n')
            return None

        pages = int(total / SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries * 4, bits))

        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'w+b') as f:
            f.write(b'MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert (f.tell() == 12)

            f.truncate(12 + 4 * entries + 20 * total + 4 * total)
            f.flush()
            fdatasync(f.fileno())

            with mmap_readwrite(f, close=False) as fmap:
                count = merge_into(fmap, bits, total, inp)
            f.seek(0, os.SEEK_END)
            f.write(b'\0'.join(allfilenames))

    # This is just for testing (if you enable this, don't clear inp above)
    # if 0:
    #     p = midx.PackMidx(outfilename)
    #     assert(len(p.idxnames) == len(infilenames))
    #     log(repr(p.idxnames) + '\n')
    #     assert(len(p) == total)
    #     for pe, e in p, git.idxmerge(inp, final_progress=False):
    #         pin = next(pi)
    #         assert(i == pin)
    #         assert(p.exists(i))

    return total, outfilename
示例#13
0
文件: gc-cmd.py 项目: tforsberg/bup
def sweep(live_objects, existing_count, cat_pipe, opt):
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        if opt.verbose and new_pack_prefix:
            log('created ' + basename(new_pack_prefix) + '\n')
        for p in ns.stale_files:
            if opt.verbose:
                log('removing ' + basename(p) + '\n')
            os.unlink(p)
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=opt.compress,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)

    # FIXME: sanity check .idx names vs .pack names?
    collect_count = 0
    for idx_name in glob.glob(os.path.join(git.repo('objects/pack'), '*.idx')):
        if opt.verbose:
            qprogress('preserving live data (%d%% complete)\r' %
                      ((float(collect_count) / existing_count) * 100))
        idx = git.open_idx(idx_name)

        idx_live_count = 0
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20:(i + 1) * 20]
            if live_objects.exists(sha):
                idx_live_count += 1

        collect_count += idx_live_count
        if idx_live_count == 0:
            if opt.verbose:
                log('deleting %s\n' % git.repo_rel(basename(idx_name)))
            ns.stale_files.append(idx_name)
            ns.stale_files.append(idx_name[:-3] + 'pack')
            continue

        live_frac = idx_live_count / float(len(idx))
        if live_frac > ((100 - opt.threshold) / 100.0):
            if opt.verbose:
                log('keeping %s (%d%% live)\n' %
                    (git.repo_rel(basename(idx_name)), live_frac * 100))
            continue

        if opt.verbose:
            log('rewriting %s (%.2f%% live)\n' %
                (basename(idx_name), live_frac * 100))
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20:(i + 1) * 20]
            if live_objects.exists(sha):
                item_it = cat_pipe.get(sha.encode('hex'))
                type = item_it.next()
                writer.write(sha, type, ''.join(item_it))

        ns.stale_files.append(idx_name)
        ns.stale_files.append(idx_name[:-3] + 'pack')

    if opt.verbose:
        progress('preserving live data (%d%% complete)\n' %
                 ((float(collect_count) / existing_count) * 100))

    # Nothing should have recreated midx/bloom yet.
    pack_dir = git.repo('objects/pack')
    assert (not os.path.exists(os.path.join(pack_dir, 'bup.bloom')))
    assert (not glob.glob(os.path.join(pack_dir, '*.midx')))

    # try/catch should call writer.abort()?
    # This will finally run midx.
    writer.close()  # Can only change refs (if needed) after this.
    remove_stale_files(None)  # In case we didn't write to the writer.

    if opt.verbose:
        log('discarded %d%% of objects\n' %
            ((existing_count - count_objects(pack_dir)) /
             float(existing_count) * 100))
示例#14
0
def do_bloom(path, outfilename):
    global _first
    b = None
    if os.path.exists(outfilename) and not opt.force:
        b = bloom.ShaBloom(outfilename)
        if not b.valid():
            debug1("bloom: Existing invalid bloom found, regenerating.\n")
            b = None

    add = []
    rest = []
    add_count = 0
    rest_count = 0
    for i,name in enumerate(glob.glob('%s/*.idx' % path)):
        progress('bloom: counting: %d\r' % i)
        ix = git.open_idx(name)
        ixbase = os.path.basename(name)
        if b and (ixbase in b.idxnames):
            rest.append(name)
            rest_count += len(ix)
        else:
            add.append(name)
            add_count += len(ix)
    total = add_count + rest_count

    if not add:
        debug1("bloom: nothing to do.\n")
        return

    if b:
        if len(b) != rest_count:
            debug1("bloom: size %d != idx total %d, regenerating\n"
                   % (len(b), rest_count))
            b = None
        elif (b.bits < bloom.MAX_BLOOM_BITS and
              b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
            debug1("bloom: regenerating: adding %d entries gives "
                   "%.2f%% false positives.\n"
                   % (add_count, b.pfalse_positive(add_count)))
            b = None
        else:
            b = bloom.ShaBloom(outfilename, readwrite=True, expected=add_count)
    if not b: # Need all idxs to build from scratch
        add += rest
        add_count += rest_count
    del rest
    del rest_count

    msg = b is None and 'creating from' or 'adding'
    if not _first: _first = path
    dirprefix = (_first != path) and git.repo_rel(path)+': ' or ''
    progress('bloom: %s%s %d file%s (%d object%s).\n'
        % (dirprefix, msg,
           len(add), len(add)!=1 and 's' or '',
           add_count, add_count!=1 and 's' or ''))

    tfname = None
    if b is None:
        tfname = os.path.join(path, 'bup.tmp.bloom')
        b = bloom.create(tfname, expected=add_count, k=opt.k)
    count = 0
    icount = 0
    for name in add:
        ix = git.open_idx(name)
        qprogress('bloom: writing %.2f%% (%d/%d objects)\r' 
                  % (icount*100.0/add_count, icount, add_count))
        b.add_idx(ix)
        count += 1
        icount += len(ix)

    # Currently, there's an open file object for tfname inside b.
    # Make sure it's closed before rename.
    b.close()

    if tfname:
        os.rename(tfname, outfilename)
示例#15
0
def do_bloom(path, outfilename):
    global _first
    b = None
    if os.path.exists(outfilename) and not opt.force:
        b = bloom.ShaBloom(outfilename)
        if not b.valid():
            debug1("bloom: Existing invalid bloom found, regenerating.\n")
            b = None

    add = []
    rest = []
    add_count = 0
    rest_count = 0
    for i, name in enumerate(glob.glob('%s/*.idx' % path)):
        progress('bloom: counting: %d\r' % i)
        ix = git.open_idx(name)
        ixbase = os.path.basename(name)
        if b and (ixbase in b.idxnames):
            rest.append(name)
            rest_count += len(ix)
        else:
            add.append(name)
            add_count += len(ix)
    total = add_count + rest_count

    if not add:
        debug1("bloom: nothing to do.\n")
        return

    if b:
        if len(b) != rest_count:
            debug1("bloom: size %d != idx total %d, regenerating\n" %
                   (len(b), rest_count))
            b = None
        elif (b.bits < bloom.MAX_BLOOM_BITS
              and b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
            debug1("bloom: regenerating: adding %d entries gives "
                   "%.2f%% false positives.\n" %
                   (add_count, b.pfalse_positive(add_count)))
            b = None
        else:
            b = bloom.ShaBloom(outfilename, readwrite=True, expected=add_count)
    if not b:  # Need all idxs to build from scratch
        add += rest
        add_count += rest_count
    del rest
    del rest_count

    msg = b is None and 'creating from' or 'adding'
    if not _first: _first = path
    dirprefix = (_first != path) and git.repo_rel(path) + ': ' or ''
    progress('bloom: %s%s %d file%s (%d object%s).\n' %
             (dirprefix, msg, len(add), len(add) != 1 and 's'
              or '', add_count, add_count != 1 and 's' or ''))

    tfname = None
    if b is None:
        tfname = os.path.join(path, 'bup.tmp.bloom')
        tf = open(tfname, 'w+')
        b = bloom.create(tfname, f=tf, expected=add_count, k=opt.k)
    count = 0
    icount = 0
    for name in add:
        ix = git.open_idx(name)
        qprogress('bloom: writing %.2f%% (%d/%d objects)\r' %
                  (icount * 100.0 / add_count, icount, add_count))
        b.add_idx(ix)
        count += 1
        icount += len(ix)

    if tfname:
        os.rename(tfname, outfilename)
示例#16
0
def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
          verbosity):
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        if verbosity and new_pack_prefix:
            log('created ' + path_msg(basename(new_pack_prefix)) + '\n')
        for p in ns.stale_files:
            if new_pack_prefix and p.startswith(new_pack_prefix):
                continue  # Don't remove the new pack file
            if verbosity:
                log('removing ' + path_msg(basename(p)) + '\n')
            os.unlink(p)
        if ns.stale_files:  # So git cat-pipe will close them
            cat_pipe.restart()
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=compression,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)
    try:
        # FIXME: sanity check .idx names vs .pack names?
        collect_count = 0
        for idx_name in glob.glob(
                os.path.join(git.repo(b'objects/pack'), b'*.idx')):
            if verbosity:
                qprogress('preserving live data (%d%% complete)\r' %
                          ((float(collect_count) / existing_count) * 100))
            with git.open_idx(idx_name) as idx:
                idx_live_count = 0
                for sha in idx:
                    if live_objects.exists(sha):
                        idx_live_count += 1

                collect_count += idx_live_count
                if idx_live_count == 0:
                    if verbosity:
                        log('deleting %s\n' %
                            path_msg(git.repo_rel(basename(idx_name))))
                    ns.stale_files.append(idx_name)
                    ns.stale_files.append(idx_name[:-3] + b'pack')
                    continue

                live_frac = idx_live_count / float(len(idx))
                if live_frac > ((100 - threshold) / 100.0):
                    if verbosity:
                        log('keeping %s (%d%% live)\n' % (git.repo_rel(
                            basename(idx_name)), live_frac * 100))
                    continue

                if verbosity:
                    log('rewriting %s (%.2f%% live)\n' %
                        (basename(idx_name), live_frac * 100))
                for sha in idx:
                    if live_objects.exists(sha):
                        item_it = cat_pipe.get(hexlify(sha))
                        _, typ, _ = next(item_it)
                        writer.just_write(sha, typ, b''.join(item_it))

                ns.stale_files.append(idx_name)
                ns.stale_files.append(idx_name[:-3] + b'pack')

        if verbosity:
            progress('preserving live data (%d%% complete)\n' %
                     ((float(collect_count) / existing_count) * 100))

        # Nothing should have recreated midx/bloom yet.
        pack_dir = git.repo(b'objects/pack')
        assert (not os.path.exists(os.path.join(pack_dir, b'bup.bloom')))
        assert (not glob.glob(os.path.join(pack_dir, b'*.midx')))

    except BaseException as ex:
        with pending_raise(ex):
            writer.abort()

    # This will finally run midx.
    # Can only change refs (if needed) after this.
    writer.close()

    remove_stale_files(None)  # In case we didn't write to the writer.

    if verbosity:
        log('discarded %d%% of objects\n' %
            ((existing_count - count_objects(pack_dir, verbosity)) /
             float(existing_count) * 100))
示例#17
0
文件: midx-cmd.py 项目: bup/bup
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    global _first
    if not outfilename:
        assert(outdir)
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)
    
    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(reverse=True, key=lambda x: str(x[0][x[2]:x[2]+20]))

        if not _first: _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
           or ((opt.auto or opt.force) and len(infilenames) < 2) \
           or (opt.force and not total):
            debug1('midx: nothing to do.\n')
            return

        pages = int(total/SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'wb') as f:
            f.write('MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert(f.tell() == 12)

            f.truncate(12 + 4*entries + 20*total + 4*total)
            f.flush()
            fdatasync(f.fileno())

            fmap = mmap_readwrite(f, close=False)

            count = merge_into(fmap, bits, total, inp)
            del fmap # Assume this calls msync() now.
            f.seek(0, os.SEEK_END)
            f.write('\0'.join(allfilenames))
    finally:
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None


    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        print p.idxnames
        assert(len(p) == total)
        for pe, e in p, git.idxmerge(inp, final_progress=False):
            pin = next(pi)
            assert(i == pin)
            assert(p.exists(i))

    return total, outfilename
示例#18
0
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    global _first
    if not outfilename:
        assert (outdir)
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)

    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(lambda x, y: cmp(str(y[0][y[2]:y[2] + 20]),
                                  str(x[0][x[2]:x[2] + 20])))

        if not _first: _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir) + ': ' or ''
        debug1('midx: %s%screating from %d files (%d objects).\n' %
               (dirprefix, prefixstr, len(infilenames), total))
        if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
           or ((opt.auto or opt.force) and len(infilenames) < 2) \
           or (opt.force and not total):
            debug1('midx: nothing to do.\n')
            return

        pages = int(total / SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries * 4, bits))

        unlink(outfilename)
        f = open(outfilename + '.tmp', 'w+b')
        f.write('MIDX')
        f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
        assert (f.tell() == 12)

        f.truncate(12 + 4 * entries + 20 * total + 4 * total)
        f.flush()
        fdatasync(f.fileno())

        fmap = mmap_readwrite(f, close=False)

        count = merge_into(fmap, bits, total, inp)
        del fmap  # Assume this calls msync() now.
    finally:
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None

    f.seek(0, os.SEEK_END)
    f.write('\0'.join(allfilenames))
    f.close()
    os.rename(outfilename + '.tmp', outfilename)

    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert (len(p.idxnames) == len(infilenames))
        print p.idxnames
        assert (len(p) == total)
        for pe, e in p, git.idxmerge(inp, final_progress=False):
            pin = pi.next()
            assert (i == pin)
            assert (p.exists(i))

    return total, outfilename
示例#19
0
文件: bloom.py 项目: jmberg/bup
def do_bloom(path, outfilename, k, force):
    global _first
    assert k in (None, 4, 5)
    b = None
    if os.path.exists(outfilename) and not force:
        b = bloom.ShaBloom(outfilename)
        if not b.valid():
            debug1("bloom: Existing invalid bloom found, regenerating.\n")
            b = None

    add = []
    rest = []
    add_count = 0
    rest_count = 0
    for i, name in enumerate(glob.glob(b'%s/*.idx' % path)):
        progress('bloom: counting: %d\r' % i)
        ix = git.open_idx(name)
        ixbase = os.path.basename(name)
        if b and (ixbase in b.idxnames):
            rest.append(name)
            rest_count += len(ix)
        else:
            add.append(name)
            add_count += len(ix)

    if not add:
        debug1("bloom: nothing to do.\n")
        return

    if b:
        if len(b) != rest_count:
            debug1("bloom: size %d != idx total %d, regenerating\n" %
                   (len(b), rest_count))
            b = None
        elif k is not None and k != b.k:
            debug1("bloom: new k %d != existing k %d, regenerating\n" %
                   (k, b.k))
            b = None
        elif (b.bits < bloom.MAX_BLOOM_BITS[b.k]
              and b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
            debug1("bloom: regenerating: adding %d entries gives "
                   "%.2f%% false positives.\n" %
                   (add_count, b.pfalse_positive(add_count)))
            b = None
        else:
            b = bloom.ShaBloom(outfilename, readwrite=True, expected=add_count)
    if not b:  # Need all idxs to build from scratch
        add += rest
        add_count += rest_count
    del rest
    del rest_count

    msg = b is None and 'creating from' or 'adding'
    if not _first: _first = path
    dirprefix = (_first != path) and git.repo_rel(path) + b': ' or b''
    progress('bloom: %s%s %d file%s (%d object%s).\r' %
             (path_msg(dirprefix), msg, len(add), len(add) != 1 and 's'
              or '', add_count, add_count != 1 and 's' or ''))

    tfname = None
    if b is None:
        tfname = os.path.join(path, b'bup.tmp.bloom')
        b = bloom.create(tfname, expected=add_count, k=k)
    count = 0
    icount = 0
    for name in add:
        ix = git.open_idx(name)
        qprogress('bloom: writing %.2f%% (%d/%d objects)\r' %
                  (icount * 100.0 / add_count, icount, add_count))
        b.add_idx(ix)
        count += 1
        icount += len(ix)

    # Currently, there's an open file object for tfname inside b.
    # Make sure it's closed before rename.
    b.close()

    if tfname:
        os.rename(tfname, outfilename)