Exemplo n.º 1
0
Arquivo: midx.py Projeto: bup/bup
def check_midx(name):
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % path_msg(nicename))
    try:
        ix = git.open_idx(name)
    except git.GitError as e:
        add_error('%s: %s' % (path_msg(name), e))
        return
    for count,subname in enumerate(ix.idxnames):
        sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
        for ecount,e in enumerate(sub):
            if not (ecount % 1234):
                qprogress('  %d/%d: %s %d/%d\r' 
                          % (count, len(ix.idxnames),
                             git.shorten_hash(subname).decode('ascii'),
                             ecount, len(sub)))
            if not sub.exists(e):
                add_error("%s: %s: %s missing from idx"
                          % (path_msg(nicename),
                             git.shorten_hash(subname).decode('ascii'),
                             hexstr(e)))
            if not ix.exists(e):
                add_error("%s: %s: %s missing from midx"
                          % (path_msg(nicename),
                             git.shorten_hash(subname).decode('ascii'),
                             hexstr(e)))
    prev = None
    for ecount,e in enumerate(ix):
        if not (ecount % 1234):
            qprogress('  Ordering: %d/%d\r' % (ecount, len(ix)))
        if e and prev and not e >= prev:
            add_error('%s: ordering error: %s < %s'
                      % (nicename, hexstr(e), hexstr(prev)))
        prev = e
Exemplo n.º 2
0
def test_midx_refreshing(tmpdir):
    environ[b'BUP_DIR'] = bupdir = tmpdir
    git.init_repo(bupdir)
    c = client.Client(bupdir, create=True)
    rw = c.new_packwriter()
    rw.new_blob(s1)
    p1base = rw.breakpoint()
    p1name = os.path.join(c.cachedir, p1base)
    s1sha = rw.new_blob(s1)  # should not be written; it's already in p1
    s2sha = rw.new_blob(s2)
    p2base = rw.close()
    p2name = os.path.join(c.cachedir, p2base)
    del rw

    pi = git.PackIdxList(bupdir + b'/objects/pack')
    assert len(pi.packs) == 2
    pi.refresh()
    assert len(pi.packs) == 2
    assert sorted([os.path.basename(i.name)
                   for i in pi.packs]) == sorted([p1base, p2base])

    p1 = git.open_idx(p1name)
    assert p1.exists(s1sha)
    p2 = git.open_idx(p2name)
    assert not p2.exists(s1sha)
    assert p2.exists(s2sha)

    subprocess.call([path.exe(), b'midx', b'-f'])
    pi.refresh()
    assert len(pi.packs) == 1
    pi.refresh(skip_midx=True)
    assert len(pi.packs) == 2
    pi.refresh(skip_midx=False)
    assert len(pi.packs) == 1
Exemplo n.º 3
0
def check_midx(name):
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % nicename)
    try:
        ix = git.open_idx(name)
    except git.GitError as e:
        add_error('%s: %s' % (name, e))
        return
    for count,subname in enumerate(ix.idxnames):
        sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
        for ecount,e in enumerate(sub):
            if not (ecount % 1234):
                qprogress('  %d/%d: %s %d/%d\r' 
                          % (count, len(ix.idxnames),
                             git.shorten_hash(subname), ecount, len(sub)))
            if not sub.exists(e):
                add_error("%s: %s: %s missing from idx"
                          % (nicename, git.shorten_hash(subname),
                             str(e).encode('hex')))
            if not ix.exists(e):
                add_error("%s: %s: %s missing from midx"
                          % (nicename, git.shorten_hash(subname),
                             str(e).encode('hex')))
    prev = None
    for ecount,e in enumerate(ix):
        if not (ecount % 1234):
            qprogress('  Ordering: %d/%d\r' % (ecount, len(ix)))
        if not e >= prev:
            add_error('%s: ordering error: %s < %s'
                      % (nicename,
                         str(e).encode('hex'), str(prev).encode('hex')))
        prev = e
Exemplo n.º 4
0
def do_midx_dir(path):
    already = {}
    sizes = {}
    if opt.force and not opt.auto:
        midxs = []  # don't use existing midx files
    else:
        midxs = glob.glob('%s/*.midx' % path)
        contents = {}
        for mname in midxs:
            m = git.open_idx(mname)
            contents[mname] = [('%s/%s' % (path, i)) for i in m.idxnames]
            sizes[mname] = len(m)

        # sort the biggest+newest midxes first, so that we can eliminate
        # smaller (or older) redundant ones that come later in the list
        midxs.sort(key=lambda ix: (-sizes[ix], -xstat.stat(ix).st_mtime))

        for mname in midxs:
            any = 0
            for iname in contents[mname]:
                if not already.get(iname):
                    already[iname] = 1
                    any = 1
            if not any:
                debug1('%r is redundant\n' % mname)
                unlink(mname)
                already[mname] = 1

    midxs = [k for k in midxs if not already.get(k)]
    idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]

    for iname in idxs:
        i = git.open_idx(iname)
        sizes[iname] = len(i)

    all = [(sizes[n], n) for n in (midxs + idxs)]

    # FIXME: what are the optimal values?  Does this make sense?
    DESIRED_HWM = opt.force and 1 or 5
    DESIRED_LWM = opt.force and 1 or 2
    existed = dict((name, 1) for sz, name in all)
    debug1('midx: %d indexes; want no more than %d.\n' %
           (len(all), DESIRED_HWM))
    if len(all) <= DESIRED_HWM:
        debug1('midx: nothing to do.\n')
    while len(all) > DESIRED_HWM:
        all.sort()
        part1 = [name for sz, name in all[:len(all) - DESIRED_LWM + 1]]
        part2 = all[len(all) - DESIRED_LWM + 1:]
        all = list(do_midx_group(path, part1)) + part2
        if len(all) > DESIRED_HWM:
            debug1('\nStill too many indexes (%d > %d).  Merging again.\n' %
                   (len(all), DESIRED_HWM))

    if opt['print']:
        for sz, name in all:
            if not existed.get(name):
                print name
Exemplo n.º 5
0
def do_midx_dir(path):
    already = {}
    sizes = {}
    if opt.force and not opt.auto:
        midxs = []   # don't use existing midx files
    else:
        midxs = glob.glob('%s/*.midx' % path)
        contents = {}
        for mname in midxs:
            m = git.open_idx(mname)
            contents[mname] = [('%s/%s' % (path,i)) for i in m.idxnames]
            sizes[mname] = len(m)
                    
        # sort the biggest+newest midxes first, so that we can eliminate
        # smaller (or older) redundant ones that come later in the list
        midxs.sort(key=lambda ix: (-sizes[ix], -os.stat(ix).st_mtime))
        
        for mname in midxs:
            any = 0
            for iname in contents[mname]:
                if not already.get(iname):
                    already[iname] = 1
                    any = 1
            if not any:
                debug1('%r is redundant\n' % mname)
                unlink(mname)
                already[mname] = 1

    midxs = [k for k in midxs if not already.get(k)]
    idxs = [k for k in glob.glob('%s/*.idx' % path) if not already.get(k)]

    for iname in idxs:
        i = git.open_idx(iname)
        sizes[iname] = len(i)

    all = [(sizes[n],n) for n in (midxs + idxs)]
    
    # FIXME: what are the optimal values?  Does this make sense?
    DESIRED_HWM = opt.force and 1 or 5
    DESIRED_LWM = opt.force and 1 or 2
    existed = dict((name,1) for sz,name in all)
    debug1('midx: %d indexes; want no more than %d.\n' 
           % (len(all), DESIRED_HWM))
    if len(all) <= DESIRED_HWM:
        debug1('midx: nothing to do.\n')
    while len(all) > DESIRED_HWM:
        all.sort()
        part1 = [name for sz,name in all[:len(all)-DESIRED_LWM+1]]
        part2 = all[len(all)-DESIRED_LWM+1:]
        all = list(do_midx_group(path, part1)) + part2
        if len(all) > DESIRED_HWM:
            debug1('\nStill too many indexes (%d > %d).  Merging again.\n'
                   % (len(all), DESIRED_HWM))

    if opt['print']:
        for sz,name in all:
            if not existed.get(name):
                print name
Exemplo n.º 6
0
def send_index(conn, name):
    _init_session()
    assert name.find("/") < 0
    assert name.endswith(".idx")
    idx = git.open_idx(git.repo("objects/pack/%s" % name))
    conn.write(struct.pack("!I", len(idx.map)))
    conn.write(idx.map)
    conn.ok()
Exemplo n.º 7
0
def send_index(conn, name):
    _init_session()
    assert(name.find('/') < 0)
    assert(name.endswith('.idx'))
    idx = git.open_idx(git.repo('objects/pack/%s' % name))
    conn.write(struct.pack('!I', len(idx.map)))
    conn.write(idx.map)
    conn.ok()
Exemplo n.º 8
0
def check_midx(name):
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % nicename)
    try:
        ix = git.open_idx(name)
    except git.GitError, e:
        add_error('%s: %s' % (name, e))
        return
Exemplo n.º 9
0
def send_index(conn, name):
    git.check_repo_or_die()
    assert(name.find('/') < 0)
    assert(name.endswith('.idx'))
    idx = git.open_idx(git.repo('objects/pack/%s' % name))
    conn.write(struct.pack('!I', len(idx.map)))
    conn.write(idx.map)
    conn.ok()
Exemplo n.º 10
0
def send_index(conn, name):
    _init_session()
    assert name.find(b'/') < 0
    assert name.endswith(b'.idx')
    with git.open_idx(git.repo(b'objects/pack/%s' % name)) as idx:
        conn.write(struct.pack('!I', len(idx.map)))
        conn.write(idx.map)
    conn.ok()
Exemplo n.º 11
0
def check_midx(name):
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % nicename)
    try:
        ix = git.open_idx(name)
    except git.GitError, e:
        add_error('%s: %s' % (name, e))
        return
Exemplo n.º 12
0
Arquivo: gc.py Projeto: xx4h/bup
def count_objects(dir, verbosity):
    # For now we'll just use open_idx(), but we could probably be much
    # more efficient since all we need is a single integer (the last
    # fanout entry) from each index.
    object_count = 0
    indexes = glob.glob(os.path.join(dir, '*.idx'))
    for i, idx_name in enumerate(indexes):
        if verbosity:
            log('found %d objects (%d/%d %s)\r'
                % (object_count, i + 1, len(indexes), basename(idx_name)))
        idx = git.open_idx(idx_name)
        object_count += len(idx)
    return object_count
Exemplo n.º 13
0
def test_midx_refreshing():
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tclient-')
    os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = tmpdir
    git.init_repo(bupdir)
    c = client.Client(bupdir, create=True)
    rw = c.new_packwriter()
    rw.new_blob(s1)
    p1base = rw.breakpoint()
    p1name = os.path.join(c.cachedir, p1base)
    s1sha = rw.new_blob(s1)  # should not be written; it's already in p1
    s2sha = rw.new_blob(s2)
    p2base = rw.close()
    p2name = os.path.join(c.cachedir, p2base)
    del rw

    pi = git.PackIdxList(bupdir + '/objects/pack')
    WVPASSEQ(len(pi.packs), 2)
    pi.refresh()
    WVPASSEQ(len(pi.packs), 2)
    WVPASSEQ(sorted([os.path.basename(i.name) for i in pi.packs]),
             sorted([p1base, p2base]))

    p1 = git.open_idx(p1name)
    WVPASS(p1.exists(s1sha))
    p2 = git.open_idx(p2name)
    WVFAIL(p2.exists(s1sha))
    WVPASS(p2.exists(s2sha))

    subprocess.call([bupmain, 'midx', '-f'])
    pi.refresh()
    WVPASSEQ(len(pi.packs), 1)
    pi.refresh(skip_midx=True)
    WVPASSEQ(len(pi.packs), 2)
    pi.refresh(skip_midx=False)
    WVPASSEQ(len(pi.packs), 1)
    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
Exemplo n.º 14
0
Arquivo: tclient.py Projeto: 3v/bup
def test_midx_refreshing():
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tclient-')
    os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = tmpdir
    git.init_repo(bupdir)
    c = client.Client(bupdir, create=True)
    rw = c.new_packwriter()
    rw.new_blob(s1)
    p1base = rw.breakpoint()
    p1name = os.path.join(c.cachedir, p1base)
    s1sha = rw.new_blob(s1)  # should not be written; it's already in p1
    s2sha = rw.new_blob(s2)
    p2base = rw.close()
    p2name = os.path.join(c.cachedir, p2base)
    del rw

    pi = git.PackIdxList(bupdir + '/objects/pack')
    WVPASSEQ(len(pi.packs), 2)
    pi.refresh()
    WVPASSEQ(len(pi.packs), 2)
    WVPASSEQ(sorted([os.path.basename(i.name) for i in pi.packs]),
             sorted([p1base, p2base]))

    p1 = git.open_idx(p1name)
    WVPASS(p1.exists(s1sha))
    p2 = git.open_idx(p2name)
    WVFAIL(p2.exists(s1sha))
    WVPASS(p2.exists(s2sha))

    subprocess.call([bupmain, 'midx', '-f'])
    pi.refresh()
    WVPASSEQ(len(pi.packs), 1)
    pi.refresh(skip_midx=True)
    WVPASSEQ(len(pi.packs), 2)
    pi.refresh(skip_midx=False)
    WVPASSEQ(len(pi.packs), 1)
    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
Exemplo n.º 15
0
Arquivo: gc.py Projeto: westes/bup
def count_objects(dir, verbosity):
    # For now we'll just use open_idx(), but we could probably be much
    # more efficient since all we need is a single integer (the last
    # fanout entry) from each index.
    object_count = 0
    indexes = glob.glob(os.path.join(dir, '*.idx'))
    for i, idx_name in enumerate(indexes):
        if verbosity:
            log('found %d objects (%d/%d %s)\r' %
                (object_count, i + 1, len(indexes), basename(idx_name)))
        idx = git.open_idx(idx_name)
        object_count += len(idx)
    return object_count
Exemplo n.º 16
0
Arquivo: tclient.py Projeto: yafey/bup
def test_midx_refreshing():
    with no_lingering_errors():
        with test_tempdir('bup-tclient-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
            os.environ['BUP_DIR'] = bupdir = tmpdir
            git.init_repo(bupdir)
            c = client.Client(bupdir, create=True)
            rw = c.new_packwriter()
            rw.new_blob(s1)
            p1base = rw.breakpoint()
            p1name = os.path.join(c.cachedir, p1base)
            s1sha = rw.new_blob(
                s1)  # should not be written; it's already in p1
            s2sha = rw.new_blob(s2)
            p2base = rw.close()
            p2name = os.path.join(c.cachedir, p2base)
            del rw

            pi = git.PackIdxList(bupdir + '/objects/pack')
            WVPASSEQ(len(pi.packs), 2)
            pi.refresh()
            WVPASSEQ(len(pi.packs), 2)
            WVPASSEQ(sorted([os.path.basename(i.name) for i in pi.packs]),
                     sorted([p1base, p2base]))

            p1 = git.open_idx(p1name)
            WVPASS(p1.exists(s1sha))
            p2 = git.open_idx(p2name)
            WVFAIL(p2.exists(s1sha))
            WVPASS(p2.exists(s2sha))

            subprocess.call([bupmain, 'midx', '-f'])
            pi.refresh()
            WVPASSEQ(len(pi.packs), 1)
            pi.refresh(skip_midx=True)
            WVPASSEQ(len(pi.packs), 2)
            pi.refresh(skip_midx=False)
            WVPASSEQ(len(pi.packs), 1)
Exemplo n.º 17
0
def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    handle_ctrl_c()

    opt.find = argv_bytes(opt.find) if opt.find else b''

    if not extra:
        o.fatal('you must provide at least one filename')

    if len(opt.find) > 40:
        o.fatal('--find parameter must be <= 40 chars long')
    else:
        if len(opt.find) % 2:
            s = opt.find + b'0'
        else:
            s = opt.find
        try:
            bin = unhexlify(s)
        except TypeError:
            o.fatal('--find parameter is not a valid hex string')

    sys.stdout.flush()
    out = byte_stream(sys.stdout)
    find = opt.find.lower()
    count = 0
    idxfiles = [argv_bytes(x) for x in extra]
    for name in idxfiles:
        try:
            ix = git.open_idx(name)
        except git.GitError as e:
            add_error('%r: %s' % (name, e))
            continue
        if len(opt.find) == 40:
            if ix.exists(bin):
                out.write(b'%s %s\n' % (name, find))
        else:
            # slow, exhaustive search
            for _i in ix:
                i = hexlify(_i)
                if i.startswith(find):
                    out.write(b'%s %s\n' % (name, i))
                qprogress('Searching: %d\r' % count)
                count += 1

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' %
            len(saved_errors))
        sys.exit(1)
Exemplo n.º 18
0
def test_midx_refreshing():
    with no_lingering_errors():
        with test_tempdir('bup-tclient-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
            os.environ['BUP_DIR'] = bupdir = tmpdir
            git.init_repo(bupdir)
            c = client.Client(bupdir, create=True)
            rw = c.new_packwriter()
            rw.new_blob(s1)
            p1base = rw.breakpoint()
            p1name = os.path.join(c.cachedir, p1base)
            s1sha = rw.new_blob(s1)  # should not be written; it's already in p1
            s2sha = rw.new_blob(s2)
            p2base = rw.close()
            p2name = os.path.join(c.cachedir, p2base)
            del rw

            pi = git.PackIdxList(bupdir + '/objects/pack')
            WVPASSEQ(len(pi.packs), 2)
            pi.refresh()
            WVPASSEQ(len(pi.packs), 2)
            WVPASSEQ(sorted([os.path.basename(i.name) for i in pi.packs]),
                     sorted([p1base, p2base]))

            p1 = git.open_idx(p1name)
            WVPASS(p1.exists(s1sha))
            p2 = git.open_idx(p2name)
            WVFAIL(p2.exists(s1sha))
            WVPASS(p2.exists(s2sha))

            subprocess.call([bupmain, 'midx', '-f'])
            pi.refresh()
            WVPASSEQ(len(pi.packs), 1)
            pi.refresh(skip_midx=True)
            WVPASSEQ(len(pi.packs), 2)
            pi.refresh(skip_midx=False)
            WVPASSEQ(len(pi.packs), 1)
Exemplo n.º 19
0
def testpacks():
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tgit-')
    os.environ['BUP_MAIN_EXE'] = bup_exe
    os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
    git.init_repo(bupdir)
    git.verbose = 1

    w = git.PackWriter()
    w.new_blob(os.urandom(100))
    w.new_blob(os.urandom(100))
    w.abort()

    w = git.PackWriter()
    hashes = []
    nobj = 1000
    for i in range(nobj):
        hashes.append(w.new_blob(str(i)))
    log('\n')
    nameprefix = w.close()
    print repr(nameprefix)
    WVPASS(os.path.exists(nameprefix + '.pack'))
    WVPASS(os.path.exists(nameprefix + '.idx'))

    r = git.open_idx(nameprefix + '.idx')
    print repr(r.fanout)

    for i in range(nobj):
        WVPASS(r.find_offset(hashes[i]) > 0)
    WVPASS(r.exists(hashes[99]))
    WVFAIL(r.exists('\0' * 20))

    pi = iter(r)
    for h in sorted(hashes):
        WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))

    WVFAIL(r.find_offset('\0' * 20))

    r = git.PackIdxList(bupdir + '/objects/pack')
    WVPASS(r.exists(hashes[5]))
    WVPASS(r.exists(hashes[6]))
    WVFAIL(r.exists('\0' * 20))
    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
Exemplo n.º 20
0
Arquivo: tgit.py Projeto: senseb/bup
def testpacks():
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tgit-')
    os.environ['BUP_MAIN_EXE'] = bup_exe
    os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
    git.init_repo(bupdir)
    git.verbose = 1

    w = git.PackWriter()
    w.new_blob(os.urandom(100))
    w.new_blob(os.urandom(100))
    w.abort()

    w = git.PackWriter()
    hashes = []
    nobj = 1000
    for i in range(nobj):
        hashes.append(w.new_blob(str(i)))
    log('\n')
    nameprefix = w.close()
    print repr(nameprefix)
    WVPASS(os.path.exists(nameprefix + '.pack'))
    WVPASS(os.path.exists(nameprefix + '.idx'))

    r = git.open_idx(nameprefix + '.idx')
    print repr(r.fanout)

    for i in range(nobj):
        WVPASS(r.find_offset(hashes[i]) > 0)
    WVPASS(r.exists(hashes[99]))
    WVFAIL(r.exists('\0'*20))

    pi = iter(r)
    for h in sorted(hashes):
        WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))

    WVFAIL(r.find_offset('\0'*20))

    r = git.PackIdxList(bupdir + '/objects/pack')
    WVPASS(r.exists(hashes[5]))
    WVPASS(r.exists(hashes[6]))
    WVFAIL(r.exists('\0'*20))
    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
Exemplo n.º 21
0
def testpacks():
    with no_lingering_errors():
        with test_tempdir('bup-tgit-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = bup_exe
            os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
            git.init_repo(bupdir)
            git.verbose = 1

            w = git.PackWriter()
            w.new_blob(os.urandom(100))
            w.new_blob(os.urandom(100))
            w.abort()

            w = git.PackWriter()
            hashes = []
            nobj = 1000
            for i in range(nobj):
                hashes.append(w.new_blob(str(i)))
            log('\n')
            nameprefix = w.close()
            print repr(nameprefix)
            WVPASS(os.path.exists(nameprefix + '.pack'))
            WVPASS(os.path.exists(nameprefix + '.idx'))

            r = git.open_idx(nameprefix + '.idx')
            print repr(r.fanout)

            for i in range(nobj):
                WVPASS(r.find_offset(hashes[i]) > 0)
            WVPASS(r.exists(hashes[99]))
            WVFAIL(r.exists('\0'*20))

            pi = iter(r)
            for h in sorted(hashes):
                WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))

            WVFAIL(r.find_offset('\0'*20))

            r = git.PackIdxList(bupdir + '/objects/pack')
            WVPASS(r.exists(hashes[5]))
            WVPASS(r.exists(hashes[6]))
            WVFAIL(r.exists('\0'*20))
Exemplo n.º 22
0
Arquivo: tgit.py Projeto: sidiandi/bup
def testpacks():
    with no_lingering_errors():
        with test_tempdir('bup-tgit-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = bup_exe
            os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
            git.init_repo(bupdir)
            git.verbose = 1

            w = git.PackWriter()
            w.new_blob(os.urandom(100))
            w.new_blob(os.urandom(100))
            w.abort()

            w = git.PackWriter()
            hashes = []
            nobj = 1000
            for i in range(nobj):
                hashes.append(w.new_blob(str(i)))
            log('\n')
            nameprefix = w.close()
            print repr(nameprefix)
            WVPASS(os.path.exists(nameprefix + '.pack'))
            WVPASS(os.path.exists(nameprefix + '.idx'))

            r = git.open_idx(nameprefix + '.idx')
            print repr(r.fanout)

            for i in range(nobj):
                WVPASS(r.find_offset(hashes[i]) > 0)
            WVPASS(r.exists(hashes[99]))
            WVFAIL(r.exists('\0' * 20))

            pi = iter(r)
            for h in sorted(hashes):
                WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))

            WVFAIL(r.find_offset('\0' * 20))

            r = git.PackIdxList(bupdir + '/objects/pack')
            WVPASS(r.exists(hashes[5]))
            WVPASS(r.exists(hashes[6]))
            WVFAIL(r.exists('\0' * 20))
Exemplo n.º 23
0
Arquivo: tgit.py Projeto: zzmjohn/bup
def testpacks():
    with no_lingering_errors():
        with test_tempdir(b'bup-tgit-') as tmpdir:
            environ[b'BUP_DIR'] = bupdir = tmpdir + b'/bup'
            git.init_repo(bupdir)
            git.verbose = 1

            w = git.PackWriter()
            w.new_blob(os.urandom(100))
            w.new_blob(os.urandom(100))
            w.abort()

            w = git.PackWriter()
            hashes = []
            nobj = 1000
            for i in range(nobj):
                hashes.append(w.new_blob(b'%d' % i))
            log('\n')
            nameprefix = w.close()
            print(repr(nameprefix))
            WVPASS(os.path.exists(nameprefix + b'.pack'))
            WVPASS(os.path.exists(nameprefix + b'.idx'))

            r = git.open_idx(nameprefix + b'.idx')
            print(repr(r.fanout))

            for i in range(nobj):
                WVPASS(r.find_offset(hashes[i]) > 0)
            WVPASS(r.exists(hashes[99]))
            WVFAIL(r.exists(b'\0'*20))

            pi = iter(r)
            for h in sorted(hashes):
                WVPASSEQ(hexlify(next(pi)), hexlify(h))

            WVFAIL(r.find_offset(b'\0'*20))

            r = git.PackIdxList(bupdir + b'/objects/pack')
            WVPASS(r.exists(hashes[5]))
            WVPASS(r.exists(hashes[6]))
            WVFAIL(r.exists(b'\0'*20))
Exemplo n.º 24
0
def testpacks():
    os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = 'pybuptest.tmp'
    subprocess.call(['rm', '-rf', bupdir])
    git.init_repo(bupdir)
    git.verbose = 1

    w = git.PackWriter()
    w.new_blob(os.urandom(100))
    w.new_blob(os.urandom(100))
    w.abort()

    w = git.PackWriter()
    hashes = []
    nobj = 1000
    for i in range(nobj):
        hashes.append(w.new_blob(str(i)))
    log('\n')
    nameprefix = w.close()
    print repr(nameprefix)
    WVPASS(os.path.exists(nameprefix + '.pack'))
    WVPASS(os.path.exists(nameprefix + '.idx'))

    r = git.open_idx(nameprefix + '.idx')
    print repr(r.fanout)

    for i in range(nobj):
        WVPASS(r.find_offset(hashes[i]) > 0)
    WVPASS(r.exists(hashes[99]))
    WVFAIL(r.exists('\0' * 20))

    pi = iter(r)
    for h in sorted(hashes):
        WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))

    WVFAIL(r.find_offset('\0' * 20))

    r = git.PackIdxList('pybuptest.tmp/objects/pack')
    WVPASS(r.exists(hashes[5]))
    WVPASS(r.exists(hashes[6]))
    WVFAIL(r.exists('\0' * 20))
Exemplo n.º 25
0
Arquivo: tgit.py Projeto: Kelimion/bup
def testpacks():
    os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = 'pybuptest.tmp'
    subprocess.call(['rm','-rf', bupdir])
    git.init_repo(bupdir)
    git.verbose = 1

    w = git.PackWriter()
    w.new_blob(os.urandom(100))
    w.new_blob(os.urandom(100))
    w.abort()
    
    w = git.PackWriter()
    hashes = []
    nobj = 1000
    for i in range(nobj):
        hashes.append(w.new_blob(str(i)))
    log('\n')
    nameprefix = w.close()
    print repr(nameprefix)
    WVPASS(os.path.exists(nameprefix + '.pack'))
    WVPASS(os.path.exists(nameprefix + '.idx'))

    r = git.open_idx(nameprefix + '.idx')
    print repr(r.fanout)

    for i in range(nobj):
        WVPASS(r.find_offset(hashes[i]) > 0)
    WVPASS(r.exists(hashes[99]))
    WVFAIL(r.exists('\0'*20))

    pi = iter(r)
    for h in sorted(hashes):
        WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))

    WVFAIL(r.find_offset('\0'*20))

    r = git.PackIdxList('pybuptest.tmp/objects/pack')
    WVPASS(r.exists(hashes[5]))
    WVPASS(r.exists(hashes[6]))
    WVFAIL(r.exists('\0'*20))
Exemplo n.º 26
0
def check_bloom(path, bloomfilename, idx):
    rbloomfilename = git.repo_rel(bloomfilename)
    ridx = git.repo_rel(idx)
    if not os.path.exists(bloomfilename):
        log("bloom: %s: does not exist.\n" % rbloomfilename)
        return
    b = bloom.ShaBloom(bloomfilename)
    if not b.valid():
        add_error("bloom: %r is invalid.\n" % rbloomfilename)
        return
    base = os.path.basename(idx)
    if base not in b.idxnames:
        log("bloom: %s does not contain the idx.\n" % rbloomfilename)
        return
    if base == idx:
        idx = os.path.join(path, idx)
    log("bloom: bloom file: %s\n" % rbloomfilename)
    log("bloom:   checking %s\n" % ridx)
    for objsha in git.open_idx(idx):
        if not b.exists(objsha):
            add_error("bloom: ERROR: object %s missing" % str(objsha).encode("hex"))
Exemplo n.º 27
0
Arquivo: bloom.py Projeto: jmberg/bup
def check_bloom(path, bloomfilename, idx):
    rbloomfilename = git.repo_rel(bloomfilename)
    ridx = git.repo_rel(idx)
    if not os.path.exists(bloomfilename):
        log('bloom: %s: does not exist.\n' % path_msg(rbloomfilename))
        return
    b = bloom.ShaBloom(bloomfilename)
    if not b.valid():
        add_error('bloom: %r is invalid.\n' % path_msg(rbloomfilename))
        return
    base = os.path.basename(idx)
    if base not in b.idxnames:
        log('bloom: %s does not contain the idx.\n' % path_msg(rbloomfilename))
        return
    if base == idx:
        idx = os.path.join(path, idx)
    log('bloom: bloom file: %s\n' % path_msg(rbloomfilename))
    log('bloom:   checking %s\n' % path_msg(ridx))
    for objsha in git.open_idx(idx):
        if not b.exists(objsha):
            add_error('bloom: ERROR: object %s missing' % hexstr(objsha))
Exemplo n.º 28
0
Arquivo: tgit.py Projeto: leto/bup
def testpacks():
    git.init_repo('pybuptest.tmp')
    git.verbose = 1

    now = str(time.time())  # hopefully not in any packs yet
    w = git.PackWriter()
    w.write('blob', now)
    w.write('blob', now)
    w.abort()
    
    w = git.PackWriter()
    hashes = []
    nobj = 1000
    for i in range(nobj):
        hashes.append(w.write('blob', str(i)))
    log('\n')
    nameprefix = w.close()
    print repr(nameprefix)
    WVPASS(os.path.exists(nameprefix + '.pack'))
    WVPASS(os.path.exists(nameprefix + '.idx'))

    r = git.open_idx(nameprefix + '.idx')
    print repr(r.fanout)

    for i in range(nobj):
        WVPASS(r.find_offset(hashes[i]) > 0)
    WVPASS(r.exists(hashes[99]))
    WVFAIL(r.exists('\0'*20))

    pi = iter(r)
    for h in sorted(hashes):
        WVPASSEQ(str(pi.next()).encode('hex'), h.encode('hex'))

    WVFAIL(r.find_offset('\0'*20))

    r = git.PackIdxList('pybuptest.tmp/objects/pack')
    WVPASS(r.exists(hashes[5]))
    WVPASS(r.exists(hashes[6]))
    WVFAIL(r.exists('\0'*20))
Exemplo n.º 29
0
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    global _first
    if not outfilename:
        assert (outdir)
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)

    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(lambda x, y: cmp(str(y[0][y[2]:y[2] + 20]),
                                  str(x[0][x[2]:x[2] + 20])))

        if not _first: _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir) + ': ' or ''
        debug1('midx: %s%screating from %d files (%d objects).\n' %
               (dirprefix, prefixstr, len(infilenames), total))
        if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
           or ((opt.auto or opt.force) and len(infilenames) < 2) \
           or (opt.force and not total):
            debug1('midx: nothing to do.\n')
            return

        pages = int(total / SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries * 4, bits))

        unlink(outfilename)
        f = open(outfilename + '.tmp', 'w+b')
        f.write('MIDX')
        f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
        assert (f.tell() == 12)

        f.truncate(12 + 4 * entries + 20 * total + 4 * total)
        f.flush()
        fdatasync(f.fileno())

        fmap = mmap_readwrite(f, close=False)

        count = merge_into(fmap, bits, total, inp)
        del fmap  # Assume this calls msync() now.
    finally:
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None

    f.seek(0, os.SEEK_END)
    f.write('\0'.join(allfilenames))
    f.close()
    os.rename(outfilename + '.tmp', outfilename)

    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert (len(p.idxnames) == len(infilenames))
        print p.idxnames
        assert (len(p) == total)
        for pe, e in p, git.idxmerge(inp, final_progress=False):
            pin = pi.next()
            assert (i == pin)
            assert (p.exists(i))

    return total, outfilename
Exemplo n.º 30
0
Arquivo: midx-cmd.py Projeto: bup/bup
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    global _first
    if not outfilename:
        assert(outdir)
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)
    
    inp = []
    total = 0
    allfilenames = []
    midxs = []
    try:
        for name in infilenames:
            ix = git.open_idx(name)
            midxs.append(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(reverse=True, key=lambda x: str(x[0][x[2]:x[2]+20]))

        if not _first: _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
        debug1('midx: %s%screating from %d files (%d objects).\n'
               % (dirprefix, prefixstr, len(infilenames), total))
        if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
           or ((opt.auto or opt.force) and len(infilenames) < 2) \
           or (opt.force and not total):
            debug1('midx: nothing to do.\n')
            return

        pages = int(total/SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'wb') as f:
            f.write('MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert(f.tell() == 12)

            f.truncate(12 + 4*entries + 20*total + 4*total)
            f.flush()
            fdatasync(f.fileno())

            fmap = mmap_readwrite(f, close=False)

            count = merge_into(fmap, bits, total, inp)
            del fmap # Assume this calls msync() now.
            f.seek(0, os.SEEK_END)
            f.write('\0'.join(allfilenames))
    finally:
        for ix in midxs:
            if isinstance(ix, midx.PackMidx):
                ix.close()
        midxs = None
        inp = None


    # This is just for testing (if you enable this, don't clear inp above)
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        print p.idxnames
        assert(len(p) == total)
        for pe, e in p, git.idxmerge(inp, final_progress=False):
            pin = next(pi)
            assert(i == pin)
            assert(p.exists(i))

    return total, outfilename
Exemplo n.º 31
0
def _do_midx(outdir,
             outfilename,
             infilenames,
             prefixstr,
             auto=False,
             force=False):
    global _first
    if not outfilename:
        assert (outdir)
        sum = hexlify(Sha1(b'\0'.join(infilenames)).digest())
        outfilename = b'%s/midx-%s.midx' % (outdir, sum)

    inp = []
    total = 0
    allfilenames = []
    with ExitStack() as contexts:
        for name in infilenames:
            ix = git.open_idx(name)
            contexts.enter_context(ix)
            inp.append((
                ix.map,
                len(ix),
                ix.sha_ofs,
                isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
                len(allfilenames),
            ))
            for n in ix.idxnames:
                allfilenames.append(os.path.basename(n))
            total += len(ix)
        inp.sort(reverse=True, key=lambda x: x[0][x[2]:x[2] + 20])

        if not _first: _first = outdir
        dirprefix = (_first != outdir) and git.repo_rel(outdir) + b': ' or b''
        debug1('midx: %s%screating from %d files (%d objects).\n' %
               (dirprefix, prefixstr, len(infilenames), total))
        if (auto and (total < 1024 and len(infilenames) < 3)) \
           or ((auto or force) and len(infilenames) < 2) \
           or (force and not total):
            debug1('midx: nothing to do.\n')
            return None

        pages = int(total / SHA_PER_PAGE) or 1
        bits = int(math.ceil(math.log(pages, 2)))
        entries = 2**bits
        debug1('midx: table size: %d (%d bits)\n' % (entries * 4, bits))

        unlink(outfilename)
        with atomically_replaced_file(outfilename, 'w+b') as f:
            f.write(b'MIDX')
            f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
            assert (f.tell() == 12)

            f.truncate(12 + 4 * entries + 20 * total + 4 * total)
            f.flush()
            fdatasync(f.fileno())

            with mmap_readwrite(f, close=False) as fmap:
                count = merge_into(fmap, bits, total, inp)
            f.seek(0, os.SEEK_END)
            f.write(b'\0'.join(allfilenames))

    # This is just for testing (if you enable this, don't clear inp above)
    # if 0:
    #     p = midx.PackMidx(outfilename)
    #     assert(len(p.idxnames) == len(infilenames))
    #     log(repr(p.idxnames) + '\n')
    #     assert(len(p) == total)
    #     for pe, e in p, git.idxmerge(inp, final_progress=False):
    #         pin = next(pi)
    #         assert(i == pin)
    #         assert(p.exists(i))

    return total, outfilename
Exemplo n.º 32
0
        mf -= 20  # just a safety margin
    else:
        mf -= 6  # minimum safety margin
    return mf


def check_midx(name):
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % nicename)
    try:
        ix = git.open_idx(name)
    except git.GitError, e:
        add_error('%s: %s' % (name, e))
        return
    for count, subname in enumerate(ix.idxnames):
        sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
        for ecount, e in enumerate(sub):
            if not (ecount % 1234):
                qprogress('  %d/%d: %s %d/%d\r' % (count, len(
                    ix.idxnames), git.shorten_hash(subname), ecount, len(sub)))
            if not sub.exists(e):
                add_error("%s: %s: %s missing from idx" %
                          (nicename, git.shorten_hash(subname),
                           str(e).encode('hex')))
            if not ix.exists(e):
                add_error("%s: %s: %s missing from midx" %
                          (nicename, git.shorten_hash(subname),
                           str(e).encode('hex')))
    prev = None
    for ecount, e in enumerate(ix):
        if not (ecount % 1234):
Exemplo n.º 33
0
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    global _first
    if not outfilename:
        assert(outdir)
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)
    
    inp = []
    total = 0
    allfilenames = []
    for name in infilenames:
        ix = git.open_idx(name)
        inp.append((
            ix.map,
            len(ix),
            ix.sha_ofs,
            isinstance(ix, midx.PackMidx) and ix.which_ofs or 0,
            len(allfilenames),
        ))
        for n in ix.idxnames:
            allfilenames.append(os.path.basename(n))
        total += len(ix)
    inp.sort(lambda x,y: cmp(str(y[0][y[2]:y[2]+20]),str(x[0][x[2]:x[2]+20])))

    if not _first: _first = outdir
    dirprefix = (_first != outdir) and git.repo_rel(outdir)+': ' or ''
    log('midx: %s%screating from %d files (%d objects).\n'
        % (dirprefix, prefixstr, len(infilenames), total))
    if (opt.auto and (total < 1024 and len(infilenames) < 3)) \
       or ((opt.auto or opt.force) and len(infilenames) < 2) \
       or (opt.force and not total):
        debug1('midx: nothing to do.\n')
        return

    pages = int(total/SHA_PER_PAGE) or 1
    bits = int(math.ceil(math.log(pages, 2)))
    entries = 2**bits
    debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))

    unlink(outfilename)
    f = open(outfilename + '.tmp', 'w+b')
    f.write('MIDX')
    f.write(struct.pack('!II', midx.MIDX_VERSION, bits))
    assert(f.tell() == 12)

    f.truncate(12 + 4*entries + 20*total + 4*total)

    fmap = mmap_readwrite(f, close=False)

    count = merge_into(fmap, bits, total, inp)
    del fmap

    f.seek(0, git.SEEK_END)
    f.write('\0'.join(allfilenames))
    f.close()
    os.rename(outfilename + '.tmp', outfilename)

    # this is just for testing
    if 0:
        p = midx.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        print p.idxnames
        assert(len(p) == total)
        for pe, e in p, git.idxmerge(inp, final_progress=False):
            assert(i == pi.next())
            assert(p.exists(i))

    return total, outfilename
Exemplo n.º 34
0
 def send_index(self, name, conn, send_size):
     data = git.open_idx(git.repo(b'objects/pack/%s' % name,
                                  repo_dir=self.repo_dir)).map
     send_size(len(data))
     conn.write(data)
Exemplo n.º 35
0
def do_bloom(path, outfilename):
    global _first
    b = None
    if os.path.exists(outfilename) and not opt.force:
        b = bloom.ShaBloom(outfilename)
        if not b.valid():
            debug1("bloom: Existing invalid bloom found, regenerating.\n")
            b = None

    add = []
    rest = []
    add_count = 0
    rest_count = 0
    for i, name in enumerate(glob.glob('%s/*.idx' % path)):
        progress('bloom: counting: %d\r' % i)
        ix = git.open_idx(name)
        ixbase = os.path.basename(name)
        if b and (ixbase in b.idxnames):
            rest.append(name)
            rest_count += len(ix)
        else:
            add.append(name)
            add_count += len(ix)
    total = add_count + rest_count

    if not add:
        debug1("bloom: nothing to do.\n")
        return

    if b:
        if len(b) != rest_count:
            debug1("bloom: size %d != idx total %d, regenerating\n" %
                   (len(b), rest_count))
            b = None
        elif (b.bits < bloom.MAX_BLOOM_BITS
              and b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
            debug1("bloom: regenerating: adding %d entries gives "
                   "%.2f%% false positives.\n" %
                   (add_count, b.pfalse_positive(add_count)))
            b = None
        else:
            b = bloom.ShaBloom(outfilename, readwrite=True, expected=add_count)
    if not b:  # Need all idxs to build from scratch
        add += rest
        add_count += rest_count
    del rest
    del rest_count

    msg = b is None and 'creating from' or 'adding'
    if not _first: _first = path
    dirprefix = (_first != path) and git.repo_rel(path) + ': ' or ''
    progress('bloom: %s%s %d file%s (%d object%s).\n' %
             (dirprefix, msg, len(add), len(add) != 1 and 's'
              or '', add_count, add_count != 1 and 's' or ''))

    tfname = None
    if b is None:
        tfname = os.path.join(path, 'bup.tmp.bloom')
        tf = open(tfname, 'w+')
        b = bloom.create(tfname, f=tf, expected=add_count, k=opt.k)
    count = 0
    icount = 0
    for name in add:
        ix = git.open_idx(name)
        qprogress('bloom: writing %.2f%% (%d/%d objects)\r' %
                  (icount * 100.0 / add_count, icount, add_count))
        b.add_idx(ix)
        count += 1
        icount += len(ix)

    if tfname:
        os.rename(tfname, outfilename)
Exemplo n.º 36
0
def sweep(live_objects, existing_count, cat_pipe, opt):
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        if opt.verbose and new_pack_prefix:
            log('created ' + basename(new_pack_prefix) + '\n')
        for p in ns.stale_files:
            if opt.verbose:
                log('removing ' + basename(p) + '\n')
            os.unlink(p)
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=opt.compress,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)

    # FIXME: sanity check .idx names vs .pack names?
    collect_count = 0
    for idx_name in glob.glob(os.path.join(git.repo('objects/pack'), '*.idx')):
        if opt.verbose:
            qprogress('preserving live data (%d%% complete)\r' %
                      ((float(collect_count) / existing_count) * 100))
        idx = git.open_idx(idx_name)

        idx_live_count = 0
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20:(i + 1) * 20]
            if live_objects.exists(sha):
                idx_live_count += 1

        collect_count += idx_live_count
        if idx_live_count == 0:
            if opt.verbose:
                log('deleting %s\n' % git.repo_rel(basename(idx_name)))
            ns.stale_files.append(idx_name)
            ns.stale_files.append(idx_name[:-3] + 'pack')
            continue

        live_frac = idx_live_count / float(len(idx))
        if live_frac > ((100 - opt.threshold) / 100.0):
            if opt.verbose:
                log('keeping %s (%d%% live)\n' %
                    (git.repo_rel(basename(idx_name)), live_frac * 100))
            continue

        if opt.verbose:
            log('rewriting %s (%.2f%% live)\n' %
                (basename(idx_name), live_frac * 100))
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20:(i + 1) * 20]
            if live_objects.exists(sha):
                item_it = cat_pipe.get(sha.encode('hex'))
                type = item_it.next()
                writer.write(sha, type, ''.join(item_it))

        ns.stale_files.append(idx_name)
        ns.stale_files.append(idx_name[:-3] + 'pack')

    if opt.verbose:
        progress('preserving live data (%d%% complete)\n' %
                 ((float(collect_count) / existing_count) * 100))

    # Nothing should have recreated midx/bloom yet.
    pack_dir = git.repo('objects/pack')
    assert (not os.path.exists(os.path.join(pack_dir, 'bup.bloom')))
    assert (not glob.glob(os.path.join(pack_dir, '*.midx')))

    # try/catch should call writer.abort()?
    # This will finally run midx.
    writer.close()  # Can only change refs (if needed) after this.
    remove_stale_files(None)  # In case we didn't write to the writer.

    if opt.verbose:
        log('discarded %d%% of objects\n' %
            ((existing_count - count_objects(pack_dir)) /
             float(existing_count) * 100))
Exemplo n.º 37
0
Arquivo: midx-cmd.py Projeto: leto/bup
def _do_midx(outdir, outfilename, infilenames, prefixstr):
    if not outfilename:
        assert(outdir)
        sum = Sha1('\0'.join(infilenames)).hexdigest()
        outfilename = '%s/midx-%s.midx' % (outdir, sum)
    
    inp = []
    total = 0
    allfilenames = {}
    for name in infilenames:
        ix = git.open_idx(name)
        for n in ix.idxnames:
            allfilenames[n] = 1
        inp.append(ix)
        total += len(ix)

    log('midx: %screating from %d files (%d objects).\n'
        % (prefixstr, len(infilenames), total))
    if (not opt.force and (total < 1024 and len(infilenames) < 3)) \
       or len(infilenames) < 2 \
       or (opt.force and not total):
        debug1('midx: nothing to do.\n')
        return

    pages = int(total/SHA_PER_PAGE) or 1
    bits = int(math.ceil(math.log(pages, 2)))
    entries = 2**bits
    debug1('midx: table size: %d (%d bits)\n' % (entries*4, bits))
    
    table = [0]*entries

    try:
        os.unlink(outfilename)
    except OSError:
        pass
    f = open(outfilename + '.tmp', 'w+')
    f.write('MIDX\0\0\0\2')
    f.write(struct.pack('!I', bits))
    assert(f.tell() == 12)
    f.write('\0'*4*entries)
    
    for e in merge(inp, bits, table):
        f.write(e)
        
    f.write('\0'.join(os.path.basename(p) for p in allfilenames.keys()))

    f.seek(12)
    f.write(struct.pack('!%dI' % entries, *table))
    f.close()
    os.rename(outfilename + '.tmp', outfilename)

    # this is just for testing
    if 0:
        p = git.PackMidx(outfilename)
        assert(len(p.idxnames) == len(infilenames))
        print p.idxnames
        assert(len(p) == total)
        pi = iter(p)
        for i in merge(inp, total, bits, table):
            assert(i == pi.next())
            assert(p.exists(i))

    return total,outfilename
Exemplo n.º 38
0
        mf -= 20  # just a safety margin
    else:
        mf -= 6   # minimum safety margin
    return mf


def check_midx(name):
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % nicename)
    try:
        ix = git.open_idx(name)
    except git.GitError, e:
        add_error('%s: %s' % (name, e))
        return
    for count,subname in enumerate(ix.idxnames):
        sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
        for ecount,e in enumerate(sub):
            if not (ecount % 1234):
                qprogress('  %d/%d: %s %d/%d\r' 
                          % (count, len(ix.idxnames),
                             git.shorten_hash(subname), ecount, len(sub)))
            if not sub.exists(e):
                add_error("%s: %s: %s missing from idx"
                          % (nicename, git.shorten_hash(subname),
                             str(e).encode('hex')))
            if not ix.exists(e):
                add_error("%s: %s: %s missing from midx"
                          % (nicename, git.shorten_hash(subname),
                             str(e).encode('hex')))
    prev = None
    for ecount,e in enumerate(ix):
Exemplo n.º 39
0
Arquivo: gc.py Projeto: xx4h/bup
def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
          verbosity):
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []
    def remove_stale_files(new_pack_prefix):
        if verbosity and new_pack_prefix:
            log('created ' + basename(new_pack_prefix) + '\n')
        for p in ns.stale_files:
            if new_pack_prefix and p.startswith(new_pack_prefix):
                continue  # Don't remove the new pack file
            if verbosity:
                log('removing ' + basename(p) + '\n')
            os.unlink(p)
        if ns.stale_files:  # So git cat-pipe will close them
            cat_pipe.restart()
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=compression,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)

    # FIXME: sanity check .idx names vs .pack names?
    collect_count = 0
    for idx_name in glob.glob(os.path.join(git.repo('objects/pack'), '*.idx')):
        if verbosity:
            qprogress('preserving live data (%d%% complete)\r'
                      % ((float(collect_count) / existing_count) * 100))
        idx = git.open_idx(idx_name)

        idx_live_count = 0
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20 : (i + 1) * 20]
            if live_objects.exists(sha):
                idx_live_count += 1

        collect_count += idx_live_count
        if idx_live_count == 0:
            if verbosity:
                log('deleting %s\n'
                    % git.repo_rel(basename(idx_name)))
            ns.stale_files.append(idx_name)
            ns.stale_files.append(idx_name[:-3] + 'pack')
            continue

        live_frac = idx_live_count / float(len(idx))
        if live_frac > ((100 - threshold) / 100.0):
            if verbosity:
                log('keeping %s (%d%% live)\n' % (git.repo_rel(basename(idx_name)),
                                                  live_frac * 100))
            continue

        if verbosity:
            log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
                                                  live_frac * 100))
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20 : (i + 1) * 20]
            if live_objects.exists(sha):
                item_it = cat_pipe.get(sha.encode('hex'))
                type = item_it.next()
                writer.just_write(sha, type, ''.join(item_it))

        ns.stale_files.append(idx_name)
        ns.stale_files.append(idx_name[:-3] + 'pack')

    if verbosity:
        progress('preserving live data (%d%% complete)\n'
                 % ((float(collect_count) / existing_count) * 100))

    # Nothing should have recreated midx/bloom yet.
    pack_dir = git.repo('objects/pack')
    assert(not os.path.exists(os.path.join(pack_dir, 'bup.bloom')))
    assert(not glob.glob(os.path.join(pack_dir, '*.midx')))

    # try/catch should call writer.abort()?
    # This will finally run midx.
    writer.close()  # Can only change refs (if needed) after this.
    remove_stale_files(None)  # In case we didn't write to the writer.

    if verbosity:
        log('discarded %d%% of objects\n'
            % ((existing_count - count_objects(pack_dir, verbosity))
               / float(existing_count) * 100))
Exemplo n.º 40
0
Arquivo: bloom.py Projeto: jmberg/bup
def do_bloom(path, outfilename, k, force):
    global _first
    assert k in (None, 4, 5)
    b = None
    if os.path.exists(outfilename) and not force:
        b = bloom.ShaBloom(outfilename)
        if not b.valid():
            debug1("bloom: Existing invalid bloom found, regenerating.\n")
            b = None

    add = []
    rest = []
    add_count = 0
    rest_count = 0
    for i, name in enumerate(glob.glob(b'%s/*.idx' % path)):
        progress('bloom: counting: %d\r' % i)
        ix = git.open_idx(name)
        ixbase = os.path.basename(name)
        if b and (ixbase in b.idxnames):
            rest.append(name)
            rest_count += len(ix)
        else:
            add.append(name)
            add_count += len(ix)

    if not add:
        debug1("bloom: nothing to do.\n")
        return

    if b:
        if len(b) != rest_count:
            debug1("bloom: size %d != idx total %d, regenerating\n" %
                   (len(b), rest_count))
            b = None
        elif k is not None and k != b.k:
            debug1("bloom: new k %d != existing k %d, regenerating\n" %
                   (k, b.k))
            b = None
        elif (b.bits < bloom.MAX_BLOOM_BITS[b.k]
              and b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
            debug1("bloom: regenerating: adding %d entries gives "
                   "%.2f%% false positives.\n" %
                   (add_count, b.pfalse_positive(add_count)))
            b = None
        else:
            b = bloom.ShaBloom(outfilename, readwrite=True, expected=add_count)
    if not b:  # Need all idxs to build from scratch
        add += rest
        add_count += rest_count
    del rest
    del rest_count

    msg = b is None and 'creating from' or 'adding'
    if not _first: _first = path
    dirprefix = (_first != path) and git.repo_rel(path) + b': ' or b''
    progress('bloom: %s%s %d file%s (%d object%s).\r' %
             (path_msg(dirprefix), msg, len(add), len(add) != 1 and 's'
              or '', add_count, add_count != 1 and 's' or ''))

    tfname = None
    if b is None:
        tfname = os.path.join(path, b'bup.tmp.bloom')
        b = bloom.create(tfname, expected=add_count, k=k)
    count = 0
    icount = 0
    for name in add:
        ix = git.open_idx(name)
        qprogress('bloom: writing %.2f%% (%d/%d objects)\r' %
                  (icount * 100.0 / add_count, icount, add_count))
        b.add_idx(ix)
        count += 1
        icount += len(ix)

    # Currently, there's an open file object for tfname inside b.
    # Make sure it's closed before rename.
    b.close()

    if tfname:
        os.rename(tfname, outfilename)
Exemplo n.º 41
0
def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
          verbosity):
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        if verbosity and new_pack_prefix:
            log('created ' + path_msg(basename(new_pack_prefix)) + '\n')
        for p in ns.stale_files:
            if new_pack_prefix and p.startswith(new_pack_prefix):
                continue  # Don't remove the new pack file
            if verbosity:
                log('removing ' + path_msg(basename(p)) + '\n')
            os.unlink(p)
        if ns.stale_files:  # So git cat-pipe will close them
            cat_pipe.restart()
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=compression,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)
    try:
        # FIXME: sanity check .idx names vs .pack names?
        collect_count = 0
        for idx_name in glob.glob(
                os.path.join(git.repo(b'objects/pack'), b'*.idx')):
            if verbosity:
                qprogress('preserving live data (%d%% complete)\r' %
                          ((float(collect_count) / existing_count) * 100))
            with git.open_idx(idx_name) as idx:
                idx_live_count = 0
                for sha in idx:
                    if live_objects.exists(sha):
                        idx_live_count += 1

                collect_count += idx_live_count
                if idx_live_count == 0:
                    if verbosity:
                        log('deleting %s\n' %
                            path_msg(git.repo_rel(basename(idx_name))))
                    ns.stale_files.append(idx_name)
                    ns.stale_files.append(idx_name[:-3] + b'pack')
                    continue

                live_frac = idx_live_count / float(len(idx))
                if live_frac > ((100 - threshold) / 100.0):
                    if verbosity:
                        log('keeping %s (%d%% live)\n' % (git.repo_rel(
                            basename(idx_name)), live_frac * 100))
                    continue

                if verbosity:
                    log('rewriting %s (%.2f%% live)\n' %
                        (basename(idx_name), live_frac * 100))
                for sha in idx:
                    if live_objects.exists(sha):
                        item_it = cat_pipe.get(hexlify(sha))
                        _, typ, _ = next(item_it)
                        writer.just_write(sha, typ, b''.join(item_it))

                ns.stale_files.append(idx_name)
                ns.stale_files.append(idx_name[:-3] + b'pack')

        if verbosity:
            progress('preserving live data (%d%% complete)\n' %
                     ((float(collect_count) / existing_count) * 100))

        # Nothing should have recreated midx/bloom yet.
        pack_dir = git.repo(b'objects/pack')
        assert (not os.path.exists(os.path.join(pack_dir, b'bup.bloom')))
        assert (not glob.glob(os.path.join(pack_dir, b'*.midx')))

    except BaseException as ex:
        with pending_raise(ex):
            writer.abort()

    # This will finally run midx.
    # Can only change refs (if needed) after this.
    writer.close()

    remove_stale_files(None)  # In case we didn't write to the writer.

    if verbosity:
        log('discarded %d%% of objects\n' %
            ((existing_count - count_objects(pack_dir, verbosity)) /
             float(existing_count) * 100))
Exemplo n.º 42
0
else:
    if len(opt.find) % 2:
        s = opt.find + '0'
    else:
        s = opt.find
    try:
        bin = s.decode('hex')
    except TypeError:
        o.fatal('--find parameter is not a valid hex string')

find = opt.find.lower()

count = 0
for name in extra:
    try:
        ix = git.open_idx(name)
    except git.GitError as e:
        add_error('%s: %s' % (name, e))
        continue
    if len(opt.find) == 40:
        if ix.exists(bin):
            print name, find
    else:
        # slow, exhaustive search
        for _i in ix:
            i = str(_i).encode('hex')
            if i.startswith(find):
                print name, i
            qprogress('Searching: %d\r' % count)
            count += 1
Exemplo n.º 43
0
def do_bloom(path, outfilename):
    global _first
    b = None
    if os.path.exists(outfilename) and not opt.force:
        b = bloom.ShaBloom(outfilename)
        if not b.valid():
            debug1("bloom: Existing invalid bloom found, regenerating.\n")
            b = None

    add = []
    rest = []
    add_count = 0
    rest_count = 0
    for i,name in enumerate(glob.glob('%s/*.idx' % path)):
        progress('bloom: counting: %d\r' % i)
        ix = git.open_idx(name)
        ixbase = os.path.basename(name)
        if b and (ixbase in b.idxnames):
            rest.append(name)
            rest_count += len(ix)
        else:
            add.append(name)
            add_count += len(ix)
    total = add_count + rest_count

    if not add:
        debug1("bloom: nothing to do.\n")
        return

    if b:
        if len(b) != rest_count:
            debug1("bloom: size %d != idx total %d, regenerating\n"
                   % (len(b), rest_count))
            b = None
        elif (b.bits < bloom.MAX_BLOOM_BITS and
              b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
            debug1("bloom: regenerating: adding %d entries gives "
                   "%.2f%% false positives.\n"
                   % (add_count, b.pfalse_positive(add_count)))
            b = None
        else:
            b = bloom.ShaBloom(outfilename, readwrite=True, expected=add_count)
    if not b: # Need all idxs to build from scratch
        add += rest
        add_count += rest_count
    del rest
    del rest_count

    msg = b is None and 'creating from' or 'adding'
    if not _first: _first = path
    dirprefix = (_first != path) and git.repo_rel(path)+': ' or ''
    progress('bloom: %s%s %d file%s (%d object%s).\n'
        % (dirprefix, msg,
           len(add), len(add)!=1 and 's' or '',
           add_count, add_count!=1 and 's' or ''))

    tfname = None
    if b is None:
        tfname = os.path.join(path, 'bup.tmp.bloom')
        b = bloom.create(tfname, expected=add_count, k=opt.k)
    count = 0
    icount = 0
    for name in add:
        ix = git.open_idx(name)
        qprogress('bloom: writing %.2f%% (%d/%d objects)\r' 
                  % (icount*100.0/add_count, icount, add_count))
        b.add_idx(ix)
        count += 1
        icount += len(ix)

    # Currently, there's an open file object for tfname inside b.
    # Make sure it's closed before rename.
    b.close()

    if tfname:
        os.rename(tfname, outfilename)