Пример #1
0
Файл: gc.py Проект: xx4h/bup
def bup_gc(threshold=10, compression=1, verbosity=0):
    cat_pipe = git.cp()
    existing_count = count_objects(git.repo('objects/pack'), verbosity)
    if verbosity:
        log('found %d objects\n' % existing_count)
    if not existing_count:
        if verbosity:
            log('nothing to collect\n')
    else:
        try:
            live_objects = find_live_objects(existing_count, cat_pipe,
                                             verbosity=verbosity)
        except MissingObject as ex:
            log('bup: missing object %r \n' % ex.id.encode('hex'))
            sys.exit(1)
        try:
            # FIXME: just rename midxes and bloom, and restore them at the end if
            # we didn't change any packs?
            if verbosity: log('clearing midx files\n')
            midx.clear_midxes()
            if verbosity: log('clearing bloom filter\n')
            bloom.clear_bloom(git.repo('objects/pack'))
            if verbosity: log('clearing reflog\n')
            expirelog_cmd = ['git', 'reflog', 'expire', '--all', '--expire=all']
            expirelog = subprocess.Popen(expirelog_cmd, preexec_fn = git._gitenv())
            git._git_wait(' '.join(expirelog_cmd), expirelog)
            if verbosity: log('removing unreachable data\n')
            sweep(live_objects, existing_count, cat_pipe,
                  threshold, compression,
                  verbosity)
        finally:
            live_objects.close()
Пример #2
0
def find_live_objects(existing_count, cat_pipe, opt):
    prune_visited_trees = True # In case we want a command line option later
    pack_dir = git.repo('objects/pack')
    ffd, bloom_filename = tempfile.mkstemp('.bloom', 'tmp-gc-', pack_dir)
    os.close(ffd)
    # FIXME: allow selection of k?
    # FIXME: support ephemeral bloom filters (i.e. *never* written to disk)
    live_objs = bloom.create(bloom_filename, expected=existing_count, k=None)
    stop_at, trees_visited = None, None
    if prune_visited_trees:
        trees_visited = set()
        stop_at = lambda (x): x.decode('hex') in trees_visited
    approx_live_count = 0
    for ref_name, ref_id in git.list_refs():
        for item in walk_object(cat_pipe, ref_id.encode('hex'),
                                stop_at=stop_at,
                                include_data=None):
            # FIXME: batch ids
            if opt.verbose:
                report_live_item(approx_live_count, existing_count,
                                 ref_name, ref_id, item)
            bin_id = item.id.decode('hex')
            if trees_visited is not None and item.type == 'tree':
                trees_visited.add(bin_id)
            if opt.verbose:
                if not live_objs.exists(bin_id):
                    live_objs.add(bin_id)
                    approx_live_count += 1
            else:
                live_objs.add(bin_id)
    trees_visited = None
    if opt.verbose:
        log('expecting to retain about %.2f%% unnecessary objects\n'
            % live_objs.pfalse_positive())
    return live_objs
Пример #3
0
 def __init__(self, remote, create=False):
     self._busy = self.conn = None
     self.sock = self.p = self.pout = self.pin = None
     is_reverse = os.environ.get('BUP_SERVER_REVERSE')
     if is_reverse:
         assert(not remote)
         remote = '%s:' % is_reverse
     (self.protocol, self.host, self.port, self.dir) = parse_remote(remote)
     self.cachedir = git.repo('index-cache/%s'
                              % re.sub(r'[^@\w]', '_', 
                                       "%s:%s" % (self.host, self.dir)))
     if is_reverse:
         self.pout = os.fdopen(3, 'rb')
         self.pin = os.fdopen(4, 'wb')
         self.conn = Conn(self.pout, self.pin)
     else:
         if self.protocol in ('ssh', 'file'):
             try:
                 # FIXME: ssh and file shouldn't use the same module
                 self.p = ssh.connect(self.host, self.port, 'server')
                 self.pout = self.p.stdout
                 self.pin = self.p.stdin
                 self.conn = Conn(self.pout, self.pin)
             except OSError, e:
                 raise ClientError, 'connect: %s' % e, sys.exc_info()[2]
         elif self.protocol == 'bup':
             self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             self.sock.connect((self.host, atoi(self.port) or 1982))
             self.sockw = self.sock.makefile('wb')
             self.conn = DemuxConn(self.sock.fileno(), self.sockw)
Пример #4
0
def test_multiple_suggestions():
    with no_lingering_errors():
        with test_tempdir('bup-tclient-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = '../../../bup'
            os.environ['BUP_DIR'] = bupdir = tmpdir
            git.init_repo(bupdir)

            lw = git.PackWriter()
            lw.new_blob(s1)
            lw.close()
            lw = git.PackWriter()
            lw.new_blob(s2)
            lw.close()
            WVPASSEQ(len(glob.glob(git.repo('objects/pack'+IDX_PAT))), 2)

            c = client.Client(bupdir, create=True)
            WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 0)
            rw = c.new_packwriter()
            s1sha = rw.new_blob(s1)
            WVPASS(rw.exists(s1sha))
            s2sha = rw.new_blob(s2)
            # This is a little hacky, but ensures that we test the
            # code under test
            while (len(glob.glob(c.cachedir+IDX_PAT)) < 2 and
                   not c.conn.has_input()):
                pass
            rw.new_blob(s2)
            WVPASS(rw.objcache.exists(s1sha))
            WVPASS(rw.objcache.exists(s2sha))
            rw.new_blob(s3)
            WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 2)
            rw.close()
            WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 3)
Пример #5
0
Файл: tgit.py Проект: senseb/bup
def test_pack_name_lookup():
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tgit-')
    os.environ['BUP_MAIN_EXE'] = bup_exe
    os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
    git.init_repo(bupdir)
    git.verbose = 1
    packdir = git.repo('objects/pack')

    idxnames = []
    hashes = []

    for start in range(0,28,2):
        w = git.PackWriter()
        for i in range(start, start+2):
            hashes.append(w.new_blob(str(i)))
        log('\n')
        idxnames.append(os.path.basename(w.close() + '.idx'))

    r = git.PackIdxList(packdir)
    WVPASSEQ(len(r.packs), 2)
    for e,idxname in enumerate(idxnames):
        for i in range(e*2, (e+1)*2):
            WVPASSEQ(r.exists(hashes[i], want_source=True), idxname)
    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
Пример #6
0
Файл: tgit.py Проект: senseb/bup
def test_long_index():
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tgit-')
    os.environ['BUP_MAIN_EXE'] = bup_exe
    os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
    git.init_repo(bupdir)
    w = git.PackWriter()
    obj_bin = struct.pack('!IIIII',
            0x00112233, 0x44556677, 0x88990011, 0x22334455, 0x66778899)
    obj2_bin = struct.pack('!IIIII',
            0x11223344, 0x55667788, 0x99001122, 0x33445566, 0x77889900)
    obj3_bin = struct.pack('!IIIII',
            0x22334455, 0x66778899, 0x00112233, 0x44556677, 0x88990011)
    pack_bin = struct.pack('!IIIII',
            0x99887766, 0x55443322, 0x11009988, 0x77665544, 0x33221100)
    idx = list(list() for i in xrange(256))
    idx[0].append((obj_bin, 1, 0xfffffffff))
    idx[0x11].append((obj2_bin, 2, 0xffffffffff))
    idx[0x22].append((obj3_bin, 3, 0xff))
    (fd,name) = tempfile.mkstemp(suffix='.idx', dir=git.repo('objects'))
    os.close(fd)
    w.count = 3
    r = w._write_pack_idx_v2(name, idx, pack_bin)
    i = git.PackIdxV2(name, open(name, 'rb'))
    WVPASSEQ(i.find_offset(obj_bin), 0xfffffffff)
    WVPASSEQ(i.find_offset(obj2_bin), 0xffffffffff)
    WVPASSEQ(i.find_offset(obj3_bin), 0xff)
    if wvfailure_count() == initial_failures:
        os.remove(name)
        subprocess.call(['rm', '-rf', tmpdir])
Пример #7
0
Файл: tclient.py Проект: 3v/bup
def test_multiple_suggestions():
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tclient-')
    os.environ['BUP_MAIN_EXE'] = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = tmpdir
    git.init_repo(bupdir)

    lw = git.PackWriter()
    lw.new_blob(s1)
    lw.close()
    lw = git.PackWriter()
    lw.new_blob(s2)
    lw.close()
    WVPASSEQ(len(glob.glob(git.repo('objects/pack'+IDX_PAT))), 2)

    c = client.Client(bupdir, create=True)
    WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 0)
    rw = c.new_packwriter()
    s1sha = rw.new_blob(s1)
    WVPASS(rw.exists(s1sha))
    s2sha = rw.new_blob(s2)
    # This is a little hacky, but ensures that we test the code under test
    while (len(glob.glob(c.cachedir+IDX_PAT)) < 2 and
           not c.conn.has_input()):
        pass
    rw.new_blob(s2)
    WVPASS(rw.objcache.exists(s1sha))
    WVPASS(rw.objcache.exists(s2sha))
    rw.new_blob(s3)
    WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 2)
    rw.close()
    WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 3)
    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
Пример #8
0
Файл: client.py Проект: maw/bup
 def __init__(self, remote, create=False):
     self._busy = self.conn = self.p = self.pout = self.pin = None
     is_reverse = os.environ.get('BUP_SERVER_REVERSE')
     if is_reverse:
         assert(not remote)
         remote = '%s:' % is_reverse
     rs = remote.split(':', 1)
     if len(rs) == 1:
         (host, dir) = (None, remote)
     else:
         (host, dir) = rs
     (self.host, self.dir) = (host, dir)
     self.cachedir = git.repo('index-cache/%s'
                              % re.sub(r'[^@\w]', '_', 
                                       "%s:%s" % (host, dir)))
     try:
         if is_reverse:
             self.pout = os.fdopen(3, 'rb')
             self.pin = os.fdopen(4, 'wb')
         else:
             self.p = ssh.connect(host, 'server')
             self.pout = self.p.stdout
             self.pin = self.p.stdin
     except OSError, e:
         raise ClientError, 'exec %r: %s' % (argv[0], e), sys.exc_info()[2]
Пример #9
0
def send_index(conn, name):
    _init_session()
    assert name.find("/") < 0
    assert name.endswith(".idx")
    idx = git.open_idx(git.repo("objects/pack/%s" % name))
    conn.write(struct.pack("!I", len(idx.map)))
    conn.write(idx.map)
    conn.ok()
Пример #10
0
def send_index(conn, name):
    git.check_repo_or_die()
    assert(name.find('/') < 0)
    assert(name.endswith('.idx'))
    idx = git.open_idx(git.repo('objects/pack/%s' % name))
    conn.write(struct.pack('!I', len(idx.map)))
    conn.write(idx.map)
    conn.ok()
Пример #11
0
def list_indexes(conn, junk):
    git.check_repo_or_die()
    suffix = ''
    if dumb_server_mode:
        suffix = ' load'
    for f in os.listdir(git.repo('objects/pack')):
        if f.endswith('.idx'):
            conn.write('%s%s\n' % (f, suffix))
    conn.ok()
Пример #12
0
def list_indexes(conn, junk):
    _init_session()
    suffix = ""
    if dumb_server_mode:
        suffix = " load"
    for f in os.listdir(git.repo("objects/pack")):
        if f.endswith(".idx"):
            conn.write("%s%s\n" % (f, suffix))
    conn.ok()
Пример #13
0
def clear_index(indexfile):
    indexfiles = [indexfile, indexfile + ".meta", indexfile + ".hlink"]
    for indexfile in indexfiles:
        path = git.repo(indexfile)
        try:
            os.remove(path)
            if opt.verbose:
                log("clear: removed %s\n" % path)
        except OSError as e:
            if e.errno != errno.ENOENT:
                raise
Пример #14
0
def clear_index(indexfile):
    indexfiles = [indexfile, indexfile + '.meta', indexfile + '.hlink']
    for indexfile in indexfiles:
        path = git.repo(indexfile)
        try:
            os.remove(path)
            if opt.verbose:
                log('clear: removed %s\n' % path)
        except OSError, e:
            if e.errno != errno.ENOENT:
                raise
Пример #15
0
def test_dumb_client_server():
    os.environ['BUP_MAIN_EXE'] = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = 'buptest_tclient.tmp'
    subprocess.call(['rm', '-rf', bupdir])
    git.init_repo(bupdir)
    open(git.repo('bup-dumb-server'), 'w').close()

    lw = git.PackWriter()
    lw.new_blob(s1)
    lw.close()

    c = client.Client(bupdir, create=True)
    rw = c.new_packwriter()
    WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 1)
    rw.new_blob(s1)
    WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 1)
    rw.new_blob(s2)
    rw.close()
    WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 2)
Пример #16
0
def test_dumb_client_server():
    with no_lingering_errors(), test_tempdir('bup-tclient-') as tmpdir:
        os.environ['BUP_MAIN_EXE'] = '../../../bup'
        os.environ['BUP_DIR'] = bupdir = tmpdir
        git.init_repo(bupdir)
        open(git.repo('bup-dumb-server'), 'w').close()

        lw = git.PackWriter()
        lw.new_blob(s1)
        lw.close()

        c = client.Client(bupdir, create=True)
        rw = c.new_packwriter()
        WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 1)
        rw.new_blob(s1)
        WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 1)
        rw.new_blob(s2)
        rw.close()
        WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 2)
Пример #17
0
Файл: client.py Проект: maw/bup
    def sync_indexes_del(self):
        self.check_busy()
        conn = self.conn
        conn.write('list-indexes\n')
        packdir = git.repo('objects/pack')
        all = {}
        needed = {}
        for line in linereader(conn):
            if not line:
                break
            all[line] = 1
            assert(line.find('/') < 0)
            if not os.path.exists(os.path.join(self.cachedir, line)):
                needed[line] = 1
        self.check_ok()

        mkdirp(self.cachedir)
        for f in os.listdir(self.cachedir):
            if f.endswith('.idx') and not f in all:
                log('pruning old index: %r\n' % f)
                os.unlink(os.path.join(self.cachedir, f))
Пример #18
0
Файл: tclient.py Проект: 3v/bup
def test_dumb_client_server():
    initial_failures = wvfailure_count()
    tmpdir = tempfile.mkdtemp(dir=bup_tmp, prefix='bup-tclient-')
    os.environ['BUP_MAIN_EXE'] = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = tmpdir
    git.init_repo(bupdir)
    open(git.repo('bup-dumb-server'), 'w').close()

    lw = git.PackWriter()
    lw.new_blob(s1)
    lw.close()

    c = client.Client(bupdir, create=True)
    rw = c.new_packwriter()
    WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 1)
    rw.new_blob(s1)
    WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 1)
    rw.new_blob(s2)
    rw.close()
    WVPASSEQ(len(glob.glob(c.cachedir+IDX_PAT)), 2)
    if wvfailure_count() == initial_failures:
        subprocess.call(['rm', '-rf', tmpdir])
Пример #19
0
def test_pack_name_lookup():
    os.environ['BUP_MAIN_EXE'] = bupmain = '../../../bup'
    os.environ['BUP_DIR'] = bupdir = 'pybuptest.tmp'
    subprocess.call(['rm','-rf', bupdir])
    git.init_repo(bupdir)
    git.verbose = 1
    packdir = git.repo('objects/pack')

    idxnames = []
    hashes = []

    for start in range(0,28,2):
        w = git.PackWriter()
        for i in range(start, start+2):
            hashes.append(w.new_blob(str(i)))
        log('\n')
        idxnames.append(os.path.basename(w.close() + '.idx'))

    r = git.PackIdxList(packdir)
    WVPASSEQ(len(r.packs), 2)
    for e,idxname in enumerate(idxnames):
        for i in range(e*2, (e+1)*2):
            WVPASSEQ(r.exists(hashes[i], want_source=True), idxname)
Пример #20
0
def test_long_index():
    w = git.PackWriter()
    obj_bin = struct.pack('!IIIII',
            0x00112233, 0x44556677, 0x88990011, 0x22334455, 0x66778899)
    obj2_bin = struct.pack('!IIIII',
            0x11223344, 0x55667788, 0x99001122, 0x33445566, 0x77889900)
    obj3_bin = struct.pack('!IIIII',
            0x22334455, 0x66778899, 0x00112233, 0x44556677, 0x88990011)
    pack_bin = struct.pack('!IIIII',
            0x99887766, 0x55443322, 0x11009988, 0x77665544, 0x33221100)
    idx = list(list() for i in xrange(256))
    idx[0].append((obj_bin, 1, 0xfffffffff))
    idx[0x11].append((obj2_bin, 2, 0xffffffffff))
    idx[0x22].append((obj3_bin, 3, 0xff))
    (fd,name) = tempfile.mkstemp(suffix='.idx', dir=git.repo('objects'))
    os.close(fd)
    w.count = 3
    r = w._write_pack_idx_v2(name, idx, pack_bin)
    i = git.PackIdxV2(name, open(name, 'rb'))
    WVPASSEQ(i.find_offset(obj_bin), 0xfffffffff)
    WVPASSEQ(i.find_offset(obj2_bin), 0xffffffffff)
    WVPASSEQ(i.find_offset(obj3_bin), 0xff)
    os.remove(name)
Пример #21
0
Файл: tgit.py Проект: 0xkag/bup
def test_pack_name_lookup():
    with no_lingering_errors(), test_tempdir('bup-tgit-') as tmpdir:
        os.environ['BUP_MAIN_EXE'] = bup_exe
        os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
        git.init_repo(bupdir)
        git.verbose = 1
        packdir = git.repo('objects/pack')

        idxnames = []
        hashes = []

        for start in range(0,28,2):
            w = git.PackWriter()
            for i in range(start, start+2):
                hashes.append(w.new_blob(str(i)))
            log('\n')
            idxnames.append(os.path.basename(w.close() + '.idx'))

        r = git.PackIdxList(packdir)
        WVPASSEQ(len(r.packs), 2)
        for e,idxname in enumerate(idxnames):
            for i in range(e*2, (e+1)*2):
                WVPASSEQ(r.exists(hashes[i], want_source=True), idxname)
Пример #22
0
Файл: gc.py Проект: rrevans/bup
def find_live_objects(existing_count, cat_pipe, verbosity=0):
    prune_visited_trees = True  # In case we want a command line option later
    pack_dir = git.repo('objects/pack')
    ffd, bloom_filename = tempfile.mkstemp('.bloom', 'tmp-gc-', pack_dir)
    os.close(ffd)
    # FIXME: allow selection of k?
    # FIXME: support ephemeral bloom filters (i.e. *never* written to disk)
    live_objs = bloom.create(bloom_filename, expected=existing_count, k=None)
    # live_objs will hold on to the fd until close or exit
    os.unlink(bloom_filename)
    stop_at, trees_visited = None, None
    if prune_visited_trees:
        trees_visited = set()
        stop_at = lambda (x): x.decode('hex') in trees_visited
    approx_live_count = 0
    for ref_name, ref_id in git.list_refs():
        for item in walk_object(cat_pipe,
                                ref_id.encode('hex'),
                                stop_at=stop_at,
                                include_data=None):
            # FIXME: batch ids
            if verbosity:
                report_live_item(approx_live_count, existing_count, ref_name,
                                 ref_id, item, verbosity)
            if trees_visited is not None and item.type == 'tree':
                trees_visited.add(item.oid)
            if verbosity:
                if not live_objs.exists(item.oid):
                    live_objs.add(item.oid)
                    approx_live_count += 1
            else:
                live_objs.add(item.oid)
    trees_visited = None
    if verbosity:
        log('expecting to retain about %.2f%% unnecessary objects\n' %
            live_objs.pfalse_positive())
    return live_objs
Пример #23
0
 def __init__(self, remote, create=False):
     self._busy = self.conn = None
     self.sock = self.p = self.pout = self.pin = None
     is_reverse = os.environ.get("BUP_SERVER_REVERSE")
     if is_reverse:
         assert not remote
         remote = "%s:" % is_reverse
     (self.protocol, self.host, self.port, self.dir) = parse_remote(remote)
     self.cachedir = git.repo("index-cache/%s" % re.sub(r"[^@\w]", "_", "%s:%s" % (self.host, self.dir)))
     if is_reverse:
         self.pout = os.fdopen(3, "rb")
         self.pin = os.fdopen(4, "wb")
         self.conn = Conn(self.pout, self.pin)
     else:
         if self.protocol in ("ssh", "file"):
             try:
                 # FIXME: ssh and file shouldn't use the same module
                 self.p = ssh.connect(self.host, self.port, "server")
                 self.pout = self.p.stdout
                 self.pin = self.p.stdin
                 self.conn = Conn(self.pout, self.pin)
             except OSError as e:
                 raise ClientError, "connect: %s" % e, sys.exc_info()[2]
         elif self.protocol == "bup":
             self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
             self.sock.connect((self.host, atoi(self.port) or 1982))
             self.sockw = self.sock.makefile("wb")
             self.conn = DemuxConn(self.sock.fileno(), self.sockw)
     if self.dir:
         self.dir = re.sub(r"[\r\n]", " ", self.dir)
         if create:
             self.conn.write("init-dir %s\n" % self.dir)
         else:
             self.conn.write("set-dir %s\n" % self.dir)
         self.check_ok()
     self.sync_indexes()
Пример #24
0
def test_pack_name_lookup():
    with no_lingering_errors():
        with test_tempdir('bup-tgit-') as tmpdir:
            os.environ['BUP_MAIN_EXE'] = bup_exe
            os.environ['BUP_DIR'] = bupdir = tmpdir + "/bup"
            git.init_repo(bupdir)
            git.verbose = 1
            packdir = git.repo('objects/pack')

            idxnames = []
            hashes = []

            for start in range(0, 28, 2):
                w = git.PackWriter()
                for i in range(start, start + 2):
                    hashes.append(w.new_blob(str(i)))
                log('\n')
                idxnames.append(os.path.basename(w.close() + '.idx'))

            r = git.PackIdxList(packdir)
            WVPASSEQ(len(r.packs), 2)
            for e, idxname in enumerate(idxnames):
                for i in range(e * 2, (e + 1) * 2):
                    WVPASSEQ(r.exists(hashes[i], want_source=True), idxname)
Пример #25
0
--
n,number=  number of objects per cycle [10000]
c,cycles=  number of cycles to run [100]
ignore-midx  ignore .midx files, use only .idx files
existing   test with existing objects instead of fake ones
"""
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra:
    o.fatal('no arguments expected')

git.ignore_midx = opt.ignore_midx

git.check_repo_or_die()
m = git.PackIdxList(git.repo('objects/pack'))

report(-1)
_helpers.random_sha()
report(0)

if opt.existing:
    def foreverit(mi):
        while 1:
            for e in mi:
                yield e
    objit = iter(foreverit(m))

for c in range(opt.cycles):
    for n in range(opt.number):
        if opt.existing:
Пример #26
0
def main(argv):

    # Hack around lack of nonlocal vars in python 2
    _nonlocal = {}

    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if opt.indexfile:
        opt.indexfile = argv_bytes(opt.indexfile)
    if opt.name:
        opt.name = argv_bytes(opt.name)
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    if opt.strip_path:
        opt.strip_path = argv_bytes(opt.strip_path)

    git.check_repo_or_die()
    if not (opt.tree or opt.commit or opt.name):
        o.fatal("use one or more of -t, -c, -n")
    if not extra:
        o.fatal("no filenames given")

    extra = [argv_bytes(x) for x in extra]

    opt.progress = (istty2 and not opt.quiet)
    opt.smaller = parse_num(opt.smaller or 0)
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)

    if opt.date:
        date = parse_date_or_fatal(opt.date, o.fatal)
    else:
        date = time.time()

    if opt.strip and opt.strip_path:
        o.fatal("--strip is incompatible with --strip-path")

    graft_points = []
    if opt.graft:
        if opt.strip:
            o.fatal("--strip is incompatible with --graft")

        if opt.strip_path:
            o.fatal("--strip-path is incompatible with --graft")

        for (option, parameter) in flags:
            if option == "--graft":
                parameter = argv_bytes(parameter)
                splitted_parameter = parameter.split(b'=')
                if len(splitted_parameter) != 2:
                    o.fatal(
                        "a graft point must be of the form old_path=new_path")
                old_path, new_path = splitted_parameter
                if not (old_path and new_path):
                    o.fatal("a graft point cannot be empty")
                graft_points.append(
                    (resolve_parent(old_path), resolve_parent(new_path)))

    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    if is_reverse and opt.remote:
        o.fatal("don't use -r in reverse mode; it's automatic")

    name = opt.name
    if name and not valid_save_name(name):
        o.fatal("'%s' is not a valid branch name" % path_msg(name))
    refname = name and b'refs/heads/%s' % name or None
    if opt.remote or is_reverse:
        try:
            cli = client.Client(opt.remote)
        except client.ClientError as e:
            log('error: %s' % e)
            sys.exit(1)
        oldref = refname and cli.read_ref(refname) or None
        w = cli.new_packwriter(compression_level=opt.compress)
    else:
        cli = None
        oldref = refname and git.read_ref(refname) or None
        w = git.PackWriter(compression_level=opt.compress)

    handle_ctrl_c()

    # Metadata is stored in a file named .bupm in each directory.  The
    # first metadata entry will be the metadata for the current directory.
    # The remaining entries will be for each of the other directory
    # elements, in the order they're listed in the index.
    #
    # Since the git tree elements are sorted according to
    # git.shalist_item_sort_key, the metalist items are accumulated as
    # (sort_key, metadata) tuples, and then sorted when the .bupm file is
    # created.  The sort_key should have been computed using the element's
    # mangled name and git mode (after hashsplitting), but the code isn't
    # actually doing that but rather uses the element's real name and mode.
    # This makes things a bit more difficult when reading it back, see
    # vfs.ordered_tree_entries().

    # Maintain a stack of information representing the current location in
    # the archive being constructed.  The current path is recorded in
    # parts, which will be something like ['', 'home', 'someuser'], and
    # the accumulated content and metadata for of the dirs in parts is
    # stored in parallel stacks in shalists and metalists.

    parts = []  # Current archive position (stack of dir names).
    shalists = []  # Hashes for each dir in paths.
    metalists = []  # Metadata for each dir in paths.

    def _push(part, metadata):
        # Enter a new archive directory -- make it the current directory.
        parts.append(part)
        shalists.append([])
        metalists.append([(b'', metadata)])  # This dir's metadata (no name).

    def _pop(force_tree, dir_metadata=None):
        # Leave the current archive directory and add its tree to its parent.
        assert (len(parts) >= 1)
        part = parts.pop()
        shalist = shalists.pop()
        metalist = metalists.pop()
        # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
        if force_tree:
            tree = force_tree
        else:
            names_seen = set()
            clean_list = []
            metaidx = 1  # entry at 0 is for the dir
            for x in shalist:
                name = x[1]
                if name in names_seen:
                    parent_path = b'/'.join(parts) + b'/'
                    add_error('error: ignoring duplicate path %s in %s' %
                              (path_msg(name), path_msg(parent_path)))
                    if not stat.S_ISDIR(x[0]):
                        del metalist[metaidx]
                else:
                    names_seen.add(name)
                    clean_list.append(x)
                    if not stat.S_ISDIR(x[0]):
                        metaidx += 1

            if dir_metadata:  # Override the original metadata pushed for this dir.
                metalist = [(b'', dir_metadata)] + metalist[1:]
            sorted_metalist = sorted(metalist, key=lambda x: x[0])
            metadata = b''.join([m[1].encode() for m in sorted_metalist])
            metadata_f = BytesIO(metadata)
            mode, id = hashsplit.split_to_blob_or_tree(w.new_blob,
                                                       w.new_tree,
                                                       [metadata_f],
                                                       keep_boundaries=False)
            clean_list.append((mode, b'.bupm', id))

            tree = w.new_tree(clean_list)
        if shalists:
            shalists[-1].append((GIT_MODE_TREE,
                                 git.mangle_name(part, GIT_MODE_TREE,
                                                 GIT_MODE_TREE), tree))
        return tree

    _nonlocal['count'] = 0
    _nonlocal['subcount'] = 0
    _nonlocal['lastremain'] = None

    def progress_report(n):
        _nonlocal['subcount'] += n
        cc = _nonlocal['count'] + _nonlocal['subcount']
        pct = total and (cc * 100.0 / total) or 0
        now = time.time()
        elapsed = now - tstart
        kps = elapsed and int(cc / 1024. / elapsed)
        kps_frac = 10**int(math.log(kps + 1, 10) - 1)
        kps = int(kps / kps_frac) * kps_frac
        if cc:
            remain = elapsed * 1.0 / cc * (total - cc)
        else:
            remain = 0.0
        if (_nonlocal['lastremain'] and (remain > _nonlocal['lastremain']) and
            ((remain - _nonlocal['lastremain']) / _nonlocal['lastremain'] <
             0.05)):
            remain = _nonlocal['lastremain']
        else:
            _nonlocal['lastremain'] = remain
        hours = int(remain / 60 / 60)
        mins = int(remain / 60 - hours * 60)
        secs = int(remain - hours * 60 * 60 - mins * 60)
        if elapsed < 30:
            remainstr = ''
            kpsstr = ''
        else:
            kpsstr = '%dk/s' % kps
            if hours:
                remainstr = '%dh%dm' % (hours, mins)
            elif mins:
                remainstr = '%dm%d' % (mins, secs)
            else:
                remainstr = '%ds' % secs
        qprogress(
            'Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r' %
            (pct, cc / 1024, total / 1024, fcount, ftotal, remainstr, kpsstr))

    indexfile = opt.indexfile or git.repo(b'bupindex')
    r = index.Reader(indexfile)
    try:
        msr = index.MetaStoreReader(indexfile + b'.meta')
    except IOError as ex:
        if ex.errno != EACCES:
            raise
        log('error: cannot access %r; have you run bup index?' %
            path_msg(indexfile))
        sys.exit(1)
    hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')

    def already_saved(ent):
        return ent.is_valid() and w.exists(ent.sha) and ent.sha

    def wantrecurse_pre(ent):
        return not already_saved(ent)

    def wantrecurse_during(ent):
        return not already_saved(ent) or ent.sha_missing()

    def find_hardlink_target(hlink_db, ent):
        if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
            link_paths = hlink_db.node_paths(ent.dev, ent.ino)
            if link_paths:
                return link_paths[0]

    total = ftotal = 0
    if opt.progress:
        for (transname, ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
            if not (ftotal % 10024):
                qprogress('Reading index: %d\r' % ftotal)
            exists = ent.exists()
            hashvalid = already_saved(ent)
            ent.set_sha_missing(not hashvalid)
            if not opt.smaller or ent.size < opt.smaller:
                if exists and not hashvalid:
                    total += ent.size
            ftotal += 1
        progress('Reading index: %d, done.\n' % ftotal)
        hashsplit.progress_callback = progress_report

    # Root collisions occur when strip or graft options map more than one
    # path to the same directory (paths which originally had separate
    # parents).  When that situation is detected, use empty metadata for
    # the parent.  Otherwise, use the metadata for the common parent.
    # Collision example: "bup save ... --strip /foo /foo/bar /bar".

    # FIXME: Add collision tests, or handle collisions some other way.

    # FIXME: Detect/handle strip/graft name collisions (other than root),
    # i.e. if '/foo/bar' and '/bar' both map to '/'.

    first_root = None
    root_collision = None
    tstart = time.time()
    fcount = 0
    lastskip_name = None
    lastdir = b''
    for (transname, ent) in r.filter(extra, wantrecurse=wantrecurse_during):
        (dir, file) = os.path.split(ent.name)
        exists = (ent.flags & index.IX_EXISTS)
        hashvalid = already_saved(ent)
        wasmissing = ent.sha_missing()
        oldsize = ent.size
        if opt.verbose:
            if not exists:
                status = 'D'
            elif not hashvalid:
                if ent.sha == index.EMPTY_SHA:
                    status = 'A'
                else:
                    status = 'M'
            else:
                status = ' '
            if opt.verbose >= 2:
                log('%s %-70s\n' % (status, path_msg(ent.name)))
            elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
                if not lastdir.startswith(dir):
                    log('%s %-70s\n' %
                        (status, path_msg(os.path.join(dir, b''))))
                lastdir = dir

        if opt.progress:
            progress_report(0)
        fcount += 1

        if not exists:
            continue
        if opt.smaller and ent.size >= opt.smaller:
            if exists and not hashvalid:
                if opt.verbose:
                    log('skipping large file "%s"\n' % path_msg(ent.name))
                lastskip_name = ent.name
            continue

        assert (dir.startswith(b'/'))
        if opt.strip:
            dirp = stripped_path_components(dir, extra)
        elif opt.strip_path:
            dirp = stripped_path_components(dir, [opt.strip_path])
        elif graft_points:
            dirp = grafted_path_components(graft_points, dir)
        else:
            dirp = path_components(dir)

        # At this point, dirp contains a representation of the archive
        # path that looks like [(archive_dir_name, real_fs_path), ...].
        # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
        # might look like this at some point:
        #   [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].

        # This dual representation supports stripping/grafting, where the
        # archive path may not have a direct correspondence with the
        # filesystem.  The root directory is represented by an initial
        # component named '', and any component that doesn't have a
        # corresponding filesystem directory (due to grafting, for
        # example) will have a real_fs_path of None, i.e. [('', None),
        # ...].

        if first_root == None:
            first_root = dirp[0]
        elif first_root != dirp[0]:
            root_collision = True

        # If switching to a new sub-tree, finish the current sub-tree.
        while parts > [x[0] for x in dirp]:
            _pop(force_tree=None)

        # If switching to a new sub-tree, start a new sub-tree.
        for path_component in dirp[len(parts):]:
            dir_name, fs_path = path_component
            # Not indexed, so just grab the FS metadata or use empty metadata.
            try:
                meta = metadata.from_path(fs_path, normalized=True) \
                    if fs_path else metadata.Metadata()
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = dir_name
                meta = metadata.Metadata()
            _push(dir_name, meta)

        if not file:
            if len(parts) == 1:
                continue  # We're at the top level -- keep the current root dir
            # Since there's no filename, this is a subdir -- finish it.
            oldtree = already_saved(ent)  # may be None
            newtree = _pop(force_tree=oldtree)
            if not oldtree:
                if lastskip_name and lastskip_name.startswith(ent.name):
                    ent.invalidate()
                else:
                    ent.validate(GIT_MODE_TREE, newtree)
                ent.repack()
            if exists and wasmissing:
                _nonlocal['count'] += oldsize
            continue

        # it's not a directory
        if hashvalid:
            id = ent.sha
            git_name = git.mangle_name(file, ent.mode, ent.gitmode)
            git_info = (ent.gitmode, git_name, id)
            shalists[-1].append(git_info)
            sort_key = git.shalist_item_sort_key((ent.mode, file, id))
            meta = msr.metadata_at(ent.meta_ofs)
            meta.hardlink_target = find_hardlink_target(hlink_db, ent)
            # Restore the times that were cleared to 0 in the metastore.
            (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime,
                                                    ent.ctime)
            metalists[-1].append((sort_key, meta))
        else:
            id = None
            hlink = find_hardlink_target(hlink_db, ent)
            try:
                meta = metadata.from_path(
                    ent.name,
                    hardlink_target=hlink,
                    normalized=True,
                    after_stat=after_nondir_metadata_stat)
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = ent.name
                continue
            if stat.S_IFMT(ent.mode) != stat.S_IFMT(meta.mode):
                # The mode changed since we indexed the file, this is bad.
                # This can cause two issues:
                # 1) We e.g. think the file is a regular file, but now it's
                #    something else (a device, socket, FIFO or symlink, etc.)
                #    and _read_ from it when we shouldn't.
                # 2) We then record it as valid, but don't update the index
                #    metadata, and on a subsequent save it has 'hashvalid'
                #    but is recorded as the file type from the index, when
                #    the content is something else ...
                # Avoid all of these consistency issues by just skipping such
                # things - it really ought to not happen anyway.
                add_error("%s: mode changed since indexing, skipping." %
                          path_msg(ent.name))
                lastskip_name = ent.name
                continue
            if stat.S_ISREG(ent.mode):
                try:
                    # If the file changes while we're reading it, then our reading
                    # may stop at some point, but the stat() above may have gotten
                    # a different size already. Recalculate the meta size so that
                    # the repository records the accurate size in the metadata, even
                    # if the other stat() data might be slightly older than the file
                    # content (which we can't fix, this is inherently racy, but we
                    # can prevent the size mismatch.)
                    meta.size = 0

                    def new_blob(data):
                        meta.size += len(data)
                        return w.new_blob(data)

                    before_saving_regular_file(ent.name)
                    with hashsplit.open_noatime(ent.name) as f:
                        (mode, id) = hashsplit.split_to_blob_or_tree(
                            new_blob, w.new_tree, [f], keep_boundaries=False)
                except (IOError, OSError) as e:
                    add_error('%s: %s' % (ent.name, e))
                    lastskip_name = ent.name
            elif stat.S_ISDIR(ent.mode):
                assert (0)  # handled above
            elif stat.S_ISLNK(ent.mode):
                mode, id = (GIT_MODE_SYMLINK, w.new_blob(meta.symlink_target))
            else:
                # Everything else should be fully described by its
                # metadata, so just record an empty blob, so the paths
                # in the tree and .bupm will match up.
                (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))

            if id:
                ent.validate(mode, id)
                ent.repack()
                git_name = git.mangle_name(file, ent.mode, ent.gitmode)
                git_info = (mode, git_name, id)
                shalists[-1].append(git_info)
                sort_key = git.shalist_item_sort_key((ent.mode, file, id))
                metalists[-1].append((sort_key, meta))

        if exists and wasmissing:
            _nonlocal['count'] += oldsize
            _nonlocal['subcount'] = 0

    if opt.progress:
        pct = total and _nonlocal['count'] * 100.0 / total or 100
        progress(
            'Saving: %.2f%% (%d/%dk, %d/%d files), done.    \n' %
            (pct, _nonlocal['count'] / 1024, total / 1024, fcount, ftotal))

    while len(parts) > 1:  # _pop() all the parts above the root
        _pop(force_tree=None)
    assert (len(shalists) == 1)
    assert (len(metalists) == 1)

    # Finish the root directory.
    tree = _pop(
        force_tree=None,
        # When there's a collision, use empty metadata for the root.
        dir_metadata=metadata.Metadata() if root_collision else None)

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.tree:
        out.write(hexlify(tree))
        out.write(b'\n')
    if opt.commit or name:
        if compat.py_maj > 2:
            # Strip b prefix from python 3 bytes reprs to preserve previous format
            msgcmd = b'[%s]' % b', '.join(
                [repr(argv_bytes(x))[1:].encode('ascii') for x in argv])
        else:
            msgcmd = repr(argv)
        msg = b'bup save\n\nGenerated by command:\n%s\n' % msgcmd
        userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
        commit = w.new_commit(tree, oldref, userline, date, None, userline,
                              date, None, msg)
        if opt.commit:
            out.write(hexlify(commit))
            out.write(b'\n')

    msr.close()
    w.close()  # must close before we can update the ref

    if opt.name:
        if cli:
            cli.update_ref(refname, commit, oldref)
        else:
            git.update_ref(refname, commit, oldref)

    if cli:
        cli.close()

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' %
            len(saved_errors))
        sys.exit(1)
Пример #27
0
def main():
    handle_ctrl_c()
    is_reverse = os.environ.get('BUP_SERVER_REVERSE')
    opt = parse_args(sys.argv)
    git.check_repo_or_die()
    src_dir = opt.source or git.repo()
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)
    if is_reverse and opt.remote:
        misuse("don't use -r in reverse mode; it's automatic")
    if opt.remote or is_reverse:
        dest_repo = RemoteRepo(opt.remote)
    else:
        dest_repo = LocalRepo()

    with dest_repo as dest_repo:
        with LocalRepo(repo_dir=src_dir) as src_repo:
            with dest_repo.new_packwriter(
                    compression_level=opt.compress) as writer:

                src_repo = LocalRepo(repo_dir=src_dir)

                # Resolve and validate all sources and destinations,
                # implicit or explicit, and do it up-front, so we can
                # fail before we start writing (for any obviously
                # broken cases).
                target_items = resolve_targets(opt.target_specs, src_repo,
                                               dest_repo)

                updated_refs = {
                }  # ref_name -> (original_ref, tip_commit(bin))
                no_ref_info = (None, None)

                handlers = {
                    'ff': handle_ff,
                    'append': handle_append,
                    'force-pick': handle_pick,
                    'pick': handle_pick,
                    'new-tag': handle_new_tag,
                    'replace': handle_replace,
                    'unnamed': handle_unnamed
                }

                for item in target_items:
                    debug1('get-spec: %s\n' % str(item.spec))
                    debug1('get-src: %s\n' % loc_desc(item.src))
                    debug1('get-dest: %s\n' % loc_desc(item.dest))
                    dest_path = item.dest and item.dest.path
                    if dest_path:
                        if dest_path.startswith('/.tag/'):
                            dest_ref = 'refs/tags/%s' % dest_path[6:]
                        else:
                            dest_ref = 'refs/heads/%s' % dest_path[1:]
                    else:
                        dest_ref = None

                    dest_hash = item.dest and item.dest.hash
                    orig_ref, cur_ref = updated_refs.get(dest_ref, no_ref_info)
                    orig_ref = orig_ref or dest_hash
                    cur_ref = cur_ref or dest_hash

                    handler = handlers[item.spec.method]
                    item_result = handler(item, src_repo, writer, opt)
                    if len(item_result) > 1:
                        new_id, tree = item_result
                    else:
                        new_id = item_result[0]

                    if not dest_ref:
                        log_item(item.spec.src, item.src.type, opt)
                    else:
                        updated_refs[dest_ref] = (orig_ref, new_id)
                        if dest_ref.startswith('refs/tags/'):
                            log_item(item.spec.src,
                                     item.src.type,
                                     opt,
                                     tag=new_id)
                        else:
                            log_item(item.spec.src,
                                     item.src.type,
                                     opt,
                                     tree=tree,
                                     commit=new_id)

        # Only update the refs at the very end, once the writer is
        # closed, so that if something goes wrong above, the old refs
        # will be undisturbed.
        for ref_name, info in updated_refs.iteritems():
            orig_ref, new_ref = info
            try:
                dest_repo.update_ref(ref_name, new_ref, orig_ref)
                if opt.verbose:
                    new_hex = new_ref.encode('hex')
                    if orig_ref:
                        orig_hex = orig_ref.encode('hex')
                        log('updated %r (%s -> %s)\n' %
                            (ref_name, orig_hex, new_hex))
                    else:
                        log('updated %r (%s)\n' % (ref_name, new_hex))
            except (git.GitError, client.ClientError), ex:
                add_error('unable to update ref %r: %s' % (ref_name, ex))
Пример #28
0
--
n,number=  number of objects per cycle [10000]
c,cycles=  number of cycles to run [100]
ignore-midx  ignore .midx files, use only .idx files
existing   test with existing objects instead of fake ones
"""
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra:
    o.fatal('no arguments expected')

git.ignore_midx = opt.ignore_midx

git.check_repo_or_die()
m = git.PackIdxList(git.repo('objects/pack'))

report(-1)
_helpers.random_sha()
report(0)

if opt.existing:

    def foreverit(mi):
        while 1:
            for e in mi:
                yield e

    objit = iter(foreverit(m))

for c in xrange(opt.cycles):
Пример #29
0
if extra:
    o.fatal('no positional parameters expected')

if opt.threshold:
    try:
        opt.threshold = int(opt.threshold)
    except ValueError:
        o.fatal('threshold must be an integer percentage value')
    if opt.threshold < 0 or opt.threshold > 100:
        o.fatal('threshold must be an integer percentage value')

git.check_repo_or_die()

cat_pipe = vfs.cp()
existing_count = count_objects(git.repo('objects/pack'))
if opt.verbose:
    log('found %d objects\n' % existing_count)
if not existing_count:
    if opt.verbose:
        log('nothing to collect\n')
else:
    live_objects = find_live_objects(existing_count, cat_pipe, opt)
    try:
        # FIXME: just rename midxes and bloom, and restore them at the end if
        # we didn't change any packs?
        if opt.verbose: log('clearing midx files\n')
        midx.clear_midxes()
        if opt.verbose: log('clearing bloom filter\n')
        bloom.clear_bloom(git.repo('objects/pack'))
        if opt.verbose: log('clearing reflog\n')
Пример #30
0
def update_index(top, excluded_paths, exclude_rxs):
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    ri = index.Reader(indexfile)
    msw = index.MetaStoreWriter(indexfile + '.meta')
    wi = index.Writer(indexfile, msw, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    hashgen = None
    if opt.fake_valid:

        def hashgen(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()
    for (path,
         pst) in drecurse.recursive_dirlist([top],
                                            xdev=opt.xdev,
                                            bup_dir=bup_dir,
                                            excluded_paths=excluded_paths,
                                            exclude_rxs=exclude_rxs):
        if opt.verbose >= 2 or (opt.verbose == 1
                                and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        elif not (total % 128):
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        total += 1
        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
                    hlinks.del_path(rig.cur.name)
            rig.next()
        if rig.cur and rig.cur.name == path:  # paths that already existed
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError), e:
                add_error(e)
                rig.next()
                continue
            if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
                hlinks.del_path(rig.cur.name)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
            # Clear these so they don't bloat the store -- they're
            # already in the index (since they vary a lot and they're
            # fixed length).  If you've noticed "tmax", you might
            # wonder why it's OK to do this, since that code may
            # adjust (mangle) the index mtime and ctime -- producing
            # fake values which must not end up in a .bupm.  However,
            # it looks like that shouldn't be possible:  (1) When
            # "save" validates the index entry, it always reads the
            # metadata from the filesytem. (2) Metadata is only
            # read/used from the index if hashvalid is true. (3) index
            # always invalidates "faked" entries, because "old != new"
            # in from_stat().
            meta.ctime = meta.mtime = meta.atime = 0
            meta_ofs = msw.store(meta)
            rig.cur.from_stat(pst,
                              meta_ofs,
                              tstart,
                              check_device=opt.check_device)
            if not (rig.cur.flags & index.IX_HASHVALID):
                if hashgen:
                    (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
                    rig.cur.flags |= index.IX_HASHVALID
            if opt.fake_invalid:
                rig.cur.invalidate()
            rig.cur.repack()
            rig.next()
        else:  # new paths
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError), e:
                add_error(e)
                continue
            # See same assignment to 0, above, for rationale.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = msw.store(meta)
            wi.add(path, pst, meta_ofs, hashgen=hashgen)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
Пример #31
0
(opt, flags, extra) = o.parse(sys.argv[1:])

par2_setup()
if opt.par2_ok:
    if par2_ok:
        sys.exit(0)  # 'true' in sh
    else:
        sys.exit(1)
if opt.disable_par2:
    par2_ok = 0

git.check_repo_or_die()

if not extra:
    debug('fsck: No filenames given: checking all packs.\n')
    extra = glob.glob(git.repo('objects/pack/*.pack'))

code = 0
count = 0
outstanding = {}
for name in extra:
    if name.endswith('.pack'):
        base = name[:-5]
    elif name.endswith('.idx'):
        base = name[:-4]
    elif name.endswith('.par2'):
        base = name[:-5]
    elif os.path.exists(name + '.pack'):
        base = name
    else:
        raise Exception('%s is not a pack file!' % name)
Пример #32
0
            opt.format,
            {
                "status": "saving",
                "percentage": pct,
                "bytes_done": cc,
                "bytes_total": total,
                "files_done": fcount,
                "files_total": ftotal,
                "time_remaining": remain,
                "time_remaining_str": remainstr,
                "speed": kps,
            },
        )


indexfile = opt.indexfile or git.repo("bupindex")
r = index.Reader(indexfile)
try:
    msr = index.MetaStoreReader(indexfile + ".meta")
except IOError, ex:
    if ex.errno != EACCES:
        raise
    log("error: cannot access %r; have you run bup index?" % indexfile)
    sys.exit(1)
hlink_db = hlinkdb.HLinkDB(indexfile + ".hlink")


def already_saved(ent):
    return ent.is_valid() and w.exists(ent.sha) and ent.sha

Пример #33
0
def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions):
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    ri = index.Reader(indexfile)
    msw = index.MetaStoreWriter(indexfile + '.meta')
    wi = index.Writer(indexfile, msw, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    fake_hash = None
    if opt.fake_valid:

        def fake_hash(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()
    for path, pst in recursive_dirlist([top],
                                       xdev=opt.xdev,
                                       bup_dir=bup_dir,
                                       excluded_paths=excluded_paths,
                                       exclude_rxs=exclude_rxs,
                                       xdev_exceptions=xdev_exceptions):
        if opt.verbose >= 2 or (opt.verbose == 1
                                and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        elif not (total % 128):
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        total += 1

        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
                    hlinks.del_path(rig.cur.name)
            rig.next()

        if rig.cur and rig.cur.name == path:  # paths that already existed
            need_repack = False
            if (rig.cur.stale(pst, tstart, check_device=opt.check_device)):
                try:
                    meta = metadata.from_path(path, statinfo=pst)
                except (OSError, IOError) as e:
                    add_error(e)
                    rig.next()
                    continue
                if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
                    hlinks.del_path(rig.cur.name)
                if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                    hlinks.add_path(path, pst.st_dev, pst.st_ino)
                # Clear these so they don't bloat the store -- they're
                # already in the index (since they vary a lot and they're
                # fixed length).  If you've noticed "tmax", you might
                # wonder why it's OK to do this, since that code may
                # adjust (mangle) the index mtime and ctime -- producing
                # fake values which must not end up in a .bupm.  However,
                # it looks like that shouldn't be possible:  (1) When
                # "save" validates the index entry, it always reads the
                # metadata from the filesytem. (2) Metadata is only
                # read/used from the index if hashvalid is true. (3)
                # "faked" entries will be stale(), and so we'll invalidate
                # them below.
                meta.ctime = meta.mtime = meta.atime = 0
                meta_ofs = msw.store(meta)
                rig.cur.update_from_stat(pst, meta_ofs)
                rig.cur.invalidate()
                need_repack = True
            if not (rig.cur.flags & index.IX_HASHVALID):
                if fake_hash:
                    rig.cur.gitmode, rig.cur.sha = fake_hash(path)
                    rig.cur.flags |= index.IX_HASHVALID
                    need_repack = True
            if opt.fake_invalid:
                rig.cur.invalidate()
                need_repack = True
            if need_repack:
                rig.cur.repack()
            rig.next()
        else:  # new paths
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                continue
            # See same assignment to 0, above, for rationale.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = msw.store(meta)
            wi.add(path, pst, meta_ofs, hashgen=fake_hash)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)

    elapsed = time.time() - index_start
    paths_per_sec = total / elapsed if elapsed else 0
    progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))

    hlinks.prepare_save()

    if ri.exists():
        ri.save()
        wi.flush()
        if wi.count:
            wr = wi.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(ri)
                log('check: before merging: newfile\n')
                check_index(wr)
            mi = index.Writer(indexfile, msw, tmax)

            for e in index.merge(ri, wr):
                # FIXME: shouldn't we remove deleted entries eventually?  When?
                mi.add_ixentry(e)

            ri.close()
            mi.close()
            wr.close()
        wi.abort()
    else:
        wi.close()

    msw.close()
    hlinks.commit_save()
Пример #34
0
def update_index(top, excluded_paths, exclude_rxs):
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    ri = index.Reader(indexfile)
    msw = index.MetaStoreWriter(indexfile + '.meta')
    wi = index.Writer(indexfile, msw, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    hashgen = None
    if opt.fake_valid:
        def hashgen(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    for (path,pst) in drecurse.recursive_dirlist([top], xdev=opt.xdev,
                                                 bup_dir=bup_dir,
                                                 excluded_paths=excluded_paths,
                                                 exclude_rxs=exclude_rxs):
        if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            qprogress('Indexing: %d\r' % total)
        elif not (total % 128):
            qprogress('Indexing: %d\r' % total)
        total += 1
        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
                    hlinks.del_path(rig.cur.name)
            rig.next()
        if rig.cur and rig.cur.name == path:    # paths that already existed
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError), e:
                add_error(e)
                rig.next()
                continue
            if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
                hlinks.del_path(rig.cur.name)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
            # Clear these so they don't bloat the store -- they're
            # already in the index (since they vary a lot and they're
            # fixed length).  If you've noticed "tmax", you might
            # wonder why it's OK to do this, since that code may
            # adjust (mangle) the index mtime and ctime -- producing
            # fake values which must not end up in a .bupm.  However,
            # it looks like that shouldn't be possible:  (1) When
            # "save" validates the index entry, it always reads the
            # metadata from the filesytem. (2) Metadata is only
            # read/used from the index if hashvalid is true. (3) index
            # always invalidates "faked" entries, because "old != new"
            # in from_stat().
            meta.ctime = meta.mtime = meta.atime = 0
            meta_ofs = msw.store(meta)
            rig.cur.from_stat(pst, meta_ofs, tstart,
                              check_device=opt.check_device)
            if not (rig.cur.flags & index.IX_HASHVALID):
                if hashgen:
                    (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
                    rig.cur.flags |= index.IX_HASHVALID
            if opt.fake_invalid:
                rig.cur.invalidate()
            rig.cur.repack()
            rig.next()
        else:  # new paths
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError), e:
                add_error(e)
                continue
            # See same assignment to 0, above, for rationale.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = msw.store(meta)
            wi.add(path, pst, meta_ofs, hashgen = hashgen)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)
Пример #35
0
if (opt.fake_valid or opt.fake_invalid) and not opt.update:
    o.fatal('--fake-{in,}valid are meaningless without -u')
if opt.fake_valid and opt.fake_invalid:
    o.fatal('--fake-valid is incompatible with --fake-invalid')
if opt.clear and opt.indexfile:
    o.fatal('cannot clear an external index (via -f)')

# FIXME: remove this once we account for timestamp races, i.e. index;
# touch new-file; index.  It's possible for this to happen quickly
# enough that new-file ends up with the same timestamp as the first
# index, and then bup will ignore it.
tick_start = time.time()
time.sleep(1 - (tick_start - int(tick_start)))

git.check_repo_or_die()
indexfile = opt.indexfile or git.repo('bupindex')

handle_ctrl_c()

if opt.check:
    log('check: starting initial check.\n')
    check_index(index.Reader(indexfile))

if opt.clear:
    log('clear: clearing index.\n')
    clear_index(indexfile)

excluded_paths = parse_excludes(flags, o.fatal)
exclude_rxs = parse_rx_excludes(flags, o.fatal)
paths = index.reduce_paths(extra)
Пример #36
0
def sweep(live_objects, existing_count, cat_pipe, opt):
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        if opt.verbose and new_pack_prefix:
            log('created ' + basename(new_pack_prefix) + '\n')
        for p in ns.stale_files:
            if opt.verbose:
                log('removing ' + basename(p) + '\n')
            os.unlink(p)
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=opt.compress,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)

    # FIXME: sanity check .idx names vs .pack names?
    collect_count = 0
    for idx_name in glob.glob(os.path.join(git.repo('objects/pack'), '*.idx')):
        if opt.verbose:
            qprogress('preserving live data (%d%% complete)\r' %
                      ((float(collect_count) / existing_count) * 100))
        idx = git.open_idx(idx_name)

        idx_live_count = 0
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20:(i + 1) * 20]
            if live_objects.exists(sha):
                idx_live_count += 1

        collect_count += idx_live_count
        if idx_live_count == 0:
            if opt.verbose:
                log('deleting %s\n' % git.repo_rel(basename(idx_name)))
            ns.stale_files.append(idx_name)
            ns.stale_files.append(idx_name[:-3] + 'pack')
            continue

        live_frac = idx_live_count / float(len(idx))
        if live_frac > ((100 - opt.threshold) / 100.0):
            if opt.verbose:
                log('keeping %s (%d%% live)\n' %
                    (git.repo_rel(basename(idx_name)), live_frac * 100))
            continue

        if opt.verbose:
            log('rewriting %s (%.2f%% live)\n' %
                (basename(idx_name), live_frac * 100))
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20:(i + 1) * 20]
            if live_objects.exists(sha):
                item_it = cat_pipe.get(sha.encode('hex'))
                type = item_it.next()
                writer.write(sha, type, ''.join(item_it))

        ns.stale_files.append(idx_name)
        ns.stale_files.append(idx_name[:-3] + 'pack')

    if opt.verbose:
        progress('preserving live data (%d%% complete)\n' %
                 ((float(collect_count) / existing_count) * 100))

    # Nothing should have recreated midx/bloom yet.
    pack_dir = git.repo('objects/pack')
    assert (not os.path.exists(os.path.join(pack_dir, 'bup.bloom')))
    assert (not glob.glob(os.path.join(pack_dir, '*.midx')))

    # try/catch should call writer.abort()?
    # This will finally run midx.
    writer.close()  # Can only change refs (if needed) after this.
    remove_stale_files(None)  # In case we didn't write to the writer.

    if opt.verbose:
        log('discarded %d%% of objects\n' %
            ((existing_count - count_objects(pack_dir)) /
             float(existing_count) * 100))
Пример #37
0
def main(argv):
    global opt, par2_ok

    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    opt.verbose = opt.verbose or 0

    par2_setup()
    if opt.par2_ok:
        if par2_ok:
            sys.exit(0)  # 'true' in sh
        else:
            sys.exit(1)
    if opt.disable_par2:
        par2_ok = 0

    git.check_repo_or_die()

    if extra:
        extra = [argv_bytes(x) for x in extra]
    else:
        debug('fsck: No filenames given: checking all packs.\n')
        extra = glob.glob(git.repo(b'objects/pack/*.pack'))

    sys.stdout.flush()
    out = byte_stream(sys.stdout)
    code = 0
    count = 0
    outstanding = {}
    for name in extra:
        if name.endswith(b'.pack'):
            base = name[:-5]
        elif name.endswith(b'.idx'):
            base = name[:-4]
        elif name.endswith(b'.par2'):
            base = name[:-5]
        elif os.path.exists(name + b'.pack'):
            base = name
        else:
            raise Exception('%r is not a pack file!' % name)
        (dir, last) = os.path.split(base)
        par2_exists = os.path.exists(base + b'.par2')
        if par2_exists and os.stat(base + b'.par2').st_size == 0:
            par2_exists = 0
        sys.stdout.flush(
        )  # Not sure we still need this, but it'll flush out too
        debug('fsck: checking %r (%s)\n' %
              (last, par2_ok and par2_exists and 'par2' or 'git'))
        if not opt.verbose:
            progress('fsck (%d/%d)\r' % (count, len(extra)))

        if not opt.jobs:
            nc = do_pack(base, last, par2_exists, out)
            code = code or nc
            count += 1
        else:
            while len(outstanding) >= opt.jobs:
                (pid, nc) = os.wait()
                nc >>= 8
                if pid in outstanding:
                    del outstanding[pid]
                    code = code or nc
                    count += 1
            pid = os.fork()
            if pid:  # parent
                outstanding[pid] = 1
            else:  # child
                try:
                    sys.exit(do_pack(base, last, par2_exists, out))
                except Exception as e:
                    log('exception: %r\n' % e)
                    sys.exit(99)

    while len(outstanding):
        (pid, nc) = os.wait()
        nc >>= 8
        if pid in outstanding:
            del outstanding[pid]
            code = code or nc
            count += 1
        if not opt.verbose:
            progress('fsck (%d/%d)\r' % (count, len(extra)))

    if istty2:
        debug('fsck done.           \n')
    sys.exit(code)
Пример #38
0
def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if not (opt.modified or \
            opt['print'] or \
            opt.status or \
            opt.update or \
            opt.check or \
            opt.clear):
        opt.update = 1
    if (opt.fake_valid or opt.fake_invalid) and not opt.update:
        o.fatal('--fake-{in,}valid are meaningless without -u')
    if opt.fake_valid and opt.fake_invalid:
        o.fatal('--fake-valid is incompatible with --fake-invalid')
    if opt.clear and opt.indexfile:
        o.fatal('cannot clear an external index (via -f)')

    # FIXME: remove this once we account for timestamp races, i.e. index;
    # touch new-file; index.  It's possible for this to happen quickly
    # enough that new-file ends up with the same timestamp as the first
    # index, and then bup will ignore it.
    tick_start = time.time()
    time.sleep(1 - (tick_start - int(tick_start)))

    git.check_repo_or_die()

    handle_ctrl_c()

    if opt.verbose is None:
        opt.verbose = 0

    if opt.indexfile:
        indexfile = argv_bytes(opt.indexfile)
    else:
        indexfile = git.repo(b'bupindex')

    if opt.check:
        log('check: starting initial check.\n')
        check_index(index.Reader(indexfile), opt.verbose)

    if opt.clear:
        log('clear: clearing index.\n')
        clear_index(indexfile, opt.verbose)

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.update:
        if not extra:
            o.fatal('update mode (-u) requested but no paths given')
        extra = [argv_bytes(x) for x in extra]
        excluded_paths = parse_excludes(flags, o.fatal)
        exclude_rxs = parse_rx_excludes(flags, o.fatal)
        xexcept = index.unique_resolved_paths(extra)
        for rp, path in index.reduce_paths(extra):
            update_index(rp,
                         excluded_paths,
                         exclude_rxs,
                         indexfile,
                         check=opt.check,
                         check_device=opt.check_device,
                         xdev=opt.xdev,
                         xdev_exceptions=xexcept,
                         fake_valid=opt.fake_valid,
                         fake_invalid=opt.fake_invalid,
                         out=out,
                         verbose=opt.verbose)

    if opt['print'] or opt.status or opt.modified:
        extra = [argv_bytes(x) for x in extra]
        for name, ent in index.Reader(indexfile).filter(extra or [b'']):
            if (opt.modified
                    and (ent.is_valid() or ent.is_deleted() or not ent.mode)):
                continue
            line = b''
            if opt.status:
                if ent.is_deleted():
                    line += b'D '
                elif not ent.is_valid():
                    if ent.sha == index.EMPTY_SHA:
                        line += b'A '
                    else:
                        line += b'M '
                else:
                    line += b'  '
            if opt.hash:
                line += hexlify(ent.sha) + b' '
            if opt.long:
                line += b'%7s %7s ' % (oct(ent.mode).encode('ascii'),
                                       oct(ent.gitmode).encode('ascii'))
            out.write(line + (name or b'./') + b'\n')

    if opt.check and (opt['print'] or opt.status or opt.modified
                      or opt.update):
        log('check: starting final check.\n')
        check_index(index.Reader(indexfile), opt.verbose)

    if saved_errors:
        log('WARNING: %d errors encountered.\n' % len(saved_errors))
        sys.exit(1)
Пример #39
0
    log("bup: error: tag '%s' already exists\n" % path_msg(tag_name))
    sys.exit(1)

if tag_name.startswith(b'.'):
    o.fatal("'%s' is not a valid tag name." % path_msg(tag_name))

try:
    hash = git.rev_parse(commit)
except git.GitError as e:
    log("bup: error: %s" % e)
    sys.exit(2)

if not hash:
    log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
    sys.exit(2)

pL = git.PackIdxList(git.repo(b'objects/pack'))
if not pL.exists(hash):
    log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
    sys.exit(2)

tag_file = git.repo(b'refs/tags/' + tag_name)
try:
    tag = open(tag_file, 'wb')
except OSError as e:
    log("bup: error: could not create tag '%s': %s" % (path_msg(tag_name), e))
    sys.exit(3)
with tag as tag:
    tag.write(hexlify(hash))
    tag.write(b'\n')
Пример #40
0
#!/usr/bin/env python
import sys
from bup import options, git, _hashsplit
from bup.helpers import *


optspec = """
bup margin
"""
o = options.Options("bup margin", optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra:
    o.fatal("no arguments expected")

git.check_repo_or_die()
# git.ignore_midx = 1

mi = git.PackIdxList(git.repo("objects/pack"))
last = "\0" * 20
longmatch = 0
for i in mi:
    if i == last:
        continue
    # assert(str(i) >= last)
    pm = _hashsplit.bitmatch(last, i)
    longmatch = max(longmatch, pm)
    last = i
print longmatch
Пример #41
0
 def list_indexes(self):
     for f in os.listdir(git.repo(b'objects/pack',
                                  repo_dir=self.repo_dir)):
         yield f
Пример #42
0
def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
          verbosity):
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        if verbosity and new_pack_prefix:
            log('created ' + path_msg(basename(new_pack_prefix)) + '\n')
        for p in ns.stale_files:
            if new_pack_prefix and p.startswith(new_pack_prefix):
                continue  # Don't remove the new pack file
            if verbosity:
                log('removing ' + path_msg(basename(p)) + '\n')
            os.unlink(p)
        if ns.stale_files:  # So git cat-pipe will close them
            cat_pipe.restart()
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=compression,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)
    try:
        # FIXME: sanity check .idx names vs .pack names?
        collect_count = 0
        for idx_name in glob.glob(
                os.path.join(git.repo(b'objects/pack'), b'*.idx')):
            if verbosity:
                qprogress('preserving live data (%d%% complete)\r' %
                          ((float(collect_count) / existing_count) * 100))
            with git.open_idx(idx_name) as idx:
                idx_live_count = 0
                for sha in idx:
                    if live_objects.exists(sha):
                        idx_live_count += 1

                collect_count += idx_live_count
                if idx_live_count == 0:
                    if verbosity:
                        log('deleting %s\n' %
                            path_msg(git.repo_rel(basename(idx_name))))
                    ns.stale_files.append(idx_name)
                    ns.stale_files.append(idx_name[:-3] + b'pack')
                    continue

                live_frac = idx_live_count / float(len(idx))
                if live_frac > ((100 - threshold) / 100.0):
                    if verbosity:
                        log('keeping %s (%d%% live)\n' % (git.repo_rel(
                            basename(idx_name)), live_frac * 100))
                    continue

                if verbosity:
                    log('rewriting %s (%.2f%% live)\n' %
                        (basename(idx_name), live_frac * 100))
                for sha in idx:
                    if live_objects.exists(sha):
                        item_it = cat_pipe.get(hexlify(sha))
                        _, typ, _ = next(item_it)
                        writer.just_write(sha, typ, b''.join(item_it))

                ns.stale_files.append(idx_name)
                ns.stale_files.append(idx_name[:-3] + b'pack')

        if verbosity:
            progress('preserving live data (%d%% complete)\n' %
                     ((float(collect_count) / existing_count) * 100))

        # Nothing should have recreated midx/bloom yet.
        pack_dir = git.repo(b'objects/pack')
        assert (not os.path.exists(os.path.join(pack_dir, b'bup.bloom')))
        assert (not glob.glob(os.path.join(pack_dir, b'*.midx')))

    except BaseException as ex:
        with pending_raise(ex):
            writer.abort()

    # This will finally run midx.
    # Can only change refs (if needed) after this.
    writer.close()

    remove_stale_files(None)  # In case we didn't write to the writer.

    if verbosity:
        log('discarded %d%% of objects\n' %
            ((existing_count - count_objects(pack_dir, verbosity)) /
             float(existing_count) * 100))
Пример #43
0
 def send_index(self, name, conn, send_size):
     data = git.open_idx(git.repo(b'objects/pack/%s' % name,
                                  repo_dir=self.repo_dir)).map
     send_size(len(data))
     conn.write(data)
Пример #44
0
# enough that new-file ends up with the same timestamp as the first
# index, and then bup will ignore it.
tick_start = time.time()
time.sleep(1 - (tick_start - int(tick_start)))

git.check_repo_or_die()

handle_ctrl_c()

if opt.verbose is None:
    opt.verbose = 0

if opt.indexfile:
    indexfile = argv_bytes(opt.indexfile)
else:
    indexfile = git.repo(b'bupindex')

if opt.check:
    log('check: starting initial check.\n')
    check_index(index.Reader(indexfile))

if opt.clear:
    log('clear: clearing index.\n')
    clear_index(indexfile)

sys.stdout.flush()
out = byte_stream(sys.stdout)

if opt.update:
    if not extra:
        o.fatal('update mode (-u) requested but no paths given')
Пример #45
0
 def dumb_server_mode(self):
     if self._dumb_server_mode is None:
         self._dumb_server_mode = os.path.exists(git.repo(b'bup-dumb-server',
                                                          repo_dir=self.repo_dir))
     return self._dumb_server_mode
Пример #46
0
 def __init__(self, repo_dir=None):
     self.repo_dir = repo_dir or git.repo()
     self._cp = git.cp(repo_dir)
     self.rev_list = partial(git.rev_list, repo_dir=repo_dir)
Пример #47
0
def update_index(top, excluded_paths):
    tmax = time.time() - 1
    ri = index.Reader(indexfile)
    wi = index.Writer(indexfile, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time())

    hashgen = None
    if opt.fake_valid:

        def hashgen(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    for (path,
         pst) in drecurse.recursive_dirlist([top],
                                            xdev=opt.xdev,
                                            bup_dir=bup_dir,
                                            excluded_paths=excluded_paths):
        if opt.verbose >= 2 or (opt.verbose == 1
                                and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            qprogress('Indexing: %d\r' % total)
        elif not (total % 128):
            qprogress('Indexing: %d\r' % total)
        total += 1
        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
            rig.next()
        if rig.cur and rig.cur.name == path:  # paths that already existed
            if pst:
                rig.cur.from_stat(pst, tstart)
            if not (rig.cur.flags & index.IX_HASHVALID):
                if hashgen:
                    (rig.cur.gitmode, rig.cur.sha) = hashgen(path)
                    rig.cur.flags |= index.IX_HASHVALID
            if opt.fake_invalid:
                rig.cur.invalidate()
            rig.cur.repack()
            rig.next()
        else:  # new paths
            wi.add(path, pst, hashgen=hashgen)
    progress('Indexing: %d, done.\n' % total)

    if ri.exists():
        ri.save()
        wi.flush()
        if wi.count:
            wr = wi.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(ri)
                log('check: before merging: newfile\n')
                check_index(wr)
            mi = index.Writer(indexfile, tmax)

            for e in index.merge(ri, wr):
                # FIXME: shouldn't we remove deleted entries eventually?  When?
                mi.add_ixentry(e)

            ri.close()
            mi.close()
            wr.close()
        wi.abort()
    else:
        wi.close()
Пример #48
0
Файл: gc.py Проект: xx4h/bup
def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
          verbosity):
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []
    def remove_stale_files(new_pack_prefix):
        if verbosity and new_pack_prefix:
            log('created ' + basename(new_pack_prefix) + '\n')
        for p in ns.stale_files:
            if new_pack_prefix and p.startswith(new_pack_prefix):
                continue  # Don't remove the new pack file
            if verbosity:
                log('removing ' + basename(p) + '\n')
            os.unlink(p)
        if ns.stale_files:  # So git cat-pipe will close them
            cat_pipe.restart()
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=compression,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)

    # FIXME: sanity check .idx names vs .pack names?
    collect_count = 0
    for idx_name in glob.glob(os.path.join(git.repo('objects/pack'), '*.idx')):
        if verbosity:
            qprogress('preserving live data (%d%% complete)\r'
                      % ((float(collect_count) / existing_count) * 100))
        idx = git.open_idx(idx_name)

        idx_live_count = 0
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20 : (i + 1) * 20]
            if live_objects.exists(sha):
                idx_live_count += 1

        collect_count += idx_live_count
        if idx_live_count == 0:
            if verbosity:
                log('deleting %s\n'
                    % git.repo_rel(basename(idx_name)))
            ns.stale_files.append(idx_name)
            ns.stale_files.append(idx_name[:-3] + 'pack')
            continue

        live_frac = idx_live_count / float(len(idx))
        if live_frac > ((100 - threshold) / 100.0):
            if verbosity:
                log('keeping %s (%d%% live)\n' % (git.repo_rel(basename(idx_name)),
                                                  live_frac * 100))
            continue

        if verbosity:
            log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
                                                  live_frac * 100))
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20 : (i + 1) * 20]
            if live_objects.exists(sha):
                item_it = cat_pipe.get(sha.encode('hex'))
                type = item_it.next()
                writer.just_write(sha, type, ''.join(item_it))

        ns.stale_files.append(idx_name)
        ns.stale_files.append(idx_name[:-3] + 'pack')

    if verbosity:
        progress('preserving live data (%d%% complete)\n'
                 % ((float(collect_count) / existing_count) * 100))

    # Nothing should have recreated midx/bloom yet.
    pack_dir = git.repo('objects/pack')
    assert(not os.path.exists(os.path.join(pack_dir, 'bup.bloom')))
    assert(not glob.glob(os.path.join(pack_dir, '*.midx')))

    # try/catch should call writer.abort()?
    # This will finally run midx.
    writer.close()  # Can only change refs (if needed) after this.
    remove_stale_files(None)  # In case we didn't write to the writer.

    if verbosity:
        log('discarded %d%% of objects\n'
            % ((existing_count - count_objects(pack_dir, verbosity))
               / float(existing_count) * 100))
Пример #49
0
        remainstr = ''
        kpsstr = ''
    else:
        kpsstr = '%dk/s' % kps
        if hours:
            remainstr = '%dh%dm' % (hours, mins)
        elif mins:
            remainstr = '%dm%d' % (mins, secs)
        else:
            remainstr = '%ds' % secs
    qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
              % (pct, cc/1024, total/1024, fcount, ftotal,
                 remainstr, kpsstr))


indexfile = opt.indexfile or git.repo(b'bupindex')
r = index.Reader(indexfile)
try:
    msr = index.MetaStoreReader(indexfile + b'.meta')
except IOError as ex:
    if ex.errno != EACCES:
        raise
    log('error: cannot access %r; have you run bup index?'
        % path_msg(indexfile))
    sys.exit(1)
hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')

def already_saved(ent):
    return ent.is_valid() and w.exists(ent.sha) and ent.sha

def wantrecurse_pre(ent):
Пример #50
0
assert(opt.max_files >= 5)

if opt.check:
    # check existing midx files
    if extra:
        midxes = extra
    else:
        midxes = []
        paths = opt.dir and [opt.dir] or git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path)
            midxes += glob.glob(os.path.join(path, '*.midx'))
    for name in midxes:
        check_midx(name)
    if not saved_errors:
        log('All tests passed.\n')
else:
    if extra:
        do_midx(git.repo('objects/pack'), opt.output, extra, '')
    elif opt.auto or opt.force:
        paths = opt.dir and [opt.dir] or git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path)
            do_midx_dir(path)
    else:
        o.fatal("you must use -f or -a or provide input filenames")

if saved_errors:
    log('WARNING: %d errors encountered.\n' % len(saved_errors))
    sys.exit(1)
Пример #51
0
assert (opt.max_files >= 5)

if opt.check:
    # check existing midx files
    if extra:
        midxes = extra
    else:
        midxes = []
        paths = opt.dir and [opt.dir] or git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path)
            midxes += glob.glob(os.path.join(path, '*.midx'))
    for name in midxes:
        check_midx(name)
    if not saved_errors:
        log('All tests passed.\n')
else:
    if extra:
        do_midx(git.repo('objects/pack'), opt.output, extra, '')
    elif opt.auto or opt.force:
        paths = opt.dir and [opt.dir] or git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path)
            do_midx_dir(path)
    else:
        o.fatal("you must use -f or -a or provide input filenames")

if saved_errors:
    log('WARNING: %d errors encountered.\n' % len(saved_errors))
    sys.exit(1)
Пример #52
0
optspec = """
bup margin
--
predict    Guess object offsets and report the maximum deviation
ignore-midx  Don't use midx files; use only plain pack idx files.
"""
o = options.Options(optspec)
(opt, flags, extra) = o.parse(sys.argv[1:])

if extra:
    o.fatal("no arguments expected")

git.check_repo_or_die()

mi = git.PackIdxList(git.repo('objects/pack'), ignore_midx=opt.ignore_midx)


def do_predict(ix):
    total = len(ix)
    maxdiff = 0
    for count, i in enumerate(ix):
        prefix = struct.unpack('!Q', i[:8])[0]
        expected = prefix * total / (1 << 64)
        diff = count - expected
        maxdiff = max(maxdiff, abs(diff))
    print('%d of %d (%.3f%%) ' % (maxdiff, len(ix), maxdiff * 100.0 / len(ix)))
    sys.stdout.flush()
    assert (count + 1 == len(ix))

Пример #53
0
def main(argv):
    handle_ctrl_c()
    opt = opts_from_cmdline(argv)
    client.bwlimit = opt.bwlimit
    git.check_repo_or_die()

    remote_dest = opt.remote or opt.is_reverse
    if not remote_dest:
        repo = git
        cli = nullcontext()
    else:
        try:
            cli = repo = client.Client(opt.remote)
        except client.ClientError as e:
            log('error: %s' % e)
            sys.exit(1)

    # cli creation must be last nontrivial command in each if clause above
    with cli:
        if not remote_dest:
            w = git.PackWriter(compression_level=opt.compress)
        else:
            w = cli.new_packwriter(compression_level=opt.compress)

        with w:
            sys.stdout.flush()
            out = byte_stream(sys.stdout)

            if opt.name:
                refname = b'refs/heads/%s' % opt.name
                parent = repo.read_ref(refname)
            else:
                refname = parent = None

            indexfile = opt.indexfile or git.repo(b'bupindex')
            try:
                msr = index.MetaStoreReader(indexfile + b'.meta')
            except IOError as ex:
                if ex.errno != ENOENT:
                    raise
                log('error: cannot access %r; have you run bup index?'
                    % path_msg(indexfile))
                sys.exit(1)
            with msr, \
                 hlinkdb.HLinkDB(indexfile + b'.hlink') as hlink_db, \
                 index.Reader(indexfile) as reader:
                tree = save_tree(opt, reader, hlink_db, msr, w)
            if opt.tree:
                out.write(hexlify(tree))
                out.write(b'\n')
            if opt.commit or opt.name:
                commit = commit_tree(tree, parent, opt.date, argv, w)
                if opt.commit:
                    out.write(hexlify(commit))
                    out.write(b'\n')

        # packwriter must be closed before we can update the ref
        if opt.name:
            repo.update_ref(refname, commit, parent)

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
        sys.exit(1)
Пример #54
0
def _set_mode():
    global dumb_server_mode
    dumb_server_mode = os.path.exists(git.repo('bup-dumb-server'))
    debug1('bup server: serving in %s mode\n' 
           % (dumb_server_mode and 'dumb' or 'smart'))
Пример #55
0
 def __init__(self, repo_dir=None):
     self.repo_dir = realpath(repo_dir or git.repo())
     self._cp = git.cp(self.repo_dir)
     self.update_ref = partial(git.update_ref, repo_dir=self.repo_dir)
     self.rev_list = partial(git.rev_list, repo_dir=self.repo_dir)
     self._id = _repo_id(self.repo_dir)
Пример #56
0
    log("bup: error: tag '%s' already exists\n" % tag_name)
    sys.exit(1)

if tag_name.startswith('.'):
    o.fatal("'%s' is not a valid tag name." % tag_name)

try:
    hash = git.rev_parse(commit)
except git.GitError as e:
    log("bup: error: %s" % e)
    sys.exit(2)

if not hash:
    log("bup: error: commit %s not found.\n" % commit)
    sys.exit(2)

pL = git.PackIdxList(git.repo('objects/pack'))
if not pL.exists(hash):
    log("bup: error: commit %s not found.\n" % commit)
    sys.exit(2)

tag_file = git.repo('refs/tags/%s' % tag_name)
try:
    tag = file(tag_file, 'w')
except OSError as e:
    log("bup: error: could not create tag '%s': %s" % (tag_name, e))
    sys.exit(3)

tag.write(hash.encode('hex'))
tag.close()
Пример #57
0
    # check existing midx files
    if extra:
        midxes = extra
    else:
        midxes = []
        paths = opt.dir and [opt.dir] or git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path)
            midxes += glob.glob(os.path.join(path, b'*.midx'))
    for name in midxes:
        check_midx(name)
    if not saved_errors:
        log('All tests passed.\n')
else:
    if extra:
        sys.stdout.flush()
        do_midx(git.repo(b'objects/pack'), opt.output, extra, b'',
                byte_stream(sys.stdout))
    elif opt.auto or opt.force:
        sys.stdout.flush()
        paths = opt.dir and [opt.dir] or git.all_packdirs()
        for path in paths:
            debug1('midx: scanning %s\n' % path_msg(path))
            do_midx_dir(path, opt.output, byte_stream(sys.stdout))
    else:
        o.fatal("you must use -f or -a or provide input filenames")

if saved_errors:
    log('WARNING: %d errors encountered.\n' % len(saved_errors))
    sys.exit(1)
Пример #58
0
Файл: tag.py Проект: gdt/bup
def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    git.check_repo_or_die()

    tags = [t for sublist in git.tags().values() for t in sublist]

    if opt.delete:
        # git.delete_ref() doesn't complain if a ref doesn't exist.  We
        # could implement this verification but we'd need to read in the
        # contents of the tag file and pass the hash, and we already know
        # about the tag's existance via "tags".
        tag_name = argv_bytes(opt.delete)
        if not opt.force and tag_name not in tags:
            log("error: tag '%s' doesn't exist\n" % path_msg(tag_name))
            sys.exit(1)
        tag_file = b'refs/tags/%s' % tag_name
        git.delete_ref(tag_file)
        sys.exit(0)

    if not extra:
        for t in tags:
            sys.stdout.flush()
            out = byte_stream(sys.stdout)
            out.write(t)
            out.write(b'\n')
        sys.exit(0)
    elif len(extra) != 2:
        o.fatal('expected commit ref and hash')

    tag_name, commit = map(argv_bytes, extra[:2])
    if not tag_name:
        o.fatal("tag name must not be empty.")
    debug1("args: tag name = %s; commit = %s\n" %
           (path_msg(tag_name), commit.decode('ascii')))

    if tag_name in tags and not opt.force:
        log("bup: error: tag '%s' already exists\n" % path_msg(tag_name))
        sys.exit(1)

    if tag_name.startswith(b'.'):
        o.fatal("'%s' is not a valid tag name." % path_msg(tag_name))

    try:
        hash = git.rev_parse(commit)
    except git.GitError as e:
        log("bup: error: %s" % e)
        sys.exit(2)

    if not hash:
        log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
        sys.exit(2)

    pL = git.PackIdxList(git.repo(b'objects/pack'))
    if not pL.exists(hash):
        log("bup: error: commit %s not found.\n" % commit.decode('ascii'))
        sys.exit(2)

    tag_file = git.repo(b'refs/tags/' + tag_name)
    try:
        tag = open(tag_file, 'wb')
    except OSError as e:
        log("bup: error: could not create tag '%s': %s" %
            (path_msg(tag_name), e))
        sys.exit(3)
    with tag as tag:
        tag.write(hexlify(hash))
        tag.write(b'\n')
Пример #59
0
def main():
    handle_ctrl_c()
    opt = parse_args(sys.argv)
    if opt.source:
        opt.source = argv_bytes(opt.source)
    src_dir = opt.source or git.repo()
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    dest_repo = repo.from_opts(opt)

    with dest_repo as dest_repo:
        with LocalRepo(repo_dir=src_dir) as src_repo:
            # Resolve and validate all sources and destinations,
            # implicit or explicit, and do it up-front, so we can
            # fail before we start writing (for any obviously
            # broken cases).
            target_items = resolve_targets(opt.target_specs, src_repo,
                                           dest_repo)

            updated_refs = {}  # ref_name -> (original_ref, tip_commit(bin))
            no_ref_info = (None, None)

            handlers = {
                'ff': handle_ff,
                'append': handle_append,
                'force-pick': handle_pick,
                'pick': handle_pick,
                'new-tag': handle_new_tag,
                'replace': handle_replace,
                'unnamed': handle_unnamed
            }

            for item in target_items:
                debug1('get-spec: %r\n' % (item.spec, ))
                debug1('get-src: %s\n' % loc_desc(item.src))
                debug1('get-dest: %s\n' % loc_desc(item.dest))
                dest_path = item.dest and item.dest.path
                if dest_path:
                    if dest_path.startswith(b'/.tag/'):
                        dest_ref = b'refs/tags/%s' % dest_path[6:]
                    else:
                        dest_ref = b'refs/heads/%s' % dest_path[1:]
                else:
                    dest_ref = None

                dest_hash = item.dest and item.dest.hash
                orig_ref, cur_ref = updated_refs.get(dest_ref, no_ref_info)
                orig_ref = orig_ref or dest_hash
                cur_ref = cur_ref or dest_hash

                handler = handlers[item.spec.method]
                item_result = handler(item, src_repo, dest_repo, opt)
                if len(item_result) > 1:
                    new_id, tree = item_result
                else:
                    new_id = item_result[0]

                if not dest_ref:
                    log_item(item.spec.src, item.src.type, opt)
                else:
                    updated_refs[dest_ref] = (orig_ref, new_id)
                    if dest_ref.startswith(b'refs/tags/'):
                        log_item(item.spec.src, item.src.type, opt, tag=new_id)
                    else:
                        log_item(item.spec.src,
                                 item.src.type,
                                 opt,
                                 tree=tree,
                                 commit=new_id)

        # Only update the refs at the very end, once the destination repo
        # finished writing, so that if something goes wrong above, the old
        # refs will be undisturbed.
        for ref_name, info in items(updated_refs):
            orig_ref, new_ref = info
            try:
                dest_repo.update_ref(ref_name, new_ref, orig_ref)
                if opt.verbose:
                    new_hex = hexlify(new_ref)
                    if orig_ref:
                        orig_hex = hexlify(orig_ref)
                        log('updated %r (%s -> %s)\n' %
                            (ref_name, orig_hex, new_hex))
                    else:
                        log('updated %r (%s)\n' % (ref_name, new_hex))
            except (git.GitError, client.ClientError) as ex:
                add_error('unable to update ref %r: %s' % (ref_name, ex))

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' %
            len(saved_errors))
        sys.exit(1)