Exemplo n.º 1
0
def report_live_item(n, total, ref_name, ref_id, item, verbosity):
    status = 'scanned %02.2f%%' % (n * 100.0 / total)
    hex_id = hexstr(ref_id)
    dirslash = b'/' if item.type == b'tree' else b''
    chunk_path = item.chunk_path

    if chunk_path:
        if verbosity < 4:
            return
        ps = b'/'.join(item.path)
        chunk_ps = b'/'.join(chunk_path)
        log('%s %s:%s/%s%s\n' % (status, hex_id, path_msg(ps),
                                 path_msg(chunk_ps), path_msg(dirslash)))
        return

    # Top commit, for example has none.
    demangled = git.demangle_name(item.path[-1], item.mode)[0] if item.path \
                else None

    # Don't print mangled paths unless the verbosity is over 3.
    if demangled:
        ps = b'/'.join(item.path[:-1] + [demangled])
        if verbosity == 1:
            qprogress('%s %s:%s%s\r' %
                      (status, hex_id, path_msg(ps), path_msg(dirslash)))
        elif (verbosity > 1 and item.type == b'tree') \
             or (verbosity > 2 and item.type == b'blob'):
            log('%s %s:%s%s\n' %
                (status, hex_id, path_msg(ps), path_msg(dirslash)))
    elif verbosity > 3:
        ps = b'/'.join(item.path)
        log('%s %s:%s%s\n' %
            (status, hex_id, path_msg(ps), path_msg(dirslash)))
Exemplo n.º 2
0
def prog(filenum, nbytes):
    global total_bytes
    total_bytes += nbytes
    if filenum > 0:
        qprogress("Splitting: file #%d, %d kbytes\r" % (filenum + 1, total_bytes / 1024))
    else:
        qprogress("Splitting: %d kbytes\r" % (total_bytes / 1024))
Exemplo n.º 3
0
Arquivo: gc.py Projeto: xx4h/bup
def report_live_item(n, total, ref_name, ref_id, item, verbosity):
    status = 'scanned %02.2f%%' % (n * 100.0 / total)
    hex_id = ref_id.encode('hex')
    dirslash = '/' if item.type == 'tree' else ''
    chunk_path = item.chunk_path

    if chunk_path:
        if verbosity < 4:
            return
        ps = '/'.join(item.path)
        chunk_ps = '/'.join(chunk_path)
        log('%s %s:%s/%s%s\n' % (status, hex_id, ps, chunk_ps, dirslash))
        return

    # Top commit, for example has none.
    demangled = git.demangle_name(item.path[-1], item.mode)[0] if item.path \
                else None

    # Don't print mangled paths unless the verbosity is over 3.
    if demangled:
        ps = '/'.join(item.path[:-1] + [demangled])
        if verbosity == 1:
            qprogress('%s %s:%s%s\r' % (status, hex_id, ps, dirslash))
        elif (verbosity > 1 and item.type == 'tree') \
             or (verbosity > 2 and item.type == 'blob'):
            log('%s %s:%s%s\n' % (status, hex_id, ps, dirslash))
    elif verbosity > 3:
        ps = '/'.join(item.path)
        log('%s %s:%s%s\n' % (status, hex_id, ps, dirslash))
Exemplo n.º 4
0
def report_live_item(n, total, ref_name, ref_id, item):
    global opt
    status = 'scanned %02.2f%%' % (n * 100.0 / total)
    hex_id = ref_id.encode('hex')
    dirslash = '/' if item.type == 'tree' else ''
    chunk_path = item.chunk_path

    if chunk_path:
        if opt.verbose < 4:
            return
        ps = '/'.join(item.path)
        chunk_ps = '/'.join(chunk_path)
        log('%s %s:%s/%s%s\n' % (status, hex_id, ps, chunk_ps, dirslash))
        return

    # Top commit, for example has none.
    demangled = git.demangle_name(item.path[-1], item.mode)[0] if item.path \
                else None

    # Don't print mangled paths unless the verbosity is over 3.
    if demangled:
        ps = '/'.join(item.path[:-1] + [demangled])
        if opt.verbose == 1:
            qprogress('%s %s:%s%s\r' % (status, hex_id, ps, dirslash))
        elif (opt.verbose > 1 and item.type == 'tree') \
             or (opt.verbose > 2 and item.type == 'blob'):
            log('%s %s:%s%s\n' % (status, hex_id, ps, dirslash))
    elif opt.verbose > 3:
        ps = '/'.join(item.path)
        log('%s %s:%s%s\n' % (status, hex_id, ps, dirslash))
Exemplo n.º 5
0
Arquivo: midx.py Projeto: bup/bup
def check_midx(name):
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % path_msg(nicename))
    try:
        ix = git.open_idx(name)
    except git.GitError as e:
        add_error('%s: %s' % (path_msg(name), e))
        return
    for count,subname in enumerate(ix.idxnames):
        sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
        for ecount,e in enumerate(sub):
            if not (ecount % 1234):
                qprogress('  %d/%d: %s %d/%d\r' 
                          % (count, len(ix.idxnames),
                             git.shorten_hash(subname).decode('ascii'),
                             ecount, len(sub)))
            if not sub.exists(e):
                add_error("%s: %s: %s missing from idx"
                          % (path_msg(nicename),
                             git.shorten_hash(subname).decode('ascii'),
                             hexstr(e)))
            if not ix.exists(e):
                add_error("%s: %s: %s missing from midx"
                          % (path_msg(nicename),
                             git.shorten_hash(subname).decode('ascii'),
                             hexstr(e)))
    prev = None
    for ecount,e in enumerate(ix):
        if not (ecount % 1234):
            qprogress('  Ordering: %d/%d\r' % (ecount, len(ix)))
        if e and prev and not e >= prev:
            add_error('%s: ordering error: %s < %s'
                      % (nicename, hexstr(e), hexstr(prev)))
        prev = e
Exemplo n.º 6
0
def check_midx(name):
    nicename = git.repo_rel(name)
    log('Checking %s.\n' % nicename)
    try:
        ix = git.open_idx(name)
    except git.GitError as e:
        add_error('%s: %s' % (name, e))
        return
    for count,subname in enumerate(ix.idxnames):
        sub = git.open_idx(os.path.join(os.path.dirname(name), subname))
        for ecount,e in enumerate(sub):
            if not (ecount % 1234):
                qprogress('  %d/%d: %s %d/%d\r' 
                          % (count, len(ix.idxnames),
                             git.shorten_hash(subname), ecount, len(sub)))
            if not sub.exists(e):
                add_error("%s: %s: %s missing from idx"
                          % (nicename, git.shorten_hash(subname),
                             str(e).encode('hex')))
            if not ix.exists(e):
                add_error("%s: %s: %s missing from midx"
                          % (nicename, git.shorten_hash(subname),
                             str(e).encode('hex')))
    prev = None
    for ecount,e in enumerate(ix):
        if not (ecount % 1234):
            qprogress('  Ordering: %d/%d\r' % (ecount, len(ix)))
        if not e >= prev:
            add_error('%s: ordering error: %s < %s'
                      % (nicename,
                         str(e).encode('hex'), str(prev).encode('hex')))
        prev = e
Exemplo n.º 7
0
 def progress_report(n):
     _nonlocal['subcount'] += n
     cc = _nonlocal['count'] + _nonlocal['subcount']
     pct = total and (cc*100.0/total) or 0
     now = time.time()
     elapsed = now - tstart
     kps = elapsed and int(cc/1024./elapsed)
     kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
     kps = int(kps/kps_frac)*kps_frac
     if cc:
         remain = elapsed*1.0/cc * (total-cc)
     else:
         remain = 0.0
     if (_nonlocal['lastremain'] and (remain > _nonlocal['lastremain'])
           and ((remain - _nonlocal['lastremain'])/_nonlocal['lastremain'] < 0.05)):
         remain = _nonlocal['lastremain']
     else:
         _nonlocal['lastremain'] = remain
     hours = int(remain/60/60)
     mins = int(remain/60 - hours*60)
     secs = int(remain - hours*60*60 - mins*60)
     if elapsed < 30:
         remainstr = ''
         kpsstr = ''
     else:
         kpsstr = '%dk/s' % kps
         if hours:
             remainstr = '%dh%dm' % (hours, mins)
         elif mins:
             remainstr = '%dm%d' % (mins, secs)
         else:
             remainstr = '%ds' % secs
     qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
               % (pct, cc/1024, total/1024, fcount, ftotal,
                  remainstr, kpsstr))
Exemplo n.º 8
0
Arquivo: split.py Projeto: fakegit/bup
 def prog(filenum, nbytes):
     total_bytes[0] += nbytes
     if filenum > 0:
         qprogress('Splitting: file #%d, %d kbytes\r' %
                   (filenum + 1, total_bytes[0] // 1024))
     else:
         qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))
Exemplo n.º 9
0
def prog(filenum, nbytes):
    global total_bytes
    total_bytes += nbytes
    if filenum > 0:
        qprogress('Splitting: file #%d, %d kbytes\r' %
                  (filenum + 1, total_bytes / 1024))
    else:
        qprogress('Splitting: %d kbytes\r' % (total_bytes / 1024))
Exemplo n.º 10
0
def main(argv):
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    handle_ctrl_c()

    opt.find = argv_bytes(opt.find) if opt.find else b''

    if not extra:
        o.fatal('you must provide at least one filename')

    if len(opt.find) > 40:
        o.fatal('--find parameter must be <= 40 chars long')
    else:
        if len(opt.find) % 2:
            s = opt.find + b'0'
        else:
            s = opt.find
        try:
            bin = unhexlify(s)
        except TypeError:
            o.fatal('--find parameter is not a valid hex string')

    sys.stdout.flush()
    out = byte_stream(sys.stdout)
    find = opt.find.lower()
    count = 0
    idxfiles = [argv_bytes(x) for x in extra]
    for name in idxfiles:
        try:
            ix = git.open_idx(name)
        except git.GitError as e:
            add_error('%r: %s' % (name, e))
            continue
        if len(opt.find) == 40:
            if ix.exists(bin):
                out.write(b'%s %s\n' % (name, find))
        else:
            # slow, exhaustive search
            for _i in ix:
                i = hexlify(_i)
                if i.startswith(find):
                    out.write(b'%s %s\n' % (name, i))
                qprogress('Searching: %d\r' % count)
                count += 1

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' %
            len(saved_errors))
        sys.exit(1)
Exemplo n.º 11
0
    def send_index(self, name, f, send_size):
        self._require_command(b'send-index')
        #debug1('requesting %r\n' % name)
        self.check_busy()
        self.conn.write(b'send-index %s\n' % name)
        n = struct.unpack('!I', self.conn.read(4))[0]
        assert(n)

        send_size(n)

        count = 0
        progress('Receiving index from server: %d/%d\r' % (count, n))
        for b in chunkyreader(self.conn, n):
            f.write(b)
            count += len(b)
            qprogress('Receiving index from server: %d/%d\r' % (count, n))
        progress('Receiving index from server: %d/%d, done.\n' % (count, n))
        self.check_ok()
Exemplo n.º 12
0
 def sync_index(self, name):
     #debug1('requesting %r\n' % name)
     self.check_busy()
     mkdirp(self.cachedir)
     fn = os.path.join(self.cachedir, name)
     if os.path.exists(fn):
         msg = "won't request existing .idx, try `bup bloom --check %s`" % fn
         raise ClientError(msg)
     self.conn.write('send-index %s\n' % name)
     n = struct.unpack('!I', self.conn.read(4))[0]
     assert(n)
     with atomically_replaced_file(fn, 'w') as f:
         count = 0
         for b in chunkyreader(self.conn, n):
             f.write(b)
             count += len(b)
             qprogress('Receiving index from server: %d/%d\r' % (count, n))
         self.check_ok()
Exemplo n.º 13
0
 def sync_index(self, name):
     #debug1('requesting %r\n' % name)
     self.check_busy()
     mkdirp(self.cachedir)
     fn = os.path.join(self.cachedir, name)
     if os.path.exists(fn):
         msg = "won't request existing .idx, try `bup bloom --check %s`" % fn
         raise ClientError(msg)
     self.conn.write('send-index %s\n' % name)
     n = struct.unpack('!I', self.conn.read(4))[0]
     assert (n)
     with atomically_replaced_file(fn, 'w') as f:
         count = 0
         progress('Receiving index from server: %d/%d\r' % (count, n))
         for b in chunkyreader(self.conn, n):
             f.write(b)
             count += len(b)
             qprogress('Receiving index from server: %d/%d\r' % (count, n))
         progress('Receiving index from server: %d/%d, done.\n' %
                  (count, n))
         self.check_ok()
Exemplo n.º 14
0
def progress_report(n):
    global count, subcount, lastremain
    subcount += n
    cc = count + subcount
    pct = total and (cc*100.0/total) or 0
    now = time.time()
    elapsed = now - tstart
    kps = elapsed and int(cc/1024./elapsed)
    kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
    kps = int(kps/kps_frac)*kps_frac
    if cc:
        remain = elapsed*1.0/cc * (total-cc)
    else:
        remain = 0.0
    if (lastremain and (remain > lastremain)
          and ((remain - lastremain)/lastremain < 0.05)):
        remain = lastremain
    else:
        lastremain = remain
    hours = int(remain/60/60)
    mins = int(remain/60 - hours*60)
    secs = int(remain - hours*60*60 - mins*60)
    if elapsed < 30:
        remainstr = ''
        kpsstr = ''
    else:
        kpsstr = '%dk/s' % kps
        if hours:
            remainstr = '%dh%dm' % (hours, mins)
        elif mins:
            remainstr = '%dm%d' % (mins, secs)
        else:
            remainstr = '%ds' % secs
    qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
              % (pct, cc/1024, total/1024, fcount, ftotal,
                 remainstr, kpsstr))
Exemplo n.º 15
0
Arquivo: gc.py Projeto: xx4h/bup
def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
          verbosity):
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []
    def remove_stale_files(new_pack_prefix):
        if verbosity and new_pack_prefix:
            log('created ' + basename(new_pack_prefix) + '\n')
        for p in ns.stale_files:
            if new_pack_prefix and p.startswith(new_pack_prefix):
                continue  # Don't remove the new pack file
            if verbosity:
                log('removing ' + basename(p) + '\n')
            os.unlink(p)
        if ns.stale_files:  # So git cat-pipe will close them
            cat_pipe.restart()
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=compression,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)

    # FIXME: sanity check .idx names vs .pack names?
    collect_count = 0
    for idx_name in glob.glob(os.path.join(git.repo('objects/pack'), '*.idx')):
        if verbosity:
            qprogress('preserving live data (%d%% complete)\r'
                      % ((float(collect_count) / existing_count) * 100))
        idx = git.open_idx(idx_name)

        idx_live_count = 0
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20 : (i + 1) * 20]
            if live_objects.exists(sha):
                idx_live_count += 1

        collect_count += idx_live_count
        if idx_live_count == 0:
            if verbosity:
                log('deleting %s\n'
                    % git.repo_rel(basename(idx_name)))
            ns.stale_files.append(idx_name)
            ns.stale_files.append(idx_name[:-3] + 'pack')
            continue

        live_frac = idx_live_count / float(len(idx))
        if live_frac > ((100 - threshold) / 100.0):
            if verbosity:
                log('keeping %s (%d%% live)\n' % (git.repo_rel(basename(idx_name)),
                                                  live_frac * 100))
            continue

        if verbosity:
            log('rewriting %s (%.2f%% live)\n' % (basename(idx_name),
                                                  live_frac * 100))
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20 : (i + 1) * 20]
            if live_objects.exists(sha):
                item_it = cat_pipe.get(sha.encode('hex'))
                type = item_it.next()
                writer.just_write(sha, type, ''.join(item_it))

        ns.stale_files.append(idx_name)
        ns.stale_files.append(idx_name[:-3] + 'pack')

    if verbosity:
        progress('preserving live data (%d%% complete)\n'
                 % ((float(collect_count) / existing_count) * 100))

    # Nothing should have recreated midx/bloom yet.
    pack_dir = git.repo('objects/pack')
    assert(not os.path.exists(os.path.join(pack_dir, 'bup.bloom')))
    assert(not glob.glob(os.path.join(pack_dir, '*.midx')))

    # try/catch should call writer.abort()?
    # This will finally run midx.
    writer.close()  # Can only change refs (if needed) after this.
    remove_stale_files(None)  # In case we didn't write to the writer.

    if verbosity:
        log('discarded %d%% of objects\n'
            % ((existing_count - count_objects(pack_dir, verbosity))
               / float(existing_count) * 100))
Exemplo n.º 16
0
def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions):
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    ri = index.Reader(indexfile)
    msw = index.MetaStoreWriter(indexfile + '.meta')
    wi = index.Writer(indexfile, msw, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    fake_hash = None
    if opt.fake_valid:
        def fake_hash(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()
    for path, pst in recursive_dirlist([top],
                                       xdev=opt.xdev,
                                       bup_dir=bup_dir,
                                       excluded_paths=excluded_paths,
                                       exclude_rxs=exclude_rxs,
                                       xdev_exceptions=xdev_exceptions):
        if opt.verbose>=2 or (opt.verbose==1 and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        elif not (total % 128):
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        total += 1

        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
                    hlinks.del_path(rig.cur.name)
            rig.next()

        if rig.cur and rig.cur.name == path:    # paths that already existed
            need_repack = False
            if(rig.cur.stale(pst, tstart, check_device=opt.check_device)):
                try:
                    meta = metadata.from_path(path, statinfo=pst)
                except (OSError, IOError) as e:
                    add_error(e)
                    rig.next()
                    continue
                if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
                    hlinks.del_path(rig.cur.name)
                if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                    hlinks.add_path(path, pst.st_dev, pst.st_ino)
                # Clear these so they don't bloat the store -- they're
                # already in the index (since they vary a lot and they're
                # fixed length).  If you've noticed "tmax", you might
                # wonder why it's OK to do this, since that code may
                # adjust (mangle) the index mtime and ctime -- producing
                # fake values which must not end up in a .bupm.  However,
                # it looks like that shouldn't be possible:  (1) When
                # "save" validates the index entry, it always reads the
                # metadata from the filesytem. (2) Metadata is only
                # read/used from the index if hashvalid is true. (3)
                # "faked" entries will be stale(), and so we'll invalidate
                # them below.
                meta.ctime = meta.mtime = meta.atime = 0
                meta_ofs = msw.store(meta)
                rig.cur.update_from_stat(pst, meta_ofs)
                rig.cur.invalidate()
                need_repack = True
            if not (rig.cur.flags & index.IX_HASHVALID):
                if fake_hash:
                    rig.cur.gitmode, rig.cur.sha = fake_hash(path)
                    rig.cur.flags |= index.IX_HASHVALID
                    need_repack = True
            if opt.fake_invalid:
                rig.cur.invalidate()
                need_repack = True
            if need_repack:
                rig.cur.repack()
            rig.next()
        else:  # new paths
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                continue
            # See same assignment to 0, above, for rationale.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = msw.store(meta)
            wi.add(path, pst, meta_ofs, hashgen=fake_hash)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)

    elapsed = time.time() - index_start
    paths_per_sec = total / elapsed if elapsed else 0
    progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))

    hlinks.prepare_save()

    if ri.exists():
        ri.save()
        wi.flush()
        if wi.count:
            wr = wi.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(ri)
                log('check: before merging: newfile\n')
                check_index(wr)
            mi = index.Writer(indexfile, msw, tmax)

            for e in index.merge(ri, wr):
                # FIXME: shouldn't we remove deleted entries eventually?  When?
                mi.add_ixentry(e)

            ri.close()
            mi.close()
            wr.close()
        wi.abort()
    else:
        wi.close()

    msw.close()
    hlinks.commit_save()
Exemplo n.º 17
0
 def pfunc(count, total):
     qprogress('bup: merging indexes (%d/%d)\r' % (count, total))
Exemplo n.º 18
0
    return not already_saved(ent)

def wantrecurse_during(ent):
    return not already_saved(ent) or ent.sha_missing()

def find_hardlink_target(hlink_db, ent):
    if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
        link_paths = hlink_db.node_paths(ent.dev, ent.ino)
        if link_paths:
            return link_paths[0]

total = ftotal = 0
if opt.progress:
    for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
        if not (ftotal % 10024):
            qprogress('Reading index: %d\r' % ftotal)
        exists = ent.exists()
        hashvalid = already_saved(ent)
        ent.set_sha_missing(not hashvalid)
        if not opt.smaller or ent.size < opt.smaller:
            if exists and not hashvalid:
                total += ent.size
        ftotal += 1
    progress('Reading index: %d, done.\n' % ftotal)
    hashsplit.progress_callback = progress_report

# Root collisions occur when strip or graft options map more than one
# path to the same directory (paths which originally had separate
# parents).  When that situation is detected, use empty metadata for
# the parent.  Otherwise, use the metadata for the common parent.
# Collision example: "bup save ... --strip /foo /foo/bar /bar".
Exemplo n.º 19
0
def do_bloom(path, outfilename):
    global _first
    b = None
    if os.path.exists(outfilename) and not opt.force:
        b = bloom.ShaBloom(outfilename)
        if not b.valid():
            debug1("bloom: Existing invalid bloom found, regenerating.\n")
            b = None

    add = []
    rest = []
    add_count = 0
    rest_count = 0
    for i,name in enumerate(glob.glob('%s/*.idx' % path)):
        progress('bloom: counting: %d\r' % i)
        ix = git.open_idx(name)
        ixbase = os.path.basename(name)
        if b and (ixbase in b.idxnames):
            rest.append(name)
            rest_count += len(ix)
        else:
            add.append(name)
            add_count += len(ix)
    total = add_count + rest_count

    if not add:
        debug1("bloom: nothing to do.\n")
        return

    if b:
        if len(b) != rest_count:
            debug1("bloom: size %d != idx total %d, regenerating\n"
                   % (len(b), rest_count))
            b = None
        elif (b.bits < bloom.MAX_BLOOM_BITS and
              b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
            debug1("bloom: regenerating: adding %d entries gives "
                   "%.2f%% false positives.\n"
                   % (add_count, b.pfalse_positive(add_count)))
            b = None
        else:
            b = bloom.ShaBloom(outfilename, readwrite=True, expected=add_count)
    if not b: # Need all idxs to build from scratch
        add += rest
        add_count += rest_count
    del rest
    del rest_count

    msg = b is None and 'creating from' or 'adding'
    if not _first: _first = path
    dirprefix = (_first != path) and git.repo_rel(path)+': ' or ''
    progress('bloom: %s%s %d file%s (%d object%s).\n'
        % (dirprefix, msg,
           len(add), len(add)!=1 and 's' or '',
           add_count, add_count!=1 and 's' or ''))

    tfname = None
    if b is None:
        tfname = os.path.join(path, 'bup.tmp.bloom')
        b = bloom.create(tfname, expected=add_count, k=opt.k)
    count = 0
    icount = 0
    for name in add:
        ix = git.open_idx(name)
        qprogress('bloom: writing %.2f%% (%d/%d objects)\r' 
                  % (icount*100.0/add_count, icount, add_count))
        b.add_idx(ix)
        count += 1
        icount += len(ix)

    # Currently, there's an open file object for tfname inside b.
    # Make sure it's closed before rename.
    b.close()

    if tfname:
        os.rename(tfname, outfilename)
Exemplo n.º 20
0
Arquivo: bloom.py Projeto: jmberg/bup
def do_bloom(path, outfilename, k, force):
    global _first
    assert k in (None, 4, 5)
    b = None
    if os.path.exists(outfilename) and not force:
        b = bloom.ShaBloom(outfilename)
        if not b.valid():
            debug1("bloom: Existing invalid bloom found, regenerating.\n")
            b = None

    add = []
    rest = []
    add_count = 0
    rest_count = 0
    for i, name in enumerate(glob.glob(b'%s/*.idx' % path)):
        progress('bloom: counting: %d\r' % i)
        ix = git.open_idx(name)
        ixbase = os.path.basename(name)
        if b and (ixbase in b.idxnames):
            rest.append(name)
            rest_count += len(ix)
        else:
            add.append(name)
            add_count += len(ix)

    if not add:
        debug1("bloom: nothing to do.\n")
        return

    if b:
        if len(b) != rest_count:
            debug1("bloom: size %d != idx total %d, regenerating\n" %
                   (len(b), rest_count))
            b = None
        elif k is not None and k != b.k:
            debug1("bloom: new k %d != existing k %d, regenerating\n" %
                   (k, b.k))
            b = None
        elif (b.bits < bloom.MAX_BLOOM_BITS[b.k]
              and b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE):
            debug1("bloom: regenerating: adding %d entries gives "
                   "%.2f%% false positives.\n" %
                   (add_count, b.pfalse_positive(add_count)))
            b = None
        else:
            b = bloom.ShaBloom(outfilename, readwrite=True, expected=add_count)
    if not b:  # Need all idxs to build from scratch
        add += rest
        add_count += rest_count
    del rest
    del rest_count

    msg = b is None and 'creating from' or 'adding'
    if not _first: _first = path
    dirprefix = (_first != path) and git.repo_rel(path) + b': ' or b''
    progress('bloom: %s%s %d file%s (%d object%s).\r' %
             (path_msg(dirprefix), msg, len(add), len(add) != 1 and 's'
              or '', add_count, add_count != 1 and 's' or ''))

    tfname = None
    if b is None:
        tfname = os.path.join(path, b'bup.tmp.bloom')
        b = bloom.create(tfname, expected=add_count, k=k)
    count = 0
    icount = 0
    for name in add:
        ix = git.open_idx(name)
        qprogress('bloom: writing %.2f%% (%d/%d objects)\r' %
                  (icount * 100.0 / add_count, icount, add_count))
        b.add_idx(ix)
        count += 1
        icount += len(ix)

    # Currently, there's an open file object for tfname inside b.
    # Make sure it's closed before rename.
    b.close()

    if tfname:
        os.rename(tfname, outfilename)
Exemplo n.º 21
0
def sweep(live_objects, existing_count, cat_pipe, opt):
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        if opt.verbose and new_pack_prefix:
            log('created ' + basename(new_pack_prefix) + '\n')
        for p in ns.stale_files:
            if opt.verbose:
                log('removing ' + basename(p) + '\n')
            os.unlink(p)
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=opt.compress,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)

    # FIXME: sanity check .idx names vs .pack names?
    collect_count = 0
    for idx_name in glob.glob(os.path.join(git.repo('objects/pack'), '*.idx')):
        if opt.verbose:
            qprogress('preserving live data (%d%% complete)\r' %
                      ((float(collect_count) / existing_count) * 100))
        idx = git.open_idx(idx_name)

        idx_live_count = 0
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20:(i + 1) * 20]
            if live_objects.exists(sha):
                idx_live_count += 1

        collect_count += idx_live_count
        if idx_live_count == 0:
            if opt.verbose:
                log('deleting %s\n' % git.repo_rel(basename(idx_name)))
            ns.stale_files.append(idx_name)
            ns.stale_files.append(idx_name[:-3] + 'pack')
            continue

        live_frac = idx_live_count / float(len(idx))
        if live_frac > ((100 - opt.threshold) / 100.0):
            if opt.verbose:
                log('keeping %s (%d%% live)\n' %
                    (git.repo_rel(basename(idx_name)), live_frac * 100))
            continue

        if opt.verbose:
            log('rewriting %s (%.2f%% live)\n' %
                (basename(idx_name), live_frac * 100))
        for i in xrange(0, len(idx)):
            sha = idx.shatable[i * 20:(i + 1) * 20]
            if live_objects.exists(sha):
                item_it = cat_pipe.get(sha.encode('hex'))
                type = item_it.next()
                writer.write(sha, type, ''.join(item_it))

        ns.stale_files.append(idx_name)
        ns.stale_files.append(idx_name[:-3] + 'pack')

    if opt.verbose:
        progress('preserving live data (%d%% complete)\n' %
                 ((float(collect_count) / existing_count) * 100))

    # Nothing should have recreated midx/bloom yet.
    pack_dir = git.repo('objects/pack')
    assert (not os.path.exists(os.path.join(pack_dir, 'bup.bloom')))
    assert (not glob.glob(os.path.join(pack_dir, '*.midx')))

    # try/catch should call writer.abort()?
    # This will finally run midx.
    writer.close()  # Can only change refs (if needed) after this.
    remove_stale_files(None)  # In case we didn't write to the writer.

    if opt.verbose:
        log('discarded %d%% of objects\n' %
            ((existing_count - count_objects(pack_dir)) /
             float(existing_count) * 100))
Exemplo n.º 22
0
def main(argv):

    # Hack around lack of nonlocal vars in python 2
    _nonlocal = {}

    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if opt.indexfile:
        opt.indexfile = argv_bytes(opt.indexfile)
    if opt.name:
        opt.name = argv_bytes(opt.name)
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    if opt.strip_path:
        opt.strip_path = argv_bytes(opt.strip_path)

    git.check_repo_or_die()
    if not (opt.tree or opt.commit or opt.name):
        o.fatal("use one or more of -t, -c, -n")
    if not extra:
        o.fatal("no filenames given")

    extra = [argv_bytes(x) for x in extra]

    opt.progress = (istty2 and not opt.quiet)
    opt.smaller = parse_num(opt.smaller or 0)
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)

    if opt.date:
        date = parse_date_or_fatal(opt.date, o.fatal)
    else:
        date = time.time()

    if opt.strip and opt.strip_path:
        o.fatal("--strip is incompatible with --strip-path")

    graft_points = []
    if opt.graft:
        if opt.strip:
            o.fatal("--strip is incompatible with --graft")

        if opt.strip_path:
            o.fatal("--strip-path is incompatible with --graft")

        for (option, parameter) in flags:
            if option == "--graft":
                parameter = argv_bytes(parameter)
                splitted_parameter = parameter.split(b'=')
                if len(splitted_parameter) != 2:
                    o.fatal(
                        "a graft point must be of the form old_path=new_path")
                old_path, new_path = splitted_parameter
                if not (old_path and new_path):
                    o.fatal("a graft point cannot be empty")
                graft_points.append(
                    (resolve_parent(old_path), resolve_parent(new_path)))

    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    if is_reverse and opt.remote:
        o.fatal("don't use -r in reverse mode; it's automatic")

    name = opt.name
    if name and not valid_save_name(name):
        o.fatal("'%s' is not a valid branch name" % path_msg(name))
    refname = name and b'refs/heads/%s' % name or None
    if opt.remote or is_reverse:
        try:
            cli = client.Client(opt.remote)
        except client.ClientError as e:
            log('error: %s' % e)
            sys.exit(1)
        oldref = refname and cli.read_ref(refname) or None
        w = cli.new_packwriter(compression_level=opt.compress)
    else:
        cli = None
        oldref = refname and git.read_ref(refname) or None
        w = git.PackWriter(compression_level=opt.compress)

    handle_ctrl_c()

    # Metadata is stored in a file named .bupm in each directory.  The
    # first metadata entry will be the metadata for the current directory.
    # The remaining entries will be for each of the other directory
    # elements, in the order they're listed in the index.
    #
    # Since the git tree elements are sorted according to
    # git.shalist_item_sort_key, the metalist items are accumulated as
    # (sort_key, metadata) tuples, and then sorted when the .bupm file is
    # created.  The sort_key should have been computed using the element's
    # mangled name and git mode (after hashsplitting), but the code isn't
    # actually doing that but rather uses the element's real name and mode.
    # This makes things a bit more difficult when reading it back, see
    # vfs.ordered_tree_entries().

    # Maintain a stack of information representing the current location in
    # the archive being constructed.  The current path is recorded in
    # parts, which will be something like ['', 'home', 'someuser'], and
    # the accumulated content and metadata for of the dirs in parts is
    # stored in parallel stacks in shalists and metalists.

    parts = []  # Current archive position (stack of dir names).
    shalists = []  # Hashes for each dir in paths.
    metalists = []  # Metadata for each dir in paths.

    def _push(part, metadata):
        # Enter a new archive directory -- make it the current directory.
        parts.append(part)
        shalists.append([])
        metalists.append([(b'', metadata)])  # This dir's metadata (no name).

    def _pop(force_tree, dir_metadata=None):
        # Leave the current archive directory and add its tree to its parent.
        assert (len(parts) >= 1)
        part = parts.pop()
        shalist = shalists.pop()
        metalist = metalists.pop()
        # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
        if force_tree:
            tree = force_tree
        else:
            names_seen = set()
            clean_list = []
            metaidx = 1  # entry at 0 is for the dir
            for x in shalist:
                name = x[1]
                if name in names_seen:
                    parent_path = b'/'.join(parts) + b'/'
                    add_error('error: ignoring duplicate path %s in %s' %
                              (path_msg(name), path_msg(parent_path)))
                    if not stat.S_ISDIR(x[0]):
                        del metalist[metaidx]
                else:
                    names_seen.add(name)
                    clean_list.append(x)
                    if not stat.S_ISDIR(x[0]):
                        metaidx += 1

            if dir_metadata:  # Override the original metadata pushed for this dir.
                metalist = [(b'', dir_metadata)] + metalist[1:]
            sorted_metalist = sorted(metalist, key=lambda x: x[0])
            metadata = b''.join([m[1].encode() for m in sorted_metalist])
            metadata_f = BytesIO(metadata)
            mode, id = hashsplit.split_to_blob_or_tree(w.new_blob,
                                                       w.new_tree,
                                                       [metadata_f],
                                                       keep_boundaries=False)
            clean_list.append((mode, b'.bupm', id))

            tree = w.new_tree(clean_list)
        if shalists:
            shalists[-1].append((GIT_MODE_TREE,
                                 git.mangle_name(part, GIT_MODE_TREE,
                                                 GIT_MODE_TREE), tree))
        return tree

    _nonlocal['count'] = 0
    _nonlocal['subcount'] = 0
    _nonlocal['lastremain'] = None

    def progress_report(n):
        _nonlocal['subcount'] += n
        cc = _nonlocal['count'] + _nonlocal['subcount']
        pct = total and (cc * 100.0 / total) or 0
        now = time.time()
        elapsed = now - tstart
        kps = elapsed and int(cc / 1024. / elapsed)
        kps_frac = 10**int(math.log(kps + 1, 10) - 1)
        kps = int(kps / kps_frac) * kps_frac
        if cc:
            remain = elapsed * 1.0 / cc * (total - cc)
        else:
            remain = 0.0
        if (_nonlocal['lastremain'] and (remain > _nonlocal['lastremain']) and
            ((remain - _nonlocal['lastremain']) / _nonlocal['lastremain'] <
             0.05)):
            remain = _nonlocal['lastremain']
        else:
            _nonlocal['lastremain'] = remain
        hours = int(remain / 60 / 60)
        mins = int(remain / 60 - hours * 60)
        secs = int(remain - hours * 60 * 60 - mins * 60)
        if elapsed < 30:
            remainstr = ''
            kpsstr = ''
        else:
            kpsstr = '%dk/s' % kps
            if hours:
                remainstr = '%dh%dm' % (hours, mins)
            elif mins:
                remainstr = '%dm%d' % (mins, secs)
            else:
                remainstr = '%ds' % secs
        qprogress(
            'Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r' %
            (pct, cc / 1024, total / 1024, fcount, ftotal, remainstr, kpsstr))

    indexfile = opt.indexfile or git.repo(b'bupindex')
    r = index.Reader(indexfile)
    try:
        msr = index.MetaStoreReader(indexfile + b'.meta')
    except IOError as ex:
        if ex.errno != EACCES:
            raise
        log('error: cannot access %r; have you run bup index?' %
            path_msg(indexfile))
        sys.exit(1)
    hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')

    def already_saved(ent):
        return ent.is_valid() and w.exists(ent.sha) and ent.sha

    def wantrecurse_pre(ent):
        return not already_saved(ent)

    def wantrecurse_during(ent):
        return not already_saved(ent) or ent.sha_missing()

    def find_hardlink_target(hlink_db, ent):
        if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
            link_paths = hlink_db.node_paths(ent.dev, ent.ino)
            if link_paths:
                return link_paths[0]

    total = ftotal = 0
    if opt.progress:
        for (transname, ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
            if not (ftotal % 10024):
                qprogress('Reading index: %d\r' % ftotal)
            exists = ent.exists()
            hashvalid = already_saved(ent)
            ent.set_sha_missing(not hashvalid)
            if not opt.smaller or ent.size < opt.smaller:
                if exists and not hashvalid:
                    total += ent.size
            ftotal += 1
        progress('Reading index: %d, done.\n' % ftotal)
        hashsplit.progress_callback = progress_report

    # Root collisions occur when strip or graft options map more than one
    # path to the same directory (paths which originally had separate
    # parents).  When that situation is detected, use empty metadata for
    # the parent.  Otherwise, use the metadata for the common parent.
    # Collision example: "bup save ... --strip /foo /foo/bar /bar".

    # FIXME: Add collision tests, or handle collisions some other way.

    # FIXME: Detect/handle strip/graft name collisions (other than root),
    # i.e. if '/foo/bar' and '/bar' both map to '/'.

    first_root = None
    root_collision = None
    tstart = time.time()
    fcount = 0
    lastskip_name = None
    lastdir = b''
    for (transname, ent) in r.filter(extra, wantrecurse=wantrecurse_during):
        (dir, file) = os.path.split(ent.name)
        exists = (ent.flags & index.IX_EXISTS)
        hashvalid = already_saved(ent)
        wasmissing = ent.sha_missing()
        oldsize = ent.size
        if opt.verbose:
            if not exists:
                status = 'D'
            elif not hashvalid:
                if ent.sha == index.EMPTY_SHA:
                    status = 'A'
                else:
                    status = 'M'
            else:
                status = ' '
            if opt.verbose >= 2:
                log('%s %-70s\n' % (status, path_msg(ent.name)))
            elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
                if not lastdir.startswith(dir):
                    log('%s %-70s\n' %
                        (status, path_msg(os.path.join(dir, b''))))
                lastdir = dir

        if opt.progress:
            progress_report(0)
        fcount += 1

        if not exists:
            continue
        if opt.smaller and ent.size >= opt.smaller:
            if exists and not hashvalid:
                if opt.verbose:
                    log('skipping large file "%s"\n' % path_msg(ent.name))
                lastskip_name = ent.name
            continue

        assert (dir.startswith(b'/'))
        if opt.strip:
            dirp = stripped_path_components(dir, extra)
        elif opt.strip_path:
            dirp = stripped_path_components(dir, [opt.strip_path])
        elif graft_points:
            dirp = grafted_path_components(graft_points, dir)
        else:
            dirp = path_components(dir)

        # At this point, dirp contains a representation of the archive
        # path that looks like [(archive_dir_name, real_fs_path), ...].
        # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
        # might look like this at some point:
        #   [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].

        # This dual representation supports stripping/grafting, where the
        # archive path may not have a direct correspondence with the
        # filesystem.  The root directory is represented by an initial
        # component named '', and any component that doesn't have a
        # corresponding filesystem directory (due to grafting, for
        # example) will have a real_fs_path of None, i.e. [('', None),
        # ...].

        if first_root == None:
            first_root = dirp[0]
        elif first_root != dirp[0]:
            root_collision = True

        # If switching to a new sub-tree, finish the current sub-tree.
        while parts > [x[0] for x in dirp]:
            _pop(force_tree=None)

        # If switching to a new sub-tree, start a new sub-tree.
        for path_component in dirp[len(parts):]:
            dir_name, fs_path = path_component
            # Not indexed, so just grab the FS metadata or use empty metadata.
            try:
                meta = metadata.from_path(fs_path, normalized=True) \
                    if fs_path else metadata.Metadata()
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = dir_name
                meta = metadata.Metadata()
            _push(dir_name, meta)

        if not file:
            if len(parts) == 1:
                continue  # We're at the top level -- keep the current root dir
            # Since there's no filename, this is a subdir -- finish it.
            oldtree = already_saved(ent)  # may be None
            newtree = _pop(force_tree=oldtree)
            if not oldtree:
                if lastskip_name and lastskip_name.startswith(ent.name):
                    ent.invalidate()
                else:
                    ent.validate(GIT_MODE_TREE, newtree)
                ent.repack()
            if exists and wasmissing:
                _nonlocal['count'] += oldsize
            continue

        # it's not a directory
        if hashvalid:
            id = ent.sha
            git_name = git.mangle_name(file, ent.mode, ent.gitmode)
            git_info = (ent.gitmode, git_name, id)
            shalists[-1].append(git_info)
            sort_key = git.shalist_item_sort_key((ent.mode, file, id))
            meta = msr.metadata_at(ent.meta_ofs)
            meta.hardlink_target = find_hardlink_target(hlink_db, ent)
            # Restore the times that were cleared to 0 in the metastore.
            (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime,
                                                    ent.ctime)
            metalists[-1].append((sort_key, meta))
        else:
            id = None
            hlink = find_hardlink_target(hlink_db, ent)
            try:
                meta = metadata.from_path(
                    ent.name,
                    hardlink_target=hlink,
                    normalized=True,
                    after_stat=after_nondir_metadata_stat)
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = ent.name
                continue
            if stat.S_IFMT(ent.mode) != stat.S_IFMT(meta.mode):
                # The mode changed since we indexed the file, this is bad.
                # This can cause two issues:
                # 1) We e.g. think the file is a regular file, but now it's
                #    something else (a device, socket, FIFO or symlink, etc.)
                #    and _read_ from it when we shouldn't.
                # 2) We then record it as valid, but don't update the index
                #    metadata, and on a subsequent save it has 'hashvalid'
                #    but is recorded as the file type from the index, when
                #    the content is something else ...
                # Avoid all of these consistency issues by just skipping such
                # things - it really ought to not happen anyway.
                add_error("%s: mode changed since indexing, skipping." %
                          path_msg(ent.name))
                lastskip_name = ent.name
                continue
            if stat.S_ISREG(ent.mode):
                try:
                    # If the file changes while we're reading it, then our reading
                    # may stop at some point, but the stat() above may have gotten
                    # a different size already. Recalculate the meta size so that
                    # the repository records the accurate size in the metadata, even
                    # if the other stat() data might be slightly older than the file
                    # content (which we can't fix, this is inherently racy, but we
                    # can prevent the size mismatch.)
                    meta.size = 0

                    def new_blob(data):
                        meta.size += len(data)
                        return w.new_blob(data)

                    before_saving_regular_file(ent.name)
                    with hashsplit.open_noatime(ent.name) as f:
                        (mode, id) = hashsplit.split_to_blob_or_tree(
                            new_blob, w.new_tree, [f], keep_boundaries=False)
                except (IOError, OSError) as e:
                    add_error('%s: %s' % (ent.name, e))
                    lastskip_name = ent.name
            elif stat.S_ISDIR(ent.mode):
                assert (0)  # handled above
            elif stat.S_ISLNK(ent.mode):
                mode, id = (GIT_MODE_SYMLINK, w.new_blob(meta.symlink_target))
            else:
                # Everything else should be fully described by its
                # metadata, so just record an empty blob, so the paths
                # in the tree and .bupm will match up.
                (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))

            if id:
                ent.validate(mode, id)
                ent.repack()
                git_name = git.mangle_name(file, ent.mode, ent.gitmode)
                git_info = (mode, git_name, id)
                shalists[-1].append(git_info)
                sort_key = git.shalist_item_sort_key((ent.mode, file, id))
                metalists[-1].append((sort_key, meta))

        if exists and wasmissing:
            _nonlocal['count'] += oldsize
            _nonlocal['subcount'] = 0

    if opt.progress:
        pct = total and _nonlocal['count'] * 100.0 / total or 100
        progress(
            'Saving: %.2f%% (%d/%dk, %d/%d files), done.    \n' %
            (pct, _nonlocal['count'] / 1024, total / 1024, fcount, ftotal))

    while len(parts) > 1:  # _pop() all the parts above the root
        _pop(force_tree=None)
    assert (len(shalists) == 1)
    assert (len(metalists) == 1)

    # Finish the root directory.
    tree = _pop(
        force_tree=None,
        # When there's a collision, use empty metadata for the root.
        dir_metadata=metadata.Metadata() if root_collision else None)

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.tree:
        out.write(hexlify(tree))
        out.write(b'\n')
    if opt.commit or name:
        if compat.py_maj > 2:
            # Strip b prefix from python 3 bytes reprs to preserve previous format
            msgcmd = b'[%s]' % b', '.join(
                [repr(argv_bytes(x))[1:].encode('ascii') for x in argv])
        else:
            msgcmd = repr(argv)
        msg = b'bup save\n\nGenerated by command:\n%s\n' % msgcmd
        userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
        commit = w.new_commit(tree, oldref, userline, date, None, userline,
                              date, None, msg)
        if opt.commit:
            out.write(hexlify(commit))
            out.write(b'\n')

    msr.close()
    w.close()  # must close before we can update the ref

    if opt.name:
        if cli:
            cli.update_ref(refname, commit, oldref)
        else:
            git.update_ref(refname, commit, oldref)

    if cli:
        cli.close()

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' %
            len(saved_errors))
        sys.exit(1)
Exemplo n.º 23
0
 def pfunc(count, total):
     qprogress('bup: merging indexes (%d/%d)\r' % (count, total))
Exemplo n.º 24
0
def restore(repo, parent_path, name, item, top, sparse, numeric_ids, owner_map,
            exclude_rxs, verbosity, hardlinks):
    global total_restored
    mode = vfs.item_mode(item)
    treeish = S_ISDIR(mode)
    fullname = parent_path + b'/' + name
    # Match behavior of index --exclude-rx with respect to paths.
    if should_rx_exclude_path(fullname + (b'/' if treeish else b''),
                              exclude_rxs):
        return

    if not treeish:
        # Do this now so we'll have meta.symlink_target for verbose output
        item = vfs.augment_item_meta(repo, item, include_size=True)
        meta = item.meta
        assert (meta.mode == mode)

    if stat.S_ISDIR(mode):
        if verbosity >= 1:
            out.write(b'%s/\n' % fullname)
    elif stat.S_ISLNK(mode):
        assert (meta.symlink_target)
        if verbosity >= 2:
            out.write(b'%s@ -> %s\n' % (fullname, meta.symlink_target))
    else:
        if verbosity >= 2:
            out.write(fullname + b'\n')

    orig_cwd = os.getcwd()
    try:
        if treeish:
            # Assumes contents() returns '.' with the full metadata first
            sub_items = vfs.contents(repo, item, want_meta=True)
            dot, item = next(sub_items, None)
            assert (dot == b'.')
            item = vfs.augment_item_meta(repo, item, include_size=True)
            meta = item.meta
            meta.create_path(name)
            os.chdir(name)
            total_restored += 1
            if verbosity >= 0:
                qprogress('Restoring: %d\r' % total_restored)
            for sub_name, sub_item in sub_items:
                restore(repo, fullname, sub_name, sub_item, top, sparse,
                        numeric_ids, owner_map, exclude_rxs, verbosity,
                        hardlinks)
            os.chdir(b'..')
            apply_metadata(meta, name, numeric_ids, owner_map)
        else:
            created_hardlink = False
            if meta.hardlink_target:
                created_hardlink = hardlink_if_possible(
                    fullname, item, top, hardlinks)
            if not created_hardlink:
                meta.create_path(name)
                if stat.S_ISREG(meta.mode):
                    if sparse:
                        write_file_content_sparsely(repo, name, item)
                    else:
                        write_file_content(repo, name, item)
            total_restored += 1
            if verbosity >= 0:
                qprogress('Restoring: %d\r' % total_restored)
            if not created_hardlink:
                apply_metadata(meta, name, numeric_ids, owner_map)
    finally:
        os.chdir(orig_cwd)
Exemplo n.º 25
0
def sweep(live_objects, existing_count, cat_pipe, threshold, compression,
          verbosity):
    # Traverse all the packs, saving the (probably) live data.

    ns = Nonlocal()
    ns.stale_files = []

    def remove_stale_files(new_pack_prefix):
        if verbosity and new_pack_prefix:
            log('created ' + path_msg(basename(new_pack_prefix)) + '\n')
        for p in ns.stale_files:
            if new_pack_prefix and p.startswith(new_pack_prefix):
                continue  # Don't remove the new pack file
            if verbosity:
                log('removing ' + path_msg(basename(p)) + '\n')
            os.unlink(p)
        if ns.stale_files:  # So git cat-pipe will close them
            cat_pipe.restart()
        ns.stale_files = []

    writer = git.PackWriter(objcache_maker=None,
                            compression_level=compression,
                            run_midx=False,
                            on_pack_finish=remove_stale_files)
    try:
        # FIXME: sanity check .idx names vs .pack names?
        collect_count = 0
        for idx_name in glob.glob(
                os.path.join(git.repo(b'objects/pack'), b'*.idx')):
            if verbosity:
                qprogress('preserving live data (%d%% complete)\r' %
                          ((float(collect_count) / existing_count) * 100))
            with git.open_idx(idx_name) as idx:
                idx_live_count = 0
                for sha in idx:
                    if live_objects.exists(sha):
                        idx_live_count += 1

                collect_count += idx_live_count
                if idx_live_count == 0:
                    if verbosity:
                        log('deleting %s\n' %
                            path_msg(git.repo_rel(basename(idx_name))))
                    ns.stale_files.append(idx_name)
                    ns.stale_files.append(idx_name[:-3] + b'pack')
                    continue

                live_frac = idx_live_count / float(len(idx))
                if live_frac > ((100 - threshold) / 100.0):
                    if verbosity:
                        log('keeping %s (%d%% live)\n' % (git.repo_rel(
                            basename(idx_name)), live_frac * 100))
                    continue

                if verbosity:
                    log('rewriting %s (%.2f%% live)\n' %
                        (basename(idx_name), live_frac * 100))
                for sha in idx:
                    if live_objects.exists(sha):
                        item_it = cat_pipe.get(hexlify(sha))
                        _, typ, _ = next(item_it)
                        writer.just_write(sha, typ, b''.join(item_it))

                ns.stale_files.append(idx_name)
                ns.stale_files.append(idx_name[:-3] + b'pack')

        if verbosity:
            progress('preserving live data (%d%% complete)\n' %
                     ((float(collect_count) / existing_count) * 100))

        # Nothing should have recreated midx/bloom yet.
        pack_dir = git.repo(b'objects/pack')
        assert (not os.path.exists(os.path.join(pack_dir, b'bup.bloom')))
        assert (not glob.glob(os.path.join(pack_dir, b'*.midx')))

    except BaseException as ex:
        with pending_raise(ex):
            writer.abort()

    # This will finally run midx.
    # Can only change refs (if needed) after this.
    writer.close()

    remove_stale_files(None)  # In case we didn't write to the writer.

    if verbosity:
        log('discarded %d%% of objects\n' %
            ((existing_count - count_objects(pack_dir, verbosity)) /
             float(existing_count) * 100))
Exemplo n.º 26
0
def restore(repo, parent_path, name, item, top, sparse, numeric_ids, owner_map,
            exclude_rxs, verbosity, hardlinks):
    global total_restored
    mode = vfs.item_mode(item)
    treeish = S_ISDIR(mode)
    fullname = parent_path + '/' + name
    # Match behavior of index --exclude-rx with respect to paths.
    if should_rx_exclude_path(fullname + ('/' if treeish else ''),
                              exclude_rxs):
        return

    if not treeish:
        # Do this now so we'll have meta.symlink_target for verbose output
        item = vfs.augment_item_meta(repo, item, include_size=True)
        meta = item.meta
        assert(meta.mode == mode)

    if stat.S_ISDIR(mode):
        if verbosity >= 1:
            print('%s/' % fullname)
    elif stat.S_ISLNK(mode):
        assert(meta.symlink_target)
        if verbosity >= 2:
            print('%s@ -> %s' % (fullname, meta.symlink_target))
    else:
        if verbosity >= 2:
            print(fullname)

    orig_cwd = os.getcwd()
    try:
        if treeish:
            # Assumes contents() returns '.' with the full metadata first
            sub_items = vfs.contents(repo, item, want_meta=True)
            dot, item = next(sub_items, None)
            assert(dot == '.')
            item = vfs.augment_item_meta(repo, item, include_size=True)
            meta = item.meta
            meta.create_path(name)
            os.chdir(name)
            total_restored += 1
            if verbosity >= 0:
                qprogress('Restoring: %d\r' % total_restored)
            for sub_name, sub_item in sub_items:
                restore(repo, fullname, sub_name, sub_item, top, sparse,
                        numeric_ids, owner_map, exclude_rxs, verbosity,
                        hardlinks)
            os.chdir('..')
            apply_metadata(meta, name, numeric_ids, owner_map)
        else:
            created_hardlink = False
            if meta.hardlink_target:
                created_hardlink = hardlink_if_possible(fullname, item, top,
                                                        hardlinks)
            if not created_hardlink:
                meta.create_path(name)
                if stat.S_ISREG(meta.mode):
                    if sparse:
                        write_file_content_sparsely(repo, name, item)
                    else:
                        write_file_content(repo, name, item)
            total_restored += 1
            if verbosity >= 0:
                qprogress('Restoring: %d\r' % total_restored)
            if not created_hardlink:
                apply_metadata(meta, name, numeric_ids, owner_map)
    finally:
        os.chdir(orig_cwd)
Exemplo n.º 27
0
 def pfunc(count, total):
     qprogress('Reading indexes: %.2f%% (%d/%d)\r'
               % (count*100.0/total, count, total))
Exemplo n.º 28
0
def update_index(top, excluded_paths, exclude_rxs, xdev_exceptions):
    # tmax and start must be epoch nanoseconds.
    tmax = (time.time() - 1) * 10**9
    ri = index.Reader(indexfile)
    msw = index.MetaStoreWriter(indexfile + '.meta')
    wi = index.Writer(indexfile, msw, tmax)
    rig = IterHelper(ri.iter(name=top))
    tstart = int(time.time()) * 10**9

    hlinks = hlinkdb.HLinkDB(indexfile + '.hlink')

    fake_hash = None
    if opt.fake_valid:

        def fake_hash(name):
            return (GIT_MODE_FILE, index.FAKE_SHA)

    total = 0
    bup_dir = os.path.abspath(git.repo())
    index_start = time.time()
    for path, pst in recursive_dirlist([top],
                                       xdev=opt.xdev,
                                       bup_dir=bup_dir,
                                       excluded_paths=excluded_paths,
                                       exclude_rxs=exclude_rxs,
                                       xdev_exceptions=xdev_exceptions):
        if opt.verbose >= 2 or (opt.verbose == 1
                                and stat.S_ISDIR(pst.st_mode)):
            sys.stdout.write('%s\n' % path)
            sys.stdout.flush()
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        elif not (total % 128):
            elapsed = time.time() - index_start
            paths_per_sec = total / elapsed if elapsed else 0
            qprogress('Indexing: %d (%d paths/s)\r' % (total, paths_per_sec))
        total += 1

        while rig.cur and rig.cur.name > path:  # deleted paths
            if rig.cur.exists():
                rig.cur.set_deleted()
                rig.cur.repack()
                if rig.cur.nlink > 1 and not stat.S_ISDIR(rig.cur.mode):
                    hlinks.del_path(rig.cur.name)
            rig.next()

        if rig.cur and rig.cur.name == path:  # paths that already existed
            need_repack = False
            if (rig.cur.stale(pst, tstart, check_device=opt.check_device)):
                try:
                    meta = metadata.from_path(path, statinfo=pst)
                except (OSError, IOError) as e:
                    add_error(e)
                    rig.next()
                    continue
                if not stat.S_ISDIR(rig.cur.mode) and rig.cur.nlink > 1:
                    hlinks.del_path(rig.cur.name)
                if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                    hlinks.add_path(path, pst.st_dev, pst.st_ino)
                # Clear these so they don't bloat the store -- they're
                # already in the index (since they vary a lot and they're
                # fixed length).  If you've noticed "tmax", you might
                # wonder why it's OK to do this, since that code may
                # adjust (mangle) the index mtime and ctime -- producing
                # fake values which must not end up in a .bupm.  However,
                # it looks like that shouldn't be possible:  (1) When
                # "save" validates the index entry, it always reads the
                # metadata from the filesytem. (2) Metadata is only
                # read/used from the index if hashvalid is true. (3)
                # "faked" entries will be stale(), and so we'll invalidate
                # them below.
                meta.ctime = meta.mtime = meta.atime = 0
                meta_ofs = msw.store(meta)
                rig.cur.update_from_stat(pst, meta_ofs)
                rig.cur.invalidate()
                need_repack = True
            if not (rig.cur.flags & index.IX_HASHVALID):
                if fake_hash:
                    rig.cur.gitmode, rig.cur.sha = fake_hash(path)
                    rig.cur.flags |= index.IX_HASHVALID
                    need_repack = True
            if opt.fake_invalid:
                rig.cur.invalidate()
                need_repack = True
            if need_repack:
                rig.cur.repack()
            rig.next()
        else:  # new paths
            try:
                meta = metadata.from_path(path, statinfo=pst)
            except (OSError, IOError) as e:
                add_error(e)
                continue
            # See same assignment to 0, above, for rationale.
            meta.atime = meta.mtime = meta.ctime = 0
            meta_ofs = msw.store(meta)
            wi.add(path, pst, meta_ofs, hashgen=fake_hash)
            if not stat.S_ISDIR(pst.st_mode) and pst.st_nlink > 1:
                hlinks.add_path(path, pst.st_dev, pst.st_ino)

    elapsed = time.time() - index_start
    paths_per_sec = total / elapsed if elapsed else 0
    progress('Indexing: %d, done (%d paths/s).\n' % (total, paths_per_sec))

    hlinks.prepare_save()

    if ri.exists():
        ri.save()
        wi.flush()
        if wi.count:
            wr = wi.new_reader()
            if opt.check:
                log('check: before merging: oldfile\n')
                check_index(ri)
                log('check: before merging: newfile\n')
                check_index(wr)
            mi = index.Writer(indexfile, msw, tmax)

            for e in index.merge(ri, wr):
                # FIXME: shouldn't we remove deleted entries eventually?  When?
                mi.add_ixentry(e)

            ri.close()
            mi.close()
            wr.close()
        wi.abort()
    else:
        wi.close()

    msw.close()
    hlinks.commit_save()
Exemplo n.º 29
0
def plog(s):
    if opt.quiet:
        return
    qprogress(s)
Exemplo n.º 30
0
    return not already_saved(ent)

def wantrecurse_during(ent):
    return not already_saved(ent) or ent.sha_missing()

def find_hardlink_target(hlink_db, ent):
    if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
        link_paths = hlink_db.node_paths(ent.dev, ent.ino)
        if link_paths:
            return link_paths[0]

total = ftotal = 0
if opt.progress:
    for (transname,ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
        if not (ftotal % 10024):
            qprogress('Reading index: %d\r' % ftotal)
        exists = ent.exists()
        hashvalid = already_saved(ent)
        ent.set_sha_missing(not hashvalid)
        if not opt.smaller or ent.size < opt.smaller:
            if exists and not hashvalid:
                total += ent.size
        ftotal += 1
    progress('Reading index: %d, done.\n' % ftotal)
    hashsplit.progress_callback = progress_report

# Root collisions occur when strip or graft options map more than one
# path to the same directory (paths which originally had separate
# parents).  When that situation is detected, use empty metadata for
# the parent.  Otherwise, use the metadata for the common parent.
# Collision example: "bup save ... --strip /foo /foo/bar /bar".
Exemplo n.º 31
0
Arquivo: save.py Projeto: fakegit/bup
def save_tree(opt, reader, hlink_db, msr, w):
    # Metadata is stored in a file named .bupm in each directory.  The
    # first metadata entry will be the metadata for the current directory.
    # The remaining entries will be for each of the other directory
    # elements, in the order they're listed in the index.
    #
    # Since the git tree elements are sorted according to
    # git.shalist_item_sort_key, the metalist items are accumulated as
    # (sort_key, metadata) tuples, and then sorted when the .bupm file is
    # created.  The sort_key should have been computed using the element's
    # mangled name and git mode (after hashsplitting), but the code isn't
    # actually doing that but rather uses the element's real name and mode.
    # This makes things a bit more difficult when reading it back, see
    # vfs.ordered_tree_entries().

    # Maintain a stack of information representing the current location in
    # the archive being constructed.  The current path is recorded in
    # parts, which will be something like
    #      [StackDir(name=''), StackDir(name='home'), StackDir(name='someuser')],
    # and the accumulated content and metadata for files in the dirs is stored
    # in the .items member of the StackDir.

    stack = []

    def _push(part, metadata):
        # Enter a new archive directory -- make it the current directory.
        item = StackDir(part, metadata)
        stack.append(item)


    def _pop(force_tree=None, dir_metadata=None):
        # Leave the current archive directory and add its tree to its parent.
        item = stack.pop()
        # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
        if force_tree:
            tree = force_tree
        else:
            names_seen = set()
            clean_list = []
            for x in item.items:
                name = x.name
                if name in names_seen:
                    parent_path = b'/'.join(x.name for x in stack) + b'/'
                    add_error('error: ignoring duplicate path %s in %s'
                              % (path_msg(name), path_msg(parent_path)))
                else:
                    names_seen.add(name)
                    clean_list.append(x)

            # if set, overrides the original metadata pushed for this dir.
            if dir_metadata is None:
                dir_metadata = item.meta
            metalist = [(b'', dir_metadata)]
            metalist += [(git.shalist_item_sort_key((entry.mode, entry.name, None)),
                          entry.meta)
                         for entry in clean_list if entry.mode != GIT_MODE_TREE]
            metalist.sort(key = lambda x: x[0])
            metadata = BytesIO(b''.join(m[1].encode() for m in metalist))
            mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
                                                       [metadata],
                                                       keep_boundaries=False)
            shalist = [(mode, b'.bupm', id)]
            shalist += [(entry.gitmode,
                         git.mangle_name(entry.name, entry.mode, entry.gitmode),
                         entry.oid)
                        for entry in clean_list]

            tree = w.new_tree(shalist)
        if stack:
            stack[-1].append(item.name, GIT_MODE_TREE, GIT_MODE_TREE, tree, None)
        return tree


    # Hack around lack of nonlocal vars in python 2
    _nonlocal = {}
    _nonlocal['count'] = 0
    _nonlocal['subcount'] = 0
    _nonlocal['lastremain'] = None

    def progress_report(n):
        _nonlocal['subcount'] += n
        cc = _nonlocal['count'] + _nonlocal['subcount']
        pct = total and (cc*100.0/total) or 0
        now = time.time()
        elapsed = now - tstart
        kps = elapsed and int(cc/1024./elapsed)
        kps_frac = 10 ** int(math.log(kps+1, 10) - 1)
        kps = int(kps/kps_frac)*kps_frac
        if cc:
            remain = elapsed*1.0/cc * (total-cc)
        else:
            remain = 0.0
        if (_nonlocal['lastremain'] and (remain > _nonlocal['lastremain'])
              and ((remain - _nonlocal['lastremain'])/_nonlocal['lastremain'] < 0.05)):
            remain = _nonlocal['lastremain']
        else:
            _nonlocal['lastremain'] = remain
        hours = int(remain/60/60)
        mins = int(remain/60 - hours*60)
        secs = int(remain - hours*60*60 - mins*60)
        if elapsed < 30:
            remainstr = ''
            kpsstr = ''
        else:
            kpsstr = '%dk/s' % kps
            if hours:
                remainstr = '%dh%dm' % (hours, mins)
            elif mins:
                remainstr = '%dm%d' % (mins, secs)
            else:
                remainstr = '%ds' % secs
        qprogress('Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r'
                  % (pct, cc/1024, total/1024, fcount, ftotal,
                     remainstr, kpsstr))


    def already_saved(ent):
        return ent.is_valid() and w.exists(ent.sha) and ent.sha

    def wantrecurse_pre(ent):
        return not already_saved(ent)

    def wantrecurse_during(ent):
        return not already_saved(ent) or ent.sha_missing()

    def find_hardlink_target(hlink_db, ent):
        if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
            link_paths = hlink_db.node_paths(ent.dev, ent.ino)
            if link_paths:
                return link_paths[0]
        return None

    total = ftotal = 0
    if opt.progress:
        for transname, ent in reader.filter(opt.sources,
                                            wantrecurse=wantrecurse_pre):
            if not (ftotal % 10024):
                qprogress('Reading index: %d\r' % ftotal)
            exists = ent.exists()
            hashvalid = already_saved(ent)
            ent.set_sha_missing(not hashvalid)
            if not opt.smaller or ent.size < opt.smaller:
                if exists and not hashvalid:
                    total += ent.size
            ftotal += 1
        progress('Reading index: %d, done.\n' % ftotal)
        hashsplit.progress_callback = progress_report

    # Root collisions occur when strip or graft options map more than one
    # path to the same directory (paths which originally had separate
    # parents).  When that situation is detected, use empty metadata for
    # the parent.  Otherwise, use the metadata for the common parent.
    # Collision example: "bup save ... --strip /foo /foo/bar /bar".

    # FIXME: Add collision tests, or handle collisions some other way.

    # FIXME: Detect/handle strip/graft name collisions (other than root),
    # i.e. if '/foo/bar' and '/bar' both map to '/'.

    first_root = None
    root_collision = None
    tstart = time.time()
    fcount = 0
    lastskip_name = None
    lastdir = b''
    for transname, ent in reader.filter(opt.sources,
                                        wantrecurse=wantrecurse_during):
        (dir, file) = os.path.split(ent.name)
        exists = (ent.flags & index.IX_EXISTS)
        hashvalid = already_saved(ent)
        wasmissing = ent.sha_missing()
        oldsize = ent.size
        if opt.verbose:
            if not exists:
                status = 'D'
            elif not hashvalid:
                if ent.sha == index.EMPTY_SHA:
                    status = 'A'
                else:
                    status = 'M'
            else:
                status = ' '
            if opt.verbose >= 2:
                log('%s %-70s\n' % (status, path_msg(ent.name)))
            elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
                if not lastdir.startswith(dir):
                    log('%s %-70s\n' % (status, path_msg(os.path.join(dir, b''))))
                lastdir = dir

        if opt.progress:
            progress_report(0)
        fcount += 1

        if not exists:
            continue
        if opt.smaller and ent.size >= opt.smaller:
            if exists and not hashvalid:
                if opt.verbose:
                    log('skipping large file "%s"\n' % path_msg(ent.name))
                lastskip_name = ent.name
            continue

        assert(dir.startswith(b'/'))
        if opt.strip:
            dirp = stripped_path_components(dir, opt.sources)
        elif opt.strip_path:
            dirp = stripped_path_components(dir, [opt.strip_path])
        elif opt.grafts:
            dirp = grafted_path_components(opt.grafts, dir)
        else:
            dirp = path_components(dir)

        # At this point, dirp contains a representation of the archive
        # path that looks like [(archive_dir_name, real_fs_path), ...].
        # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
        # might look like this at some point:
        #   [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].

        # This dual representation supports stripping/grafting, where the
        # archive path may not have a direct correspondence with the
        # filesystem.  The root directory is represented by an initial
        # component named '', and any component that doesn't have a
        # corresponding filesystem directory (due to grafting, for
        # example) will have a real_fs_path of None, i.e. [('', None),
        # ...].

        if first_root == None:
            first_root = dirp[0]
        elif first_root != dirp[0]:
            root_collision = True

        # If switching to a new sub-tree, finish the current sub-tree.
        while [x.name for x in stack] > [x[0] for x in dirp]:
            _pop()

        # If switching to a new sub-tree, start a new sub-tree.
        for path_component in dirp[len(stack):]:
            dir_name, fs_path = path_component
            # Not indexed, so just grab the FS metadata or use empty metadata.
            try:
                meta = metadata.from_path(fs_path, normalized=True) \
                    if fs_path else metadata.Metadata()
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = dir_name
                meta = metadata.Metadata()
            _push(dir_name, meta)

        if not file:
            if len(stack) == 1:
                continue # We're at the top level -- keep the current root dir
            # Since there's no filename, this is a subdir -- finish it.
            oldtree = already_saved(ent) # may be None
            newtree = _pop(force_tree = oldtree)
            if not oldtree:
                if lastskip_name and lastskip_name.startswith(ent.name):
                    ent.invalidate()
                else:
                    ent.validate(GIT_MODE_TREE, newtree)
                ent.repack()
            if exists and wasmissing:
                _nonlocal['count'] += oldsize
            continue

        # it's not a directory
        if hashvalid:
            meta = msr.metadata_at(ent.meta_ofs)
            meta.hardlink_target = find_hardlink_target(hlink_db, ent)
            # Restore the times that were cleared to 0 in the metastore.
            (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
            stack[-1].append(file, ent.mode, ent.gitmode, ent.sha, meta)
        else:
            id = None
            hlink = find_hardlink_target(hlink_db, ent)
            try:
                meta = metadata.from_path(ent.name, hardlink_target=hlink,
                                          normalized=True,
                                          after_stat=after_nondir_metadata_stat)
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = ent.name
                continue
            if stat.S_IFMT(ent.mode) != stat.S_IFMT(meta.mode):
                # The mode changed since we indexed the file, this is bad.
                # This can cause two issues:
                # 1) We e.g. think the file is a regular file, but now it's
                #    something else (a device, socket, FIFO or symlink, etc.)
                #    and _read_ from it when we shouldn't.
                # 2) We then record it as valid, but don't update the index
                #    metadata, and on a subsequent save it has 'hashvalid'
                #    but is recorded as the file type from the index, when
                #    the content is something else ...
                # Avoid all of these consistency issues by just skipping such
                # things - it really ought to not happen anyway.
                add_error("%s: mode changed since indexing, skipping." % path_msg(ent.name))
                lastskip_name = ent.name
                continue
            if stat.S_ISREG(ent.mode):
                try:
                    # If the file changes while we're reading it, then our reading
                    # may stop at some point, but the stat() above may have gotten
                    # a different size already. Recalculate the meta size so that
                    # the repository records the accurate size in the metadata, even
                    # if the other stat() data might be slightly older than the file
                    # content (which we can't fix, this is inherently racy, but we
                    # can prevent the size mismatch.)
                    meta.size = 0
                    def new_blob(data):
                        meta.size += len(data)
                        return w.new_blob(data)
                    before_saving_regular_file(ent.name)
                    with hashsplit.open_noatime(ent.name) as f:
                        (mode, id) = hashsplit.split_to_blob_or_tree(
                                                new_blob, w.new_tree, [f],
                                                keep_boundaries=False)
                except (IOError, OSError) as e:
                    add_error('%s: %s' % (ent.name, e))
                    lastskip_name = ent.name
            elif stat.S_ISDIR(ent.mode):
                assert(0)  # handled above
            elif stat.S_ISLNK(ent.mode):
                mode, id = (GIT_MODE_SYMLINK, w.new_blob(meta.symlink_target))
            else:
                # Everything else should be fully described by its
                # metadata, so just record an empty blob, so the paths
                # in the tree and .bupm will match up.
                (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))

            if id:
                ent.validate(mode, id)
                ent.repack()
                stack[-1].append(file, ent.mode, ent.gitmode, id, meta)

        if exists and wasmissing:
            _nonlocal['count'] += oldsize
            _nonlocal['subcount'] = 0


    if opt.progress:
        pct = total and _nonlocal['count']*100.0/total or 100
        progress('Saving: %.2f%% (%d/%dk, %d/%d files), done.    \n'
                 % (pct, _nonlocal['count']/1024, total/1024, fcount, ftotal))

    while len(stack) > 1: # _pop() all the parts above the root
        _pop()

    # Finish the root directory.
    # When there's a collision, use empty metadata for the root.
    tree = _pop(dir_metadata = metadata.Metadata() if root_collision else None)

    return tree
Exemplo n.º 32
0
Arquivo: git.py Projeto: zzmjohn/bup
 def pfunc(count, total):
     qprogress('Reading indexes: %.2f%% (%d/%d)\r' %
               (count * 100.0 / total, count, total))
Exemplo n.º 33
0
        s = opt.find
    try:
        bin = s.decode('hex')
    except TypeError:
        o.fatal('--find parameter is not a valid hex string')

find = opt.find.lower()

count = 0
for name in extra:
    try:
        ix = git.open_idx(name)
    except git.GitError as e:
        add_error('%s: %s' % (name, e))
        continue
    if len(opt.find) == 40:
        if ix.exists(bin):
            print name, find
    else:
        # slow, exhaustive search
        for _i in ix:
            i = str(_i).encode('hex')
            if i.startswith(find):
                print name, i
            qprogress('Searching: %d\r' % count)
            count += 1

if saved_errors:
    log('WARNING: %d errors encountered while saving.\n' % len(saved_errors))
    sys.exit(1)