Пример #1
0
def testmangle():
    """Check git.mangle_name() and git.demangle_name() against fixed cases."""
    with no_lingering_errors():
        # Octal mode values passed as the second and third arguments below
        # (file, file with other permission bits, symlink, directory).
        afile  = 0o100644
        afile2 = 0o100770
        alink  = 0o120000
        adir   = 0o040000
        adir2  = 0o040777
        # Names ending in '.bup' or '.bup?' are expected to gain a '.bupl'
        # suffix; other names pass through unchanged.
        WVPASSEQ(git.mangle_name(b'a', adir2, adir), b'a')
        WVPASSEQ(git.mangle_name(b'.bup', adir2, adir), b'.bup.bupl')
        WVPASSEQ(git.mangle_name(b'a.bupa', adir2, adir), b'a.bupa.bupl')
        WVPASSEQ(git.mangle_name(b'b.bup', alink, alink), b'b.bup.bupl')
        WVPASSEQ(git.mangle_name(b'b.bu', alink, alink), b'b.bu')
        WVPASSEQ(git.mangle_name(b'f', afile, afile2), b'f')
        WVPASSEQ(git.mangle_name(b'f.bup', afile, afile2), b'f.bup.bupl')
        # A file mode paired with a directory mode is expected to add '.bup'.
        WVPASSEQ(git.mangle_name(b'f.bup', afile, adir), b'f.bup.bup')
        WVPASSEQ(git.mangle_name(b'f', afile, adir), b'f.bup')

        # demangle_name() returns a (name, type) pair: '.bup' is reported as
        # BUP_CHUNKED and '.bupl' as BUP_NORMAL, with the suffix stripped.
        WVPASSEQ(git.demangle_name(b'f.bup', afile), (b'f', git.BUP_CHUNKED))
        WVPASSEQ(git.demangle_name(b'f.bupl', afile), (b'f', git.BUP_NORMAL))
        WVPASSEQ(git.demangle_name(b'f.bup.bupl', afile), (b'f.bup', git.BUP_NORMAL))

        # '.bupm' demangles to an empty name; the reported type differs
        # between the file mode and the directory mode.
        WVPASSEQ(git.demangle_name(b'.bupm', afile), (b'', git.BUP_NORMAL))
        WVPASSEQ(git.demangle_name(b'.bupm', adir), (b'', git.BUP_CHUNKED))

        # for safety, we ignore .bup? suffixes we don't recognize.  Future
        # versions might implement a .bup[a-z] extension as something other
        # than BUP_NORMAL.
        WVPASSEQ(git.demangle_name(b'f.bupa', afile), (b'f.bupa', git.BUP_NORMAL))
Пример #2
0
def testmangle():
    # Mode constants use the 0o prefix: the values are the same as the legacy
    # leading-zero octal form, but this spelling also parses on Python 3.
    afile = 0o100644
    afile2 = 0o100770
    alink = 0o120000
    adir = 0o040000
    adir2 = 0o040777
    # Names ending in '.bup' or '.bup?' are expected to gain a '.bupl'
    # suffix; other names pass through unchanged.
    WVPASSEQ(git.mangle_name("a", adir2, adir), "a")
    WVPASSEQ(git.mangle_name(".bup", adir2, adir), ".bup.bupl")
    WVPASSEQ(git.mangle_name("a.bupa", adir2, adir), "a.bupa.bupl")
    WVPASSEQ(git.mangle_name("b.bup", alink, alink), "b.bup.bupl")
    WVPASSEQ(git.mangle_name("b.bu", alink, alink), "b.bu")
    WVPASSEQ(git.mangle_name("f", afile, afile2), "f")
    WVPASSEQ(git.mangle_name("f.bup", afile, afile2), "f.bup.bupl")
    # A file mode paired with a directory mode is expected to add '.bup'.
    WVPASSEQ(git.mangle_name("f.bup", afile, adir), "f.bup.bup")
    WVPASSEQ(git.mangle_name("f", afile, adir), "f.bup")

    # demangle_name() returns a (name, type) pair: '.bup' is reported as
    # BUP_CHUNKED and '.bupl' as BUP_NORMAL, with the suffix stripped.
    WVPASSEQ(git.demangle_name("f.bup", afile), ("f", git.BUP_CHUNKED))
    WVPASSEQ(git.demangle_name("f.bupl", afile), ("f", git.BUP_NORMAL))
    WVPASSEQ(git.demangle_name("f.bup.bupl", afile), ("f.bup", git.BUP_NORMAL))

    # '.bupm' demangles to an empty name; the reported type differs between
    # the file mode and the directory mode.
    WVPASSEQ(git.demangle_name(".bupm", afile), ('', git.BUP_NORMAL))
    WVPASSEQ(git.demangle_name(".bupm", adir), ('', git.BUP_CHUNKED))

    # for safety, we ignore .bup? suffixes we don't recognize.  Future
    # versions might implement a .bup[a-z] extension as something other
    # than BUP_NORMAL.
    WVPASSEQ(git.demangle_name("f.bupa", afile), ("f.bupa", git.BUP_NORMAL))
Пример #3
0
Файл: tgit.py Проект: senseb/bup
def testmangle():
    """Check git.mangle_name() and git.demangle_name() against fixed cases."""
    # Mode constants use the 0o prefix: the values are the same as the legacy
    # leading-zero octal form, but this spelling also parses on Python 3.
    afile  = 0o100644
    afile2 = 0o100770
    alink  = 0o120000
    adir   = 0o040000
    adir2  = 0o040777
    # Names ending in '.bup' or '.bup?' are expected to gain a '.bupl'
    # suffix; other names pass through unchanged.
    WVPASSEQ(git.mangle_name("a", adir2, adir), "a")
    WVPASSEQ(git.mangle_name(".bup", adir2, adir), ".bup.bupl")
    WVPASSEQ(git.mangle_name("a.bupa", adir2, adir), "a.bupa.bupl")
    WVPASSEQ(git.mangle_name("b.bup", alink, alink), "b.bup.bupl")
    WVPASSEQ(git.mangle_name("b.bu", alink, alink), "b.bu")
    WVPASSEQ(git.mangle_name("f", afile, afile2), "f")
    WVPASSEQ(git.mangle_name("f.bup", afile, afile2), "f.bup.bupl")
    # A file mode paired with a directory mode is expected to add '.bup'.
    WVPASSEQ(git.mangle_name("f.bup", afile, adir), "f.bup.bup")
    WVPASSEQ(git.mangle_name("f", afile, adir), "f.bup")

    # demangle_name() returns a (name, type) pair: '.bup' is reported as
    # BUP_CHUNKED and '.bupl' as BUP_NORMAL, with the suffix stripped.
    WVPASSEQ(git.demangle_name("f.bup", afile), ("f", git.BUP_CHUNKED))
    WVPASSEQ(git.demangle_name("f.bupl", afile), ("f", git.BUP_NORMAL))
    WVPASSEQ(git.demangle_name("f.bup.bupl", afile), ("f.bup", git.BUP_NORMAL))

    # '.bupm' demangles to an empty name; the reported type differs between
    # the file mode and the directory mode.
    WVPASSEQ(git.demangle_name(".bupm", afile), ('', git.BUP_NORMAL))
    WVPASSEQ(git.demangle_name(".bupm", adir), ('', git.BUP_CHUNKED))

    # for safety, we ignore .bup? suffixes we don't recognize.  Future
    # versions might implement a .bup[a-z] extension as something other
    # than BUP_NORMAL.
    WVPASSEQ(git.demangle_name("f.bupa", afile), ("f.bupa", git.BUP_NORMAL))
Пример #4
0
def _pop(force_tree):
    """Leave the current directory level and return its tree id.

    Removes the last entries of ``parts`` and ``shalists``.  The returned
    tree is ``force_tree`` when that is truthy, otherwise the result of
    ``w.new_tree()`` on the popped shalist.  When a parent level remains,
    the tree is appended to it under the mangled directory name; otherwise
    the popped shalist is pushed back.
    """
    assert(len(parts) >= 1)
    part = parts.pop()
    shalist = shalists.pop()
    tree = force_tree or w.new_tree(shalist)
    if shalists:
        # Both mode arguments must be the octal tree mode 0o40000; a bare
        # 40000 is a decimal number and a different value.
        shalists[-1].append(('40000',
                             git.mangle_name(part, 0o40000, 0o40000),
                             tree))
    else:  # this was the toplevel, so put it back for sanity
        shalists.append(shalist)
    return tree
Пример #5
0
def _pop(force_tree):
    """Close the innermost directory level and return its tree id.

    The last entries of ``parts`` and ``shalists`` are removed.  The tree is
    ``force_tree`` when that is truthy, otherwise a new tree written from
    the popped shalist.  If a parent level remains, the tree is appended to
    it under the mangled directory name; otherwise the popped shalist is
    pushed back so the top level stays in place.
    """
    assert len(parts) > 0
    dirname = parts.pop()
    entries = shalists.pop()
    if force_tree:
        tree = force_tree
    else:
        tree = w.new_tree(entries)
    if not shalists:
        # this was the toplevel, so put it back for sanity
        shalists.append(entries)
        return tree
    parent = shalists[-1]
    mangled = git.mangle_name(dirname, GIT_MODE_TREE, GIT_MODE_TREE)
    parent.append((GIT_MODE_TREE, mangled, tree))
    return tree
Пример #6
0
def _pop(force_tree, dir_metadata=None):
    """Leave the current archive directory and add its tree to its parent.

    Pops one level from ``parts``, ``shalists`` and ``metalists``.  When
    ``force_tree`` is truthy it is used as the tree unchanged.  Otherwise,
    if the popped metadata list is non-empty, its entries are sorted by
    their first element, encoded, concatenated and stored as a ``.bupm``
    entry before the tree is written.  A truthy ``dir_metadata`` replaces
    the first metadata entry.  Returns the tree id.
    """
    assert len(parts) > 0
    dirname = parts.pop()
    entries = shalists.pop()
    meta_entries = metalists.pop()
    if force_tree:
        tree = force_tree
    else:
        if meta_entries:
            if dir_metadata:
                # Override the original metadata pushed for this dir.
                meta_entries = [("", dir_metadata)] + meta_entries[1:]
            ordered = sorted(meta_entries, key=lambda pair: pair[0])
            encoded = [pair[1].encode() for pair in ordered]
            meta_file = StringIO("".join(encoded))
            mode, oid = hashsplit.split_to_blob_or_tree(
                w.new_blob, w.new_tree, [meta_file], keep_boundaries=False)
            entries.append((mode, ".bupm", oid))
        tree = w.new_tree(entries)
    if shalists:
        mangled = git.mangle_name(dirname, GIT_MODE_TREE, GIT_MODE_TREE)
        shalists[-1].append((GIT_MODE_TREE, mangled, tree))
    return tree
Пример #7
0
def _pop(force_tree, dir_metadata=None):
    """Leave the current archive directory and add its tree to its parent.

    Pops one level from ``parts``, ``shalists`` and ``metalists``.  If the
    popped metadata list is non-empty, its entries are sorted by their
    first element, encoded, concatenated and written as a ``.bupm`` blob
    that is appended to the popped shalist.  A truthy ``dir_metadata``
    replaces the first metadata entry.  The returned tree is ``force_tree``
    when that is truthy, otherwise a new tree built from the shalist.
    """
    assert (len(parts) >= 1)
    part = parts.pop()
    shalist = shalists.pop()
    metalist = metalists.pop()
    if metalist:
        if dir_metadata:  # Override the original metadata pushed for this dir.
            metalist = [('', dir_metadata)] + metalist[1:]
        sorted_metalist = sorted(metalist, key=lambda x: x[0])
        metadata = ''.join([m[1].encode() for m in sorted_metalist])
        # 0o100644 has the same value as the legacy literal 0100644, but the
        # 0o spelling also parses on Python 3.
        shalist.append((0o100644, '.bupm', w.new_blob(metadata)))
    tree = force_tree or w.new_tree(shalist)
    if shalists:
        shalists[-1].append(
            (GIT_MODE_TREE, git.mangle_name(part, GIT_MODE_TREE,
                                            GIT_MODE_TREE), tree))
    return tree
Пример #8
0
def _pop(force_tree, dir_metadata=None):
    """Leave the current archive directory and add its tree to its parent.

    Pops one level from ``parts``, ``shalists`` and ``metalists``.  A truthy
    ``force_tree`` is used as the tree unchanged.  Otherwise entries with a
    name already seen are dropped (an error is recorded for each), the
    metadata is written as a ``.bupm`` entry, and a new tree is created
    from the remaining entries.  Returns the tree id.
    """
    # Leave the current archive directory and add its tree to its parent.
    assert(len(parts) >= 1)
    part = parts.pop()
    shalist = shalists.pop()
    metalist = metalists.pop()
    # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
    if force_tree:
        tree = force_tree
    else:
        names_seen = set()
        clean_list = []
        # metaidx tracks the metalist position matching the current shalist
        # entry; it only advances for kept non-directory entries.
        # NOTE(review): presumably metalist holds one entry per
        # non-directory item, in shalist order -- confirm against caller.
        metaidx = 1 # entry at 0 is for the dir
        for x in shalist:
            name = x[1]
            if name in names_seen:
                parent_path = b'/'.join(parts) + b'/'
                add_error('error: ignoring duplicate path %s in %s'
                          % (path_msg(name), path_msg(parent_path)))
                # Drop the duplicate's metadata as well; metaidx is left
                # alone because the following entries shift down by one.
                if not stat.S_ISDIR(x[0]):
                    del metalist[metaidx]
            else:
                names_seen.add(name)
                clean_list.append(x)
                if not stat.S_ISDIR(x[0]):
                    metaidx += 1

        if dir_metadata: # Override the original metadata pushed for this dir.
            metalist = [(b'', dir_metadata)] + metalist[1:]
        # Sort by the first element of each pair, then concatenate the
        # encoded second elements into a single byte string.
        sorted_metalist = sorted(metalist, key = lambda x : x[0])
        metadata = b''.join([m[1].encode() for m in sorted_metalist])
        metadata_f = BytesIO(metadata)
        mode, id = hashsplit.split_to_blob_or_tree(w.new_blob, w.new_tree,
                                                   [metadata_f],
                                                   keep_boundaries=False)
        clean_list.append((mode, b'.bupm', id))

        tree = w.new_tree(clean_list)
    # Record the finished tree in the parent level, if one remains.
    if shalists:
        shalists[-1].append((GIT_MODE_TREE,
                             git.mangle_name(part,
                                             GIT_MODE_TREE, GIT_MODE_TREE),
                             tree))
    return tree
Пример #9
0
def _pop(force_tree, dir_metadata=None):
    """Leave the current archive directory and add its tree to its parent.

    Pops one level from ``parts``, ``shalists`` and ``metalists``.  Unless
    ``force_tree`` is truthy, a non-empty metadata list is sorted by the
    first element of each entry, encoded, concatenated and written as a
    ``.bupm`` blob appended to the popped shalist.  A truthy
    ``dir_metadata`` replaces the first metadata entry.  The returned tree
    is ``force_tree`` when truthy, otherwise a new tree from the shalist.
    """
    assert(len(parts) >= 1)
    part = parts.pop()
    shalist = shalists.pop()
    metalist = metalists.pop()
    if metalist and not force_tree:
        if dir_metadata: # Override the original metadata pushed for this dir.
            metalist = [('', dir_metadata)] + metalist[1:]
        sorted_metalist = sorted(metalist, key = lambda x : x[0])
        metadata = ''.join([m[1].encode() for m in sorted_metalist])
        # 0o100644 has the same value as the legacy literal 0100644, but the
        # 0o spelling also parses on Python 3.
        shalist.append((0o100644, '.bupm', w.new_blob(metadata)))
    tree = force_tree or w.new_tree(shalist)
    if shalists:
        shalists[-1].append((GIT_MODE_TREE,
                             git.mangle_name(part,
                                             GIT_MODE_TREE, GIT_MODE_TREE),
                             tree))
    return tree
Пример #10
0
Файл: save.py Проект: gdt/bup
    def _pop(force_tree=None, dir_metadata=None):
        """Leave the current archive directory and add its tree to its parent.

        Pops the top item from ``stack``.  A truthy ``force_tree`` is used
        as the tree unchanged.  Otherwise items with a name already seen
        are dropped (an error is recorded for each), the metadata of the
        directory and of every kept non-tree entry is sorted, encoded and
        stored as the ``.bupm`` entry, and a new tree is written.  When
        ``dir_metadata`` is not None it replaces the popped item's own
        metadata.  Returns the tree id.
        """
        popped = stack.pop()
        tree = force_tree
        # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
        if not tree:
            seen = set()
            unique = []
            for entry in popped.items:
                entry_name = entry.name
                if entry_name not in seen:
                    seen.add(entry_name)
                    unique.append(entry)
                    continue
                where = b'/'.join([level.name for level in stack]) + b'/'
                add_error('error: ignoring duplicate path %s in %s' %
                          (path_msg(entry_name), path_msg(where)))

            # if set, overrides the original metadata pushed for this dir.
            top_meta = popped.meta if dir_metadata is None else dir_metadata
            keyed_meta = [(b'', top_meta)]
            for entry in unique:
                if entry.mode != GIT_MODE_TREE:
                    sort_key = git.shalist_item_sort_key(
                        (entry.mode, entry.name, None))
                    keyed_meta.append((sort_key, entry.meta))
            keyed_meta = sorted(keyed_meta, key=lambda pair: pair[0])
            encoded = [pair[1].encode() for pair in keyed_meta]
            meta_stream = BytesIO(b''.join(encoded))
            mode, oid = hashsplit.split_to_blob_or_tree(
                w.new_blob, w.new_tree, [meta_stream], keep_boundaries=False)

            tree_entries = [(mode, b'.bupm', oid)]
            for entry in unique:
                gitmode = entry.gitmode
                mangled = git.mangle_name(entry.name, entry.mode,
                                          entry.gitmode)
                tree_entries.append((gitmode, mangled, entry.oid))
            tree = w.new_tree(tree_entries)
        if stack:
            parent = stack[-1]
            parent.append(popped.name, GIT_MODE_TREE, GIT_MODE_TREE, tree,
                          None)
        return tree
Пример #11
0
def _pop(force_tree):
    """Leave the current directory level and return its tree id.

    Pops one level from ``parts``, ``shalists`` and ``metalists``.  A
    non-empty metadata list is sorted by the first element of each entry,
    encoded, concatenated and written as a ``.bupm`` blob appended to the
    popped shalist.  The returned tree is ``force_tree`` when truthy,
    otherwise a new tree from the shalist.  At the top level the popped
    lists are pushed back instead of being recorded in a parent.
    """
    assert(len(parts) >= 1)
    part = parts.pop()
    shalist = shalists.pop()
    metalist = metalists.pop()
    if metalist:
        sorted_metalist = sorted(metalist, key = lambda x : x[0])
        metadata = ''.join([m[1].encode() for m in sorted_metalist])
        # 0o100644 has the same value as the legacy literal 0100644, but the
        # 0o spelling also parses on Python 3.
        shalist.append((0o100644, '.bupm', w.new_blob(metadata)))
    tree = force_tree or w.new_tree(shalist)
    if shalists:
        shalists[-1].append((GIT_MODE_TREE,
                             git.mangle_name(part,
                                             GIT_MODE_TREE, GIT_MODE_TREE),
                             tree))
    else:
        # This was the toplevel, so put it back for sanity (i.e. cd .. from /).
        shalists.append(shalist)
        metalists.append(metalist)
    return tree
Пример #12
0
def _pop(force_tree, dir_metadata=None):
    """Leave the current archive directory and add its tree to its parent.

    One level is popped from ``parts``, ``shalists`` and ``metalists``.
    With a truthy ``force_tree`` that value is the tree and no metadata is
    written.  Otherwise a non-empty metadata list is sorted by the first
    element of each entry, encoded, joined and stored through
    ``hashsplit.split_to_blob_or_tree`` as the ``.bupm`` entry before the
    tree is created.  A truthy ``dir_metadata`` replaces the first metadata
    entry.  Returns the tree id.
    """
    assert len(parts) > 0
    dirname = parts.pop()
    entries = shalists.pop()
    meta_entries = metalists.pop()

    if force_tree:
        tree = force_tree
    else:
        if meta_entries:
            if dir_metadata:
                # Override the original metadata pushed for this dir.
                meta_entries = [('', dir_metadata)] + meta_entries[1:]
            ordered = list(meta_entries)
            ordered.sort(key=lambda pair: pair[0])
            blob_text = ''.join([pair[1].encode() for pair in ordered])
            mode, oid = hashsplit.split_to_blob_or_tree(
                w.new_blob, w.new_tree, [StringIO(blob_text)],
                keep_boundaries=False)
            entries.append((mode, '.bupm', oid))
        tree = w.new_tree(entries)

    if shalists:
        mangled = git.mangle_name(dirname, GIT_MODE_TREE, GIT_MODE_TREE)
        shalists[-1].append((GIT_MODE_TREE, mangled, tree))
    return tree
Пример #13
0
def _pop(force_tree, dir_metadata=None):
    """Leave the current archive directory and add its tree to its parent.

    One level is popped from ``parts``, ``shalists`` and ``metalists``.
    With a truthy ``force_tree`` that value is the tree.  Otherwise a
    non-empty metadata list is sorted, encoded and stored as the ``.bupm``
    entry, entries whose name was already seen are dropped (an error is
    recorded for each), and a new tree is written from what remains.  A
    truthy ``dir_metadata`` replaces the first metadata entry.  Returns the
    tree id.
    """
    assert len(parts) > 0
    dirname = parts.pop()
    entries = shalists.pop()
    meta_entries = metalists.pop()

    if force_tree:
        tree = force_tree
    else:
        if meta_entries:
            if dir_metadata:
                # Override the original metadata pushed for this dir.
                meta_entries = [('', dir_metadata)] + meta_entries[1:]
            ordered = sorted(meta_entries, key=lambda pair: pair[0])
            encoded = [pair[1].encode() for pair in ordered]
            meta_file = BytesIO(''.join(encoded))
            mode, oid = hashsplit.split_to_blob_or_tree(
                w.new_blob, w.new_tree, [meta_file], keep_boundaries=False)
            entries.append((mode, '.bupm', oid))

        # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
        seen = set()
        unique = []
        for entry in entries:
            entry_name = entry[1]
            if entry_name not in seen:
                seen.add(entry_name)
                unique.append(entry)
                continue
            where = '/'.join(parts) + '/'
            add_error('error: ignoring duplicate path %r in %r'
                      % (entry_name, where))
        tree = w.new_tree(unique)

    if shalists:
        mangled = git.mangle_name(dirname, GIT_MODE_TREE, GIT_MODE_TREE)
        shalists[-1].append((GIT_MODE_TREE, mangled, tree))
    return tree
Пример #14
0
Файл: tree.py Проект: jmberg/bup
 def mangled_name(self):
     """Return mangle_name() applied to this object's name, mode and gitmode."""
     name, mode, gitmode = self.name, self.mode, self.gitmode
     return mangle_name(name, mode, gitmode)
Пример #15
0
# Three modes: print blob ids only, build a tree through the pack writer, or
# (without a pack writer) just iterate over the hashsplit chunks.
if pack_writer and opt.blobs:
    shalist = hashsplit.split_to_blobs(pack_writer.new_blob, files,
                                       keep_boundaries=opt.keep_boundaries,
                                       progress=prog)
    # One hex-encoded id per line for every blob produced.
    for (sha, size, level) in shalist:
        print sha.encode('hex')
        reprogress()
elif pack_writer:  # tree or commit or name
    if opt.name: # insert dummy_name which may be used as a restore target
        mode, sha = \
            hashsplit.split_to_blob_or_tree(pack_writer.new_blob,
                                            pack_writer.new_tree,
                                            files,
                                            keep_boundaries=opt.keep_boundaries,
                                            progress=prog)
        # The whole input is stored as one entry named 'data' (mangled
        # according to the mode returned by the split).
        splitfile_name = git.mangle_name('data', hashsplit.GIT_MODE_FILE, mode)
        shalist = [(mode, splitfile_name, sha)]
    else:
        shalist = hashsplit.split_to_shalist(
                      pack_writer.new_blob, pack_writer.new_tree, files,
                      keep_boundaries=opt.keep_boundaries, progress=prog)
    tree = pack_writer.new_tree(shalist)
else:
    last = 0
    it = hashsplit.hashsplit_iter(files,
                                  keep_boundaries=opt.keep_boundaries,
                                  progress=prog)
    # Accumulate the total size of all chunks; with opt.copy each chunk is
    # also written to stdout.
    for (blob, level) in it:
        hashsplit.total_split += len(blob)
        if opt.copy:
            sys.stdout.write(str(blob))
Пример #16
0
    # Split the input into blobs and print one hex-encoded id per blob.
    shalist = hashsplit.split_to_blobs(pack_writer.new_blob,
                                       files,
                                       keep_boundaries=opt.keep_boundaries,
                                       progress=prog)
    for (sha, size, level) in shalist:
        print sha.encode('hex')
        reprogress()
elif pack_writer:  # tree or commit or name
    if opt.name:  # insert dummy_name which may be used as a restore target
        mode, sha = \
            hashsplit.split_to_blob_or_tree(pack_writer.new_blob,
                                            pack_writer.new_tree,
                                            files,
                                            keep_boundaries=opt.keep_boundaries,
                                            progress=prog)
        # The whole input is stored as one entry named 'data' (mangled
        # according to the mode returned by the split).
        splitfile_name = git.mangle_name('data', hashsplit.GIT_MODE_FILE, mode)
        shalist = [(mode, splitfile_name, sha)]
    else:
        shalist = hashsplit.split_to_shalist(
            pack_writer.new_blob,
            pack_writer.new_tree,
            files,
            keep_boundaries=opt.keep_boundaries,
            progress=prog)
    tree = pack_writer.new_tree(shalist)
else:
    # No pack writer: only iterate over the hashsplit chunks.
    last = 0
    it = hashsplit.hashsplit_iter(files,
                                  keep_boundaries=opt.keep_boundaries,
                                  progress=prog)
    for (blob, level) in it:
Пример #17
0
        # When there was no old tree, either invalidate the entry (the last
        # skipped name starts with this entry's name) or validate it against
        # the new tree, then repack it.
        if not oldtree:
            if lastskip_name and lastskip_name.startswith(ent.name):
                ent.invalidate()
            else:
                ent.validate(GIT_MODE_TREE, newtree)
            ent.repack()
        if exists and wasmissing:
            count += oldsize
        continue

    # it's not a directory
    id = None
    if hashvalid:
        # Reuse the id already recorded on the entry and add it to the
        # current directory's shalist under its mangled name.
        id = ent.sha
        shalists[-1].append(
            (ent.gitmode, git.mangle_name(file, ent.mode, ent.gitmode), id))
    else:
        if stat.S_ISREG(ent.mode):
            # Regular file: open it and split its contents; an I/O error at
            # either step is recorded and the name remembered as skipped.
            try:
                f = hashsplit.open_noatime(ent.name)
            except (IOError, OSError), e:
                add_error(e)
                lastskip_name = ent.name
            else:
                try:
                    (mode, id) = hashsplit.split_to_blob_or_tree(
                        w.new_blob, w.new_tree, [f], keep_boundaries=False)
                except (IOError, OSError), e:
                    add_error('%s: %s' % (ent.name, e))
                    lastskip_name = ent.name
        else:
Пример #18
0
Файл: split.py Проект: gdt/bup
def main(argv):
    """Entry point of the split command.

    Parses ``argv[1:]``, validates the option combination, then hashsplits
    the input (stdin, the named files, or the git objects listed on stdin
    with --git-ids).  Depending on the options it prints blob ids, prints a
    tree id, creates a commit and/or updates a branch ref, or only copies /
    measures the data.  Calls ``sys.exit(1)`` when ``saved_errors`` is
    non-empty at the end.
    """
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    if opt.name:
        opt.name = argv_bytes(opt.name)
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    if opt.verbose is None:
        opt.verbose = 0

    if not (opt.blobs or opt.tree or opt.commit or opt.name or opt.noop
            or opt.copy):
        o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
    if opt.copy and (opt.blobs or opt.tree):
        o.fatal('--copy is incompatible with -b, -t')
    if (opt.noop or opt.copy) and (opt.commit or opt.name):
        o.fatal('--noop and --copy are incompatible with -c, -n')
    if opt.blobs and (opt.tree or opt.commit or opt.name):
        o.fatal('-b is incompatible with -t, -c, -n')
    if extra and opt.git_ids:
        o.fatal("don't provide filenames when using --git-ids")

    if opt.verbose >= 2:
        git.verbose = opt.verbose - 1
        opt.bench = 1

    max_pack_size = None
    if opt.max_pack_size:
        max_pack_size = parse_num(opt.max_pack_size)
    max_pack_objects = None
    if opt.max_pack_objects:
        max_pack_objects = parse_num(opt.max_pack_objects)

    if opt.fanout:
        hashsplit.fanout = parse_num(opt.fanout)
    if opt.blobs:
        hashsplit.fanout = 0
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)
    if opt.date:
        date = parse_date_or_fatal(opt.date, o.fatal)
    else:
        date = time.time()

    # Hack around lack of nonlocal vars in python 2
    total_bytes = [0]

    def prog(filenum, nbytes):
        # Progress callback: accumulate the byte count and report it.
        total_bytes[0] += nbytes
        if filenum > 0:
            qprogress('Splitting: file #%d, %d kbytes\r' %
                      (filenum + 1, total_bytes[0] // 1024))
        else:
            qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))

    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    if is_reverse and opt.remote:
        o.fatal("don't use -r in reverse mode; it's automatic")
    start_time = time.time()

    if opt.name and not valid_save_name(opt.name):
        o.fatal("'%r' is not a valid branch name." % opt.name)
    refname = opt.name and b'refs/heads/%s' % opt.name or None

    # Pick where objects go: nowhere (--noop/--copy), a remote client, or
    # the local repository.
    if opt.noop or opt.copy:
        cli = pack_writer = oldref = None
    elif opt.remote or is_reverse:
        git.check_repo_or_die()
        cli = client.Client(opt.remote)
        oldref = refname and cli.read_ref(refname) or None
        pack_writer = cli.new_packwriter(compression_level=opt.compress,
                                         max_pack_size=max_pack_size,
                                         max_pack_objects=max_pack_objects)
    else:
        git.check_repo_or_die()
        cli = None
        oldref = refname and git.read_ref(refname) or None
        pack_writer = git.PackWriter(compression_level=opt.compress,
                                     max_pack_size=max_pack_size,
                                     max_pack_objects=max_pack_objects)

    in_stream = byte_stream(sys.stdin)

    if opt.git_ids:
        # the input is actually a series of git object ids that we should retrieve
        # and split.
        #
        # This is a bit messy, but basically it converts from a series of
        # CatPipe.get() iterators into a series of file-type objects.
        # It would be less ugly if either CatPipe.get() returned a file-like object
        # (not very efficient), or split_to_shalist() expected an iterator instead
        # of a file.
        cp = git.CatPipe()

        class IterToFile:
            # Minimal file-like wrapper: each read() returns the next item
            # of the iterator, or b'' once it is exhausted.
            def __init__(self, it):
                self.it = iter(it)

            def read(self, size):
                v = next(self.it, None)
                return v or b''

        def read_ids():
            # Yield one IterToFile per object id read from the input; ids
            # that raise KeyError are recorded as errors and skipped.
            while True:
                line = in_stream.readline()
                if not line:
                    break
                try:
                    it = cp.get(line.strip())
                    next(it, None)  # skip the file info
                except KeyError as e:
                    add_error('error: %s' % e)
                    continue
                yield IterToFile(it)

        files = read_ids()
    else:
        # the input either comes from a series of files or from stdin.
        files = extra and (open(argv_bytes(fn), 'rb')
                           for fn in extra) or [in_stream]

    if pack_writer:
        new_blob = pack_writer.new_blob
        new_tree = pack_writer.new_tree
    elif opt.blobs or opt.tree:
        # --noop mode
        new_blob = lambda content: git.calc_hash(b'blob', content)
        new_tree = lambda shalist: git.calc_hash(b'tree',
                                                 git.tree_encode(shalist))

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.blobs:
        shalist = hashsplit.split_to_blobs(new_blob,
                                           files,
                                           keep_boundaries=opt.keep_boundaries,
                                           progress=prog)
        for (sha, size, level) in shalist:
            out.write(hexlify(sha) + b'\n')
            reprogress()
    elif opt.tree or opt.commit or opt.name:
        if opt.name:  # insert dummy_name which may be used as a restore target
            mode, sha = \
                hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
                                                keep_boundaries=opt.keep_boundaries,
                                                progress=prog)
            splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE,
                                             mode)
            shalist = [(mode, splitfile_name, sha)]
        else:
            shalist = hashsplit.split_to_shalist(
                new_blob,
                new_tree,
                files,
                keep_boundaries=opt.keep_boundaries,
                progress=prog)
        tree = new_tree(shalist)
    else:
        last = 0
        it = hashsplit.hashsplit_iter(files,
                                      keep_boundaries=opt.keep_boundaries,
                                      progress=prog)
        for (blob, level) in it:
            hashsplit.total_split += len(blob)
            if opt.copy:
                # Write the raw chunk through the byte stream.  str(blob) on
                # Python 3 would produce a textual repr, not the data.
                # NOTE(review): assumes blob is a bytes-like object -- confirm
                # against hashsplit.hashsplit_iter.
                out.write(blob)
            megs = hashsplit.total_split // 1024 // 1024
            if not opt.quiet and last != megs:
                last = megs

    if opt.verbose:
        log('\n')
    if opt.tree:
        out.write(hexlify(tree) + b'\n')
    if opt.commit or opt.name:
        msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
        userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = pack_writer.new_commit(tree, oldref, userline, date, None,
                                        userline, date, None, msg)
        if opt.commit:
            out.write(hexlify(commit) + b'\n')

    if pack_writer:
        pack_writer.close()  # must close before we can update the ref

    if opt.name:
        if cli:
            cli.update_ref(refname, commit, oldref)
        else:
            git.update_ref(refname, commit, oldref)

    if cli:
        cli.close()

    secs = time.time() - start_time
    size = hashsplit.total_split
    if opt.bench:
        log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n' %
            (size / 1024, secs, size / 1024 / secs))

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' %
            len(saved_errors))
        sys.exit(1)
Пример #19
0
def main(argv):
    """Entry point of the split command: hashsplit input data.

    Parses ``argv`` against ``optspec`` and validates the option
    combination.  The input comes from the files named on the command
    line, from stdin, or (with ``--git-ids``) from the objects whose ids
    are listed one per line on stdin.  Depending on the options, the
    resulting blob ids (``-b``), tree id (``-t``) and/or commit id (``-c``)
    are written to stdout, the raw chunks are copied to stdout
    (``--copy``), and ``refs/heads/<name>`` is updated (``-n``).

    Exits with status 1 if any errors were collected in ``saved_errors``.
    """
    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])
    if opt.name:
        opt.name = argv_bytes(opt.name)
    if opt.verbose is None:
        opt.verbose = 0

    # Reject option combinations that make no sense together.
    if not (opt.blobs or opt.tree or opt.commit or opt.name or opt.noop
            or opt.copy):
        o.fatal("use one or more of -b, -t, -c, -n, --noop, --copy")
    if opt.copy and (opt.blobs or opt.tree):
        o.fatal('--copy is incompatible with -b, -t')
    if (opt.noop or opt.copy) and (opt.commit or opt.name):
        o.fatal('--noop and --copy are incompatible with -c, -n')
    if opt.blobs and (opt.tree or opt.commit or opt.name):
        o.fatal('-b is incompatible with -t, -c, -n')
    if extra and opt.git_ids:
        o.fatal("don't provide filenames when using --git-ids")

    if opt.verbose >= 2:
        git.verbose = opt.verbose - 1
        opt.bench = 1

    fanout = None
    if opt.fanout:
        # This used to be in hashsplit, but that's just confusing;
        # hashsplit now defaults to the real default (16) if 0 (or
        # None) is passed, but keep the command-line compatible...
        fanout = parse_num(opt.fanout) or 128
    blobbits = None
    if opt.blobbits:
        blobbits = parse_num(opt.blobbits)
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)
    if opt.date:
        date = parse_date_or_fatal(opt.date, o.fatal)
    else:
        date = time.time()

    # Hack around lack of nonlocal vars in python 2
    total_bytes = [0]

    def prog(filenum, nbytes):
        # Progress callback handed to the hashsplit functions below.
        total_bytes[0] += nbytes
        if filenum > 0:
            qprogress('Splitting: file #%d, %d kbytes\r' %
                      (filenum + 1, total_bytes[0] // 1024))
        else:
            qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))

    start_time = time.time()

    if opt.name and not valid_save_name(opt.name):
        o.fatal("'%r' is not a valid branch name." % opt.name)
    refname = opt.name and b'refs/heads/%s' % opt.name or None

    if opt.noop or opt.copy:
        # Nothing is stored in these modes, so no repository is opened.
        repo = oldref = None
    else:
        repo = from_opts(opt)
        oldref = refname and repo.read_ref(refname) or None
        repobits = repo.config(b'bup.blobbits',
                               opttype='int') or hashsplit.BUP_BLOBBITS
        if not blobbits:
            blobbits = repobits
        else:
            print("overriding repo blobbits %d from cmdline with %d" %
                  (repobits, blobbits))

    stdin_bytes = byte_stream(sys.stdin)

    if opt.git_ids:
        # the input is actually a series of git object ids that we should retrieve
        # and split.
        #
        # This is a bit messy, but basically it converts from a series of
        # repo.cat() iterators into a series of file-type objects.
        # It would be less ugly if either repo.cat() returned a file-like object
        # (not very efficient), or split_to_shalist() expected an iterator instead
        # of a file.
        class IterToFile:
            """Minimal file-like wrapper whose read() returns the next chunk."""

            def __init__(self, it):
                self.it = iter(it)

            def read(self, size):
                # ``size`` is ignored: each call hands back one whole item
                # of the iterator, or b'' once it is exhausted.
                v = next(self.it, None)
                return v or b''

        def read_ids():
            # Yield one file-like object per object id read from stdin.
            while 1:
                line = stdin_bytes.readline()
                if not line:
                    break
                try:
                    it = repo.cat(line.strip())
                    next(it, None)  # skip the file info
                except KeyError as e:
                    add_error('error: %s' % e)
                    continue
                yield IterToFile(it)

        files = read_ids()
    elif extra:
        # the input comes from a series of files, opened lazily one by one.
        files = (open(argv_bytes(fn), 'rb') for fn in extra)
    else:
        # no filenames given: read from stdin.
        files = [stdin_bytes]

    if repo:
        write_data = repo.write_data
        write_tree = repo.write_tree
    elif opt.blobs or opt.tree:
        # --noop mode: only compute the hashes, store nothing.
        write_data = lambda content: git.calc_hash(b'blob', content)
        write_tree = lambda shalist: git.calc_hash(b'tree',
                                                   git.tree_encode(shalist))

    # Flush pending text output before switching to the byte-level stream.
    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.blobs:
        shalist = hashsplit.split_to_blobs(write_data,
                                           files,
                                           keep_boundaries=opt.keep_boundaries,
                                           progress=prog,
                                           blobbits=blobbits)
        for (sha, size, level) in shalist:
            out.write(hexlify(sha) + b'\n')
            reprogress()
    elif opt.tree or opt.commit or opt.name:
        if opt.name:  # insert dummy_name which may be used as a restore target
            mode, sha = \
                hashsplit.split_to_blob_or_tree(write_data, write_tree, files,
                                                keep_boundaries=opt.keep_boundaries,
                                                progress=prog, fanout=fanout,
                                                blobbits=blobbits)
            splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE,
                                             mode)
            shalist = [(mode, splitfile_name, sha)]
        else:
            shalist = hashsplit.split_to_shalist(
                write_data,
                write_tree,
                files,
                keep_boundaries=opt.keep_boundaries,
                progress=prog,
                fanout=fanout,
                blobbits=blobbits)
        tree = write_tree(shalist)
    else:
        # --noop / --copy without -b or -t: just run the splitter.
        last = 0
        it = hashsplit.hashsplit_iter(files,
                                      keep_boundaries=opt.keep_boundaries,
                                      progress=prog,
                                      fanout=fanout,
                                      blobbits=blobbits)
        for (blob, level) in it:
            hashsplit.total_split += len(blob)
            if opt.copy:
                # Write the raw chunk through the byte stream.  str(blob)
                # would emit the object's repr on Python 3 instead of the
                # data, and would go through the text layer of sys.stdout.
                out.write(blob)
            megs = hashsplit.total_split // 1024 // 1024
            if not opt.quiet and last != megs:
                last = megs

    if opt.verbose:
        log('\n')
    if opt.tree:
        out.write(hexlify(tree) + b'\n')
    if opt.commit or opt.name:
        msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
        userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
        commit = repo.write_commit(tree, oldref, userline, date, None,
                                   userline, date, None, msg)
        if opt.commit:
            out.write(hexlify(commit) + b'\n')

    if opt.name and repo:
        repo.update_ref(refname, commit, oldref)

    if repo:
        repo.close()

    secs = time.time() - start_time
    size = hashsplit.total_split
    if opt.bench:
        log('bup: %.2f kbytes in %.2f secs = %.2f kbytes/sec\n' %
            (size / 1024, secs, size / 1024 / secs))

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' %
            len(saved_errors))
        sys.exit(1)
Пример #20
0
        newtree = _pop(force_tree=oldtree)
        if not oldtree:
            if lastskip_name and lastskip_name.startswith(ent.name):
                ent.invalidate()
            else:
                ent.validate(GIT_MODE_TREE, newtree)
            ent.repack()
        if exists and wasmissing:
            count += oldsize
        continue

    # it's not a directory
    id = None
    if hashvalid:
        id = ent.sha
        git_name = git.mangle_name(file, ent.mode, ent.gitmode)
        git_info = (ent.gitmode, git_name, id)
        shalists[-1].append(git_info)
        sort_key = git.shalist_item_sort_key((ent.mode, file, id))
        meta = msr.metadata_at(ent.meta_ofs)
        meta.hardlink_target = find_hardlink_target(hlink_db, ent)
        # Restore the times that were cleared to 0 in the metastore.
        (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
        metalists[-1].append((sort_key, meta))
    else:
        if stat.S_ISREG(ent.mode):
            try:
                f = hashsplit.open_noatime(ent.name)
            except (IOError, OSError), e:
                add_error(e)
                lastskip_name = ent.name
Пример #21
0
            if lastskip_name and lastskip_name.startswith(ent.name):
                ent.invalidate()
            else:
                ent.validate(040000, newtree)
            ent.repack()
        if exists and wasmissing:
            count += oldsize
        continue

    # it's not a directory
    id = None
    if hashvalid:
        mode = '%o' % ent.gitmode
        id = ent.sha
        shalists[-1].append((mode, 
                             git.mangle_name(file, ent.mode, ent.gitmode),
                             id))
    else:
        if stat.S_ISREG(ent.mode):
            try:
                f = hashsplit.open_noatime(ent.name)
            except IOError, e:
                add_error(e)
                lastskip_name = ent.name
            except OSError, e:
                add_error(e)
                lastskip_name = ent.name
            else:
                (mode, id) = hashsplit.split_to_blob_or_tree(w, [f])
        else:
            if stat.S_ISDIR(ent.mode):
Пример #22
0
        oldtree = already_saved(ent) # may be None
        newtree = _pop(force_tree = oldtree)
        if not oldtree:
            if lastskip_name and lastskip_name.startswith(ent.name):
                ent.invalidate()
            else:
                ent.validate(GIT_MODE_TREE, newtree)
            ent.repack()
        if exists and wasmissing:
            count += oldsize
        continue

    # it's not a directory
    if hashvalid:
        id = ent.sha
        git_name = git.mangle_name(file, ent.mode, ent.gitmode)
        git_info = (ent.gitmode, git_name, id)
        shalists[-1].append(git_info)
        sort_key = git.shalist_item_sort_key((ent.mode, file, id))
        meta = msr.metadata_at(ent.meta_ofs)
        meta.hardlink_target = find_hardlink_target(hlink_db, ent)
        # Restore the times that were cleared to 0 in the metastore.
        (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime, ent.ctime)
        metalists[-1].append((sort_key, meta))
    else:
        id = None
        if stat.S_ISREG(ent.mode):
            try:
                with hashsplit.open_noatime(ent.name) as f:
                    (mode, id) = hashsplit.split_to_blob_or_tree(
                                            w.new_blob, w.new_tree, [f],
Пример #23
0
def main(argv):
    """Entry point of the save command: store indexed files as a tree/commit.

    Parses ``argv`` against ``optspec``, walks the index entries selected
    by the paths given on the command line, and writes blobs and trees
    for everything that is not already stored, through a local
    ``git.PackWriter`` or a remote client pack writer.  Each directory
    tree also receives a ``.bupm`` entry holding the encoded metadata of
    the directory and its non-directory children.  Depending on the
    options, the root tree id (``-t``) and/or the commit id (``-c``) are
    written to stdout and ``refs/heads/<name>`` is updated (``-n``).

    Exits with status 1 if the remote client cannot be created, if the
    index metadata file cannot be accessed, or if any errors were
    collected in ``saved_errors``.
    """

    # Hack around lack of nonlocal vars in python 2
    _nonlocal = {}

    o = options.Options(optspec)
    opt, flags, extra = o.parse_bytes(argv[1:])

    if opt.indexfile:
        opt.indexfile = argv_bytes(opt.indexfile)
    if opt.name:
        opt.name = argv_bytes(opt.name)
    if opt.remote:
        opt.remote = argv_bytes(opt.remote)
    if opt.strip_path:
        opt.strip_path = argv_bytes(opt.strip_path)

    git.check_repo_or_die()
    if not (opt.tree or opt.commit or opt.name):
        o.fatal("use one or more of -t, -c, -n")
    if not extra:
        o.fatal("no filenames given")

    extra = [argv_bytes(x) for x in extra]

    opt.progress = (istty2 and not opt.quiet)
    opt.smaller = parse_num(opt.smaller or 0)
    if opt.bwlimit:
        client.bwlimit = parse_num(opt.bwlimit)

    if opt.date:
        date = parse_date_or_fatal(opt.date, o.fatal)
    else:
        date = time.time()

    if opt.strip and opt.strip_path:
        o.fatal("--strip is incompatible with --strip-path")

    graft_points = []
    if opt.graft:
        if opt.strip:
            o.fatal("--strip is incompatible with --graft")

        if opt.strip_path:
            o.fatal("--strip-path is incompatible with --graft")

        for (option, parameter) in flags:
            if option == "--graft":
                parameter = argv_bytes(parameter)
                splitted_parameter = parameter.split(b'=')
                if len(splitted_parameter) != 2:
                    o.fatal(
                        "a graft point must be of the form old_path=new_path")
                old_path, new_path = splitted_parameter
                if not (old_path and new_path):
                    o.fatal("a graft point cannot be empty")
                graft_points.append(
                    (resolve_parent(old_path), resolve_parent(new_path)))

    is_reverse = environ.get(b'BUP_SERVER_REVERSE')
    if is_reverse and opt.remote:
        o.fatal("don't use -r in reverse mode; it's automatic")

    name = opt.name
    if name and not valid_save_name(name):
        o.fatal("'%s' is not a valid branch name" % path_msg(name))
    refname = name and b'refs/heads/%s' % name or None
    if opt.remote or is_reverse:
        try:
            cli = client.Client(opt.remote)
        except client.ClientError as e:
            # Terminate the message with a newline like every other log().
            log('error: %s\n' % e)
            sys.exit(1)
        oldref = refname and cli.read_ref(refname) or None
        w = cli.new_packwriter(compression_level=opt.compress)
    else:
        cli = None
        oldref = refname and git.read_ref(refname) or None
        w = git.PackWriter(compression_level=opt.compress)

    handle_ctrl_c()

    # Metadata is stored in a file named .bupm in each directory.  The
    # first metadata entry will be the metadata for the current directory.
    # The remaining entries will be for each of the other directory
    # elements, in the order they're listed in the index.
    #
    # Since the git tree elements are sorted according to
    # git.shalist_item_sort_key, the metalist items are accumulated as
    # (sort_key, metadata) tuples, and then sorted when the .bupm file is
    # created.  The sort_key should have been computed using the element's
    # mangled name and git mode (after hashsplitting), but the code isn't
    # actually doing that but rather uses the element's real name and mode.
    # This makes things a bit more difficult when reading it back, see
    # vfs.ordered_tree_entries().

    # Maintain a stack of information representing the current location in
    # the archive being constructed.  The current path is recorded in
    # parts, which will be something like ['', 'home', 'someuser'], and
    # the accumulated content and metadata for each of the dirs in parts is
    # stored in parallel stacks in shalists and metalists.

    parts = []  # Current archive position (stack of dir names).
    shalists = []  # Hashes for each dir in paths.
    metalists = []  # Metadata for each dir in paths.

    def _push(part, metadata):
        # Enter a new archive directory -- make it the current directory.
        parts.append(part)
        shalists.append([])
        metalists.append([(b'', metadata)])  # This dir's metadata (no name).

    def _pop(force_tree, dir_metadata=None):
        # Leave the current archive directory and add its tree to its parent.
        assert (len(parts) >= 1)
        part = parts.pop()
        shalist = shalists.pop()
        metalist = metalists.pop()
        # FIXME: only test if collision is possible (i.e. given --strip, etc.)?
        if force_tree:
            tree = force_tree
        else:
            names_seen = set()
            clean_list = []
            metaidx = 1  # entry at 0 is for the dir
            for x in shalist:
                name = x[1]
                if name in names_seen:
                    parent_path = b'/'.join(parts) + b'/'
                    add_error('error: ignoring duplicate path %s in %s' %
                              (path_msg(name), path_msg(parent_path)))
                    if not stat.S_ISDIR(x[0]):
                        # Drop the duplicate's metadata as well.
                        del metalist[metaidx]
                else:
                    names_seen.add(name)
                    clean_list.append(x)
                    if not stat.S_ISDIR(x[0]):
                        metaidx += 1

            if dir_metadata:  # Override the original metadata pushed for this dir.
                metalist = [(b'', dir_metadata)] + metalist[1:]
            sorted_metalist = sorted(metalist, key=lambda x: x[0])
            metadata = b''.join([m[1].encode() for m in sorted_metalist])
            metadata_f = BytesIO(metadata)
            mode, id = hashsplit.split_to_blob_or_tree(w.new_blob,
                                                       w.new_tree,
                                                       [metadata_f],
                                                       keep_boundaries=False)
            clean_list.append((mode, b'.bupm', id))

            tree = w.new_tree(clean_list)
        if shalists:
            shalists[-1].append((GIT_MODE_TREE,
                                 git.mangle_name(part, GIT_MODE_TREE,
                                                 GIT_MODE_TREE), tree))
        return tree

    _nonlocal['count'] = 0
    _nonlocal['subcount'] = 0
    _nonlocal['lastremain'] = None

    def progress_report(n):
        # Report percentage, throughput and estimated remaining time.
        _nonlocal['subcount'] += n
        cc = _nonlocal['count'] + _nonlocal['subcount']
        pct = total and (cc * 100.0 / total) or 0
        now = time.time()
        elapsed = now - tstart
        kps = elapsed and int(cc / 1024. / elapsed)
        kps_frac = 10**int(math.log(kps + 1, 10) - 1)
        kps = int(kps / kps_frac) * kps_frac
        if cc:
            remain = elapsed * 1.0 / cc * (total - cc)
        else:
            remain = 0.0
        # Keep showing the previous estimate when the new one is less than
        # 5% larger than it.
        if (_nonlocal['lastremain'] and (remain > _nonlocal['lastremain']) and
            ((remain - _nonlocal['lastremain']) / _nonlocal['lastremain'] <
             0.05)):
            remain = _nonlocal['lastremain']
        else:
            _nonlocal['lastremain'] = remain
        hours = int(remain / 60 / 60)
        mins = int(remain / 60 - hours * 60)
        secs = int(remain - hours * 60 * 60 - mins * 60)
        if elapsed < 30:
            remainstr = ''
            kpsstr = ''
        else:
            kpsstr = '%dk/s' % kps
            if hours:
                remainstr = '%dh%dm' % (hours, mins)
            elif mins:
                remainstr = '%dm%d' % (mins, secs)
            else:
                remainstr = '%ds' % secs
        qprogress(
            'Saving: %.2f%% (%d/%dk, %d/%d files) %s %s\r' %
            (pct, cc / 1024, total / 1024, fcount, ftotal, remainstr, kpsstr))

    indexfile = opt.indexfile or git.repo(b'bupindex')
    r = index.Reader(indexfile)
    try:
        msr = index.MetaStoreReader(indexfile + b'.meta')
    except IOError as ex:
        if ex.errno != EACCES:
            raise
        # Terminate the message with a newline like every other log().
        log('error: cannot access %r; have you run bup index?\n' %
            path_msg(indexfile))
        sys.exit(1)
    hlink_db = hlinkdb.HLinkDB(indexfile + b'.hlink')

    def already_saved(ent):
        # Returns the entry's sha when it is valid and already stored,
        # otherwise a falsy value.
        return ent.is_valid() and w.exists(ent.sha) and ent.sha

    def wantrecurse_pre(ent):
        return not already_saved(ent)

    def wantrecurse_during(ent):
        return not already_saved(ent) or ent.sha_missing()

    def find_hardlink_target(hlink_db, ent):
        # Returns the first recorded path for the entry's (dev, ino), or
        # None (implicitly) when there is none.
        if hlink_db and not stat.S_ISDIR(ent.mode) and ent.nlink > 1:
            link_paths = hlink_db.node_paths(ent.dev, ent.ino)
            if link_paths:
                return link_paths[0]

    # First pass (only when progress is shown): count the files and bytes
    # that will have to be saved, for the progress percentage.
    total = ftotal = 0
    if opt.progress:
        for (transname, ent) in r.filter(extra, wantrecurse=wantrecurse_pre):
            if not (ftotal % 10024):
                qprogress('Reading index: %d\r' % ftotal)
            exists = ent.exists()
            hashvalid = already_saved(ent)
            ent.set_sha_missing(not hashvalid)
            if not opt.smaller or ent.size < opt.smaller:
                if exists and not hashvalid:
                    total += ent.size
            ftotal += 1
        progress('Reading index: %d, done.\n' % ftotal)
        hashsplit.progress_callback = progress_report

    # Root collisions occur when strip or graft options map more than one
    # path to the same directory (paths which originally had separate
    # parents).  When that situation is detected, use empty metadata for
    # the parent.  Otherwise, use the metadata for the common parent.
    # Collision example: "bup save ... --strip /foo /foo/bar /bar".

    # FIXME: Add collision tests, or handle collisions some other way.

    # FIXME: Detect/handle strip/graft name collisions (other than root),
    # i.e. if '/foo/bar' and '/bar' both map to '/'.

    first_root = None
    root_collision = None
    tstart = time.time()
    fcount = 0
    lastskip_name = None
    lastdir = b''
    for (transname, ent) in r.filter(extra, wantrecurse=wantrecurse_during):
        (dir, file) = os.path.split(ent.name)
        exists = (ent.flags & index.IX_EXISTS)
        hashvalid = already_saved(ent)
        wasmissing = ent.sha_missing()
        oldsize = ent.size
        if opt.verbose:
            if not exists:
                status = 'D'
            elif not hashvalid:
                if ent.sha == index.EMPTY_SHA:
                    status = 'A'
                else:
                    status = 'M'
            else:
                status = ' '
            if opt.verbose >= 2:
                log('%s %-70s\n' % (status, path_msg(ent.name)))
            elif not stat.S_ISDIR(ent.mode) and lastdir != dir:
                if not lastdir.startswith(dir):
                    log('%s %-70s\n' %
                        (status, path_msg(os.path.join(dir, b''))))
                lastdir = dir

        if opt.progress:
            progress_report(0)
        fcount += 1

        if not exists:
            continue
        if opt.smaller and ent.size >= opt.smaller:
            if exists and not hashvalid:
                if opt.verbose:
                    log('skipping large file "%s"\n' % path_msg(ent.name))
                lastskip_name = ent.name
            continue

        assert (dir.startswith(b'/'))
        if opt.strip:
            dirp = stripped_path_components(dir, extra)
        elif opt.strip_path:
            dirp = stripped_path_components(dir, [opt.strip_path])
        elif graft_points:
            dirp = grafted_path_components(graft_points, dir)
        else:
            dirp = path_components(dir)

        # At this point, dirp contains a representation of the archive
        # path that looks like [(archive_dir_name, real_fs_path), ...].
        # So given "bup save ... --strip /foo/bar /foo/bar/baz", dirp
        # might look like this at some point:
        #   [('', '/foo/bar'), ('baz', '/foo/bar/baz'), ...].

        # This dual representation supports stripping/grafting, where the
        # archive path may not have a direct correspondence with the
        # filesystem.  The root directory is represented by an initial
        # component named '', and any component that doesn't have a
        # corresponding filesystem directory (due to grafting, for
        # example) will have a real_fs_path of None, i.e. [('', None),
        # ...].

        if first_root is None:
            first_root = dirp[0]
        elif first_root != dirp[0]:
            root_collision = True

        # If switching to a new sub-tree, finish the current sub-tree.
        while parts > [x[0] for x in dirp]:
            _pop(force_tree=None)

        # If switching to a new sub-tree, start a new sub-tree.
        for path_component in dirp[len(parts):]:
            dir_name, fs_path = path_component
            # Not indexed, so just grab the FS metadata or use empty metadata.
            try:
                meta = metadata.from_path(fs_path, normalized=True) \
                    if fs_path else metadata.Metadata()
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = dir_name
                meta = metadata.Metadata()
            _push(dir_name, meta)

        if not file:
            if len(parts) == 1:
                continue  # We're at the top level -- keep the current root dir
            # Since there's no filename, this is a subdir -- finish it.
            oldtree = already_saved(ent)  # may be None
            newtree = _pop(force_tree=oldtree)
            if not oldtree:
                if lastskip_name and lastskip_name.startswith(ent.name):
                    ent.invalidate()
                else:
                    ent.validate(GIT_MODE_TREE, newtree)
                ent.repack()
            if exists and wasmissing:
                _nonlocal['count'] += oldsize
            continue

        # it's not a directory
        if hashvalid:
            id = ent.sha
            git_name = git.mangle_name(file, ent.mode, ent.gitmode)
            git_info = (ent.gitmode, git_name, id)
            shalists[-1].append(git_info)
            sort_key = git.shalist_item_sort_key((ent.mode, file, id))
            meta = msr.metadata_at(ent.meta_ofs)
            meta.hardlink_target = find_hardlink_target(hlink_db, ent)
            # Restore the times that were cleared to 0 in the metastore.
            (meta.atime, meta.mtime, meta.ctime) = (ent.atime, ent.mtime,
                                                    ent.ctime)
            metalists[-1].append((sort_key, meta))
        else:
            id = None
            hlink = find_hardlink_target(hlink_db, ent)
            try:
                meta = metadata.from_path(
                    ent.name,
                    hardlink_target=hlink,
                    normalized=True,
                    after_stat=after_nondir_metadata_stat)
            except (OSError, IOError) as e:
                add_error(e)
                lastskip_name = ent.name
                continue
            if stat.S_IFMT(ent.mode) != stat.S_IFMT(meta.mode):
                # The mode changed since we indexed the file, this is bad.
                # This can cause two issues:
                # 1) We e.g. think the file is a regular file, but now it's
                #    something else (a device, socket, FIFO or symlink, etc.)
                #    and _read_ from it when we shouldn't.
                # 2) We then record it as valid, but don't update the index
                #    metadata, and on a subsequent save it has 'hashvalid'
                #    but is recorded as the file type from the index, when
                #    the content is something else ...
                # Avoid all of these consistency issues by just skipping such
                # things - it really ought to not happen anyway.
                add_error("%s: mode changed since indexing, skipping." %
                          path_msg(ent.name))
                lastskip_name = ent.name
                continue
            if stat.S_ISREG(ent.mode):
                try:
                    # If the file changes while we're reading it, then our reading
                    # may stop at some point, but the stat() above may have gotten
                    # a different size already. Recalculate the meta size so that
                    # the repository records the accurate size in the metadata, even
                    # if the other stat() data might be slightly older than the file
                    # content (which we can't fix, this is inherently racy, but we
                    # can prevent the size mismatch.)
                    meta.size = 0

                    def new_blob(data):
                        meta.size += len(data)
                        return w.new_blob(data)

                    before_saving_regular_file(ent.name)
                    with hashsplit.open_noatime(ent.name) as f:
                        (mode, id) = hashsplit.split_to_blob_or_tree(
                            new_blob, w.new_tree, [f], keep_boundaries=False)
                except (IOError, OSError) as e:
                    # Format the path with path_msg() like every other
                    # message here; ent.name is bytes and would otherwise
                    # show up as a b'...' repr on Python 3.
                    add_error('%s: %s' % (path_msg(ent.name), e))
                    lastskip_name = ent.name
            elif stat.S_ISDIR(ent.mode):
                assert (0)  # handled above
            elif stat.S_ISLNK(ent.mode):
                mode, id = (GIT_MODE_SYMLINK, w.new_blob(meta.symlink_target))
            else:
                # Everything else should be fully described by its
                # metadata, so just record an empty blob, so the paths
                # in the tree and .bupm will match up.
                (mode, id) = (GIT_MODE_FILE, w.new_blob(b''))

            if id:
                ent.validate(mode, id)
                ent.repack()
                git_name = git.mangle_name(file, ent.mode, ent.gitmode)
                git_info = (mode, git_name, id)
                shalists[-1].append(git_info)
                sort_key = git.shalist_item_sort_key((ent.mode, file, id))
                metalists[-1].append((sort_key, meta))

        if exists and wasmissing:
            _nonlocal['count'] += oldsize
            _nonlocal['subcount'] = 0

    if opt.progress:
        pct = total and _nonlocal['count'] * 100.0 / total or 100
        progress(
            'Saving: %.2f%% (%d/%dk, %d/%d files), done.    \n' %
            (pct, _nonlocal['count'] / 1024, total / 1024, fcount, ftotal))

    while len(parts) > 1:  # _pop() all the parts above the root
        _pop(force_tree=None)
    assert (len(shalists) == 1)
    assert (len(metalists) == 1)

    # Finish the root directory.
    tree = _pop(
        force_tree=None,
        # When there's a collision, use empty metadata for the root.
        dir_metadata=metadata.Metadata() if root_collision else None)

    sys.stdout.flush()
    out = byte_stream(sys.stdout)

    if opt.tree:
        out.write(hexlify(tree))
        out.write(b'\n')
    if opt.commit or name:
        if compat.py_maj > 2:
            # Strip b prefix from python 3 bytes reprs to preserve previous format
            msgcmd = b'[%s]' % b', '.join(
                [repr(argv_bytes(x))[1:].encode('ascii') for x in argv])
        else:
            msgcmd = repr(argv)
        msg = b'bup save\n\nGenerated by command:\n%s\n' % msgcmd
        userline = (b'%s <%s@%s>' % (userfullname(), username(), hostname()))
        commit = w.new_commit(tree, oldref, userline, date, None, userline,
                              date, None, msg)
        if opt.commit:
            out.write(hexlify(commit))
            out.write(b'\n')

    msr.close()
    w.close()  # must close before we can update the ref

    if name:
        if cli:
            cli.update_ref(refname, commit, oldref)
        else:
            git.update_ref(refname, commit, oldref)

    if cli:
        cli.close()

    if saved_errors:
        log('WARNING: %d errors encountered while saving.\n' %
            len(saved_errors))
        sys.exit(1)
Пример #24
0
def split(opt, files, parent, out, pack_writer):
    """Hash-split ``files`` and emit whatever ``opt`` asks for.

    One of three modes runs, tested in this order:

    * ``opt.blobs`` -- split into blobs with ``pack_writer.new_blob`` and
      write each blob id, hex-encoded and newline-terminated, to ``out``.
    * ``opt.tree``, ``opt.commit`` or ``opt.name`` -- build a tree with
      ``pack_writer.new_tree``.  With ``opt.name`` the split data is stored
      as a single entry named from ``b'data'``; otherwise the shalist from
      ``hashsplit.split_to_shalist`` is used directly.
    * otherwise -- only iterate over the chunks, adding their sizes to
      ``hashsplit.total_split`` and, when ``opt.copy`` is set, copying the
      raw chunk bytes to standard output.

    Afterwards the tree id is written to ``out`` when ``opt.tree`` is set,
    and a commit is created when ``opt.commit`` or ``opt.name`` is set (its
    id is written to ``out`` only for ``opt.commit``).

    ``tree`` is only assigned in the second mode, so ``opt.blobs`` must not
    be combined with ``opt.tree``/``opt.commit``/``opt.name``; this function
    does not check for that itself.

    Args:
        opt: options object; the attributes read here are ``blobs``,
            ``tree``, ``commit``, ``name``, ``keep_boundaries``, ``copy``,
            ``verbose`` and ``date``.
        files: input passed straight through to the ``hashsplit`` helpers.
        parent: passed to ``pack_writer.new_commit`` as the commit parent.
        out: object with a ``write`` method that accepts bytes.
        pack_writer: provides ``new_blob``, ``new_tree`` and ``new_commit``.

    Returns:
        The value returned by ``pack_writer.new_commit`` if a commit was
        created, else ``None``.
    """
    # A one-element list stands in for ``nonlocal``, which Python 2 lacks.
    total_bytes = [0]

    def prog(filenum, nbytes):
        """Progress callback: accumulate bytes and report the running total."""
        total_bytes[0] += nbytes
        if filenum > 0:
            qprogress('Splitting: file #%d, %d kbytes\r' %
                      (filenum + 1, total_bytes[0] // 1024))
        else:
            qprogress('Splitting: %d kbytes\r' % (total_bytes[0] // 1024))

    new_blob = pack_writer.new_blob
    new_tree = pack_writer.new_tree
    if opt.blobs:
        shalist = hashsplit.split_to_blobs(new_blob,
                                           files,
                                           keep_boundaries=opt.keep_boundaries,
                                           progress=prog)
        for sha, _size, _level in shalist:
            out.write(hexlify(sha) + b'\n')
            reprogress()
    elif opt.tree or opt.commit or opt.name:
        if opt.name:  # insert dummy_name which may be used as a restore target
            mode, sha = \
                hashsplit.split_to_blob_or_tree(new_blob, new_tree, files,
                                                keep_boundaries=opt.keep_boundaries,
                                                progress=prog)
            splitfile_name = git.mangle_name(b'data', hashsplit.GIT_MODE_FILE,
                                             mode)
            shalist = [(mode, splitfile_name, sha)]
        else:
            shalist = \
                hashsplit.split_to_shalist(new_blob, new_tree, files,
                                           keep_boundaries=opt.keep_boundaries,
                                           progress=prog)
        tree = new_tree(shalist)
    else:
        # Chunks are binary data.  On Python 3, str() of a bytes object is
        # its b'...' repr, so the chunk must go to the binary layer of
        # stdout untouched.  Python 2's sys.stdout has no .buffer and takes
        # byte strings directly.
        raw_stdout = getattr(sys.stdout, 'buffer', sys.stdout)
        it = hashsplit.hashsplit_iter(files,
                                      keep_boundaries=opt.keep_boundaries,
                                      progress=prog)
        for blob, _level in it:
            hashsplit.total_split += len(blob)
            if opt.copy:
                # bytes() also copes with buffer/memoryview chunks.
                raw_stdout.write(bytes(blob))

    if opt.verbose:
        log('\n')
    if opt.tree:
        out.write(hexlify(tree) + b'\n')

    commit = None
    if opt.commit or opt.name:
        msg = b'bup split\n\nGenerated by command:\n%r\n' % compat.get_argvb()
        userline = b'%s <%s@%s>' % (userfullname(), username(), hostname())
        # The same identity and date are used for both author and committer.
        commit = pack_writer.new_commit(tree, parent, userline, opt.date, None,
                                        userline, opt.date, None, msg)
        if opt.commit:
            out.write(hexlify(commit) + b'\n')

    return commit
Пример #25
0
        if not oldtree:
            if lastskip_name and lastskip_name.startswith(ent.name):
                ent.invalidate()
            else:
                ent.validate(GIT_MODE_TREE, newtree)
            ent.repack()
        if exists and wasmissing:
            count += oldsize
        continue

    # it's not a directory
    id = None
    if hashvalid:
        id = ent.sha
        shalists[-1].append((ent.gitmode, 
                             git.mangle_name(file, ent.mode, ent.gitmode),
                             id))
    else:
        if stat.S_ISREG(ent.mode):
            try:
                f = hashsplit.open_noatime(ent.name)
            except (IOError, OSError), e:
                add_error(e)
                lastskip_name = ent.name
            else:
                try:
                    (mode, id) = hashsplit.split_to_blob_or_tree(
                                            w.new_blob, w.new_tree, [f],
                                            keep_boundaries=False)
                except (IOError, OSError), e:
                    add_error('%s: %s' % (ent.name, e))