Example #1
 def test_delta_medium_object(self):
     # This tests an object set that will have a copy operation
     # 2**20 in size.
     with self.get_pack(pack1_sha) as orig_pack:
         orig_blob = orig_pack[a_sha]
         new_blob = Blob()
         new_blob.data = orig_blob.data + (b'x' * 2**20)
         new_blob_2 = Blob()
         new_blob_2.data = new_blob.data + b'y'
         all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None),
                                                        (new_blob_2, None)]
     pack_path = os.path.join(self._tempdir, 'pack_with_deltas')
     write_pack(pack_path, all_to_pack, deltify=True)
     output = run_git_or_fail(['verify-pack', '-v', pack_path])
     self.assertEqual(set(x[0].id for x in all_to_pack),
                      _git_verify_pack_object_list(output))
     # We specifically made a new blob that should be a delta
     # against the blob a_sha, so make sure we really got only 3
     # non-delta objects:
     got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
     self.assertEqual(
         3, got_non_delta,
         'Expected 3 non-delta objects, got %d' % got_non_delta)
     # We expect one object to have a delta chain length of two
     # (new_blob_2), so let's verify that actually happens:
     self.assertIn(b'chain length = 2', output)
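
This test leans on fixtures from dulwich's test suite (get_pack, pack1_sha, _NON_DELTA_RE). As a minimal standalone sketch of the same idea, assuming only dulwich itself and a throwaway temp directory, two near-identical blobs can be written to a deltified pack like this:

    import os
    import tempfile
    from dulwich.objects import Blob
    from dulwich.pack import write_pack

    base = Blob()
    base.data = b'x' * 2**20
    similar = Blob()
    similar.data = base.data + b'y'

    # write_pack emits <path>.pack plus an index; deltify=True allows
    # dulwich to store `similar` as a delta against `base`.
    pack_path = os.path.join(tempfile.mkdtemp(), 'pack_with_deltas')
    write_pack(pack_path, [(base, None), (similar, None)], deltify=True)
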
Example #2
 def test_delta_large_object(self):
     # This tests an object set that will have a copy operation
     # 2**25 in size. This is a copy large enough that it requires
     # two copy operations in git's binary delta format.
     raise SkipTest("skipping slow, large test")
     with self.get_pack(pack1_sha) as orig_pack:
         new_blob = Blob()
         new_blob.data = "big blob" + ("x" * 2**25)
         new_blob_2 = Blob()
         new_blob_2.data = new_blob.data + "y"
         all_to_pack = list(orig_pack.pack_tuples()) + [
             (new_blob, None),
             (new_blob_2, None),
         ]
     pack_path = os.path.join(self._tempdir, "pack_with_deltas")
     write_pack(pack_path, all_to_pack, deltify=True)
     output = run_git_or_fail(["verify-pack", "-v", pack_path])
     self.assertEqual(
         {x[0].id
          for x in all_to_pack},
         _git_verify_pack_object_list(output),
     )
     # We specifically made a new blob that should be a delta
     # against the blob a_sha, so make sure we really got only 4
     # non-delta objects:
     got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
     self.assertEqual(
         4,
         got_non_delta,
         "Expected 4 non-delta objects, got %d" % got_non_delta,
     )
Example #3
 def test_delta_large_object(self):
     # This tests an object set that will have a copy operation
     # 2**25 in size. This is a copy large enough that it requires
     # two copy operations in git's binary delta format.
     raise SkipTest('skipping slow, large test')
     orig_pack = self.get_pack(pack1_sha)
     orig_blob = orig_pack[a_sha]
     new_blob = Blob()
     new_blob.data = 'big blob' + ('x' * 2**25)
     new_blob_2 = Blob()
     new_blob_2.data = new_blob.data + 'y'
     all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None),
                                                    (new_blob_2, None)]
     pack_path = os.path.join(self._tempdir, "pack_with_deltas")
     write_pack(pack_path, all_to_pack, deltify=True)
     output = run_git_or_fail(['verify-pack', '-v', pack_path])
     self.assertEqual(set(x[0].id for x in all_to_pack),
                      _git_verify_pack_object_list(output))
     # We specifically made a new blob that should be a delta
     # against the blob a_sha, so make sure we really got only 4
     # non-delta objects:
     got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
     self.assertEqual(
         4, got_non_delta,
         'Expected 4 non-delta objects, got %d' % got_non_delta)
Example #4
def symlink_to_blob(symlink_target):
    from dulwich.objects import Blob
    blob = Blob()
    if isinstance(symlink_target, str):
        symlink_target = symlink_target.encode('utf-8')
    blob.data = symlink_target
    return blob
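
A quick usage sketch for this helper (the link target is illustrative): the blob's contents are exactly the target path, so equal targets always hash to the same blob id:

    blob = symlink_to_blob('../lib/libfoo.so')
    assert blob.data == b'../lib/libfoo.so'
    print(blob.id)  # deterministic SHA-1 for this target
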
Example #5
    def _tree_from_structure(self, structure):
        # TODO: Support directories
        tree = Tree()

        for file_info in structure:
            # Tree entries must be bytes; skip any file whose name or
            # data cannot be encoded as ASCII.
            try:
                data = file_info['data'].encode('ascii')
                name = file_info['name'].encode('ascii')
                mode = file_info['mode']
            except (KeyError, UnicodeEncodeError):
                continue

            # Store the file's contents
            blob = Blob()
            blob.data = data
            self.repo.object_store.add_object(blob)

            # Add the blob entry to the tree
            tree.add(name, mode, blob.id)

        # Store the tree
        self.repo.object_store.add_object(tree)

        return tree.id
Example #6
    def test_emit_commit(self):
        b = Blob()
        b.data = "FOO"
        t = Tree()
        t.add("foo", stat.S_IFREG | 0o644, b.id)
        c = Commit()
        c.committer = c.author = "Jelmer <jelmer@host>"
        c.author_time = c.commit_time = 1271345553
        c.author_timezone = c.commit_timezone = 0
        c.message = "msg"
        c.tree = t.id
        self.store.add_objects([(b, None), (t, None), (c, None)])
        self.fastexporter.emit_commit(c, "refs/heads/master")
        self.assertEqual(
            """blob
mark :1
data 3
FOO
commit refs/heads/master
mark :2
author Jelmer <jelmer@host> 1271345553 +0000
committer Jelmer <jelmer@host> 1271345553 +0000
data 3
msg
M 644 1 foo
""", self.stream.getvalue())
Example #7
def directory_to_tree(path,
                      children,
                      lookup_ie_sha1,
                      unusual_modes,
                      empty_file_name,
                      allow_empty=False):
    """Create a Git Tree object from a Bazaar directory.

    :param path: directory path
    :param children: Children inventory entries
    :param lookup_ie_sha1: Lookup the Git SHA1 for a inventory entry
    :param unusual_modes: Dictionary with unusual file modes by file ids
    :param empty_file_name: Name to use for dummy files in empty directories,
        None to ignore empty directories.
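    :param allow_empty: Whether to allow returning an empty tree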
    """
    tree = Tree()
    for value in children:
        if value.name in BANNED_FILENAMES:
            continue
        child_path = osutils.pathjoin(path, value.name)
        try:
            mode = unusual_modes[child_path]
        except KeyError:
            mode = entry_mode(value)
        hexsha = lookup_ie_sha1(child_path, value)
        if hexsha is not None:
            tree.add(value.name.encode("utf-8"), mode, hexsha)
    if not allow_empty and len(tree) == 0:
        # Only the root can be an empty tree
        if empty_file_name is not None:
            tree.add(empty_file_name, stat.S_IFREG | 0o644, Blob().id)
        else:
            return None
    return tree
Example #8
 def ie_to_hexsha(path, ie):
     try:
         return shamap[path]
     except KeyError:
         pass
     # FIXME: Should be the same as in parent
     if ie.kind in ("file", "symlink"):
         try:
             return idmap.lookup_blob_id(ie.file_id, ie.revision)
         except KeyError:
             # no-change merge ?
             blob = Blob()
             blob.data = tree.get_file_text(path)
             if add_cache_entry is not None:
                 add_cache_entry(blob, (ie.file_id, ie.revision), path)
             return blob.id
     elif ie.kind == "directory":
         # Not all cache backends store the tree information,
         # calculate again from scratch
         ret = directory_to_tree(path, ie.children.values(), ie_to_hexsha,
                                 unusual_modes, dummy_file_name,
                                 ie.parent_id is None)
         if ret is None:
             return ret
         return ret.id
     else:
         raise AssertionError
Example #9
    def stage(self, paths):
        """Stage a set of paths.

        :param paths: List of paths, relative to the repository path
        """
        if isinstance(paths, basestring):
            paths = [paths]
        from dulwich.index import index_entry_from_stat
        index = self.open_index()
        for path in paths:
            full_path = os.path.join(self.path, path)
            try:
                st = os.stat(full_path)
            except OSError:
                # File no longer exists
                try:
                    del index[path]
                except KeyError:
                    pass  # already removed
            else:
                blob = Blob()
                f = open(full_path, 'rb')
                try:
                    blob.data = f.read()
                finally:
                    f.close()
                self.object_store.add_object(blob)
                index[path] = index_entry_from_stat(st, blob.id, 0)
        index.write()
Example #10
 def test_set_chunks(self):
     b = Blob()
     b.chunked = [b'te', b'st', b' 5\n']
     self.assertEqual(b'test 5\n', b.data)
     b.chunked = [b'te', b'st', b' 6\n']
     self.assertEqual(b'test 6\n', b.as_raw_string())
     self.assertEqual(b'test 6\n', bytes(b))
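
As the test demonstrates, `chunked` and `data` are two views of the same contents; a small standalone sketch:

    from dulwich.objects import Blob

    b = Blob()
    b.chunked = [b'hello ', b'world\n']  # contents as a list of chunks
    assert b.data == b'hello world\n'    # data joins the chunks
    b.data = b'replaced\n'               # assigning data resets the chunks
    assert b.chunked == [b'replaced\n']
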
Example #11
    def stage(self, paths):
        """Stage a set of paths.

        :param paths: List of paths, relative to the repository path
        """
        from dulwich.index import cleanup_mode
        index = self.open_index()
        for path in paths:
            full_path = os.path.join(self.path, path)
            blob = Blob()
            try:
                st = os.stat(full_path)
            except OSError:
                # File no longer exists
                try:
                    del index[path]
                except KeyError:
                    pass  # Doesn't exist in the index either
            else:
                f = open(full_path, 'rb')
                try:
                    blob.data = f.read()
                finally:
                    f.close()
                self.object_store.add_object(blob)
                # XXX: Cleanup some of the other file properties as well?
                index[path] = (st.st_ctime, st.st_mtime, st.st_dev, st.st_ino,
                    cleanup_mode(st.st_mode), st.st_uid, st.st_gid, st.st_size,
                    blob.id, 0)
        index.write()
Example #12
def symlink_to_blob(symlink_target):
    from dulwich.objects import Blob
    blob = Blob()
    if isinstance(symlink_target, str):
        symlink_target = encode_git_path(symlink_target)
    blob.data = symlink_target
    return blob
Example #13
def blob_from_path(basepath, path):
    """Returns a tuple of (sha_id, mode, blob)
    """
    fullpath = os.path.join(basepath, path)
    with open(fullpath, 'rb') as working_file:
        blob = Blob()
        blob.data = working_file.read()
    return (path, os.stat(fullpath).st_mode, blob)
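
A usage sketch with hypothetical paths; note that the returned blob has not been added to any object store, so callers do that separately:

    path, mode, blob = blob_from_path('/tmp/repo', 'README')  # hypothetical
    print(path, oct(mode), blob.id)
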
Example #14
 def test_single_blob(self):
     blob = Blob()
     blob.data = b"foo"
     self.store.add_object(blob)
     blobs = [(b"bla", blob.id, stat.S_IFREG)]
     rootid = commit_tree(self.store, blobs)
     self.assertEqual(rootid, b"1a1e80437220f9312e855c37ac4398b68e5c1d50")
     self.assertEqual((stat.S_IFREG, blob.id), self.store[rootid][b"bla"])
     self.assertEqual(set([rootid, blob.id]), set(self.store._data.keys()))
Example #15
 def test_full_tree(self):
     c = self.make_commit(commit_time=30)
     t = Tree()
     t.add(b'data-x', 0o644, Blob().id)
     c.tree = t
     c1 = Commit()
     c1.set_raw_string(c.as_raw_string())
     self.assertEqual(t.id, c1.tree)
     self.assertEqual(c.as_raw_string(), c1.as_raw_string())
Example #16
    def export_fileid_map(self, fileid_map):
        """Export a file id map to a fileid map.

        :param fileid_map: File id map, mapping paths to file ids
        :return: A Git blob object (or None if there are no entries)
        """
        from dulwich.objects import Blob
        b = Blob()
        b.set_raw_chunks(serialize_fileid_map(fileid_map))
        return b
Example #17
 def test_git_dir(self):
     obj = Tree()
     a = Blob()
     a.data = b"foo"
     obj.add(b".git", 0o100644, a.id)
     self.repo.object_store.add_objects(
         [(a, None), (obj, None)])
     self.assertEqual(
             [(obj.id, 'invalid name .git')],
             [(sha, str(e)) for (sha, e) in porcelain.fsck(self.repo)])
Example #18
def test_current_tree_should_be_from_current_commit():
    repo = MemoryRepo()
    tree = Tree()
    repo.object_store.add_object(tree)
    repo.do_commit(tree=tree.id, message=b'first commit')
    tree.add(b'test', 0o100644, Blob().id)
    repo.object_store.add_object(tree)
    repo.do_commit(tree=tree.id, message=b'second commit')

    assert GitRepo(repo).current_tree.id == tree.id
Example #19
    def commit(self):
        # XXX: by all evidence, the rest of this function
        # is not supposed to exist; yes, it is that bad
        # XXX: generate all objects at once and
        #     add them as pack instead of legacy objects
        r = self.repo.repo
        store = r.object_store
        new_objects = []
        names = sorted(self.contents)
        nametree = defaultdict(list)
        for name in names:
            base = name.strip('/')
            while base:
                nbase = os.path.dirname(base)
                nametree[nbase].append(base)
                base = nbase

        if self.base_commit:
            tree = r.tree(self.base_commit.commit.tree)
            tree._ensure_parsed()
            print(tree._entries)
        else:
            tree = Tree()

        for src, dest in self.renames:
            src = src.strip('/')
            dest = dest.strip('/')
            tree[dest] = tree[src]
            del tree[src]

        for name in names:
            blob = Blob()
            blob.data = self.contents[name]
            new_objects.append((blob, name))
            tree.add(0o555, os.path.basename(name), blob.id)

        new_objects.append((tree, ''))
        commit = Commit()
        if self.base_commit:
            commit.parents = [self.base_commit.commit.id]
        commit.tree = tree.id
        commit.message = self.extra['message']
        commit.committer = self.author
        commit.commit_time = int(self.time_unix)
        commit.commit_timezone = self.time_offset
        commit.author = self.author
        commit.author_time = int(self.time_unix)
        commit.author_timezone = self.time_offset
        new_objects.append((commit, ''))
        store.add_objects(new_objects)
        self.repo.repo.refs['HEAD'] = commit.id
Example #20
    def test_tree_copy_after_update(self):
        """Check Tree.id is correctly updated when the tree is copied after updated.
        """
        shas = []
        tree = Tree()
        shas.append(tree.id)
        tree.add(b'data', 0o644, Blob().id)
        copied = tree.copy()
        shas.append(tree.id)
        shas.append(copied.id)

        self.assertNotIn(shas[0], shas[1:])
        self.assertEqual(shas[1], shas[2])
Example #21
 def test_nested(self):
     blob = Blob()
     blob.data = b"foo"
     self.store.add_object(blob)
     blobs = [(b"bla/bar", blob.id, stat.S_IFREG)]
     rootid = commit_tree(self.store, blobs)
     self.assertEqual(rootid, b"d92b959b216ad0d044671981196781b3258fa537")
     dirid = self.store[rootid][b"bla"][1]
     self.assertEqual(dirid, b"c1a1deb9788150829579a8b4efa6311e7b638650")
     self.assertEqual((stat.S_IFDIR, dirid), self.store[rootid][b"bla"])
     self.assertEqual((stat.S_IFREG, blob.id), self.store[dirid][b"bar"])
     self.assertEqual(set([rootid, dirid, blob.id]),
                      set(self.store._data.keys()))
Example #22
def blob_from_path_and_stat(path, st):
    """Create a blob from a path and a stat object.

    :param path: Full path to file
    :param st: A stat object
    :return: A `Blob` object
    """
    blob = Blob()
    if not stat.S_ISLNK(st.st_mode):
        with open(path, 'rb') as f:
            blob.data = f.read()
    else:
        blob.data = os.readlink(path)
    return blob
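
Because the symlink branch inspects st.st_mode, the stat should come from os.lstat rather than os.stat (which follows the link). A sketch with a hypothetical path:

    import os

    p = '/tmp/repo/link-or-file'  # hypothetical
    blob = blob_from_path_and_stat(p, os.lstat(p))
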
Example #23
 def test_blob(self):
     self.map.start_write_group()
     updater = self.cache.get_updater(Revision(b"myrevid"))
     updater.add_object(self._get_test_commit(),
                        {"testament3-sha1": b"Test"}, None)
     b = Blob()
     b.data = b"TEH BLOB"
     updater.add_object(b, (b"myfileid", b"myrevid"), None)
     updater.finish()
     self.map.commit_write_group()
     self.assertEqual([("blob", (b"myfileid", b"myrevid"))],
                      list(self.map.lookup_git_sha(b.id)))
     self.assertEqual(b.id, self.map.lookup_blob_id(b"myfileid",
                                                    b"myrevid"))
Example #24
 def test_simple(self):
     c1, c2, c3 = build_commit_graph(self.repo.object_store, [[1], [2, 1],
         [3, 1, 2]])
     b = Blob()
     b.data = b"foo the bar"
     t = Tree()
     t.add(b"somename", 0o100644, b.id)
     self.repo.object_store.add_object(t)
     self.repo.object_store.add_object(b)
     sha = porcelain.commit_tree(
         self.repo.path, t.id, message=b"Withcommit.",
         author=b"Joe <*****@*****.**>",
         committer=b"Jane <*****@*****.**>")
     self.assertTrue(isinstance(sha, bytes))
     self.assertEqual(len(sha), 40)
Example #25
def blob_from_path_and_stat(fs_path, st):
    """Create a blob from a path and a stat object.

    :param fs_path: Full file system path to file
    :param st: A stat object
    :return: A `Blob` object
    """
    assert isinstance(fs_path, bytes)
    blob = Blob()
    if not stat.S_ISLNK(st.st_mode):
        with open(fs_path, 'rb') as f:
            blob.data = f.read()
    else:
        blob.data = os.readlink(fs_path)
    return blob
Example #26
 def test_splitlines(self):
     for case in [
         [],
         [b'foo\nbar\n'],
         [b'bl\na', b'blie'],
         [b'bl\na', b'blie', b'bloe\n'],
         [b'', b'bl\na', b'blie', b'bloe\n'],
         [b'', b'', b'', b'bla\n'],
         [b'', b'', b'', b'bla\n', b''],
         [b'bl', b'', b'a\naaa'],
         [b'a\naaa', b'a'],
     ]:
         b = Blob()
         b.chunked = case
         self.assertEqual(b.data.splitlines(True), b.splitlines())
Example #27
    def test_normalize_to_crlf_no_op(self):
        base_content = b"line1\r\nline2"
        base_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"

        base_blob = Blob()
        base_blob.set_raw_string(base_content)

        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
        self.assertEqual(base_blob.sha().hexdigest(), base_sha)

        filtered_blob = normalize_blob(base_blob,
                                       convert_lf_to_crlf,
                                       binary_detection=False)

        self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
        self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
Example #28
    def test_normalize_to_crlf_binary(self):
        base_content = b"line1\r\nline2\0"
        base_sha = "b44504193b765f7cd79673812de8afb55b372ab2"

        base_blob = Blob()
        base_blob.set_raw_string(base_content)

        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
        self.assertEqual(base_blob.sha().hexdigest(), base_sha)

        filtered_blob = normalize_blob(base_blob,
                                       convert_lf_to_crlf,
                                       binary_detection=True)

        self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
        self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
Example #29
    def test_normalize_to_lf_no_op(self):
        base_content = b"line1\nline2"
        base_sha = "f8be7bb828880727816015d21abcbc37d033f233"

        base_blob = Blob()
        base_blob.set_raw_string(base_content)

        self.assertEqual(base_blob.as_raw_chunks(), [base_content])
        self.assertEqual(base_blob.sha().hexdigest(), base_sha)

        filtered_blob = normalize_blob(base_blob,
                                       convert_crlf_to_lf,
                                       binary_detection=False)

        self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
        self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
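
The three tests above are deliberate no-op cases; for contrast, here is a sketch where the conversion actually rewrites the blob, assuming the helpers live in dulwich.line_ending:

    from dulwich.line_ending import convert_crlf_to_lf, normalize_blob
    from dulwich.objects import Blob

    blob = Blob()
    blob.set_raw_string(b'line1\r\nline2')
    normalized = normalize_blob(blob, convert_crlf_to_lf,
                                binary_detection=False)
    assert normalized.as_raw_chunks() == [b'line1\nline2']  # CRLF -> LF
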
Example #30
def inventory_to_tree_and_blobs(repo, mapping, revision_id):
    stack = []
    cur = ""
    tree = Tree()

    inv = repo.get_inventory(revision_id)

    for path, entry in inv.iter_entries():
        while stack and not path.startswith(cur):
            tree.serialize()
            sha = tree.sha().hexdigest()
            yield sha, tree
            t = (stat.S_IFDIR, splitpath(cur)[-1:][0].encode('UTF-8'), sha)
            cur, tree = stack.pop()
            tree.add(*t)

        if type(entry) == InventoryDirectory:
            stack.append((cur, tree))
            cur = path
            tree = Tree()

        if type(entry) == InventoryFile:
            # FIXME: We could potentially make this lazy to avoid sha'ing
            # lots of stuff and having all these objects in memory at once
            blob = Blob()
            _, blob._text = next(repo.iter_files_bytes(
                [(entry.file_id, revision_id, path)]))
            sha = blob.sha().hexdigest()
            yield sha, blob

            name = splitpath(path)[-1:][0].encode('UTF-8')
            mode = stat.S_IFREG | 0o644
            if entry.executable:
                mode |= 0o111
            tree.add(mode, name, sha)

    while len(stack) > 1:
        tree.serialize()
        sha = tree.sha().hexdigest()
        yield sha, tree
        t = (stat.S_IFDIR, splitpath(cur)[-1:][0].encode('UTF-8'), sha)
        cur, tree = stack.pop()
        tree.add(*t)

    tree.serialize()
    yield tree.sha().hexdigest(), tree