def test_delta_medium_object(self):
    # This tests an object set that will have a copy operation
    # 2**20 in size.
    with self.get_pack(pack1_sha) as orig_pack:
        orig_blob = orig_pack[a_sha]
        new_blob = Blob()
        new_blob.data = orig_blob.data + (b'x' * 2**20)
        new_blob_2 = Blob()
        new_blob_2.data = new_blob.data + b'y'
        all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None),
                                                       (new_blob_2, None)]
        pack_path = os.path.join(self._tempdir, 'pack_with_deltas')
        write_pack(pack_path, all_to_pack, deltify=True)
    output = run_git_or_fail(['verify-pack', '-v', pack_path])
    self.assertEqual(set(x[0].id for x in all_to_pack),
                     _git_verify_pack_object_list(output))
    # We specifically made a new blob that should be a delta
    # against the blob a_sha, so make sure we really got only 3
    # non-delta objects:
    got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
    self.assertEqual(
        3, got_non_delta,
        'Expected 3 non-delta objects, got %d' % got_non_delta)
    # We expect one object to have a delta chain length of two
    # (new_blob_2), so let's verify that actually happens:
    self.assertIn(b'chain length = 2', output)
def test_delta_large_object(self):
    # This tests an object set that will have a copy operation
    # 2**25 in size. This is a copy large enough that it requires
    # two copy operations in git's binary delta format.
    raise SkipTest("skipping slow, large test")
    with self.get_pack(pack1_sha) as orig_pack:
        new_blob = Blob()
        new_blob.data = b"big blob" + (b"x" * 2**25)
        new_blob_2 = Blob()
        new_blob_2.data = new_blob.data + b"y"
        all_to_pack = list(orig_pack.pack_tuples()) + [
            (new_blob, None),
            (new_blob_2, None),
        ]
        pack_path = os.path.join(self._tempdir, "pack_with_deltas")
        write_pack(pack_path, all_to_pack, deltify=True)
    output = run_git_or_fail(["verify-pack", "-v", pack_path])
    self.assertEqual(
        {x[0].id for x in all_to_pack},
        _git_verify_pack_object_list(output),
    )
    # We specifically made a new blob that should be a delta
    # against the blob a_sha, so make sure we really got only 4
    # non-delta objects:
    got_non_delta = int(_NON_DELTA_RE.search(output).group("non_delta"))
    self.assertEqual(
        4,
        got_non_delta,
        "Expected 4 non-delta objects, got %d" % got_non_delta,
    )
def test_delta_large_object(self):
    # This tests an object set that will have a copy operation
    # 2**25 in size. This is a copy large enough that it requires
    # two copy operations in git's binary delta format.
    raise SkipTest('skipping slow, large test')
    orig_pack = self.get_pack(pack1_sha)
    orig_blob = orig_pack[a_sha]
    new_blob = Blob()
    new_blob.data = 'big blob' + ('x' * 2**25)
    new_blob_2 = Blob()
    new_blob_2.data = new_blob.data + 'y'
    all_to_pack = list(orig_pack.pack_tuples()) + [(new_blob, None),
                                                   (new_blob_2, None)]
    pack_path = os.path.join(self._tempdir, "pack_with_deltas")
    write_pack(pack_path, all_to_pack, deltify=True)
    output = run_git_or_fail(['verify-pack', '-v', pack_path])
    self.assertEqual(set(x[0].id for x in all_to_pack),
                     _git_verify_pack_object_list(output))
    # We specifically made a new blob that should be a delta
    # against the blob a_sha, so make sure we really got only 4
    # non-delta objects:
    got_non_delta = int(_NON_DELTA_RE.search(output).group('non_delta'))
    self.assertEqual(
        4, got_non_delta,
        'Expected 4 non-delta objects, got %d' % got_non_delta)
def symlink_to_blob(symlink_target):
    from dulwich.objects import Blob
    blob = Blob()
    if isinstance(symlink_target, str):
        symlink_target = symlink_target.encode('utf-8')
    blob.data = symlink_target
    return blob
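# A quick usage sketch for the helper above; the target string is an
# arbitrary illustrative value.
link_blob = symlink_to_blob('target/path')
print(link_blob.id)     # hex SHA-1 of a blob whose content is b'target/path'
print(link_blob.data)   # b'target/path'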
def _tree_from_structure(self, structure):
    # TODO: Support directories
    tree = Tree()
    for file_info in structure:
        # str only
        try:
            data = file_info['data'].encode('ascii')
            name = file_info['name'].encode('ascii')
            mode = file_info['mode']
        except (KeyError, UnicodeEncodeError):
            # Skip file on missing fields or encoding errors
            continue
        blob = Blob()
        blob.data = data
        # Store file's contents
        self.repo.object_store.add_object(blob)
        # Add blob entry
        tree.add(name, mode, blob.id)
    # Store tree
    self.repo.object_store.add_object(tree)
    return tree.id
def test_emit_commit(self):
    b = Blob()
    b.data = "FOO"
    t = Tree()
    t.add("foo", stat.S_IFREG | 0o644, b.id)
    c = Commit()
    c.committer = c.author = "Jelmer <jelmer@host>"
    c.author_time = c.commit_time = 1271345553
    c.author_timezone = c.commit_timezone = 0
    c.message = "msg"
    c.tree = t.id
    self.store.add_objects([(b, None), (t, None), (c, None)])
    self.fastexporter.emit_commit(c, "refs/heads/master")
    self.assertEqual("""blob
mark :1
data 3
FOO
commit refs/heads/master
mark :2
author Jelmer <jelmer@host> 1271345553 +0000
committer Jelmer <jelmer@host> 1271345553 +0000
data 3
msg
M 644 1 foo
""", self.stream.getvalue())
def directory_to_tree(path, children, lookup_ie_sha1, unusual_modes,
                      empty_file_name, allow_empty=False):
    """Create a Git Tree object from a Bazaar directory.

    :param path: directory path
    :param children: Children inventory entries
    :param lookup_ie_sha1: Lookup the Git SHA1 for an inventory entry
    :param unusual_modes: Dictionary with unusual file modes by file ids
    :param empty_file_name: Name to use for dummy files in empty
        directories, None to ignore empty directories.
    :param allow_empty: Allow returning an empty tree
    """
    tree = Tree()
    for value in children:
        if value.name in BANNED_FILENAMES:
            continue
        child_path = osutils.pathjoin(path, value.name)
        try:
            mode = unusual_modes[child_path]
        except KeyError:
            mode = entry_mode(value)
        hexsha = lookup_ie_sha1(child_path, value)
        if hexsha is not None:
            tree.add(value.name.encode("utf-8"), mode, hexsha)
    if not allow_empty and len(tree) == 0:
        # Only the root can be an empty tree
        if empty_file_name is not None:
            tree.add(empty_file_name, stat.S_IFREG | 0o644, Blob().id)
        else:
            return None
    return tree
def ie_to_hexsha(path, ie):
    try:
        return shamap[path]
    except KeyError:
        pass
    # FIXME: Should be the same as in parent
    if ie.kind in ("file", "symlink"):
        try:
            return idmap.lookup_blob_id(ie.file_id, ie.revision)
        except KeyError:
            # no-change merge ?
            blob = Blob()
            blob.data = tree.get_file_text(path)
            if add_cache_entry is not None:
                add_cache_entry(blob, (ie.file_id, ie.revision), path)
            return blob.id
    elif ie.kind == "directory":
        # Not all cache backends store the tree information,
        # calculate again from scratch
        ret = directory_to_tree(path, ie.children.values(), ie_to_hexsha,
                                unusual_modes, dummy_file_name,
                                ie.parent_id is None)
        if ret is None:
            return ret
        return ret.id
    else:
        raise AssertionError
def stage(self, paths):
    """Stage a set of paths.

    :param paths: List of paths, relative to the repository path
    """
    if isinstance(paths, basestring):
        paths = [paths]
    from dulwich.index import index_entry_from_stat
    index = self.open_index()
    for path in paths:
        full_path = os.path.join(self.path, path)
        try:
            st = os.stat(full_path)
        except OSError:
            # File no longer exists
            try:
                del index[path]
            except KeyError:
                pass  # already removed
        else:
            blob = Blob()
            f = open(full_path, 'rb')
            try:
                blob.data = f.read()
            finally:
                f.close()
            self.object_store.add_object(blob)
            index[path] = index_entry_from_stat(st, blob.id, 0)
    index.write()
def test_set_chunks(self):
    b = Blob()
    b.chunked = [b'te', b'st', b' 5\n']
    self.assertEqual(b'test 5\n', b.data)
    b.chunked = [b'te', b'st', b' 6\n']
    self.assertEqual(b'test 6\n', b.as_raw_string())
    self.assertEqual(b'test 6\n', bytes(b))
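# For context: a standalone sketch of the data/chunked round-trip the test
# above exercises, using only the public dulwich.objects.Blob API. Treat it
# as an illustrative sketch rather than a pinned fixture.
from dulwich.objects import Blob

b = Blob()
b.chunked = [b'te', b'st', b' 5\n']   # content can be assigned as chunks
assert b.data == b'test 5\n'          # chunks are joined on access

b.data = b'test 6\n'                  # assigning data replaces the chunks
assert b.as_raw_string() == b'test 6\n'
print(b.id)                           # hex SHA-1 over 'blob <len>\0<data>'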
def stage(self, paths):
    """Stage a set of paths.

    :param paths: List of paths, relative to the repository path
    """
    from dulwich.index import cleanup_mode
    index = self.open_index()
    for path in paths:
        full_path = os.path.join(self.path, path)
        blob = Blob()
        try:
            st = os.stat(full_path)
        except OSError:
            # File no longer exists
            try:
                del index[path]
            except KeyError:
                pass  # Doesn't exist in the index either
        else:
            f = open(full_path, 'rb')
            try:
                blob.data = f.read()
            finally:
                f.close()
            self.object_store.add_object(blob)
            # XXX: Cleanup some of the other file properties as well?
            index[path] = (st.st_ctime, st.st_mtime, st.st_dev, st.st_ino,
                           cleanup_mode(st.st_mode), st.st_uid, st.st_gid,
                           st.st_size, blob.id, 0)
    index.write()
def symlink_to_blob(symlink_target):
    from dulwich.objects import Blob
    blob = Blob()
    if isinstance(symlink_target, str):
        symlink_target = encode_git_path(symlink_target)
    blob.data = symlink_target
    return blob
def blob_from_path(basepath, path):
    """Returns a tuple of (path, mode, blob)"""
    fullpath = os.path.join(basepath, path)
    with open(fullpath, 'rb') as working_file:
        blob = Blob()
        blob.data = working_file.read()
    return (path, os.stat(fullpath).st_mode, blob)
def test_single_blob(self):
    blob = Blob()
    blob.data = b"foo"
    self.store.add_object(blob)
    blobs = [(b"bla", blob.id, stat.S_IFREG)]
    rootid = commit_tree(self.store, blobs)
    self.assertEqual(rootid, b"1a1e80437220f9312e855c37ac4398b68e5c1d50")
    self.assertEqual((stat.S_IFREG, blob.id), self.store[rootid][b"bla"])
    self.assertEqual(set([rootid, blob.id]), set(self.store._data.keys()))
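# A minimal standalone sketch of the same commit_tree() call against an
# in-memory object store; the path and contents here are arbitrary examples.
import stat

from dulwich.object_store import MemoryObjectStore, commit_tree
from dulwich.objects import Blob

store = MemoryObjectStore()
blob = Blob()
blob.data = b'foo'
store.add_object(blob)

# commit_tree() builds and stores the tree objects for a flat list of
# (path, sha, mode) entries and returns the root tree's SHA.
root_id = commit_tree(store, [(b'bla', blob.id, stat.S_IFREG)])
print(root_id)
print(store[root_id][b'bla'])   # -> (mode, blob sha)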
def test_full_tree(self):
    c = self.make_commit(commit_time=30)
    t = Tree()
    t.add(b'data-x', 0o644, Blob().id)
    c.tree = t
    c1 = Commit()
    c1.set_raw_string(c.as_raw_string())
    self.assertEqual(t.id, c1.tree)
    self.assertEqual(c.as_raw_string(), c1.as_raw_string())
def export_fileid_map(self, fileid_map):
    """Export a file id map to a blob.

    :param fileid_map: File id map, mapping paths to file ids
    :return: A Git blob object (or None if there are no entries)
    """
    from dulwich.objects import Blob
    b = Blob()
    b.set_raw_chunks(serialize_fileid_map(fileid_map))
    return b
def test_git_dir(self):
    obj = Tree()
    a = Blob()
    a.data = b"foo"
    obj.add(b".git", 0o100644, a.id)
    self.repo.object_store.add_objects([(a, None), (obj, None)])
    self.assertEqual(
        [(obj.id, 'invalid name .git')],
        [(sha, str(e)) for (sha, e) in porcelain.fsck(self.repo)])
def test_current_tree_should_be_from_current_commit():
    repo = MemoryRepo()
    tree = Tree()
    repo.object_store.add_object(tree)
    repo.do_commit(tree=tree.id, message=b'first commit')
    tree.add(b'test', 0o100644, Blob().id)
    repo.object_store.add_object(tree)
    repo.do_commit(tree=tree.id, message=b'second commit')
    assert GitRepo(repo).current_tree.id == tree.id
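# A hedged sketch of the MemoryRepo/do_commit dance the test above relies on.
# The committer/author identity is a made-up placeholder; do_commit() is
# dulwich's plumbing-level commit call.
from dulwich.objects import Tree
from dulwich.repo import MemoryRepo

repo = MemoryRepo()
tree = Tree()                       # start from an empty tree
repo.object_store.add_object(tree)
commit_id = repo.do_commit(
    tree=tree.id,
    message=b'first commit',
    committer=b'Alice <alice@example.com>',   # placeholder identity
    author=b'Alice <alice@example.com>',
)
assert repo[commit_id].tree == tree.id        # the commit points at our tree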
def commit(self):
    # XXX: evidence for the rest of this function
    #      is supposed not to exist; yes, it's that bad
    # XXX: generate all objects at once and
    #      add them as a pack instead of legacy objects
    r = self.repo.repo
    store = r.object_store
    new_objects = []
    names = sorted(self.contents)
    nametree = defaultdict(list)
    for name in names:
        base = name.strip('/')
        while base:
            nbase = os.path.dirname(base)
            nametree[nbase].append(base)
            base = nbase
    if self.base_commit:
        tree = r.tree(self.base_commit.commit.tree)
        tree._ensure_parsed()
        print(tree._entries)
    else:
        tree = Tree()
    for src, dest in self.renames:
        src = src.strip('/')
        dest = dest.strip('/')
        tree[dest] = tree[src]
        del tree[src]
    for name in names:
        blob = Blob()
        blob.data = self.contents[name]
        new_objects.append((blob, name))
        # Note: old-style dulwich Tree.add() argument order (mode first)
        tree.add(0o555, os.path.basename(name), blob.id)
    new_objects.append((tree, ''))
    commit = Commit()
    if self.base_commit:
        commit.parents = [self.base_commit.commit.id]
    commit.tree = tree.id
    commit.message = self.extra['message']
    commit.committer = self.author
    commit.commit_time = int(self.time_unix)
    commit.commit_timezone = self.time_offset
    commit.author = self.author
    commit.author_time = int(self.time_unix)
    commit.author_timezone = self.time_offset
    new_objects.append((commit, ''))
    store.add_objects(new_objects)
    self.repo.repo.refs['HEAD'] = commit.id
def test_tree_copy_after_update(self):
    """Check that Tree.id is correctly updated when the tree is copied
    after being updated.
    """
    shas = []
    tree = Tree()
    shas.append(tree.id)
    tree.add(b'data', 0o644, Blob().id)
    copied = tree.copy()
    shas.append(tree.id)
    shas.append(copied.id)
    self.assertNotIn(shas[0], shas[1:])
    self.assertEqual(shas[1], shas[2])
def test_nested(self):
    blob = Blob()
    blob.data = b"foo"
    self.store.add_object(blob)
    blobs = [(b"bla/bar", blob.id, stat.S_IFREG)]
    rootid = commit_tree(self.store, blobs)
    self.assertEqual(rootid, b"d92b959b216ad0d044671981196781b3258fa537")
    dirid = self.store[rootid][b"bla"][1]
    self.assertEqual(dirid, b"c1a1deb9788150829579a8b4efa6311e7b638650")
    self.assertEqual((stat.S_IFDIR, dirid), self.store[rootid][b"bla"])
    self.assertEqual((stat.S_IFREG, blob.id), self.store[dirid][b"bar"])
    self.assertEqual(set([rootid, dirid, blob.id]),
                     set(self.store._data.keys()))
def blob_from_path_and_stat(path, st):
    """Create a blob from a path and a stat object.

    :param path: Full path to file
    :param st: A stat object
    :return: A `Blob` object
    """
    blob = Blob()
    if not stat.S_ISLNK(st.st_mode):
        with open(path, 'rb') as f:
            blob.data = f.read()
    else:
        blob.data = os.readlink(path)
    return blob
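# A usage sketch for the helper above: write a scratch file, lstat it, build
# the blob, and drop it into an in-memory store. The temp-file handling is
# illustrative; current dulwich ships a similar helper in dulwich.index.
import os
import tempfile

from dulwich.object_store import MemoryObjectStore

fd, path = tempfile.mkstemp()
with os.fdopen(fd, 'wb') as f:
    f.write(b'hello blob\n')

st = os.lstat(path)                      # lstat so symlink modes survive
blob = blob_from_path_and_stat(path, st)

store = MemoryObjectStore()
store.add_object(blob)
assert store[blob.id].data == b'hello blob\n'
os.remove(path)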
def test_blob(self):
    self.map.start_write_group()
    updater = self.cache.get_updater(Revision(b"myrevid"))
    updater.add_object(self._get_test_commit(),
                       {"testament3-sha1": b"Test"}, None)
    b = Blob()
    b.data = b"TEH BLOB"
    updater.add_object(b, (b"myfileid", b"myrevid"), None)
    updater.finish()
    self.map.commit_write_group()
    self.assertEqual(
        [("blob", (b"myfileid", b"myrevid"))],
        list(self.map.lookup_git_sha(b.id)))
    self.assertEqual(
        b.id, self.map.lookup_blob_id(b"myfileid", b"myrevid"))
def test_simple(self):
    c1, c2, c3 = build_commit_graph(
        self.repo.object_store, [[1], [2, 1], [3, 1, 2]])
    b = Blob()
    b.data = b"foo the bar"
    t = Tree()
    t.add(b"somename", 0o100644, b.id)
    self.repo.object_store.add_object(t)
    self.repo.object_store.add_object(b)
    sha = porcelain.commit_tree(
        self.repo.path, t.id, message=b"Withcommit.",
        author=b"Joe <*****@*****.**>",
        committer=b"Jane <*****@*****.**>")
    self.assertTrue(isinstance(sha, bytes))
    self.assertEqual(len(sha), 40)
def blob_from_path_and_stat(fs_path, st):
    """Create a blob from a path and a stat object.

    :param fs_path: Full file system path to file
    :param st: A stat object
    :return: A `Blob` object
    """
    assert isinstance(fs_path, bytes)
    blob = Blob()
    if not stat.S_ISLNK(st.st_mode):
        with open(fs_path, 'rb') as f:
            blob.data = f.read()
    else:
        blob.data = os.readlink(fs_path)
    return blob
def test_splitlines(self):
    for case in [
            [],
            [b'foo\nbar\n'],
            [b'bl\na', b'blie'],
            [b'bl\na', b'blie', b'bloe\n'],
            [b'', b'bl\na', b'blie', b'bloe\n'],
            [b'', b'', b'', b'bla\n'],
            [b'', b'', b'', b'bla\n', b''],
            [b'bl', b'', b'a\naaa'],
            [b'a\naaa', b'a'],
            ]:
        b = Blob()
        b.chunked = case
        self.assertEqual(b.data.splitlines(True), b.splitlines())
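# Sketch: Blob.splitlines() agrees with bytes.splitlines(True) on the joined
# content, no matter how that content is split across chunks. A minimal
# standalone instance of the property the test above loops over:
from dulwich.objects import Blob

b = Blob()
b.chunked = [b'bl\na', b'blie', b'bloe\n']
assert b.data == b'bl\nabliebloe\n'
assert b.splitlines() == [b'bl\n', b'abliebloe\n']   # line endings kept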
def test_normalize_to_crlf_no_op(self):
    base_content = b"line1\r\nline2"
    base_sha = "3a1bd7a52799fe5cf6411f1d35f4c10bacb1db96"
    base_blob = Blob()
    base_blob.set_raw_string(base_content)
    self.assertEqual(base_blob.as_raw_chunks(), [base_content])
    self.assertEqual(base_blob.sha().hexdigest(), base_sha)
    filtered_blob = normalize_blob(
        base_blob, convert_lf_to_crlf, binary_detection=False)
    self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
    self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
def test_normalize_to_crlf_binary(self):
    base_content = b"line1\r\nline2\0"
    base_sha = "b44504193b765f7cd79673812de8afb55b372ab2"
    base_blob = Blob()
    base_blob.set_raw_string(base_content)
    self.assertEqual(base_blob.as_raw_chunks(), [base_content])
    self.assertEqual(base_blob.sha().hexdigest(), base_sha)
    filtered_blob = normalize_blob(
        base_blob, convert_lf_to_crlf, binary_detection=True)
    self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
    self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
def test_normalize_to_lf_no_op(self):
    base_content = b"line1\nline2"
    base_sha = "f8be7bb828880727816015d21abcbc37d033f233"
    base_blob = Blob()
    base_blob.set_raw_string(base_content)
    self.assertEqual(base_blob.as_raw_chunks(), [base_content])
    self.assertEqual(base_blob.sha().hexdigest(), base_sha)
    filtered_blob = normalize_blob(
        base_blob, convert_crlf_to_lf, binary_detection=False)
    self.assertEqual(filtered_blob.as_raw_chunks(), [base_content])
    self.assertEqual(filtered_blob.sha().hexdigest(), base_sha)
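# For context, a standalone sketch of the normalize_blob() behavior these
# tests pin down, assuming the helpers live in dulwich.line_ending as the
# snippets suggest. Conversion produces a new blob; when the content
# actually changes, so does the SHA.
from dulwich.line_ending import convert_crlf_to_lf, normalize_blob
from dulwich.objects import Blob

blob = Blob()
blob.set_raw_string(b'line1\r\nline2')

normalized = normalize_blob(blob, convert_crlf_to_lf, binary_detection=False)
assert normalized.as_raw_string() == b'line1\nline2'
assert normalized.id != blob.id   # CRLF -> LF changed the content hash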
def inventory_to_tree_and_blobs(repo, mapping, revision_id):
    stack = []
    cur = ""
    tree = Tree()
    inv = repo.get_inventory(revision_id)
    for path, entry in inv.iter_entries():
        while stack and not path.startswith(cur):
            tree.serialize()
            sha = tree.sha().hexdigest()
            yield sha, tree
            t = (stat.S_IFDIR, splitpath(cur)[-1:][0].encode('UTF-8'), sha)
            cur, tree = stack.pop()
            tree.add(*t)
        if type(entry) == InventoryDirectory:
            stack.append((cur, tree))
            cur = path
            tree = Tree()
        if type(entry) == InventoryFile:
            # FIXME: We can potentially make this lazy to avoid sha'ing
            # lots of stuff and having all these objects in memory at once
            blob = Blob()
            _, blob._text = next(repo.iter_files_bytes(
                [(entry.file_id, revision_id, path)]))
            sha = blob.sha().hexdigest()
            yield sha, blob
            name = splitpath(path)[-1:][0].encode('UTF-8')
            mode = stat.S_IFREG | 0o644
            if entry.executable:
                mode |= 0o111
            tree.add(mode, name, sha)
    while len(stack) > 1:
        tree.serialize()
        sha = tree.sha().hexdigest()
        yield sha, tree
        t = (stat.S_IFDIR, splitpath(cur)[-1:][0].encode('UTF-8'), sha)
        cur, tree = stack.pop()
        tree.add(*t)
    tree.serialize()
    yield tree.sha().hexdigest(), tree