def test_index_bare_add(self, rw_bare_repo):
    # Something is wrong after cloning to a bare repo, reading the
    # property rw_bare_repo.working_tree_dir will return '/tmp'
    # instead of throwing the Exception we are expecting. This is
    # a quick hack to make this test fail when expected.
    assert rw_bare_repo.working_tree_dir is None
    assert rw_bare_repo.bare
    contents = b'This is a BytesIO file'
    filesize = len(contents)
    fileobj = BytesIO(contents)
    filename = 'my-imaginary-file'
    istream = rw_bare_repo.odb.store(IStream(Blob.type, filesize, fileobj))
    entry = BaseIndexEntry((0o100644, istream.binsha, 0, filename))
    try:
        rw_bare_repo.index.add([entry])
    except AssertionError:
        self.fail("Adding to the index of a bare repo is not allowed.")

    # Adding using a path should still require a non-bare repository.
    asserted = False
    path = osp.join('git', 'test', 'test_index.py')
    try:
        rw_bare_repo.index.add([path])
    except InvalidGitRepositoryError:
        asserted = True
    assert asserted, "Adding using a filename is not correctly asserted."
def _assert_object_writing(self, db):
    """General tests to verify object writing, compatible with ObjectDBW

    :note: requires write access to the database"""
    # start in 'dry-run' mode, using a simple sha1 writer
    ostreams = (ZippedStoreShaWriter, None)
    for ostreamcls in ostreams:
        for data in self.all_data:
            dry_run = ostreamcls is not None
            ostream = None
            if ostreamcls is not None:
                ostream = ostreamcls()
                assert isinstance(ostream, Sha1Writer)
            # END create ostream

            prev_ostream = db.set_ostream(ostream)
            assert type(prev_ostream) in ostreams or prev_ostream in ostreams

            istream = IStream(str_blob_type, len(data), StringIO(data))

            # store returns the same istream instance, with a new sha set
            my_istream = db.store(istream)
            sha = istream.binsha
            assert my_istream is istream
            assert db.has_object(sha) != dry_run
            assert len(sha) == 20

            # verify data - the slow way, we want to run code
            if not dry_run:
                info = db.info(sha)
                assert str_blob_type == info.type
                assert info.size == len(data)

                ostream = db.stream(sha)
                assert ostream.read() == data
                assert ostream.type == str_blob_type
                assert ostream.size == len(data)
            else:
                self.failUnlessRaises(BadObject, db.info, sha)
                self.failUnlessRaises(BadObject, db.stream, sha)

                # DIRECT STREAM COPY
                # our data has been written in object format to the StringIO
                # we passed as output stream. No physical database representation
                # was created.
                # Test direct stream copy of object streams, the result must be
                # identical to what we fed in
                ostream.seek(0)
                istream.stream = ostream
                assert istream.binsha is not None
                prev_sha = istream.binsha

                db.set_ostream(ZippedStoreShaWriter())
                db.store(istream)
                assert istream.binsha == prev_sha
                new_ostream = db.ostream()

                # note: only works as long as our store write uses the same
                # compression level, which is zip_best
                assert ostream.getvalue() == new_ostream.getvalue()
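# The store/read-back round trip exercised above, as a minimal standalone
# sketch. It assumes a plain gitdb LooseObjectDB rather than the test
# fixture's `db`; the objects directory path is hypothetical.
from io import BytesIO

from gitdb import IStream, LooseObjectDB


def demo_store_roundtrip(objects_dir='/path/to/repo/.git/objects'):
    db = LooseObjectDB(objects_dir)
    data = b'hello object database'
    istream = IStream(b'blob', len(data), BytesIO(data))
    db.store(istream)  # fills in istream.binsha as a side effect
    assert len(istream.binsha) == 20

    # read the object back through the stream interface
    ostream = db.stream(istream.binsha)
    assert ostream.type == b'blob'
    assert ostream.read() == data
    return istream.binsha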
def write_tree_from_cache(entries: List[IndexEntry], odb: 'GitCmdObjectDB', sl: slice,
                          si: int = 0) -> Tuple[bytes, List['TreeCacheTup']]:
    """Create a tree from the given sorted list of entries and put the respective
    trees into the given object database

    :param entries: **sorted** list of IndexEntries
    :param odb: object database to store the trees in
    :param si: start index at which we should start creating subtrees
    :param sl: slice indicating the range we should process on the entries list
    :return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of
        tree entries being a tuple of hexsha, mode, name"""
    tree_items: List['TreeCacheTup'] = []

    ci = sl.start
    end = sl.stop
    while ci < end:
        entry = entries[ci]
        if entry.stage != 0:
            raise UnmergedEntriesError(entry)
        # END abort on unmerged
        ci += 1
        rbound = entry.path.find('/', si)
        if rbound == -1:
            # it's not a tree
            tree_items.append((entry.binsha, entry.mode, entry.path[si:]))
        else:
            # find common base range
            base = entry.path[si:rbound]
            xi = ci
            while xi < end:
                oentry = entries[xi]
                orbound = oentry.path.find('/', si)
                if orbound == -1 or oentry.path[si:orbound] != base:
                    break
                # END abort on base mismatch
                xi += 1
            # END find common base

            # enter recursion
            # ci - 1 as we want to count our current item as well
            sha, _tree_entry_list = write_tree_from_cache(entries, odb, slice(ci - 1, xi), rbound + 1)
            tree_items.append((sha, S_IFDIR, base))

            # skip ahead
            ci = xi
        # END handle bounds
    # END for each entry

    # finally create the tree
    sio = BytesIO()
    tree_to_stream(tree_items, sio.write)  # writes to stream as bytes, but doesn't change tree_items
    sio.seek(0)

    istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
    return (istream.binsha, tree_items)
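# A sketch of how write_tree_from_cache is driven in practice, modeled on
# IndexFile.write_tree: sort the entries by (path, stage), process the full
# slice, and collect the intermediate trees in a throwaway MemoryDB. `repo`
# is assumed to be an open git.Repo.
from gitdb.db import MemoryDB


def demo_write_tree(repo):
    mdb = MemoryDB()
    entries = sorted(repo.index.entries.values(), key=lambda e: (e.path, e.stage))
    binsha, tree_items = write_tree_from_cache(entries, mdb, slice(0, len(entries)))
    return binsha, tree_items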
def mktree(self, odb, entries):
    """Create a tree from the given tree entries and save it to the database"""
    sio = BytesIO()
    tree_to_stream(entries, sio.write)
    sio.seek(0)
    istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
    return istream.binsha
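# Hypothetical driver for mktree: build a single-entry tree pointing at an
# existing blob. `odb` and `blob_binsha` are assumed to exist; tree entries
# are (binsha, mode, name) tuples, the format consumed by tree_to_stream.
def demo_mktree(self, odb, blob_binsha):
    entries = [(blob_binsha, 0o100644, 'README')]
    return self.mktree(odb, entries)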
def _assert_object_writing_simple(self, db):
    # write a bunch of objects and query their streams and info
    null_objs = db.size()
    ni = 250
    for i in xrange(ni):
        data = pack(">L", i)
        istream = IStream(str_blob_type, len(data), StringIO(data))
        new_istream = db.store(istream)
        assert new_istream is istream
        assert db.has_object(istream.binsha)

        info = db.info(istream.binsha)
        assert isinstance(info, OInfo)
        assert info.type == istream.type and info.size == istream.size

        stream = db.stream(istream.binsha)
        assert isinstance(stream, OStream)
        assert stream.binsha == info.binsha and stream.type == info.type
        assert stream.read() == data
    # END for each item

    assert db.size() == null_objs + ni
    shas = list(db.sha_iter())
    assert len(shas) == db.size()
    assert len(shas[0]) == 20
def store_path(filepath):
    """Store file at filepath in the database and return the base index entry"""
    st = os.lstat(filepath)  # handles non-symlinks as well
    stream = None
    if S_ISLNK(st.st_mode):
        stream = StringIO(os.readlink(filepath))
    else:
        stream = open(filepath, 'rb')
    # END handle stream
    fprogress(filepath, False, filepath)
    istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream))
    fprogress(filepath, True, filepath)
    return BaseIndexEntry((stat_mode_to_index_mode(st.st_mode),
                           istream.binsha, 0, to_native_path_linux(filepath)))
def test_index_bare_add(self, rw_bare_repo):
    # Something is wrong after cloning to a bare repo, reading the
    # property rw_bare_repo.working_tree_dir will return '/tmp'
    # instead of throwing the Exception we are expecting. This is
    # a quick hack to make this test fail when expected.
    rw_bare_repo._working_tree_dir = None
    contents = 'This is a StringIO file'
    filesize = len(contents)
    fileobj = StringIO(contents)
    filename = 'my-imaginary-file'
    istream = rw_bare_repo.odb.store(IStream(Blob.type, filesize, fileobj))
    entry = BaseIndexEntry((0o100644, istream.binsha, 0, filename))
    try:
        rw_bare_repo.index.add([entry])
    except AssertionError:
        self.fail("Adding to the index of a bare repo is not allowed.")
def _store_path(self, filepath, fprogress):
    """Store file at filepath in the database and return the base index entry

    Needs the git_working_dir decorator active ! This must be assured in the calling code"""
    st = os.lstat(filepath)  # handles non-symlinks as well
    stream = None
    if S_ISLNK(st.st_mode):
        # in PY3, readlink is string, but we need bytes. In PY2, it's just OS encoded bytes, we assume UTF-8
        stream = BytesIO(force_bytes(os.readlink(filepath), encoding='utf-8'))
    else:
        stream = open(filepath, 'rb')
    # END handle stream
    fprogress(filepath, False, filepath)
    istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream))
    fprogress(filepath, True, filepath)
    stream.close()
    return BaseIndexEntry((stat_mode_to_index_mode(st.st_mode),
                           istream.binsha, 0, to_native_path_linux(filepath)))
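# The internal helper above backs index.add() for on-disk paths. For content
# that never touches the working tree, the same IStream pattern works through
# the public API, as in this sketch (`repo` is assumed to be a non-bare
# git.Repo, mirroring the bare-repo test earlier in this collection).
from io import BytesIO

from git import Blob
from git.index.typ import BaseIndexEntry
from gitdb import IStream


def add_in_memory_file(repo, filename, contents):
    istream = repo.odb.store(IStream(Blob.type, len(contents), BytesIO(contents)))
    entry = BaseIndexEntry((0o100644, istream.binsha, 0, filename))
    repo.index.add([entry])
    return entry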
def stream_copy(self, sha_iter, odb):
    """Copy the streams as identified by sha's yielded by sha_iter into the given odb
    The streams will be copied directly

    :note: the object will only be written if it did not exist in the target db
    :return: amount of streams actually copied into odb. If smaller than the
        amount of input shas, one or more objects did already exist in odb"""
    count = 0
    for sha in sha_iter:
        if odb.has_object(sha):
            continue
        # END check object existence

        ostream = self.stream(sha)
        # compressed data including header
        sio = StringIO(ostream.stream.data())
        istream = IStream(ostream.type, ostream.size, sio, sha)

        odb.store(istream)
        count += 1
    # END for each sha
    return count
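# A hedged sketch of driving stream_copy: mirror every object of one database
# into another. `src_db` and `dst_db` are assumed to be compatible gitdb
# databases (e.g. two LooseObjectDBs) whose sha_iter yields binary shas.
def demo_mirror(src_db, dst_db):
    copied = src_db.stream_copy(src_db.sha_iter(), dst_db)
    print('%d new objects copied, the rest already existed' % copied)
    return copied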
def store_obj(self, obj_type: bytes, data: bytes) -> bytes:
    repo = self._get_repo()
    istream = IStream(obj_type, len(data), io.BytesIO(data))
    repo.odb.store(istream)
    return istream.binsha
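# Example call for the helper above. The bytes object type (b'blob') follows
# the signature's annotation; the payload is made up.
def demo_store_obj(self):
    binsha = self.store_obj(b'blob', b'raw file contents')
    return binsha.hex()  # 40-char hexsha, e.g. for `git cat-file -p <hexsha>`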
def istream_generator(offset=0, ni=ni):
    for data_src in xrange(ni):
        data = str(data_src + offset)
        yield IStream(str_blob_type, len(data), StringIO(data))
def run(self, src_path, dst_path):
    src = git.Repo(src_path)
    dst = git.Repo.init(dst_path)
    children = defaultdict(set)  # binsha => set(binsha)
    threads = list()             # [commit]
    depend = dict()              # binsha => set(binsha)
    for head in src.heads:
        st = [head.commit]
        while st:
            commit = st.pop()
            if commit.binsha not in depend:
                depend[commit.binsha] = {parent.binsha for parent in commit.parents}
                if not commit.parents:
                    threads.append(commit)
                for c in commit.parents:
                    if c.binsha not in children:
                        st.append(c)
                    children[c.binsha].add(commit)
    blob_map_cache = dict()
    commit_binsha_map = dict()  # old binsha => new binsha
    height = dict()             # new binsha => height
    while threads:
        commit = threads.pop()
        index = dst.index
        blobs = set()
        for item in commit.tree.traverse():
            key = item.binsha, item.mode, item.path
            if item.type == 'blob':
                if key in blob_map_cache:
                    if blob_map_cache[key] is not None:
                        value = blob_map_cache[key]
                        blobs.add(value)
                else:
                    res = self.blob_map(item.data_stream, item.mode, item.path)
                    if res is not None:
                        data, mode, path = res
                        istream = dst.odb.store(IStream('blob', len(data), io.BytesIO(data)))
                        value = blob_map_cache[key] = istream.binsha, mode, path
                        blobs.add(value)
                    else:
                        blob_map_cache[key] = None
        for data, mode, path in self.commit_add(commit):
            istream = dst.odb.store(IStream('blob', len(data), io.BytesIO(data)))
            blobs.add((istream.binsha, mode, path))
        # remove/add only the difference
        old_blobs = {(blob[1].binsha, blob[1].mode, blob[1].path) for blob in index.iter_blobs()}
        to_remove = list(old_blobs - blobs)
        to_add = list(blobs - old_blobs)
        for i in range(0, len(to_remove), 128):
            index.remove([git.Blob(dst, *t) for t in to_remove[i:i + 128]])
        for i in range(0, len(to_add), 128):
            index.add([git.Blob(dst, *t) for t in to_add[i:i + 128]])
        parent_commits = [commit_binsha_map[parent.binsha] for parent in commit.parents]
        (message, author, authored_date, author_tz_offset,
         committer, committed_date, committer_tz_offset) = self.commit_map(
            commit, commit.message, commit.author, commit.authored_date,
            commit.author_tz_offset, commit.committer, commit.committed_date,
            commit.committer_tz_offset)
        author_date = (time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(authored_date))
                       + ' ' + git.objects.util.altz_to_utctz_str(author_tz_offset))
        commit_date = (time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(committed_date))
                       + ' ' + git.objects.util.altz_to_utctz_str(committer_tz_offset))
        skip_flag = False
        if self.remove_empty_commits:
            # detect grandparents
            min_height = min(height[parent.binsha] for parent in parent_commits) if parent_commits else 0
            st = parent_commits[:]
            grandparents = set()
            while st:
                current = st.pop()
                for grandparent in current.parents:
                    if grandparent.binsha not in grandparents:
                        grandparents.add(grandparent.binsha)
                        if height[grandparent.binsha] > min_height:
                            st.append(grandparent)
            parent_commits = [parent for parent in parent_commits if parent.binsha not in grandparents]
            # detect same parents
            for i in range(len(parent_commits) - 1, -1, -1):
                if parent_commits[i].binsha in set(parent.binsha for parent in parent_commits[:i]):
                    parent_commits.pop(i)
            # skip empty commits
            for parent in parent_commits:
                if not index.diff(parent):
                    dst_commit = parent
                    skip_flag = True
                    break
        if not skip_flag:
            dst_commit = index.commit(message, parent_commits=parent_commits, author=author,
                                      committer=committer, author_date=author_date,
                                      commit_date=commit_date)
        commit_binsha_map[commit.binsha] = dst_commit
        height[dst_commit.binsha] = (max(height[parent.binsha] for parent in dst_commit.parents) + 1
                                     if dst_commit.parents else 0)
        self.progress(commit, dst_commit)
        for child in children[commit.binsha]:
            depend[child.binsha].remove(commit.binsha)
            if not depend[child.binsha]:
                threads.append(child)
    for head in src.heads:
        if head.name not in dst.heads:
            dst.create_head(head.name)
        dst.heads[head.name].commit = commit_binsha_map[head.commit.binsha]
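# A hedged usage sketch for run(): subclass whatever base class provides the
# blob_map/commit_add/commit_map/progress hooks it calls (that class is not
# shown here; `HistoryRewriter` is a placeholder name) and rewrite src into dst.
class StripLargeBlobs(HistoryRewriter):  # placeholder base-class name
    remove_empty_commits = True

    def blob_map(self, data_stream, mode, path):
        # run() drops any blob for which blob_map returns None
        data = data_stream.read()
        if len(data) > 1024 * 1024:  # drop blobs over 1 MiB
            return None
        return data, mode, path


# StripLargeBlobs().run('/path/to/src.git', '/path/to/dst')  # paths are examples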