def _assert_object_writing(self, db):
        """General tests to verify object writing, compatible to ObjectDBW
		:note: requires write access to the database"""
        # start in 'dry-run' mode, using a simple sha1 writer
        ostreams = (ZippedStoreShaWriter, None)
        for ostreamcls in ostreams:
            for data in self.all_data:
                dry_run = ostreamcls is not None
                ostream = None
                if ostreamcls is not None:
                    ostream = ostreamcls()
                    assert isinstance(ostream, Sha1Writer)
                # END create ostream

                prev_ostream = db.set_ostream(ostream)
                assert type(prev_ostream) in ostreams or prev_ostream in ostreams

                istream = IStream(str_blob_type, len(data), StringIO(data))

                # store returns same istream instance, with new sha set
                my_istream = db.store(istream)
                sha = istream.binsha
                assert my_istream is istream
                assert db.has_object(sha) != dry_run
                assert len(sha) == 20

                # verify data - the slow way, we want to run code
                if not dry_run:
                    info = db.info(sha)
                    assert str_blob_type == info.type
                    assert info.size == len(data)

                    ostream = db.stream(sha)
                    assert ostream.read() == data
                    assert ostream.type == str_blob_type
                    assert ostream.size == len(data)
                else:
                    self.failUnlessRaises(BadObject, db.info, sha)
                    self.failUnlessRaises(BadObject, db.stream, sha)

                    # DIRECT STREAM COPY
                    # our data has been written in object format to the StringIO
                    # we passed as output stream. No physical database representation
                    # was created.
                    # Test direct stream copy of object streams, the result must be
                    # identical to what we fed in
                    ostream.seek(0)
                    istream.stream = ostream
                    assert istream.binsha is not None
                    prev_sha = istream.binsha

                    db.set_ostream(ZippedStoreShaWriter())
                    db.store(istream)
                    assert istream.binsha == prev_sha
                    new_ostream = db.ostream()

                    # note: only works as long as our store write uses the same
                    # compression level, which is zip_best
                    assert ostream.getvalue() == new_ostream.getvalue()
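For context, a minimal sketch of how a concrete test case might drive this helper against a gitdb loose object database. LooseObjectDB is gitdb's on-disk backend; the TestDBBase base class and the temporary-directory handling are assumptions made for illustration only.

import shutil
import tempfile

from gitdb.db import LooseObjectDB

class LooseDBWritingTest(TestDBBase):  # TestDBBase (assumed) provides self.all_data
    def test_writing(self):
        # create a throw-away loose object database and run the generic checks
        path = tempfile.mkdtemp()
        try:
            self._assert_object_writing(LooseObjectDB(path))
        finally:
            shutil.rmtree(path)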
Example #3
    def test_index_bare_add(self, rw_bare_repo):
        # Something is wrong after cloning to a bare repo: reading the
        # property rw_bare_repo.working_tree_dir will return '/tmp'
        # instead of raising the exception we are expecting. This is
        # a quick hack to make this test fail when expected.
        assert rw_bare_repo.working_tree_dir is None
        assert rw_bare_repo.bare
        contents = b'This is a BytesIO file'
        filesize = len(contents)
        fileobj = BytesIO(contents)
        filename = 'my-imaginary-file'
        istream = rw_bare_repo.odb.store(IStream(Blob.type, filesize, fileobj))
        entry = BaseIndexEntry((0o100644, istream.binsha, 0, filename))
        try:
            rw_bare_repo.index.add([entry])
        except AssertionError:
            self.fail("Adding to the index of a bare repo is not allowed.")

        # Adding using a path should still require a non-bare repository.
        asserted = False
        path = osp.join('git', 'test', 'test_index.py')
        try:
            rw_bare_repo.index.add([path])
        except InvalidGitRepositoryError:
            asserted = True
        assert asserted, "Adding using a filename is not correctly asserted."
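The rw_bare_repo fixture is supplied by the surrounding test suite through its own helpers; what follows is a hedged pytest-style sketch of an equivalent fixture, illustrative only.

import shutil
import tempfile

import pytest
import git

@pytest.fixture
def rw_bare_repo():
    # a throw-away bare repository is enough for the index test above
    path = tempfile.mkdtemp(suffix='.git')
    repo = git.Repo.init(path, bare=True)
    yield repo
    shutil.rmtree(path)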
Example #4
File: fun.py Project: jrhauser/jrhauser
def write_tree_from_cache(entries: List[IndexEntry],
                          odb: 'GitCmdObjectDB',
                          sl: slice,
                          si: int = 0) -> Tuple[bytes, List['TreeCacheTup']]:
    """Create a tree from the given sorted list of entries and put the respective
    trees into the given object database

    :param entries: **sorted** list of IndexEntries
    :param odb: object database to store the trees in
    :param si: start index at which we should start creating subtrees
    :param sl: slice indicating the range we should process on the entries list
    :return: tuple(binsha, list(tree_entry, ...)) a tuple of a sha and a list of
        tree entries being a tuple of hexsha, mode, name"""
    tree_items: List['TreeCacheTup'] = []

    ci = sl.start
    end = sl.stop
    while ci < end:
        entry = entries[ci]
        if entry.stage != 0:
            raise UnmergedEntriesError(entry)
        # END abort on unmerged
        ci += 1
        rbound = entry.path.find('/', si)
        if rbound == -1:
            # it's not a tree
            tree_items.append((entry.binsha, entry.mode, entry.path[si:]))
        else:
            # find common base range
            base = entry.path[si:rbound]
            xi = ci
            while xi < end:
                oentry = entries[xi]
                orbound = oentry.path.find('/', si)
                if orbound == -1 or oentry.path[si:orbound] != base:
                    break
                # END abort on base mismatch
                xi += 1
            # END find common base

            # enter recursion
            # ci - 1 as we want to count our current item as well
            sha, _tree_entry_list = write_tree_from_cache(
                entries, odb, slice(ci - 1, xi), rbound + 1)
            tree_items.append((sha, S_IFDIR, base))

            # skip ahead
            ci = xi
        # END handle bounds
    # END for each entry

    # finally create the tree
    sio = BytesIO()
    tree_to_stream(
        tree_items,
        sio.write)  # writes to stream as bytes, but doesn't change tree_items
    sio.seek(0)

    istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
    return (istream.binsha, tree_items)
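A hedged sketch of how this function can be driven, loosely following how GitPython's IndexFile.write_tree appears to use it: build the trees into a gitdb MemoryDB first, then copy them into the target database. build_root_tree and its arguments are illustrative names, not part of the example above.

from gitdb.db import MemoryDB

def build_root_tree(sorted_entries, target_odb):
    # sorted_entries must already be sorted by path, as the docstring requires
    mdb = MemoryDB()
    binsha, _tree_items = write_tree_from_cache(sorted_entries, mdb,
                                                slice(0, len(sorted_entries)))
    # copy the intermediate tree objects into the real object database
    mdb.stream_copy(mdb.sha_iter(), target_odb)
    return binsha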
Example #5
    def mktree(self, odb, entries):
        """Create a tree from the given tree entries and save it to the database"""
        sio = BytesIO()
        tree_to_stream(entries, sio.write)
        sio.seek(0)
        istream = odb.store(IStream(str_tree_type, len(sio.getvalue()), sio))
        return istream.binsha

    def _assert_object_writing_simple(self, db):
        # write a bunch of objects and query their streams and info
        null_objs = db.size()
        ni = 250
        for i in xrange(ni):
            data = pack(">L", i)
            istream = IStream(str_blob_type, len(data), StringIO(data))
            new_istream = db.store(istream)
            assert new_istream is istream
            assert db.has_object(istream.binsha)

            info = db.info(istream.binsha)
            assert isinstance(info, OInfo)
            assert info.type == istream.type and info.size == istream.size

            stream = db.stream(istream.binsha)
            assert isinstance(stream, OStream)
            assert stream.binsha == info.binsha and stream.type == info.type
            assert stream.read() == data
        # END for each item

        assert db.size() == null_objs + ni
        shas = list(db.sha_iter())
        assert len(shas) == db.size()
        assert len(shas[0]) == 20
Example #7
		def store_path(filepath):
			"""Store file at filepath in the database and return the base index entry"""
			st = os.lstat(filepath)		# handles non-symlinks as well
			stream = None
			if S_ISLNK(st.st_mode):
				stream = StringIO(os.readlink(filepath))
			else:
				stream = open(filepath, 'rb')
			# END handle stream
			fprogress(filepath, False, filepath)
			istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream))
			fprogress(filepath, True, filepath)
			return BaseIndexEntry((stat_mode_to_index_mode(st.st_mode), 
									istream.binsha, 0, to_native_path_linux(filepath)))
Example #8
 def test_index_bare_add(self, rw_bare_repo):
     # Something is wrong after cloning to a bare repo: reading the
     # property rw_bare_repo.working_tree_dir will return '/tmp'
     # instead of raising the exception we are expecting. This is
     # a quick hack to make this test fail when expected.
     rw_bare_repo._working_tree_dir = None
     contents = 'This is a StringIO file'
     filesize = len(contents)
     fileobj = StringIO(contents)
     filename = 'my-imaginary-file'
     istream = rw_bare_repo.odb.store(IStream(Blob.type, filesize, fileobj))
     entry = BaseIndexEntry((0100644, istream.binsha, 0, filename))
     try:
         rw_bare_repo.index.add([entry])
     except AssertionError, e:
         self.fail("Adding to the index of a bare repo is not allowed.")
Example #9
 def _store_path(self, filepath, fprogress):
     """Store file at filepath in the database and return the base index entry
     Needs the git_working_dir decorator active ! This must be assured in the calling code"""
     st = os.lstat(filepath)     # handles non-symlinks as well
     stream = None
     if S_ISLNK(st.st_mode):
         # in PY3, readlink is string, but we need bytes. In PY2, it's just OS encoded bytes, we assume UTF-8
         stream = BytesIO(force_bytes(os.readlink(filepath), encoding='utf-8'))
     else:
         stream = open(filepath, 'rb')
     # END handle stream
     fprogress(filepath, False, filepath)
     istream = self.repo.odb.store(IStream(Blob.type, st.st_size, stream))
     fprogress(filepath, True, filepath)
     stream.close()
     return BaseIndexEntry((stat_mode_to_index_mode(st.st_mode),
                            istream.binsha, 0, to_native_path_linux(filepath)))
Example #10
    def stream_copy(self, sha_iter, odb):
        """Copy the streams as identified by sha's yielded by sha_iter into the given odb
		The streams will be copied directly
		:note: the object will only be written if it did not exist in the target db
		:return: amount of streams actually copied into odb. If smaller than the amount
			of input shas, one or more objects did already exist in odb"""
        count = 0
        for sha in sha_iter:
            if odb.has_object(sha):
                continue
            # END check object existence

            ostream = self.stream(sha)
            # compressed data including header
            sio = StringIO(ostream.stream.data())
            istream = IStream(ostream.type, ostream.size, sio, sha)

            odb.store(istream)
            count += 1
        # END for each sha
        return count
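A small usage sketch (names are illustrative): copying every object from one database into another. Since stream_copy skips objects the target already has, the call is safe to repeat.

def copy_all_objects(src_db, dst_db):
    # src_db.sha_iter() yields the binsha of every object in the source database
    return src_db.stream_copy(src_db.sha_iter(), dst_db)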
Example #11
 def store_obj(self, obj_type: bytes, data: bytes) -> bytes:
     repo = self._get_repo()
     istream = IStream(obj_type, len(data), io.BytesIO(data))
     repo.odb.store(istream)
     return istream.binsha
 def istream_generator(offset=0, ni=ni):
     for data_src in xrange(ni):
         data = str(data_src + offset)
         yield IStream(str_blob_type, len(data), StringIO(data))
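A hedged sketch of feeding the generator above into an object database, one IStream at a time; store_generated_blobs is an illustrative name.

def store_generated_blobs(db, ni=250):
    # store ni small blobs via istream_generator and collect their binshas
    shas = []
    for istream in istream_generator(0, ni):
        db.store(istream)
        shas.append(istream.binsha)
    return shas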
Example #13
    def run(self, src_path, dst_path):
        src = git.Repo(src_path)
        dst = git.Repo.init(dst_path)

        children = defaultdict(set) # binsha => set(commit)
        threads = list() # [commit]
        depend = dict() # binsha => set(binsha)
        for head in src.heads:
            st = [head.commit]
            while st:
                commit = st.pop()
                if commit.binsha not in depend:
                    depend[commit.binsha] = {parent.binsha for parent in commit.parents}
                    if not commit.parents:
                        threads.append(commit)
                    for c in commit.parents:
                        if c.binsha not in children:
                            st.append(c)
                        children[c.binsha].add(commit)

        blob_map_cache = dict()
        commit_binsha_map = dict() # old binsha => new commit
        height = dict() # new binsha => height
        while threads:
            commit = threads.pop()
            index = dst.index
            blobs = set()
            for item in commit.tree.traverse():
                key = item.binsha, item.mode, item.path
                if item.type == 'blob':
                    if key in blob_map_cache:
                        if blob_map_cache[key] is not None:
                            value = blob_map_cache[key]
                            blobs.add(value)
                    else:
                        res = self.blob_map(item.data_stream, item.mode, item.path)
                        if res is not None:
                            data, mode, path = res
                            istream = dst.odb.store(IStream('blob', len(data), io.BytesIO(data)))
                            value = blob_map_cache[key] = istream.binsha, mode, path
                            blobs.add(value)
                        else:
                            blob_map_cache[key] = None
            for data, mode, path in self.commit_add(commit):
                istream = dst.odb.store(IStream('blob', len(data), io.BytesIO(data)))
                blobs.add((istream.binsha, mode, path))

            # remove/add only the difference
            old_blobs = {(blob[1].binsha, blob[1].mode, blob[1].path) for blob in index.iter_blobs()}
            to_remove = list(old_blobs - blobs)
            to_add = list(blobs - old_blobs)
            for i in range(0, len(to_remove), 128):
                index.remove([git.Blob(dst, *t) for t in to_remove[i:i + 128]])
            for i in range(0, len(to_add), 128):
                index.add([git.Blob(dst, *t) for t in to_add[i:i + 128]])

            parent_commits = [commit_binsha_map[parent.binsha] for parent in commit.parents]
            (message, author, authored_date, author_tz_offset,
             committer, committed_date, committer_tz_offset) = self.commit_map(
                commit, commit.message, commit.author, commit.authored_date,
                commit.author_tz_offset, commit.committer, commit.committed_date,
                commit.committer_tz_offset)
            author_date = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(authored_date)) + ' ' + git.objects.util.altz_to_utctz_str(author_tz_offset)
            commit_date = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(committed_date)) + ' ' + git.objects.util.altz_to_utctz_str(committer_tz_offset)

            skip_flag = False
            if self.remove_empty_commits:
                # detect grandparents
                min_height = min(height[parent.binsha] for parent in parent_commits) if parent_commits else 0
                st = parent_commits[:]
                grandparents = set()
                while st:
                    current = st.pop()
                    for grandparent in current.parents:
                        if grandparent.binsha not in grandparents:
                            grandparents.add(grandparent.binsha)
                            if height[grandparent.binsha] > min_height:
                                st.append(grandparent)
                parent_commits = [parent for parent in parent_commits if parent.binsha not in grandparents]

                # detect same parents
                for i in range(len(parent_commits) - 1, -1, -1):
                    if parent_commits[i].binsha in set(parent.binsha for parent in parent_commits[:i]):
                        parent_commits.pop(i)

                # skip empty commits
                for parent in parent_commits:
                    if not index.diff(parent):
                        dst_commit = parent
                        skip_flag = True
                        break

            if not skip_flag:
                dst_commit = index.commit(message, parent_commits=parent_commits, author=author, committer=committer, author_date=author_date, commit_date=commit_date)
            commit_binsha_map[commit.binsha] = dst_commit
            height[dst_commit.binsha] = max(height[parent.binsha] for parent in dst_commit.parents) + 1 if dst_commit.parents else 0

            self.progress(commit, dst_commit)

            for child in children[commit.binsha]:
                depend[child.binsha].remove(commit.binsha)
                if not depend[child.binsha]:
                    threads.append(child)

        for head in src.heads:
            if head.name not in dst.heads:
                dst.create_head(head.name)
            dst.heads[head.name].commit = commit_binsha_map[head.commit.binsha]
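The run() method above belongs to an unnamed history-rewriting class and calls several hooks on self: blob_map, commit_add, commit_map, progress and remove_empty_commits. Below is a hedged sketch of a minimal host class that would take run() unchanged and pass everything through; all names and defaults are assumptions inferred from the call sites, not part of the original.

class IdentityRewriter:
    """Illustrative host for run(): copies history without modification."""

    remove_empty_commits = False

    # the run() method from above would be defined here, unchanged

    def blob_map(self, data_stream, mode, path):
        # return (data, mode, path) to keep a blob, or None to drop it
        return data_stream.read(), mode, path

    def commit_add(self, commit):
        # extra (data, mode, path) blobs to inject per commit; none by default
        return []

    def commit_map(self, commit, message, author, authored_date, author_tz_offset,
                   committer, committed_date, committer_tz_offset):
        # pass commit metadata through unchanged
        return (message, author, authored_date, author_tz_offset,
                committer, committed_date, committer_tz_offset)

    def progress(self, src_commit, dst_commit):
        print(src_commit.hexsha, '->', dst_commit.hexsha)

# IdentityRewriter().run('/path/to/src', '/path/to/dst')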