Example #1
    def test_reading(self):
        gdb = GitDB(os.path.join(self.gitrepopath, 'objects'))

        # we have packs and loose objects, alternates doesn't necessarily exist
        assert 1 < len(gdb.databases()) < 4

        # access should be possible
        gitdb_sha = next(gdb.sha_iter())
        assert isinstance(gdb.info(gitdb_sha), OInfo)
        assert isinstance(gdb.stream(gitdb_sha), OStream)
        ni = 50
        assert gdb.size() >= ni
        sha_list = list(gdb.sha_iter())
        assert len(sha_list) == gdb.size()
        sha_list = sha_list[:ni]  # speed up tests ...

        # This is actually a test for compound functionality, but it doesn't
        # have a separate test module
        # test partial shas
        # this one is uneven and quite short
        gitdb_sha_hex = bin_to_hex(gitdb_sha)
        assert gdb.partial_to_complete_sha_hex(gitdb_sha_hex[:5]) == gitdb_sha

        # mix even/uneven hexshas
        for i, binsha in enumerate(sha_list):
            assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8 - (i % 2)]) == binsha
        # END for each sha

        self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000")
Example #2
    def test_reading(self):
        gdb = GitDB(os.path.join(self.gitrepopath, 'objects'))

        # we have packs and loose objects, alternates doesn't necessarily exist
        assert 1 < len(gdb.databases()) < 4

        # access should be possible
        gitdb_sha = next(gdb.sha_iter())
        assert isinstance(gdb.info(gitdb_sha), OInfo)
        assert isinstance(gdb.stream(gitdb_sha), OStream)
        ni = 50
        assert gdb.size() >= ni
        sha_list = list(gdb.sha_iter())
        assert len(sha_list) == gdb.size()
        sha_list = sha_list[:ni]  # speed up tests ...

        # This is actually a test for compound functionality, but it doesn't
        # have a separate test module
        # test partial shas
        # this one is uneven and quite short
        gitdb_sha_hex = bin_to_hex(gitdb_sha)
        assert gdb.partial_to_complete_sha_hex(gitdb_sha_hex[:5]) == gitdb_sha

        # mix even/uneven hexshas
        for i, binsha in enumerate(sha_list):
            assert gdb.partial_to_complete_sha_hex(
                bin_to_hex(binsha)[:8 - (i % 2)]) == binsha
        # END for each sha

        self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex,
                              "0000")
Example #3
    def create(cls,
               object_iter,
               base_dir,
               object_count=None,
               zlib_compression=zlib.Z_BEST_SPEED):
        """Create a new on-disk entity comprised of a properly named pack file and a properly named
        and corresponding index file. The pack contains all OStream objects contained in object iter.
        :param base_dir: directory which is to contain the files
        :return: PackEntity instance initialized with the new pack

        **Note:** for more information on the other parameters see the write_pack method"""
        pack_fd, pack_path = tempfile.mkstemp('', 'pack', base_dir)
        index_fd, index_path = tempfile.mkstemp('', 'index', base_dir)
        pack_write = lambda d: os.write(pack_fd, d)
        index_write = lambda d: os.write(index_fd, d)

        pack_binsha, index_binsha = cls.write_pack(object_iter, pack_write,
                                                   index_write, object_count,
                                                   zlib_compression)
        os.close(pack_fd)
        os.close(index_fd)

        fmt = "pack-%s.%s"
        new_pack_path = os.path.join(base_dir,
                                     fmt % (bin_to_hex(pack_binsha), 'pack'))
        new_index_path = os.path.join(base_dir,
                                      fmt % (bin_to_hex(pack_binsha), 'idx'))
        os.rename(pack_path, new_pack_path)
        os.rename(index_path, new_index_path)

        return cls(new_pack_path)
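
The classmethod above is easiest to follow with a small driver. The sketch below is not from this page: it assumes gitdb's PackEntity (from gitdb.pack), an illustrative repository path, and a temporary output directory, and it feeds the pack from an existing GitDB exactly the way the other examples obtain OStream objects.

import os
import tempfile

from gitdb import GitDB
from gitdb.pack import PackEntity

db = GitDB(os.path.join("/path/to/repo/.git", "objects"))  # assumed repository path
pack_dir = tempfile.mkdtemp()

# PackEntity.create consumes OStream objects, which is exactly what db.stream() yields.
entity = PackEntity.create((db.stream(sha) for sha in db.sha_iter()),
                           pack_dir,
                           object_count=db.size())

# The directory now holds a pack-<sha>.pack / pack-<sha>.idx pair.
print(sorted(os.listdir(pack_dir)))
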
Example #4
	def append_entry(cls, config_reader, filepath, oldbinsha, newbinsha, message):
		"""Append a new log entry to the revlog at filepath.
		
		:param config_reader: configuration reader of the repository - used to obtain
			user information. May be None
		:param filepath: full path to the log file
		:param oldbinsha: binary sha of the previous commit
		:param newbinsha: binary sha of the current commit
		:param message: message describing the change to the reference
		:param write: If True, the changes will be written right away. Otherwise
			the change will not be written
		:return: RefLogEntry object which was appended to the log
		:note: As we are append-only, concurrent access is not a problem as we 
			do not interfere with readers."""
		if len(oldbinsha) != 20 or len(newbinsha) != 20:
			raise ValueError("Shas need to be given in binary format")
		#END handle sha type
		assure_directory_exists(filepath, is_file=True)
		entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha), Actor.committer(config_reader), (int(time.time()), time.altzone), message))
		
		lf = LockFile(filepath)
		lf._obtain_lock_or_raise()
		
		fd = open(filepath, 'a')
		try:
			fd.write(repr(entry))
		finally:
			fd.close()
			lf._release_lock()
		#END handle write operation
		
		return entry
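
A hedged usage sketch for the classmethod above, assuming GitPython's RefLog in git.refs.log, an existing repository in the current directory, and the conventional reflog location under .git/logs/HEAD; the repository path and message are illustrative only.

import os

from git import Repo
from git.refs.log import RefLog

repo = Repo(".")                                        # assumed existing repository
head_commit = repo.head.commit
logfile = os.path.join(repo.git_dir, "logs", "HEAD")    # assumed reflog path

# Append a no-op entry (old and new binsha identical) with a describing message.
entry = RefLog.append_entry(repo.config_reader(),       # resolves the committer identity
                            logfile,
                            head_commit.binsha,         # 20-byte binary shas
                            head_commit.binsha,
                            "example: manually appended reflog entry")
print(entry)
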
Example #5
    def append_entry(cls, config_reader, filepath, oldbinsha, newbinsha,
                     message):
        """Append a new log entry to the revlog at filepath.
		
		:param config_reader: configuration reader of the repository - used to obtain
			user information. May be None
		:param filepath: full path to the log file
		:param oldbinsha: binary sha of the previous commit
		:param newbinsha: binary sha of the current commit
		:param message: message describing the change to the reference
		:param write: If True, the changes will be written right away. Otherwise
			the change will not be written
		:return: RefLogEntry object which was appended to the log
		:note: As we are append-only, concurrent access is not a problem as we 
			do not interfere with readers."""
        if len(oldbinsha) != 20 or len(newbinsha) != 20:
            raise ValueError("Shas need to be given in binary format")
        #END handle sha type
        assure_directory_exists(filepath, is_file=True)
        entry = RefLogEntry((bin_to_hex(oldbinsha), bin_to_hex(newbinsha),
                             Actor.committer(config_reader),
                             (int(time.time()), time.altzone), message))

        lf = LockFile(filepath)
        lf._obtain_lock_or_raise()

        fd = open(filepath, 'a')
        try:
            fd.write(repr(entry))
        finally:
            fd.close()
            lf._release_lock()
        #END handle write operation

        return entry
Example #6
    def test_base(self):
        gdb = GitCmdObjectDB(os.path.join(self.rorepo.git_dir, 'objects'), self.rorepo.git)

        # partial to complete - works with everything
        hexsha = bin_to_hex(gdb.partial_to_complete_sha_hex("0.1.6"))
        assert len(hexsha) == 40

        assert bin_to_hex(gdb.partial_to_complete_sha_hex(hexsha[:20])) == hexsha

        # fails with BadObject
        for invalid_rev in ("0000", "bad/ref", "super bad"):
            self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, invalid_rev)
Example #7
    def test_base(self):
        gdb = GitCmdObjectDB(os.path.join(self.rorepo.git_dir, "objects"), self.rorepo.git)

        # partial to complete - works with everything
        hexsha = bin_to_hex(gdb.partial_to_complete_sha_hex("0.1.6"))
        assert len(hexsha) == 40

        assert bin_to_hex(gdb.partial_to_complete_sha_hex(hexsha[:20])) == hexsha

        # fails with BadObject
        for invalid_rev in ("0000", "bad/ref", "super bad"):
            self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, invalid_rev)
Example #8
    def test_large_data_streaming(self, path):
        ldb = LooseObjectDB(path)
        string_ios = list()         # list of streams we previously created

        # serial mode
        for randomize in range(2):
            desc = (randomize and 'random ') or ''
            print("Creating %s data ..." % desc, file=sys.stderr)
            st = time()
            size, stream = make_memory_file(self.large_data_size_bytes, randomize)
            elapsed = time() - st
            print("Done (in %f s)" % elapsed, file=sys.stderr)
            string_ios.append(stream)

            # writing - due to the compression it will seem faster than it is
            st = time()
            sha = ldb.store(IStream('blob', size, stream)).binsha
            elapsed_add = time() - st
            assert ldb.has_object(sha)
            db_file = ldb.readable_db_object_path(bin_to_hex(sha))
            fsize_kib = os.path.getsize(db_file) / 1000

            size_kib = size / 1000
            print("Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" %
                  (size_kib, fsize_kib, desc, elapsed_add, size_kib / (elapsed_add or 1)), file=sys.stderr)

            # reading all at once
            st = time()
            ostream = ldb.stream(sha)
            shadata = ostream.read()
            elapsed_readall = time() - st

            stream.seek(0)
            assert shadata == stream.getvalue()
            print("Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" %
                  (size_kib, desc, elapsed_readall, size_kib / (elapsed_readall or 1)), file=sys.stderr)

            # reading in chunks of 1 MiB
            cs = 512 * 1000
            chunks = list()
            st = time()
            ostream = ldb.stream(sha)
            while True:
                data = ostream.read(cs)
                chunks.append(data)
                if len(data) < cs:
                    break
            # END read in chunks
            elapsed_readchunks = time() - st

            stream.seek(0)
            assert b''.join(chunks) == stream.getvalue()

            cs_kib = cs / 1000
            print("Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" %
                  (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / (elapsed_readchunks or 1)), file=sys.stderr)

            # del db file so we keep something to do
            ostream = None  # To release the file handle (win)
            remove(db_file)
Example #9
    def test_reading(self):
        gdb = GitDB(fixture_path('../../../.git/objects'))

        # we have packs and loose objects, alternates doesn't necessarily exist
        assert 1 < len(gdb.databases()) < 4

        # access should be possible
        gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976")
        assert isinstance(gdb.info(gitdb_sha), OInfo)
        assert isinstance(gdb.stream(gitdb_sha), OStream)
        assert gdb.size() > 200
        sha_list = list(gdb.sha_iter())
        assert len(sha_list) == gdb.size()

        # This is actually a test for compound functionality, but it doesn't
        # have a separate test module
        # test partial shas
        # this one is uneven and quite short
        assert gdb.partial_to_complete_sha_hex('155b6') == hex_to_bin(
            "155b62a9af0aa7677078331e111d0f7aa6eb4afc")

        # mix even/uneven hexshas
        for i, binsha in enumerate(sha_list):
            assert gdb.partial_to_complete_sha_hex(
                bin_to_hex(binsha)[:8 - (i % 2)]) == binsha
        # END for each sha

        self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex,
                              "0000")
Example #10
    def test_loose_correctness(self):
        """based on the pack(s) of our packed object DB, we will just copy and verify all objects in the back
        into the loose object db (memory).
        This should help finding dormant issues like this one https://github.com/gitpython-developers/GitPython/issues/220
        faster
        :note: It doesn't seem this test can find the issue unless the given pack contains highly compressed
        data files, like archives."""
        from gitdb.util import bin_to_hex
        pdb = GitDB(os.path.join(self.gitrepopath, 'objects'))
        mdb = MemoryDB()
        for c, sha in enumerate(pdb.sha_iter()):
            ostream = pdb.stream(sha)
            # the issue only showed on larger files which are hardly compressible ...
            if ostream.type != str_blob_type:
                continue
            istream = IStream(ostream.type, ostream.size, ostream.stream)
            mdb.store(istream)
            assert istream.binsha == sha, "Failed on object %s" % bin_to_hex(
                sha).decode('ascii')
            # this can fail ... sometimes, so the packs dataset should be huge
            assert len(mdb.stream(sha).read()) == ostream.size

            if c and c % 1000 == 0:
                print(
                    "Verified %i loose object compression/decompression cycles"
                    % c,
                    file=sys.stderr)
            mdb._cache.clear()
Example #11
 def _assert_index_entries(self, entries, trees):
     index = IndexFile.from_tree(
         self.rorepo, *[self.rorepo.tree(bin_to_hex(t)) for t in trees])
     assert entries
     assert len(index.entries) == len(entries)
     for entry in entries:
         assert (entry.path, entry.stage) in index.entries
Example #12
	def test_reading(self):
		gdb = GitDB(fixture_path('../../.git/objects'))
		
		# we have packs and loose objects, alternates doesn't necessarily exist
		assert 1 < len(gdb.databases()) < 4
		
		# access should be possible
		gitdb_sha = hex_to_bin("5690fd0d3304f378754b23b098bd7cb5f4aa1976")
		assert isinstance(gdb.info(gitdb_sha), OInfo)
		assert isinstance(gdb.stream(gitdb_sha), OStream)
		assert gdb.size() > 200
		sha_list = list(gdb.sha_iter())
		assert len(sha_list) == gdb.size()
		
		
		# This is actually a test for compound functionality, but it doesn't 
		# have a separate test module
		# test partial shas
		# this one is uneven and quite short
		assert gdb.partial_to_complete_sha_hex('155b6') == hex_to_bin("155b62a9af0aa7677078331e111d0f7aa6eb4afc")
		
		# mix even/uneven hexshas
		for i, binsha in enumerate(sha_list):
			assert gdb.partial_to_complete_sha_hex(bin_to_hex(binsha)[:8-(i%2)]) == binsha
		# END for each sha
		
		self.failUnlessRaises(BadObject, gdb.partial_to_complete_sha_hex, "0000")
Example #13
def short_to_long(odb, hexsha):
    """:return: long hexadecimal sha1 from the given less-than-40 byte hexsha
        or None if no candidate could be found.
    :param hexsha: hexsha with less than 40 byte"""
    try:
        return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha))
    except BadObject:
        return None
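
A quick illustration of the helper above; this is a sketch that assumes a GitPython Repo whose odb attribute serves as the object database, and an arbitrary unique prefix.

from git import Repo

repo = Repo(".")                             # assumed existing repository
hexsha = short_to_long(repo.odb, "155b6")    # any sufficiently unique hex prefix
if hexsha is None:
    print("no object matches that prefix")
else:
    print(hexsha.decode("ascii"))            # bin_to_hex returns hex digits as bytes
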
Example #14
def short_to_long(odb, hexsha):
    """:return: long hexadecimal sha1 from the given less-than-40 byte hexsha
		or None if no candidate could be found.
	:param hexsha: hexsha with less than 40 byte"""
    try:
        return bin_to_hex(odb.partial_to_complete_sha_hex(hexsha))
    except BadObject:
        return None
Example #15
def create_submodule_tree(odb, submodule_commit_hexsha):
    submodule_conf = '/Users/kenjif/test_gitmodules'
    conf_mode, conf_binsha = write_blob_from_path(odb, submodule_conf)
    tree_contents = []
    tree_contents.append((conf_mode, conf_binsha, '.gitmodules'))
    tree_contents.append(get_submodule_tree_content(submodule_commit_hexsha, 'jEdit'))

    tree_mode, binsha = mktree_from_iter(odb, tree_contents)
    return bin_to_hex(binsha)
Example #16
def create_submodule_tree(odb, submodule_commit_hexsha):
    submodule_conf = "/Users/kenjif/test_gitmodules"
    conf_mode, conf_binsha = write_blob(odb, submodule_conf)
    tree_contents = []
    tree_contents.append((conf_mode, conf_binsha, ".gitmodules"))
    tree_contents.append(get_submodule_tree_content(submodule_commit_hexsha, "jEdit"))

    tree_mode, binsha = mktree_from_iter(odb, tree_contents)
    return bin_to_hex(binsha)
Example #17
    def test_basics(self, path):
        ldb = LooseObjectDB(path)

        # write data
        self._assert_object_writing(ldb)

        # verify sha iteration and size
        shas = list(ldb.sha_iter())
        assert shas and len(shas[0]) == 20

        assert len(shas) == ldb.size()

        # verify find short object
        long_sha = bin_to_hex(shas[-1])
        for short_sha in (long_sha[:20], long_sha[:5]):
            assert bin_to_hex(ldb.partial_to_complete_sha_hex(short_sha)) == long_sha
        # END for each sha

        self.failUnlessRaises(BadObject, ldb.partial_to_complete_sha_hex, '0000')
Example #18
 def test_basics(self, path):
     ldb = LooseObjectDB(path)
     
     # write data
     self._assert_object_writing(ldb)
     self._assert_object_writing_async(ldb)
 
     # verify sha iteration and size
     shas = list(ldb.sha_iter())
     assert shas and len(shas[0]) == 20
     
     assert len(shas) == ldb.size()
     
     # verify find short object
     long_sha = bin_to_hex(shas[-1])
     for short_sha in (long_sha[:20], long_sha[:5]):
         assert bin_to_hex(ldb.partial_to_complete_sha_hex(short_sha)) == long_sha
     # END for each sha
     
     self.failUnlessRaises(BadObject, ldb.partial_to_complete_sha_hex, '0000')
Example #19
    def create(cls, object_iter, base_dir, object_count=None, zlib_compression=zlib.Z_BEST_SPEED):
        """Create a new on-disk entity comprised of a properly named pack file and a properly named
        and corresponding index file. The pack contains all OStream objects contained in object iter.
        :param base_dir: directory which is to contain the files
        :return: PackEntity instance initialized with the new pack

        **Note:** for more information on the other parameters see the write_pack method"""
        pack_fd, pack_path = tempfile.mkstemp('', 'pack', base_dir)
        index_fd, index_path = tempfile.mkstemp('', 'index', base_dir)
        pack_write = lambda d: os.write(pack_fd, d)
        index_write = lambda d: os.write(index_fd, d)

        pack_binsha, index_binsha = cls.write_pack(object_iter, pack_write, index_write, object_count, zlib_compression)
        os.close(pack_fd)
        os.close(index_fd)

        fmt = "pack-%s.%s"
        new_pack_path = os.path.join(base_dir, fmt % (bin_to_hex(pack_binsha), 'pack'))
        new_index_path = os.path.join(base_dir, fmt % (bin_to_hex(pack_binsha), 'idx'))
        os.rename(pack_path, new_pack_path)
        os.rename(index_path, new_index_path)

        return cls(new_pack_path)
Example #20
 def partial_to_complete_sha_hex(self, partial_hexsha):
     """:return: 20 byte binary sha1 string which matches the given name uniquely
     :param partial_hexsha: hexadecimal partial name (bytes or ascii string)
     :raise AmbiguousObjectName:
     :raise BadObject: """
     candidate = None
     for binsha in self.sha_iter():
         if bin_to_hex(binsha).startswith(force_bytes(partial_hexsha)):
             # it can't ever find the same object twice
             if candidate is not None:
                 raise AmbiguousObjectName(partial_hexsha)
             candidate = binsha
     # END for each object
     if candidate is None:
         raise BadObject(partial_hexsha)
     return candidate
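
The lookup above is a plain linear scan over sha_iter(), so it is simple but costs O(number of objects) per call. The self-contained sketch below (temporary directory and test data are assumptions) exercises the same API on a fresh LooseObjectDB: a unique prefix resolves to the full binary sha, while an unknown prefix raises BadObject.

import tempfile
from io import BytesIO

from gitdb import IStream, LooseObjectDB
from gitdb.exc import BadObject
from gitdb.util import bin_to_hex

ldb = LooseObjectDB(tempfile.mkdtemp())

data = b"hello gitdb"
binsha = ldb.store(IStream('blob', len(data), BytesIO(data))).binsha
hexsha = bin_to_hex(binsha)                  # 40 hex digits as bytes

# a unique prefix resolves back to the full 20-byte binary sha ...
assert ldb.partial_to_complete_sha_hex(hexsha[:7]) == binsha

# ... while an unknown prefix raises BadObject
try:
    ldb.partial_to_complete_sha_hex("0000")
except BadObject:
    print("no object with that prefix")
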
Example #21
 def partial_to_complete_sha_hex(self, partial_hexsha):
     """:return: 20 byte binary sha1 string which matches the given name uniquely
     :param partial_hexsha: hexadecimal partial name
     :raise AmbiguousObjectName: 
     :raise BadObject: """
     candidate = None
     for binsha in self.sha_iter():
         if bin_to_hex(binsha).startswith(partial_hexsha):
             # it can't ever find the same object twice
             if candidate is not None:
                 raise AmbiguousObjectName(partial_hexsha)
             candidate = binsha
     # END for each object
     if candidate is None:
         raise BadObject(partial_hexsha)
     return candidate
Example #22
def commit_from_binsha(repo, binsha, org_commit, parents=None):
    tree = Tree.new(repo, bin_to_hex(binsha))

    env = os.environ

    offset = altz_to_utctz_str(org_commit.author_tz_offset)
    date = org_commit.authored_date
    env[Commit.env_author_date] = '{} {}'.format(date, offset)

    offset = altz_to_utctz_str(org_commit.committer_tz_offset)
    date = org_commit.committed_date
    env[Commit.env_committer_date] = '{} {}'.format(date, offset)

    return Commit.create_from_tree(repo, tree, org_commit.message, parents,
                                   head=True,
                                   author=org_commit.author,
                                   committer=org_commit.committer)
Example #23
 def _map_loose_object(self, sha):
     """
     :return: memory map of that file to allow random read access
     :raise BadObject: if object could not be located"""
     db_path = self.db_path(self.object_path(bin_to_hex(sha)))
     try:
         return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
     except OSError as e:
         if e.errno != ENOENT:
             # try again without noatime
             try:
                 return file_contents_ro_filepath(db_path)
             except OSError:
                 raise BadObject(sha)
             # didn't work because of our flag, don't try it again
             self._fd_open_flags = 0
         else:
             raise BadObject(sha)
Example #24
 def _map_loose_object(self, sha):
     """
     :return: memory map of that file to allow random read access
     :raise BadObject: if object could not be located"""
     db_path = self.db_path(self.object_path(bin_to_hex(sha)))
     try:
         return file_contents_ro_filepath(db_path, flags=self._fd_open_flags)
     except OSError as e:
         if e.errno != ENOENT:
             # try again without noatime
             try:
                 return file_contents_ro_filepath(db_path)
             except OSError:
                 raise BadObject(sha)
             # didn't work because of our flag, don't try it again
             self._fd_open_flags = 0
         else:
             raise BadObject(sha)
Example #25
File: util.py Project: euneon/kenja
def commit_from_binsha(repo, binsha, org_commit, parents=None):
    env = os.environ

    author_date = "%d %s" % (org_commit.authored_date, altz_to_utctz_str(org_commit.author_tz_offset))
    env[Commit.env_author_date] = author_date

    committer_date = "%d %s" % (org_commit.committed_date, altz_to_utctz_str(org_commit.committer_tz_offset))
    env[Commit.env_committer_date] = committer_date

    env[Actor.env_author_name] = org_commit.author.name.encode(org_commit.encoding)
    env[Actor.env_author_email] = org_commit.author.email or ""

    env[Actor.env_committer_name] = org_commit.committer.name.encode(org_commit.encoding)
    env[Actor.env_committer_email] = org_commit.committer.email or ""

    message = org_commit.message.encode(org_commit.encoding)

    tree = Tree.new(repo, bin_to_hex(binsha))

    return Commit.create_from_tree(repo, tree, message, parents, True)
Example #26
def commit_from_binsha(repo, binsha, org_commit, parents=None):
    message = org_commit.message.encode(org_commit.encoding)

    tree = Tree.new(repo, bin_to_hex(binsha))

    new_commit = Commit(repo, Commit.NULL_BIN_SHA, tree,
                        org_commit.author, org_commit.authored_date, org_commit.author_tz_offset,
                        org_commit.committer, org_commit.committed_date, org_commit.committer_tz_offset,
                        message, parents, org_commit.encoding)
    stream = StringIO()
    new_commit._serialize(stream)
    streamlen = stream.tell()
    stream.seek(0)
    istream = repo.odb.store(IStream(Commit.type, streamlen, stream))
    new_commit.binsha = istream.binsha

    try:
        repo.head.set_commit(new_commit, logmsg="commit: %s" % message)
    except ValueError:
        master = git.refs.Head.create(repo, repo.head.ref, new_commit, logmsg="commit (initial): %s" % message)
        repo.head.set_reference(master, logmsg='commit: Switching to %s' % master)

    return new_commit
Example #27
    def test_loose_correctness(self):
        """based on the pack(s) of our packed object DB, we will just copy and verify all objects in the back
        into the loose object db (memory).
        This should help finding dormant issues like this one https://github.com/gitpython-developers/GitPython/issues/220
        faster
        :note: It doesn't seem this test can find the issue unless the given pack contains highly compressed
        data files, like archives."""
        from gitdb.util import bin_to_hex
        pdb = GitDB(os.path.join(self.gitrepopath, 'objects'))
        mdb = MemoryDB()
        for c, sha in enumerate(pdb.sha_iter()):
            ostream = pdb.stream(sha)
            # the issue only showed on larger files which are hardly compressible ...
            if ostream.type != str_blob_type:
                continue
            istream = IStream(ostream.type, ostream.size, ostream.stream)
            mdb.store(istream)
            assert istream.binsha == sha, "Failed on object %s" % bin_to_hex(sha).decode('ascii')
            # this can fail ... sometimes, so the packs dataset should be huge
            assert len(mdb.stream(sha).read()) == ostream.size

            if c and c % 1000 == 0:
                print("Verified %i loose object compression/decompression cycles" % c, file=sys.stderr)
            mdb._cache.clear()
Example #28
 def has_object(self, sha):
     try:
         self.readable_db_object_path(bin_to_hex(sha))
         return True
     except BadObject:
         return False
Example #29
File: db.py Project: daleha/git-kit
	def stream(self, sha):
		"""For now, all lookup is done by pygit itself"""
		hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha))
		return OStream(hex_to_bin(hexsha), typename, size, stream)
Example #30
 def __str__(self):
     """:return: string of our SHA1 as understood by all git commands"""
     return bin_to_hex(self.binsha)
Example #31
    def test_rev_parse(self):
        rev_parse = self.rorepo.rev_parse

        # try special case: This one failed at some point, make sure its fixed
        assert rev_parse(
            "33ebe").hexsha == "33ebe7acec14b25c5f84f35a664803fcab2f7781"

        # start from reference
        num_resolved = 0

        for ref_no, ref in enumerate(Reference.iter_items(self.rorepo)):
            path_tokens = ref.path.split("/")
            for pt in range(len(path_tokens)):
                path_section = '/'.join(path_tokens[-(pt + 1):])
                try:
                    obj = self._assert_rev_parse(path_section)
                    assert obj.type == ref.object.type
                    num_resolved += 1
                except (BadName, BadObject):
                    print("failed on %s" % path_section)
                    # is fine, in case we have something like 112, which belongs to remotes/rname/merge-requests/112
                    pass
                # END exception handling
            # END for each token
            if ref_no == 3 - 1:
                break
        # END for each reference
        assert num_resolved

        # it works with tags !
        tag = self._assert_rev_parse('0.1.4')
        assert tag.type == 'tag'

        # try full sha directly ( including type conversion )
        assert tag.object == rev_parse(tag.object.hexsha)
        self._assert_rev_parse_types(tag.object.hexsha, tag.object)

        # multiple tree types result in the same tree: HEAD^{tree}^{tree}:CHANGES
        rev = '0.1.4^{tree}^{tree}'
        assert rev_parse(rev) == tag.object.tree
        assert rev_parse(rev + ':CHANGES') == tag.object.tree['CHANGES']

        # try to get parents from first revision - it should fail as no such revision
        # exists
        first_rev = "33ebe7acec14b25c5f84f35a664803fcab2f7781"
        commit = rev_parse(first_rev)
        assert len(commit.parents) == 0
        assert commit.hexsha == first_rev
        self.failUnlessRaises(BadName, rev_parse, first_rev + "~")
        self.failUnlessRaises(BadName, rev_parse, first_rev + "^")

        # short SHA1
        commit2 = rev_parse(first_rev[:20])
        assert commit2 == commit
        commit2 = rev_parse(first_rev[:5])
        assert commit2 == commit

        # todo: dereference tag into a blob 0.1.7^{blob} - quite a special one
        # needs a tag which points to a blob

        # ref^0 returns commit being pointed to, same with ref~0, and ^{}
        tag = rev_parse('0.1.4')
        for token in (('~0', '^0', '^{}')):
            assert tag.object == rev_parse('0.1.4%s' % token)
        # END handle multiple tokens

        # try partial parsing
        max_items = 40
        for i, binsha in enumerate(self.rorepo.odb.sha_iter()):
            assert rev_parse(
                bin_to_hex(binsha)[:8 -
                                   (i % 2)].decode('ascii')).binsha == binsha
            if i > max_items:
                # this is rather slow currently, as rev_parse returns an object
                # which requires accessing packs, it has some additional overhead
                break
        # END for each binsha in repo

        # missing closing brace commit^{tree
        self.failUnlessRaises(ValueError, rev_parse, '0.1.4^{tree')

        # missing starting brace
        self.failUnlessRaises(ValueError, rev_parse, '0.1.4^tree}')

        # REVLOG
        #######
        head = self.rorepo.head

        # need to specify a ref when using the @ syntax
        self.failUnlessRaises(BadObject, rev_parse,
                              "%s@{0}" % head.commit.hexsha)

        # uses HEAD.ref by default
        assert rev_parse('@{0}') == head.commit
        if not head.is_detached:
            refspec = '%s@{0}' % head.ref.name
            assert rev_parse(refspec) == head.ref.commit
            # all additional specs work as well
            assert rev_parse(refspec + "^{tree}") == head.commit.tree
            assert rev_parse(refspec + ":CHANGES").type == 'blob'
        # END operate on non-detached head

        # position doesn't exist
        self.failUnlessRaises(IndexError, rev_parse, '@{10000}')

        # currently, nothing more is supported
        self.failUnlessRaises(NotImplementedError, rev_parse, "@{1 week ago}")

        # the last position
        assert rev_parse('@{1}') != head.commit
Example #32
	def test_rev_parse(self):
		rev_parse = self.rorepo.rev_parse
		
		# try special case: This one failed beforehand
		assert rev_parse("33ebe").hexsha == "33ebe7acec14b25c5f84f35a664803fcab2f7781"
		
		# start from reference
		num_resolved = 0
		for ref in Reference.iter_items(self.rorepo):
			path_tokens = ref.path.split("/")
			for pt in range(len(path_tokens)):
				path_section = '/'.join(path_tokens[-(pt+1):]) 
				try:
					obj = self._assert_rev_parse(path_section)
					assert obj.type == ref.object.type
					num_resolved += 1
				except BadObject:
					print "failed on %s" % path_section
					# is fine, in case we have something like 112, which belongs to remotes/rname/merge-requests/112
					pass
				# END exception handling
			# END for each token
		# END for each reference
		assert num_resolved
		
		# it works with tags !
		tag = self._assert_rev_parse('0.1.4')
		assert tag.type == 'tag'
		
		# try full sha directly ( including type conversion )
		assert tag.object == rev_parse(tag.object.hexsha)
		self._assert_rev_parse_types(tag.object.hexsha, tag.object)
		
		
		# multiple tree types result in the same tree: HEAD^{tree}^{tree}:CHANGES
		rev = '0.1.4^{tree}^{tree}'
		assert rev_parse(rev) == tag.object.tree
		assert rev_parse(rev+':CHANGES') == tag.object.tree['CHANGES']
		
		
		# try to get parents from first revision - it should fail as no such revision
		# exists
		first_rev = "33ebe7acec14b25c5f84f35a664803fcab2f7781"
		commit = rev_parse(first_rev)
		assert len(commit.parents) == 0
		assert commit.hexsha == first_rev
		self.failUnlessRaises(BadObject, rev_parse, first_rev+"~")
		self.failUnlessRaises(BadObject, rev_parse, first_rev+"^")
		
		# short SHA1
		commit2 = rev_parse(first_rev[:20])
		assert commit2 == commit
		commit2 = rev_parse(first_rev[:5])
		assert commit2 == commit
		
		
		# todo: dereference tag into a blob 0.1.7^{blob} - quite a special one
		# needs a tag which points to a blob
		
		
		# ref^0 returns commit being pointed to, same with ref~0, and ^{}
		tag = rev_parse('0.1.4')
		for token in (('~0', '^0', '^{}')):
			assert tag.object == rev_parse('0.1.4%s' % token)
		# END handle multiple tokens
		
		# try partial parsing
		max_items = 40
		for i, binsha in enumerate(self.rorepo.odb.sha_iter()):
			assert rev_parse(bin_to_hex(binsha)[:8-(i%2)]).binsha == binsha
			if i > max_items:
				# this is rather slow currently, as rev_parse returns an object
				# which requires accessing packs, it has some additional overhead
				break
		# END for each binsha in repo
		
		# missing closing brace commit^{tree
		self.failUnlessRaises(ValueError, rev_parse, '0.1.4^{tree')
		
		# missing starting brace
		self.failUnlessRaises(ValueError, rev_parse, '0.1.4^tree}')
		
		
		# cannot handle rev-log for now 
		self.failUnlessRaises(ValueError, rev_parse, "hi@there")
Example #33
 def __str__(self):
     """:return: string of our SHA1 as understood by all git commands"""
     return bin_to_hex(self.binsha)
Example #34
 def info(self, sha):
     hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
     return OInfo(hex_to_bin(hexsha), typename, size)
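
For context, a small sketch (assuming a GitPython Repo whose default object database routes header lookups through git, much like the method above) of reading an object header for a binary sha:

from git import Repo

repo = Repo(".")                    # assumed existing repository
binsha = repo.head.commit.binsha    # 20-byte binary sha of HEAD's commit
info = repo.odb.info(binsha)        # OInfo with binsha, type and size
print(info.type, info.size)
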
Example #35
 def hexsha(self):
     """:return: our sha, hex encoded, 40 bytes"""
     return bin_to_hex(self[0])
Example #36
 def hexsha(self):
     return bin_to_hex(self[0])
Example #37
	def test_large_data_streaming(self, path):
		ldb = LooseObjectDB(path)
		string_ios = list()			# list of streams we previously created
		
		# serial mode 
		for randomize in range(2):
			desc = (randomize and 'random ') or ''
			print >> sys.stderr, "Creating %s data ..." % desc
			st = time()
			size, stream = make_memory_file(self.large_data_size_bytes, randomize)
			elapsed = time() - st
			print >> sys.stderr, "Done (in %f s)" % elapsed
			string_ios.append(stream)
			
			# writing - due to the compression it will seem faster than it is 
			st = time()
			sha = ldb.store(IStream('blob', size, stream)).binsha
			elapsed_add = time() - st
			assert ldb.has_object(sha)
			db_file = ldb.readable_db_object_path(bin_to_hex(sha))
			fsize_kib = os.path.getsize(db_file) / 1000
			
			
			size_kib = size / 1000
			print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
			
			# reading all at once
			st = time()
			ostream = ldb.stream(sha)
			shadata = ostream.read()
			elapsed_readall = time() - st
			
			stream.seek(0)
			assert shadata == stream.getvalue()
			print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
			
			
			# reading in chunks of 1 MiB
			cs = 512*1000
			chunks = list()
			st = time()
			ostream = ldb.stream(sha)
			while True:
				data = ostream.read(cs)
				chunks.append(data)
				if len(data) < cs:
					break
			# END read in chunks
			elapsed_readchunks = time() - st
			
			stream.seek(0)
			assert ''.join(chunks) == stream.getvalue()
			
			cs_kib = cs / 1000
			print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
			
			# del db file so we keep something to do
			os.remove(db_file)
		# END for each randomization factor
		
		
		# multi-threaded mode
		# want two, should be supported by most of todays cpus
		pool.set_size(2)
		total_kib = 0
		nsios = len(string_ios)
		for stream in string_ios:
			stream.seek(0)
			total_kib += len(stream.getvalue()) / 1000
		# END rewind
		
		def istream_iter():
			for stream in string_ios:
				stream.seek(0)
				yield IStream(str_blob_type, len(stream.getvalue()), stream)
			# END for each stream
		# END util
		
		# write multiple objects at once, involving concurrent compression
		reader = IteratorReader(istream_iter())
		istream_reader = ldb.store_async(reader)
		istream_reader.task().max_chunksize = 1
		
		st = time()
		istreams = istream_reader.read(nsios)
		assert len(istreams) == nsios
		elapsed = time() - st
		
		print >> sys.stderr, "Threads(%i): Compressed %i KiB of data in loose odb in %f s ( %f Write KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)
		
		# decompress multiple at once, by reading them
		# chunk size is not important as the stream will not really be decompressed
		
		# until its read
		istream_reader = IteratorReader(iter([ i.binsha for i in istreams ]))
		ostream_reader = ldb.stream_async(istream_reader)
		
		chunk_task = TestStreamReader(ostream_reader, "chunker", None)
		output_reader = pool.add_task(chunk_task)
		output_reader.task().max_chunksize = 1
		
		st = time()
		assert len(output_reader.read(nsios)) == nsios
		elapsed = time() - st
		
		print >> sys.stderr, "Threads(%i): Decompressed %i KiB of data in loose odb in %f s ( %f Read KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)
		
		# store the files, and read them back. For the reading, we use a task 
		# as well which is chunked into one item per task. Reading all will
		# very quickly result in two threads handling two bytestreams of 
		# chained compression/decompression streams
		reader = IteratorReader(istream_iter())
		istream_reader = ldb.store_async(reader)
		istream_reader.task().max_chunksize = 1
		
		istream_to_sha = lambda items: [ i.binsha for i in items ]
		istream_reader.set_post_cb(istream_to_sha)
		
		ostream_reader = ldb.stream_async(istream_reader)
		
		chunk_task = TestStreamReader(ostream_reader, "chunker", None)
		output_reader = pool.add_task(chunk_task)
		output_reader.max_chunksize = 1
		
		st = time()
		assert len(output_reader.read(nsios)) == nsios
		elapsed = time() - st
		
		print >> sys.stderr, "Threads(%i): Compressed and decompressed and read %i KiB of data in loose odb in %f s ( %f Combined KiB / s)" % (pool.size(), total_kib, elapsed, total_kib / elapsed)
Example #38
# for item in repo.odb.sha_iter():
#     info = repo.odb.info(item);
#     shaBin = info.binsha
#     shaStr = info.hexsha.decode('ascii')
#     type = info.type
#     typeStr = type.decode('ascii')
#     if typeStr == 'commit':
#         # tree = Tree(repo, shaBin)
#         print(info.binsha, ":", shaStr)

git = repo.git
print(git.execute('git cat-file -t ba9a146e9da6f4aa1687f1a8a78f25e0cb748dff'))

tree = Tree(repo, hex_to_bin('ba9a146e9da6f4aa1687f1a8a78f25e0cb748dff'))
for shaBin, model, name in tree._cache:
    print(bin_to_hex(shaBin), name)

# print("--------------")
# comm = Commit(repo, b'\x07$\xe5\x92f\xcb]\x97\xd4\xe9s\x9c\xaf\xa0b\xfb\n1\xa1\x95')
# print(comm.message)
# print(comm.hexsha)
# print(comm.tree)
# print(comm.parents)
# print(comm.committer.name, comm.committer.email)
# print(comm.author.name, comm.author.email)
#
# blob = Blob(repo, b'\xedH\xeb\xa2\x92w\xb0w\xf592\x81\xe5_\xe8\xf77\x87c\x8c')
# print(blob.data_stream.read().decode('utf-8'))
# print(blob.hexsha)

# commit=repo.active_branch.commit
Example #39
 def hexsha(self):
     """:return: our sha, hex encoded, 40 bytes"""
     return bin_to_hex(self[0])
Example #40
 def hexsha(self):
     """:return: 40 byte hex version of our 20 byte binary sha"""
     # b2a_hex produces bytes
     return bin_to_hex(self.binsha).decode('ascii')
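
The .decode('ascii') above matters because bin_to_hex wraps binascii.b2a_hex and therefore returns bytes on Python 3; a tiny sketch (the sha value is only an example that also appears elsewhere on this page):

from gitdb.util import bin_to_hex, hex_to_bin

binsha = hex_to_bin("155b62a9af0aa7677078331e111d0f7aa6eb4afc")
hexsha = bin_to_hex(binsha)          # b'155b62a9...': 40 hex digits as bytes
assert isinstance(hexsha, bytes)
print(hexsha.decode("ascii"))        # plain str, as git commands expect it
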
Example #41
 def _assert_index_entries(self, entries, trees):
     index = IndexFile.from_tree(self.rorepo, *[self.rorepo.tree(bin_to_hex(t).decode('ascii')) for t in trees])
     assert entries
     assert len(index.entries) == len(entries)
     for entry in entries:
         assert (entry.path, entry.stage) in index.entries
Example #42
        children = []
        for parent in commit.parents:
            if parent.hexsha not in visited:
                children.append(parent.hexsha)

        if children:
            nodes.extend(children)
        else:
            nodes.pop()
            visited.add(node)
            post.append(node)

    return post


if __name__ == '__main__':
    repo = Repo.init('test_git')
    # (mode, binsha) = write_tree(repo.odb, 'temp')

    # (mode, binsha) = write_tree(repo.odb, 'temp/00')
    # (mode, binsha) = write_tree(repo.odb, 'temp/01')

    paths = ['temp/00', 'temp/01']
    names = ['a', 'b']

    (mode, binsha) = write_paths(repo.odb, paths, names)

    tree = Tree.new(repo, bin_to_hex(binsha))
    c = Commit.create_from_tree(repo, tree, 'test commit', None, True)
Example #43
	def test_large_data_streaming(self, rwrepo):
		# TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
		# It should be shared if possible
		ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects'))
		
		for randomize in range(2):
			desc = (randomize and 'random ') or ''
			print >> sys.stderr, "Creating %s data ..." % desc
			st = time()
			size, stream = make_memory_file(self.large_data_size_bytes, randomize)
			elapsed = time() - st
			print >> sys.stderr, "Done (in %f s)" % elapsed
			
			# writing - due to the compression it will seem faster than it is 
			st = time()
			binsha = ldb.store(IStream('blob', size, stream)).binsha
			elapsed_add = time() - st
			assert ldb.has_object(binsha)
			db_file = ldb.readable_db_object_path(bin_to_hex(binsha))
			fsize_kib = os.path.getsize(db_file) / 1000
			
			
			size_kib = size / 1000
			print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)
			
			# reading all at once
			st = time()
			ostream = ldb.stream(binsha)
			shadata = ostream.read()
			elapsed_readall = time() - st
			
			stream.seek(0)
			assert shadata == stream.getvalue()
			print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, elapsed_readall, size_kib / elapsed_readall)
			
			
			# reading in chunks of 1 MiB
			cs = 512*1000
			chunks = list()
			st = time()
			ostream = ldb.stream(binsha)
			while True:
				data = ostream.read(cs)
				chunks.append(data)
				if len(data) < cs:
					break
			# END read in chunks
			elapsed_readchunks = time() - st
			
			stream.seek(0)
			assert ''.join(chunks) == stream.getvalue()
			
			cs_kib = cs / 1000
			print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, elapsed_readchunks, size_kib / elapsed_readchunks)
			
			# del db file so git has something to do
			os.remove(db_file)
			
			# VS. CGIT 
			##########
			# CGIT ! Can using the cgit programs be faster ?
			proc = rwrepo.git.hash_object('-w', '--stdin', as_process=True, istream=subprocess.PIPE)
			
			# write file - pump everything in at once to be a fast as possible
			data = stream.getvalue()	# cache it
			st = time()
			proc.stdin.write(data)
			proc.stdin.close()
			gitsha = proc.stdout.read().strip()
			proc.wait()
			gelapsed_add = time() - st
			del(data)
			assert gitsha == bin_to_hex(binsha)		# we do it the same way, right ?
			
			#  as its the same sha, we reuse our path
			fsize_kib = os.path.getsize(db_file) / 1000
			print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)" % (size_kib, fsize_kib, desc, gelapsed_add, size_kib / gelapsed_add)
			
			# compare ... 
			print >> sys.stderr, "Git-Python is %f %% faster than git when adding big %s files" % (100.0 - (elapsed_add / gelapsed_add) * 100, desc)
			
			
			# read all
			st = time()
			s, t, size, data = rwrepo.git.get_object_data(gitsha)
			gelapsed_readall = time() - st
			print >> sys.stderr, "Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, gelapsed_readall, size_kib / gelapsed_readall)

			# compare 
			print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %sfiles" % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc)
			
			
			# read chunks
			st = time()
			s, t, size, stream = rwrepo.git.stream_object_data(gitsha)
			while True:
				data = stream.read(cs)
				if len(data) < cs:
					break
			# END read stream
			gelapsed_readchunks = time() - st
			print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)" % (size_kib, desc, cs_kib, gelapsed_readchunks, size_kib / gelapsed_readchunks)
			
			# compare 
			print >> sys.stderr, "Git-Python is %f %% faster than git when reading big %s files in chunks" % (100.0 - (elapsed_readchunks / gelapsed_readchunks) * 100, desc)
Example #44
 def hexsha(self):
     """:return: 40 byte hex version of our 20 byte binary sha"""
     return bin_to_hex(self.binsha)
Example #45
    def test_large_data_streaming(self, rwrepo):
        # TODO: This part overlaps with the same file in gitdb.test.performance.test_stream
        # It should be shared if possible
        ldb = LooseObjectDB(os.path.join(rwrepo.git_dir, 'objects'))

        for randomize in range(2):
            desc = (randomize and 'random ') or ''
            print("Creating %s data ..." % desc, file=sys.stderr)
            st = time()
            size, stream = make_memory_file(self.large_data_size_bytes,
                                            randomize)
            elapsed = time() - st
            print("Done (in %f s)" % elapsed, file=sys.stderr)

            # writing - due to the compression it will seem faster than it is
            st = time()
            binsha = ldb.store(IStream('blob', size, stream)).binsha
            elapsed_add = time() - st
            assert ldb.has_object(binsha)
            db_file = ldb.readable_db_object_path(bin_to_hex(binsha))
            fsize_kib = os.path.getsize(db_file) / 1000

            size_kib = size / 1000
            msg = "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)"
            msg %= (size_kib, fsize_kib, desc, elapsed_add,
                    size_kib / elapsed_add)
            print(msg, file=sys.stderr)

            # reading all at once
            st = time()
            ostream = ldb.stream(binsha)
            shadata = ostream.read()
            elapsed_readall = time() - st

            stream.seek(0)
            assert shadata == stream.getvalue()
            msg = "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)"
            msg %= (size_kib, desc, elapsed_readall,
                    size_kib / elapsed_readall)
            print(msg, file=sys.stderr)

            # reading in chunks of 1 MiB
            cs = 512 * 1000
            chunks = list()
            st = time()
            ostream = ldb.stream(binsha)
            while True:
                data = ostream.read(cs)
                chunks.append(data)
                if len(data) < cs:
                    break
            # END read in chunks
            elapsed_readchunks = time() - st

            stream.seek(0)
            assert b''.join(chunks) == stream.getvalue()

            cs_kib = cs / 1000
            print(
                "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)"
                % (size_kib, desc, cs_kib, elapsed_readchunks,
                   size_kib / elapsed_readchunks),
                file=sys.stderr)

            # del db file so git has something to do
            os.remove(db_file)

            # VS. CGIT
            ##########
            # CGIT ! Can using the cgit programs be faster ?
            proc = rwrepo.git.hash_object('-w',
                                          '--stdin',
                                          as_process=True,
                                          istream=subprocess.PIPE)

            # write file - pump everything in at once to be a fast as possible
            data = stream.getvalue()  # cache it
            st = time()
            proc.stdin.write(data)
            proc.stdin.close()
            gitsha = proc.stdout.read().strip()
            proc.wait()
            gelapsed_add = time() - st
            del (data)
            assert gitsha == bin_to_hex(
                binsha)  # we do it the same way, right ?

            #  as its the same sha, we reuse our path
            fsize_kib = os.path.getsize(db_file) / 1000
            msg = "Added %i KiB (filesize = %i KiB) of %s data to using git-hash-object in %f s ( %f Write KiB / s)"
            msg %= (size_kib, fsize_kib, desc, gelapsed_add,
                    size_kib / gelapsed_add)
            print(msg, file=sys.stderr)

            # compare ...
            print(
                "Git-Python is %f %% faster than git when adding big %s files"
                % (100.0 - (elapsed_add / gelapsed_add) * 100, desc),
                file=sys.stderr)

            # read all
            st = time()
            s, t, size, data = rwrepo.git.get_object_data(gitsha)
            gelapsed_readall = time() - st
            print(
                "Read %i KiB of %s data at once using git-cat-file in %f s ( %f Read KiB / s)"
                % (size_kib, desc, gelapsed_readall,
                   size_kib / gelapsed_readall),
                file=sys.stderr)

            # compare
            print(
                "Git-Python is %f %% faster than git when reading big %sfiles"
                % (100.0 - (elapsed_readall / gelapsed_readall) * 100, desc),
                file=sys.stderr)

            # read chunks
            st = time()
            s, t, size, stream = rwrepo.git.stream_object_data(gitsha)
            while True:
                data = stream.read(cs)
                if len(data) < cs:
                    break
            # END read stream
            gelapsed_readchunks = time() - st
            msg = "Read %i KiB of %s data in %i KiB chunks from git-cat-file in %f s ( %f Read KiB / s)"
            msg %= (size_kib, desc, cs_kib, gelapsed_readchunks,
                    size_kib / gelapsed_readchunks)
            print(msg, file=sys.stderr)

            # compare
            print(
                "Git-Python is %f %% faster than git when reading big %s files in chunks"
                % (100.0 -
                   (elapsed_readchunks / gelapsed_readchunks) * 100, desc),
                file=sys.stderr)
Example #46
    def test_rev_parse(self):
        rev_parse = self.rorepo.rev_parse

        # try special case: This one failed at some point, make sure its fixed
        assert rev_parse("33ebe").hexsha == "33ebe7acec14b25c5f84f35a664803fcab2f7781"

        # start from reference
        num_resolved = 0

        for ref_no, ref in enumerate(Reference.iter_items(self.rorepo)):
            path_tokens = ref.path.split("/")
            for pt in range(len(path_tokens)):
                path_section = '/'.join(path_tokens[-(pt + 1):])
                try:
                    obj = self._assert_rev_parse(path_section)
                    assert obj.type == ref.object.type
                    num_resolved += 1
                except (BadName, BadObject):
                    print("failed on %s" % path_section)
                    # is fine, in case we have something like 112, which belongs to remotes/rname/merge-requests/112
                    pass
                # END exception handling
            # END for each token
            if ref_no == 3 - 1:
                break
        # END for each reference
        assert num_resolved

        # it works with tags !
        tag = self._assert_rev_parse('0.1.4')
        assert tag.type == 'tag'

        # try full sha directly ( including type conversion )
        assert tag.object == rev_parse(tag.object.hexsha)
        self._assert_rev_parse_types(tag.object.hexsha, tag.object)

        # multiple tree types result in the same tree: HEAD^{tree}^{tree}:CHANGES
        rev = '0.1.4^{tree}^{tree}'
        assert rev_parse(rev) == tag.object.tree
        assert rev_parse(rev + ':CHANGES') == tag.object.tree['CHANGES']

        # try to get parents from first revision - it should fail as no such revision
        # exists
        first_rev = "33ebe7acec14b25c5f84f35a664803fcab2f7781"
        commit = rev_parse(first_rev)
        assert len(commit.parents) == 0
        assert commit.hexsha == first_rev
        self.failUnlessRaises(BadName, rev_parse, first_rev + "~")
        self.failUnlessRaises(BadName, rev_parse, first_rev + "^")

        # short SHA1
        commit2 = rev_parse(first_rev[:20])
        assert commit2 == commit
        commit2 = rev_parse(first_rev[:5])
        assert commit2 == commit

        # todo: dereference tag into a blob 0.1.7^{blob} - quite a special one
        # needs a tag which points to a blob

        # ref^0 returns commit being pointed to, same with ref~0, and ^{}
        tag = rev_parse('0.1.4')
        for token in (('~0', '^0', '^{}')):
            assert tag.object == rev_parse('0.1.4%s' % token)
        # END handle multiple tokens

        # try partial parsing
        max_items = 40
        for i, binsha in enumerate(self.rorepo.odb.sha_iter()):
            assert rev_parse(bin_to_hex(binsha)[:8 - (i % 2)].decode('ascii')).binsha == binsha
            if i > max_items:
                # this is rather slow currently, as rev_parse returns an object
                # which requires accessing packs, it has some additional overhead
                break
        # END for each binsha in repo

        # missing closing brace commit^{tree
        self.failUnlessRaises(ValueError, rev_parse, '0.1.4^{tree')

        # missing starting brace
        self.failUnlessRaises(ValueError, rev_parse, '0.1.4^tree}')

        # REVLOG
        #######
        head = self.rorepo.head

        # need to specify a ref when using the @ syntax
        self.failUnlessRaises(BadObject, rev_parse, "%s@{0}" % head.commit.hexsha)

        # uses HEAD.ref by default
        assert rev_parse('@{0}') == head.commit
        if not head.is_detached:
            refspec = '%s@{0}' % head.ref.name
            assert rev_parse(refspec) == head.ref.commit
            # all additional specs work as well
            assert rev_parse(refspec + "^{tree}") == head.commit.tree
            assert rev_parse(refspec + ":CHANGES").type == 'blob'
        # END operate on non-detached head

        # position doesn't exist
        self.failUnlessRaises(IndexError, rev_parse, '@{10000}')

        # currently, nothing more is supported
        self.failUnlessRaises(NotImplementedError, rev_parse, "@{1 week ago}")

        # the last position
        assert rev_parse('@{1}') != head.commit
Example #47
 def hexsha(self):
     return bin_to_hex(self[0])
Example #48
    def test_large_data_streaming(self, path):
        ldb = LooseObjectDB(path)
        string_ios = list()  # list of streams we previously created

        # serial mode
        for randomize in range(2):
            desc = (randomize and 'random ') or ''
            print >> sys.stderr, "Creating %s data ..." % desc
            st = time()
            size, stream = make_memory_file(self.large_data_size_bytes,
                                            randomize)
            elapsed = time() - st
            print >> sys.stderr, "Done (in %f s)" % elapsed
            string_ios.append(stream)

            # writing - due to the compression it will seem faster than it is
            st = time()
            sha = ldb.store(IStream('blob', size, stream)).binsha
            elapsed_add = time() - st
            assert ldb.has_object(sha)
            db_file = ldb.readable_db_object_path(bin_to_hex(sha))
            fsize_kib = os.path.getsize(db_file) / 1000

            size_kib = size / 1000
            print >> sys.stderr, "Added %i KiB (filesize = %i KiB) of %s data to loose odb in %f s ( %f Write KiB / s)" % (
                size_kib, fsize_kib, desc, elapsed_add, size_kib / elapsed_add)

            # reading all at once
            st = time()
            ostream = ldb.stream(sha)
            shadata = ostream.read()
            elapsed_readall = time() - st

            stream.seek(0)
            assert shadata == stream.getvalue()
            print >> sys.stderr, "Read %i KiB of %s data at once from loose odb in %f s ( %f Read KiB / s)" % (
                size_kib, desc, elapsed_readall, size_kib / elapsed_readall)

            # reading in chunks of 1 MiB
            cs = 512 * 1000
            chunks = list()
            st = time()
            ostream = ldb.stream(sha)
            while True:
                data = ostream.read(cs)
                chunks.append(data)
                if len(data) < cs:
                    break
            # END read in chunks
            elapsed_readchunks = time() - st

            stream.seek(0)
            assert ''.join(chunks) == stream.getvalue()

            cs_kib = cs / 1000
            print >> sys.stderr, "Read %i KiB of %s data in %i KiB chunks from loose odb in %f s ( %f Read KiB / s)" % (
                size_kib, desc, cs_kib, elapsed_readchunks,
                size_kib / elapsed_readchunks)

            # del db file so we keep something to do
            os.remove(db_file)
        # END for each randomization factor

        # multi-threaded mode
        # want two, should be supported by most of todays cpus
        pool.set_size(2)
        total_kib = 0
        nsios = len(string_ios)
        for stream in string_ios:
            stream.seek(0)
            total_kib += len(stream.getvalue()) / 1000
        # END rewind

        def istream_iter():
            for stream in string_ios:
                stream.seek(0)
                yield IStream(str_blob_type, len(stream.getvalue()), stream)
            # END for each stream

        # END util

        # write multiple objects at once, involving concurrent compression
        reader = IteratorReader(istream_iter())
        istream_reader = ldb.store_async(reader)
        istream_reader.task().max_chunksize = 1

        st = time()
        istreams = istream_reader.read(nsios)
        assert len(istreams) == nsios
        elapsed = time() - st

        print >> sys.stderr, "Threads(%i): Compressed %i KiB of data in loose odb in %f s ( %f Write KiB / s)" % (
            pool.size(), total_kib, elapsed, total_kib / elapsed)

        # decompress multiple at once, by reading them
        # chunk size is not important as the stream will not really be decompressed

        # until its read
        istream_reader = IteratorReader(iter([i.binsha for i in istreams]))
        ostream_reader = ldb.stream_async(istream_reader)

        chunk_task = TestStreamReader(ostream_reader, "chunker", None)
        output_reader = pool.add_task(chunk_task)
        output_reader.task().max_chunksize = 1

        st = time()
        assert len(output_reader.read(nsios)) == nsios
        elapsed = time() - st

        print >> sys.stderr, "Threads(%i): Decompressed %i KiB of data in loose odb in %f s ( %f Read KiB / s)" % (
            pool.size(), total_kib, elapsed, total_kib / elapsed)

        # store the files, and read them back. For the reading, we use a task
        # as well which is chunked into one item per task. Reading all will
        # very quickly result in two threads handling two bytestreams of
        # chained compression/decompression streams
        reader = IteratorReader(istream_iter())
        istream_reader = ldb.store_async(reader)
        istream_reader.task().max_chunksize = 1

        istream_to_sha = lambda items: [i.binsha for i in items]
        istream_reader.set_post_cb(istream_to_sha)

        ostream_reader = ldb.stream_async(istream_reader)

        chunk_task = TestStreamReader(ostream_reader, "chunker", None)
        output_reader = pool.add_task(chunk_task)
        output_reader.max_chunksize = 1

        st = time()
        assert len(output_reader.read(nsios)) == nsios
        elapsed = time() - st

        print >> sys.stderr, "Threads(%i): Compressed and decompressed and read %i KiB of data in loose odb in %f s ( %f Combined KiB / s)" % (
            pool.size(), total_kib, elapsed, total_kib / elapsed)
Example #49
 def has_object(self, sha):
     try:
         self.readable_db_object_path(bin_to_hex(sha))
         return True
     except BadObject:
         return False
Example #50
 def hexsha(self):
     """:return: 40 byte hex version of our 20 byte binary sha"""
     return bin_to_hex(self.binsha)
Example #51
 def hexsha(self):
     """:return: 40 byte hex version of our 20 byte binary sha"""
     # b2a_hex produces bytes
     return bin_to_hex(self.binsha).decode('ascii')
Example #52
 def stream(self, sha):
     """For now, all lookup is done by git itself"""
     hexsha, typename, size, stream = self._git.stream_object_data(
         bin_to_hex(sha))
     return OStream(hex_to_bin(hexsha), typename, size, stream)
Example #53
File: db.py Project: daleha/git-kit
	def info(self, sha):
		hexsha, typename, size = self._git.get_object_header(bin_to_hex(sha))
		return OInfo(hex_to_bin(hexsha), typename, size)