Example #1
    def _assert_index_file(self, index, version, size):
        assert index.packfile_checksum() != index.indexfile_checksum()
        assert len(index.packfile_checksum()) == 20
        assert len(index.indexfile_checksum()) == 20
        assert index.version() == version
        assert index.size() == size
        assert len(index.offsets()) == size

        # get all data of all objects
        for oidx in xrange(index.size()):
            sha = index.sha(oidx)
            assert oidx == index.sha_to_index(sha)

            entry = index.entry(oidx)
            assert len(entry) == 3

            assert entry[0] == index.offset(oidx)
            assert entry[1] == sha
            assert entry[2] == index.crc(oidx)

            # verify partial sha
            for l in (4, 8, 11, 17, 20):
                assert index.partial_sha_to_index(sha[:l], l * 2) == oidx

        # END for each object index in indexfile
        self.failUnlessRaises(ValueError, index.partial_sha_to_index, "\0", 2)
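
A minimal stdlib-only sketch (the binsha value below is fabricated, not from the test) of why the second argument to partial_sha_to_index is l * 2: a prefix of l binary bytes corresponds to exactly l * 2 hexadecimal digits, which is the form a user would actually type.

    from binascii import hexlify

    binsha = bytes(range(20))              # fabricated stand-in for a 20-byte binary SHA-1
    for l in (4, 8, 11, 17, 20):
        hex_prefix = hexlify(binsha[:l])   # l binary bytes -> l * 2 hex digits
        assert len(hex_prefix) == l * 2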
Example #2
File: lib.py Project: corserp/greet
    def _assert_object_writing_simple(self, db):
        # write a bunch of objects and query their streams and info
        null_objs = db.size()
        ni = 250
        for i in xrange(ni):
            data = pack(">L", i)
            istream = IStream(str_blob_type, len(data), BytesIO(data))
            new_istream = db.store(istream)
            assert new_istream is istream
            assert db.has_object(istream.binsha)

            info = db.info(istream.binsha)
            assert isinstance(info, OInfo)
            assert info.type == istream.type and info.size == istream.size

            stream = db.stream(istream.binsha)
            assert isinstance(stream, OStream)
            assert stream.binsha == info.binsha and stream.type == info.type
            assert stream.read() == data
        # END for each item

        assert db.size() == null_objs + ni
        shas = list(db.sha_iter())
        assert len(shas) == db.size()
        assert len(shas[0]) == 20
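
A minimal usage sketch of the API this helper exercises, under the assumption that gitdb exposes LooseObjectDB and IStream at the package level and that a fresh temporary directory is an acceptable object root; this is not taken from the project's documentation.

    import tempfile
    from io import BytesIO
    from gitdb import LooseObjectDB, IStream   # assumed import locations

    db = LooseObjectDB(tempfile.mkdtemp())     # throwaway object root
    data = b"hello"
    istream = IStream(b"blob", len(data), BytesIO(data))
    db.store(istream)                          # fills in istream.binsha
    assert db.has_object(istream.binsha)
    assert db.stream(istream.binsha).read() == data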
Example #3
File: lib.py Project: Kronuz/gitdb
    def _assert_object_writing_simple(self, db):
        # write a bunch of objects and query their streams and info
        null_objs = db.size()
        ni = 250
        for i in xrange(ni):
            data = pack(">L", i)
            istream = IStream(str_blob_type, len(data), BytesIO(data))
            new_istream = db.store(istream)
            assert new_istream is istream
            assert db.has_object(istream.binsha)

            info = db.info(istream.binsha)
            assert isinstance(info, OInfo)
            assert info.type == istream.type and info.size == istream.size

            stream = db.stream(istream.binsha)
            assert isinstance(stream, OStream)
            assert stream.binsha == info.binsha and stream.type == info.type
            assert stream.read() == data
        # END for each item

        assert db.size() == null_objs + ni
        shas = list(db.sha_iter())
        assert len(shas) == db.size()
        assert len(shas[0]) == 20
Example #4
 def test_correctness(self):
     pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
     # disabled for now as it used to work perfectly, checking big repositories takes a long time
     print("Endurance run: verify streaming of objects (crc and sha)",
           file=sys.stderr)
     for crc in range(2):
         count = 0
         st = time()
         for entity in pdb.entities():
             pack_verify = entity.is_valid_stream
             sha_by_index = entity.index().sha
             for index in xrange(entity.index().size()):
                 try:
                     assert pack_verify(sha_by_index(index), use_crc=crc)
                     count += 1
                 except UnsupportedOperation:
                     pass
                 # END ignore old indices
             # END for each index
         # END for each entity
         elapsed = time() - st
         print(
             "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" %
             (count, crc, elapsed, count / elapsed),
             file=sys.stderr)
Example #5
 def _read_fanout(self, byte_offset):
     """Generate a fanout table from our data"""
     d = self._cursor.map()
     out = list()
     append = out.append
     for i in xrange(256):
         append(unpack_from('>L', d, byte_offset + i * 4)[0])
     # END for each entry
     return out
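
For context, a standalone stdlib-only sketch of the structure being read here: a v2 pack index fanout is 256 big-endian uint32 values, where entry i holds the cumulative count of objects whose first sha byte is <= i, so entry 255 is the total object count. The table below is fabricated.

    from struct import pack, unpack_from

    raw = b''.join(pack('>L', n) for n in range(1, 257))     # fabricated fanout table
    fanout = [unpack_from('>L', raw, i * 4)[0] for i in range(256)]
    assert fanout[255] == 256                                 # total object count
    assert all(a <= b for a, b in zip(fanout, fanout[1:]))    # monotonically non-decreasing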
Example #6
File: pack.py Project: yarikoptic/gitdb
 def _read_fanout(self, byte_offset):
     """Generate a fanout table from our data"""
     d = self._cursor.map()
     out = list()
     append = out.append
     for i in xrange(256):
         append(unpack_from('>L', d, byte_offset + i * 4)[0])
     # END for each entry
     return out
Example #7
    def write(self, pack_sha, write):
        """Write the index file using the given write method
        :param pack_sha: binary sha over the whole pack that we index
        :return: sha1 binary sha over all index file contents"""
        # sort for sha1 hash
        self._objs.sort(key=lambda o: o[0])

        sha_writer = FlexibleSha1Writer(write)
        sha_write = sha_writer.write
        sha_write(PackIndexFile.index_v2_signature)
        sha_write(pack(">L", PackIndexFile.index_version_default))

        # fanout
        tmplist = list((0, ) * 256)  # fanout or list with 64 bit offsets
        for t in self._objs:
            tmplist[byte_ord(t[0][0])] += 1
        # END prepare fanout
        for i in xrange(255):
            v = tmplist[i]
            sha_write(pack('>L', v))
            tmplist[i + 1] += v
        # END write each fanout entry
        sha_write(pack('>L', tmplist[255]))

        # sha1 ordered
        # save calls, that is push them into c
        sha_write(b''.join(t[0] for t in self._objs))

        # crc32
        for t in self._objs:
            sha_write(pack('>L', t[1] & 0xffffffff))
        # END for each crc

        tmplist = list()
        # offset 32
        for t in self._objs:
            ofs = t[2]
            if ofs > 0x7fffffff:
                tmplist.append(ofs)
                ofs = 0x80000000 + len(tmplist) - 1
            # END handle 64 bit offsets
            sha_write(pack('>L', ofs & 0xffffffff))
        # END for each offset

        # offset 64
        for ofs in tmplist:
            sha_write(pack(">Q", ofs))
        # END for each offset

        # trailer
        assert (len(pack_sha) == 20)
        sha_write(pack_sha)
        sha = sha_writer.sha(as_hex=False)
        write(sha)
        return sha
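
The 64-bit offset handling above can be illustrated in isolation: offsets that fit in 31 bits are stored directly in the 32-bit slot, larger ones set the most significant bit and store an index into the trailing table of 64-bit offsets. A standalone sketch with fabricated offsets (not gitdb code):

    def encode_offsets(offsets):
        slots, big = [], []
        for ofs in offsets:
            if ofs > 0x7fffffff:
                big.append(ofs)
                ofs = 0x80000000 + len(big) - 1   # MSB set, low bits index the 64 bit table
            slots.append(ofs & 0xffffffff)
        return slots, big

    def decode_offset(slot, big):
        return big[slot & 0x7fffffff] if slot & 0x80000000 else slot

    offsets = [12, 0x7fffffff, 0x80000000, 2 ** 40]
    slots, big = encode_offsets(offsets)
    assert [decode_offset(s, big) for s in slots] == offsets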
Example #8
File: pack.py Project: yarikoptic/gitdb
    def write(self, pack_sha, write):
        """Write the index file using the given write method
        :param pack_sha: binary sha over the whole pack that we index
        :return: sha1 binary sha over all index file contents"""
        # sort for sha1 hash
        self._objs.sort(key=lambda o: o[0])

        sha_writer = FlexibleSha1Writer(write)
        sha_write = sha_writer.write
        sha_write(PackIndexFile.index_v2_signature)
        sha_write(pack(">L", PackIndexFile.index_version_default))

        # fanout
        tmplist = list((0,) * 256)                                # fanout or list with 64 bit offsets
        for t in self._objs:
            tmplist[byte_ord(t[0][0])] += 1
        # END prepare fanout
        for i in xrange(255):
            v = tmplist[i]
            sha_write(pack('>L', v))
            tmplist[i + 1] += v
        # END write each fanout entry
        sha_write(pack('>L', tmplist[255]))

        # sha1 ordered
        # save calls, that is push them into c
        sha_write(b''.join(t[0] for t in self._objs))

        # crc32
        for t in self._objs:
            sha_write(pack('>L', t[1] & 0xffffffff))
        # END for each crc

        tmplist = list()
        # offset 32
        for t in self._objs:
            ofs = t[2]
            if ofs > 0x7fffffff:
                tmplist.append(ofs)
                ofs = 0x80000000 + len(tmplist) - 1
            # END handle 64 bit offsets
            sha_write(pack('>L', ofs & 0xffffffff))
        # END for each offset

        # offset 64
        for ofs in tmplist:
            sha_write(pack(">Q", ofs))
        # END for each offset

        # trailer
        assert(len(pack_sha) == 20)
        sha_write(pack_sha)
        sha = sha_writer.sha(as_hex=False)
        write(sha)
        return sha
Example #9
File: lib.py Project: haukurmar/gitdb
def make_bytes(size_in_bytes, randomize=False):
    """:return: string with given size in bytes
    :param randomize: try to produce a very random stream"""
    actual_size = size_in_bytes // 4
    producer = xrange(actual_size)
    if randomize:
        producer = list(producer)
        random.shuffle(producer)
    # END randomize
    a = array('i', producer)
    return a.tostring()
Example #10
def make_bytes(size_in_bytes, randomize=False):
    """:return: string with given size in bytes
    :param randomize: try to produce a very random stream"""
    actual_size = size_in_bytes // 4
    producer = xrange(actual_size)
    if randomize:
        producer = list(producer)
        random.shuffle(producer)
    # END randomize
    a = array('i', producer)
    return a.tobytes()
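
The two variants differ only in the final call: array.tostring() was deprecated and removed in Python 3.9, and tobytes() is the current spelling of the same operation. A small stdlib-only sketch of the size arithmetic: each 'i' element is 4 bytes on common platforms, so size_in_bytes // 4 elements yield the requested length for sizes divisible by 4.

    from array import array

    size_in_bytes = 4096
    a = array('i', range(size_in_bytes // 4))
    assert a.itemsize == 4                       # 4 bytes per element on common platforms
    assert len(a.tobytes()) == size_in_bytes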
Example #11
File: pack.py Project: yarikoptic/gitdb
    def offsets(self):
        """:return: sequence of all offsets in the order in which they were written

        **Note:** the return value can be randomly accessed, but may be immutable"""
        if self._version == 2:
            # read stream to array, convert to tuple
            a = array.array('I')    # 4 byte unsigned int, long are 8 byte on 64 bit it appears
            a.fromstring(buffer(self._cursor.map(), self._pack_offset, self._pack_64_offset - self._pack_offset))

            # convert from network byte order to the host's native byte order
            if sys.byteorder == 'little':
                a.byteswap()
            return a
        else:
            return tuple(self.offset(index) for index in xrange(self.size()))
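
array.fromstring() and the buffer() builtin are Python 2 only; below is a standalone sketch of the same decoding with the Python 3 equivalents, frombytes() and memoryview(), on fabricated data.

    import sys
    from array import array
    from struct import pack

    raw = pack('>3L', 10, 20, 30)        # three big-endian offsets, as stored on disk
    a = array('I')
    a.frombytes(memoryview(raw))
    if sys.byteorder == 'little':
        a.byteswap()                     # network byte order -> native order
    assert list(a) == [10, 20, 30]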
Example #12
    def offsets(self):
        """:return: sequence of all offsets in the order in which they were written

        **Note:** the return value can be randomly accessed, but may be immutable"""
        if self._version == 2:
            # read stream to array, convert to tuple
            a = array.array('I')    # 4 byte unsigned int, long are 8 byte on 64 bit it appears
            a.fromstring(buffer(self._cursor.map(), self._pack_offset, self._pack_64_offset - self._pack_offset))

            # convert from network byte order to the host's native byte order
            if sys.byteorder == 'little':
                a.byteswap()
            return a
        else:
            return tuple(self.offset(index) for index in xrange(self.size()))
Example #13
    def compress(self):
        """Alter the list to reduce the amount of nodes. Currently we concatenate
        add-chunks
        :return: self"""
        slen = len(self)
        if slen < 2:
            return self
        i = 0

        first_data_index = None
        while i < slen:
            dc = self[i]
            i += 1
            if dc.data is None:
                if first_data_index is not None and i - 2 - first_data_index > 1:
                    # if first_data_index is not None:
                    nd = StringIO()  # new data
                    so = self[
                        first_data_index].to  # start offset in target buffer
                    for x in xrange(first_data_index, i - 1):
                        xdc = self[x]
                        nd.write(xdc.data[:xdc.ts])
                    # END collect data

                    del (self[first_data_index:i - 1])
                    buf = nd.getvalue()
                    self.insert(first_data_index,
                                DeltaChunk(so, len(buf), 0, buf))

                    slen = len(self)
                    i = first_data_index + 1

                # END concatenate data
                first_data_index = None
                continue
            # END skip non-data chunks

            if first_data_index is None:
                first_data_index = i - 1
        # END iterate list

        # if slen_orig != len(self):
        #   print "INFO: Reduced delta list len to %f %% of former size" % ((float(len(self)) / slen_orig) * 100)
        return self
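
A simplified standalone sketch of the same idea, with a made-up chunk representation rather than gitdb's DeltaChunk: runs of adjacent chunks carrying literal data are concatenated into a single chunk, while copy chunks (data is None) are kept as separators.

    def merge_add_chunks(chunks):
        out, pending = [], []
        for data in chunks:
            if data is None:             # copy chunk: flush any collected literals
                if pending:
                    out.append(b''.join(pending))
                    pending = []
                out.append(None)
            else:                        # add chunk: collect its literal data
                pending.append(data)
        if pending:
            out.append(b''.join(pending))
        return out

    assert merge_add_chunks([b'a', b'b', None, b'c']) == [b'ab', None, b'c']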
Example #14
File: fun.py Project: Kronuz/gitdb
    def compress(self):
        """Alter the list to reduce the amount of nodes. Currently we concatenate
        add-chunks
        :return: self"""
        slen = len(self)
        if slen < 2:
            return self
        i = 0

        first_data_index = None
        while i < slen:
            dc = self[i]
            i += 1
            if dc.data is None:
                if first_data_index is not None and i - 2 - first_data_index > 1:
                    # if first_data_index is not None:
                    nd = StringIO()                     # new data
                    so = self[first_data_index].to      # start offset in target buffer
                    for x in xrange(first_data_index, i - 1):
                        xdc = self[x]
                        nd.write(xdc.data[:xdc.ts])
                    # END collect data

                    del(self[first_data_index:i - 1])
                    buf = nd.getvalue()
                    self.insert(first_data_index, DeltaChunk(so, len(buf), 0, buf))

                    slen = len(self)
                    i = first_data_index + 1

                # END concatenate data
                first_data_index = None
                continue
            # END skip non-data chunks

            if first_data_index is None:
                first_data_index = i - 1
        # END iterate list

        # if slen_orig != len(self):
        #   print "INFO: Reduced delta list len to %f %% of former size" % ((float(len(self)) / slen_orig) * 100)
        return self
Example #15
 def test_correctness(self):
     pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
     # disabled for now as it used to work perfectly, checking big repositories takes a long time
     print("Endurance run: verify streaming of objects (crc and sha)", file=sys.stderr)
     for crc in range(2):
         count = 0
         st = time()
         for entity in pdb.entities():
             pack_verify = entity.is_valid_stream
             sha_by_index = entity.index().sha
             for index in xrange(entity.index().size()):
                 try:
                     assert pack_verify(sha_by_index(index), use_crc=crc)
                     count += 1
                 except UnsupportedOperation:
                     pass
                 # END ignore old indices
             # END for each index
         # END for each entity
         elapsed = time() - st
         print("PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed), file=sys.stderr)
Example #16
File: pack.py Project: haukurmar/gitdb
 def sha_iter(self):
     for entity in self.entities():
         index = entity.index()
         sha_by_index = index.sha
         for index in xrange(index.size()):
             yield sha_by_index(index)
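
A minimal usage sketch for the iterator above, assuming PackedDB can be imported from gitdb.db as shown; the pack directory path is hypothetical.

    from gitdb.db import PackedDB        # assumed import location

    pdb = PackedDB("/path/to/repo/.git/objects/pack")   # hypothetical path
    for binsha in pdb.sha_iter():
        assert len(binsha) == 20         # binary shas, not hex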
Example #17
File: pack.py Project: yarikoptic/gitdb
 def _iter_objects(self, as_stream):
     """Iterate over all objects in our index and yield their OInfo or OStream instences"""
     _sha = self._index.sha
     _object = self._object
     for index in xrange(self._index.size()):
         yield _object(_sha(index), as_stream, index)
Example #18
 def _iter_objects(self, as_stream):
     """Iterate over all objects in our index and yield their OInfo or OStream instences"""
     _sha = self._index.sha
     _object = self._object
     for index in xrange(self._index.size()):
         yield _object(_sha(index), as_stream, index)
Example #19
 def sha_iter(self):
     for entity in self.entities():
         index = entity.index()
         sha_by_index = index.sha
         for index in xrange(index.size()):
             yield sha_by_index(index)