def test_pack_writing(self):
    """Benchmark writing a pack from pre-collected object streams.

    This will not be fast, as we take time for decompressing the streams
    as well.
    """
    ostream = CountedNullStream()
    pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

    ni = 5000  # upper bound on the number of objects to pack
    count = 0
    st = time()
    objs = list()
    for sha in pdb.sha_iter():
        count += 1
        objs.append(pdb.stream(sha))
        if count == ni:
            break
    # END gather objects for pack-writing
    elapsed = time() - st
    # report the actual count - the database may hold fewer than ni objects
    print("PDB Streaming: Got %i streams by sha in %f s ( %f streams/s )"
          % (count, elapsed, count / elapsed), file=sys.stderr)

    st = time()
    PackEntity.write_pack(objs, ostream.write)
    elapsed = time() - st
    total_kb = ostream.bytes_written() / 1000
    print("PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)"
          % (total_kb, elapsed, total_kb / elapsed), file=sys.stderr)
def test_pack_writing(self):
    """Benchmark streaming objects out of a PackedDB and re-packing them.

    This will not be fast, as we take time for decompressing the streams
    as well.
    """
    ostream = CountedNullStream()
    pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

    ni = 5000  # number of objects to include in the written pack
    count = 0
    st = time()
    for sha in pdb.sha_iter():
        count += 1
        # time raw stream access only - the result is intentionally discarded
        pdb.stream(sha)
        if count == ni:
            break
    # END gather objects for pack-writing
    elapsed = time() - st
    print("PDB Streaming: Got %i streams by sha in %f s ( %f streams/s )"
          % (ni, elapsed, ni / elapsed), file=sys.stderr)

    st = time()
    # a fresh generator re-reads the streams; object_count caps the pack at ni
    PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()),
                          ostream.write, object_count=ni)
    elapsed = time() - st
    total_kb = ostream.bytes_written() / 1000
    print("PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)"
          % (total_kb, elapsed, total_kb / elapsed), file=sys.stderr)
def update_cache(self, force=False):
    """Synchronize the cached entities with the pack files actually on disk.

    Packs that appeared are added, packs that vanished are dropped, and
    unchanged entries are kept as they are.

    :param force: When True, rescan even though the directory's modification
        timestamp suggests nothing has changed.
    :return: True if the packs have been updated so there is new information,
        False if there was no change to the pack database"""
    dir_stat = os.stat(self.root_path())
    if not force and dir_stat.st_mtime <= self._st_mtime:
        # directory untouched since the last scan - nothing to do
        return False
    # END abort early on no change
    self._st_mtime = dir_stat.st_mtime

    # By git convention pack files carry a 'pack-' prefix; compare what is
    # on disk against what we currently track.
    on_disk = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
    tracked = {entry[1].pack().path() for entry in self._entities}

    # Register packs we have not seen before. The hit-counter/priority is
    # seeded with the pack size, a good proxy for hit probability; obtaining
    # it reads only 12 bytes.
    for path in on_disk - tracked:
        entity = PackEntity(path)
        self._entities.append(
            [entity.pack().size(), entity, entity.index().sha_to_index])
    # END for each new packfile

    # Forget entries whose pack file disappeared from disk.
    for path in tracked - on_disk:
        removal_index = -1
        for position, entry in enumerate(self._entities):
            if entry[1].pack().path() == path:
                removal_index = position
                break
            # END found index
        # END for each entity
        assert removal_index != -1
        del self._entities[removal_index]
    # END for each removed pack

    # re-establish the priority ordering
    self._sort_entities()
    return True
def update_cache(self, force=False):
    """
    Update our cache with the actually existing packs on disk. Add new ones,
    and remove deleted ones. We keep the unchanged ones.

    :param force: If True, the cache will be updated even though the
        directory does not appear to have changed according to its
        modification timestamp.
    :return: True if the packs have been updated so there is new information,
        False if there was no change to the pack database"""
    stat = os.stat(self.root_path())
    if not force and stat.st_mtime <= self._st_mtime:
        return False
    # END abort early on no change
    self._st_mtime = stat.st_mtime

    # packs are supposed to be prefixed with pack- by git-convention
    # get all pack files, figure out what changed
    pack_files = set(glob.glob(os.path.join(self.root_path(), "pack-*.pack")))
    # set comprehension instead of set(generator) - same result, clearer intent
    our_pack_files = {item[1].pack().path() for item in self._entities}

    # new packs
    for pack_file in (pack_files - our_pack_files):
        # init the hit-counter/priority with the size, a good measure for hit-
        # probability. Its implemented so that only 12 bytes will be read
        entity = PackEntity(pack_file)
        self._entities.append(
            [entity.pack().size(), entity, entity.index().sha_to_index])
    # END for each new packfile

    # removed packs
    for pack_file in (our_pack_files - pack_files):
        del_index = -1
        for i, item in enumerate(self._entities):
            if item[1].pack().path() == pack_file:
                del_index = i
                break
            # END found index
        # END for each entity
        assert del_index != -1
        # del is a statement, not a function - no call parentheses
        del self._entities[del_index]
    # END for each removed pack

    # reinitialize priorities
    self._sort_entities()
    return True
def test_pack_entity(self):
    """Exercise every PackEntity accessor against each fixture pack/index pair."""
    for packinfo, indexinfo in ((self.packfile_v2_1, self.packindexfile_v1),
                                (self.packfile_v2_2, self.packindexfile_v2),
                                (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)):
        packfile, version, size = packinfo
        indexfile, version, size = indexinfo
        entity = PackEntity(packfile)
        assert entity.pack().path() == packfile
        assert entity.index().path() == indexfile

        count = 0
        # builtin zip replaces the Python-2-only itertools.izip
        for info, stream in zip(entity.info_iter(), entity.stream_iter()):
            count += 1
            assert info.binsha == stream.binsha
            assert len(info.binsha) == 20
            assert info.type_id == stream.type_id
            assert info.size == stream.size

            # we return fully resolved items, which is implied by the
            # sha centric access
            assert info.type_id not in delta_types

            # try all calls
            assert len(entity.collect_streams(info.binsha))
            oinfo = entity.info(info.binsha)
            assert isinstance(oinfo, OInfo)
            assert oinfo.binsha is not None
            ostream = entity.stream(info.binsha)
            assert isinstance(ostream, OStream)
            assert ostream.binsha is not None

            # verify the stream; crc checks may be unsupported by old index versions
            try:
                assert entity.is_valid_stream(info.binsha, use_crc=True)
            except UnsupportedOperation:
                pass
            # END ignore version issues
            assert entity.is_valid_stream(info.binsha, use_crc=False)
        # END for each info, stream tuple
        assert count == size
def test_pack_entity(self):
    """Verify PackEntity info/stream iteration and sha-based access for all fixtures."""
    fixtures = ((self.packfile_v2_1, self.packindexfile_v1),
                (self.packfile_v2_2, self.packindexfile_v2),
                (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii))
    for packinfo, indexinfo in fixtures:
        packfile, version, size = packinfo
        indexfile, version, size = indexinfo
        entity = PackEntity(packfile)
        assert entity.pack().path() == packfile
        assert entity.index().path() == indexfile

        count = 0
        # builtin zip replaces the Python-2-only itertools.izip
        for info, stream in zip(entity.info_iter(), entity.stream_iter()):
            count += 1
            assert info.binsha == stream.binsha
            assert len(info.binsha) == 20
            assert info.type_id == stream.type_id
            assert info.size == stream.size

            # we return fully resolved items, which is implied by the
            # sha centric access
            assert info.type_id not in delta_types

            # try all calls
            assert len(entity.collect_streams(info.binsha))
            oinfo = entity.info(info.binsha)
            assert isinstance(oinfo, OInfo)
            assert oinfo.binsha is not None
            ostream = entity.stream(info.binsha)
            assert isinstance(ostream, OStream)
            assert ostream.binsha is not None

            # verify the stream; crc checks may be unsupported by old index versions
            try:
                assert entity.is_valid_stream(info.binsha, use_crc=True)
            except UnsupportedOperation:
                pass
            # END ignore version issues
            assert entity.is_valid_stream(info.binsha, use_crc=False)
        # END for each info, stream tuple
        assert count == size
def test_pack_entity(self, rw_dir):
    """Round-trip test: read all fixture packs, then write the collected
    streams back out as one pack (with and without an index) and verify
    the results.

    :param rw_dir: writable scratch directory for the generated files"""
    pack_objs = list()
    for packinfo, indexinfo in ((self.packfile_v2_1, self.packindexfile_v1),
                                (self.packfile_v2_2, self.packindexfile_v2),
                                (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii)):
        packfile, version, size = packinfo
        indexfile, version, size = indexinfo
        entity = PackEntity(packfile)
        assert entity.pack().path() == packfile
        assert entity.index().path() == indexfile

        # keep every stream around for the pack-writing phase below
        pack_objs.extend(entity.stream_iter())

        count = 0
        # builtin zip replaces the Python-2-only itertools.izip
        for info, stream in zip(entity.info_iter(), entity.stream_iter()):
            count += 1
            assert info.binsha == stream.binsha
            assert len(info.binsha) == 20
            assert info.type_id == stream.type_id
            assert info.size == stream.size

            # we return fully resolved items, which is implied by the
            # sha centric access
            assert not info.type_id in delta_types

            # try all calls
            assert len(entity.collect_streams(info.binsha))
            oinfo = entity.info(info.binsha)
            assert isinstance(oinfo, OInfo)
            assert oinfo.binsha is not None
            ostream = entity.stream(info.binsha)
            assert isinstance(ostream, OStream)
            assert ostream.binsha is not None

            # verify the stream
            try:
                assert entity.is_valid_stream(info.binsha, use_crc=True)
            except UnsupportedOperation:
                pass
            # END ignore version issues
            assert entity.is_valid_stream(info.binsha, use_crc=False)
        # END for each info, stream tuple
        assert count == size
    # END for each entity

    # pack writing - write all packs into one
    # index path can be None
    # NOTE(review): tempfile.mktemp is deprecated/insecure, but here we only
    # need a fresh path inside rw_dir that must not pre-exist - acceptable in a test
    pack_path = tempfile.mktemp('', "pack", rw_dir)
    index_path = tempfile.mktemp('', 'index', rw_dir)
    iteration = 0

    def rewind_streams():
        # the very same stream objects are reused, so reset their cursors
        for obj in pack_objs:
            obj.stream.seek(0)
    # END utility

    for ppath, ipath, num_obj in zip((pack_path, ) * 2,
                                     (index_path, None),
                                     (len(pack_objs), None)):
        iwrite = None
        if ipath:
            ifile = open(ipath, 'wb')
            iwrite = ifile.write
        # END handle ip

        # make sure we rewind the streams ... we work on the same objects
        # over and over again
        if iteration > 0:
            rewind_streams()
        # END rewind streams
        iteration += 1

        # context manager guarantees the pack file is closed even on failure
        with open(ppath, 'wb') as pfile:
            pack_sha, index_sha = PackEntity.write_pack(
                pack_objs, pfile.write, iwrite, object_count=num_obj)
        assert os.path.getsize(ppath) > 100

        # verify pack
        pf = PackFile(ppath)
        assert pf.size() == len(pack_objs)
        assert pf.version() == PackFile.pack_version_default
        assert pf.checksum() == pack_sha

        # verify index
        if ipath is not None:
            ifile.close()
            assert os.path.getsize(ipath) > 100
            idx = PackIndexFile(ipath)
            assert idx.version() == PackIndexFile.index_version_default
            assert idx.packfile_checksum() == pack_sha
            assert idx.indexfile_checksum() == index_sha
            assert idx.size() == len(pack_objs)
        # END verify files exist
    # END for each packpath, indexpath pair

    # verify the packs thoroughly
    rewind_streams()
    entity = PackEntity.create(pack_objs, rw_dir)
    count = 0
    for info in entity.info_iter():
        count += 1
        for use_crc in range(2):
            assert entity.is_valid_stream(info.binsha, use_crc)
        # END for each crc mode
    # END for each info
    assert count == len(pack_objs)
def test_pack_entity(self, rw_dir):
    """Read every fixture pack, verify all entity accessors, then write the
    collected streams out as two new packs (one with an index, one without)
    and validate everything that was written.

    :param rw_dir: writable scratch directory for the generated files"""
    pack_objs = list()
    fixtures = ((self.packfile_v2_1, self.packindexfile_v1),
                (self.packfile_v2_2, self.packindexfile_v2),
                (self.packfile_v2_3_ascii, self.packindexfile_v2_3_ascii))
    for packinfo, indexinfo in fixtures:
        packfile, version, size = packinfo
        indexfile, version, size = indexinfo
        entity = PackEntity(packfile)
        assert entity.pack().path() == packfile
        assert entity.index().path() == indexfile

        # remember every stream so we can re-pack them later
        pack_objs.extend(entity.stream_iter())

        seen = 0
        for info, stream in zip(entity.info_iter(), entity.stream_iter()):
            seen += 1
            assert info.binsha == stream.binsha
            assert len(info.binsha) == 20
            assert info.type_id == stream.type_id
            assert info.size == stream.size

            # sha-centric access implies fully resolved items only
            assert info.type_id not in delta_types

            # exercise the remaining accessors
            assert len(entity.collect_streams(info.binsha))
            oinfo = entity.info(info.binsha)
            assert isinstance(oinfo, OInfo)
            assert oinfo.binsha is not None
            ostream = entity.stream(info.binsha)
            assert isinstance(ostream, OStream)
            assert ostream.binsha is not None

            # stream verification; crc support depends on the index version
            try:
                assert entity.is_valid_stream(info.binsha, use_crc=True)
            except UnsupportedOperation:
                pass
            # END ignore version issues
            assert entity.is_valid_stream(info.binsha, use_crc=False)
        # END for each info, stream tuple
        assert seen == size
    # END for each entity

    # pack writing - combine everything into new packs; the index path may
    # be None to produce a pack without an index
    pack_path1 = tempfile.mktemp('', "pack1", rw_dir)
    pack_path2 = tempfile.mktemp('', "pack2", rw_dir)
    index_path = tempfile.mktemp('', 'index', rw_dir)

    def rewind_streams():
        # the very same stream objects are reused, so reset their cursors
        for obj in pack_objs:
            obj.stream.seek(0)
    # END utility

    runs = zip((pack_path1, pack_path2), (index_path, None), (len(pack_objs), None))
    for pass_no, (pack_out, index_out, obj_count) in enumerate(runs):
        index_write = None
        if index_out:
            ifile = open(index_out, 'wb')
            index_write = ifile.write
        # END handle index stream

        # streams were consumed by the previous pass - rewind them first
        if pass_no > 0:
            rewind_streams()
        # END rewind streams

        with open(pack_out, 'wb') as pfile:
            pack_sha, index_sha = PackEntity.write_pack(
                pack_objs, pfile.write, index_write, object_count=obj_count)
        assert os.path.getsize(pack_out) > 100

        # check the written pack
        pf = PackFile(pack_out)
        assert pf.size() == len(pack_objs)
        assert pf.version() == PackFile.pack_version_default
        assert pf.checksum() == pack_sha
        pf.close()

        # check the written index, when one was produced
        if index_out is not None:
            ifile.close()
            assert os.path.getsize(index_out) > 100
            idx = PackIndexFile(index_out)
            assert idx.version() == PackIndexFile.index_version_default
            assert idx.packfile_checksum() == pack_sha
            assert idx.indexfile_checksum() == index_sha
            assert idx.size() == len(pack_objs)
            idx.close()
        # END verify files exist
    # END for each packpath, indexpath pair

    # thorough verification through a freshly created entity
    rewind_streams()
    entity = PackEntity.create(pack_objs, rw_dir)
    count = 0
    for info in entity.info_iter():
        count += 1
        for use_crc in range(2):
            assert entity.is_valid_stream(info.binsha, use_crc)
        # END for each crc mode
    # END for each info
    assert count == len(pack_objs)
    entity.close()