Example #1
	def test_pack_writing(self):
		# see how fast we can write a pack from object streams.
		# This will not be fast, as we take time for decompressing the streams as well
		ostream = CountedNullStream()
		pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
		
		ni = 5000
		count = 0
		total_size = 0
		st = time()
		objs = list()
		for sha in pdb.sha_iter():
			count += 1
			objs.append(pdb.stream(sha))
			if count == ni:
				break
		#END gather objects for pack-writing
		elapsed = time() - st
		print >> sys.stderr, "PDB Streaming: Got %i streams by sha in %f s ( %f streams/s )" % (ni, elapsed, ni / elapsed)
		
		st = time()
		PackEntity.write_pack(objs, ostream.write)
		elapsed = time() - st
		total_kb = ostream.bytes_written() / 1000
		print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (total_kb, elapsed, total_kb/elapsed)
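These snippets omit their imports and helpers; CountedNullStream is only used as a byte-counting sink for write_pack. A minimal stand-in, assuming nothing beyond the write() and bytes_written() calls visible above (the actual gitdb test utility may be implemented differently), could look like this:

class CountedNullStream(object):
    """Illustrative sketch: discards everything written to it and only counts bytes."""
    __slots__ = ('_bw',)

    def __init__(self):
        self._bw = 0

    def bytes_written(self):
        # total number of bytes passed to write() so far
        return self._bw

    def write(self, d):
        self._bw += len(d)
        return len(d)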
Example #2
    def test_correctness(self):
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
        # disabled for now as it used to work perfectly, checking big repositories takes a long time
        print("Endurance run: verify streaming of objects (crc and sha)",
              file=sys.stderr)
        for crc in range(2):
            count = 0
            st = time()
            for entity in pdb.entities():
                pack_verify = entity.is_valid_stream
                sha_by_index = entity.index().sha
                for index in xrange(entity.index().size()):
                    try:
                        assert pack_verify(sha_by_index(index), use_crc=crc)
                        count += 1
                    except UnsupportedOperation:
                        pass
                    # END ignore old indices
                # END for each index
            # END for each entity
            elapsed = time() - st
            print(
                "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" %
                (count, crc, elapsed, count / elapsed),
                file=sys.stderr)
Example #3
	def test_pack_random_access(self):
		pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
		
		# sha lookup
		st = time()
		sha_list = list(pdb.sha_iter())
		elapsed = time() - st
		ns = len(sha_list)
		print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed)
		
		# sha lookup: best-case and worst case access
		pdb_pack_info = pdb._pack_info
		access_times = list()
		for rand in range(2):
			if rand:
				random.shuffle(sha_list)
			# END shuffle shas
			st = time()
			for sha in sha_list:
				pdb_pack_info(sha)
			# END for each sha to look up
			elapsed = time() - st
			access_times.append(elapsed)
			
			# discard cache
			del(pdb._entities)
			pdb.entities()
			print >> sys.stderr, "PDB: looked up %i sha in %i packs (random=%i) in %f s ( %f shas/s )" % (ns, len(pdb.entities()), rand, elapsed, ns / elapsed)
		# END for each random mode
		elapsed_order, elapsed_rand = access_times
		
		# well, it's never really sequential regarding the memory patterns, but it
		# shows how well the priority cache performs
		print >> sys.stderr, "PDB: sequential access is %f %% faster than random-access" % (100 - ((elapsed_order / elapsed_rand) * 100))
		
		
		# query info and streams only
		max_items = 10000			# can wait longer when testing memory
		for pdb_fun in (pdb.info, pdb.stream):
			st = time()
			for sha in sha_list[:max_items]:
				pdb_fun(sha)
			elapsed = time() - st
			print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed)
		# END for each function
		
		# retrieve stream and read all
		max_items = 5000
		pdb_stream = pdb.stream
		total_size = 0
		st = time()
		for sha in sha_list[:max_items]:
			stream = pdb_stream(sha)
			stream.read()
			total_size += stream.size
		elapsed = time() - st
		total_kib = total_size / 1000
		print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totalling %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (max_items, total_kib, total_kib / elapsed, elapsed, max_items / elapsed)
Example #4
    def test_pack_random_access(self):
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

        # sha lookup
        st = time()
        sha_list = list(pdb.sha_iter())
        elapsed = time() - st
        ns = len(sha_list)
        print("PDB: looked up %i shas by index in %f s ( %f shas/s )" % (ns, elapsed, ns / elapsed), file=sys.stderr)

        # sha lookup: best-case and worst case access
        pdb_pack_info = pdb._pack_info
        # END shuffle shas
        st = time()
        for sha in sha_list:
            pdb_pack_info(sha)
        # END for each sha to look up
        elapsed = time() - st

        # discard cache
        del(pdb._entities)
        pdb.entities()
        print("PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" %
              (ns, len(pdb.entities()), elapsed, ns / elapsed), file=sys.stderr)
        # END for each random mode

        # query info and streams only
        max_items = 10000           # can wait longer when testing memory
        for pdb_fun in (pdb.info, pdb.stream):
            st = time()
            for sha in sha_list[:max_items]:
                pdb_fun(sha)
            elapsed = time() - st
            print("PDB: Obtained %i object %s by sha in %f s ( %f items/s )" %
                  (max_items, pdb_fun.__name__.upper(), elapsed, max_items / elapsed), file=sys.stderr)
        # END for each function

        # retrieve stream and read all
        max_items = 5000
        pdb_stream = pdb.stream
        total_size = 0
        st = time()
        for sha in sha_list[:max_items]:
            stream = pdb_stream(sha)
            read_len = len(stream.read())
            assert read_len == stream.size
            total_size += stream.size
        elapsed = time() - st
        total_kib = total_size / 1000
        print("PDB: Obtained %i streams by sha and read all bytes totalling %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" %
              (max_items, total_kib, total_kib / elapsed, elapsed, max_items / elapsed), file=sys.stderr)
Example #5
    def test_pack_writing(self):
        # see how fast we can write a pack from object streams.
        # This will not be fast, as we take time for decompressing the streams as well
        ostream = CountedNullStream()
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

        ni = 5000
        count = 0
        total_size = 0
        st = time()
        for sha in pdb.sha_iter():
            count += 1
            pdb.stream(sha)
            if count == ni:
                break
        #END gather objects for pack-writing
        elapsed = time() - st
        print >> sys.stderr, "PDB Streaming: Got %i streams by sha in %f s ( %f streams/s )" % (
            ni, elapsed, ni / elapsed)

        st = time()
        PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()),
                              ostream.write,
                              object_count=ni)
        elapsed = time() - st
        total_kb = ostream.bytes_written() / 1000
        print >> sys.stderr, "PDB Streaming: Wrote pack of size %i kb in %f s (%f kb/s)" % (
            total_kb, elapsed, total_kb / elapsed)
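The same write_pack call can be pointed at a real file instead of the counting null stream. A minimal sketch reusing only the invocation shown above; the method name and output path are made up for illustration:

    def test_pack_writing_to_file(self):
        # Illustrative sketch: same stream generator and object_count as above,
        # but the pack bytes go to an actual file via its write method.
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
        ni = 5000
        with open("written.pack", "wb") as fp:  # hypothetical output path
            PackEntity.write_pack((pdb.stream(sha) for sha in pdb.sha_iter()),
                                  fp.write,
                                  object_count=ni)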
Example #7
	def test_stream_reading(self):
		pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
		
		# streaming only, meant for --with-profile runs
		ni = 5000
		count = 0
		pdb_stream = pdb.stream
		total_size = 0
		st = time()
		for sha in pdb.sha_iter():
			if count == ni:
				break
			stream = pdb_stream(sha)
			stream.read()
			total_size += stream.size
			count += 1
		elapsed = time() - st
		total_kib = total_size / 1000
		print >> sys.stderr, "PDB Streaming: Got %i streams by sha and read all bytes totalling %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (ni, total_kib, total_kib / elapsed, elapsed, ni / elapsed)
Example #8
	def test_correctness(self):
		pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))
		# disabled for now as it used to work perfectly, checking big repositories takes a long time
		print >> sys.stderr, "Endurance run: verify streaming of objects (crc and sha)"
		for crc in range(2):
			count = 0
			st = time()
			for entity in pdb.entities():
				pack_verify = entity.is_valid_stream
				sha_by_index = entity.index().sha
				for index in xrange(entity.index().size()):
					try:
						assert pack_verify(sha_by_index(index), use_crc=crc)
						count += 1
					except UnsupportedOperation:
						pass
					# END ignore old indices
				# END for each index
			# END for each entity
			elapsed = time() - st
			print >> sys.stderr, "PDB: verified %i objects (crc=%i) in %f s ( %f objects/s )" % (count, crc, elapsed, count / elapsed)
Example #9
    def test_pack_random_access(self):
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

        # sha lookup
        st = time()
        sha_list = list(pdb.sha_iter())
        elapsed = time() - st
        ns = len(sha_list)
        print("PDB: looked up %i shas by index in %f s ( %f shas/s )" %
              (ns, elapsed, ns / elapsed),
              file=sys.stderr)

        # sha lookup: best-case and worst case access
        pdb_pack_info = pdb._pack_info
        # END shuffle shas
        st = time()
        for sha in sha_list:
            pdb_pack_info(sha)
        # END for each sha to look up
        elapsed = time() - st

        # discard cache
        del (pdb._entities)
        pdb.entities()
        print("PDB: looked up %i sha in %i packs in %f s ( %f shas/s )" %
              (ns, len(pdb.entities()), elapsed, ns / elapsed),
              file=sys.stderr)
        # END for each random mode

        # query info and streams only
        max_items = 10000  # can wait longer when testing memory
        for pdb_fun in (pdb.info, pdb.stream):
            st = time()
            for sha in sha_list[:max_items]:
                pdb_fun(sha)
            elapsed = time() - st
            print("PDB: Obtained %i object %s by sha in %f s ( %f items/s )" %
                  (max_items, pdb_fun.__name__.upper(), elapsed,
                   max_items / elapsed),
                  file=sys.stderr)
        # END for each function

        # retrieve stream and read all
        max_items = 5000
        pdb_stream = pdb.stream
        total_size = 0
        st = time()
        for sha in sha_list[:max_items]:
            stream = pdb_stream(sha)
            read_len = len(stream.read())
            assert read_len == stream.size
            total_size += stream.size
        elapsed = time() - st
        total_kib = total_size / 1000
        print(
            "PDB: Obtained %i streams by sha and read all bytes totalling %i KiB ( %f KiB / s ) in %f s ( %f streams/s )"
            % (max_items, total_kib, total_kib / elapsed, elapsed,
               max_items / elapsed),
            file=sys.stderr)
Example #10
    def test_pack_random_access(self):
        pdb = PackedDB(os.path.join(self.gitrepopath, "objects/pack"))

        # sha lookup
        st = time()
        sha_list = list(pdb.sha_iter())
        elapsed = time() - st
        ns = len(sha_list)
        print >> sys.stderr, "PDB: looked up %i shas by index in %f s ( %f shas/s )" % (
            ns, elapsed, ns / elapsed)

        # sha lookup: best-case and worst case access
        pdb_pack_info = pdb._pack_info
        access_times = list()
        for rand in range(2):
            if rand:
                random.shuffle(sha_list)
            # END shuffle shas
            st = time()
            for sha in sha_list:
                pdb_pack_info(sha)
            # END for each sha to look up
            elapsed = time() - st
            access_times.append(elapsed)

            # discard cache
            del (pdb._entities)
            pdb.entities()
            print >> sys.stderr, "PDB: looked up %i sha in %i packs (random=%i) in %f s ( %f shas/s )" % (
                ns, len(pdb.entities()), rand, elapsed, ns / elapsed)
        # END for each random mode
        elapsed_order, elapsed_rand = access_times

        # well, it's never really sequential regarding the memory patterns, but it
        # shows how well the priority cache performs
        print >> sys.stderr, "PDB: sequential access is %f %% faster than random-access" % (
            100 - ((elapsed_order / elapsed_rand) * 100))

        # query info and streams only
        max_items = 10000  # can wait longer when testing memory
        for pdb_fun in (pdb.info, pdb.stream):
            st = time()
            for sha in sha_list[:max_items]:
                pdb_fun(sha)
            elapsed = time() - st
            print >> sys.stderr, "PDB: Obtained %i object %s by sha in %f s ( %f items/s )" % (
                max_items, pdb_fun.__name__.upper(), elapsed,
                max_items / elapsed)
        # END for each function

        # retrieve stream and read all
        max_items = 5000
        pdb_stream = pdb.stream
        total_size = 0
        st = time()
        for sha in sha_list[:max_items]:
            stream = pdb_stream(sha)
            stream.read()
            total_size += stream.size
        elapsed = time() - st
        total_kib = total_size / 1000
        print >> sys.stderr, "PDB: Obtained %i streams by sha and read all bytes totalling %i KiB ( %f KiB / s ) in %f s ( %f streams/s )" % (
            max_items, total_kib, total_kib / elapsed, elapsed,
            max_items / elapsed)