def run_reading_to_bytes_benchmark():
    """Benchmark reading files to `bytes`: DareBlopy versus plain Python.

    Two comparisons are registered on a ``benchmark.Benchmark`` instance:
    reading jpeg files from the filesystem, and reading the same files from
    a zip archive. Every worker performs 2000 reads cycling over the 200
    file names ``0.jpg`` .. ``199.jpg``. ``bm.run`` is then invoked with
    ``output_file='test_utils/benchmark_reading_files.png'``.
    """
    bm = benchmark.Benchmark()

    ##################################################################
    # Reading a file to bytes object
    ##################################################################
    def read_to_bytes_native():
        for i in range(2000):
            # Close every handle deterministically instead of relying on
            # garbage collection to release it.
            with open('test_utils/test_images/%d.jpg' % (i % 200), 'rb') as f:
                b = f.read()

    def read_to_bytes_db():
        for i in range(2000):
            b = db.open_as_bytes('test_utils/test_images/%d.jpg' % (i % 200))

    bm.add('reading files to `bytes` from filesystem',
           baseline=read_to_bytes_native,
           dareblopy=read_to_bytes_db)

    ##################################################################
    # Reading files to bytes object from zip archive
    ##################################################################
    def read_jpg_bytes_from_zip_native():
        # Both the archive and each member stream are closed on exit.
        with zipfile.ZipFile("test_utils/test_image_archive.zip", 'r') as archive:
            for i in range(2000):
                with archive.open('%d.jpg' % (i % 200)) as s:
                    b = s.read()

    def read_jpg_bytes_from_zip_db():
        archive = db.open_zip_archive("test_utils/test_image_archive.zip")
        for i in range(2000):
            b = archive.open_as_bytes('%d.jpg' % (i % 200))

    # NOTE(review): the preheat callable opens both archives and returns the
    # handles; it is left untouched because it is not visible here whether
    # the benchmark helper uses the returned objects -- confirm.
    bm.add('reading files to `bytes` from a zip archive',
           baseline=read_jpg_bytes_from_zip_native,
           dareblopy=read_jpg_bytes_from_zip_db,
           preheat=lambda: (db.open_zip_archive("test_utils/test_image_archive.zip"),
                            zipfile.ZipFile("test_utils/test_image_archive.zip", 'r')))

    # Run everything and save plot
    bm.run(
        title='Running time of reading files to `bytes`\nfor DareBlopy and equivalent python code',
        label_baseline='Python Standard Library + zipfile',
        output_file='test_utils/benchmark_reading_files.png',
        loc='ul',
        figsize=(8, 6),
        caption="Reading 200 jpeg files, each file ~30kb. Files are read to 'bytes object (no decoding). "
                "Reading is performed from filesystem and from a zip archive with no compression (storage type). "
                "All files are read 10 times and then measured time is averaged over 10 trials."
    )
def test_reading_to_bytes_from_zip_does_not_exist(self):
    """Opening a missing archive entry raises RuntimeError with the expected message."""
    zip_archive = db.open_zip_archive("test_utils/test_image_archive.zip")

    with self.assertRaises(RuntimeError) as raised:
        zip_archive.open_as_bytes('does_not_exist')

    # The message check runs after the `with` block so that it is executed
    # once the exception has been captured.
    expected_message = 'Can\'t open file: does_not_exist'
    self.assertEqual(expected_message, raised.exception.args[0])
def test_zip_mounting(self):
    """A zip archive mounted into a FileSystem exposes its entries via `open`."""
    filesystem = db.FileSystem()

    # Open the archive file itself through the filesystem, then mount it.
    archive_file = filesystem.open("test_utils/test_archive.zip", lockable=True)
    self.assertTrue(archive_file)
    mounted_archive = db.open_zip_archive(archive_file)
    filesystem.mount_archive(mounted_archive)

    # 'test.txt' is now opened through the same filesystem object.
    stream = filesystem.open('test.txt')
    decoded_text = stream.read().decode('utf-8')
    self.assertEqual(decoded_text, "asdasdasd")
def test_reading_to_numpy_from_zip(self):
    """'0.jpg' decoded via zipfile + PIL equals the array DareBlopy returns for it."""
    # Reference result: standard library zipfile + PIL. The archive and the
    # member stream are closed deterministically; the conversion to an array
    # happens while the stream is still open because PIL reads lazily.
    with zipfile.ZipFile("test_utils/test_image_archive.zip", 'r') as archive:
        with archive.open('0.jpg') as s:
            image = PIL.Image.open(s)
            ndarray1 = np.array(image)

    archive = db.open_zip_archive("test_utils/test_image_archive.zip")
    ndarray2 = archive.read_jpg_as_numpy('0.jpg')

    # array_equal also reports a shape mismatch as a plain failure instead of
    # depending on how elementwise `==` behaves for incompatible shapes.
    self.assertTrue(np.array_equal(ndarray1, ndarray2))
def test_reading_to_bytes_from_zip(self):
    """Bytes of '0.jpg' read via zipfile match both DareBlopy read paths."""
    # Reference bytes from the standard library; archive and member stream
    # are closed as soon as the data has been read.
    with zipfile.ZipFile("test_utils/test_image_archive.zip", 'r') as archive:
        with archive.open('0.jpg') as s:
            b1 = s.read()

    archive = db.open_zip_archive("test_utils/test_image_archive.zip")
    b2 = archive.open_as_bytes('0.jpg')
    b3 = archive.open('0.jpg').read()

    self.assertEqual(b1, b2)
    self.assertEqual(b1, b3)
def __init__(self, arch_path: str, index_path: str, transform=transforms.ToTensor()):
    """Set up the dataset from a zip archive and an index file.

    Args:
        arch_path: Path handed to ``db.open_zip_archive``.
        index_path: Path handed to ``getClassToIndexMapping``.
        transform: Transform stored on the instance; must not be ``None``.
            Defaults to ``transforms.ToTensor()``.

    Raises:
        ValueError: If ``transform`` is ``None``.
    """
    super(ZippedDataset, self).__init__()
    if transform is None:
        # ValueError is a subclass of Exception, so existing
        # `except Exception` handlers keep working.
        raise ValueError(
            "Transforms must be set at least to ToTensor() at the end")

    # load index
    self.img_class_mapping = getClassToIndexMapping(index_path)
    self.archive = db.open_zip_archive(arch_path)
    self.transform = transform
    # Computed last, after the attributes above have been assigned.
    self.class_to_label = self.__getClassToLabelMapping__()
def run_reading_jpeg_to_numpy_benchmark():
    """Benchmark decoding jpeg files to numpy arrays: DareBlopy versus PIL.

    Two comparisons are registered on a ``benchmark.Benchmark`` instance:
    decoding jpeg files from the filesystem, and decoding the same files
    from a zip archive. Each comparison has a PIL + numpy baseline and two
    DareBlopy variants (``read_jpg_as_numpy`` without and with ``True`` as
    the second argument). Every worker performs 2000 reads cycling over the
    200 file names ``0.jpg`` .. ``199.jpg``. ``bm.run`` is then invoked with
    ``output_file='test_utils/benchmark_reading_jpeg.png'``.
    """
    bm = benchmark.Benchmark()

    ##################################################################
    # Reading a jpeg image to numpy array
    ##################################################################
    def read_jpg_to_numpy_pil():
        for i in range(2000):
            image = PIL.Image.open('test_utils/test_images/%d.jpg' % (i % 200))
            ndarray = np.array(image)

    def read_jpg_to_numpy_db():
        for i in range(2000):
            ndarray = db.read_jpg_as_numpy('test_utils/test_images/%d.jpg' % (i % 200))

    def read_jpg_to_numpy_db_turbo():
        for i in range(2000):
            ndarray = db.read_jpg_as_numpy(
                'test_utils/test_images/%d.jpg' % (i % 200), True)

    bm.add('reading jpeg image to numpy',
           baseline=read_jpg_to_numpy_pil,
           dareblopy=read_jpg_to_numpy_db,
           dareblopy_turbo=read_jpg_to_numpy_db_turbo)

    ##################################################################
    # Reading jpeg images to numpy array from zip archive
    ##################################################################
    def read_jpg_to_numpy_from_zip_native():
        # Both the archive and each member stream are closed on exit; the
        # array is built while the stream is open because PIL reads lazily.
        with zipfile.ZipFile("test_utils/test_image_archive.zip", 'r') as archive:
            for i in range(2000):
                with archive.open('%d.jpg' % (i % 200)) as s:
                    image = PIL.Image.open(s)
                    ndarray = np.array(image)

    def read_jpg_to_numpy_from_zip_db():
        archive = db.open_zip_archive("test_utils/test_image_archive.zip")
        for i in range(2000):
            ndarray = archive.read_jpg_as_numpy('%d.jpg' % (i % 200))

    def read_jpg_to_numpy_from_zip_db_turbo():
        archive = db.open_zip_archive("test_utils/test_image_archive.zip")
        for i in range(2000):
            ndarray = archive.read_jpg_as_numpy('%d.jpg' % (i % 200), True)

    # NOTE(review): the preheat callable opens both archives and returns the
    # handles; it is left untouched because it is not visible here whether
    # the benchmark helper uses the returned objects -- confirm.
    bm.add('reading jpeg to numpy from zip',
           baseline=read_jpg_to_numpy_from_zip_native,
           dareblopy=read_jpg_to_numpy_from_zip_db,
           dareblopy_turbo=read_jpg_to_numpy_from_zip_db_turbo,
           preheat=lambda: (db.open_zip_archive("test_utils/test_image_archive.zip"),
                            zipfile.ZipFile("test_utils/test_image_archive.zip", 'r')))

    # Run everything and save plot
    bm.run(
        title='Running time of reading jpeg files to numpy `ndarray`\nfor DareBlopy and equivalent python code',
        label_baseline='Python Standard Library + zipfile\n + PIL + numpy',
        output_file='test_utils/benchmark_reading_jpeg.png',
        loc='lr',
        figsize=(8, 6),
        caption="Reading 200 jpeg files, each file is ~30kb and has 256x256 resolution. "
                "Files are read to numpy `ndarray` (jpeg's are decoded). "
                "Reading is performed from filesystem and from a zip archive with no compression (storage type). "
                "All files are read 10 times and then measured time is averaged over 10 trials."
    )
def read_jpg_bytes_from_zip_db():
    """Read archive members to `bytes` through DareBlopy, 2000 reads in total.

    The member names cycle through '0.jpg' .. '199.jpg'.
    """
    zip_archive = db.open_zip_archive("test_utils/test_image_archive.zip")
    for step in range(2000):
        member_name = '%d.jpg' % (step % 200)
        # The result is only bound to a local; nothing is returned.
        payload = zip_archive.open_as_bytes(member_name)
def read_jpg_to_numpy_from_zip_db_turbo():
    """Decode archive members to numpy arrays through DareBlopy, 200 reads in total.

    ``True`` is passed as the second argument of ``read_jpg_as_numpy``
    (presumably selecting the "turbo" decoding path, going by this
    function's name -- confirm against the DareBlopy documentation).
    """
    zip_archive = db.open_zip_archive(
        "/data/for_benchmark/test_utils/test_image_archive.zip")
    for step in range(200):
        member_name = '%d.jpg' % (step % 200)
        # The result is only bound to a local; nothing is returned.
        pixels = zip_archive.read_jpg_as_numpy(member_name, True)
def read_jpg_to_numpy_from_zip_db():
    """Decode archive members to numpy arrays through DareBlopy, 2000 reads in total.

    The member names cycle through '0.jpg' .. '199.jpg'.
    """
    zip_archive = db.open_zip_archive("test_utils/test_image_archive.zip")
    for step in range(2000):
        member_name = '%d.jpg' % (step % 200)
        # The result is only bound to a local; nothing is returned.
        pixels = zip_archive.read_jpg_as_numpy(member_name)