# NOTE(review): this method has the exact same name as the
# test_read_sequential defined immediately after it, so it is shadowed at
# class-creation time and never runs — it is dead code. The two differ only
# in quote style; remove (or rename) one of them.
def test_read_sequential(self):
    _put(b"123456")
    file = minio.RandomReadMinioFile(Bucket, Key, block_size=2)
    self.assertEqual(file.read(2), b"12")
    self.assertEqual(file.read(2), b"34")
    self.assertEqual(file.read(2), b"56")
    self.assertEqual(file.read(2), b"")
def test_read_sequential(self):
    """Consecutive read() calls walk through the blocks in order, then EOF."""
    _put(b'123456')
    f = minio.RandomReadMinioFile(Bucket, Key, block_size=2)
    # Each 2-byte read consumes one block; the final read signals EOF.
    for expected in (b'12', b'34', b'56', b''):
        self.assertEqual(f.read(2), expected)
def test_raise_file_not_found_between_blocks(self):
    """Fetching a later block after the object vanishes raises FileNotFoundError."""
    _put(b'123456')
    f = minio.RandomReadMinioFile(Bucket, Key, block_size=3)
    _clear()
    # Block #1 is already loaded, so this read still succeeds.
    f.read(3)
    # Block #2 must be fetched now — and the backing object is gone.
    with self.assertRaises(FileNotFoundError):
        f.read(3)
def test_recover_after_read_protocolerror(self, read_mock):
    """A ProtocolError mid-stream triggers a logged retry that then succeeds."""
    # DownloadChunkIterator is patched so the first attempt to stream bytes
    # raises ProtocolError and the retry returns the data.
    read_mock.side_effect = [ProtocolError, b'123456']
    _put(b'123456')
    with self.assertLogs(minio.__name__, 'INFO') as logs:
        f = minio.RandomReadMinioFile(Bucket, Key)
        self.assertEqual(f.read(), b'123456')
        # The retry must be logged.
        self.assertRegex(logs.output[0], 'Retrying exception')
def test_skip_block(self):
    """Seeking past a block means that block is never downloaded."""
    _put(b'123456')
    f = minio.RandomReadMinioFile(Bucket, Key, block_size=2)
    f.read(2)   # fetch block #1
    f.seek(4)   # jump over block #2
    f.read(2)   # fetch block #3
    # Block #2 should never have been fetched. Prove it by deleting the
    # backing object first: reading the skipped range must now fail.
    _clear()
    f.seek(2)
    with self.assertRaises(Exception):
        f.read(2)  # block #2 can no longer be fetched
def test_read_stops_at_block_boundary(self):
    """read(size) returns at most one block, even when size asks for more.

    Per https://docs.python.org/3/library/io.html#io.RawIOBase: read(size)
    makes at most one system call, and fewer than `size` bytes may be
    returned if that call yields fewer.
    """
    _put(b'123456')
    f = minio.RandomReadMinioFile(Bucket, Key, block_size=2)
    # Every read asks for 4 bytes but receives a single 2-byte block.
    for expected in (b'12', b'34', b'56', b''):
        self.assertEqual(f.read(4), expected)
def test_delete_remove_uploaded_data_by_prefix_in_case_model_missing(self):
    """Deleting a wf_module removes S3 data even without an UploadedFile row."""
    workflow = Workflow.create_and_init()
    step = workflow.tabs.first().wf_modules.create(order=0, slug="step-1")
    file_uuid = str(uuidgen.uuid4())
    key = step.uploaded_file_prefix + file_uuid
    minio.put_bytes(minio.UserFilesBucket, key, b"A\n1")
    # Deliberately skip creating the UploadedFile record: this simulates a
    # race during upload/delete that leaves a file on S3 with no matching
    # row in our database.
    step.delete()  # must not crash
    # The orphaned S3 object must be gone as well.
    with self.assertRaises(FileNotFoundError):
        with minio.RandomReadMinioFile(minio.UserFilesBucket, key) as f:
            f.read()
def _minio_open_random(bucket, key):
    """Open `bucket`/`key` for random-access reads, buffered for fastparquet.

    Raise FileNotFoundError for any '/_metadata' key: fastparquet insists on
    probing for the 'hive' storage schema's '_metadata' file before settling
    on the 'simple' schema, but we have never written a file in 'hive'
    format, so the probe can be answered without hitting minio at all.
    """
    if key.endswith("/_metadata"):
        raise FileNotFoundError

    # TODO store column metadata in the database so we don't need to read it
    # from S3. Then consider minio.FullReadMinioFile, which could be faster
    # (benchmark first). Using the 'hive' format with FullReadMinioFile is
    # another option, but that choice would be hard to un-choose, so let's
    # not rush into it.
    reader = minio.RandomReadMinioFile(bucket, key)
    # fastparquet actually expects a _buffered_ reader — it expects read()
    # to always return a buffer of the same length it requests.
    return io.BufferedReader(reader)
def test_read_starting_mid_block(self):
    """A read starting inside a block stops at that block's boundary."""
    _put(b'123456')
    f = minio.RandomReadMinioFile(Bucket, Key, block_size=3)
    f.seek(2)
    # Asking for 2 bytes yields only b'3' — the rest of the current block.
    self.assertEqual(f.read(2), b'3')
def test_raise_file_not_found(self):
    """Opening a missing object raises FileNotFoundError at construction."""
    with self.assertRaises(FileNotFoundError):
        minio.RandomReadMinioFile(Bucket, Key)
def test_read_entire_file(self):
    """read() with no size returns everything from the current offset on."""
    _put(b'123456')
    f = minio.RandomReadMinioFile(Bucket, Key, block_size=2)
    f.seek(1)
    self.assertEqual(f.read(), b'23456')
def test_seek_to_end(self):
    """seek(-n, SEEK_END) positions the cursor relative to the file's end."""
    _put(b'123456')
    f = minio.RandomReadMinioFile(Bucket, Key, block_size=3)
    f.seek(-2, io.SEEK_END)
    self.assertEqual(f.read(), b'56')