def test_copytree(self):
    """Check that copytree() copies every object below a source prefix.

    Three objects (including nested names) are inserted under the source
    prefix. After ``copytree`` each must be present under the destination
    prefix while the source objects remain in place.
    """
    src_dir_name = 'gs://gcsio-test/source/'
    dest_dir_name = 'gs://gcsio-test/dest/'
    file_size = 1024
    paths = ['a', 'b/c', 'b/d']

    # Precondition: every source exists and no destination does yet.
    for path in paths:
        src_file_name = src_dir_name + path
        dest_file_name = dest_dir_name + path
        self._insert_random_file(self.client, src_file_name, file_size)
        # assertIn/assertNotIn report both operands when they fail.
        self.assertIn(
            gcsio.parse_gcs_path(src_file_name), self.client.objects.files)
        self.assertNotIn(
            gcsio.parse_gcs_path(dest_file_name), self.client.objects.files)

    self.gcs.copytree(src_dir_name, dest_dir_name)

    # Postcondition: sources are untouched and destinations now exist.
    for path in paths:
        src_file_name = src_dir_name + path
        dest_file_name = dest_dir_name + path
        self.assertIn(
            gcsio.parse_gcs_path(src_file_name), self.client.objects.files)
        self.assertIn(
            gcsio.parse_gcs_path(dest_file_name), self.client.objects.files)
def test_delete(self):
    """Check delete() against both a missing and an existing object."""
    file_name = 'gs://gcsio-test/delete_me'
    file_size = 1024

    # Test deletion of non-existent file.
    self.gcs.delete(file_name)

    self._insert_random_file(self.client, file_name, file_size)
    # assertIn/assertNotIn report both operands when they fail.
    self.assertIn(
        gcsio.parse_gcs_path(file_name), self.client.objects.files)

    self.gcs.delete(file_name)

    self.assertNotIn(
        gcsio.parse_gcs_path(file_name), self.client.objects.files)
def test_rename(self):
    """Check that rename() moves an object: source gone, destination present."""
    src_file_name = 'gs://gcsio-test/source'
    dest_file_name = 'gs://gcsio-test/dest'
    file_size = 1024
    self._insert_random_file(self.client, src_file_name, file_size)

    # Precondition: only the source exists.
    # assertIn/assertNotIn report both operands when they fail.
    self.assertIn(
        gcsio.parse_gcs_path(src_file_name), self.client.objects.files)
    self.assertNotIn(
        gcsio.parse_gcs_path(dest_file_name), self.client.objects.files)

    self.gcs.rename(src_file_name, dest_file_name)

    # Postcondition: only the destination exists.
    self.assertNotIn(
        gcsio.parse_gcs_path(src_file_name), self.client.objects.files)
    self.assertIn(
        gcsio.parse_gcs_path(dest_file_name), self.client.objects.files)
def test_copy(self):
    """Check copy() of an existing object and of a missing source.

    A successful copy must leave the source in place and create the
    destination. Copying from a non-existent source must raise IOError.
    """
    src_file_name = 'gs://gcsio-test/source'
    dest_file_name = 'gs://gcsio-test/dest'
    file_size = 1024
    self._insert_random_file(self.client, src_file_name, file_size)

    # Precondition: only the source exists.
    # assertIn/assertNotIn report both operands when they fail.
    self.assertIn(
        gcsio.parse_gcs_path(src_file_name), self.client.objects.files)
    self.assertNotIn(
        gcsio.parse_gcs_path(dest_file_name), self.client.objects.files)

    self.gcs.copy(src_file_name, dest_file_name)

    # Postcondition: both source and destination exist.
    self.assertIn(
        gcsio.parse_gcs_path(src_file_name), self.client.objects.files)
    self.assertIn(
        gcsio.parse_gcs_path(dest_file_name), self.client.objects.files)

    self.assertRaises(IOError, self.gcs.copy,
                      'gs://gcsio-test/non-existent',
                      'gs://gcsio-test/non-existent-destination')
def setUp(self):
    """Load the 'Y.vcf.bgz' test data and register it with a fake client.

    The raw bytes are kept in ``self._data`` and stored as the object
    'gs://bucket/test' (``self._file_name``) in ``self.client``.
    """
    fixture_path = testdata_util.get_full_file_path('Y.vcf.bgz')
    with open(fixture_path, mode='rb') as fixture:
        # Reading the whole binary file yields the same bytes as joining
        # its lines back together.
        self._data = fixture.read()

    self.client = gcsio_test.FakeGcsClient()
    self.gcs = gcsio.GcsIO(self.client)

    self._file_name = 'gs://bucket/test'
    bucket, name = gcsio.parse_gcs_path(self._file_name)
    fake_file = gcsio_test.FakeFile(bucket, name, self._data, 1)
    self.client.objects.add_file(fake_file)
def test_file_close(self):
    """Write a file, close it twice, and verify the stored contents."""
    file_name = 'gs://gcsio-test/close_file'
    payload = os.urandom(5 * 1024 * 1024 + 2000)

    writer = self.gcs.open(file_name, 'w')
    self.assertEqual(writer.mode, 'w')
    writer.write(payload)
    writer.close()
    writer.close()  # This should not crash.

    bucket, name = gcsio.parse_gcs_path(file_name)
    stored = self.client.objects.get_file(bucket, name)
    self.assertEqual(stored.contents, payload)
def test_file_write(self):
    """Write a payload in three uneven chunks and verify what was stored."""
    file_name = 'gs://gcsio-test/write_file'
    one_mib = 1024 * 1024
    payload = os.urandom(5 * one_mib + 2000)

    writer = self.gcs.open(file_name, 'w')
    self.assertEqual(writer.mode, 'w')
    # Split points at 1000 bytes and at 1 MiB; the last chunk is the rest.
    chunks = (payload[:1000], payload[1000:one_mib], payload[one_mib:])
    for chunk in chunks:
        writer.write(chunk)
    writer.close()

    bucket, name = gcsio.parse_gcs_path(file_name)
    stored = self.client.objects.get_file(bucket, name)
    self.assertEqual(stored.contents, payload)
def test_file_read_line(self):
    """Exercise readline() at buffer edges, line boundaries, EOF and offsets.

    The file content is built from bytes: ``os.urandom`` returns bytes, so
    every literal combined with it (newline, replacement, join separator,
    expected values) is a bytes literal. Mixing in ``str`` literals raises
    TypeError on Python 3; bytes literals are also valid on Python 2.
    """
    file_name = 'gs://gcsio-test/read_line_file'
    lines = []

    # Set a small buffer size to exercise refilling the buffer.
    # First line is carefully crafted so the newline falls as the last
    # character of the buffer to exercise this code path.
    read_buffer_size = 1024
    lines.append(b'x' * 1023 + b'\n')

    for _ in range(1, 1000):
        line_length = random.randint(100, 500)
        # Strip embedded newlines so each entry is exactly one line.
        line = os.urandom(line_length).replace(b'\n', b' ') + b'\n'
        lines.append(line)
    contents = b''.join(lines)

    file_size = len(contents)
    bucket, name = gcsio.parse_gcs_path(file_name)
    self.client.objects.add_file(FakeFile(bucket, name, contents, 1))

    f = self.gcs.open(file_name, read_buffer_size=read_buffer_size)

    # Test read of first two lines.
    f.seek(0)
    self.assertEqual(f.readline(), lines[0])
    self.assertEqual(f.tell(), len(lines[0]))
    self.assertEqual(f.readline(), lines[1])

    # Test read at line boundary.
    f.seek(file_size - len(lines[-1]) - 1)
    self.assertEqual(f.readline(), b'\n')

    # Test read at end of file.
    f.seek(file_size)
    self.assertEqual(f.readline(), b'')

    # Test reads at random positions.
    random.seed(0)
    for _ in range(0, 10):
        start = random.randint(0, file_size - 1)
        line_index = 0
        # Find line corresponding to start index.
        chars_left = start
        while True:
            next_line_length = len(lines[line_index])
            if chars_left - next_line_length < 0:
                break
            chars_left -= next_line_length
            line_index += 1
        f.seek(start)
        self.assertEqual(f.readline(), lines[line_index][chars_left:])
def test_file_flush(self):
    """Interleave write() and flush() calls and verify the stored contents."""
    file_name = 'gs://gcsio-test/flush_file'
    one_mib = 1024 * 1024
    payload = os.urandom(5 * one_mib + 2000)
    bucket, name = gcsio.parse_gcs_path(file_name)

    writer = self.gcs.open(file_name, 'w')
    self.assertEqual(writer.mode, 'w')

    writer.write(payload[:1000])
    writer.flush()
    writer.write(payload[1000:one_mib])
    writer.flush()
    writer.flush()  # Should be a NOOP.
    writer.write(payload[one_mib:])
    # This should already call the equivalent of flush() in its body.
    writer.close()

    stored = self.client.objects.get_file(bucket, name)
    self.assertEqual(stored.contents, payload)
def test_context_manager(self):
    """Use opened files as context managers for writing and reading.

    Also checks that an exception raised inside the ``with`` body
    propagates out of the context manager.
    """
    file_name = 'gs://gcsio-test/context_manager_file'
    payload = os.urandom(1024)

    # Test writing with a context manager.
    with self.gcs.open(file_name, 'w') as writer:
        writer.write(payload)
    bucket, name = gcsio.parse_gcs_path(file_name)
    stored = self.client.objects.get_file(bucket, name)
    self.assertEqual(stored.contents, payload)

    # Test reading with a context manager.
    with self.gcs.open(file_name) as reader:
        self.assertEqual(reader.read(), payload)

    # Test that exceptions are not swallowed by the context manager.
    with self.assertRaises(ZeroDivisionError):
        with self.gcs.open(file_name) as reader:
            reader.read(0 / 0)
def test_file_iterator(self):
    """Check that iterating an opened file yields one item per line.

    The content is built from bytes: ``os.urandom`` returns bytes, so the
    newline, replacement and join separator are bytes literals. Mixing in
    ``str`` literals raises TypeError on Python 3; bytes literals are also
    valid on Python 2.
    """
    file_name = 'gs://gcsio-test/iterating_file'
    lines = []
    line_count = 10
    for _ in range(line_count):
        line_length = random.randint(100, 500)
        # Strip embedded newlines so each entry is exactly one line.
        line = os.urandom(line_length).replace(b'\n', b' ') + b'\n'
        lines.append(line)

    contents = b''.join(lines)
    bucket, name = gcsio.parse_gcs_path(file_name)
    self.client.objects.add_file(FakeFile(bucket, name, contents, 1))

    f = self.gcs.open(file_name)

    # Only the number of yielded lines matters, not their content.
    read_lines = sum(1 for _ in f)

    self.assertEqual(read_lines, line_count)
def test_gcs_path(self):
    """parse_gcs_path() returns a (bucket, name) pair for a gs:// path."""
    expectations = (
        ('gs://bucket/name', ('bucket', 'name')),
        # Everything after the bucket stays in the name, slashes included.
        ('gs://bucket/name/sub', ('bucket', 'name/sub')),
    )
    for gcs_path, expected in expectations:
        self.assertEqual(gcsio.parse_gcs_path(gcs_path), expected)
def _insert_random_file(self, client, path, size, generation=1):
    """Add a FakeFile holding ``size`` random bytes to ``client``.

    Args:
      client: object whose ``objects.add_file`` receives the new file.
      path: GCS path, split into bucket and name by ``gcsio.parse_gcs_path``.
      size: number of random bytes to generate as the file contents.
      generation: value passed as the fourth FakeFile argument.

    Returns:
      The FakeFile that was added.
    """
    bucket, name = gcsio.parse_gcs_path(path)
    payload = os.urandom(size)
    fake_file = FakeFile(bucket, name, payload, generation)
    client.objects.add_file(fake_file)
    return fake_file
def test_gcs_path(self):
    """Check the (bucket, name) pair parse_gcs_path() returns for gs:// paths."""
    # Single-component object name.
    flat = gcsio.parse_gcs_path('gs://bucket/name')
    self.assertEqual(flat, ('bucket', 'name'))

    # Object name that itself contains a slash.
    nested = gcsio.parse_gcs_path('gs://bucket/name/sub')
    self.assertEqual(nested, ('bucket', 'name/sub'))