def get_samples(root: str, folder_to_idx: Dict[str, int],
                is_valid_file: Callable, file_client: FileClient):
    """Collect ``(path, class_idx)`` samples from every class folder.

    Class folders are visited in sorted name order and the files inside each
    folder are visited in sorted order, so the result is deterministic.

    Args:
        root (str): Root directory that contains the class folders.
        folder_to_idx (dict): Mapping from class folder name to class index.
        is_valid_file (Callable): Predicate called with each entry yielded by
            ``file_client.list_dir_or_file`` for a class folder; returns
            whether that entry is a valid sample file.
        file_client (FileClient): Client used to join paths and to list the
            files of each class folder recursively.

    Returns:
        Tuple[list, set]:

        - samples: A list of ``(path, class_idx)`` tuples, where ``path`` is
          the class folder name joined with the listed file, i.e. it does
          NOT include ``root``.
        - empty_folders: Names of the folders in ``folder_to_idx`` that
          contain no valid file.
    """
    samples = []
    available_classes = set()

    # Iterating a dict yields its keys; sorting keeps the sample order stable.
    for folder_name in sorted(folder_to_idx):
        class_idx = folder_to_idx[folder_name]
        _dir = file_client.join_path(root, folder_name)
        files = file_client.list_dir_or_file(
            _dir,
            list_dir=False,
            list_file=True,
            recursive=True,
        )
        # `sorted` consumes the listing once; no intermediate list is needed.
        for file in sorted(files):
            if not is_valid_file(file):
                continue
            path = file_client.join_path(folder_name, file)
            samples.append((path, class_idx))
            available_classes.add(folder_name)

    empty_folders = set(folder_to_idx) - available_classes
    return samples, empty_folders
def test_petrel_backend(self, backend, prefix):
    """Exercise ``FileClient`` with the petrel backend.

    Covers reading images, ``path_mapping`` validation, ``_map_path`` and
    ``_format_path``, the mocked ``get``/``get_text``/``put``/``put_text``/
    ``remove``/``exists``/``isdir``/``isfile`` calls, ``join_path``,
    ``get_local_path`` and ``list_dir_or_file``.
    """
    petrel_backend = FileClient(backend=backend, prefix=prefix)

    # test `allow_symlink` attribute
    assert not petrel_backend.allow_symlink

    # input path is Path object
    img_bytes = petrel_backend.get(self.img_path)
    img = mmcv.imfrombytes(img_bytes)
    assert img.shape == self.img_shape
    # input path is str
    img_bytes = petrel_backend.get(str(self.img_path))
    img = mmcv.imfrombytes(img_bytes)
    assert img.shape == self.img_shape

    # `path_mapping` is either None or dict
    with pytest.raises(AssertionError):
        FileClient('petrel', path_mapping=1)

    # test `_map_path`
    petrel_dir = 's3://user/data'
    petrel_backend = FileClient(
        'petrel', path_mapping={str(self.test_data_dir): petrel_dir})
    assert petrel_backend.client._map_path(str(self.img_path)) == \
        str(self.img_path).replace(str(self.test_data_dir), petrel_dir)

    petrel_path = f'{petrel_dir}/test.jpg'
    petrel_backend = FileClient('petrel')
    # The low-level client object whose methods are patched/removed below.
    raw_client = petrel_backend.client._client

    # test `_format_path`
    assert petrel_backend.client._format_path('s3://user\\data\\test.jpg')\
        == petrel_path

    # test `get`
    with patch.object(raw_client, 'Get',
                      return_value=b'petrel') as mock_get:
        assert petrel_backend.get(petrel_path) == b'petrel'
        mock_get.assert_called_once_with(petrel_path)

    # test `get_text`
    with patch.object(raw_client, 'Get',
                      return_value=b'petrel') as mock_get:
        assert petrel_backend.get_text(petrel_path) == 'petrel'
        mock_get.assert_called_once_with(petrel_path)

    # test `put`
    with patch.object(raw_client, 'put') as mock_put:
        petrel_backend.put(b'petrel', petrel_path)
        mock_put.assert_called_once_with(petrel_path, b'petrel')

    # test `put_text`
    with patch.object(raw_client, 'put') as mock_put:
        petrel_backend.put_text('petrel', petrel_path)
        mock_put.assert_called_once_with(petrel_path, b'petrel')

    # test `remove`
    assert has_method(raw_client, 'delete')
    # raise NotImplementedError if `delete` is not implemented
    with delete_and_reset_method(raw_client, 'delete'):
        assert not has_method(raw_client, 'delete')
        with pytest.raises(NotImplementedError):
            petrel_backend.remove(petrel_path)

    with patch.object(raw_client, 'delete') as mock_delete:
        petrel_backend.remove(petrel_path)
        mock_delete.assert_called_once_with(petrel_path)

    # test `exists`
    assert has_method(raw_client, 'contains')
    assert has_method(raw_client, 'isdir')
    # raise NotImplementedError if `contains` and `isdir` are both missing
    with delete_and_reset_method(raw_client, 'contains'), \
            delete_and_reset_method(raw_client, 'isdir'):
        assert not has_method(raw_client, 'contains')
        assert not has_method(raw_client, 'isdir')
        with pytest.raises(NotImplementedError):
            petrel_backend.exists(petrel_path)

    with patch.object(raw_client, 'contains',
                      return_value=True) as mock_contains:
        assert petrel_backend.exists(petrel_path)
        mock_contains.assert_called_once_with(petrel_path)

    # test `isdir`
    assert has_method(raw_client, 'isdir')
    with delete_and_reset_method(raw_client, 'isdir'):
        assert not has_method(raw_client, 'isdir')
        with pytest.raises(NotImplementedError):
            petrel_backend.isdir(petrel_path)

    with patch.object(raw_client, 'isdir',
                      return_value=True) as mock_isdir:
        assert petrel_backend.isdir(petrel_dir)
        mock_isdir.assert_called_once_with(petrel_dir)

    # test `isfile`
    assert has_method(raw_client, 'contains')
    with delete_and_reset_method(raw_client, 'contains'):
        assert not has_method(raw_client, 'contains')
        with pytest.raises(NotImplementedError):
            petrel_backend.isfile(petrel_path)

    with patch.object(raw_client, 'contains',
                      return_value=True) as mock_contains:
        assert petrel_backend.isfile(petrel_path)
        mock_contains.assert_called_once_with(petrel_path)

    # test `join_path`
    assert petrel_backend.join_path(petrel_dir, 'file') == \
        f'{petrel_dir}/file'
    assert petrel_backend.join_path(f'{petrel_dir}/', 'file') == \
        f'{petrel_dir}/file'
    assert petrel_backend.join_path(petrel_dir, 'dir', 'file') == \
        f'{petrel_dir}/dir/file'

    # test `get_local_path`
    with patch.object(raw_client, 'Get',
                      return_value=b'petrel') as mock_get, \
            patch.object(raw_client, 'contains',
                         return_value=True) as mock_contains:
        with petrel_backend.get_local_path(petrel_path) as path:
            # `read_bytes` closes the file, so no handle is still open
            # when the context manager releases the local copy.
            assert Path(path).read_bytes() == b'petrel'
        # exit the with block and path will be released
        assert not osp.isfile(path)
        mock_get.assert_called_once_with(petrel_path)
        mock_contains.assert_called_once_with(petrel_path)

    # test `list_dir_or_file`
    assert has_method(raw_client, 'list')
    with delete_and_reset_method(raw_client, 'list'):
        assert not has_method(raw_client, 'list')
        with pytest.raises(NotImplementedError):
            list(petrel_backend.list_dir_or_file(petrel_dir))

    with build_temporary_directory() as tmp_dir:
        # 1. list directories and files
        assert set(petrel_backend.list_dir_or_file(tmp_dir)) == {
            'dir1', 'dir2', 'text1.txt', 'text2.txt'
        }
        # 2. list directories and files recursively
        assert set(
            petrel_backend.list_dir_or_file(tmp_dir, recursive=True)) == {
                'dir1',
                '/'.join(('dir1', 'text3.txt')),
                'dir2',
                '/'.join(('dir2', 'dir3')),
                '/'.join(('dir2', 'dir3', 'text4.txt')),
                '/'.join(('dir2', 'img.jpg')),
                'text1.txt',
                'text2.txt',
            }
        # 3. only list directories
        assert set(
            petrel_backend.list_dir_or_file(
                tmp_dir, list_file=False)) == {'dir1', 'dir2'}
        with pytest.raises(
                TypeError,
                match=('`list_dir` should be False when `suffix` is not '
                       'None')):
            # Exception is raised among the `list_dir_or_file` of client,
            # so we need to invoke the client to trigger the exception
            petrel_backend.client.list_dir_or_file(tmp_dir,
                                                   list_file=False,
                                                   suffix='.txt')
        # 4. only list directories recursively
        assert set(
            petrel_backend.list_dir_or_file(tmp_dir,
                                            list_file=False,
                                            recursive=True)) == {
                                                'dir1',
                                                'dir2',
                                                '/'.join(('dir2', 'dir3')),
                                            }
        # 5. only list files
        assert set(
            petrel_backend.list_dir_or_file(
                tmp_dir, list_dir=False)) == {'text1.txt', 'text2.txt'}
        # 6. only list files recursively
        assert set(
            petrel_backend.list_dir_or_file(tmp_dir,
                                            list_dir=False,
                                            recursive=True)) == {
                '/'.join(('dir1', 'text3.txt')),
                '/'.join(('dir2', 'dir3', 'text4.txt')),
                '/'.join(('dir2', 'img.jpg')),
                'text1.txt',
                'text2.txt',
            }
        # 7. only list files ending with suffix
        assert set(
            petrel_backend.list_dir_or_file(
                tmp_dir, list_dir=False,
                suffix='.txt')) == {'text1.txt', 'text2.txt'}
        assert set(
            petrel_backend.list_dir_or_file(
                tmp_dir, list_dir=False,
                suffix=('.txt', '.jpg'))) == {'text1.txt', 'text2.txt'}
        with pytest.raises(
                TypeError,
                match='`suffix` must be a string or tuple of strings'):
            petrel_backend.client.list_dir_or_file(tmp_dir,
                                                   list_dir=False,
                                                   suffix=['.txt', '.jpg'])
        # 8. only list files ending with suffix recursively
        assert set(
            petrel_backend.list_dir_or_file(tmp_dir,
                                            list_dir=False,
                                            suffix='.txt',
                                            recursive=True)) == {
                '/'.join(('dir1', 'text3.txt')),
                '/'.join(('dir2', 'dir3', 'text4.txt')),
                'text1.txt',
                'text2.txt',
            }
        # 9. only list files ending with one of several suffixes
        #    recursively
        assert set(
            petrel_backend.list_dir_or_file(tmp_dir,
                                            list_dir=False,
                                            suffix=('.txt', '.jpg'),
                                            recursive=True)) == {
                '/'.join(('dir1', 'text3.txt')),
                '/'.join(('dir2', 'dir3', 'text4.txt')),
                '/'.join(('dir2', 'img.jpg')),
                'text1.txt',
                'text2.txt',
            }
def test_disk_backend(self):
    """Exercise ``FileClient`` with the disk backend.

    Covers the ``name`` and ``allow_symlink`` attributes, ``get``,
    ``get_text``, ``put``, ``put_text``, ``isfile``, ``remove``, ``exists``,
    ``get_local_path``, ``join_path`` and ``list_dir_or_file``.
    """
    disk_backend = FileClient('disk')

    # test `name` attribute
    assert disk_backend.name == 'HardDiskBackend'
    # test `allow_symlink` attribute
    assert disk_backend.allow_symlink

    # test `get`
    # input path is Path object
    img_bytes = disk_backend.get(self.img_path)
    img = mmcv.imfrombytes(img_bytes)
    # `read_bytes`/`read_text` close the file, unlike `open(...).read()`
    assert self.img_path.read_bytes() == img_bytes
    assert img.shape == self.img_shape
    # input path is str
    img_bytes = disk_backend.get(str(self.img_path))
    img = mmcv.imfrombytes(img_bytes)
    assert self.img_path.read_bytes() == img_bytes
    assert img.shape == self.img_shape

    # test `get_text`
    # input path is Path object
    value_buf = disk_backend.get_text(self.text_path)
    assert self.text_path.read_text() == value_buf
    # input path is str
    value_buf = disk_backend.get_text(str(self.text_path))
    assert self.text_path.read_text() == value_buf

    with tempfile.TemporaryDirectory() as tmp_dir:
        # No file handle may stay open in here: the directory is removed
        # when the block exits.
        # test `put`
        filepath1 = Path(tmp_dir) / 'test.jpg'
        disk_backend.put(b'disk', filepath1)
        assert filepath1.read_bytes() == b'disk'
        # test the `mkdir_or_exist` behavior in `put`
        _filepath1 = Path(tmp_dir) / 'not_existed_dir1' / 'test.jpg'
        disk_backend.put(b'disk', _filepath1)
        assert _filepath1.read_bytes() == b'disk'

        # test `put_text`
        filepath2 = Path(tmp_dir) / 'test.txt'
        disk_backend.put_text('disk', filepath2)
        assert filepath2.read_text() == 'disk'
        # test the `mkdir_or_exist` behavior in `put_text`
        _filepath2 = Path(tmp_dir) / 'not_existed_dir2' / 'test.txt'
        disk_backend.put_text('disk', _filepath2)
        assert _filepath2.read_text() == 'disk'

        # test `isfile`
        assert disk_backend.isfile(filepath2)
        assert not disk_backend.isfile(Path(tmp_dir) / 'not/existed/path')

        # test `remove`
        disk_backend.remove(filepath2)

        # test `exists`
        assert not disk_backend.exists(filepath2)

        # test `get_local_path`
        # if the backend is disk, `get_local_path` just return the input
        with disk_backend.get_local_path(filepath1) as path:
            assert str(filepath1) == path
        assert osp.isfile(filepath1)

    # test `join_path`
    disk_dir = '/path/of/your/directory'
    assert disk_backend.join_path(disk_dir, 'file') == \
        osp.join(disk_dir, 'file')
    assert disk_backend.join_path(disk_dir, 'dir', 'file') == \
        osp.join(disk_dir, 'dir', 'file')

    # test `list_dir_or_file`
    with build_temporary_directory() as tmp_dir:
        # 1. list directories and files
        assert set(disk_backend.list_dir_or_file(tmp_dir)) == {
            'dir1', 'dir2', 'text1.txt', 'text2.txt'
        }
        # 2. list directories and files recursively
        assert set(
            disk_backend.list_dir_or_file(tmp_dir, recursive=True)) == {
                'dir1',
                osp.join('dir1', 'text3.txt'),
                'dir2',
                osp.join('dir2', 'dir3'),
                osp.join('dir2', 'dir3', 'text4.txt'),
                osp.join('dir2', 'img.jpg'),
                'text1.txt',
                'text2.txt',
            }
        # 3. only list directories
        assert set(
            disk_backend.list_dir_or_file(
                tmp_dir, list_file=False)) == {'dir1', 'dir2'}
        with pytest.raises(
                TypeError,
                match='`suffix` should be None when `list_dir` is True'):
            # Exception is raised among the `list_dir_or_file` of client,
            # so we need to invoke the client to trigger the exception
            disk_backend.client.list_dir_or_file(tmp_dir,
                                                 list_file=False,
                                                 suffix='.txt')
        # 4. only list directories recursively
        assert set(
            disk_backend.list_dir_or_file(tmp_dir,
                                          list_file=False,
                                          recursive=True)) == {
                                              'dir1',
                                              'dir2',
                                              osp.join('dir2', 'dir3'),
                                          }
        # 5. only list files
        assert set(
            disk_backend.list_dir_or_file(
                tmp_dir, list_dir=False)) == {'text1.txt', 'text2.txt'}
        # 6. only list files recursively
        assert set(
            disk_backend.list_dir_or_file(tmp_dir,
                                          list_dir=False,
                                          recursive=True)) == {
                osp.join('dir1', 'text3.txt'),
                osp.join('dir2', 'dir3', 'text4.txt'),
                osp.join('dir2', 'img.jpg'),
                'text1.txt',
                'text2.txt',
            }
        # 7. only list files ending with suffix
        assert set(
            disk_backend.list_dir_or_file(
                tmp_dir, list_dir=False,
                suffix='.txt')) == {'text1.txt', 'text2.txt'}
        assert set(
            disk_backend.list_dir_or_file(
                tmp_dir, list_dir=False,
                suffix=('.txt', '.jpg'))) == {'text1.txt', 'text2.txt'}
        with pytest.raises(
                TypeError,
                match='`suffix` must be a string or tuple of strings'):
            disk_backend.client.list_dir_or_file(tmp_dir,
                                                 list_dir=False,
                                                 suffix=['.txt', '.jpg'])
        # 8. only list files ending with suffix recursively
        assert set(
            disk_backend.list_dir_or_file(tmp_dir,
                                          list_dir=False,
                                          suffix='.txt',
                                          recursive=True)) == {
                osp.join('dir1', 'text3.txt'),
                osp.join('dir2', 'dir3', 'text4.txt'),
                'text1.txt',
                'text2.txt',
            }
        # 9. only list files ending with one of several suffixes
        #    recursively
        assert set(
            disk_backend.list_dir_or_file(tmp_dir,
                                          list_dir=False,
                                          suffix=('.txt', '.jpg'),
                                          recursive=True)) == {
                osp.join('dir1', 'text3.txt'),
                osp.join('dir2', 'dir3', 'text4.txt'),
                osp.join('dir2', 'img.jpg'),
                'text1.txt',
                'text2.txt',
            }