def test_has_duplicated_files():
    """Check mv.find_duplicated_files() against a scratch directory.

    Three .dcm files are copied from DCM_DIR into a temporary sub-directory
    of DATA_DIR and no duplicates are expected. Files already present are
    then copied again under new names ('dup_0.dcm', 'dup_1.dcm') and one
    duplicate group per re-copied file is expected.

    The scratch directory is removed in a ``finally`` clause so that a
    failing assertion does not leave files behind for later test runs.
    """
    dst_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(dst_dir)
    try:
        # non duplicated files case
        src_paths = ['brain_001.dcm', 'brain_002.dcm', 'brain_003.dcm']
        mv.copyfiles(src_paths, dst_dir, DCM_DIR)
        assert len(mv.find_duplicated_files(dst_dir)) == 0

        # duplicated files case: one extra copy -> one duplicate group
        mv.non_overwrite_cp(mv.joinpath(DCM_DIR, src_paths[0]),
                            mv.joinpath(dst_dir, 'dup_0.dcm'))
        duplicated_files = mv.find_duplicated_files(dst_dir)
        assert len(duplicated_files) == 1
        # the group must contain both the original and its copy
        assert (mv.joinpath(dst_dir, 'brain_001.dcm') in duplicated_files[0]
                and mv.joinpath(dst_dir, 'dup_0.dcm') in duplicated_files[0])

        # a copy of a second file -> a second, separate duplicate group
        mv.non_overwrite_cp(mv.joinpath(DCM_DIR, src_paths[1]),
                            mv.joinpath(dst_dir, 'dup_1.dcm'))
        duplicated_files = mv.find_duplicated_files(dst_dir)
        assert len(duplicated_files) == 2
    finally:
        # always clean up, even when an assertion above fails
        mv.rmtree(dst_dir)
def gen_cls_ds_from_datafolder(
        in_dir, out_dir, auto_mkdirs=True, classnames=None):
    """ Build a flat classification dataset out of a DataFolder.

    Every file listed in the selected class sub-directories of `in_dir` is
    copied into `out_dir`. The DataFolder itself is only read from.

    Args:
        in_dir (str): root directory of the DataFolder. Must be an existing
            directory.
        out_dir (str): directory that receives the copied images. It is
            passed to mv.empty_dir() before anything is copied.
        auto_mkdirs (bool): whether to create the parent directory of
            `out_dir` before `out_dir` is emptied.
        classnames (list[str]): names of the class sub-directories to
            collect. When None, every entry listed in `in_dir` is used.

    Raises:
        AssertionError: if `in_dir`, or any selected class sub-directory,
            is not a directory.
        FileExistsError: if two images share the same filename, because
            files are copied with non_overwrite=True.

    Note:
        Intended to be used together with
        gen_cls_dsmd_file_from_datafolder(), which also describes the
        DataFolder layout. Image filenames must be unique across all
        collected classes.
    """
    assert mv.isdir(in_dir)

    # Start from a clean output directory.
    if auto_mkdirs:
        out_parent = mv.parentdir(out_dir)
        mv.mkdirs(out_parent)
    mv.empty_dir(out_dir)

    # Fall back to every entry under the root when no classes were given.
    selected = mv.listdir(in_dir) if classnames is None else classnames

    for name in selected:
        src_dir = mv.joinpath(in_dir, name)
        assert mv.isdir(src_dir)

        # Copy in natural sort order; refuse to overwrite existing files.
        mv.copyfiles(
            natsorted(mv.listdir(src_dir)), out_dir, src_dir,
            non_overwrite=True)
def test_copyfiles():
    """Check mv.copyfiles() overwrite handling and mv.empty_dir().

    Two .dcm files are copied from DCM_DIR into a temporary sub-directory
    of DATA_DIR. Copying them again must succeed with non_overwrite=False
    and raise FileExistsError with non_overwrite=True. Finally,
    mv.empty_dir() must leave the directory in place but with no entries.

    The scratch directory is removed in a ``finally`` clause so that a
    failing check does not leave files behind for later test runs.
    """
    dst_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(dst_dir)
    try:
        src_paths = ['brain_001.dcm', 'brain_002.dcm']
        mv.copyfiles(src_paths, dst_dir, DCM_DIR)
        assert len(mv.listdir(dst_dir)) == 2

        # overwriting existing files is allowed when non_overwrite is False
        with not_raises(FileExistsError):
            mv.copyfiles(src_paths, dst_dir, DCM_DIR, non_overwrite=False)

        # ... and rejected when non_overwrite is True
        with pytest.raises(FileExistsError):
            mv.copyfiles(src_paths, dst_dir, DCM_DIR, non_overwrite=True)

        # empty_dir keeps the directory itself but removes its contents
        mv.empty_dir(dst_dir)
        assert mv.isdir(dst_dir)
        assert len(mv.listdir(dst_dir)) == 0
    finally:
        # always clean up, even when a check above fails
        mv.rmtree(dst_dir)