def batch_bdc2rws_contour(dcm_dir, bdc_dir, rws_dir, **kwargs):
    """ Batch-convert BDC contour annotations to rws annotations.

    Every title (file name without extension) present in both `dcm_dir`
    and `bdc_dir` is handed to `bdc2rws_contour`, which receives
    '<title>.dcm', '<title>.txt' and the output path '<title>.json'
    inside `rws_dir`.

    Args:
        dcm_dir (str): dicom files directory.
        bdc_dir (str): bdc annotation files directory.
        rws_dir (str): rws annotation files directory; it is passed to
            `mv.mkdirs` before any conversion starts.
        **kwargs: forwarded unchanged to `bdc2rws_contour`.

    N.B. A dicom file and its annotation file must share the same title,
    e.g. 123.dcm and 123.txt. A warning is logged when the two
    directories do not pair up one-to-one; unmatched files are skipped.
    """
    mv.mkdirs(rws_dir)

    dcm_names = mv.listdir(dcm_dir)
    bdc_names = mv.listdir(bdc_dir)

    titles_in_dcm = [mv.splitext(name)[0] for name in dcm_names]
    titles_in_bdc = [mv.splitext(name)[0] for name in bdc_names]
    shared_titles = list(set(titles_in_dcm) & set(titles_in_bdc))

    # A perfect pairing means both directories hold the same number of
    # files and every one of them has a counterpart.
    perfectly_paired = (
        len(dcm_names) == len(bdc_names) == len(shared_titles))
    if not perfectly_paired:
        logging.warning('dicoms & annotations do not exactly match')

    for title in tqdm(shared_titles):
        bdc2rws_contour(
            mv.joinpath(dcm_dir, title + '.dcm'),
            mv.joinpath(bdc_dir, title + '.txt'),
            mv.joinpath(rws_dir, title + '.json'),
            **kwargs)
def gen_cls_dsmd_file_from_datafolder(
        root_dir, c2l_path, dsmd_path, classnames=None):
    """ Write class2label and dataset metadata files for a DataFolder.

    A DataFolder is a directory layout for image classification in which
    every sub-directory holds the images of one class:
    -----------------------
    ├── class1
    │   ├── 1.png
    │   └── 2.png
    │   ...
    ├── class2
    │   ├── 3.png
    │   └── 4.png
    └── ...
    -----------------------
    Labels are the positions of the class names in `classnames`.

    Args:
        root_dir (str): root data directory containing all the images.
        c2l_path (str): file path to save class2label info.
        dsmd_path (str): file path to save dataset metadata file.
        classnames (list[str]): names of the classes to include. If not
            given, every entry listed in `root_dir` is used.

    Raises:
        FileExistsError: if the same filename occurs more than once
            across the processed class directories.

    Note:
        Intended to be used together with 'gen_cls_ds_from_datafolder()'.
        Filenames must be unique across the whole DataFolder.
    """
    assert mv.isdir(root_dir)

    if classnames is None:
        classnames = mv.listdir(root_dir)

    label_by_class = {}
    label_by_file = {}
    for index, name in enumerate(classnames):
        label_by_class[name] = index

        subdir = mv.joinpath(root_dir, name)
        assert mv.isdir(subdir)

        for image_name in mv.listdir(subdir):
            # The filename is the metadata key, so a duplicate would
            # silently overwrite an earlier entry; refuse it instead.
            if image_name in label_by_file:
                raise FileExistsError(
                    'filename {} already exists'.format(image_name))
            label_by_file[image_name] = index

    mv.save_dsmd(c2l_path, label_by_class)
    mv.save_dsmd(dsmd_path, label_by_file)
def bdc2dsmd_det_2d(annot_dir, image_dir=None, class2label=None,
                    ignore_label_name=True, replace_ext=lambda x: x):
    """ Build a 2D detection dsmd from a directory of BDC annotations.

    Each annotation file contributes one entry keyed by
    `replace_ext(filename)`. The entry is a list with one float32 array
    of shape (k, 4) per class, holding that class's bounding boxes.

    Args:
        annot_dir (str): directory containing the BDC annotation files.
        image_dir (str): unused; kept so existing callers keep working.
        class2label (dict): maps a label name to its class index. Its
            length sets the number of classes; when None a single class
            is assumed.
        ignore_label_name (bool): if True every box is assigned label 0,
            otherwise the label is looked up in `class2label`.
        replace_ext (callable): maps an annotation filename to the key
            used in the resulting dsmd (identity by default).

    Returns:
        The result of `mv.make_dsmd` applied to the collected entries.

    N.B. annotation file name and image file name should be the same.
    """
    num_classes = len(class2label) if class2label is not None else 1

    def name2label(label_name):
        # Previously the whole `class2label` dict was returned here when
        # label names were not ignored, which cannot be used as a class
        # index; look the name up instead.
        return 0 if ignore_label_name else class2label[label_name]

    filenames = mv.listdir(annot_dir)
    empty_bboxes = np.zeros((0, 4), dtype=np.float32)

    # The shared empty array is never mutated in place: slots are only
    # ever re-assigned, so sharing one instance is safe.
    dsmd = {
        replace_ext(filename): [empty_bboxes] * num_classes
        for filename in filenames
    }

    for filename in filenames:
        per_class = dsmd[replace_ext(filename)]
        bboxes = load_bdc_dr_bbox(
            mv.joinpath(annot_dir, filename), name2label)

        for label, bbox in bboxes:
            bbox = np.array(bbox, dtype=np.float32).reshape(-1, 4)
            # Concatenating onto an empty (0, 4) array yields `bbox`
            # itself, so no special case is needed for the first box.
            per_class[label] = np.concatenate(
                (per_class[label], bbox), axis=0)

    return mv.make_dsmd(dsmd)
def test_copyfiles():
    """ Check `mv.copyfiles` with and without overwrite protection. """
    target_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(target_dir)

    dicom_names = ['brain_001.dcm', 'brain_002.dcm']

    # First copy into the fresh directory.
    mv.copyfiles(dicom_names, target_dir, DCM_DIR)
    assert len(mv.listdir(target_dir)) == 2

    # Copying again is allowed while overwriting is permitted ...
    with not_raises(FileExistsError):
        mv.copyfiles(dicom_names, target_dir, DCM_DIR, non_overwrite=False)

    # ... and must be rejected once it is forbidden.
    with pytest.raises(FileExistsError):
        mv.copyfiles(dicom_names, target_dir, DCM_DIR, non_overwrite=True)

    # Emptying keeps the directory itself but removes its content.
    mv.empty_dir(target_dir)
    assert mv.isdir(target_dir)
    assert len(mv.listdir(target_dir)) == 0

    mv.rmtree(target_dir)
def experiment_exists(experiment):
    """ Tell whether an experiment is already present under 'runs'.

    Each entry listed in 'runs' is split on '_' at most three times and
    the last piece is compared with `experiment`.

    Args:
        experiment (str): experiment name to look for.

    Returns:
        (bool): True if some entry of 'runs' matches, otherwise False
            (also False when 'runs' is not a directory).
    """
    if not mv.isdir('runs'):
        return False

    known_names = [entry.split('_', 3)[-1] for entry in mv.listdir('runs')]
    return experiment in known_names
def gen_cls_ds_from_datafolder(
        in_dir, out_dir, auto_mkdirs=True, classnames=None):
    """ Flatten a DataFolder into a single directory of images.

    The files of every selected class directory are copied into
    `out_dir`; the files inside the DataFolder are only read.

    Args:
        in_dir (str): DataFolder root directory.
        out_dir (str): directory receiving all the images. It is passed
            to `mv.empty_dir` before copying.
        auto_mkdirs (bool): whether to call `mv.mkdirs` on the parent
            directory of `out_dir` first.
        classnames (list[str]): names of the classes to collect. If not
            given, every entry listed in `in_dir` is used.

    Note:
        Intended to be used together with
        'gen_cls_dsmd_file_from_datafolder()', which also describes the
        DataFolder layout. Files are copied with `non_overwrite=True`,
        so filenames must be unique across the DataFolder; a duplicate
        is expected to raise FileExistsError.
    """
    assert mv.isdir(in_dir)

    # clean output directory
    if auto_mkdirs:
        mv.mkdirs(mv.parentdir(out_dir))
    mv.empty_dir(out_dir)

    selected = mv.listdir(in_dir) if classnames is None else classnames

    for name in selected:
        source_dir = mv.joinpath(in_dir, name)
        assert mv.isdir(source_dir)

        ordered_names = natsorted(mv.listdir(source_dir))
        mv.copyfiles(ordered_names, out_dir, source_dir, non_overwrite=True)
def test_glob_file():
    """ Check `mv.glob` in FILE mode, recursive and non-recursive. """
    file_mode = mv.GlobMode.FILE

    # Recursive search below DATA_DIR finds every png fixture.
    recursive_pngs = mv.glob(DATA_DIR, '*.png', mode=file_mode,
                             recursive=True)
    assert len(recursive_pngs) == 16

    # No png is expected directly inside DATA_DIR itself.
    top_level_pngs = mv.glob(DATA_DIR, '*.png', mode=file_mode,
                             recursive=False)
    assert len(top_level_pngs) == 0

    # Without an explicit pattern the result must agree with a listing.
    png_dir_files = mv.glob(PNG_DIR, mode=file_mode, recursive=False)
    assert len(png_dir_files) == len(mv.listdir(PNG_DIR))
def batch_mask2rws(mask_dir, rws_dir, **kwargs):
    """ Batch-convert mask annotations to rws annotations.

    For every entry listed in `mask_dir`, its title (file name without
    extension) is used to call `mask2rws` with '<title>.png' in
    `mask_dir` and '<title>.json' in `rws_dir`.

    Args:
        mask_dir (str): mask files directory.
        rws_dir (str): rws annotation files directory; it is passed to
            `mv.mkdirs` before any conversion starts.
        **kwargs: forwarded unchanged to `mask2rws`.

    N.B. dicom file title should be exactly the same with mask file
    title. e.g. 123.dcm, 123.png
    """
    mv.mkdirs(rws_dir)

    titles = [mv.splitext(name)[0] for name in mv.listdir(mask_dir)]

    for title in tqdm(titles):
        source_png = mv.joinpath(mask_dir, title + '.png')
        target_json = mv.joinpath(rws_dir, title + '.json')
        mask2rws(source_png, target_json, **kwargs)
def test_gen_cls_ds():
    """ Check both DataFolder generators against the stored fixtures. """
    work_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(work_dir)

    # Generate the metadata files and compare them with the references.
    generated_c2l_path = mv.joinpath(work_dir, 'tmp_c2l.txt')
    generated_dsmd_path = mv.joinpath(work_dir, 'tmp_dsmd.txt')
    mv.gen_cls_dsmd_file_from_datafolder(
        DF_DIR, generated_c2l_path, generated_dsmd_path)

    expected_dsmd = mv.load_dsmd(DSMD_DF)
    generated_dsmd = mv.load_dsmd(generated_dsmd_path)
    expected_c2l = mv.load_dsmd(CLS2LBL)
    generated_c2l = mv.load_dsmd(generated_c2l_path)

    assert_equal_dsmds(expected_dsmd, generated_dsmd)
    assert_equal_dsmds(expected_c2l, generated_c2l)

    # Flatten the DataFolder into the emptied directory and count files.
    mv.empty_dir(work_dir)
    mv.gen_cls_ds_from_datafolder(DF_DIR, work_dir)
    assert len(mv.listdir(work_dir)) == 8

    mv.rmtree(work_dir)
def isdicomdir(path):
    """ Judge whether a given directory is a valid dicom directory.

    A directory is a dicom directory only if it contains at least one
    entry and every entry in it passes `isdicom`.

    Args:
        path (str): given directory path.

    Returns:
        (bool): True if given directory path is a dicom directory,
            otherwise False (including when `path` is not a directory
            or the directory is empty).
    """
    if not mv.isdir(path):
        return False

    file_names = mv.listdir(path)

    # An empty directory holds no dicom at all, so it does not qualify.
    if not file_names:
        return False

    # Every entry must be checked; stopping after the first dicom would
    # accept directories that also contain other files.
    return all(
        isdicom(mv.joinpath(path, file_name)) for file_name in file_names)
def __init__(self, cfg, mode, build_transform, image_loader):
    """ Set up the dataset for the given mode.

    The dsmd location is chosen from `cfg.DATA` according to `mode` and
    may be either a dsmd file or a directory of images.

    Args:
        cfg: configuration object; `cfg.DATA.TRAIN_DSMD`,
            `cfg.DATA.VAL_DSMD`, `cfg.DATA.TEST_DSMD` and
            `cfg.DATA.IMAGE_DIR` are read here.
        mode: one of `mv.ModeKey.TRAIN`, `mv.ModeKey.VAL`,
            `mv.ModeKey.TEST`.
        build_transform (callable): called as
            `build_transform(cfg, is_train)`; the result is stored as
            `self.transform`.
        image_loader: stored as `self.image_loader`.
    """
    self.is_train = (mode == mv.ModeKey.TRAIN)
    self.mode2dsmd = {
        mv.ModeKey.TRAIN: cfg.DATA.TRAIN_DSMD,
        mv.ModeKey.VAL: cfg.DATA.VAL_DSMD,
        mv.ModeKey.TEST: cfg.DATA.TEST_DSMD,
    }
    dsmd_path = self.mode2dsmd[mode]
    assert mv.isfile(dsmd_path) or mv.isdir(dsmd_path)

    if mv.isfile(dsmd_path):
        # for a dsmd file: images live in the configured image directory
        self.dsmd = mv.load_dsmd(dsmd_path)
        self.filenames = list(self.dsmd.keys())
        image_dir = cfg.DATA.IMAGE_DIR
    else:
        # for a directory containing test images: there is no metadata
        # and the images live in that directory itself
        self.dsmd = None
        self.filenames = mv.listdir(dsmd_path)
        image_dir = dsmd_path

    # Previously the directory case stored bare file names here and left
    # `self.filenames` undefined; both modes now expose the same
    # attributes, with `filepaths` joined onto the image directory.
    self.filepaths = [
        mv.joinpath(image_dir, filename) for filename in self.filenames
    ]

    self.transform = build_transform(cfg, self.is_train)
    self.image_loader = image_loader