def test_dsmd_io_det():
    """Round-trip a detection dsmd through save_dsmd / load_dsmd.

    The ground-truth detection dsmd is loaded, written to a temporary file,
    read back from that temporary file and compared with the first load.
    """
    tmp_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    tmp_path = mv.joinpath(tmp_dir, 'tmp_dsmd.txt')

    dsmd_loaded = mv.load_dsmd(DSMD_DET_GT, DSMD_DET_C2L, mode='det')
    mv.save_dsmd(tmp_path, dsmd_loaded, DSMD_DET_C2L, mode='det')
    # Reload from the file that was just written (not from the original
    # ground-truth file); otherwise the saved output is never verified.
    dsmd_reloaded = mv.load_dsmd(tmp_path, DSMD_DET_C2L, mode='det')

    assert_equal_dsmds(dsmd_loaded, dsmd_reloaded)
    mv.rmtree(tmp_dir)
def test_dsmd_io_cls(dsmd_file):
    """Round-trip a dsmd file through save_dsmd / load_dsmd.

    Args:
        dsmd_file: path of the dsmd file to load, as supplied by the caller.
    """
    scratch_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    scratch_file = mv.joinpath(scratch_dir, 'tmp_dsmd.txt')

    original = mv.load_dsmd(dsmd_file)
    mv.save_dsmd(scratch_file, original)
    round_tripped = mv.load_dsmd(scratch_file)

    # What was written and read back must match what was loaded first.
    assert_equal_dsmds(original, round_tripped)
    mv.rmtree(scratch_dir)
def test_eval_det_a():
    """Evaluate the reference detections against the ground truth.

    Checks the 'ap' and 'num_gt_bboxes' fields of the first entry returned
    by eval_det, and the 'accuracy' reported by eval_det4binarycls.
    """
    c2l = mv.load_c2l(DSMD_DET_C2L)
    detections = mv.load_dsmd(DSMD_DET_DT, DSMD_DET_C2L, mode='det')
    ground_truths = mv.load_dsmd(DSMD_DET_GT, DSMD_DET_C2L, mode='det')

    metrics = mv.eval_det(
        detections, ground_truths, num_classes=len(c2l), iou_thr=0.5)
    first_entry = metrics[0]
    ap = first_entry['ap']
    num_anns = first_entry['num_gt_bboxes']
    assert 0.263 < ap < 0.264
    assert int(num_anns) == 546

    binary_metrics = mv.eval_det4binarycls(detections, ground_truths)
    assert 0.849 < binary_metrics['accuracy'] < 0.850
def test_gen_cls_ds():
    """Generate classification dataset files from a data folder and verify.

    First the generated dsmd and class-to-label files are compared with the
    reference files; then, after emptying the directory, a dataset is
    generated into it and the number of directory entries is checked.
    """
    work_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    mv.mkdirs(work_dir)
    c2l_out = mv.joinpath(work_dir, 'tmp_c2l.txt')
    dsmd_out = mv.joinpath(work_dir, 'tmp_dsmd.txt')
    mv.gen_cls_dsmd_file_from_datafolder(DF_DIR, c2l_out, dsmd_out)

    # Load everything up front (reference first, generated second), then
    # compare pair by pair.
    expected_dsmd, generated_dsmd, expected_c2l, generated_c2l = [
        mv.load_dsmd(path)
        for path in (DSMD_DF, dsmd_out, CLS2LBL, c2l_out)
    ]
    assert_equal_dsmds(expected_dsmd, generated_dsmd)
    assert_equal_dsmds(expected_c2l, generated_c2l)

    # Start from an empty directory for the dataset generation step.
    mv.empty_dir(work_dir)
    mv.gen_cls_ds_from_datafolder(DF_DIR, work_dir)
    entries = mv.listdir(work_dir)
    assert len(entries) == 8

    mv.rmtree(work_dir)
def split_dsmd_file(dsmd_filepath, datasplit=None, shuffle=True, suffix='.csv'):
    """ Split a dataset metadata file into several parts.

    Split a dataset metadata file into one file per entry of ``datasplit``
    (e.g. 'train.csv', 'val.csv' and 'test.csv') and put them in the same
    directory as the specified dsmd file.

    Args:
        dsmd_filepath (str): file path of dataset metadata.
        datasplit (dict[str, float]): how to split the dataset.
            e.g. {'train': 0.9, 'val': 0.1, 'test': 0.0}. Defaults to
            {'train': 0.9, 'val': 0.1} when None.
        shuffle (bool): whether to shuffle the dataset before splitting.
        suffix (str): suffix appended to each split name to build the
            output file name.

    Note:
        0.0 < sum of all ratios in datasplit <= 1.0
        If there's no image in a split, the corresponding dsmd file will
        not be saved.
    """
    if datasplit is None:
        datasplit = {'train': 0.9, 'val': 0.1}

    dsmd_dir = mv.parentdir(dsmd_filepath)
    dsmd = mv.load_dsmd(dsmd_filepath)
    num_total = len(dsmd)

    keys = list(dsmd.keys())
    if shuffle:
        random.shuffle(keys)

    # Map each output file path to the number of entries it receives.
    sum_ratio = 0.0
    splits = {}
    for mode, ratio in datasplit.items():
        file_path = mv.joinpath(dsmd_dir, mode + suffix)
        splits[file_path] = int(num_total * ratio)
        sum_ratio += ratio
    # Allow a tiny tolerance: ratios such as 0.8 + 0.1 + 0.1 add up to
    # 1.0000000000000002 in floating point and must not be rejected.
    assert 0.0 < sum_ratio <= 1.0 + 1e-8

    start_index = 0
    for file_path, num_cur_split in splits.items():
        end_index = start_index + num_cur_split
        # Clamp both ends into [0, num_total] so the slice stays in range.
        start_index = min(max(start_index, 0), num_total)
        end_index = min(max(end_index, 0), num_total)

        keys_split = natsorted(keys[start_index:end_index])
        dsmd_split = {key: dsmd[key] for key in keys_split}
        # Empty splits are skipped: no file is written for them.
        if dsmd_split:
            mv.save_dsmd(file_path, dsmd_split)

        start_index = end_index
def test_split_dsmd_file(dsmd_file):
    """Split a copied dsmd file with and without shuffling and verify.

    Args:
        dsmd_file: path of the dsmd file that is copied into the temporary
            directory and split there.
    """
    work_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    source_copy = mv.joinpath(work_dir, 'tmp_dsmd.txt')
    mv.mkdirs(work_dir)
    mv.cp(dsmd_file, source_copy)

    ratios = {'train': 0.9, 'val': 0.1, 'test': 0.0}

    def load_and_check_parts():
        # Checks shared by both runs: train/val files exist, no test file,
        # and the parts hold 18 and 2 entries respectively.
        train_file = mv.joinpath(work_dir, 'train.csv')
        val_file = mv.joinpath(work_dir, 'val.csv')
        test_file = mv.joinpath(work_dir, 'test.csv')
        assert mv.isfile(train_file)
        assert mv.isfile(val_file)
        assert not mv.isfile(test_file)

        train_part = mv.load_dsmd(train_file)
        val_part = mv.load_dsmd(val_file)
        assert len(train_part) == 18
        assert len(val_part) == 2
        return train_part, val_part

    # shuffle
    mv.split_dsmd_file(source_copy, ratios)
    load_and_check_parts()

    # non shuffle
    mv.split_dsmd_file(source_copy, ratios, shuffle=False)
    train_part, val_part = load_and_check_parts()
    assert 'brain_001.dcm' in train_part
    assert 'brain_019.dcm' in val_part

    mv.rmtree(work_dir)
def test_eval_det_b():
    """Evaluate detections built directly from the ground truth.

    Every ground-truth bbox array gets a column of ones appended as score,
    and the result is evaluated against the ground truth itself.
    """
    gt_by_file = mv.load_dsmd(DSMD_DET_GT, DSMD_DET_C2L, mode='det')
    gts = list(gt_by_file.values())

    dts = []
    for img_gt in gts:
        img_dt = []
        for label_id in range(len(img_gt)):
            gt_bboxes = img_gt[label_id]
            # One score of 1.0 per bbox, appended as the last column.
            scores = np.ones((len(gt_bboxes), 1), dtype=np.float32)
            img_dt.append(np.hstack([gt_bboxes, scores]))
        dts.append(img_dt)

    metrics = mv.eval_det(dts, gts)
    first_entry = metrics[0]
    ap = first_entry['ap']
    num_anns = first_entry['num_gt_bboxes']
    assert ap > 0.99
    assert int(num_anns) == 546

    binary_metrics = mv.eval_det4binarycls(dts, gts)
    assert binary_metrics['accuracy'] > 0.99
def __init__(self, cfg, mode, build_transform, image_loader):
    """Set up the dataset for ``mode`` from a dsmd file or a directory.

    Args:
        cfg: configuration object; cfg.DATA.TRAIN_DSMD, cfg.DATA.VAL_DSMD,
            cfg.DATA.TEST_DSMD and cfg.DATA.IMAGE_DIR are read here.
        mode: one of the mv.ModeKey values; selects which dsmd path is used.
        build_transform: callable invoked as build_transform(cfg, is_train).
        image_loader: stored as-is on the instance.

    Note:
        self.filenames is only set when the selected path is a file.
    """
    self.is_train = mode == mv.ModeKey.TRAIN
    self.mode2dsmd = {
        mv.ModeKey.TRAIN: cfg.DATA.TRAIN_DSMD,
        mv.ModeKey.VAL: cfg.DATA.VAL_DSMD,
        mv.ModeKey.TEST: cfg.DATA.TEST_DSMD,
    }

    source = self.mode2dsmd[mode]
    assert mv.isfile(source) or mv.isdir(source)

    if not mv.isfile(source):
        # for a directory containing test images
        self.dsmd = None
        self.filepaths = mv.listdir(source)
    else:
        # for a dsmd file
        self.dsmd = mv.load_dsmd(source)
        self.filenames = list(self.dsmd.keys())
        self.filepaths = [
            mv.joinpath(cfg.DATA.IMAGE_DIR, name) for name in self.filenames
        ]

    self.transform = build_transform(cfg, self.is_train)
    self.image_loader = image_loader
annot_filepath, lambda x: 0 if ignore_label_name else class2label) for label, bbox in bboxes: bbox = np.array(bbox, dtype=np.float32).reshape(-1, 4) if dsmd[replace_ext(filename)][label].shape[0] == 0: dsmd[replace_ext(filename)][label] = bbox else: dsmd[replace_ext(filename)][label] = np.append( dsmd[replace_ext(filename)][label], bbox, axis=0) return mv.make_dsmd(dsmd) if __name__ == '__main__': # TODO: move to unittest annot_dir = '/mnt/sdb1/tb/internal/label' dsmd_path = '/mnt/sdb1/tb/internal/1.csv' def replace_ext(x): return x.replace('.txt', '.dcm') dsmd = bdc2dsmd_det_2d(annot_dir, replace_ext=replace_ext) mv.save_dsmd(dsmd_path, dsmd, {'tb': 0}, mode='det') dsmd = mv.load_dsmd(dsmd_path, {'tb': 0}, mode='det') bboxes = dsmd['002565.dcm'][0] image_path = '/mnt/sdb1/tb/internal/tb/002565.dcm' image = mv.dcmread_dr(image_path) image = mv.normalize_grayscale(image) * 255.0 image = image.astype(np.uint8) mv.imshow_bboxes(image, bboxes, thickness=4)