def isdicom(path):
    """Check whether a file carries the DICOM magic code.

    Args:
        path (str): path of the file to inspect.

    Returns:
        (bool): True when ``path`` is an existing file whose bytes at
            offsets 128..131 are ``b'DICM'``, otherwise False.
    """
    if not mv.isfile(path):
        return False

    # The 128-byte preamble is followed by the 4-byte magic code, so the
    # first 132 bytes are all that is needed.
    with open(path, 'rb') as stream:
        head = stream.read(132)

    if not head:
        return False

    # A file shorter than 132 bytes yields a truncated slice that cannot
    # equal the magic code, so it is rejected here as well.
    return head[128:132] == b'DICM'
def compute_md5_str(file_path):
    """Compute the MD5 hex digest of a file's contents.

    The file is hashed in fixed-size chunks so that large files are never
    loaded into memory in one piece.

    Args:
        file_path (str): path of the file to hash.

    Returns:
        (str or None): lowercase hexadecimal MD5 digest of the file
            contents, or None if ``mv.isfile(file_path)`` is false.
    """
    if not mv.isfile(file_path):
        return None

    chunk_size = 1 << 20  # 1 MiB per read keeps peak memory bounded
    digest = hashlib.md5()
    with open(file_path, 'rb') as f:
        # iter() with a sentinel stops once read() returns b'' at EOF.
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)

    # hexdigest() already returns a str; lower() keeps the documented
    # lowercase contract explicit.
    return digest.hexdigest().lower()
def __init__(self, cfg, mode, build_transform, image_loader):
    """Set up the file list, transform and loader for the given mode.

    Args:
        cfg: configuration object; ``cfg.DATA.TRAIN_DSMD``,
            ``cfg.DATA.VAL_DSMD`` and ``cfg.DATA.TEST_DSMD`` are read,
            and ``cfg.DATA.IMAGE_DIR`` as well when the selected path
            is a dsmd file.
        mode: one of ``mv.ModeKey.TRAIN``, ``mv.ModeKey.VAL`` or
            ``mv.ModeKey.TEST``; selects which dsmd path is used.
        build_transform: callable invoked as
            ``build_transform(cfg, is_train)``; its result is stored
            as ``self.transform``.
        image_loader: stored unchanged as ``self.image_loader``.
    """
    self.is_train = (mode == mv.ModeKey.TRAIN)

    data_cfg = cfg.DATA
    self.mode2dsmd = {
        mv.ModeKey.TRAIN: data_cfg.TRAIN_DSMD,
        mv.ModeKey.VAL: data_cfg.VAL_DSMD,
        mv.ModeKey.TEST: data_cfg.TEST_DSMD,
    }

    source = self.mode2dsmd[mode]
    assert mv.isfile(source) or mv.isdir(source)

    if not mv.isfile(source):
        # A directory containing test images: there is no dsmd to load.
        self.dsmd = None
        self.filepaths = mv.listdir(source)
    else:
        # A dsmd file: its keys are filenames relative to IMAGE_DIR.
        self.dsmd = mv.load_dsmd(source)
        self.filenames = list(self.dsmd.keys())
        paths = []
        for name in self.filenames:
            paths.append(mv.joinpath(cfg.DATA.IMAGE_DIR, name))
        self.filepaths = paths

    self.transform = build_transform(cfg, self.is_train)
    self.image_loader = image_loader
def test_split_dsmd_file(dsmd_file):
    """Check ``mv.split_dsmd_file`` with and without shuffling.

    A copy of ``dsmd_file`` is split 0.9/0.1/0.0 into train/val/test
    inside a temporary directory. The test expects ``train.csv`` (18
    entries) and ``val.csv`` (2 entries) to be written and no
    ``test.csv``. For the non-shuffled split it additionally expects
    specific entries to land in each part.

    Args:
        dsmd_file: path of the dsmd file to split (presumably supplied
            by a pytest fixture with 20 entries - confirm against the
            fixture definition).
    """
    tmp_dir = mv.joinpath(DATA_DIR, 'temporary_subdir')
    tmp_path = mv.joinpath(tmp_dir, 'tmp_dsmd.txt')
    mv.mkdirs(tmp_dir)

    def load_and_check_splits():
        # Verify which split files exist and their sizes; return the
        # loaded train/val dsmds for further checks.
        train_dsmd_file_path = mv.joinpath(tmp_dir, 'train.csv')
        val_dsmd_file_path = mv.joinpath(tmp_dir, 'val.csv')
        test_dsmd_file_path = mv.joinpath(tmp_dir, 'test.csv')

        assert mv.isfile(train_dsmd_file_path)
        assert mv.isfile(val_dsmd_file_path)
        # The 'test' ratio is 0.0, so no file is expected for it.
        assert not mv.isfile(test_dsmd_file_path)

        train_dsmd = mv.load_dsmd(train_dsmd_file_path)
        val_dsmd = mv.load_dsmd(val_dsmd_file_path)
        assert len(train_dsmd) == 18
        assert len(val_dsmd) == 2
        return train_dsmd, val_dsmd

    # try/finally guarantees the temporary directory is removed even when
    # an assertion fails, so a failed run cannot leave stale split files
    # behind for the next one.
    try:
        mv.cp(dsmd_file, tmp_path)
        datasplit = {'train': 0.9, 'val': 0.1, 'test': 0.0}

        # shuffle
        mv.split_dsmd_file(tmp_path, datasplit)
        load_and_check_splits()

        # non shuffle
        mv.split_dsmd_file(tmp_path, datasplit, shuffle=False)
        train_dsmd, val_dsmd = load_and_check_splits()
        assert 'brain_001.dcm' in train_dsmd
        assert 'brain_019.dcm' in val_dsmd
    finally:
        mv.rmtree(tmp_dir)