def __getitem__(self, idx):
    """Return the transformed signal/target volumes for dataset element ``idx``.

    Looks up the CZI path and channel numbers in ``self.df``, optionally
    reads/writes a TIFF cache, applies per-channel transforms, optional
    y/x flips, and a common slice, then converts each volume to a float32
    tensor.

    Parameters:
      idx - (int) positional dataset index into self.df

    Returns:
      list of torch.FloatTensor, one entry per available (non-NaN) channel.
    """
    if self._find_channels:
        raise NotImplementedError
    index_val = self.df.index[idx]
    path_czi = self.df.loc[index_val, 'path_czi']
    channel_signal = self.df.loc[index_val, 'channel_signal']
    channel_target = self.df.loc[index_val, 'channel_target']
    # Flip flags default to -1 (disabled) when the columns are absent.
    flip_y = self.df.loc[index_val, :].get('flip_y', -1) > 0
    flip_x = self.df.loc[index_val, :].get('flip_x', -1) > 0
    slices = None
    czi = None  # opened lazily, only on a cache miss
    data = list()
    for channel, transform in ((channel_signal, self.transform_signal),
                               (channel_target, self.transform_target)):
        element = None
        if np.isnan(channel):
            # Channel not specified for this row (e.g. no target channel).
            continue
        if self.path_cache_dir is not None:
            path_cached = self._get_path_cached(path_czi, channel, transform)
            if os.path.exists(path_cached):
                print('DEBUG: used cached file:', path_cached)
                element = tifffile.imread(path_cached)
                if element.ndim != 4:
                    # Cached volumes are expected to be 4-d (channel + zyx);
                    # fall back to re-reading the CZI on mismatch.
                    print('Warning! potentially corrupted file!')
                    element = None
        if element is None:
            if czi is None:
                czi = CziReader(path_czi)
            element = czi.get_volume(channel)
            if transform is not None:
                for t in transform:
                    element = t(element)
                    print('DEBUG: After transform', t, element.shape)
            element = element[np.newaxis, ]  # Add "channel" dimension to all images
            if self.path_cache_dir is not None:
                tifffile.imsave(path_cached, element)
                print('saved:', path_cached)
        # Optional augmentations
        if flip_y:
            print('flipping y')
            element = np.flip(element, axis=-2)
        if flip_x:
            print('flipping x')
            element = np.flip(element, axis=-1)
        if slices is None:
            # Compute the crop once so signal and target get the same slice.
            slices = _get_slices(element.shape, self.df.loc[index_val, :])
        data.append(element[slices])
    # BUGFIX: np.float was removed in NumPy 1.24; the builtin float is the
    # documented equivalent (float64) and behaves identically here.
    data = [
        torch.tensor(ar.astype(float), dtype=torch.float32) for ar in data
    ]
    return data
def eval_czi(path_czi, channels_sel, path_save=None):
    """Run quality checks on a CZI file.

    path_czi : path to CZI file
    channels_sel : list of channels to check, or -1 to check every channel
    path_save : optional path at which to save slice images of the volumes

    Returns '' when every check passes, otherwise a description of the
    problem(s) found.
    """
    print('checking:', path_czi)
    if not os.path.exists(path_czi):
        return 'file does not exist'
    reader = CziReader(path_czi)
    dims_problem = check_czi_dims(reader.metadata)
    if dims_problem != '':
        return dims_problem
    if channels_sel == -1:
        channels_sel = range(reader.get_size('C'))
    problems = []
    volumes = []
    for channel in channels_sel:
        volume = reader.get_volume(channel)
        volumes.append(volume)
        blank_problem = check_blank_slices(volume)
        if blank_problem != '':
            problems.append('chan {:d} {:s}'.format(channel, blank_problem))
    if path_save is not None:
        save_vol_slices(path_save, volumes)
    # Join per-channel problems into a single message; empty means "all good".
    return ';'.join(problems) if problems else ''
def test_czireader():
    """Smoke-test CziReader pixel scales and volume shape on a known fixture."""
    path = 'data/3500000427_100X_20170120_F05_P27.czi'
    reader = CziReader(path)
    scales = reader.get_scales()
    npt.assert_almost_equal(
        [scales[axis] for axis in 'zyx'],
        [0.29, 0.10833, 0.10833],
        decimal=3,
    )
    volume = reader.get_volume(0)
    assert volume.shape == (39, 512, 512)
def __getitem__(self, index):
    """Read the signal and target volumes for row ``index`` and apply the
    dataset transform to each.

    Returns a 2-element list: [transformed signal, transformed target].
    """
    row = self.df.iloc[index, :]
    reader = CziReader(row['path_czi'])
    volumes = []
    for column in ('channel_signal', 'channel_target'):
        raw = reader.get_volume(row[column])
        volumes.append(self.transform(raw.astype(float)))
    return volumes
def __getitem__(self, index):
    """Return [signal] or [signal, target] tensors for dataset row ``index``.

    The target volume is included only when 'channel_target' is not NaN.
    Each volume is transformed (when transforms are configured), converted
    to a float32 tensor, and given a leading channel dimension.
    """
    row = self.df.iloc[index, :]
    has_target = not np.isnan(row['channel_target'])
    reader = CziReader(row['path_czi'])
    volumes = [reader.get_volume(row['channel_signal'])]
    if has_target:
        volumes.append(reader.get_volume(row['channel_target']))
    if self.transform_signal is not None:
        for t in self.transform_signal:
            volumes[0] = t(volumes[0])
    if has_target and self.transform_target is not None:
        for t in self.transform_target:
            volumes[1] = t(volumes[1])
    # unsqueeze to make the first dimension be the channel dimension
    return [
        torch.unsqueeze(torch.from_numpy(vol.astype(float)).float(), 0)
        for vol in volumes
    ]
def get_item_sel(self, idx, sel, apply_transforms=True):
    """Get item(s) from dataset element idx.

    DataFrames should have columns ('path_czi', 'channel_signal',
    'channel_target') and optionally 'time_slice'.

    idx - (int) dataset element index
    sel - (int or iterable) 0 for 'signal', 1 for 'target'
    apply_transforms - (bool) when False, return the raw volume(s)

    Returns a single volume when sel is an int, a list of volumes when sel
    is an iterable, or None when the file cannot be read or has bad pixel
    scales.

    Raises AttributeError when sel is neither an int nor an iterable.
    """
    if isinstance(sel, int):
        assert sel >= 0
        sels = (sel, )
    # BUGFIX: collections.Iterable was removed in Python 3.10; the ABC has
    # lived in collections.abc since Python 3.3.
    elif isinstance(sel, collections.abc.Iterable):
        sels = sel
    else:
        raise AttributeError
    path = self._df_active['path_czi'].iloc[idx]
    # Cache the open CZI reader across calls that hit the same file.
    if ('_last_loaded' not in vars(self)) or self._last_loaded != path:
        print('reading:', path)
        try:
            self._czi = CziReader(path)
            self._last_loaded = path
        except Exception as e:
            warnings.warn('could not read file: {}'.format(path))
            warnings.warn(str(e))
            return None
    time_slice = None
    if 'time_slice' in self._df_active.columns:
        time_slice = self._df_active['time_slice'].iloc[idx]
    dict_scales = self._czi.get_scales()
    scales_orig = [dict_scales.get(dim) for dim in 'zyx']
    if self.scale_z is not None or self.scale_xy is not None:
        if None in scales_orig:
            warnings.warn('bad pixel scales in {:s} | scales: {:s}'.format(
                path, str(scales_orig)))
            return None
        scales_wanted = [self.scale_z, self.scale_xy, self.scale_xy]
        # Per-dim resize factor; 1.0 when either scale is unspecified.
        factors_resize = list(
            map(lambda a, b: a / b if None not in (a, b) else 1.0,
                scales_orig, scales_wanted))
        resizer = Resizer(factors_resize)
    else:
        resizer = None
    volumes = []
    for i in range(len(sels)):
        if sels[i] == 0:
            chan = self._df_active['channel_signal'].iloc[idx]
        else:
            chan = self._df_active['channel_target'].iloc[idx]
        volume_pre = self._czi.get_volume(chan, time_slice=time_slice)
        if not apply_transforms:
            volumes.append(volume_pre)
        else:
            transforms = []
            if resizer is not None:
                transforms.append(resizer)
            if self.transforms is not None:
                # Each entry may itself be a single transform or a list.
                if isinstance(self.transforms[sels[i]],
                              collections.abc.Iterable):
                    transforms.extend(self.transforms[sels[i]])
                else:
                    transforms.append(self.transforms[sels[i]])
            volumes.append(get_vol_transformed(volume_pre, transforms))
    return volumes[0] if isinstance(sel, int) else volumes
class DataSet(object):
    """Train/test dataset of CZI-file volumes described by CSV manifests."""

    def __init__(self, path_train_csv, path_test_csv, scale_z=0.3,
                 scale_xy=0.3, transforms=None):
        """Create dataset from train/test CSV manifests.

        Parameters:
        path_train_csv - path to CSV where each row is a DataSet element,
          or None for an empty training set
        path_test_csv - path to CSV with the same columns, or None for an
          empty test set
        scale_z - desired um/px size for z-dimension
        scale_xy - desired um/px size for x, y dimensions
        transforms - list/tuple of transforms, where each element is a
          transform or transform list to be applied to a component of a
          DataSet element
        """
        self.df_train = pd.read_csv(
            path_train_csv) if path_train_csv is not None else pd.DataFrame()
        self.df_test = pd.read_csv(
            path_test_csv) if path_test_csv is not None else pd.DataFrame()
        self.scale_z = scale_z
        self.scale_xy = scale_xy
        self.transforms = transforms
        self._train_select = True
        self._df_active = self.df_train
        self._czi = None           # cached CziReader for the last-read file
        self._last_loaded = None   # path of the file held in self._czi

    def use_train_set(self):
        """Make subsequent indexing operate on the training DataFrame."""
        self._train_select = True
        self._df_active = self.df_train

    def use_test_set(self):
        """Make subsequent indexing operate on the test DataFrame."""
        self._train_select = False
        self._df_active = self.df_test

    def is_timelapse(self):
        """Return True when the active DataFrame has a 'time_slice' column."""
        return 'time_slice' in self._df_active.columns

    def __len__(self):
        return len(self._df_active)

    def get_name(self, idx, *args):
        """Return the CZI path of element idx (extra args are ignored)."""
        return self._df_active['path_czi'].iloc[idx]

    def get_item_sel(self, idx, sel, apply_transforms=True):
        """Get item(s) from dataset element idx.

        DataFrames should have columns ('path_czi', 'channel_signal',
        'channel_target') and optionally 'time_slice'.

        idx - (int) dataset element index
        sel - (int or iterable) 0 for 'signal', 1 for 'target'
        apply_transforms - (bool) when False, return the raw volume(s)

        Returns a single volume when sel is an int, a list of volumes when
        sel is an iterable, or None on read/scale failure.
        """
        if isinstance(sel, int):
            assert sel >= 0
            sels = (sel, )
        # BUGFIX: collections.Iterable was removed in Python 3.10; use the
        # collections.abc location instead.
        elif isinstance(sel, collections.abc.Iterable):
            sels = sel
        else:
            raise AttributeError
        path = self._df_active['path_czi'].iloc[idx]
        # Cache the open CZI reader across calls that hit the same file.
        if ('_last_loaded' not in vars(self)) or self._last_loaded != path:
            print('reading:', path)
            try:
                self._czi = CziReader(path)
                self._last_loaded = path
            except Exception as e:
                warnings.warn('could not read file: {}'.format(path))
                warnings.warn(str(e))
                return None
        time_slice = None
        if 'time_slice' in self._df_active.columns:
            time_slice = self._df_active['time_slice'].iloc[idx]
        dict_scales = self._czi.get_scales()
        scales_orig = [dict_scales.get(dim) for dim in 'zyx']
        if self.scale_z is not None or self.scale_xy is not None:
            if None in scales_orig:
                warnings.warn('bad pixel scales in {:s} | scales: {:s}'.format(
                    path, str(scales_orig)))
                return None
            scales_wanted = [self.scale_z, self.scale_xy, self.scale_xy]
            # Per-dim resize factor; 1.0 when either scale is unspecified.
            factors_resize = list(
                map(lambda a, b: a / b if None not in (a, b) else 1.0,
                    scales_orig, scales_wanted))
            resizer = Resizer(factors_resize)
        else:
            resizer = None
        volumes = []
        for i in range(len(sels)):
            if sels[i] == 0:
                chan = self._df_active['channel_signal'].iloc[idx]
            else:
                chan = self._df_active['channel_target'].iloc[idx]
            volume_pre = self._czi.get_volume(chan, time_slice=time_slice)
            if not apply_transforms:
                volumes.append(volume_pre)
            else:
                transforms = []
                if resizer is not None:
                    transforms.append(resizer)
                if self.transforms is not None:
                    # Each entry may be a single transform or a list of them.
                    if isinstance(self.transforms[sels[i]],
                                  collections.abc.Iterable):
                        transforms.extend(self.transforms[sels[i]])
                    else:
                        transforms.append(self.transforms[sels[i]])
                volumes.append(get_vol_transformed(volume_pre, transforms))
        return volumes[0] if isinstance(sel, int) else volumes

    def __repr__(self):
        return 'DataSet({:d} train elements, {:d} test elements)'.format(
            len(self.df_train), len(self.df_test))

    def __str__(self):
        def get_str_transform(transforms):
            # Return the string representation of the given transforms
            if transforms is None:
                return str(None)
            all_transforms = []
            for transform in transforms:
                if transform is None:
                    all_transforms.append(str(None))
                elif isinstance(transform, (list, tuple)):
                    str_list = []
                    for t in transform:
                        str_list.append(str(t))
                    all_transforms.append(' => '.join(str_list))
                else:
                    all_transforms.append(str(transform))
            return (os.linesep + '            ').join(all_transforms)

        # Avoid double-counting when train and test share the same DataFrame.
        if id(self.df_train) == id(self.df_test):
            n_unique = self.df_train.shape[0]
        else:
            n_unique = self.df_train.shape[0] + self.df_test.shape[0]
        str_active = 'train' if self._train_select else 'test'
        str_list = []
        str_list.append('{}:'.format(self.__class__.__name__))
        str_list.append('active_set: ' + str_active)
        str_list.append('scale_z: ' + str(self.scale_z) + ' um/px')
        str_list.append('scale_xy: ' + str(self.scale_xy) + ' um/px')
        str_list.append('train/test/total: {:d}/{:d}/{:d}'.format(
            len(self.df_train), len(self.df_test), n_unique))
        str_list.append('transforms: ' + get_str_transform(self.transforms))
        return os.linesep.join(str_list)

    def __getitem__(self, idx):
        """Returns arrays corresponding to files identified by file_tags in
        the folder specified by index. Once the files are read in as numpy
        arrays, apply the transformations specified in the constructor.

        Returns:
        volumes - n-element tuple or None. If the file read was successful,
          return tuple of transformed arrays else return None
        """
        return self.get_item_sel(idx, (0, 1), apply_transforms=True)