Example #1
    def __getitem__(self, idx):
        if self._find_channels:
            raise NotImplementedError

        index_val = self.df.index[idx]

        path_czi = self.df.loc[index_val, 'path_czi']
        channel_signal = self.df.loc[index_val, 'channel_signal']
        channel_target = self.df.loc[index_val, 'channel_target']
        flip_y = self.df.loc[index_val, :].get('flip_y', -1) > 0
        flip_x = self.df.loc[index_val, :].get('flip_x', -1) > 0

        slices = None
        czi = None
        data = list()
        for channel, transform in ((channel_signal, self.transform_signal),
                                   (channel_target, self.transform_target)):
            element = None
            if np.isnan(channel):
                continue
            if self.path_cache_dir is not None:
                path_cached = self._get_path_cached(path_czi, channel,
                                                    transform)
                if os.path.exists(path_cached):
                    print('DEBUG: used cached file:', path_cached)
                    element = tifffile.imread(path_cached)
                    if element.ndim != 4:
                        print('Warning! potentially corrupted file!')
                        element = None
            if element is None:
                if czi is None:
                    czi = CziReader(path_czi)
                element = czi.get_volume(channel)
                if transform is not None:
                    for t in transform:
                        element = t(element)
                        print('DEBUG: After transform', t, element.shape)
                element = element[np.newaxis, ...]  # add "channel" dimension to all images
                if self.path_cache_dir is not None:
                    tifffile.imwrite(path_cached, element)
                    print('saved:', path_cached)

            # Optional augmentations
            if flip_y:
                print('flipping y')
                element = np.flip(element, axis=-2)
            if flip_x:
                print('flipping x')
                element = np.flip(element, axis=-1)

            if slices is None:
                slices = _get_slices(element.shape, self.df.loc[index_val, :])
            data.append(element[slices])

        data = [
            torch.tensor(ar.astype(np.float32), dtype=torch.float32)
            for ar in data
        ]
        return data
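
A minimal usage sketch for the __getitem__ above, assuming a hypothetical CziDataset class that implements it together with the usual torch.utils.data.Dataset interface; the constructor arguments shown are illustrative, not the project's actual signature.

# Usage sketch (hypothetical CziDataset wrapping the __getitem__ above).
# Each dataset element is a list of float32 tensors, e.g. [signal, target].
import torch.utils.data

dataset = CziDataset(df, path_cache_dir='cache/',        # assumed constructor
                     transform_signal=None, transform_target=None)
signal, target = dataset[0]
print(signal.shape, signal.dtype)  # e.g. (1, Z, Y, X) torch.float32

loader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True)
for signal, target in loader:
    break  # DataLoader adds a leading batch dimension to each tensor
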
Example #2
def eval_czi(path_czi, channels_sel, path_save=None):
    """
    path_czi : path to CZI file
    channels_sel : list of channels to check
    """
    print('checking:', path_czi)
    if not os.path.exists(path_czi):
        return 'file does not exist'
    czi = CziReader(path_czi)
    msg = check_czi_dims(czi.metadata)
    if msg != '':
        return msg

    if channels_sel == -1:
        channels_sel = range(czi.get_size('C'))
    messages = []
    vols = []
    for chan in channels_sel:
        vol = czi.get_volume(chan)
        vols.append(vol)
        msg = check_blank_slices(vol)
        if msg != '':
            messages.append('chan {:d} {:s}'.format(chan, msg))
    if path_save is not None:
        save_vol_slices(path_save, vols)
    if len(messages) > 0:
        return ';'.join(messages)
    return ''
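
A hedged sketch of driving eval_czi over a table of files; the CSV names and the 'path_czi' column are assumptions, and channels_sel=-1 follows the function's own convention of checking every channel.

# Sketch: run eval_czi over a DataFrame of CZI paths.
import pandas as pd

df = pd.read_csv('czi_files.csv')  # assumed to have a 'path_czi' column
results = []
for path in df['path_czi']:
    msg = eval_czi(path, channels_sel=-1)  # '' means no problems found
    results.append({'path_czi': path, 'message': msg})
pd.DataFrame(results).to_csv('check_results.csv', index=False)
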
Example #3
def test_czireader():
    path = 'data/3500000427_100X_20170120_F05_P27.czi'
    czi = CziReader(path)
    dim_to_scale = czi.get_scales()
    zyx_scales = [dim_to_scale[dim] for dim in 'zyx']
    npt.assert_almost_equal(zyx_scales, [0.29, 0.10833, 0.10833], decimal=3)
    ar_chan_0 = czi.get_volume(0)
    assert ar_chan_0.shape == (39, 512, 512)
Example #4
    def __getitem__(self, index):
        element = self.df.iloc[index, :]
        czi = CziReader(element['path_czi'])

        im_out = (czi.get_volume(element['channel_signal']),
                  czi.get_volume(element['channel_target']))

        im_out = [self.transform(im.astype(float)) for im in im_out]

        return im_out
Example #5
    def __getitem__(self, index):
        element = self.df.iloc[index, :]
        has_target = not np.isnan(element['channel_target'])
        czi = CziReader(element['path_czi'])

        im_out = list()
        im_out.append(czi.get_volume(element['channel_signal']))
        if has_target:
            im_out.append(czi.get_volume(element['channel_target']))
        if self.transform_signal is not None:
            for t in self.transform_signal:
                im_out[0] = t(im_out[0])
        if has_target and self.transform_target is not None:
            for t in self.transform_target:
                im_out[1] = t(im_out[1])
        im_out = [torch.from_numpy(im.astype(float)).float() for im in im_out]
        # unsqueeze to make the first dimension be the channel dimension
        im_out = [torch.unsqueeze(im, 0) for im in im_out]
        return im_out
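
For reference, a hedged usage sketch of this variant; the PairedCziDataset class name and the normalize transform are placeholders, not names from the source.

# Usage sketch (hypothetical PairedCziDataset exposing the __getitem__ above).
dataset = PairedCziDataset(df, transform_signal=[normalize],
                           transform_target=[normalize])  # assumed constructor
pair = dataset[0]
# pair is [signal] when channel_target is NaN, otherwise [signal, target];
# each tensor carries a leading channel dimension of size 1 from unsqueeze.
print([im.shape for im in pair])
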
Example #6
    def get_item_sel(self, idx, sel, apply_transforms=True):
        """Get item(s) from dataset element idx.

        DataFrames should have columns ('path_czi', 'channel_signal', 'channel_target') and optionally 'time_slice'.

        idx - (int) dataset element index
        sel - (int or iterable) 0 for 'signal', 1 for 'target'
        """
        if isinstance(sel, int):
            assert sel >= 0
            sels = (sel, )
        elif isinstance(sel, collections.abc.Iterable):
            sels = sel
        else:
            raise AttributeError

        path = self._df_active['path_czi'].iloc[idx]
        if ('_last_loaded' not in vars(self)) or self._last_loaded != path:

            print('reading:', path)
            try:
                self._czi = CziReader(path)
                self._last_loaded = path
            except Exception as e:
                warnings.warn('could not read file: {}'.format(path))
                warnings.warn(str(e))
                return None

        time_slice = None
        if 'time_slice' in self._df_active.columns:
            time_slice = self._df_active['time_slice'].iloc[idx]
        dict_scales = self._czi.get_scales()
        scales_orig = [dict_scales.get(dim) for dim in 'zyx']
        # print('pixel scales:', scales_orig)

        if self.scale_z is not None or self.scale_xy is not None:
            if None in scales_orig:
                warnings.warn('bad pixel scales in {:s} | scales: {:s}'.format(
                    path, str(scales_orig)))
                return None
            scales_wanted = [self.scale_z, self.scale_xy, self.scale_xy]
            factors_resize = list(
                map(lambda a, b: a / b if None not in (a, b) else 1.0,
                    scales_orig, scales_wanted))
            # print('factors_resize:', factors_resize)
            resizer = Resizer(factors_resize)
        else:
            resizer = None

        volumes = []
        for i in range(len(sels)):
            if sels[i] == 0:
                chan = self._df_active['channel_signal'].iloc[idx]
            else:
                chan = self._df_active['channel_target'].iloc[idx]
            volume_pre = self._czi.get_volume(chan, time_slice=time_slice)
            if not apply_transforms:
                volumes.append(volume_pre)
            else:
                transforms = []
                if resizer is not None:
                    transforms.append(resizer)
                if self.transforms is not None:
                    if isinstance(self.transforms[sels[i]],
                                  collections.abc.Iterable):
                        transforms.extend(self.transforms[sels[i]])
                    else:
                        transforms.append(self.transforms[sels[i]])

                volumes.append(get_vol_transformed(volume_pre, transforms))
        return volumes[0] if isinstance(sel, int) else volumes
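
A brief sketch of the two sel conventions described in the docstring: an int returns a single volume, an iterable returns a list. The ds name stands for an instance of the class this method belongs to.

# sel as an int returns one volume; sel as an iterable returns a list.
signal = ds.get_item_sel(0, 0)                        # signal volume only
signal, target = ds.get_item_sel(0, (0, 1))           # both volumes
raw = ds.get_item_sel(0, (0, 1), apply_transforms=False)  # raw, unresized volumes
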
Example #7
class DataSet(object):
    def __init__(self,
                 path_train_csv,
                 path_test_csv,
                 scale_z=0.3,
                 scale_xy=0.3,
                 transforms=None):
        """Create dataset from train/test DataFrames.
        
        Parameters:
        df_train - pandas.DataFrame, where each row is a DataSet element
        df_test - pandas.DataFrame, same columns as above
        scale_z - desired um/px size for z-dimension
        scale_xy - desired um/px size for x, y dimensions
        transforms - list/tuple of transforms, where each element is a transform or transform list to be applied
                     to a component of a DataSet element
        """
        self.df_train = pd.read_csv(
            path_train_csv) if path_train_csv is not None else pd.DataFrame()
        self.df_test = pd.read_csv(
            path_test_csv) if path_test_csv is not None else pd.DataFrame()
        self.scale_z = scale_z
        self.scale_xy = scale_xy
        self.transforms = transforms
        self._train_select = True
        self._df_active = self.df_train
        self._czi = None
        self._last_loaded = None

    def use_train_set(self):
        self._train_select = True
        self._df_active = self.df_train

    def use_test_set(self):
        self._train_select = False
        self._df_active = self.df_test

    def is_timelapse(self):
        return 'time_slice' in self._df_active.columns

    def __len__(self):
        return len(self._df_active)

    def get_name(self, idx, *args):
        return self._df_active['path_czi'].iloc[idx]

    def get_item_sel(self, idx, sel, apply_transforms=True):
        """Get item(s) from dataset element idx.

        DataFrames should have columns ('path_czi', 'channel_signal', 'channel_target') and optionally 'time_slice'.

        idx - (int) dataset element index
        sel - (int or iterable) 0 for 'signal', 1 for 'target'
        """
        if isinstance(sel, int):
            assert sel >= 0
            sels = (sel, )
        elif isinstance(sel, collections.abc.Iterable):
            sels = sel
        else:
            raise AttributeError

        path = self._df_active['path_czi'].iloc[idx]
        if ('_last_loaded' not in vars(self)) or self._last_loaded != path:

            print('reading:', path)
            try:
                self._czi = CziReader(path)
                self._last_loaded = path
            except Exception as e:
                warnings.warn('could not read file: {}'.format(path))
                warnings.warn(str(e))
                return None

        time_slice = None
        if 'time_slice' in self._df_active.columns:
            time_slice = self._df_active['time_slice'].iloc[idx]
        dict_scales = self._czi.get_scales()
        scales_orig = [dict_scales.get(dim) for dim in 'zyx']
        # print('pixel scales:', scales_orig)

        if self.scale_z is not None or self.scale_xy is not None:
            if None in scales_orig:
                warnings.warn('bad pixel scales in {:s} | scales: {:s}'.format(
                    path, str(scales_orig)))
                return None
            scales_wanted = [self.scale_z, self.scale_xy, self.scale_xy]
            factors_resize = list(
                map(lambda a, b: a / b if None not in (a, b) else 1.0,
                    scales_orig, scales_wanted))
            # print('factors_resize:', factors_resize)
            resizer = Resizer(factors_resize)
        else:
            resizer = None

        volumes = []
        for i in range(len(sels)):
            if sels[i] == 0:
                chan = self._df_active['channel_signal'].iloc[idx]
            else:
                chan = self._df_active['channel_target'].iloc[idx]
            volume_pre = self._czi.get_volume(chan, time_slice=time_slice)
            if not apply_transforms:
                volumes.append(volume_pre)
            else:
                transforms = []
                if resizer is not None:
                    transforms.append(resizer)
                if self.transforms is not None:
                    if isinstance(self.transforms[sels[i]],
                                  collections.abc.Iterable):
                        transforms.extend(self.transforms[sels[i]])
                    else:
                        transforms.append(self.transforms[sels[i]])

                volumes.append(get_vol_transformed(volume_pre, transforms))
        return volumes[0] if isinstance(sel, int) else volumes

    def __repr__(self):
        return 'DataSet({:d} train elements, {:d} test elements)'.format(
            len(self.df_train), len(self.df_test))

    def __str__(self):
        def get_str_transform(transforms):
            # Return the string representation of the given transforms
            if transforms is None:
                return str(None)
            all_transforms = []
            for transform in transforms:
                if transform is None:
                    all_transforms.append(str(None))
                elif isinstance(transform, (list, tuple)):
                    str_list = []
                    for t in transform:
                        str_list.append(str(t))
                    all_transforms.append(' => '.join(str_list))
                else:
                    all_transforms.append(str(transform))
            return (os.linesep + '            ').join(all_transforms)

        if id(self.df_train) == id(self.df_test):
            n_unique = self.df_train.shape[0]
        else:
            n_unique = self.df_train.shape[0] + self.df_test.shape[0]
        str_active = 'train' if self._train_select else 'test'
        str_list = []
        str_list.append('{}:'.format(self.__class__.__name__))
        str_list.append('active_set: ' + str_active)
        str_list.append('scale_z: ' + str(self.scale_z) + ' um/px')
        str_list.append('scale_xy: ' + str(self.scale_xy) + ' um/px')
        str_list.append('train/test/total: {:d}/{:d}/{:d}'.format(
            len(self.df_train), len(self.df_test), n_unique))
        str_list.append('transforms: ' + get_str_transform(self.transforms))
        return os.linesep.join(str_list)

    def __getitem__(self, idx):
        """Returns arrays corresponding to files identified by file_tags in the folder specified by index.

        Once the files are read in as numpy arrays, apply the transformations specified in the constructor.

        Returns:
        volumes - n-element tuple or None. If the file read was successful, return tuple
                  of transformed arrays else return None
        """
        return self.get_item_sel(idx, (0, 1), apply_transforms=True)
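
A hedged end-to-end sketch tying the class together; the CSV file names are placeholders.

# Sketch: build a DataSet from CSVs and pull transformed signal/target pairs.
ds = DataSet('train.csv', 'test.csv', scale_z=0.3, scale_xy=0.3,
             transforms=None)
print(ds)              # summary via __str__
ds.use_train_set()
for idx in range(len(ds)):
    item = ds[idx]     # equivalent to ds.get_item_sel(idx, (0, 1))
    if item is None:   # unreadable file or missing pixel scales
        continue
    signal, target = item
    break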