def loadData(self):
    config = self.config
    if self.config.mode != 'vect-scalar' and self.config.data_class_name is None:
        # image training
        strided = not np.any(config.MFP) and config.mode == 'img-img'
        self.get_batch_kwargs = dict(
            batch_size=config.batch_size,
            strided=strided,
            flip=config.flip_data,
            grey_augment_channels=config.grey_augment_channels,
            ret_info=config.lazy_labels,
            ret_example_weights=config.use_example_weights,
            warp_on=config.warp_on,
            ignore_thresh=config.example_ignore_threshold)
        # the data source is replaced in self.testModel so that these
        # kwargs stay valid
        self.get_batch_kwargs_test = dict(
            batch_size=config.monitor_batch_size,
            strided=strided,
            flip=config.flip_data,
            grey_augment_channels=config.grey_augment_channels,
            ret_info=config.lazy_labels,
            ret_example_weights=config.use_example_weights,
            warp_on=False,  # no warping for monitoring batches
            ignore_thresh=config.example_ignore_threshold)

        self.data = CNNData.CNNData(
            config.patch_size, config.dimensions.pred_stride,
            config.dimensions.offset, config.n_dim, config.n_lab,
            config.anisotropic_data, config.mode, config.zchxy_order,
            config.border_mode, config.pre_process, config.upright_x,
            config.target == 'regression',
            config.target if config.target in ['malis', 'affinity'] else
            False)  # return affinity graph instead of boundaries
        self.data.addDataFromFile(config.data_path, config.label_path,
                                  config.d_files, config.l_files,
                                  config.cube_prios, config.valid_cubes,
                                  config.downsample_xy)

        if self.config.preview_data_path is not None:
            data = trainutils.h5Load(self.config.preview_data_path)
            if not isinstance(data, (tuple, list)):
                data = [data, ]
            data = [d.astype(np.float32) / 255 for d in data]
            self.preview_data = data
        else:
            self.preview_data = None

    else:  # non-image training
        self.get_batch_kwargs = dict(batch_size=config.batch_size)
        self.get_batch_kwargs.update(self.config.data_batch_kwargs)
        # the data source is replaced in self.testModel so that these
        # kwargs stay valid
        self.get_batch_kwargs_test = dict(
            batch_size=config.monitor_batch_size)
        if isinstance(self.config.data_class_name, tuple):
            Data = trainutils.import_variable_from_file(
                *self.config.data_class_name)
        else:
            Data = getattr(traindata, self.config.data_class_name)
        self.data = Data(**self.config.data_load_kwargs)
        self.preview_data = None
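
# Configuration sketch for the two branches of loadData above. The attribute
# names mirror the accesses in loadData, but the concrete values and file
# names are illustrative assumptions, not a confirmed config schema:
#
#     # image training: built-in CNNData pipeline with (file, h5-key) pairs
#     config.mode = 'img-img'
#     config.data_class_name = None
#     config.d_files = [('raw_cube0.h5', 'raw')]
#     config.l_files = [('label_cube0.h5', 'lab')]
#
#     # non-image training: delegate to a custom data class instead
#     config.data_class_name = 'MyData'  # looked up in traindata, or:
#     config.data_class_name = ('/path/to/module.py', 'MyData')  # from a file
#     config.data_load_kwargs = dict()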
def _read_images(self, d_path, l_path, d_files, l_files, downsample_xy):
    """
    Image files on disk are expected to be in order (ch,x,y,z) or (x,y,z).
    Image stacks are returned as (z,ch,x,y) and labels as (z,x,y),
    irrespective of the order in the file. If the image files have no
    channel axis, a singleton channel dimension is added.
    """
    data, label, info = [], [], []
    if len(d_files) != len(l_files):
        raise ValueError(
            "d_files and l_files must be lists of the same length!")

    for (d_f, d_key), (l_f, l_key) in zip(d_files, l_files):
        print 'Loading %s' % d_f,
        d = ut.h5Load(d_path + d_f, d_key)
        print 'Loading %s' % l_f
        l = ut.h5Load(l_path + l_f, l_key)
        try:
            info_1 = ut.h5Load(l_path + l_f, 'info')
            info.append(info_1)
        except KeyError:
            info.append(None)

        if not self.zchxy_order:  # file layout (ch,x,y,z) or (x,y,z)
            if len(d.shape) == 4:
                self.n_ch = d.shape[0]
                print "Data has %i channels" % self.n_ch
            elif len(d.shape) == 3:  # no channel axis in data
                self.n_ch = 1
                d = d[None, :, :, :]  # add singleton channel axis

            if l.size == 0:
                l = np.zeros_like(d[0], dtype=self.ldtype)
            elif self.mode == 'img-scalar':
                assert len(l.shape) == 1, "Scalar labels must be 1d"

            # Transpose such that access is optimal
            d = np.transpose(d, (3, 0, 1, 2))  # (ch,x,y,z) --> (z,ch,x,y)
            if self.mode == 'img-img':
                l = np.transpose(l, (2, 0, 1))  # (x,y,z) --> (z,x,y)
                d, l = _stripCubes(d, l, self.offset, self.ldtype)

        else:  # data already in memory layout (z,ch,x,y)
            if len(d.shape) == 4:
                self.n_ch = d.shape[1]
                print "Data has %i channels" % self.n_ch
            elif len(d.shape) == 3:  # no channel axis in data
                self.n_ch = 1
                d = d[:, None, :, :]  # add singleton channel axis

            if l.size == 0:
                sh = (d.shape[0], ) + d.shape[2:]
                l = np.zeros(sh, dtype=self.ldtype)
            elif self.mode == 'img-scalar':
                assert len(l.shape) == 1, "Scalar labels must be 1d"

            if self.mode == 'img-img':
                d, l = _stripCubes(d, l, self.offset, self.ldtype)

        # determine normalisation depending on int or float type
        if d.dtype in [np.int, np.int8, np.int16, np.int32, np.int64,
                       np.uint, np.uint8, np.uint16, np.uint32, np.uint64]:
            m = 255
        else:
            m = 1
        d = np.ascontiguousarray(d, dtype=np.float32) / m

        if (self.ldtype != l.dtype and np.issubdtype(l.dtype, np.integer)):
            m = l.max()
            M = np.iinfo(self.ldtype).max
            if m > M:
                raise ValueError(
                    "Loading of data: labels must be cast to %s, but %s "
                    "cannot store the value %g (maximum allowed value: %g). "
                    "You may try to renumber the labels." %
                    (self.ldtype, self.ldtype, m, M))
        l = np.ascontiguousarray(l, dtype=self.ldtype)

        if downsample_xy:
            # Downsample x and y by factor f, averaging over f*f blocks
            f = int(downsample_xy)
            l_sh = l.shape
            cut = np.mod(l_sh, f)  # crop so that x and y are divisible by f
            d = d[:, :, :l_sh[-2] - cut[-2], :l_sh[-1] - cut[-1]]
            sh = d[:, :, ::f, ::f].shape
            new_d = np.zeros(sh, dtype=np.float32)

            l = l[:, :l_sh[-2] - cut[-2], :l_sh[-1] - cut[-1]]
            sh = l[:, ::f, ::f].shape
            new_l = np.zeros(sh, dtype=self.ldtype)

            for i in xrange(f):
                for j in xrange(f):
                    new_d += d[:, :, i::f, j::f]
                    new_l += l[:, i::f, j::f]

            d = new_d / f**2
            l = new_l / f**2  # note: integer label dtypes truncate here

        gc.collect()
        print "Internal data.shape=%s, label.shape=%s" % (d.shape, l.shape)
        print '---'
        data.append(d)
        label.append(l)

    return data, label, info
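
# Note on the downsampling loop in _read_images: it computes an f x f block
# average via strided slicing, without allocating per-block views. A minimal
# standalone equivalent (a sketch with assumed toy shapes, independent of
# this class):
#
#     import numpy as np
#     a = np.arange(16, dtype=np.float32).reshape(4, 4)
#     f = 2
#     out = np.zeros((2, 2), dtype=np.float32)
#     for i in xrange(f):
#         for j in xrange(f):
#             out += a[i::f, j::f]  # one element of each f*f block per pass
#     out /= f ** 2                 # [[2.5, 4.5], [10.5, 12.5]]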