def layer_op(self, *_unused_args, **_unused_kwargs):
    """
    This function first reads two vectors, and interpolates them
    with self.n_interpolations mixing coefficients.

    Location coordinates are set to ``np.ones`` for all the vectors.
    """
    while True:
        image_id_x, data_x, _ = self.reader(idx=None, shuffle=False)
        image_id_y, data_y, _ = self.reader(idx=None, shuffle=True)
        if not data_x or not data_y:
            break
        if image_id_x == image_id_y:
            continue
        embedding_x = data_x[self.window.names[0]]
        embedding_y = data_y[self.window.names[0]]

        steps = np.linspace(0, 1, self.n_interpolations)
        for mixture in steps:
            output_vector = \
                embedding_x * mixture + embedding_y * (1 - mixture)
            coordinates = np.ones((1, N_SPATIAL * 2 + 1), dtype=np.int32)
            coordinates[0, 0:2] = [image_id_x, image_id_y]
            output_dict = {}
            for name in self.window.names:
                coordinates_key = LOCATION_FORMAT.format(name)
                image_data_key = name
                output_dict[coordinates_key] = coordinates
                output_dict[image_data_key] = output_vector[np.newaxis, ...]
            yield output_dict
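# Illustrative sketch (not part of the sampler above): the mixing sweep is
# plain convex interpolation. With n_interpolations == 3, np.linspace(0, 1, 3)
# yields mixtures [0.0, 0.5, 1.0], i.e. pure y, the midpoint, then pure x.
# The function name below is hypothetical.
import numpy as np

def interpolation_sweep(embedding_x, embedding_y, n_interpolations):
    """Yield convex combinations from embedding_y to embedding_x."""
    for mixture in np.linspace(0, 1, n_interpolations):
        yield embedding_x * mixture + embedding_y * (1 - mixture)

# e.g. list(interpolation_sweep(np.ones(4), np.zeros(4), 3))
# -> [all-zeros, all-0.5s, all-ones]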
def layer_op(self, idx=None):
    """
    Generating each image as a window.
    Overriding this function to create new image sampling strategies.

    This function should either yield or return a dictionary
    (of multiple windows per image)::

        return a dictionary:
            {
             'image_name': a numpy array [n_samples, h, w, d, chn],
             'image_name_location': [n_samples, 7]
            }

    where the 7-element location vector encodes the image_id,
    starting and ending coordinates of the image window.

    Following the same notation, the dictionary can be extended
    to multiple modalities; the keys will be::

        {'image_name_1', 'image_name_1_location',
         'image_name_2', 'image_name_2_location', ...}

    :param idx: image_id used to load the image at the i-th row of
        the input
    :return: an image data dictionary
    """
    image_id, image_data, _ = self.reader(idx=idx)
    for mod in list(image_data):
        spatial_shape = image_data[mod].shape[:N_SPATIAL]
        coords = self.dummy_coordinates(image_id, spatial_shape, 1)
        image_data[LOCATION_FORMAT.format(mod)] = coords
        image_data[mod] = image_data[mod][np.newaxis, ...]
    return image_data
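# A plausible sketch of the ``dummy_coordinates`` helper used above; an
# assumption consistent with its call sites, not necessarily the exact
# implementation. It builds n_samples copies of the 7-element vector
# (image_id, x_start, y_start, z_start, x_end, y_end, z_end) spanning the
# whole image, padding missing spatial dims to 1.
import numpy as np

N_SPATIAL = 3  # assumed constant from the surrounding module

def dummy_coordinates(image_id, spatial_shape, n_samples):
    """Window coordinates covering the full spatial extent of the image."""
    spatial = list(spatial_shape[:N_SPATIAL])
    while len(spatial) < N_SPATIAL:
        spatial.append(1)  # pad 2D images to 3D
    coords = [image_id] + [0] * N_SPATIAL + spatial
    return np.tile(np.asarray(coords), (n_samples, 1)).astype(np.int32)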
def layer_op(self, *_unused_args, **_unused_kwargs):
    """
    This function first reads two vectors, and interpolates them
    with self.n_interpolations mixing coefficients.

    Location coordinates are set to ``np.ones`` for all the vectors.
    """
    while True:
        image_id_x, data_x, _ = self.reader(idx=None, shuffle=False)
        image_id_y, data_y, _ = self.reader(idx=None, shuffle=True)
        if not data_x or not data_y:
            break
        if image_id_x == image_id_y:
            continue
        embedding_x = data_x[self.window.names[0]]
        embedding_y = data_y[self.window.names[0]]

        steps = np.linspace(0, 1, self.n_interpolations)
        for mixture in steps:
            output_vector = \
                embedding_x * mixture + embedding_y * (1 - mixture)
            coordinates = np.ones((N_SPATIAL * 2 + 1,), dtype=np.int32)
            coordinates[0:2] = [image_id_x, image_id_y]
            output_dict = {}
            for name in self.window.names:
                coordinates_key = LOCATION_FORMAT.format(name)
                image_data_key = name
                output_dict[coordinates_key] = coordinates
                output_dict[image_data_key] = output_vector
            yield output_dict
def layer_op(self):
    """
    This function first draws two samples, and interpolates them
    with self.n_interpolations mixing coefficients.

    Location coordinates are set to ``np.ones`` for all the vectors.
    """
    total_iter = self.repeat if self.repeat is not None else 1
    while total_iter > 0:
        total_iter = total_iter - 1 if self.repeat is not None else 1
        embedding_x = np.random.normal(
            self.mean,
            self.stddev,
            self.window.shapes[self.window.names[0]])
        embedding_y = np.random.normal(
            self.mean,
            self.stddev,
            self.window.shapes[self.window.names[0]])
        steps = np.linspace(0, 1, self.n_interpolations)
        for mixture in steps:
            output_vector = \
                embedding_x * mixture + embedding_y * (1 - mixture)
            coordinates = np.ones((1, N_SPATIAL * 2 + 1), dtype=np.int32)
            output_dict = {}
            for name in self.window.names:
                coordinates_key = LOCATION_FORMAT.format(name)
                image_data_key = name
                output_dict[coordinates_key] = coordinates
                output_dict[image_data_key] = output_vector
            yield output_dict
def layer_op(self):
    """
    This function first draws two samples, and interpolates them
    with self.n_interpolations mixing coefficients.

    Location coordinates are set to ``np.ones`` for all the vectors.
    """
    total_iter = self.repeat if self.repeat is not None else 1
    while total_iter > 0:
        total_iter = total_iter - 1 if self.repeat is not None else 1
        # ``np.random.normal`` takes the output shape as a single tuple
        # argument; unpacking it with ``*`` would pass its elements as
        # separate positional arguments and fail for multi-dim windows.
        embedding_x = np.random.normal(
            self.mean,
            self.stddev,
            self.window.shapes[self.window.names[0]][1:])
        embedding_y = np.random.normal(
            self.mean,
            self.stddev,
            self.window.shapes[self.window.names[0]][1:])
        steps = np.linspace(0, 1, self.n_interpolations)
        for mixture in steps:
            output_vector = \
                embedding_x * mixture + embedding_y * (1 - mixture)
            coordinates = np.ones((N_SPATIAL * 2 + 1,), dtype=np.int32)
            output_dict = {}
            for name in self.window.names:
                coordinates_key = LOCATION_FORMAT.format(name)
                image_data_key = name
                output_dict[coordinates_key] = coordinates
                output_dict[image_data_key] = output_vector
            yield output_dict
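# Quick standalone illustration of the size-argument fix above:
# ``np.random.normal`` accepts the full shape tuple directly.
import numpy as np

window_shape = (1, 8, 8, 16)  # hypothetical window shape
sample = np.random.normal(0.0, 1.0, window_shape[1:])
assert sample.shape == (8, 8, 16)
# np.random.normal(0.0, 1.0, *window_shape[1:]) would raise TypeError:
# normal() takes at most 3 positional arguments.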
def layer_op(self, idx=None):
    """
    This function generates sampling windows for the input buffer;
    image data are from ``self.reader()``.

    It first completes window shapes based on image data,
    then resizes each image as a window and outputs
    a dictionary (required by the input buffer).

    :return: output data dictionary ``{'image_modality': data_array}``
    """
    image_id, data, interp_orders = self.reader(idx=idx)
    image_shapes = \
        dict((name, data[name].shape) for name in self.window.names)
    # window shapes can be dynamic, here they are converted to
    # static ones as now we know the image shapes
    static_window_shapes = self.window.match_image_shapes(image_shapes)

    # for resize sampler the coordinates are not used
    # simply use the spatial dims of the input image
    output_dict = {}
    for name in list(data):
        # prepare output dictionary keys
        coordinates_key = LOCATION_FORMAT.format(name)
        image_data_key = name

        output_dict[coordinates_key] = self.dummy_coordinates(
            image_id, static_window_shapes[name], self.window.n_samples)
        image_array = []
        for _ in range(self.window.n_samples):
            # prepare image data
            image_shape = image_shapes[name]
            window_shape = static_window_shapes[name]
            if image_shape == window_shape or interp_orders[name][0] < 0:
                # already in the same shape
                image_window = data[name]
            else:
                zoom_ratio = [float(p) / float(d)
                              for p, d in zip(window_shape, image_shape)]
                image_window = self.zoom_3d(
                    image=data[name],
                    ratio=zoom_ratio,
                    interp_order=interp_orders[name][0])
            image_array.append(image_window[np.newaxis, ...])
        if len(image_array) > 1:
            output_dict[image_data_key] = \
                np.concatenate(image_array, axis=0)
        else:
            output_dict[image_data_key] = image_array[0]
    # the output image shape should be
    # [enqueue_batch_size, x, y, z, time, modality]
    # here enqueue_batch_size = 1 as we only have one sample per image
    return output_dict
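# A hedged sketch of what ``zoom_3d`` plausibly does: spatially resample the
# first three dims of a 5D [x, y, z, time, modality] array with
# scipy.ndimage.zoom, channel by channel. This is an assumption based on the
# call above, not necessarily the exact implementation.
import numpy as np
import scipy.ndimage

def zoom_3d(image, ratio, interp_order):
    """Zoom the three spatial dims of a 5D array by ``ratio``."""
    assert image.ndim == 5, "expects a 5D [x, y, z, time, modality] array"
    output = []
    for t in range(image.shape[3]):
        mods = []
        for m in range(image.shape[4]):
            zoomed = scipy.ndimage.zoom(
                image[..., t, m], ratio[:3], order=interp_order)
            mods.append(zoomed[..., np.newaxis, np.newaxis])
        output.append(np.concatenate(mods, axis=-1))
    return np.concatenate(output, axis=-2)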
def layer_op(self, idx=None):
    """
    Generating each image as a window.
    Overriding this function to create new image sampling strategies.

    This function should either yield a dictionary
    (for single window per image)::

        yield a dictionary
            {
             'image_name': a numpy array,
             'image_name_location': (image_id,
                                     x_start, y_start, z_start,
                                     x_end, y_end, z_end)
            }

    or return a dictionary (for multiple windows per image)::

        return a dictionary:
            {
             'image_name': a numpy array,
             'image_name_location': [n_samples, 7]
            }

    where the 7-element location vector encodes the image_id,
    starting and ending coordinates of the image window.

    Following the same notation, the dictionary can be extended
    to multiple modalities; the keys will be::

        {'image_name_1', 'image_name_location_1',
         'image_name_2', 'image_name_location_2', ...}

    :param idx: image_id used to load the image at the i-th row of
        the input
    :return: an image data dictionary
    """
    # dataset: from a window generator
    # assumes self.window.n_samples == 1
    # the generator should yield one window at each iteration
    assert self.window.n_samples == 1, \
        'image_window_dataset.layer_op() requires: ' \
        'windows_per_image should be 1.'
    image_id, image_data, _ = self.reader(idx=idx)
    for mod in list(image_data):
        spatial_shape = image_data[mod].shape[:N_SPATIAL]
        coords = self.dummy_coordinates(image_id, spatial_shape, 1)
        image_data[LOCATION_FORMAT.format(mod)] = coords
        image_data[mod] = image_data[mod][np.newaxis, ...]
    if self.csv_reader is not None:
        _, label_data, _ = self.csv_reader(idx=image_id)
        image_data['label'] = label_data['label']
        image_data['label_location'] = image_data['image_location']
    return image_data
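# A minimal sketch of how a ``layer_op`` like the one above could feed
# TensorFlow's input pipeline via ``tf.data.Dataset.from_generator``. The
# wrapping function, dtypes, and ``num_subjects`` attribute are assumptions
# for illustration; the real dataset class may build this differently.
import tensorflow as tf

def make_dataset(sampler, image_name='image'):
    def window_generator():
        # assumed: iterating idx over the reader's rows
        for idx in range(sampler.reader.num_subjects):
            yield sampler.layer_op(idx=idx)

    return tf.data.Dataset.from_generator(
        window_generator,
        output_types={image_name: tf.float32,
                      image_name + '_location': tf.int32})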
def layer_op(self):
    while True:
        image_id, data, _ = self.reader(idx=None, shuffle=False)
        if not data:
            break
        image_shapes = {
            name: data[name].shape
            for name in self.window.names
        }
        static_window_shapes = self.window.match_image_shapes(image_shapes)
        coordinates = grid_spatial_coordinates(
            image_id, image_shapes, static_window_shapes, self.border_size)

        # extend the number of sampling locations to be divisible
        # by batch size
        n_locations = list(coordinates.values())[0].shape[0]
        extra_locations = 0
        if (n_locations % self.batch_size) > 0:
            extra_locations = \
                self.batch_size - n_locations % self.batch_size
        total_locations = n_locations + extra_locations

        tf.logging.info('grid sampling image sizes: %s', image_shapes)
        tf.logging.info('grid sampling window sizes: %s',
                        static_window_shapes)
        if extra_locations > 0:
            tf.logging.info(
                "yielding %s locations from image, "
                "extended to %s to be divisible by batch size %s",
                n_locations, total_locations, self.batch_size)
        else:
            tf.logging.info("yielding %s locations from image", n_locations)
        for i in range(total_locations):
            idx = i % n_locations
            # initialise output dict
            output_dict = {}
            for name in list(data):
                assert coordinates[name].shape[0] == n_locations, \
                    "different number of grid samples from the input " \
                    "images, don't know how to combine them in the queue"
                x_start, y_start, z_start, x_end, y_end, z_end = \
                    coordinates[name][idx, 1:]
                try:
                    image_window = data[name][x_start:x_end,
                                              y_start:y_end,
                                              z_start:z_end, ...]
                except ValueError:
                    tf.logging.fatal(
                        "dimensionality mismatch in input volumes, "
                        "please specify spatial_window_size with a "
                        "3D tuple and make sure each element is "
                        "smaller than the image length in each dim.")
                    raise

                # fill output dict with data
                coord_key = LOCATION_FORMAT.format(name)
                image_key = name
                output_dict[coord_key] = coordinates[name][idx:idx + 1, ...]
                output_dict[image_key] = image_window[np.newaxis, ...]
            yield output_dict

    # this is needed because otherwise reading beyond the last element
    # raises an out-of-range error, and the last grid sample
    # will not be processed properly.
    try:
        for _ in range(1):
            for name in list(output_dict):
                output_dict[name] = np.ones_like(output_dict[name]) * -1
            yield output_dict
    except (NameError, KeyError):
        tf.logging.fatal("No feasible samples from %s", self)
        raise
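# A hedged, simplified sketch of how ``grid_spatial_coordinates`` might
# enumerate window start points along one dimension: slide by
# (window - 2 * border) and clamp the last window to the image edge so the
# whole volume is covered. Names and exact tie-breaking are assumptions.
import numpy as np

def grid_starts_1d(image_len, win_len, border):
    step = max(win_len - 2 * border, 1)
    last = max(image_len - win_len, 0)
    starts = list(range(0, last + 1, step))
    if starts[-1] != last:
        starts.append(last)  # clamp final window to the edge
    return np.unique(starts)

# e.g. grid_starts_1d(image_len=10, win_len=4, border=1) -> [0, 2, 4, 6]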
def layer_op(self, idx=None):
    """
    This function generates sampling windows for the input buffer;
    image data are from ``self.reader()``.

    It first completes window shapes based on image data,
    then finds random coordinates based on the window shapes,
    and finally extracts windows at the coordinates and outputs
    a dictionary (required by the input buffer).

    :return: output data dictionary
        ``{image_modality: data_array, image_location: n_samples * 7}``
    """
    image_id, data, _ = self.reader(idx=idx, shuffle=True)
    image_shapes = dict(
        (name, data[name].shape) for name in self.window.names)
    static_window_shapes = self.window.match_image_shapes(image_shapes)

    # find random coordinates based on window and image shapes
    coordinates = self._spatial_coordinates_generator(
        subject_id=image_id,
        data=data,
        img_sizes=image_shapes,
        win_sizes=static_window_shapes,
        n_samples=self.window.n_samples)

    # initialise output dict, placeholders as dictionary keys
    # this dictionary will be used in
    # enqueue operation in the form of: `feed_dict=output_dict`
    output_dict = {}
    # fill output dict with data
    for name in list(data):
        coordinates_key = LOCATION_FORMAT.format(name)
        image_data_key = name

        # fill the coordinates
        location_array = coordinates[name]
        output_dict[coordinates_key] = location_array

        # fill output window array
        image_array = []
        for window_id in range(self.window.n_samples):
            x_start, y_start, z_start, x_end, y_end, z_end = \
                location_array[window_id, 1:]
            try:
                image_window = data[name][
                    x_start:x_end, y_start:y_end, z_start:z_end, ...]
                image_array.append(image_window[np.newaxis, ...])
            except ValueError:
                tf.logging.fatal(
                    "dimensionality mismatch in input volumes, "
                    "please specify spatial_window_size with a "
                    "3D tuple and make sure each element is "
                    "smaller than the image length in each dim. "
                    "Current coords %s", location_array[window_id])
                raise
        if len(image_array) > 1:
            output_dict[image_data_key] = \
                np.concatenate(image_array, axis=0)
        else:
            output_dict[image_data_key] = image_array[0]
    # the output image shape should be
    # [enqueue_batch_size, x, y, z, time, modality]
    # where enqueue_batch_size = windows_per_image
    return output_dict
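# A hedged sketch of what a ``_spatial_coordinates_generator`` for uniform
# sampling plausibly computes: for each sample, draw a valid start per
# spatial dim and store (image_id, starts, ends). This is an illustration,
# not the class's actual implementation.
import numpy as np

N_SPATIAL = 3  # assumed constant from the surrounding module

def uniform_spatial_coordinates(image_id, img_size, win_size, n_samples):
    coords = np.zeros((n_samples, N_SPATIAL * 2 + 1), dtype=np.int32)
    coords[:, 0] = image_id
    for dim in range(N_SPATIAL):
        max_start = img_size[dim] - win_size[dim]
        assert max_start >= 0, "window larger than image in dim %d" % dim
        starts = np.random.randint(0, max_start + 1, size=n_samples)
        coords[:, 1 + dim] = starts
        coords[:, 1 + N_SPATIAL + dim] = starts + win_size[dim]
    return coords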
def layer_op(self, idx=None):
    """
    This function generates sampling windows for the input buffer;
    image data are from ``self.reader()``.

    It first finds the appropriate indices in the data frame
    in which the centre samples are stored, and extracts information
    about the windows to draw on the data.
    The final dictionary is filled according to the appropriate samples.
    Different modes of handling unsuitable centres (for instance a
    patch size too large for the image) are implemented.

    :return: output data dictionary
        ``{image_modality: data_array, image_location: n_samples * 7}``
    """
    if self.window.n_samples > 1:
        raise ValueError("\nThe number of windows per image has to be "
                         "1 with a csv_reader")
    print("Trying to run csv patch sampler ")
    if 'sampler' not in self.csv_reader.names:
        tf.logging.warning('Uniform sampling because no csv sampler '
                           'provided')
    try:
        _, _, subject_id = self.csv_reader(idx)
    except ValueError:
        tf.logging.fatal("No available subject")
        raise
    assert len(self.available_subjects) > 0, \
        "No available subjects after the validity check"
    print("subject id is ", subject_id)
    if len(self.available_subjects) > 0:
        idx_subject_id = np.where(
            self.available_subjects == subject_id)[0][0]
        image_id, data, _ = self.reader(idx=idx_subject_id, shuffle=True)
        subj_indices, csv_data, _ = self.csv_reader(subject_id=subject_id)
        image_shapes = dict(
            (name, data[name].shape) for name in self.window.names)
        static_window_shapes = self.window.match_image_shapes(image_shapes)

        # Perform the checks relative to the sample choices and create
        # the corresponding (if needed) padding information to be applied
        num_idx, num_discard = self.check_csv_sampler_valid(
            subject_id, image_shapes, static_window_shapes)
        print(num_idx, num_discard, "available, discarded")

        if 'sampler' not in self.csv_reader.names:
            tf.logging.warning('Uniform sampling because no csv sampler '
                               'provided')

        # In the 'remove' configuration, none of the unsuitable samples
        # is used. Thus if the chosen subject does not have any suitable
        # sample, another one must be drawn. An error is raised if none
        # of the subjects has suitable samples.
        if self.mode_correction == 'remove':
            if num_idx == num_discard:
                if subject_id in set(self.available_subjects):
                    self.available_subjects.drop([idx_subject_id],
                                                 inplace=True)
                    print('self.available_subjects',
                          self.available_subjects, idx_subject_id)
                    subject_id = None
                else:
                    tf.logging.warning(
                        '%s may have already been dropped from list '
                        'of available subjects' % subject_id)
                    subject_id = None
                while subject_id is None and len(
                        self.available_subjects) > 0:
                    _, _, subject_id = self.csv_reader(idx)
                    print('list of available subjects is ',
                          self.available_subjects, idx_subject_id)
                    # Find the index corresponding to the drawn
                    # subject id in the reader
                    if subject_id in set(self.available_subjects):
                        idx_subject_id = np.where(
                            self.available_subjects == subject_id)[0][0]
                        image_id, data, _ = self.reader(
                            idx=idx_subject_id, shuffle=True)
                        subj_indices, csv_data, _ = self.csv_reader(
                            subject_id=subject_id)
                        if 'sampler' not in self.csv_reader.names:
                            tf.logging.warning(
                                'Uniform sampling because no csv '
                                'sampler provided')
                        image_shapes = dict(
                            (name, data[name].shape)
                            for name in self.window.names)
                        static_window_shapes = \
                            self.window.match_image_shapes(image_shapes)
                        num_idx, num_discard = \
                            self.check_csv_sampler_valid(
                                subject_id, image_shapes,
                                static_window_shapes)
                        if num_idx == num_discard:
                            if subject_id in set(self.available_subjects):
                                # drop in place, otherwise the returned
                                # copy is silently discarded
                                self.available_subjects.drop(
                                    [idx_subject_id], inplace=True)
                                subject_id = None
                            else:
                                subject_id = None
                    else:
                        subject_id = None
        if subject_id is None:
            tf.logging.fatal("None of the subjects has any suitable "
                             "samples. Consider using a different "
                             "alternative to unsuitable samples or "
                             "reducing your patch size")
            raise ValueError

        # find csv coordinates and return coordinates (not corrected)
        # and corresponding csv indices
        try:
            print('subject id to try is %s' % subject_id)
            coordinates, idx = self.csvcenter_spatial_coordinates(
                subject_id=subject_id,
                data=data,
                img_sizes=image_shapes,
                win_sizes=static_window_shapes,
                n_samples=self.window.n_samples,
                mode_correction=self.mode_correction)
            reject = False
            if self.mode_correction == 'remove':
                reject = True

            # initialise output dict, placeholders as dictionary keys
            # this dictionary will be used in
            # enqueue operation in the form of: `feed_dict=output_dict`
            output_dict = {}
            potential_pad = self.csv_reader.pad_by_task['sampler'][idx][0]
            potential_pad_corr_end = -1.0 * np.asarray(
                potential_pad[N_SPATIAL:])
            potential_pad_corr = np.concatenate(
                (potential_pad[:N_SPATIAL], potential_pad_corr_end), 0)

            # fill output dict with data
            for name in list(data):
                coordinates_key = LOCATION_FORMAT.format(name)
                image_data_key = name

                # fill the coordinates
                location_array = coordinates[name]
                output_dict[coordinates_key] = location_array

                # fill output window array
                image_array = []
                for window_id in range(self.window.n_samples):
                    x_start, y_start, z_start, x_end, y_end, z_end = \
                        location_array[window_id, 1:].astype(np.int32) + \
                        potential_pad_corr.astype(np.int32)
                    try:
                        image_window = data[name][x_start:x_end,
                                                  y_start:y_end,
                                                  z_start:z_end, ...]
                        if np.sum(potential_pad) > 0:
                            new_pad = np.reshape(potential_pad,
                                                 [2, N_SPATIAL]).T
                            add_pad = np.tile(
                                [0, 0],
                                [len(np.shape(image_window)) - N_SPATIAL,
                                 1])
                            new_pad = np.concatenate(
                                (new_pad, add_pad), 0).astype(np.int32)
                            new_img = np.pad(image_window,
                                             pad_width=new_pad,
                                             mode='constant',
                                             constant_values=0)
                            image_array.append(new_img[np.newaxis, ...])
                        else:
                            image_array.append(
                                image_window[np.newaxis, ...])
                    except ValueError:
                        tf.logging.fatal(
                            "dimensionality mismatch in input volumes, "
                            "please specify spatial_window_size with a "
                            "3D tuple and make sure each element is "
                            "smaller than the image length in each dim. "
                            "Current coords %s",
                            location_array[window_id])
                        raise
                if len(image_array) > 1:
                    output_dict[image_data_key] = \
                        np.concatenate(image_array, axis=0)
                else:
                    output_dict[image_data_key] = image_array[0]

            # fill output dict with csv_data
            if self.csv_reader is not None:
                idx_dict = {}
                list_keys = self.csv_reader.df_by_task.keys()
                for k in list_keys:
                    if self.csv_reader.type_by_task[k] == 'multi':
                        idx_dict[k] = idx
                    else:
                        for n in range(0, self.window.n_samples):
                            idx_dict[k] = 0
                _, csv_data_dict, _ = self.csv_reader(
                    idx=idx_dict, subject_id=subject_id, reject=reject)
                for name in csv_data_dict.keys():
                    csv_data_array = []
                    for n in range(0, self.window.n_samples):
                        csv_data_array.append(csv_data_dict[name])
                    if len(csv_data_array) == 1:
                        output_dict[name] = np.asarray(
                            csv_data_array[0], dtype=np.float32)
                    else:
                        output_dict[name] = np.concatenate(
                            csv_data_array, 0).astype(dtype=np.float32)
                for name in csv_data_dict.keys():
                    output_dict[name + '_location'] = \
                        output_dict['image_location']
            # the output image shape should be
            # [enqueue_batch_size, x, y, z, time, modality]
            # where enqueue_batch_size = windows_per_image
            return output_dict
        except ValueError:
            tf.logging.fatal("Cannot provide output for %s" % subject_id)
            raise
    else:
        tf.logging.fatal("%s not in available list of subjects" %
                         subject_id)
        raise ValueError
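# A hedged illustration of the padding correction above: ``potential_pad``
# is assumed to hold per-dimension deficits ([before_x, before_y, before_z,
# after_x, after_y, after_z]); reshaping to [2, N_SPATIAL] and transposing
# gives the [N_SPATIAL, 2] (before, after) rows that np.pad expects, with
# extra zero rows for the non-spatial trailing dims.
import numpy as np

N_SPATIAL = 3  # assumed constant from the surrounding module

potential_pad = np.array([1, 0, 2, 0, 3, 0])  # hypothetical deficits
window = np.zeros((7, 8, 5, 1, 1))            # cropped window, 5D
pad_rows = np.reshape(potential_pad, [2, N_SPATIAL]).T
pad_rows = np.concatenate(
    (pad_rows, np.tile([0, 0], [window.ndim - N_SPATIAL, 1])), 0)
padded = np.pad(window, pad_width=pad_rows, mode='constant')
assert padded.shape == (8, 11, 7, 1, 1)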
def layer_op(self):
    while True:
        image_id, data, _ = self.reader(idx=None, shuffle=False)
        if not data:
            break
        image_shapes = {
            name: data[name].shape for name in self.window.names}
        static_window_shapes = self.window.match_image_shapes(image_shapes)
        coordinates = grid_spatial_coordinates(
            image_id, image_shapes, static_window_shapes,
            self.border_size)

        # extend the number of sampling locations to be divisible
        # by batch size
        n_locations = list(coordinates.values())[0].shape[0]
        extra_locations = 0
        if (n_locations % self.batch_size) > 0:
            extra_locations = \
                self.batch_size - n_locations % self.batch_size
        total_locations = n_locations + extra_locations

        tf.logging.info('grid sampling image sizes: %s', image_shapes)
        tf.logging.info('grid sampling window sizes: %s',
                        static_window_shapes)
        if extra_locations > 0:
            tf.logging.info(
                "yielding %d locations from image, "
                "extended to %d to be divisible by batch size %d",
                n_locations, total_locations, self.batch_size)
        else:
            tf.logging.info("yielding %s locations from image",
                            n_locations)
        for i in range(total_locations):
            idx = i % n_locations
            # initialise output dict
            output_dict = {}
            for name in list(data):
                assert coordinates[name].shape[0] == n_locations, \
                    "different number of grid samples from the input " \
                    "images, don't know how to combine them in the queue"
                x_start, y_start, z_start, x_end, y_end, z_end = \
                    coordinates[name][idx, 1:]
                try:
                    image_window = data[name][
                        x_start:x_end, y_start:y_end, z_start:z_end, ...]
                except ValueError:
                    tf.logging.fatal(
                        "dimensionality mismatch in input volumes, "
                        "please specify spatial_window_size with a "
                        "3D tuple and make sure each element is "
                        "smaller than the image length in each dim.")
                    raise

                # fill output dict with data
                coordinates_key = LOCATION_FORMAT.format(name)
                image_data_key = name
                output_dict[coordinates_key] = coordinates[name][idx, ...]
                output_dict[image_data_key] = image_window[...]
            yield output_dict

    # refactor?
    # pad the queue with dummy (-1) windows so that dequeueing does not
    # block after the last real grid sample has been yielded
    for _ in range(self.queue_length + self.batch_size):
        for name in list(output_dict):
            output_dict[name] = np.ones_like(output_dict[name]) * -1
        yield output_dict
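# Consumer-side illustration (hypothetical) of the -1 sentinel convention
# used by the dummy windows above: a batch whose location vector is all -1
# marks the end of the real grid samples.
import numpy as np

def is_stopping_batch(location_array):
    """True if this batch only contains the -1 padding windows."""
    return np.all(np.asarray(location_array) == -1)

# e.g. is_stopping_batch(np.full((1, 7), -1)) -> True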