def _process_mask(self, mask, ignore_label, image_info, data):
  """Resizes or pads an eval mask and assigns `ignore_label` to padded pixels."""
  mask = tf.cast(mask, dtype=tf.float32)
  mask = tf.reshape(mask, shape=[1, data['height'], data['width'], 1])
  # The mask is first offset by +1 so that zero-padded pixels can be
  # identified and reassigned to the ignore label below.
  mask += 1

  if self._segmentation_resize_eval_groundtruth:
    # Resizes eval masks to match input image sizes. In that case, mean IoU
    # is computed on output_size, not on the original size of the images.
    image_scale = image_info[2, :]
    offset = image_info[3, :]
    mask = preprocess_ops.resize_and_crop_masks(
        mask, image_scale, self._output_size, offset)
  else:
    mask = tf.image.pad_to_bounding_box(
        mask, 0, 0,
        self._segmentation_groundtruth_padded_size[0],
        self._segmentation_groundtruth_padded_size[1])

  mask -= 1
  # Assign the ignore label to the padded region.
  mask = tf.where(
      tf.equal(mask, -1), ignore_label * tf.ones_like(mask), mask)
  mask = tf.squeeze(mask, axis=0)
  return mask
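# A minimal, self-contained sketch (not part of the parser) of the offset
# trick used above: tf.image.pad_to_bounding_box pads with zeros, so shifting
# labels by +1 before padding lets padded pixels be recognized as -1 after the
# -1 shift and rewritten to the ignore label. The 2x2 mask and its label
# values are made up for illustration.
def _demo_pad_with_ignore_label(ignore_label=255):
  mask = tf.constant([[0., 1.], [2., 0.]])                # Real labels 0..2.
  mask = tf.reshape(mask, [1, 2, 2, 1])                   # [batch, h, w, c].
  mask += 1                                               # Labels now 1..3.
  mask = tf.image.pad_to_bounding_box(mask, 0, 0, 4, 4)   # Zero-pads to 4x4.
  mask -= 1                                               # Padded pixels: -1.
  mask = tf.where(tf.equal(mask, -1),
                  ignore_label * tf.ones_like(mask), mask)
  return tf.squeeze(mask, axis=[0, 3])                    # Padding == 255.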
def _parse_train_data(self, data):
  """Parses data for training."""
  image, label = self._prepare_image_and_label(data)

  if self._train_on_crops:
    if data['image/height'] < self._output_size[0] or data[
        'image/width'] < self._output_size[1]:
      raise ValueError(
          'Image size has to be larger than crop size (output_size)')
    label = tf.reshape(label, [data['image/height'], data['image/width'], 1])
    # Crops image and label together so both receive the same random crop.
    image_mask = tf.concat([image, label], axis=2)
    image_mask_crop = tf.image.random_crop(image_mask,
                                           self._output_size + [4])
    image = image_mask_crop[:, :, :-1]
    label = tf.reshape(image_mask_crop[:, :, -1], [1] + self._output_size)

  # Flips image randomly during training.
  if self._aug_rand_hflip:
    image, _, label = preprocess_ops.random_horizontal_flip(
        image, masks=label)

  # Resizes and crops image.
  image, image_info = preprocess_ops.resize_and_crop_image(
      image,
      self._output_size,
      self._output_size,
      aug_scale_min=self._aug_scale_min,
      aug_scale_max=self._aug_scale_max)

  # Scale and offset applied to the image, reused below for the label.
  image_scale = image_info[2, :]
  offset = image_info[3, :]

  # Pad the label and make sure the padded region is assigned the ignore
  # label. The label is first offset by +1 and then padded with 0.
  label += 1
  label = tf.expand_dims(label, axis=3)
  label = preprocess_ops.resize_and_crop_masks(label, image_scale,
                                               self._output_size, offset)
  label -= 1
  label = tf.where(tf.equal(label, -1),
                   self._ignore_label * tf.ones_like(label), label)
  label = tf.squeeze(label, axis=0)
  valid_mask = tf.not_equal(label, self._ignore_label)
  labels = {
      'masks': label,
      'valid_masks': valid_mask,
      'image_info': image_info,
  }

  # Cast image to self._dtype.
  image = tf.cast(image, dtype=self._dtype)
  return image, labels
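# A standalone sketch of the channel-concat trick used in the crop branch
# above: cropping the image and the label as a single tensor guarantees both
# receive the exact same random crop window. Shapes and the crop size are
# illustrative assumptions.
def _demo_joint_random_crop(image, label, crop_size):
  # image: [h, w, 3] float; label: [h, w, 1] float; crop_size: [crop_h, crop_w].
  stacked = tf.concat([image, tf.cast(label, image.dtype)], axis=2)
  cropped = tf.image.random_crop(stacked, crop_size + [4])
  return cropped[:, :, :3], cropped[:, :, 3:]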
def _parse_eval_data(self, data):
  """Parses data for evaluation."""
  image, label = self._prepare_image_and_label(data)
  # The label is first offset by +1 and then padded with 0, so padded pixels
  # can be reassigned to the ignore label below.
  label += 1
  label = tf.expand_dims(label, axis=3)

  # Resizes and crops image. `image_info` rows hold the original size, the
  # scaled size, the (y, x) scale, and the (y, x) offset of the applied
  # preprocessing.
  image, image_info = preprocess_ops.resize_and_crop_image(
      image,
      self._output_size,
      self._output_size,
      preserve_aspect_ratio=self._preserve_aspect_ratio)

  if self._resize_eval_groundtruth:
    # Resizes eval masks to match input image sizes. In that case, mean IoU
    # is computed on output_size, not on the original size of the images.
    image_scale = image_info[2, :]
    offset = image_info[3, :]
    label = preprocess_ops.resize_and_crop_masks(
        label, image_scale, self._output_size, offset)
  else:
    label = tf.image.pad_to_bounding_box(
        label, 0, 0,
        self._groundtruth_padded_size[0],
        self._groundtruth_padded_size[1])

  label -= 1
  label = tf.where(tf.equal(label, -1),
                   self._ignore_label * tf.ones_like(label), label)
  label = tf.squeeze(label, axis=0)
  valid_mask = tf.not_equal(label, self._ignore_label)
  labels = {
      'masks': label,
      'valid_masks': valid_mask,
      'image_info': image_info
  }

  # Normalizes image with mean and std pixel values. Must be done after any
  # augmentation since certain ops rely on uint8 inputs.
  image = preprocess_ops.normalize_image(image, offset=MEAN_RGB,
                                         scale=STDDEV_RGB)

  # Cast image to self._dtype.
  image = tf.cast(image, dtype=self._dtype)
  return image, labels
def _parse_train_data(self, data):
  """Parses data for training."""
  image, label = self._prepare_image_and_label(data)

  # Flips image randomly during training.
  if self._aug_rand_hflip:
    image, _, label = preprocess_ops.random_horizontal_flip(
        image, masks=label)

  # Resizes and crops image.
  image, image_info = preprocess_ops.resize_and_crop_image(
      image,
      self._output_size,
      self._output_size,
      aug_scale_min=self._aug_scale_min,
      aug_scale_max=self._aug_scale_max)

  # Scale and offset applied to the image, reused below for the label.
  image_scale = image_info[2, :]
  offset = image_info[3, :]

  # Pad the label and make sure the padded region is assigned the ignore
  # label. The label is first offset by +1 and then padded with 0.
  label += 1
  label = tf.expand_dims(label, axis=3)
  label = preprocess_ops.resize_and_crop_masks(label, image_scale,
                                               self._output_size, offset)
  label -= 1
  label = tf.where(tf.equal(label, -1),
                   self._ignore_label * tf.ones_like(label), label)
  label = tf.squeeze(label, axis=0)
  valid_mask = tf.not_equal(label, self._ignore_label)
  labels = {
      'masks': label,
      'valid_masks': valid_mask,
      'image_info': image_info,
  }

  # Cast image to self._dtype.
  image = tf.cast(image, dtype=self._dtype)
  return image, labels
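# A hedged sketch (not part of this file) of how the 'masks' / 'valid_masks'
# pair produced above is typically consumed: the valid mask zeroes out the
# per-pixel loss at ignore-label pixels. `logits` is a hypothetical
# [h, w, num_classes] model output.
def _demo_masked_segmentation_loss(logits, labels):
  valid = tf.cast(labels['valid_masks'], tf.float32)       # [h, w, 1]
  # Remap ignore-label pixels to class 0; their loss is zeroed out anyway.
  gt = tf.where(labels['valid_masks'], labels['masks'],
                tf.zeros_like(labels['masks']))
  per_pixel = tf.nn.sparse_softmax_cross_entropy_with_logits(
      labels=tf.cast(tf.squeeze(gt, axis=-1), tf.int32), logits=logits)
  per_pixel *= tf.squeeze(valid, axis=-1)
  return tf.reduce_sum(per_pixel) / (tf.reduce_sum(valid) + 1e-8)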
def _parse_eval_data(self, data):
  """Parses data for evaluation."""
  image, label = self._prepare_image_and_label(data)
  # The label is first offset by +1 and then padded with 0, so padded pixels
  # can be reassigned to the ignore label below.
  label += 1
  label = tf.expand_dims(label, axis=3)

  if self._resize_eval:
    # Resizes and crops image.
    image, image_info = preprocess_ops.resize_and_crop_image(
        image, self._output_size, self._output_size)
    # Resizes and crops mask with the same scale and offset.
    image_scale = image_info[2, :]
    offset = image_info[3, :]
    label = preprocess_ops.resize_and_crop_masks(
        label, image_scale, self._output_size, offset)
  else:
    # Pads image and mask to output size.
    image = tf.image.pad_to_bounding_box(image, 0, 0, self._output_size[0],
                                         self._output_size[1])
    label = tf.image.pad_to_bounding_box(label, 0, 0, self._output_size[0],
                                         self._output_size[1])

  label -= 1
  label = tf.where(tf.equal(label, -1),
                   self._ignore_label * tf.ones_like(label), label)
  label = tf.squeeze(label, axis=0)
  valid_mask = tf.not_equal(label, self._ignore_label)
  labels = {'masks': label, 'valid_masks': valid_mask}

  # Cast image to self._dtype.
  image = tf.cast(image, dtype=self._dtype)
  return image, labels
def _parse_eval_data(self, data):
  """Parses data for evaluation.

  Args:
    data: the decoded tensor dictionary from TfExampleDecoder.

  Returns:
    A dictionary of {'images': image, 'labels': labels} where
      image: image tensor that is preprocessed to have normalized value and
        dimension [output_size[0], output_size[1], 3]
      labels: a dictionary of tensors used for training. The following
        describes {key: value} pairs in the dictionary.
        source_ids: Source image id. Default value -1 if the source id is
          empty in the groundtruth annotation.
        image_info: a 2D `Tensor` that encodes the information of the image
          and the applied preprocessing. It is in the format of
          [[original_height, original_width], [scaled_height, scaled_width]].
        anchor_boxes: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensors
          with shape [height_l, width_l, 4] representing anchor boxes at each
          level.
  """
  segmentation_mask = tf.cast(data['groundtruth_segmentation_mask'],
                              tf.float32)
  segmentation_mask = tf.reshape(
      segmentation_mask, shape=[1, data['height'], data['width'], 1])
  # The mask is first offset by +1 so that zero-padded pixels can be
  # reassigned to the ignore label below.
  segmentation_mask += 1

  image, labels = super(Parser, self)._parse_eval_data(data)

  if self._segmentation_resize_eval_groundtruth:
    # Resizes eval masks to match input image sizes. In that case, mean IoU
    # is computed on output_size, not on the original size of the images.
    image_info = labels['image_info']
    image_scale = image_info[2, :]
    offset = image_info[3, :]
    segmentation_mask = preprocess_ops.resize_and_crop_masks(
        segmentation_mask, image_scale, self._output_size, offset)
  else:
    segmentation_mask = tf.image.pad_to_bounding_box(
        segmentation_mask, 0, 0,
        self._segmentation_groundtruth_padded_size[0],
        self._segmentation_groundtruth_padded_size[1])

  segmentation_mask -= 1
  # Assign the ignore label to the padded region.
  segmentation_mask = tf.where(
      tf.equal(segmentation_mask, -1),
      self._segmentation_ignore_label * tf.ones_like(segmentation_mask),
      segmentation_mask)
  segmentation_mask = tf.squeeze(segmentation_mask, axis=0)
  segmentation_valid_mask = tf.not_equal(segmentation_mask,
                                         self._segmentation_ignore_label)
  labels['groundtruths'].update({
      'gt_segmentation_mask': segmentation_mask,
      'gt_segmentation_valid_mask': segmentation_valid_mask
  })
  return image, labels
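# A hedged sketch (not from this file) of how the eval groundtruths added
# above are typically consumed: the valid mask becomes a per-pixel sample
# weight so padded / ignore-label pixels do not contribute to mean IoU.
# `predictions` (per-pixel class ids) and `num_classes` are assumed inputs.
def _demo_mean_iou(labels, predictions, num_classes):
  metric = tf.keras.metrics.MeanIoU(num_classes=num_classes)
  gt = labels['groundtruths']['gt_segmentation_mask']
  valid = labels['groundtruths']['gt_segmentation_valid_mask']
  # Remap ignore-label pixels to class 0; their weight is zero anyway.
  gt = tf.where(valid, gt, tf.zeros_like(gt))
  metric.update_state(gt, predictions,
                      sample_weight=tf.cast(valid, tf.float32))
  return metric.result()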
def _parse_train_data(self, data):
  """Parses data for training.

  Args:
    data: the decoded tensor dictionary from TfExampleDecoder.

  Returns:
    image: image tensor that is preprocessed to have normalized value and
      dimension [output_size[0], output_size[1], 3]
    labels: a dictionary of tensors used for training. The following
      describes {key: value} pairs in the dictionary.
      image_info: a 2D `Tensor` that encodes the information of the image and
        the applied preprocessing. It is in the format of
        [[original_height, original_width], [scaled_height, scaled_width]].
      anchor_boxes: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors with
        shape [height_l, width_l, 4] representing anchor boxes at each level.
      rpn_score_targets: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors with
        shape [height_l, width_l, anchors_per_location]. The height_l and
        width_l represent the dimension of class logits at l-th level.
      rpn_box_targets: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors with
        shape [height_l, width_l, anchors_per_location * 4]. The height_l and
        width_l represent the dimension of bounding box regression output at
        l-th level.
      gt_boxes: Groundtruth bounding box annotations. The box is represented
        in [y1, x1, y2, x2] format. The coordinates are w.r.t. the scaled
        image that is fed to the network. The tensor is padded with -1 to the
        fixed dimension [self._max_num_instances, 4].
      gt_classes: Groundtruth classes annotations. The tensor is padded with
        -1 to the fixed dimension [self._max_num_instances].
      gt_masks: Groundtruth masks cropped by the bounding box and resized to
        a fixed size determined by mask_crop_size.
      gt_segmentation_mask: Groundtruth mask for the segmentation head,
        resized to a fixed size determined by output_size.
      gt_segmentation_valid_mask: Binary mask that marks the pixels that are
        supposed to be used in computing the segmentation loss while
        training.
  """
  segmentation_mask = data['groundtruth_segmentation_mask']

  # Flips image randomly during training. The image and the segmentation
  # mask are concatenated and flipped as one tensor so they stay aligned.
  if self.aug_rand_hflip:
    masks = data['groundtruth_instance_masks']
    image_mask = tf.concat([data['image'], segmentation_mask], axis=2)
    image_mask, boxes, masks = preprocess_ops.random_horizontal_flip(
        image_mask, data['groundtruth_boxes'], masks)
    segmentation_mask = image_mask[:, :, -1:]
    image = image_mask[:, :, :-1]
    data['image'] = image
    data['boxes'] = boxes
    data['masks'] = masks

  image, labels = super(Parser, self)._parse_train_data(data)
  image_info = labels['image_info']
  image_scale = image_info[2, :]
  offset = image_info[3, :]

  segmentation_mask = tf.reshape(
      segmentation_mask, shape=[1, data['height'], data['width']])
  segmentation_mask = tf.cast(segmentation_mask, tf.float32)

  # Pad the label and make sure the padded region is assigned the ignore
  # label. The label is first offset by +1 and then padded with 0.
  segmentation_mask += 1
  segmentation_mask = tf.expand_dims(segmentation_mask, axis=3)
  segmentation_mask = preprocess_ops.resize_and_crop_masks(
      segmentation_mask, image_scale, self._output_size, offset)
  segmentation_mask -= 1
  segmentation_mask = tf.where(
      tf.equal(segmentation_mask, -1),
      self._segmentation_ignore_label * tf.ones_like(segmentation_mask),
      segmentation_mask)
  segmentation_mask = tf.squeeze(segmentation_mask, axis=0)
  segmentation_valid_mask = tf.not_equal(segmentation_mask,
                                         self._segmentation_ignore_label)
  labels.update({
      'gt_segmentation_mask': segmentation_mask,
      'gt_segmentation_valid_mask': segmentation_valid_mask
  })
  return image, labels
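# A self-contained sketch of a joint horizontal flip analogous to the one in
# the training parser above: image and segmentation mask are flipped as one
# tensor, and normalized [y1, x1, y2, x2] boxes are mirrored to match. The
# parser itself relies on preprocess_ops.random_horizontal_flip; this is only
# an illustration with stock TF ops.
def _demo_joint_hflip(image, segmentation_mask, boxes):
  stacked = tf.image.flip_left_right(
      tf.concat([image, tf.cast(segmentation_mask, image.dtype)], axis=2))
  y1, x1, y2, x2 = tf.split(boxes, 4, axis=-1)
  flipped_boxes = tf.concat([y1, 1.0 - x2, y2, 1.0 - x1], axis=-1)
  return stacked[:, :, :-1], stacked[:, :, -1:], flipped_boxes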
def _parse_train_data(self, data):
  """Parses data for training."""
  image, label = self._prepare_image_and_label(data)

  if self._crop_size:
    label = tf.reshape(label, [data['image/height'], data['image/width'], 1])
    # If output_size is specified, resize image and label to the desired
    # output_size before cropping.
    if self._output_size:
      image = tf.image.resize(image, self._output_size, method='bilinear')
      label = tf.image.resize(label, self._output_size, method='nearest')
    # Crops image and label together so both receive the same random crop.
    image_mask = tf.concat([image, label], axis=2)
    image_mask_crop = tf.image.random_crop(image_mask, self._crop_size + [4])
    image = image_mask_crop[:, :, :-1]
    label = tf.reshape(image_mask_crop[:, :, -1], [1] + self._crop_size)

  # Flips image randomly during training.
  if self._aug_rand_hflip:
    image, _, label = preprocess_ops.random_horizontal_flip(
        image, masks=label)

  train_image_size = self._crop_size if self._crop_size else self._output_size

  # Rotates image randomly during training.
  if (self._rotate_min != 0.0 and self._rotate_max != 0.0 and
      self._rotate_min < self._rotate_max):
    image, label = preprocess_ops.random_rotation(
        image,
        masks=label,
        rotate_max=self._rotate_max,
        rotate_min=self._rotate_min,
        ignore_label=self._ignore_label)

  # Resizes and crops image.
  image, image_info = preprocess_ops.resize_and_crop_image(
      image,
      train_image_size,
      train_image_size,
      aug_scale_min=self._aug_scale_min,
      aug_scale_max=self._aug_scale_max,
      preserve_aspect_ratio=self._preserve_aspect_ratio)

  # Modifies brightness randomly during training.
  if (self._bright_min != 1.0 and self._bright_max != 1.0 and
      self._bright_min < self._bright_max):
    image = preprocess_ops.random_brightness(
        image, bright_min=self._bright_min, bright_max=self._bright_max)

  # Scale and offset applied to the image, reused below for the label.
  image_scale = image_info[2, :]
  offset = image_info[3, :]

  # Pad the label and make sure the padded region is assigned the ignore
  # label. The label is first offset by +1 and then padded with 0.
  label += 1
  label = tf.expand_dims(label, axis=3)
  label = preprocess_ops.resize_and_crop_masks(label, image_scale,
                                               train_image_size, offset)
  label -= 1
  label = tf.where(tf.equal(label, -1),
                   self._ignore_label * tf.ones_like(label), label)
  label = tf.squeeze(label, axis=0)

  # Apply randaug.
  if self._augmenter is not None:
    image, label = self._augmenter.distort_image_and_mask(
        image, label, self._ignore_label)

  valid_mask = tf.not_equal(label, self._ignore_label)
  labels = {
      'masks': label,
      'valid_masks': valid_mask,
      'image_info': image_info,
  }

  # Normalizes image with mean and std pixel values. Must be done after the
  # augmenter since certain ops rely on uint8 inputs.
  image = preprocess_ops.normalize_image(image, offset=MEAN_RGB,
                                         scale=STDDEV_RGB)

  # Cast image to self._dtype.
  image = tf.cast(image, dtype=self._dtype)
  return image, labels
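# A standalone sketch (using stock TF ops rather than preprocess_ops) of a
# multiplicative brightness jitter analogous to the bright_min / bright_max
# range gated above; the factor is drawn uniformly and the result is kept in
# the [0, 255] range the augmenters expect. Defaults are illustrative.
def _demo_random_brightness(image, bright_min=0.8, bright_max=1.2):
  factor = tf.random.uniform([], minval=bright_min, maxval=bright_max)
  return tf.clip_by_value(tf.cast(image, tf.float32) * factor, 0.0, 255.0)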