def _parse_eval_data(self, data):
    """Builds the evaluation image and label dictionary for one example.

    No random scaling is applied here: both scale bounds passed to the
    resize op are pinned to 1.0.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      images: the image tensor.
      labels: a dict of Tensors that contains labels.
    """
    raw_image = tf.cast(data['image'], dtype=tf.float32)
    original_hw = tf.shape(input=raw_image)[0:2]

    # Ground truth boxes are normalized; move them to pixel coordinates of
    # the original image before any resizing happens.
    pixel_boxes = box_ops.denormalize_boxes(
        data['groundtruth_boxes'], original_hw)

    # Resize/crop the image to the configured output size.
    resized_image, image_info = preprocess_ops.resize_and_crop_image(
        raw_image,
        [self._output_height, self._output_width],
        padded_size=[self._output_height, self._output_width],
        aug_scale_min=1.0,
        aug_scale_max=1.0)
    unpad_image_shape = tf.cast(tf.shape(resized_image), tf.float32)

    # Rows 2 and 3 of image_info hold the scale and offset that were applied
    # to the image; row 1 is passed as the target size for the boxes.
    scaled_boxes = preprocess_ops.resize_and_crop_boxes(
        pixel_boxes, image_info[2, :], image_info[1, :], image_info[3, :])

    # Drop ground truth entries whose box is all zeros.
    keep = box_ops.get_non_empty_box_indices(scaled_boxes)
    labels = self._build_label(
        unpad_image_shape=unpad_image_shape,
        boxes=tf.gather(scaled_boxes, keep),
        classes=tf.gather(data['groundtruth_classes'], keep),
        image_info=image_info,
        data=data)

    # Optionally swap channel order from RGB to BGR before normalization.
    if self._bgr_ordering:
        red, green, blue = tf.unstack(resized_image, num=3, axis=2)
        resized_image = tf.stack([blue, green, red], axis=2)

    normalized = preprocess_ops.normalize_image(
        image=resized_image,
        offset=self._channel_means,
        scale=self._channel_stds)
    return tf.cast(normalized, self._dtype), labels
def _build_inputs(self, image):
    """Prepares a single image for the classification serving model.

    The image is normalized with MEAN_RGB / STDDEV_RGB and then resized and
    padded to `self._input_image_size` with the scale bounds fixed at 1.0.

    Args:
      image: the input image tensor.

    Returns:
      A tuple `(image, image_info)` as produced by
      `preprocess_ops.resize_and_crop_image`.
    """
    target_size = self._input_image_size
    normalized = preprocess_ops.normalize_image(
        image, offset=MEAN_RGB, scale=STDDEV_RGB)
    resized, image_info = preprocess_ops.resize_and_crop_image(
        normalized,
        target_size,
        padded_size=target_size,
        aug_scale_min=1.0,
        aug_scale_max=1.0)
    return resized, image_info
def _build_inputs(self, image):
    """Prepares a single image and anchors for the detection serving model.

    The image is normalized with MEAN_RGB / STDDEV_RGB, resized to
    `self._input_image_size`, and padded to the size computed from the
    input size and a stride of `2 ** max_level`.

    Args:
      image: the input image tensor.

    Returns:
      A tuple `(image, anchor_boxes, image_info)`.
    """
    # Normalize first, using the fixed RGB mean/std constants.
    normalized = preprocess_ops.normalize_image(
        image, offset=MEAN_RGB, scale=STDDEV_RGB)

    # The padded size depends on the coarsest feature level of the model.
    max_stride = 2**self.params.task.model.max_level
    padded_size = preprocess_ops.compute_padded_size(
        self._input_image_size, max_stride)

    resized, image_info = preprocess_ops.resize_and_crop_image(
        normalized,
        self._input_image_size,
        padded_size=padded_size,
        aug_scale_min=1.0,
        aug_scale_max=1.0)

    return resized, self._build_anchor_boxes(), image_info
def test_resize_and_crop_image_rectangluar_case(
        self, input_height, input_width, desired_height, desired_width,
        stride, scale_y, scale_x, output_height, output_width):
    """Checks output shape and image_info for a non-square resize."""
    source_image = tf.convert_to_tensor(
        np.random.rand(input_height, input_width, 3))
    desired_size = (desired_height, desired_width)
    padded_size = preprocess_ops.compute_padded_size(desired_size, stride)

    resized_image, image_info = preprocess_ops.resize_and_crop_image(
        source_image, desired_size=desired_size, padded_size=padded_size)

    # The padded output must have the expected spatial size and 3 channels.
    expected_shape = [output_height, output_width, 3]
    self.assertAllEqual(expected_shape, tf.shape(resized_image).numpy())

    # image_info rows: original size, desired size, scale, offset.
    expected_info = [
        [input_height, input_width],
        [desired_height, desired_width],
        [scale_y, scale_x],
        [0.0, 0.0],
    ]
    self.assertNDArrayNear(expected_info, image_info.numpy(), 1e-5)
def _parse_eval_data(self, data):
    """Parses one example into an image and mask labels for evaluation.

    Args:
      data: the decoded tensor dictionary for one example.

    Returns:
      A tuple `(image, labels)` where `labels` holds `masks`, `valid_masks`
      and `image_info`.
    """
    image, label = self._prepare_image_and_label(data)

    # Shift every label up by one so that the zeros introduced by padding
    # below can be told apart from a real class id of 0.
    label += 1
    label = tf.expand_dims(label, axis=3)

    image, image_info = preprocess_ops.resize_and_crop_image(
        image, self._output_size, self._output_size)

    if not self._resize_eval_groundtruth:
        # Keep the masks at their own resolution and only pad them to the
        # fixed ground truth size.
        padded_height = self._groundtruth_padded_size[0]
        padded_width = self._groundtruth_padded_size[1]
        label = tf.image.pad_to_bounding_box(
            label, 0, 0, padded_height, padded_width)
    else:
        # Resize the masks the same way as the image, so metrics are computed
        # at output_size rather than at the original image size.
        label = preprocess_ops.resize_and_crop_masks(
            label, image_info[2, :], self._output_size, image_info[3, :])

    # Undo the +1 shift; anything that is now -1 came from padding and is
    # mapped to the ignore label.
    label -= 1
    ignore_fill = self._ignore_label * tf.ones_like(label)
    label = tf.where(tf.equal(label, -1), ignore_fill, label)
    label = tf.squeeze(label, axis=0)

    labels = {
        'masks': label,
        'valid_masks': tf.not_equal(label, self._ignore_label),
        'image_info': image_info
    }

    return tf.cast(image, dtype=self._dtype), labels
def _parse_data(self, data, is_training):
    """Parses one example into an image and panoptic labels.

    Augmentation (the optional augmenter, random horizontal flip and random
    scaling) is only applied when `is_training` is true.

    Args:
      data: the decoded tensor dictionary for one example.
      is_training: whether training-time augmentation should be applied.

    Returns:
      A tuple `(image, labels)` where `labels` is a dict of mask, center
      heatmap/offset, weight and image_info tensors.
    """
    image = data['image']
    if is_training and self._augmenter is not None:
        image = self._augmenter.distort(image)
    image = preprocess_ops.normalize_image(image)

    # Only the first channel of each panoptic mask is used.
    category_mask = tf.cast(
        data['groundtruth_panoptic_category_mask'][:, :, 0],
        dtype=tf.float32)
    instance_mask = tf.cast(
        data['groundtruth_panoptic_instance_mask'][:, :, 0],
        dtype=tf.float32)

    # Flip the image and both masks together so they stay aligned.
    if is_training and self._aug_rand_hflip:
        stacked_masks = tf.stack([category_mask, instance_mask], axis=0)
        image, _, stacked_masks = preprocess_ops.random_horizontal_flip(
            image=image, masks=stacked_masks)
        category_mask = stacked_masks[0]
        instance_mask = stacked_masks[1]

    # Random scaling is disabled (both bounds 1.0) outside of training.
    scale_min = self._aug_scale_min if is_training else 1.0
    scale_max = self._aug_scale_max if is_training else 1.0
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        self._output_size,
        self._output_size,
        aug_scale_min=scale_min,
        aug_scale_max=scale_max)

    category_mask = self._resize_and_crop_mask(
        category_mask, image_info, is_training=is_training)
    instance_mask = self._resize_and_crop_mask(
        instance_mask, image_info, is_training=is_training)

    center_outputs = self._encode_centers_and_offets(
        instance_mask=instance_mask[:, :, 0])
    centers_heatmap, centers_offset, semantic_weights = center_outputs

    # Cast the image and the label tensors to the configured dtype.
    out_dtype = self._dtype
    image = tf.cast(image, dtype=out_dtype)
    category_mask = tf.cast(category_mask, dtype=out_dtype)
    instance_mask = tf.cast(instance_mask, dtype=out_dtype)
    centers_heatmap = tf.cast(centers_heatmap, dtype=out_dtype)
    centers_offset = tf.cast(centers_offset, dtype=out_dtype)

    labels = {
        'category_mask': category_mask,
        'instance_mask': instance_mask,
        'instance_centers_heatmap': centers_heatmap,
        'instance_centers_offset': centers_offset,
        'semantic_weights': semantic_weights,
        'valid_mask': tf.not_equal(category_mask, self._ignore_label),
        'things_mask': tf.not_equal(instance_mask, self._ignore_label),
        'image_info': image_info
    }
    return image, labels
def _parse_train_data(self, data):
    """Generates images and labels that are usable for model training.

    Augmentation consists of an optional random horizontal flip, optional
    colour jittering (hue, contrast, saturation, brightness) and random
    scaling/cropping bounded by `self._aug_scale_min` and
    `self._aug_scale_max`. Two augmentation pipelines exist and are selected
    by `self._odapi_augmentation`.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      images: the image tensor.
      labels: a dict of Tensors that contains labels.
    """
    image = tf.cast(data['image'], dtype=tf.float32)
    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']

    # Size of the original image, captured before any resizing.
    image_shape = tf.shape(input=image)[0:2]

    if self._aug_rand_hflip:
        image, boxes, _ = preprocess_ops.random_horizontal_flip(
            image, boxes)

    # Image augmentation
    if not self._odapi_augmentation:
        # Color and lighting jittering
        if self._aug_rand_hue:
            image = tf.image.random_hue(image=image, max_delta=.02)
        if self._aug_rand_contrast:
            image = tf.image.random_contrast(image=image, lower=0.8, upper=1.25)
        if self._aug_rand_saturation:
            image = tf.image.random_saturation(image=image, lower=0.8, upper=1.25)
        if self._aug_rand_brightness:
            image = tf.image.random_brightness(image=image, max_delta=.2)
        # Clamp pixel values back into [0, 255] after jittering.
        image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image, [self._output_height, self._output_width],
            padded_size=[self._output_height, self._output_width],
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        unpad_image_shape = tf.cast(tf.shape(image), tf.float32)

        # Resizes and crops boxes using the scale (row 2) and offset (row 3)
        # recorded in image_info.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(
            boxes, image_scale, image_info[1, :], offset)
    else:
        # Color and lighting jittering
        if self._aug_rand_hue:
            image = cn_prep_ops.random_adjust_hue(image=image, max_delta=.02)
        if self._aug_rand_contrast:
            image = cn_prep_ops.random_adjust_contrast(image=image, min_delta=0.8, max_delta=1.25)
        if self._aug_rand_saturation:
            image = cn_prep_ops.random_adjust_saturation(image=image, min_delta=0.8, max_delta=1.25)
        if self._aug_rand_brightness:
            image = cn_prep_ops.random_adjust_brightness(image=image, max_delta=.2)

        # Random square crop; this also returns the boxes and classes that
        # go with the crop.
        sc_image, sc_boxes, classes = cn_prep_ops.random_square_crop_by_scale(
            image=image, boxes=boxes, labels=classes,
            scale_min=self._aug_scale_min, scale_max=self._aug_scale_max)

        # NOTE(review): both min and max dimension use self._output_width;
        # presumably the output is expected to be square - confirm.
        image, unpad_image_shape = cn_prep_ops.resize_to_range(
            image=sc_image, min_dimension=self._output_width,
            max_dimension=self._output_width, pad_to_max_dimension=True)
        preprocessed_shape = tf.cast(tf.shape(image), tf.float32)
        unpad_image_shape = tf.cast(unpad_image_shape, tf.float32)

        # Window whose extent is the ratio of the padded size to the unpadded
        # size, used to re-express the boxes relative to the padded image.
        im_box = tf.stack([
            0.0, 0.0,
            preprocessed_shape[0] / unpad_image_shape[0],
            preprocessed_shape[1] / unpad_image_shape[1]
        ])
        realigned_bboxes = box_list_ops.change_coordinate_frame(
            boxlist=box_list.BoxList(sc_boxes), window=im_box)
        valid_boxes = box_list_ops.assert_or_prune_invalid_boxes(
            realigned_bboxes.get())
        boxes = box_list_ops.to_absolute_coordinates(
            boxlist=box_list.BoxList(valid_boxes),
            height=self._output_height, width=self._output_width).get()

        # Build image_info by hand with the same four rows the other branch
        # gets from resize_and_crop_image: original size, output size,
        # scale, and a zero offset.
        image_info = tf.stack([
            tf.cast(image_shape, dtype=tf.float32),
            tf.constant([self._output_height, self._output_width], dtype=tf.float32),
            tf.cast(tf.shape(sc_image)[0:2] / image_shape, dtype=tf.float32),
            tf.constant([0., 0.])
        ])

    # Filters out ground truth boxes that are all zeros.
    indices = box_ops.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)

    labels = self._build_label(unpad_image_shape=unpad_image_shape, boxes=boxes, classes=classes, image_info=image_info, data=data)

    # Optionally swap channel order from RGB to BGR before normalization.
    if self._bgr_ordering:
        red, green, blue = tf.unstack(image, num=3, axis=2)
        image = tf.stack([blue, green, red], axis=2)

    image = preprocess_ops.normalize_image(image=image, offset=self._channel_means, scale=self._channel_stds)
    image = tf.cast(image, self._dtype)
    return image, labels
def _parse_eval_data(self, data):
    """Parses data for evaluation.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      image: image tensor that is normalized, resized and cast to
        `self._dtype`.
      labels: a dictionary of tensors with the following entries.
        image_info: a 2D `Tensor` describing the applied preprocessing, as
          returned by `preprocess_ops.resize_and_crop_image`.
        anchor_boxes: anchor boxes generated for the padded image size.
        groundtruths: a dictionary with `source_id`, `height`, `width`,
          `num_detections`, `boxes`, `classes`, `areas` and `is_crowds`,
          padded to `self._max_num_instances`.
    """
    # Record the original size before the image is resized.
    raw_image = data['image']
    original_hw = tf.shape(raw_image)[0:2]

    # Normalize, then resize/pad to a size compatible with the coarsest
    # feature level. No random scaling is used at evaluation time.
    image = preprocess_ops.normalize_image(raw_image)
    padded_size = preprocess_ops.compute_padded_size(
        self._output_size, 2 ** self._max_level)
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        self._output_size,
        padded_size=padded_size,
        aug_scale_min=1.0,
        aug_scale_max=1.0)
    image_height, image_width, _ = image.get_shape().as_list()
    image = tf.cast(image, dtype=self._dtype)

    # Ground truth boxes are reported in pixel coordinates of the original
    # image.
    gt_boxes = box_ops.denormalize_boxes(
        data['groundtruth_boxes'], original_hw)

    # Anchors are generated for the padded image size.
    anchor_generator = anchor.build_anchor_generator(
        min_level=self._min_level,
        max_level=self._max_level,
        num_scales=self._num_scales,
        aspect_ratios=self._aspect_ratios,
        anchor_size=self._anchor_size)
    anchor_boxes = anchor_generator(image_size=(image_height, image_width))

    gt_classes = data['groundtruth_classes']
    groundtruths = {
        'source_id': data['source_id'],
        'height': data['height'],
        'width': data['width'],
        'num_detections': tf.shape(gt_classes)[0],
        'boxes': gt_boxes,
        'classes': gt_classes,
        'areas': data['groundtruth_area'],
        'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
    }
    groundtruths['source_id'] = utils.process_source_id(
        groundtruths['source_id'])
    groundtruths = utils.pad_groundtruths_to_fixed_size(
        groundtruths, self._max_num_instances)

    labels = {
        'image_info': image_info,
        'anchor_boxes': anchor_boxes,
        'groundtruths': groundtruths,
    }
    return image, labels
def _parse_train_data(self, data):
    """Parses data for training.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      image: image tensor that is preprocessed to have normalized value and
        dimension [output_size[0], output_size[1], 3]
      labels: a dictionary of tensors used for training. The following
        describes {key: value} pairs in the dictionary.
        image_info: a 2D `Tensor` that encodes the information of the image
          and the applied preprocessing. It is in the format of
          [[original_height, original_width], [desired_height, desired_width],
          [y_scale, x_scale], [y_offset, x_offset]].
        anchor_boxes: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, 4] representing anchor boxes at each
          level.
        rpn_score_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location]. The height_l and
          width_l represent the dimension of class logits at l-th level.
        rpn_box_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location * 4]. The height_l
          and width_l represent the dimension of bounding box regression
          output at l-th level.
        gt_boxes: Groundtruth bounding box annotations. The box is represented
          in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled
          image that is fed to the network. The tensor is padded with -1 to
          the fixed dimension [self._max_num_instances, 4].
        gt_classes: Groundtruth classes annotations. The tensor is padded
          with -1 to the fixed dimension [self._max_num_instances].
        gt_masks: groundtruth masks cropped by the bounding box and
          resized to a fixed size determined by mask_crop_size. Only present
          when `self._include_mask` is set.
    """
    classes = data['groundtruth_classes']
    boxes = data['groundtruth_boxes']
    if self._include_mask:
        masks = data['groundtruth_instance_masks']

    is_crowds = data['groundtruth_is_crowd']
    # Skips annotations with `is_crowd` = True.
    if self._skip_crowd_during_training:
        num_groundtruths = tf.shape(classes)[0]
        with tf.control_dependencies([num_groundtruths, is_crowds]):
            # When `is_crowds` is empty, keep every annotation instead of
            # indexing into an empty tensor.
            indices = tf.cond(
                tf.greater(tf.size(is_crowds), 0),
                lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
        classes = tf.gather(classes, indices)
        boxes = tf.gather(boxes, indices)
        if self._include_mask:
            masks = tf.gather(masks, indices)

    # Gets original image and its size.
    image = data['image']

    if self._augmenter is not None:
        image = self._augmenter.distort(image)

    image_shape = tf.shape(image)[0:2]

    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(image)

    # Flips image randomly during training.
    if self._aug_rand_hflip:
        if self._include_mask:
            image, boxes, masks = preprocess_ops.random_horizontal_flip(
                image, boxes, masks)
        else:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes)

    # Converts boxes from normalized coordinates to pixel coordinates.
    # Now the coordinates of boxes are w.r.t. the original image.
    boxes = box_ops.denormalize_boxes(boxes, image_shape)

    # Resizes and crops image.
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        self._output_size,
        padded_size=preprocess_ops.compute_padded_size(
            self._output_size, 2 ** self._max_level),
        aug_scale_min=self._aug_scale_min,
        aug_scale_max=self._aug_scale_max)
    image_height, image_width, _ = image.get_shape().as_list()

    # Resizes and crops boxes.
    # Now the coordinates of boxes are w.r.t the scaled image.
    image_scale = image_info[2, :]
    offset = image_info[3, :]
    boxes = preprocess_ops.resize_and_crop_boxes(
        boxes, image_scale, image_info[1, :], offset)

    # Filters out ground truth boxes that are all zeros.
    indices = box_ops.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)
    if self._include_mask:
        masks = tf.gather(masks, indices)
        # Transfer boxes to the original image space and do normalization.
        # The masks were not resized with the image, so the boxes are mapped
        # back (add offset, divide by scale) before cropping the masks.
        cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
        cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2])
        cropped_boxes = box_ops.normalize_boxes(cropped_boxes, image_shape)
        num_masks = tf.shape(masks)[0]
        # Each mask is treated as its own single-channel image, and box i
        # crops mask i.
        masks = tf.image.crop_and_resize(
            tf.expand_dims(masks, axis=-1),
            cropped_boxes,
            box_indices=tf.range(num_masks, dtype=tf.int32),
            crop_size=[self._mask_crop_size, self._mask_crop_size],
            method='bilinear')
        masks = tf.squeeze(masks, axis=-1)

    # Assigns anchor targets.
    # Note that after the target assignment, box targets are absolute pixel
    # offsets w.r.t. the scaled image.
    input_anchor = anchor.build_anchor_generator(
        min_level=self._min_level,
        max_level=self._max_level,
        num_scales=self._num_scales,
        aspect_ratios=self._aspect_ratios,
        anchor_size=self._anchor_size)
    anchor_boxes = input_anchor(image_size=(image_height, image_width))
    anchor_labeler = anchor.RpnAnchorLabeler(
        self._rpn_match_threshold,
        self._rpn_unmatched_threshold,
        self._rpn_batch_size_per_im,
        self._rpn_fg_fraction)
    rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors(
        anchor_boxes, boxes,
        tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32))

    # Casts input image to self._dtype
    image = tf.cast(image, dtype=self._dtype)

    # Packs labels for model_fn outputs.
    labels = {
        'anchor_boxes': anchor_boxes,
        'image_info': image_info,
        'rpn_score_targets': rpn_score_targets,
        'rpn_box_targets': rpn_box_targets,
        'gt_boxes': preprocess_ops.clip_or_pad_to_fixed_size(boxes, self._max_num_instances, -1),
        'gt_classes': preprocess_ops.clip_or_pad_to_fixed_size(classes, self._max_num_instances, -1),
    }
    if self._include_mask:
        labels['gt_masks'] = preprocess_ops.clip_or_pad_to_fixed_size(
            masks, self._max_num_instances, -1)

    return image, labels
def _parse_eval_data(self, data):
    """Parses data for evaluation.

    The image is normalized and resized without random scaling. Anchor
    targets are computed from the resized, filtered boxes, while the
    `groundtruths` entry carries the unfiltered annotations in the pixel
    coordinates of the original image.

    Args:
      data: the decoded tensor dictionary for one example. It is not
        modified by this method.

    Returns:
      image: the preprocessed image tensor cast to `self._dtype`.
      labels: a dictionary with `cls_targets`, `box_targets`, `anchor_boxes`,
        `cls_weights`, `box_weights`, `image_info`, `groundtruths` and, when
        attribute targets exist, `attribute_targets`.
    """
    classes = data['groundtruth_classes']
    boxes = data['groundtruth_boxes']
    # If not empty, `attributes` is a dict of (name, ground_truth) pairs.
    # `ground_truth` of attributes is assumed in shape [N, attribute_size].
    # TODO(xianzhi): support parsing attributes weights.
    attributes = data.get('groundtruth_attributes', {})

    # Gets original image and its size.
    image = data['image']
    image_shape = tf.shape(input=image)[0:2]

    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(image)

    # Converts boxes from normalized coordinates to pixel coordinates.
    boxes = box_ops.denormalize_boxes(boxes, image_shape)

    # Resizes and crops image.
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        self._output_size,
        padded_size=preprocess_ops.compute_padded_size(
            self._output_size, 2**self._max_level),
        aug_scale_min=1.0,
        aug_scale_max=1.0)
    image_height, image_width, _ = image.get_shape().as_list()

    # Resizes and crops boxes.
    image_scale = image_info[2, :]
    offset = image_info[3, :]
    boxes = preprocess_ops.resize_and_crop_boxes(
        boxes, image_scale, image_info[1, :], offset)

    # Filters out ground truth boxes that are all zeros.
    indices = box_ops.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)
    # Build a new dict rather than assigning into `attributes`: it is the
    # same object as data['groundtruth_attributes'], which is exported
    # unfiltered in `groundtruths` below and must stay aligned with the
    # unfiltered boxes and classes there.
    attributes = {k: tf.gather(v, indices) for k, v in attributes.items()}

    # Assigns anchors.
    input_anchor = anchor.build_anchor_generator(
        min_level=self._min_level,
        max_level=self._max_level,
        num_scales=self._num_scales,
        aspect_ratios=self._aspect_ratios,
        anchor_size=self._anchor_size)
    anchor_boxes = input_anchor(image_size=(image_height, image_width))
    anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                          self._unmatched_threshold)
    (cls_targets, box_targets, att_targets, cls_weights,
     box_weights) = anchor_labeler.label_anchors(
         anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)

    # Casts input image to desired data type.
    image = tf.cast(image, dtype=self._dtype)

    # Sets up groundtruth data for evaluation.
    groundtruths = {
        'source_id': data['source_id'],
        'height': data['height'],
        'width': data['width'],
        'num_detections': tf.shape(data['groundtruth_classes']),
        'image_info': image_info,
        'boxes': box_ops.denormalize_boxes(
            data['groundtruth_boxes'], image_shape),
        'classes': data['groundtruth_classes'],
        'areas': data['groundtruth_area'],
        'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
    }
    if 'groundtruth_attributes' in data:
        groundtruths['attributes'] = data['groundtruth_attributes']
    groundtruths['source_id'] = utils.process_source_id(
        groundtruths['source_id'])
    groundtruths = utils.pad_groundtruths_to_fixed_size(
        groundtruths, self._max_num_instances)

    # Packs labels for model_fn outputs.
    labels = {
        'cls_targets': cls_targets,
        'box_targets': box_targets,
        'anchor_boxes': anchor_boxes,
        'cls_weights': cls_weights,
        'box_weights': box_weights,
        'image_info': image_info,
        'groundtruths': groundtruths,
    }
    if att_targets:
        labels['attribute_targets'] = att_targets
    return image, labels
def _parse_train_data(self, data):
    """Parses data for training.

    Crowd annotations are optionally dropped, then the image is augmented,
    normalized, optionally flipped and resized with random scaling. Anchor
    targets are computed from the resulting boxes.

    Args:
      data: the decoded tensor dictionary for one example. It is not
        modified by this method.

    Returns:
      image: the preprocessed image tensor cast to `self._dtype`.
      labels: a dictionary with `cls_targets`, `box_targets`, `anchor_boxes`,
        `cls_weights`, `box_weights`, `image_info` and, when attribute
        targets exist, `attribute_targets`.
    """
    classes = data['groundtruth_classes']
    boxes = data['groundtruth_boxes']
    # If not empty, `attributes` is a dict of (name, ground_truth) pairs.
    # `ground_truth` of attributes is assumed in shape [N, attribute_size].
    # TODO(xianzhi): support parsing attributes weights.
    attributes = data.get('groundtruth_attributes', {})
    is_crowds = data['groundtruth_is_crowd']

    # Skips annotations with `is_crowd` = True.
    if self._skip_crowd_during_training:
        num_groundtruths = tf.shape(input=classes)[0]
        with tf.control_dependencies([num_groundtruths, is_crowds]):
            # When `is_crowds` is empty, keep every annotation.
            indices = tf.cond(
                pred=tf.greater(tf.size(input=is_crowds), 0),
                true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                false_fn=lambda: tf.cast(
                    tf.range(num_groundtruths), tf.int64))
        classes = tf.gather(classes, indices)
        boxes = tf.gather(boxes, indices)
        # Build a new dict instead of assigning into `attributes`, which is
        # the same object as data['groundtruth_attributes'] owned by the
        # caller.
        attributes = {k: tf.gather(v, indices) for k, v in attributes.items()}

    # Gets original image.
    image = data['image']

    # Apply autoaug or randaug.
    if self._augmenter is not None:
        image, boxes = self._augmenter.distort_with_boxes(image, boxes)
    image_shape = tf.shape(input=image)[0:2]

    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(image)

    # Flips image randomly during training.
    if self._aug_rand_hflip:
        image, boxes, _ = preprocess_ops.random_horizontal_flip(image, boxes)

    # Converts boxes from normalized coordinates to pixel coordinates.
    boxes = box_ops.denormalize_boxes(boxes, image_shape)

    # Resizes and crops image.
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        self._output_size,
        padded_size=preprocess_ops.compute_padded_size(
            self._output_size, 2**self._max_level),
        aug_scale_min=self._aug_scale_min,
        aug_scale_max=self._aug_scale_max)
    image_height, image_width, _ = image.get_shape().as_list()

    # Resizes and crops boxes.
    image_scale = image_info[2, :]
    offset = image_info[3, :]
    boxes = preprocess_ops.resize_and_crop_boxes(
        boxes, image_scale, image_info[1, :], offset)

    # Filters out ground truth boxes that are all zeros.
    indices = box_ops.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)
    attributes = {k: tf.gather(v, indices) for k, v in attributes.items()}

    # Assigns anchors.
    input_anchor = anchor.build_anchor_generator(
        min_level=self._min_level,
        max_level=self._max_level,
        num_scales=self._num_scales,
        aspect_ratios=self._aspect_ratios,
        anchor_size=self._anchor_size)
    anchor_boxes = input_anchor(image_size=(image_height, image_width))
    anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                          self._unmatched_threshold)
    (cls_targets, box_targets, att_targets, cls_weights,
     box_weights) = anchor_labeler.label_anchors(
         anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)

    # Casts input image to desired data type.
    image = tf.cast(image, dtype=self._dtype)

    # Packs labels for model_fn outputs.
    labels = {
        'cls_targets': cls_targets,
        'box_targets': box_targets,
        'anchor_boxes': anchor_boxes,
        'cls_weights': cls_weights,
        'box_weights': box_weights,
        'image_info': image_info,
    }
    if att_targets:
        labels['attribute_targets'] = att_targets
    return image, labels
def _parse_train_data(self, data):
    """Parses one example into an image and mask labels for training.

    Args:
      data: the decoded tensor dictionary for one example.

    Returns:
      A tuple `(image, labels)` where `labels` holds `masks`, `valid_masks`
      and `image_info`.
    """
    image, label = self._prepare_image_and_label(data)

    if self._crop_size:
        label = tf.reshape(
            label, [data['image/height'], data['image/width'], 1])
        # When an output size is configured, bring image and label to that
        # size before cropping.
        if self._output_size:
            image = tf.image.resize(
                image, self._output_size, method='bilinear')
            label = tf.image.resize(
                label, self._output_size, method='nearest')

        # Concatenate image and label along the channel axis so that one
        # random crop keeps them aligned; the last channel is the label.
        stacked = tf.concat([image, label], axis=2)
        cropped = tf.image.random_crop(stacked, self._crop_size + [4])
        image = cropped[:, :, :-1]
        label = tf.reshape(cropped[:, :, -1], [1] + self._crop_size)

    # Flips image randomly during training.
    if self._aug_rand_hflip:
        image, _, label = preprocess_ops.random_horizontal_flip(
            image, masks=label)

    if self._crop_size:
        train_image_size = self._crop_size
    else:
        train_image_size = self._output_size

    # Resizes and crops image.
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        train_image_size,
        train_image_size,
        aug_scale_min=self._aug_scale_min,
        aug_scale_max=self._aug_scale_max)

    # Shift every label up by one so that the zeros introduced by padding
    # can be told apart from a real class id of 0, then apply the image's
    # scale (row 2) and offset (row 3) to the mask.
    label += 1
    label = tf.expand_dims(label, axis=3)
    label = preprocess_ops.resize_and_crop_masks(
        label, image_info[2, :], train_image_size, image_info[3, :])

    # Undo the shift; anything that is now -1 came from padding and is
    # mapped to the ignore label.
    label -= 1
    ignore_fill = self._ignore_label * tf.ones_like(label)
    label = tf.where(tf.equal(label, -1), ignore_fill, label)
    label = tf.squeeze(label, axis=0)

    labels = {
        'masks': label,
        'valid_masks': tf.not_equal(label, self._ignore_label),
        'image_info': image_info,
    }

    return tf.cast(image, dtype=self._dtype), labels