def compute_iou(box1, box2, yxyx=False):
    """Compute the intersection over union (IoU) of two sets of boxes.

    Args:
      box1: a `Tensor` whose last dimension is 4. Unless `yxyx` is True it is
        read as (x_center, y_center, width, height).
      box2: a `Tensor` with the same layout as `box1`.
      yxyx: when True the inputs are used as given and the
        `box_utils.xcycwh_to_yxyx` conversion is skipped.

    Returns:
      iou: a `Tensor` holding the intersection over union, clipped to [0, 1].
    """
    with tf.name_scope('iou'):
        if not yxyx:
            box1 = box_utils.xcycwh_to_yxyx(box1)
            box2 = box_utils.xcycwh_to_yxyx(box2)

        # Split each box into its (min corner, max corner) halves.
        lo_a, hi_a = tf.split(box1, 2, axis=-1)
        lo_b, hi_b = tf.split(box2, 2, axis=-1)

        # Overlap rectangle; a negative extent means no overlap and is
        # floored at zero.
        overlap_lo = tf.math.maximum(lo_a, lo_b)
        overlap_hi = tf.math.minimum(hi_a, hi_b)
        overlap_extent = tf.math.maximum(overlap_hi - overlap_lo,
                                         tf.zeros_like(overlap_lo))
        overlap_area = tf.reduce_prod(overlap_extent, axis=-1)

        area_a = tf.math.abs(tf.reduce_prod(hi_a - lo_a, axis=-1))
        area_b = tf.math.abs(tf.reduce_prod(hi_b - lo_b, axis=-1))
        combined_area = area_a + area_b - overlap_area

        # The small epsilon keeps the division finite when the union is zero.
        ratio = overlap_area / (combined_area + 1e-7)
        return tf.clip_by_value(ratio, clip_value_min=0.0, clip_value_max=1.0)
def validation_step(self, inputs, model, metrics=None):
    """Run one evaluation step and collect the loss and COCO inputs.

    Args:
      inputs: an (image, label) pair. `label` is modified in place: its
        'bbox' entry is replaced by a 'boxes' entry produced by
        `xcycwh_to_yxyx`.
      model: callable invoked as `model(image, training=False)`; its result
        is indexed with 'raw_output', 'bbox', 'confidence' and 'classes'.
      metrics: unused on entry; the name is rebound to the metrics returned
        by `self.build_losses`.

    Returns:
      A dict containing 'loss', the entries returned by `build_losses`, and
      under `self.coco_metric.name` the tuple (label, coco_model_outputs).
    """
    image, label = inputs

    # Forward pass only; no gradients are computed in this step.
    y_pred = model(image, training=False)
    loss, metrics = self.build_losses(y_pred['raw_output'], label)

    loss_metrics = {'loss': loss}
    loss_metrics.update(metrics)

    # Swap the ground-truth 'bbox' entry for its converted 'boxes' form.
    converted = xcycwh_to_yxyx(label['bbox'])
    del label['bbox']
    label['boxes'] = converted

    coco_model_outputs = {
        'detection_boxes': y_pred['bbox'],
        'detection_scores': y_pred['confidence'],
        'detection_classes': y_pred['classes'],
        'num_detections': tf.shape(y_pred['bbox'])[:-1],
        'source_id': label['source_id'],
    }

    loss_metrics[self.coco_metric.name] = (label, coco_model_outputs)
    return loss_metrics
def parse_prediction_path(self, generator, len_mask, scale_xy, inputs):
    """Flatten one prediction tensor and keep the top-ranked entries.

    The previous body referenced two names that were never assigned (`boxes`
    and `sub`) and passed the integer class count to `tf.boolean_mask` in
    place of the class predictions, so it could not run. The box slice is now
    bound to `boxes`, and the mask and the class selection are built from the
    tensors actually produced by the split.

    Args:
      generator: unused here; kept so the signature stays call-compatible.
      len_mask: size of the 4th axis after reshaping `inputs`
        (the number of anchors, per the reshape comment below).
      scale_xy: unused here; kept so the signature stays call-compatible.
      inputs: a rank-4 `Tensor`; its last axis is reshaped to
        (len_mask, -1).

    Returns:
      objectness, box, classifications: the outputs of
        `nms_ops.sort_drop(..., self._max_boxes)`.
      num_dets: the objectness slice summed over axes (1, 2, 3).
    """
    shape = tf.shape(inputs)
    # reshape the yolo output to
    # (batchsize, width, height, number_anchors, remaining_points)
    data = tf.reshape(inputs, [shape[0], shape[1], shape[2], len_mask, -1])

    # Last axis layout: 4 box values, 1 objectness value, then class values.
    boxes, obns, classifics = tf.split(data, [4, 1, -1], axis=-1)
    box = box_utils.xcycwh_to_yxyx(boxes)

    # Same reduction as before: sum the squeezed objectness per batch item.
    num_dets = tf.reduce_sum(tf.squeeze(obns, axis=-1), axis=(1, 2, 3))

    # An all-True mask over axes 1..3; applying it with axis=1 collapses
    # those three axes into a single "prediction" axis.
    mask = tf.cast(tf.ones_like(obns), dtype=tf.bool)
    mask = tf.reduce_any(mask, axis=(0, -1))

    # reduce the dimensions of the predictions to
    # (batch size, max predictions, -1)
    box = tf.boolean_mask(box, mask, axis=1)
    classifications = tf.boolean_mask(classifics, mask, axis=1)
    # The objectness tensor keeps its trailing singleton axis until here so
    # that this squeeze is valid for any number of anchors.
    objectness = tf.squeeze(tf.boolean_mask(obns, mask, axis=1), axis=-1)

    objectness, box, classifications = nms_ops.sort_drop(
        objectness, box, classifications, self._max_boxes)
    return objectness, box, classifications, num_dets
def translate_boxes(box, classes, translate_x, translate_y):
    """Shift box centres and filter out boxes whose centre leaves [0, 1).

    Args:
      box: boxes accepted by `box_ops.yxyx_to_xcycwh`.
      classes: class ids, one per box.
      translate_x: offset added to every x centre.
      translate_y: offset added to every y centre.

    Returns:
      (box, classes) after `shift_zeros` has been applied with the
      "centre still inside" mask; boxes are converted back with
      `box_ops.xcycwh_to_yxyx`.
    """
    with tf.name_scope('translate_boxs'):
        centered = box_ops.yxyx_to_xcycwh(box)
        cx, cy, bw, bh = tf.split(centered, 4, axis=-1)
        cx = cx + translate_x
        cy = cy + translate_y

        # A box is kept only if its shifted centre satisfies 0 <= c < 1 on
        # both axes.
        x_inside = tf.math.logical_and(cx >= 0, cx < 1)
        y_inside = tf.math.logical_and(cy >= 0, cy < 1)
        keep = tf.math.logical_and(x_inside, y_inside)

        # shift_zeros is defined elsewhere; presumably it zeroes rejected
        # rows and moves them to the end -- confirm against its definition.
        cx, cy, bw, bh = [shift_zeros(part, keep) for part in (cx, cy, bw, bh)]
        classes = tf.squeeze(
            shift_zeros(tf.expand_dims(classes, axis=-1), keep), axis=-1)

        shifted = tf.concat([cx, cy, bw, bh], axis=-1)
        return box_ops.xcycwh_to_yxyx(shifted), classes
def pad_filter_to_bbox(image, boxes, classes, target_width, target_height,
                       offset_width, offset_height):
    """Pad an image to a larger canvas and remap its boxes accordingly.

    Args:
      image: rank-3 or rank-4 image tensor (the rank selects which axes are
        read as height and width).
      boxes: boxes accepted by `box_ops.yxyx_to_xcycwh`.
      classes: returned unchanged.
      target_width: width of the padded canvas.
      target_height: height of the padded canvas.
      offset_width: horizontal offset of the image inside the canvas.
      offset_height: vertical offset of the image inside the canvas.

    Returns:
      (image, boxes, classes) with the image padded by
      `tf.image.pad_to_bounding_box` and the boxes rescaled to the canvas.
    """
    with tf.name_scope('resize_crop_filter'):
        dims = tf.shape(image)
        if tf.shape(dims)[0] == 4:
            height = dims[1]
            width = dims[2]
        else:  # rank 3
            height = dims[0]
            width = dims[1]

        image = tf.image.pad_to_bounding_box(image, offset_height,
                                             offset_width, target_height,
                                             target_width)

        # Offsets expressed as a fraction of the original image size.
        x_shift = tf.cast(offset_width / width, tf.float32)
        y_shift = tf.cast(offset_height / height, tf.float32)

        centered = box_ops.yxyx_to_xcycwh(boxes)
        cx, cy, bw, bh = tf.split(tf.cast(centered, x_shift.dtype), 4, axis=-1)

        # Ratio between the original size and the padded canvas size.
        x_ratio = tf.cast(width / target_width, cx.dtype)
        y_ratio = tf.cast(height / target_height, cy.dtype)

        cx = (cx + x_shift) * x_ratio
        cy = (cy + y_shift) * y_ratio
        bw = bw * x_ratio
        bh = bh * y_ratio

        remapped = tf.cast(tf.concat([cx, cy, bw, bh], axis=-1),
                           centered.dtype)
        return image, box_ops.xcycwh_to_yxyx(remapped), classes
def cutmix_1(image_to_crop, boxes1, classes1, image_mask, boxes2, classes2,
             target_width, target_height, offset_width, offset_height):
    """Paste a window of one image into the same window of another.

    `cut_out` blanks the window in `image_mask`; `crop_filter_to_bbox` with
    `fix=True` keeps only that window of `image_to_crop`. The two results are
    summed and their box/class lists concatenated.

    Args:
      image_to_crop: image supplying the pasted window.
      boxes1: boxes belonging to `image_to_crop`.
      classes1: classes belonging to `image_to_crop`.
      image_mask: image receiving the window.
      boxes2: boxes belonging to `image_mask`.
      classes2: classes belonging to `image_mask`.
      target_width: window width.
      target_height: window height.
      offset_width: horizontal window offset.
      offset_height: vertical window offset.

    Returns:
      (image, boxes, classes) for the mixed sample.
    """
    window = (target_width, target_height, offset_width, offset_height)
    with tf.name_scope('cutmix'):
        image, kept_boxes, kept_classes = cut_out(image_mask, boxes2, classes2,
                                                  *window)
        patch, patch_boxes, patch_classes = crop_filter_to_bbox(
            image_to_crop, boxes1, classes1, *window, fix=True)

        image += patch
        merged_boxes = tf.concat([kept_boxes, patch_boxes], axis=-2)
        merged_classes = tf.concat([kept_classes, patch_classes], axis=-1)

        centered = box_ops.yxyx_to_xcycwh(merged_boxes)
        cx, cy, bw, bh = tf.split(centered, 4, axis=-1)

        # Entries with a non-positive x centre are treated as empty slots.
        valid = cx > 0
        cx, cy, bw, bh = [shift_zeros(part, valid) for part in (cx, cy, bw, bh)]
        merged_classes = tf.squeeze(
            shift_zeros(tf.expand_dims(merged_classes, axis=-1), valid),
            axis=-1)

        compacted = tf.cast(tf.concat([cx, cy, bw, bh], axis=-1),
                            centered.dtype)
        return image, box_ops.xcycwh_to_yxyx(compacted), merged_classes
def crop_filter_to_bbox(image,
                        boxes,
                        classes,
                        target_width,
                        target_height,
                        offset_width,
                        offset_height,
                        fix=False):
    """Crop an image to a window and keep the boxes centred inside it.

    Args:
      image: rank-3 or rank-4 image tensor (the rank selects which axes are
        read as height and width).
      boxes: boxes accepted by `box_ops.yxyx_to_xcycwh`.
      classes: class ids, one per box.
      target_width: window width.
      target_height: window height.
      offset_width: horizontal window offset.
      offset_height: vertical window offset.
      fix: when True the crop is padded back to the original image size and
        the surviving boxes keep their original coordinates; when False the
        boxes are re-expressed relative to the cropped window.

    Returns:
      (image, boxes, classes) after cropping and filtering.
    """
    with tf.name_scope('resize_crop_filter'):
        dims = tf.shape(image)
        if tf.shape(dims)[0] == 4:
            height = dims[1]
            width = dims[2]
        else:  # rank 3
            height = dims[0]
            width = dims[1]

        image = tf.image.crop_to_bounding_box(image, offset_height,
                                              offset_width, target_height,
                                              target_width)
        if fix:
            image = tf.image.pad_to_bounding_box(image, offset_height,
                                                 offset_width, height, width)

        # Window edges as fractions of the original image size.
        left = offset_width / width
        top = offset_height / height
        right = (offset_width + target_width) / width
        bottom = (offset_height + target_height) / height

        centered = box_ops.yxyx_to_xcycwh(boxes)
        cx, cy, bw, bh = tf.split(tf.cast(centered, left.dtype), 4, axis=-1)

        # Keep a box only when its centre lies strictly inside the window.
        x_inside = tf.math.logical_and(cx > left, cx < right)
        y_inside = tf.math.logical_and(cy > top, cy < bottom)
        keep = tf.math.logical_and(x_inside, y_inside)

        cx, cy, bw, bh = [shift_zeros(part, keep) for part in (cx, cy, bw, bh)]
        classes = tf.squeeze(
            shift_zeros(tf.expand_dims(classes, axis=-1), keep), axis=-1)

        if not fix:
            # Re-express the boxes in the coordinate frame of the crop.
            x_ratio = tf.cast(width / target_width, cx.dtype)
            y_ratio = tf.cast(height / target_height, cy.dtype)
            cx = (cx - left) * x_ratio
            cy = (cy - top) * y_ratio
            bw = bw * x_ratio
            bh = bh * y_ratio

        filtered = tf.cast(tf.concat([cx, cy, bw, bh], axis=-1),
                           centered.dtype)
        return image, box_ops.xcycwh_to_yxyx(filtered), classes
def compute_diou(box1, box2):
    """Calculates the distance intersection over union between two box sets.

    DIoU is the IoU minus a penalty equal to the centre distance divided by
    the squared diagonal of the smallest box enclosing both inputs. The
    penalty was previously added, which rewarded boxes for being far apart.

    Args:
      box1: a `Tensor` whose last dimension is 4 representing the coordinates
        of boxes in x_center, y_center, width, height.
      box2: a `Tensor` whose last dimension is 4 representing the coordinates
        of boxes in x_center, y_center, width, height.

    Returns:
      iou: a `Tensor` holding the intersection over union, clipped to [0, 1].
      diou: a `Tensor` holding `iou` minus the centre-distance penalty.
    """
    with tf.name_scope('diou'):
        # Centre distance is taken before the boxes are converted to corners.
        # NOTE(review): assumes box_utils.center_distance returns the
        # *squared* distance, matching the squared diagonal below -- confirm.
        dist = box_utils.center_distance(box1[..., 0:2], box2[..., 0:2])

        # get box corners
        box1 = box_utils.xcycwh_to_yxyx(box1)
        box2 = box_utils.xcycwh_to_yxyx(box2)

        # compute IOU
        intersect_mins = tf.math.maximum(box1[..., 0:2], box2[..., 0:2])
        intersect_maxes = tf.math.minimum(box1[..., 2:4], box2[..., 2:4])
        # Negative extents mean the boxes do not overlap; floor them at zero.
        intersect_wh = tf.math.maximum(intersect_maxes - intersect_mins,
                                       tf.zeros_like(intersect_mins))
        intersection = intersect_wh[..., 0] * intersect_wh[..., 1]

        box1_area = tf.math.abs(
            tf.reduce_prod(box1[..., 2:4] - box1[..., 0:2], axis=-1))
        box2_area = tf.math.abs(
            tf.reduce_prod(box2[..., 2:4] - box2[..., 0:2], axis=-1))
        union = box1_area + box2_area - intersection

        iou = tf.math.divide_no_nan(intersection, union)
        iou = tf.clip_by_value(iou, clip_value_min=0.0, clip_value_max=1.0)

        # squared diagonal of the smallest enclosing box
        c_mins = tf.math.minimum(box1[..., 0:2], box2[..., 0:2])
        c_maxes = tf.math.maximum(box1[..., 2:4], box2[..., 2:4])
        diag_dist = tf.reduce_sum((c_maxes - c_mins)**2, axis=-1)

        regularization = tf.math.divide_no_nan(dist, diag_dist)
        # The penalty lowers the score as the centres move apart.
        diou = iou - regularization
    return iou, diou
def parse_prediction_path(self, generator, len_mask, scale_xy, inputs):
    """Decode one raw prediction tensor, threshold it and run NMS.

    Args:
      generator: callable invoked as
        `generator(shape[1], shape[2], shape[0], dtype=...)`, returning
        (centers, anchors).
      len_mask: size of the 4th axis after reshaping `inputs`
        (the number of anchors, per the reshape comment below).
      scale_xy: forwarded to `parse_yolo_box_predictions` as `scale_x_y`.
      inputs: a rank-4 prediction `Tensor`.

    Returns:
      objectness, box, classifications: the outputs of `nms_ops.nms`.
      num_dets: the above-threshold indicator summed over axes (1, 2, 3).
    """
    dims = tf.shape(inputs)
    batch, dim_1, dim_2 = dims[0], dims[1], dims[2]

    # reshape the yolo output to
    # (batchsize, width, height, number_anchors, remaining_points)
    data = tf.reshape(inputs, [batch, dim_1, dim_2, len_mask, -1])
    centers, anchors = generator(dim_1, dim_2, batch, dtype=data.dtype)

    # Last axis layout: 4 box values, 1 objectness value, then class values.
    raw_boxes, raw_obj, raw_classes = tf.split(data, [4, 1, -1], axis=-1)
    classes = tf.shape(raw_classes)[-1]  # not used below
    raw_obj = tf.squeeze(raw_obj, axis=-1)

    _, _, decoded = self.parse_yolo_box_predictions(
        raw_boxes,
        tf.cast(dim_1, data.dtype),
        tf.cast(dim_2, data.dtype),
        anchors,
        centers,
        scale_x_y=scale_xy)
    box = box_utils.xcycwh_to_yxyx(decoded)

    # Objectness probability, and class scores weighted by it.
    objectness = tf.expand_dims(tf.math.sigmoid(raw_obj), axis=-1)
    scaled = tf.math.sigmoid(raw_classes) * objectness

    # Indicator that is 1 where objectness exceeds the threshold, else 0:
    # relu drops non-positive margins and ceil lifts the rest to 1.
    threshold = tf.fill(
        tf.shape(objectness), tf.cast(self._thresh, dtype=objectness.dtype))
    above = tf.math.ceil(tf.nn.relu(objectness - threshold))
    num_dets = tf.reduce_sum(above, axis=(1, 2, 3))

    # Zero out everything below the threshold.
    box = box * above
    scaled = scaled * above
    objectness = objectness * above

    # An all-True mask over axes 1..3; applying it with axis=1 collapses
    # those axes into one, giving (batch size, max predictions, -1).
    flatten = tf.reduce_any(
        tf.cast(tf.ones_like(above), dtype=tf.bool), axis=(0, -1))
    box = tf.boolean_mask(box, flatten, axis=1)
    classifications = tf.boolean_mask(scaled, flatten, axis=1)
    objectness = tf.squeeze(
        tf.boolean_mask(objectness, flatten, axis=1), axis=-1)

    # NOTE(review): the meaning of the positional 2.5 is defined by
    # nms_ops.nms, which is not visible here.
    box, classifications, objectness = nms_ops.nms(
        box,
        classifications,
        objectness,
        self._max_boxes,
        2.5,
        self._nms_thresh,
        sorted=False,
        one_hot=True)
    return objectness, box, classifications, num_dets
def _parse_eval_data(self, data):
    """Build the image and label dict used for evaluation.

    Args:
      data: a dict of Tensors produced by the decoder. The keys read here are
        'image', 'groundtruth_boxes', 'groundtruth_classes',
        'groundtruth_area', 'groundtruth_is_crowd' and 'source_id'.

    Returns:
      image: the image divided by 255 and passed through
        `preprocessing_ops.fit_preserve_aspect_ratio`.
      labels: a dict of Tensors padded to `self._max_num_instances`, plus the
        'grid_form' entry built by `self._build_grid`.
    """
    raw_image = data['image']
    dims = tf.shape(raw_image)
    width = dims[1]
    height = dims[0]
    limit = self._max_num_instances

    image, boxes = preprocessing_ops.fit_preserve_aspect_ratio(
        raw_image / 255,
        data['groundtruth_boxes'],
        width=width,
        height=height,
        target_dim=self._image_w)
    boxes = box_utils.yxyx_to_xcycwh(boxes)

    # Anchors are matched on the unpadded boxes, then everything is padded
    # to a fixed number of instances.
    best_anchors = preprocessing_ops.get_best_anchor(
        boxes, self._anchors, width=self._image_w, height=self._image_h)

    boxes = pad_max_instances(boxes, limit, 0)
    classes = pad_max_instances(data['groundtruth_classes'], limit, -1)
    best_anchors = pad_max_instances(best_anchors, limit, 0)
    area = pad_max_instances(data['groundtruth_area'], limit, 0)
    crowd_flags = tf.cast(data['groundtruth_is_crowd'], tf.int32)
    is_crowd = pad_max_instances(crowd_flags, limit, 0)

    labels = {
        'source_id': data['source_id'],
        'bbox': tf.cast(boxes, self._dtype),
        'classes': tf.cast(classes, self._dtype),
        'area': tf.cast(area, self._dtype),
        'is_crowd': is_crowd,
        'best_anchors': tf.cast(best_anchors, self._dtype),
        'width': width,
        'height': height,
        'num_detections': tf.shape(data['groundtruth_classes'])[0]
    }

    # The grid is built while 'bbox' is still in centre form; the entry is
    # converted to corner form only afterwards.
    labels['grid_form'] = self._build_grid(
        labels,
        self._image_w,
        batch=False,
        use_tie_breaker=self._use_tie_breaker)
    labels['bbox'] = box_utils.xcycwh_to_yxyx(labels['bbox'])
    return image, labels
def compute_giou(box1, box2):
    """Compute the generalized intersection over union of two box sets.

    Args:
      box1: a `Tensor` whose last dimension is 4 representing the coordinates
        of boxes in x_center, y_center, width, height.
      box2: a `Tensor` whose last dimension is 4 representing the coordinates
        of boxes in x_center, y_center, width, height.

    Returns:
      iou: a `Tensor` holding the intersection over union, clipped to [0, 1].
      giou: a `Tensor` holding `iou` minus the fraction of the enclosing box
        that is not covered by the union.
    """
    with tf.name_scope('giou'):
        corners_a = box_utils.xcycwh_to_yxyx(box1)
        corners_b = box_utils.xcycwh_to_yxyx(box2)

        lo_a, hi_a = corners_a[..., 0:2], corners_a[..., 2:4]
        lo_b, hi_b = corners_b[..., 0:2], corners_b[..., 2:4]

        # Plain IoU first; negative overlap extents are floored at zero.
        overlap_lo = tf.math.maximum(lo_a, lo_b)
        overlap_hi = tf.math.minimum(hi_a, hi_b)
        overlap_wh = tf.math.maximum(overlap_hi - overlap_lo,
                                     tf.zeros_like(overlap_lo))
        intersection = overlap_wh[..., 0] * overlap_wh[..., 1]

        area_a = tf.math.abs(tf.reduce_prod(hi_a - lo_a, axis=-1))
        area_b = tf.math.abs(tf.reduce_prod(hi_b - lo_b, axis=-1))
        union = area_a + area_b - intersection

        iou = tf.clip_by_value(
            tf.math.divide_no_nan(intersection, union),
            clip_value_min=0.0,
            clip_value_max=1.0)

        # Smallest box that encloses both inputs.
        hull_lo = tf.math.minimum(lo_a, lo_b)
        hull_hi = tf.math.maximum(hi_a, hi_b)
        hull_area = box_utils.get_area((hull_lo, hull_hi), use_tuple=True)

        giou = iou - tf.math.divide_no_nan((hull_area - union), hull_area)
        return iou, giou
def _postprocess_fn(self, image, label):
    """Apply batch-level augmentation and build the training grid.

    Args:
      image: a batch of images.
      label: a dict of batched label Tensors; read keys are 'bbox' and
        'classes'. The dict is updated in place.

    Returns:
      image: the (optionally cut-mixed) batch resized to a square of side
        `randscale * self._net_down_scale`.
      label: the same dict with 'best_anchors' and 'grid_form' added and
        'bbox' converted by `box_utils.xcycwh_to_yxyx`.
    """
    limit = self._max_num_instances

    if self._cutmix:
        batch_size = tf.shape(image)[0]
        if batch_size >= 1:
            corner_boxes = box_utils.xcycwh_to_yxyx(label['bbox'])
            mixed_classes = label['classes']
            (image, corner_boxes, mixed_classes,
             num_detections) = preprocessing_ops.randomized_cutmix_batch(
                 image, corner_boxes, mixed_classes)
            center_boxes = box_utils.yxyx_to_xcycwh(corner_boxes)
            label['bbox'] = pad_max_instances(
                center_boxes, limit, pad_axis=-2, pad_value=0)
            label['classes'] = pad_max_instances(
                mixed_classes, limit, pad_axis=-1, pad_value=-1)

    # Default output size in grid cells; optionally replaced by a random one
    # when the size is not fixed.
    randscale = self._image_w // self._net_down_scale
    if not self._fixed_size:
        do_scale = tf.greater(
            tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
            1 - self._pct_rand)
        if do_scale:
            randscale = tf.random.uniform([],
                                          minval=10,
                                          maxval=21,
                                          seed=self._seed,
                                          dtype=tf.int32)
    width = randscale * self._net_down_scale
    image = tf.image.resize(image, (width, width))

    anchors = preprocessing_ops.get_best_anchor_batch(
        label['bbox'],
        self._anchors,
        width=self._image_w,
        height=self._image_h)
    label['best_anchors'] = pad_max_instances(
        anchors, limit, pad_axis=-2, pad_value=0)

    # The grid is built while 'bbox' is still in centre form.
    label['grid_form'] = self._build_grid(
        label, width, batch=True, use_tie_breaker=self._use_tie_breaker)
    label['bbox'] = box_utils.xcycwh_to_yxyx(label['bbox'])
    return image, label
def cut_out(image_full, boxes, classes, target_width, target_height,
            offset_width, offset_height):
    """Blank a rectangular window of an image and drop the boxes inside it.

    Height and width are now read from the same axes that
    `tf.image.crop_to_bounding_box` / `pad_to_bounding_box` use
    ([..., height, width, channels]). They were previously swapped, which
    produced a mask of the wrong shape and wrong window bounds for any
    non-square image.

    Args:
      image_full: rank-3 or rank-4 image tensor.
      boxes: boxes accepted by `box_ops.yxyx_to_xcycwh`.
      classes: class ids, one per box.
      target_width: window width.
      target_height: window height.
      offset_width: horizontal window offset.
      offset_height: vertical window offset.

    Returns:
      (image_full, boxes, classes): the image with the window multiplied by
      zero, and the boxes/classes after `shift_zeros` has been applied with
      the "centre outside the window" mask.
    """
    dims = tf.shape(image_full)
    if tf.shape(dims)[0] == 4:
        height = dims[1]
        width = dims[2]
    else:  # rank 3
        height = dims[0]
        width = dims[1]

    # Build a mask that is 0 inside the window and 1 everywhere else: a block
    # of ones the size of the window, zero-padded back to the full image,
    # then inverted.
    image_crop = tf.image.crop_to_bounding_box(image_full, offset_height,
                                               offset_width, target_height,
                                               target_width)
    image_crop = tf.ones_like(image_crop)
    image_crop = tf.image.pad_to_bounding_box(image_crop, offset_height,
                                              offset_width, height, width)
    image_crop = 1 - image_crop

    # Window edges as fractions of the image size.
    x_lower_bound = offset_width / width
    y_lower_bound = offset_height / height
    x_upper_bound = (offset_width + target_width) / width
    y_upper_bound = (offset_height + target_height) / height

    boxes = box_ops.yxyx_to_xcycwh(boxes)
    x, y, w, h = tf.split(tf.cast(boxes, x_lower_bound.dtype), 4, axis=-1)

    x_mask = tf.math.logical_and(x > x_lower_bound, x < x_upper_bound)
    y_mask = tf.math.logical_and(y > y_lower_bound, y < y_upper_bound)
    # Keep the boxes whose centre is NOT strictly inside the removed window.
    mask = tf.math.logical_not(tf.math.logical_and(x_mask, y_mask))

    # shift_zeros is defined elsewhere; presumably it zeroes rejected rows
    # and moves them to the end -- confirm against its definition.
    x = shift_zeros(x, mask)
    y = shift_zeros(y, mask)
    w = shift_zeros(w, mask)
    h = shift_zeros(h, mask)
    classes = shift_zeros(tf.expand_dims(classes, axis=-1), mask)
    classes = tf.squeeze(classes, axis=-1)

    boxes = tf.cast(tf.concat([x, y, w, h], axis=-1), boxes.dtype)
    boxes = box_ops.xcycwh_to_yxyx(boxes)

    image_full *= image_crop
    return image_full, boxes, classes
def cutmix_batch(image, boxes, classes, target_width, target_height,
                 offset_width, offset_height):
    """Cut-mix a batch with itself using one shared window.

    Every image has the window blanked (`cut_out`) and, separately, has only
    the window kept (`crop_filter_to_bbox` with `fix=True`). With probability
    one half the window patches are swapped between the two halves of the
    batch before being added back.

    Args:
      image: a batch of images.
      boxes: batched boxes accepted by `box_ops.yxyx_to_xcycwh`.
      classes: batched class ids.
      target_width: window width.
      target_height: window height.
      offset_width: horizontal window offset.
      offset_height: vertical window offset.

    Returns:
      (image, boxes, classes, num_detections), where `classes` is set to -1
      wherever the x centre is 0 and `num_detections` counts the entries
      with a positive x centre.
    """
    window = (target_width, target_height, offset_width, offset_height)
    with tf.name_scope('cutmix_batch'):
        holed, holed_boxes, holed_classes = cut_out(image, boxes, classes,
                                                    *window)
        patch, patch_boxes, patch_classes = crop_filter_to_bbox(
            image, boxes, classes, *window, fix=True)

        mix = tf.random.uniform([], minval=0, maxval=1)
        if mix > 0.5:
            # Exchange the first and second half of the batch.
            patch = tf.concat(tf.split(patch, 2, axis=0)[::-1], axis=0)
            patch_boxes = tf.concat(
                tf.split(patch_boxes, 2, axis=0)[::-1], axis=0)
            patch_classes = tf.concat(
                tf.split(patch_classes, 2, axis=0)[::-1], axis=0)

        image = holed + patch
        merged_boxes = tf.concat([holed_boxes, patch_boxes], axis=-2)
        merged_classes = tf.concat([holed_classes, patch_classes], axis=-1)

        centered = box_ops.yxyx_to_xcycwh(merged_boxes)
        cx, cy, bw, bh = tf.split(centered, 4, axis=-1)

        # Entries with a non-positive x centre are treated as empty slots.
        valid = cx > 0
        cx, cy, bw, bh = [shift_zeros(part, valid) for part in (cx, cy, bw, bh)]
        merged_classes = tf.squeeze(
            shift_zeros(tf.expand_dims(merged_classes, axis=-1), valid),
            axis=-1)

        compacted = tf.cast(tf.concat([cx, cy, bw, bh], axis=-1),
                            centered.dtype)
        out_boxes = box_ops.xcycwh_to_yxyx(compacted)

        flat_x = tf.squeeze(cx, axis=-1)
        out_classes = tf.where(flat_x == 0, -1, merged_classes)
        num_detections = tf.reduce_sum(tf.cast(flat_x > 0, tf.int32), axis=-1)
        return image, out_boxes, out_classes, num_detections
def fit_preserve_aspect_ratio(image,
                              boxes,
                              width=None,
                              height=None,
                              target_dim=None):
    """Pad an image to a square, remap its boxes, then resize the square.

    When `width` or `height` is omitted the size is now read from `image`;
    the fallback previously referenced an undefined name (`data`) and raised
    NameError.

    Args:
      image: rank-3 or rank-4 image tensor.
      boxes: boxes accepted by `box_ops.yxyx_to_xcycwh`.
      width: size along the first spatial axis, or None to read it from
        `image`. NOTE(review): throughout this function `width` is paired
        with the first spatial axis (it is the vertical offset given to
        `pad_to_bounding_box` and it scales `y`), and `height` with the
        second. Callers passing explicit values must follow that pairing --
        confirm against call sites.
      height: size along the second spatial axis, or None to read it from
        `image`.
      target_dim: side of the output square; defaults to the larger of
        `width` and `height`.

    Returns:
      (image, boxes): the padded and resized image and the remapped boxes.
    """
    if width is None or height is None:
        shape = tf.shape(image)
        if tf.shape(shape)[0] == 4:
            width = shape[1]
            height = shape[2]
        else:
            width = shape[0]
            height = shape[1]

    # Side of the square canvas: the longer of the two image sides.
    clipper = tf.math.maximum(width, height)
    if target_dim is None:
        target_dim = clipper

    # Split the padding evenly so the image sits centred on the canvas.
    pad_width = clipper - width
    pad_height = clipper - height
    image = tf.image.pad_to_bounding_box(image, pad_width // 2,
                                         pad_height // 2, clipper, clipper)

    boxes = box_ops.yxyx_to_xcycwh(boxes)
    x, y, w, h = tf.split(boxes, 4, axis=-1)

    # Shrink the coordinates to the canvas, then shift by half the padding.
    y *= tf.cast(width / clipper, tf.float32)
    x *= tf.cast(height / clipper, tf.float32)

    y += tf.cast((pad_width / clipper) / 2, tf.float32)
    x += tf.cast((pad_height / clipper) / 2, tf.float32)

    h *= tf.cast(width / clipper, tf.float32)
    w *= tf.cast(height / clipper, tf.float32)

    boxes = tf.concat([x, y, w, h], axis=-1)
    boxes = box_ops.xcycwh_to_yxyx(boxes)

    image = tf.image.resize(image, (target_dim, target_dim))
    return image, boxes
def parse_prediction_path(self, inputs, len_mask):
    """Flatten one prediction tensor and keep the top-ranked entries.

    Args:
      inputs: a rank-4 prediction `Tensor`.
      len_mask: size of the 4th axis after reshaping `inputs`
        (the number of anchors, per the reshape comment below).

    Returns:
      objectness, box, classifications: the outputs of
        `nms_ops.sort_drop(..., self._max_boxes)`.
    """
    dims = tf.shape(inputs)
    # reshape the yolo output to
    # (batchsize, width, height, number_anchors, remaining_points)
    data = tf.reshape(inputs, [dims[0], dims[1], dims[2], len_mask, -1])

    # Last axis layout: 4 box values, 1 objectness value, then class values.
    raw_boxes, raw_obj, raw_classes = tf.split(data, [4, 1, -1], axis=-1)
    corners = box_utils.xcycwh_to_yxyx(raw_boxes)

    # An all-True mask over axes 1..3; applying it with axis=1 collapses
    # those axes into one, giving (batch size, max predictions, -1).
    flatten = tf.reduce_any(
        tf.cast(tf.ones_like(raw_obj), dtype=tf.bool), axis=(0, -1))

    flat_boxes = tf.boolean_mask(corners, flatten, axis=1)
    flat_classes = tf.boolean_mask(raw_classes, flatten, axis=1)
    flat_obj = tf.squeeze(tf.boolean_mask(raw_obj, flatten, axis=1), axis=-1)

    top_obj, top_boxes, top_classes = nms_ops.sort_drop(
        flat_obj, flat_boxes, flat_classes, self._max_boxes)
    return top_obj, top_boxes, top_classes
# NOTE(review): this statement is the tail of a definition that begins above
# the visible source; it is reproduced unchanged.
logging.info('Finished loading pretrained checkpoint from %s',
             ckpt_dir_or_file)


# Manual smoke test: build a v3 model, run it on a few training batches and
# display ground-truth boxes and predicted boxes on the first image of each
# batch.
if __name__ == '__main__':
    import matplotlib.pyplot as plt
    from yolo.utils.run_utils import prep_gpu
    prep_gpu()

    config = exp_cfg.YoloTask(model=exp_cfg.Yolo(base='v3'))
    task = YoloTask(config)
    model = task.build_model()
    model.summary()
    task.initialize(model)

    train_data = task.build_inputs(config.train_data)
    # test_data = task.build_inputs(config.task.validation_data)

    # l is the batch index, i the image batch, j the label dict.
    for l, (i, j) in enumerate(train_data):
        preds = model(i, training=False)
        boxes = xcycwh_to_yxyx(j['bbox'])

        # Ground truth is drawn with colour [1, 0, 0], predictions with
        # colour [0, 1, 0].
        i = tf.image.draw_bounding_boxes(i, boxes, [[1.0, 0.0, 0.0]])
        i = tf.image.draw_bounding_boxes(i, preds['bbox'], [[0.0, 1.0, 0.0]])
        plt.imshow(i[0].numpy())
        plt.show()

        # Stop after the batch with index 3 has been shown.
        if l > 2:
            break
def _parse_train_data(self, data):
    """Generates images and labels that are usable for model training.

    The image is divided by 255, randomly blurred, colour-jittered in HSV
    space and perturbed with additive noise; the boxes follow every geometric
    augmentation. Classes and boxes are then padded or clipped to
    `self._max_num_instances`.

    Args:
      data: a dict of Tensors produced by the decoder. The keys read here are
        'image', 'groundtruth_boxes', 'groundtruth_classes' and 'source_id'.

    Returns:
      images: the image tensor.
      labels: a dict of Tensors that contains labels. 'best_anchors' and
        'grid_form' are present only when `self._fixed_size` is truthy and
        `self._cutmix` is falsy; in that case 'bbox' has also been passed
        through `box_utils.xcycwh_to_yxyx`, otherwise it is left as produced
        by `box_utils.yxyx_to_xcycwh`.
    """
    image = data['image'] / 255
    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']

    # One uniform draw selects the blur: >0.9 strongest, >0.7 medium,
    # >0.4 lightest, otherwise no blur.
    do_blur = tf.random.uniform([],
                                minval=0,
                                maxval=1,
                                seed=self._seed,
                                dtype=tf.float32)
    if do_blur > 0.9:
        image = tfa.image.gaussian_filter2d(image, filter_shape=7, sigma=15)
    elif do_blur > 0.7:
        image = tfa.image.gaussian_filter2d(image, filter_shape=5, sigma=6)
    elif do_blur > 0.4:
        image = tfa.image.gaussian_filter2d(image, filter_shape=5, sigma=3)

    # Colour augmentation is done per channel in HSV space.
    image = tf.image.rgb_to_hsv(image)
    i_h, i_s, i_v = tf.split(image, 3, axis=-1)
    if self._aug_rand_hue:
        delta = preprocessing_ops.rand_uniform_strong(-0.1, 0.1)
        i_h = i_h + delta  # hue shift
        i_h = tf.clip_by_value(i_h, 0.0, 1.0)
    if self._aug_rand_saturation:
        delta = preprocessing_ops.rand_scale(0.75)
        i_s = i_s * delta  # saturation scale
    if self._aug_rand_brightness:
        delta = preprocessing_ops.rand_scale(0.75)
        i_v = i_v * delta  # value (brightness) scale
    image = tf.concat([i_h, i_s, i_v], axis=-1)
    image = tf.image.hsv_to_rgb(image)

    # Additive noise with a random standard deviation; the noise is clamped
    # to [0, 0.5], so it can only brighten pixels.
    stddev = tf.random.uniform([],
                               minval=0,
                               maxval=40 / 255,
                               seed=self._seed,
                               dtype=tf.float32)
    noise = tf.random.normal(
        shape=tf.shape(image), mean=0.0, stddev=stddev, seed=self._seed)
    noise = tf.math.minimum(noise, 0.5)
    noise = tf.math.maximum(noise, 0)
    image += noise
    image = tf.clip_by_value(image, 0.0, 1.0)

    image_shape = tf.shape(image)[:2]

    if self._random_flip:
        image, boxes, _ = preprocess_ops.random_horizontal_flip(
            image, boxes, seed=self._seed)

    if self._jitter_boxes != 0.0:
        # NOTE(review): the jitter amount is the literal 0.025, not
        # self._jitter_boxes -- confirm this is intended.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)
        boxes = box_ops.jitter_boxes(boxes, 0.025)
        boxes = box_ops.normalize_boxes(boxes, image_shape)

    if self._jitter_im != 0.0:
        image, boxes, classes = preprocessing_ops.random_jitter(
            image, boxes, classes, self._jitter_im, seed=self._seed)
        # image, boxes, classes = preprocessing_ops.random_translate(image, boxes, classes, 0.2, seed=self._seed)

    if self._aug_rand_zoom:
        image, boxes, classes = preprocessing_ops.random_zoom_crop(
            image, boxes, classes, self._jitter_im)

    shape = tf.shape(image)
    width = shape[1]
    height = shape[0]

    # Output size in grid cells.
    # NOTE(review): the random rescale below is enabled when `_fixed_size`
    # is truthy -- confirm the condition is not inverted.
    randscale = self._image_w // self._net_down_scale
    if self._fixed_size:
        do_scale = tf.greater(
            tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
            1 - self._pct_rand)
        if do_scale:
            randscale = tf.random.uniform([],
                                          minval=10,
                                          maxval=15,
                                          seed=self._seed,
                                          dtype=tf.int32)

    if self._letter_box:
        image, boxes = preprocessing_ops.fit_preserve_aspect_ratio(
            image,
            boxes,
            width=width,
            height=height,
            target_dim=randscale * self._net_down_scale)
        # These two values are overwritten immediately below.
        width = randscale * self._net_down_scale
        height = randscale * self._net_down_scale

    # Re-read the size, since the steps above may have changed it.
    shape = tf.shape(image)
    width = shape[1]
    height = shape[0]
    image, boxes, classes = preprocessing_ops.resize_crop_filter(
        image,
        boxes,
        classes,
        default_width=width,
        default_height=height,
        target_width=self._image_w,
        target_height=self._image_h,
        randomize=False)

    boxes = box_utils.yxyx_to_xcycwh(boxes)
    image = tf.clip_by_value(image, 0.0, 1.0)

    # Count the instances before padding hides the true number.
    num_dets = tf.shape(classes)[0]

    # padding
    classes = preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                       self._max_num_instances,
                                                       -1)

    if self._fixed_size and not self._cutmix:
        # Anchors are matched on the unpadded boxes, then padded.
        best_anchors = preprocessing_ops.get_best_anchor(
            boxes, self._anchors, width=self._image_w, height=self._image_h)
        best_anchors = preprocess_ops.clip_or_pad_to_fixed_size(
            best_anchors, self._max_num_instances, 0)
        boxes = preprocess_ops.clip_or_pad_to_fixed_size(
            boxes, self._max_num_instances, 0)
        labels = {
            'source_id': data['source_id'],
            'bbox': tf.cast(boxes, self._dtype),
            'classes': tf.cast(classes, self._dtype),
            'best_anchors': tf.cast(best_anchors, self._dtype),
            'width': width,
            'height': height,
            'num_detections': num_dets
        }
        # The grid is built while 'bbox' is still in centre form.
        grid = self._build_grid(
            labels, self._image_w, use_tie_breaker=self._use_tie_breaker)
        labels.update({'grid_form': grid})
        labels['bbox'] = box_utils.xcycwh_to_yxyx(labels['bbox'])
    else:
        # No anchors or grid are produced on this path.
        boxes = preprocess_ops.clip_or_pad_to_fixed_size(
            boxes, self._max_num_instances, 0)
        labels = {
            'source_id': data['source_id'],
            'bbox': tf.cast(boxes, self._dtype),
            'classes': tf.cast(classes, self._dtype),
            'width': width,
            'height': height,
            'num_detections': num_dets
        }
    return image, labels
max_level=params.parser.max_level,
min_process_size=params.parser.min_process_size,
max_process_size=params.parser.max_process_size,
max_num_instances=params.parser.max_num_instances,
random_flip=params.parser.random_flip,
pct_rand=params.parser.pct_rand,
seed=params.parser.seed,
anchors=anchors)
    # NOTE(review): everything down to `return dataset` is the tail of a
    # definition whose start (including the call that the keyword arguments
    # above belong to) is outside the visible source; the indentation shown
    # here is a guess.
    # Read TFRecords through the decoder and the parser's parse function.
    reader = input_reader.InputReader(params,
                                      dataset_fn=tf.data.TFRecordDataset,
                                      decoder_fn=decoder.decode,
                                      parser_fn=parser.parse_fn(
                                          params.is_training))
    dataset = reader.read(input_context=None)
    return dataset


# Manual smoke test: display the ground-truth boxes on the first image of
# each batch produced by the input pipeline.
if __name__ == '__main__':
    dataset, dsp = test_yolo_input_task()

    # l is the batch index, i the image batch, j the label dict.
    for l, (i, j) in enumerate(dataset):
        boxes = box_ops.xcycwh_to_yxyx(j['bbox'])
        i = tf.image.draw_bounding_boxes(i, boxes, [[1.0, 0.0, 1.0]])
        plt.imshow(i[0].numpy())
        plt.show()

        # Stop after the batch with index 31 has been shown.
        if l > 30:
            break