def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Randomly crop `image` and remap its labels and boxes to the crop.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    Args:
        image: 3-D image Tensor. The crop is tagged with 3 channels.
        labels: Labels matching `bboxes`; forwarded to
            `tfe.bboxes_filter_overlap`.
        bboxes: 2-D float Tensor arranged [num_boxes, coords], coordinates
            ordered [ymin, xmin, ymax, xmax] relative to the image. A
            leading batch dimension is added here before sampling. With no
            boxes the whole image is used as the implicit box.
        min_object_covered: The crop must contain at least this fraction of
            one of the supplied boxes. Defaults to `0.3`.
        aspect_ratio_range: Allowed (min, max) of width / height for the crop.
        area_range: Allowed (min, max) fraction of the image area covered by
            the crop.
        max_attempts: Number of sampling attempts; after that many failures
            the sampler returns the entire image.
        clip_bboxes: When true (the default), box coordinates are clipped to
            [0, 1] before sampling. When false the boxes are used untouched.
        scope: Optional scope for name_scope.

    Returns:
        A tuple `(cropped_image, labels, bboxes, distort_bbox)`: the cropped
        3-D image, the filtered labels and boxes expressed relative to the
        crop, and the 1-D crop window [ymin, xmin, ymax, xmax].
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        # Honour the flag: previously the clip ran regardless of its value.
        if clip_bboxes:
            bboxes = tf.clip_by_value(bboxes, 0.0, 1.0)

        # The sampler expects boxes shaped [1, num_boxes, 4].
        bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box(
            tf.shape(image),
            bounding_boxes=tf.expand_dims(bboxes, 0),
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True)
        # Drop the two leading singleton dimensions: [1, 1, 4] -> [4].
        distort_bbox = distort_bbox[0, 0]

        # Crop the image to the sampled window.
        cropped_image = tf.slice(image, bbox_begin, bbox_size)
        # Restore the static channel count lost by the dynamic slice.
        cropped_image.set_shape([None, None, 3])

        # Re-express the boxes relative to the crop, then filter them.
        bboxes = tfe.bboxes_resize(distort_bbox, bboxes)
        labels, bboxes = tfe.bboxes_filter_overlap(labels, bboxes,
                                                   threshold=BBOX_CROP_OVERLAP,
                                                   assign_negative=False)
        return cropped_image, labels, bboxes, distort_bbox
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Sample a random crop window, crop `image`, and remap the boxes.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    Args:
        image: 3-D image Tensor. The crop is tagged with 3 channels.
        labels: Labels matching `bboxes`; forwarded to
            `tfe.bboxes_filter_overlap`.
        bboxes: 2-D float Tensor arranged [num_boxes, coords], coordinates
            ordered [ymin, xmin, ymax, xmax]. A batch dimension is prepended
            before sampling. With no boxes the whole image is used.
        min_object_covered: The crop must contain at least this fraction of
            one of the supplied boxes. Defaults to `0.3`.
        aspect_ratio_range: Allowed (min, max) of width / height for the crop.
        area_range: Allowed (min, max) fraction of the image area covered by
            the crop.
        max_attempts: Number of sampling attempts; after that many failures
            the sampler returns the entire image.
        clip_bboxes: Accepted for interface compatibility; this
            implementation does not read it.
        scope: Optional scope for name_scope.

    Returns:
        A tuple `(cropped_image, labels, bboxes, distort_bbox)`.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        image_shape = tf.shape(image)
        # The sampler wants boxes as [1, num_boxes, 4].
        batched_boxes = tf.expand_dims(bboxes, 0)
        sampled = tf.image.sample_distorted_bounding_box(
            image_shape,
            bounding_boxes=batched_boxes,
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True)
        crop_begin, crop_size, crop_window = sampled
        # [1, 1, 4] -> [4]
        crop_window = crop_window[0, 0]

        # Cut the window out of the image; the dynamic slice forgets the
        # channel count, so pin it back to 3.
        cropped_image = tf.slice(image, crop_begin, crop_size)
        cropped_image.set_shape([None, None, 3])

        # Express boxes relative to the crop and filter them by overlap.
        remapped_boxes = tfe.bboxes_resize(crop_window, bboxes)
        kept_labels, kept_boxes = tfe.bboxes_filter_overlap(
            labels, remapped_boxes,
            threshold=BBOX_CROP_OVERLAP,
            assign_negative=False)
        return cropped_image, kept_labels, kept_boxes, crop_window
def distorted_image(image, height, labels, width, bbox, thread_id, scope=None):
    """Crop, resize, flip and colour-distort `image`, then remap its boxes.

    Args:
        image: 3-D image Tensor.
        height: Output image height passed to the resize.
        labels: Labels matching `bbox`; forwarded to
            `tfe.bboxes_filter_overlap`.
        width: Output image width passed to the resize.
        bbox: Float Tensor of boxes already shaped [1, num_boxes, coords]
            with coordinates ordered [ymin, xmin, ymax, xmax]; it is handed
            to the sampler and to `draw_bounding_boxes` without reshaping.
        thread_id: Preprocessing thread index. Image summaries are only
            emitted when it is falsy (the first thread); it is also
            forwarded to `distort_color`.
        scope: Optional scope for name_scope.

    Returns:
        A tuple `(distorted_image, label, bboxes, num)` where the last three
        items are whatever `tfe.bboxes_filter_overlap` returns.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop',
                       [image, bbox, height, width]):
        # Display the bounding box in the first thread only.
        if not thread_id:
            image_with_box = tf.image.draw_bounding_boxes(
                tf.expand_dims(image, 0), bbox)
            tf.summary.image('image_with_bounding_boxes', image_with_box)

        bbox_begin, bbox_size, distort_bbox = tf.image.sample_distorted_bounding_box(
            tf.shape(image),
            bounding_boxes=bbox,
            min_object_covered=0.1,
            aspect_ratio_range=(0.9, 1.1),
            area_range=(0.1, 1.0),
            max_attempts=200,
            use_image_if_no_bounding_boxes=True)
        if not thread_id:
            image_with_distorted_box = tf.image.draw_bounding_boxes(
                tf.expand_dims(image, 0), distort_bbox)
            tf.summary.image('images_with_distorted_bounding_box',
                             image_with_distorted_box)
        # [1, 1, 4] -> [4]
        distort_bbox = distort_bbox[0, 0]

        # Crop the image to the sampled window.
        cropped_image = tf.slice(image, bbox_begin, bbox_size)

        # Resize to the requested size and restore the static shape.
        # The local is named `distorted` so it does not shadow this function.
        distorted = tf.image.resize_images(
            cropped_image, [height, width],
            method=tf.image.ResizeMethod.BILINEAR)
        distorted.set_shape([height, width, 3])
        if not thread_id:
            tf.summary.image('cropped_resized_image',
                             tf.expand_dims(distorted, 0))

        # NOTE(review): the image may be mirrored here but the boxes below
        # are only re-expressed relative to the crop, never mirrored.
        # Confirm whether the boxes should follow the flip.
        distorted = tf.image.random_flip_left_right(distorted)

        # Randomly distort the colors.
        distorted = distort_color(distorted, thread_id)
        if not thread_id:
            tf.summary.image('final_distorted_image',
                             tf.expand_dims(distorted, 0))

        # Update bounding boxes: resize and filter out.
        bboxes = tfe.bboxes_resize(distort_bbox, bbox)
        # Function-call form works on both Python 2 and Python 3; the old
        # print statement is a SyntaxError on Python 3.
        print("labels: %s " % (labels))
        label, bboxes, num = tfe.bboxes_filter_overlap(labels, bboxes,
                                                       threshold=0.4)
        return distorted, label, bboxes, num
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                cord,
                                min_object_covered=0.1,
                                aspect_ratio_range=(0.8, 1.2),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                scope=None):
    """Sample a random crop window and remap boxes and polygons to it.

    Args:
        image: A `Tensor` representing an image of arbitrary size.
        labels: A Tensor holding all labels.
        bboxes: A Tensor of box coordinates shaped [N, 4]; a batch dimension
            is prepended before sampling.
        cord: Polygon coordinates matching `bboxes`; remapped with
            `tfe.polybox_resize`.
            NOTE(review): exact layout is not visible here; confirm.
        min_object_covered: The crop must contain at least this fraction of
            one of the supplied boxes. Defaults to `0.1`.
        aspect_ratio_range: Allowed (min, max) of width / height for the crop.
        area_range: Allowed (min, max) fraction of the image area covered by
            the crop.
        max_attempts: Number of sampling attempts; after that many failures
            the sampler returns the entire image.
        scope: Optional scope for name_scope.

    Returns:
        A tuple `(cropped_image, labels, bboxes, cord, distort_bbox, num)`;
        `labels`, `bboxes`, `cord` and `num` come from
        `tfe.bboxes_filter_overlap`.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes, cord]):
        image_shape = tf.shape(image)
        # The sampler wants boxes as [1, num_boxes, 4]. Unlike the whole-image
        # fallback variants, boxes are mandatory here.
        batched_boxes = tf.expand_dims(bboxes, 0)
        crop_begin, crop_size, crop_window = tf.image.sample_distorted_bounding_box(
            image_shape,
            bounding_boxes=batched_boxes,
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=False)
        # [1, 1, 4] -> [4]
        crop_window = crop_window[0, 0]

        # Cut the window out of the image.
        cropped_image = tf.slice(image, crop_begin, crop_size)

        # Re-express boxes and polygons relative to the crop, then filter.
        remapped_boxes = tfe.bboxes_resize(crop_window, bboxes)
        remapped_cord = tfe.polybox_resize(crop_window, cord)
        filtered = tfe.bboxes_filter_overlap(
            labels, remapped_boxes, remapped_cord, BBOX_CROP_OVERLAP)
        kept_labels, kept_boxes, kept_cord, num = filtered
        return cropped_image, kept_labels, kept_boxes, kept_cord, crop_window, num
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                xs,
                                ys,
                                min_object_covered,
                                aspect_ratio_range,
                                area_range,
                                max_attempts=200,
                                scope=None):
    """Sample a random crop window and remap boxes and corner points to it.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    Args:
        image: 3-D image Tensor. The crop is tagged with 3 channels.
        labels: Labels matching `bboxes`.
        bboxes: 2-D float Tensor arranged [num_boxes, coords], coordinates
            ordered [ymin, xmin, ymax, xmax].
        xs: Corner x-coordinates per box (presumably [num_boxes, 4], which
            matches the stand-in built here; confirm against callers).
        ys: Corner y-coordinates per box, same layout as `xs`.
        min_object_covered: The crop must contain at least this fraction of
            one of the supplied boxes.
        aspect_ratio_range: Allowed (min, max) of width / height for the crop.
        area_range: Allowed (min, max) fraction of the image area covered by
            the crop.
        max_attempts: Number of sampling attempts; after that many failures
            the sampler returns the entire image.
        scope: Optional scope for name_scope.

    Returns:
        A tuple `(cropped_image, labels, bboxes, xs, ys, distort_bbox)`.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes, xs, ys]):
        num_bboxes = tf.shape(bboxes)[0]

        def _keep_inputs():
            # At least one box: use the caller's tensors untouched.
            return bboxes, labels, xs, ys

        def _random_stand_in():
            # No boxes: fabricate a single 0.1 x 0.1 box at a random
            # position, labelled 0, so the sampler has something to cover.
            left = tf.random_uniform((1, 1), minval=0, maxval=0.9)
            top = tf.random_uniform((1, 1), minval=0, maxval=0.9)
            side = tf.constant(0.1, dtype=tf.float32)
            right = left + side
            bottom = top + side
            fake_boxes = tf.concat([top, left, bottom, right], axis=1)
            fake_labels = tf.constant([0], dtype=tf.int32)
            fake_xs = tf.concat([left, right, right, left], axis=1)
            fake_ys = tf.concat([top, top, bottom, bottom], axis=1)
            return fake_boxes, fake_labels, fake_xs, fake_ys

        bboxes, labels, xs, ys = tf.cond(num_bboxes > 0,
                                         _keep_inputs, _random_stand_in)

        image_shape = tf.shape(image)
        # The sampler wants boxes as [1, num_boxes, 4].
        batched_boxes = tf.expand_dims(bboxes, 0)
        crop_begin, crop_size, crop_window = tf.image.sample_distorted_bounding_box(
            image_shape,
            bounding_boxes=batched_boxes,
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True)
        # [1, 1, 4] -> [4]
        crop_window = crop_window[0, 0]

        # Cut the window out of the image; the dynamic slice forgets the
        # channel count, so pin it back to 3.
        cropped_image = tf.slice(image, crop_begin, crop_size)
        cropped_image.set_shape([None, None, 3])

        # Re-express boxes and corner points relative to the crop, then
        # filter by overlap with the crop.
        bboxes, xs, ys = tfe.bboxes_resize(crop_window, bboxes, xs, ys)
        labels, bboxes, xs, ys = tfe.bboxes_filter_overlap(
            labels, bboxes, xs, ys,
            threshold=BBOX_CROP_OVERLAP,
            assign_value=LABEL_IGNORE)
        return cropped_image, labels, bboxes, xs, ys, crop_window
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                xs,
                                ys,
                                min_object_covered,
                                aspect_ratio_range,
                                area_range,
                                max_attempts=200,
                                scope=None):
    """Randomly crop `image`, remapping boxes and their corner points.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    Args:
        image: 3-D image Tensor. The crop is tagged with 3 channels.
        labels: Labels matching `bboxes`.
        bboxes: 2-D float Tensor arranged [num_boxes, coords], coordinates
            ordered [ymin, xmin, ymax, xmax].
        xs: Corner x-coordinates per box (presumably [num_boxes, 4], which
            matches the stand-in built here; confirm against callers).
        ys: Corner y-coordinates per box, same layout as `xs`.
        min_object_covered: The crop must contain at least this fraction of
            one of the supplied boxes.
        aspect_ratio_range: Allowed (min, max) of width / height for the crop.
        area_range: Allowed (min, max) fraction of the image area covered by
            the crop.
        max_attempts: Number of sampling attempts; after that many failures
            the sampler returns the entire image.
        scope: Optional scope for name_scope.

    Returns:
        A tuple `(cropped_image, labels, bboxes, xs, ys, distort_bbox)`.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes, xs, ys]):
        num_bboxes = tf.shape(bboxes)[0]

        def _passthrough():
            # Boxes are present: nothing to fabricate.
            return bboxes, labels, xs, ys

        def _background_box():
            # No boxes: build one 0.1 x 0.1 box at a random position and
            # label it with the configured background label.
            x0 = tf.random_uniform((1, 1), minval=0, maxval=0.9)
            y0 = tf.random_uniform((1, 1), minval=0, maxval=0.9)
            extent = tf.constant(0.1, dtype=tf.float32)
            x1 = x0 + extent
            y1 = y0 + extent
            box = tf.concat([y0, x0, y1, x1], axis=1)
            label = tf.constant([config.background_label], dtype=tf.int64)
            corner_xs = tf.concat([x0, x1, x1, x0], axis=1)
            corner_ys = tf.concat([y0, y0, y1, y1], axis=1)
            return box, label, corner_xs, corner_ys

        has_any_box = num_bboxes > 0
        bboxes, labels, xs, ys = tf.cond(has_any_box,
                                         _passthrough, _background_box)

        image_shape = tf.shape(image)
        # The sampler wants boxes as [1, num_boxes, 4].
        batched_boxes = tf.expand_dims(bboxes, 0)
        sampled = tf.image.sample_distorted_bounding_box(
            image_shape,
            bounding_boxes=batched_boxes,
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True)
        crop_begin, crop_size, crop_window = sampled
        # [1, 1, 4] -> [4]
        crop_window = crop_window[0, 0]

        # Crop, then restore the channel count lost by the dynamic slice.
        cropped_image = tf.slice(image, crop_begin, crop_size)
        cropped_image.set_shape([None, None, 3])

        # Update bounding boxes: resize and filter out.
        bboxes, xs, ys = tfe.bboxes_resize(crop_window, bboxes, xs, ys)
        labels, bboxes, xs, ys = tfe.bboxes_filter_overlap(
            labels, bboxes, xs, ys,
            threshold=BBOX_CROP_OVERLAP,
            assign_value=LABEL_IGNORE)
        return cropped_image, labels, bboxes, xs, ys, crop_window
def preprocess_for_train(image, labels, bboxes, xs, ys, out_shape,
                         data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Preprocesses the given image and its annotations for training.

    The pipeline, each step gated by a module-level switch where noted, is:
    random horizontal flip (`FLIP`), random rotation (`ROTATE`), a random
    crop resized to 640x640, overlap filtering, random 90-degree rotation
    (`ROTATE_90`), random expansion (`MAX_EXPAND_SCALE`), resize to
    `out_shape`, shorter-side filtering (`USING_SHORTER_SIDE_FILTERING`),
    colour distortion, and mean subtraction.

    Args:
        image: A 3-D `Tensor` [height, width, channels] of arbitrary size.
        labels: Labels matching `bboxes`.
        bboxes: Bounding boxes of the annotations.
        xs: Corner x-coordinates of the annotations.
        ys: Corner y-coordinates of the annotations.
        out_shape: Output (height, width) passed to the final resize and used
            to scale `xs`/`ys` for shorter-side filtering.
        data_format: 'NHWC' (default) or 'NCHW'; with 'NCHW' the result is
            transposed to channels-first.
        scope: Optional scope for name_scope.

    Returns:
        A tuple `(image, labels, bboxes, xs, ys)` with the preprocessed image
        and the transformed annotations.

    Raises:
        ValueError: If `image` is not statically known to be 3-D.
    """
    fast_mode = False
    with tf.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Randomly flip the image horizontally.
        if FLIP:
            image, bboxes, xs, ys = tf_image.random_flip_left_right_bbox(
                image, bboxes, xs, ys)
        if ROTATE:
            # Randomly rotate the image within [-10, 10] degrees.
            image, bboxes, xs, ys = tf_rotate_image(image, xs, ys)

        image_shape = tf.cast(tf.shape(image), dtype=tf.float32)
        image_h, image_w = image_shape[0], image_shape[1]

        if USE_NM_CROP:
            mask = tf.greater_equal(labels, 1)
            valid_bboxes = tf.boolean_mask(bboxes, mask)
            # FIXME: valid_bboxes may be empty.
            # NOTE: the function wrapped by tf.py_func must return numpy
            # values.
            crop_bbox = tf.py_func(generate_sample,
                                   [image_shape, valid_bboxes], tf.float32)
        else:
            scales = tf.random_shuffle([0.5, 1.])
            # NOTE(review): `crop` is not defined in this function, so it
            # must come from module scope. Confirm, otherwise this branch
            # raises NameError.
            scales = tf.Print(scales, [crop])
            target_h = tf.cast(640 / scales[0], dtype=tf.float32)
            target_w = tf.cast(640 / scales[0], dtype=tf.float32)
            bbox_begin_h_max = tf.maximum(image_h - target_h, 0)
            bbox_begin_w_max = tf.maximum(image_w - target_w, 0)
            bbox_begin_h = tf.random_uniform(
                [], minval=0, maxval=bbox_begin_h_max, dtype=tf.float32)
            bbox_begin_w = tf.random_uniform(
                [], minval=0, maxval=bbox_begin_w_max, dtype=tf.float32)
            # Normalised [ymin, xmin, ymax, xmax] of the crop window.
            crop_bbox = [bbox_begin_h / image_h,
                         bbox_begin_w / image_w,
                         (bbox_begin_h + target_h) / image_h,
                         (bbox_begin_w + target_w) / image_w]

        # Crop the window and resample it to 640x640; regions outside the
        # image are filled with 128.
        image = tf.image.crop_and_resize(tf.expand_dims(image, 0),
                                         [crop_bbox], [0], (640, 640),
                                         extrapolation_value=128)
        image = tf.squeeze(image, 0)

        bboxes, xs, ys = tfe.bboxes_resize(crop_bbox, bboxes, xs, ys)
        labels, bboxes, xs, ys = tfe.bboxes_filter_overlap(
            labels, bboxes, xs, ys,
            threshold=BBOX_CROP_OVERLAP,
            assign_value=LABEL_IGNORE)

        if ROTATE_90:
            rnd = tf.random_uniform((), minval=0, maxval=1)
            image, bboxes, xs, ys = tf.cond(
                tf.less(rnd, 0.2),
                lambda: tf_image.random_rotate90(image, bboxes, xs, ys),
                lambda: (image, bboxes, xs, ys))

        # Expand the image (pad it to a larger canvas) with probability
        # config.expand_prob.
        if MAX_EXPAND_SCALE > 1:
            rnd2 = tf.random_uniform((), minval=0, maxval=1)

            def expand():
                scale = tf.random_uniform([], minval=1.0,
                                          maxval=MAX_EXPAND_SCALE,
                                          dtype=tf.float32)
                image_shape = tf.cast(tf.shape(image), dtype=tf.float32)
                image_h, image_w = image_shape[0], image_shape[1]
                target_h = tf.cast(image_h * scale, dtype=tf.int32)
                target_w = tf.cast(image_w * scale, dtype=tf.int32)
                tf.logging.info('expanded')
                return tf_image.resize_image_bboxes_with_crop_or_pad(
                    image, bboxes, xs, ys, target_h, target_w)

            def no_expand():
                return image, bboxes, xs, ys

            image, bboxes, xs, ys = tf.cond(
                tf.less(rnd2, config.expand_prob), expand, no_expand)

        # The alternative crop based on
        # tf.image.sample_distorted_bounding_box was permanently disabled
        # here (it cannot control the scale), so the unreachable branch has
        # been dropped.
        dst_image = image

        # Resize image to output size.
        dst_image = tf_image.resize_image(
            dst_image, out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)

        # Filter bboxes using the length of shorter sides, measured in
        # output pixels.
        if USING_SHORTER_SIDE_FILTERING:
            xs = xs * out_shape[1]
            ys = ys * out_shape[0]
            labels, bboxes, xs, ys = tfe.bboxes_filter_by_shorter_side(
                labels, bboxes, xs, ys,
                min_height=MIN_SHORTER_SIDE,
                max_height=MAX_SHORTER_SIDE,
                assign_value=LABEL_IGNORE)
            xs = xs / out_shape[1]
            ys = ys / out_shape[0]

        # Randomly distort the colors. There are 4 ways to do it. The
        # distortion works on [0, 1] values, so scale down and back up.
        dst_image = apply_with_random_selector(
            dst_image / 255.0,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)
        dst_image = dst_image * 255.

        # NOTE: resnet v1 uses the VGG data processing, so the same mean
        # subtraction is applied here. The whitening now consumes
        # `dst_image`; it previously read `image`, which silently discarded
        # the resize to `out_shape` and the colour distortion above.
        image = tf_image_whitened(dst_image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Image data format.
        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, xs, ys
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Randomly crop `image` and remap its labels and boxes to the crop.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    Args:
        image: 3-D image Tensor. The crop is tagged with 3 channels.
        labels: Labels matching `bboxes`, shape (n,).
        bboxes: 2-D float Tensor [n, 4], coordinates ordered
            [ymin, xmin, ymax, xmax] as floats in [0.0, 1.0] relative to the
            image height and width. With no boxes the whole image is used.
        min_object_covered: The crop must contain at least this fraction of
            one of the supplied boxes. Defaults to `0.3`.
        aspect_ratio_range: Allowed (min, max) of width / height for the crop.
        area_range: Allowed (min, max) fraction of the image area covered by
            the crop.
        max_attempts: Number of sampling attempts; after that many failures
            the sampler returns the entire image.
        clip_bboxes: Accepted for interface compatibility; this
            implementation does not read it.
        scope: Optional scope for name_scope.

    Returns:
        A tuple `(cropped_image, labels, bboxes, distort_bbox)`: the randomly
        cropped image, the filtered and remapped labels (n,) and boxes
        (n, 4), and the crop window (4,) in original-image coordinates.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        # Random crop driven by the ground-truth boxes. The sampler returns:
        #   begin:  1-D [offset_height, offset_width, 0], same dtype as the
        #           image size; feed to tf.slice.
        #   size:   1-D [target_height, target_width, -1]; feed to tf.slice.
        #   bboxes: float32 [1, 1, 4], the sampled window; usable with
        #           tf.image.draw_bounding_boxes.
        image_shape = tf.shape(image)
        batched_boxes = tf.expand_dims(bboxes, 0)  # [1, n, 4]
        crop_begin, crop_size, crop_window = tf.image.sample_distorted_bounding_box(
            image_shape,
            bounding_boxes=batched_boxes,
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True)
        # [1, 1, 4] -> [4]
        crop_window = crop_window[0, 0]

        # Crop the image to the sampled window.
        cropped_image = tf.slice(image, crop_begin, crop_size)
        # The dynamic slice loses the static channel count; set it here.
        cropped_image.set_shape([None, None, 3])

        # Update bounding boxes: resize ([4], [n, 4]) and filter out.
        remapped_boxes = tfe.bboxes_resize(crop_window, bboxes)
        kept_labels, kept_boxes = tfe.bboxes_filter_overlap(
            labels, remapped_boxes,
            threshold=BBOX_CROP_OVERLAP,  # 0.5 per the original author's note
            assign_negative=False)
        return cropped_image, kept_labels, kept_boxes, crop_window
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Crop `image` to a randomly sampled window and remap the boxes.

    See `tf.image.sample_distorted_bounding_box` for more documentation.

    Args:
        image: 3-D image Tensor. The crop is tagged with 3 channels.
        labels: Labels matching `bboxes`; forwarded to
            `tfe.bboxes_filter_overlap`.
        bboxes: 2-D float Tensor arranged [num_boxes, coords], coordinates
            ordered [ymin, xmin, ymax, xmax]. A batch dimension is prepended
            before sampling. With no boxes the whole image is used.
        min_object_covered: The crop must contain at least this fraction of
            one of the supplied boxes. Defaults to `0.3`.
        aspect_ratio_range: Allowed (min, max) of width / height for the crop.
        area_range: Allowed (min, max) fraction of the image area covered by
            the crop.
        max_attempts: Number of sampling attempts; after that many failures
            the sampler returns the entire image.
        clip_bboxes: Accepted for interface compatibility; this
            implementation does not read it.
        scope: Optional scope for name_scope.

    Returns:
        A tuple `(cropped_image, labels, bboxes, distort_bbox)`.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        # 1-D [height, width, channels] of the image (integer dtype).
        image_shape = tf.shape(image)
        # The sampler takes float32 boxes shaped [batch, N, 4], each box
        # being [y_min, x_min, y_max, x_max]; e.g. a [1, 2, 4] tensor
        # describes two boxes on one image.
        batched_boxes = tf.expand_dims(bboxes, 0)

        # Sampler arguments:
        #   min_object_covered: the crop must contain at least this fraction
        #       of one supplied box; 0 means it need not overlap any box.
        #   aspect_ratio_range: width / height of the crop must fall here.
        #   area_range: fraction of the image the crop must cover.
        #   max_attempts: tries before falling back to the entire image.
        #   use_image_if_no_bounding_boxes: with True, an implicit box
        #       covering the whole image is assumed when none are supplied;
        #       with False that case is an error.
        # Sampler results:
        #   crop_begin: 1-D [offset_height, offset_width, 0] for tf.slice.
        #   crop_size: 1-D size vector for tf.slice.
        #   crop_window: box usable with tf.image.draw_bounding_boxes.
        sampled = tf.image.sample_distorted_bounding_box(
            image_shape,
            bounding_boxes=batched_boxes,
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True)
        crop_begin, crop_size, crop_window = sampled
        crop_window = crop_window[0, 0]

        # Crop the image to the sampled window.
        cropped_image = tf.slice(image, crop_begin, crop_size)
        # Restore the shape since the dynamic slice loses 3rd dimension.
        cropped_image.set_shape([None, None, 3])

        # Update bounding boxes: resize and filter out.
        remapped_boxes = tfe.bboxes_resize(crop_window, bboxes)
        kept_labels, kept_boxes = tfe.bboxes_filter_overlap(
            labels, remapped_boxes,
            threshold=BBOX_CROP_OVERLAP,
            assign_negative=False)
        return cropped_image, kept_labels, kept_boxes, crop_window
def distorted_bounding_box_crop(image,
                                labels,
                                bboxes,
                                min_object_covered=0.3,
                                aspect_ratio_range=(0.9, 1.1),
                                area_range=(0.1, 1.0),
                                max_attempts=200,
                                clip_bboxes=True,
                                scope=None):
    """Generate a cropped image from a randomly sampled window.

    The sampler picks a window constrained by the ground-truth boxes; the
    image is cropped to it and the boxes are re-expressed relative to it.
    See `tf.image.sample_distorted_bounding_box` for more documentation.

    Args:
        image: 3-D image Tensor. The crop is tagged with 3 channels.
        labels: Labels matching `bboxes`; forwarded to
            `tfe.bboxes_filter_overlap`.
        bboxes: 2-D float Tensor arranged [num_boxes, coords], coordinates
            ordered [ymin, xmin, ymax, xmax]. A batch dimension is prepended
            before sampling. With no boxes the whole image is used.
        min_object_covered: The crop must contain at least this fraction of
            one of the supplied boxes. Defaults to `0.3`.
        aspect_ratio_range: Allowed (min, max) of width / height for the crop.
        area_range: Allowed (min, max) fraction of the image area covered by
            the crop.
        max_attempts: Number of sampling attempts; after that many failures
            the sampler returns the entire image.
        clip_bboxes: Accepted for interface compatibility; this
            implementation does not read it.
        scope: Optional scope for name_scope.

    Returns:
        A tuple `(cropped_image, labels, bboxes, distort_bbox)`.
    """
    with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bboxes]):
        # Sampling a random window varies the apparent size of the objects
        # the model sees. The sampler returns:
        #   begin:  1-D [offset_height, offset_width, 0], same dtype as the
        #           image size; feed to tf.slice.
        #   size:   1-D [target_height, target_width, -1]; feed to tf.slice.
        #   bboxes: float32 [1, 1, 4], the single sampled window; mainly
        #           useful for drawing it with tf.image.draw_bounding_boxes.
        image_shape = tf.shape(image)
        batched_boxes = tf.expand_dims(bboxes, 0)
        crop_begin, crop_size, crop_window = tf.image.sample_distorted_bounding_box(
            image_shape,
            bounding_boxes=batched_boxes,
            min_object_covered=min_object_covered,
            aspect_ratio_range=aspect_ratio_range,
            area_range=area_range,
            max_attempts=max_attempts,
            use_image_if_no_bounding_boxes=True)
        crop_window = crop_window[0, 0]

        # Crop the image to the sampled window: `crop_begin` ends in 0 and
        # `crop_size` ends in -1, so tf.slice keeps every channel.
        cropped_image = tf.slice(image, crop_begin, crop_size)
        # Restore the shape since the dynamic slice loses 3rd dimension.
        cropped_image.set_shape([None, None, 3])

        # Update bounding boxes: resize and filter out.
        remapped_boxes = tfe.bboxes_resize(crop_window, bboxes)
        kept_labels, kept_boxes = tfe.bboxes_filter_overlap(
            labels, remapped_boxes,
            threshold=BBOX_CROP_OVERLAP,
            assign_negative=False)

        # The crop is returned with static shape [None, None, 3]; per the
        # original author's note the caller (preprocess_for_train) resizes
        # it to the network input size afterwards.
        return cropped_image, kept_labels, kept_boxes, crop_window