def preprocess_for_train(image, labels, bboxes, height, width, out_shape, data_format='NHWC', use_whiten=True, scope='textbox_process_train'):
    """Build the training-time augmentation pipeline for a single image.

    The image is converted to float32, distorted, cropped/padded, randomly
    cropped, resized to `out_shape`, randomly flipped, rescaled by 255 and
    finally mean-subtracted with the module-level RGB means.

    Args:
      image: A rank-3 `Tensor` (anything else raises `ValueError`).
      labels: A `Tensor` of labels; also summed (as int32) to produce `num`.
      bboxes: A `Tensor` of box coordinates; clipped to [0, 1] on entry and
        again before returning.
      height: Indexable value; only `height[0]` is used, as the target height
        of the crop-or-pad step.
      width: Indexable value; only `width[0]` is used, as the target width
        of the crop-or-pad step.
      out_shape: Two-element output size `(out_height, out_width)`.
      data_format: 'NHWC' keeps the channel-last layout; any other value
        transposes the image with `perm=(2, 0, 1)`.
      use_whiten: Accepted for interface compatibility; not read by this
        function (whitening is always applied).
      scope: Name scope for the created ops.

    Returns:
      A tuple `(image, labels, bboxes, num)`.

    Raises:
      ValueError: If `image` does not have static rank 3.
    """
    with tf.name_scope(scope, 'textbox_process_train', [image, labels, bboxes]):
        rank = image.get_shape().ndims
        if rank != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        pixels = tf.image.convert_image_dtype(image, dtype=tf.float32)
        # Keep box coordinates inside the unit square.
        boxes = tf.maximum(tf.minimum(bboxes, 1.0), 0.0)

        # Photometric distortion followed by the geometric steps.
        pixels = tf_image.distorter(pixels)
        pixels, boxes = tf_image.resize_image_bboxes_with_crop_or_pad2(
            pixels, boxes, height[0], width[0])
        pixels, labels, boxes = tf_image.Random_crop(pixels, labels, boxes)
        pixels = tf_image.resize_image(
            pixels,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        pixels = tf.clip_by_value(pixels, 0., 1.)
        pixels, boxes = tf_image.random_flip_left_right(pixels, boxes)

        num = tf.reduce_sum(tf.cast(labels, tf.int32))

        # Pin the static shape before the summary and the final scaling.
        pixels.set_shape([out_shape[0], out_shape[1], 3])
        tf_image.tf_summary_image(pixels, boxes)

        # Back to the 0..255 range, then subtract the per-channel means.
        pixels = pixels * 255.
        pixels = tf_image.tf_image_whitened(pixels, [_R_MEAN, _G_MEAN, _B_MEAN])

        boxes = tf.maximum(tf.minimum(boxes, 1.0), 0.0)

        # Only a non-NHWC request needs a layout change.
        if data_format != 'NHWC':
            pixels = tf.transpose(pixels, perm=(2, 0, 1))
        return pixels, labels, boxes, num
def preprocess_for_test(image, out_shape=IMAGE_SIZE, scope='ssd_preprocessing_test'):
    """Prepare a single image for inference.

    The image is cast to float, mean-subtracted with the module-level RGB
    means and resized to `out_shape`.

    Args:
      image: Input image `Tensor`.
      out_shape: Target size passed to `tf_image.resize_image`.
      scope: Name scope for the created ops.

    Returns:
      A tuple `(image, bbox_img)` where `bbox_img` is the constant
      `[0., 0., 1., 1.]`.
    """
    channel_means = [_R_MEAN, _G_MEAN, _B_MEAN]
    with tf.name_scope(scope):
        as_float = tf.to_float(image)
        centered = tf_image.tf_image_whitened(as_float, channel_means)
        resized = tf_image.resize_image(centered, out_shape)
        full_image_box = tf.constant([0., 0., 1., 1.])
        return resized, full_image_box
def preprocess_for_train(image, labels, bboxes, out_shape, data_format='NHWC', use_whiten=True, scope='textbox_process_train'):
    """Build the training-time augmentation pipeline for a single image.

    Steps, in order: float32 conversion (if needed), box clipping, distorted
    bounding-box crop, colour distortion, random horizontal flip, resize to
    `out_shape`, rescale by 255 and mean subtraction.

    Args:
      image: Input image `Tensor`.
      labels: A `Tensor` of labels; summed (as int32) after cropping to
        produce `num`.
      bboxes: A `Tensor` of box coordinates.
      out_shape: Target size passed to `tf_image.resize_image`.
      data_format: Accepted for interface compatibility; not read here.
      use_whiten: Accepted for interface compatibility; not read here
        (whitening is always applied).
      scope: Name scope for the created ops.

    Returns:
      A tuple `(image, labels, bboxes, num)`.
    """
    with tf.name_scope(scope, 'textbox_process_train', [image, labels, bboxes]):
        needs_conversion = image.dtype != tf.float32
        if needs_conversion:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        boxes = tf_image.clip_bboxes(bboxes)

        # Geometric crop first, then photometric and flip augmentations.
        augmented, labels, boxes = tf_image.distorted_bounding_box_crop(
            image, labels, boxes)
        augmented = tf_image.distort_color(augmented)
        augmented, boxes = tf_image.random_flip_left_right(augmented, boxes)
        augmented = tf_image.resize_image(augmented, out_shape)

        num = tf.reduce_sum(tf.cast(labels, tf.int32))

        # Back to the 0..255 range before subtracting the channel means.
        scaled = augmented * 255.0
        whitened = tf_image.tf_image_whitened(
            scaled, [_R_MEAN, _G_MEAN, _B_MEAN])
        return whitened, labels, boxes, num
def preprocess_for_eval(image, labels, bboxes, height, width, out_shape=EVAL_SIZE, data_format='NHWC', use_whiten=True, difficults=None, resize=Resize.WARP_RESIZE, scope='ssd_preprocessing_train'):
    """Preprocess an image for evaluation.

    Args:
      image: A rank-3 `Tensor` (anything else raises `ValueError`).
      labels: A `Tensor` of labels, or None. When given, its int32 sum is
        returned as `num`; otherwise `num` is the Python int 0.
      bboxes: A `Tensor` of box coordinates, or None.
      height: Accepted for interface compatibility; not read here.
      width: Accepted for interface compatibility; not read here.
      out_shape: Two-element output size `(out_height, out_width)`.
      data_format: 'NHWC' keeps the channel-last layout; any other value
        transposes the image with `perm=(2, 0, 1)`.
      use_whiten: Accepted for interface compatibility; not read here
        (whitening is always applied).
      difficults: Optional `Tensor` of flags; entries that cast to True are
        removed from `labels` and `bboxes`.
      resize: One of `Resize.NONE`, `Resize.CENTRAL_CROP`,
        `Resize.PAD_AND_RESIZE` or `Resize.WARP_RESIZE`.
      scope: Name scope for the created ops.

    Returns:
      A tuple `(image, labels, bboxes, bbox_img, num)` where `bbox_img` is
      the whole-image box after it went through the same crop/pad as the
      object boxes.

    Raises:
      ValueError: If `image` does not have static rank 3.
    """
    with tf.name_scope(scope):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)

        num = 0
        if labels is not None:
            num = tf.reduce_sum(tf.cast(labels, tf.int32))

        # Prepend the whole-image rectangle so it is transformed together
        # with the object boxes; it is split off again further down.
        bbox_img = tf.constant([[0., 0., 1., 1.]])
        if bboxes is None:
            bboxes = bbox_img
        else:
            bboxes = tf.concat([bbox_img, bboxes], axis=0)

        if resize == Resize.NONE:
            # No resizing.
            pass
        elif resize == Resize.CENTRAL_CROP:
            # Central cropping of the image.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Shrink (never enlarge) so the image fits inside out_shape.
            shape = tf.shape(image)
            # Cast BEFORE dividing: `int / int32 Tensor` is floor division
            # under Python 2 without `from __future__ import division`,
            # which would collapse the factor to 0 for large images.
            height_ratio = tf.to_double(out_shape[0]) / tf.to_double(shape[0])
            width_ratio = tf.to_double(out_shape[1]) / tf.to_double(shape[1])
            factor = tf.minimum(tf.to_double(1.0),
                                tf.minimum(height_ratio, width_ratio))
            resize_shape = factor * tf.to_double(shape[0:2])
            resize_shape = tf.cast(tf.floor(resize_shape), tf.int32)

            image = tf_image.resize_image(
                image,
                resize_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # Pad to the expected size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            # Warp resize of the image.
            image = tf_image.resize_image(
                image,
                out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Split the whole-image box back off the object boxes.
        bbox_img = bboxes[0]
        bboxes = bboxes[1:]

        # Remove difficult boxes.
        if difficults is not None:
            mask = tf.logical_not(tf.cast(difficults, tf.bool))
            # labels is optional (see above); only mask it when present.
            if labels is not None:
                labels = tf.boolean_mask(labels, mask)
            bboxes = tf.boolean_mask(bboxes, mask)

        image = tf.clip_by_value(image, 0., 255.)
        image = tf_image.tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # Only a non-NHWC request needs a layout change.
        if data_format != 'NHWC':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img, num
def preprocess_for_train(image, labels, bboxes, out_shape, data_format='NHWC', scope='textbox_process_train'):
    """Preprocesses the given image for training.

    Steps, in order: float32 conversion (if needed), box clipping to
    [0.0001, 0.9999], distorted bounding-box crop, bilinear resize to
    `out_shape`, random horizontal flip, image summaries, random colour
    distortion, rescale by 255 and mean subtraction.

    Args:
      image: A rank-3 `Tensor` (anything else raises `ValueError`).
      labels: A `Tensor` of labels.
      bboxes: A `Tensor` of box coordinates.
      out_shape: Two-element output size `(out_height, out_width)`.
      data_format: Accepted for interface compatibility; not read here.
      scope: Name scope for the created ops.

    Returns:
      A tuple `(image, labels, bboxes, num)` where `num` is the value
      returned by `distorted_bounding_box_crop`.

    Raises:
      ValueError: If `image` does not have static rank 3.
    """
    with tf.name_scope(scope, 'textbox_process_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        # Keep coordinates strictly inside the unit square.
        bboxes = tf.minimum(bboxes, 0.9999)
        bboxes = tf.maximum(bboxes, 0.0001)

        # Distort image and bounding boxes. `num` comes from the crop helper.
        image, labels, bboxes, distort_bbox, num = \
            distorted_bounding_box_crop(image, labels, bboxes,
                                        aspect_ratio_range=CROP_RATIO_RANGE)

        # Resize image to output size.
        dst_image = tf_image.resize_image(
            image,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)

        # Randomly flip the image horizontally.
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)

        bbox_image = tf.image.draw_bounding_boxes(
            tf.expand_dims(dst_image, 0), tf.expand_dims(bboxes, 0))
        tf.summary.image('image_with_box', bbox_image)
        # NOTE(review): a box count is registered under 'EXTRA_LOSSES';
        # confirm whatever consumes that collection expects it.
        tf.add_to_collection('EXTRA_LOSSES', num)

        dst_image = tf_image.apply_with_random_selector(
            dst_image,
            lambda x, ordering: tf_image.distort_color_2(x, ordering, True),
            num_cases=4)
        tf_image.tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Pin the static shape BEFORE deriving the scaled tensor: set_shape
        # only annotates the tensor it is called on, so calling it after the
        # multiplication would leave the returned image without the shape.
        dst_image.set_shape([out_shape[0], out_shape[1], 3])

        # Rescale to normal range, then subtract the per-channel means.
        image = dst_image * 255.
        image = tf_image.tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])
        return image, labels, bboxes, num
def preprocess_for_train(image, labels, bboxes, out_shape, data_format='NHWC', use_whiten=True, scope='textbox_process_train'):
    """Build the training-time augmentation pipeline for a single image.

    Steps, in order: float32 conversion (if needed), box clipping to [0, 1],
    distorted bounding-box crop, bilinear resize to `out_shape`, random
    horizontal flip, random colour distortion, optional mean subtraction and
    a final division by 255.

    Args:
      image: A rank-3 `Tensor` (anything else raises `ValueError`).
      labels: A `Tensor` of labels.
      bboxes: A `Tensor` of box coordinates.
      out_shape: Two-element output size `(out_height, out_width)`.
      data_format: Accepted for interface compatibility; not read here.
      use_whiten: When true, the module-level RGB means are subtracted from
        the 0..255 image before the final division.
      scope: Name scope for the created ops.

    Returns:
      A tuple `(image, labels, bboxes, num)` where `num` is the value
      returned by `distorted_bounding_box_crop`.

    Raises:
      ValueError: If `image` does not have static rank 3.
    """
    with tf.name_scope(scope, 'textbox_process_train', [image, labels, bboxes]):
        rank = image.get_shape().ndims
        if rank != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Work on float pixels in [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        # This count is replaced by the crop helper's own value below.
        num = tf.reduce_sum(tf.cast(labels, tf.int32))

        boxes = tf.maximum(tf.minimum(bboxes, 1.0), 0.0)

        # NOTE(review): this draw happens in Python when the graph is built,
        # so a single coverage threshold is baked into the graph rather than
        # re-sampled per example -- confirm that is intended.
        coverage_index = np.random.randint(5)
        coverage_threshold = OBJECT_COVERED[coverage_index]
        cropped, labels, boxes, distort_bbox, num = distorted_bounding_box_crop(
            image,
            labels,
            boxes,
            min_object_covered=coverage_threshold,
            aspect_ratio_range=CROP_RATIO_RANGE)

        # Bring the crop to the network input size, then maybe mirror it.
        augmented = tf_image.resize_image(
            cropped,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        augmented, boxes = tf_image.random_flip_left_right(augmented, boxes)

        augmented = tf_image.apply_with_random_selector(
            augmented,
            lambda x, ordering: tf_image.distort_color_2(x, ordering, False),
            num_cases=4)

        # Back to the 0..255 range so the channel means can be subtracted.
        result = augmented * 255
        result.set_shape([out_shape[0], out_shape[1], 3])
        if use_whiten:
            result = tf_image.tf_image_whitened(
                result, [_R_MEAN, _G_MEAN, _B_MEAN])
        # NOTE(review): the division is applied whether or not whitening ran;
        # confirm this matches the intended nesting.
        result = result / 255.0

        boxes = tf.maximum(tf.minimum(boxes, 1.0), 0.0)
        return result, labels, boxes, num