Exemplo n.º 1
0
def preprocess_for_test(image, out_shape=IMAGE_SIZE, scope='ssd_preprocessing_test'):
    """Build the inference-time preprocessing ops for one image.

    The image is cast to float, passed through
    ``tf_image.tf_image_whitened`` with the module-level channel means, and
    then resized to ``out_shape`` with ``tf_image.resize_image``.

    Args:
        image: Image tensor to preprocess.
        out_shape: Target size forwarded to ``tf_image.resize_image``.
        scope: Name scope under which the ops are created.

    Returns:
        A pair ``(image, bbox_img)`` where ``bbox_img`` is the constant
        ``[0., 0., 1., 1.]``.
    """
    with tf.name_scope(scope):
        as_float = tf.to_float(image)
        centered = tf_image.tf_image_whitened(
            as_float, [_R_MEAN, _G_MEAN, _B_MEAN])
        resized = tf_image.resize_image(centered, out_shape)
        whole_image_box = tf.constant([0., 0., 1., 1.])
    return resized, whole_image_box
Exemplo n.º 2
0
def preprocess_for_train(image,
                         labels,
                         bboxes,
                         height,
                         width,
                         out_shape,
                         data_format='NHWC',
                         use_whiten=True,
                         scope='textbox_process_train'):
    """Build the training-time preprocessing ops for one image.

    Steps, in order: convert the image to float32, clamp the boxes to
    [0, 1], apply ``tf_image.distorter``, crop/pad to
    ``(height[0], width[0])``, apply ``tf_image.Random_crop``, resize
    bilinearly to ``out_shape``, clamp pixels to [0, 1], flip randomly
    left/right, scale by 255 and apply ``tf_image.tf_image_whitened`` with
    the module-level channel means.

    Args:
        image: Image tensor; its static rank must be 3.
        labels: Tensor of labels for the boxes.
        bboxes: Tensor of box coordinates (documented upstream as [N, 4]).
        height: Indexable value; element 0 is the crop/pad target height.
        width: Indexable value; element 0 is the crop/pad target width.
        out_shape: Output ``(height, width)`` of the image.
        data_format: ``'NHWC'`` keeps the layout; any other value transposes
            the image with ``perm=(2, 0, 1)``.
        use_whiten: Accepted for interface compatibility; not read here.
        scope: Name scope under which the ops are created.

    Returns:
        ``(image, labels, bboxes, num)`` where ``num`` is the sum of
        ``labels`` cast to int32.

    Raises:
        ValueError: If the static rank of ``image`` is not 3.
    """
    with tf.name_scope(scope, 'textbox_process_train',
                       [image, labels, bboxes]):
        rank = image.get_shape().ndims
        if rank != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        # Clamp box coordinates into the unit square before any geometry op.
        bboxes = tf.maximum(tf.minimum(bboxes, 1.0), 0.0)

        image = tf_image.distorter(image)

        target_height = height[0]
        target_width = width[0]
        image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad2(
            image, bboxes, target_height, target_width)

        image, labels, bboxes = tf_image.Random_crop(image, labels, bboxes)

        image = tf_image.resize_image(
            image,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        image = tf.clip_by_value(image, 0., 1.)
        image, bboxes = tf_image.random_flip_left_right(image, bboxes)

        labels_as_int = tf.cast(labels, tf.int32)
        num = tf.reduce_sum(labels_as_int)

        # Pin the static shape before the summary and the rescale.
        image.set_shape([out_shape[0], out_shape[1], 3])
        tf_image.tf_summary_image(image, bboxes)

        # Back to the 0-255 range expected by the channel means.
        image = image * 255.
        image = tf_image.tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        # The geometry ops above may have pushed boxes outside [0, 1] again.
        bboxes = tf.maximum(tf.minimum(bboxes, 1.0), 0.0)

        if data_format != 'NHWC':
            image = tf.transpose(image, perm=(2, 0, 1))

        return image, labels, bboxes, num
Exemplo n.º 3
0
def preprocess_for_train(image, labels, bboxes,
                         out_shape, data_format='NHWC', use_whiten=True,
                         scope='textbox_process_train'):
    """Build the training-time preprocessing ops for one image.

    Steps, in order: convert to float32 if needed, clip the boxes with
    ``tf_image.clip_bboxes``, crop with
    ``tf_image.distorted_bounding_box_crop``, distort colours, flip randomly
    left/right, resize to ``out_shape``, scale by 255 and apply
    ``tf_image.tf_image_whitened`` with the module-level channel means.

    Args:
        image: Image tensor.
        labels: Tensor of labels for the boxes.
        bboxes: Tensor of box coordinates.
        out_shape: Target size forwarded to ``tf_image.resize_image``.
        data_format: Accepted for interface compatibility; not read here.
        use_whiten: Accepted for interface compatibility; not read here.
        scope: Name scope under which the ops are created.

    Returns:
        ``(image, labels, bboxes, num)`` where ``num`` is the sum of
        ``labels`` cast to int32.
    """
    with tf.name_scope(scope, 'textbox_process_train', [image, labels, bboxes]):
        needs_conversion = image.dtype != tf.float32
        if needs_conversion:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        bboxes = tf_image.clip_bboxes(bboxes)

        # Geometric crop first, then photometric and flip augmentations.
        cropped, labels, bboxes = tf_image.distorted_bounding_box_crop(
            image, labels, bboxes)
        augmented = tf_image.distort_color(cropped)
        augmented, bboxes = tf_image.random_flip_left_right(augmented, bboxes)
        augmented = tf_image.resize_image(augmented, out_shape)

        num = tf.reduce_sum(tf.cast(labels, tf.int32))

        # Scale by 255 before the channel means are applied.
        scaled = augmented * 255.0
        whitened = tf_image.tf_image_whitened(
            scaled, [_R_MEAN, _G_MEAN, _B_MEAN])
        return whitened, labels, bboxes, num
def preprocess_for_eval(image, labels, bboxes, cord,
                        out_shape, data_format='NHWC',
                        scope='txt_preprocessing_test'):
    """Build the evaluation-time preprocessing ops for one image.

    The image is cast to float, resized bilinearly to ``out_shape`` and
    divided by 255. A box covering the whole image (``[0, 0, 1, 1]``) is
    prepended to ``bboxes`` and stays in the returned tensor.

    Args:
        image: Image tensor; its static rank must be 3.
        labels: Tensor of labels, or ``None``.
        bboxes: Tensor of box coordinates, or ``None``.
        cord: Returned unchanged.
        out_shape: Output ``(height, width)`` of the image.
        data_format: Accepted for interface compatibility; not read here.
        scope: Name scope under which the ops are created.

    Returns:
        ``(image, labels, bboxes, cord, num)``. ``num`` is the sum of
        ``labels`` cast to int32, or the Python int ``0`` when ``labels`` is
        ``None``.

    Raises:
        ValueError: If the static rank of ``image`` is not 3.
    """
    with tf.name_scope(scope):
        rank = image.get_shape().ndims
        if rank != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)

        num = 0 if labels is None else tf.reduce_sum(tf.cast(labels, tf.int32))

        # Row 0 of the returned boxes is always the whole-image rectangle.
        whole_image_box = tf.constant([[0., 0., 1., 1.]])
        bboxes = (whole_image_box if bboxes is None
                  else tf.concat([whole_image_box, bboxes], axis=0))

        resized = tf_image.resize_image(
            image,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        resized.set_shape([out_shape[0], out_shape[1], 3])
        normalized = resized / 255.

        return normalized, labels, bboxes, cord, num
Exemplo n.º 5
0
def preprocess_for_eval(image,
                        labels,
                        bboxes,
                        out_shape=EVAL_SIZE,
                        data_format='NHWC',
                        difficults=None,
                        resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Build the evaluation-time preprocessing ops for one image.

    The image is cast to float and passed through ``tf_image_whitened`` with
    the module-level channel means, then resized according to ``resize``. A
    whole-image box is carried through the resize with the other boxes and
    returned separately.

    Args:
        image: Image tensor; its static rank must be 3.
        labels: Tensor of labels for the boxes.
        bboxes: Tensor of box coordinates, or ``None``.
        out_shape: Output ``(height, width)``; used by every strategy except
            ``Resize.NONE``.
        data_format: ``'NCHW'`` transposes the image with ``perm=(2, 0, 1)``;
            any other value keeps the layout.
        difficults: Optional tensor; entries that cast to ``True`` remove the
            matching label and box.
        resize: One of ``Resize.NONE``, ``Resize.CENTRAL_CROP``,
            ``Resize.PAD_AND_RESIZE`` or ``Resize.WARP_RESIZE``.
        scope: Name scope under which the ops are created.

    Returns:
        ``(image, labels, bboxes, bbox_img)`` where ``bbox_img`` is the
        whole-image box after the chosen resize strategy.

    Raises:
        ValueError: If the static rank of ``image`` is not 3.
    """
    with tf.name_scope(scope):
        rank = image.get_shape().ndims
        if rank != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf_image_whitened(tf.to_float(image),
                                  [_R_MEAN, _G_MEAN, _B_MEAN])

        # Prepend the whole-image rectangle so it follows the same transforms.
        whole_image_box = tf.constant([[0., 0., 1., 1.]])
        bboxes = (whole_image_box if bboxes is None
                  else tf.concat([whole_image_box, bboxes], axis=0))

        if resize == Resize.NONE:
            # Image and boxes are left at their input size.
            pass
        elif resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Shrink (never enlarge) so the image fits inside out_shape...
            shape = tf.shape(image)
            no_upscale = tf.to_double(1.0)
            ratio_h = tf.to_double(out_shape[0] / shape[0])
            ratio_w = tf.to_double(out_shape[1] / shape[1])
            factor = tf.minimum(no_upscale, tf.minimum(ratio_h, ratio_w))
            scaled_hw = factor * tf.to_double(shape[0:2])
            scaled_hw = tf.cast(tf.floor(scaled_hw), tf.int32)

            image = tf_image.resize_image(
                image,
                scaled_hw,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # ...then pad up to the requested size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            # Stretch to out_shape regardless of aspect ratio.
            image = tf_image.resize_image(
                image,
                out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Row 0 is the whole-image box; the rest are the object boxes.
        bbox_img, bboxes = bboxes[0], bboxes[1:]

        if difficults is not None:
            keep = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, keep)
            bboxes = tf.boolean_mask(bboxes, keep)

        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes, bbox_img
Exemplo n.º 6
0
def preprocess_for_train(image,
                         labels,
                         bboxes,
                         out_shape,
                         data_format='NHWC',
                         scope='ssd_preprocessing_train'):
    """Build the training-time preprocessing ops for one image.

    Steps, in order: convert to float32 if needed, crop with
    ``distorted_bounding_box_crop``, resize bilinearly to ``out_shape``, flip
    randomly left/right, apply one of four colour-distortion orderings,
    scale by 255 and apply ``tf_image_whitened`` with the module-level
    channel means. Image summaries are emitted after the dtype conversion,
    the resize and the colour distortion.

    Args:
        image: Image tensor; its static rank must be 3.
        labels: Tensor of labels for the boxes.
        bboxes: Tensor of box coordinates.
        out_shape: Target size forwarded to ``tf_image.resize_image``.
        data_format: ``'NCHW'`` transposes the image with ``perm=(2, 0, 1)``;
            any other value keeps the layout.
        scope: Name scope under which the ops are created.

    Returns:
        ``(image, labels, bboxes)``.

    Raises:
        ValueError: If the static rank of ``image`` is not 3.
    """
    fast_mode = False

    def _distort(x, ordering):
        # Adapter giving apply_with_random_selector a two-argument callable.
        return distort_color(x, ordering, fast_mode)

    with tf.name_scope(scope, 'ssd_preprocessing_train',
                       [image, labels, bboxes]):
        rank = image.get_shape().ndims
        if rank != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        tf_summary_image(image, bboxes, 'image_with_bboxes')

        # Random crop; labels and boxes are updated by the helper.
        augmented, labels, bboxes, distort_bbox = distorted_bounding_box_crop(
            image, labels, bboxes, aspect_ratio_range=CROP_RATIO_RANGE)

        augmented = tf_image.resize_image(
            augmented,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)
        tf_summary_image(augmented, bboxes, 'image_shape_distorted')

        augmented, bboxes = tf_image.random_flip_left_right(augmented, bboxes)

        augmented = apply_with_random_selector(augmented, _distort, num_cases=4)
        tf_summary_image(augmented, bboxes, 'image_color_distorted')

        # Scale by 255 before the channel means are applied.
        image = augmented * 255.
        image = tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        if data_format == 'NCHW':
            image = tf.transpose(image, perm=(2, 0, 1))
        return image, labels, bboxes
Exemplo n.º 7
0
def preprocess_for_train(image, labels, bboxes, xs, ys,
                         out_shape, data_format='NHWC',
                         scope='ssd_preprocessing_train', clip=True, crop_area_range=AREA_RANGE):
    """Build the training-time preprocessing ops for one image.

    Steps, in order: convert to float32 if needed, crop with
    ``distorted_bounding_box_crop``, resize bilinearly to ``out_shape``, flip
    randomly left/right, apply one of four colour-distortion orderings,
    convert back to the input dtype (saturating) and cast to float32,
    subtract the module-level channel means, optionally clamp ``bboxes``,
    ``xs`` and ``ys`` to [0, 1], and finally transpose for ``'NCHW'``.

    The layout transpose is the last step on purpose: the final summary call
    must see the same channels-last image as the earlier summary call.

    Args:
        image: Image tensor; its static rank must be 3.
        labels: Tensor of labels for the boxes.
        bboxes: Tensor of box coordinates; clamped against 4-element
            constants, so the last dimension is presumably 4.
        xs: Tensor of x coordinates, transformed together with ``bboxes``;
            clamped against 4-element constants as well.
        ys: Tensor of y coordinates, handled like ``xs``.
        out_shape: Target size forwarded to ``tf_image.resize_image``.
        data_format: ``'NCHW'`` transposes the image with ``perm=(2, 0, 1)``;
            any other value keeps the layout.
        scope: Name scope under which the ops are created.
        clip: When true, clamp ``bboxes``, ``xs`` and ``ys`` to [0, 1].
        crop_area_range: Forwarded to ``distorted_bounding_box_crop`` as
            ``area_range``.

    Returns:
        ``(image, labels, bboxes, xs, ys)``.

    Raises:
        ValueError: If the static rank of ``image`` is not 3.
    """
    fast_mode = False
    with tf.compat.v1.name_scope(scope, 'ssd_preprocessing_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Remembered so the image can be brought back to its input value range.
        orig_dtype = image.dtype
        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)

        # Distort image, bounding boxes and the xs/ys coordinates together.
        dst_image, labels, bboxes, xs, ys, distort_bbox = \
            distorted_bounding_box_crop(image, labels, bboxes, xs, ys,
                                        aspect_ratio_range=CROP_RATIO_RANGE,
                                        min_object_covered=MIN_OBJECT_COVERED,
                                        area_range=crop_area_range)
        # Resize image to output size.
        dst_image = tf_image.resize_image(dst_image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)

        # Randomly flip the image horizontally; boxes and xs/ys follow.
        dst_image, bboxes, xs, ys = tf_image.random_flip_left_right(dst_image, bboxes, xs, ys)

        # Randomly distort the colors. There are 4 ways to do it.
        dst_image = apply_with_random_selector(
            dst_image,
            lambda x, ordering: distort_color(x, ordering, fast_mode),
            num_cases=4)

        tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Back to the input dtype's range (saturating), then float32 for the
        # mean subtraction.
        image = tf.cast(tf.image.convert_image_dtype(dst_image, orig_dtype, saturate=True), dtype=tf.float32)
        image = _mean_image_subtraction(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        if clip:
            lower = tf.constant([0., 0., 0., 0.])
            upper = tf.constant([1., 1., 1., 1.])

            bboxes = tf.maximum(tf.minimum(bboxes, upper), lower)
            xs = tf.minimum(tf.maximum(xs, lower), upper)
            ys = tf.minimum(tf.maximum(ys, lower), upper)

        # Summarise while the image is still channels-last.
        tf_summary_image(image, bboxes, ' image whitened')

        # Image data format: transpose last so nothing above sees NCHW data.
        if data_format == 'NCHW':
            image = tf.transpose(a=image, perm=(2, 0, 1))

        return image, labels, bboxes, xs, ys
Exemplo n.º 8
0
def preprocess_for_eval(image,
                        labels,
                        bboxes,
                        height,
                        width,
                        out_shape=EVAL_SIZE,
                        data_format='NHWC',
                        use_whiten=True,
                        difficults=None,
                        resize=Resize.WARP_RESIZE,
                        scope='ssd_preprocessing_train'):
    """Build the evaluation-time preprocessing ops for one image.

    The image is cast to float and resized according to ``resize``, then
    clamped to [0, 255] and passed through ``tf_image.tf_image_whitened``
    with the module-level channel means. A whole-image box is carried
    through the resize with the other boxes and returned separately.

    Args:
        image: Image tensor; its static rank must be 3.
        labels: Tensor of labels, or ``None``.
        bboxes: Tensor of box coordinates, or ``None``.
        height: Accepted for interface compatibility; not read here.
        width: Accepted for interface compatibility; not read here.
        out_shape: Output ``(height, width)``; used by every strategy except
            ``Resize.NONE``.
        data_format: ``'NHWC'`` keeps the layout; any other value transposes
            the image with ``perm=(2, 0, 1)``.
        use_whiten: Accepted for interface compatibility; not read here.
        difficults: Optional tensor; entries that cast to ``True`` remove the
            matching label and box.
        resize: One of ``Resize.NONE``, ``Resize.CENTRAL_CROP``,
            ``Resize.PAD_AND_RESIZE`` or ``Resize.WARP_RESIZE``.
        scope: Name scope under which the ops are created.

    Returns:
        ``(image, labels, bboxes, bbox_img, num)``. ``bbox_img`` is the
        whole-image box after resizing. ``num`` is the sum of ``labels`` cast
        to int32 (computed before difficult boxes are removed), or the Python
        int ``0`` when ``labels`` is ``None``.

    Raises:
        ValueError: If the static rank of ``image`` is not 3.
    """
    with tf.name_scope(scope):
        rank = image.get_shape().ndims
        if rank != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        image = tf.to_float(image)

        num = 0 if labels is None else tf.reduce_sum(tf.cast(labels, tf.int32))

        # Prepend the whole-image rectangle so it follows the same transforms.
        whole_image_box = tf.constant([[0., 0., 1., 1.]])
        bboxes = (whole_image_box if bboxes is None
                  else tf.concat([whole_image_box, bboxes], axis=0))

        if resize == Resize.NONE:
            # Image and boxes are left at their input size.
            pass
        elif resize == Resize.CENTRAL_CROP:
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.PAD_AND_RESIZE:
            # Shrink (never enlarge) so the image fits inside out_shape...
            shape = tf.shape(image)
            no_upscale = tf.to_double(1.0)
            ratio_h = tf.to_double(out_shape[0] / shape[0])
            ratio_w = tf.to_double(out_shape[1] / shape[1])
            factor = tf.minimum(no_upscale, tf.minimum(ratio_h, ratio_w))
            scaled_hw = factor * tf.to_double(shape[0:2])
            scaled_hw = tf.cast(tf.floor(scaled_hw), tf.int32)

            image = tf_image.resize_image(
                image,
                scaled_hw,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)
            # ...then pad up to the requested size.
            image, bboxes = tf_image.resize_image_bboxes_with_crop_or_pad(
                image, bboxes, out_shape[0], out_shape[1])
        elif resize == Resize.WARP_RESIZE:
            # Stretch to out_shape regardless of aspect ratio.
            image = tf_image.resize_image(
                image,
                out_shape,
                method=tf.image.ResizeMethod.BILINEAR,
                align_corners=False)

        # Row 0 is the whole-image box; the rest are the object boxes.
        bbox_img, bboxes = bboxes[0], bboxes[1:]

        if difficults is not None:
            keep = tf.logical_not(tf.cast(difficults, tf.bool))
            labels = tf.boolean_mask(labels, keep)
            bboxes = tf.boolean_mask(bboxes, keep)

        # Clamp pixel values to [0, 255] before the channel means are applied.
        image = tf.clip_by_value(image, 0., 255.)
        image = tf_image.tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        if data_format != 'NHWC':
            image = tf.transpose(image, perm=(2, 0, 1))

        return image, labels, bboxes, bbox_img, num
Exemplo n.º 9
0
def preprocess_for_train(image, labels, bboxes,
                         out_shape, data_format='NHWC',
                         scope='textbox_process_train'):
    """Build the training-time preprocessing ops for one image.

    Steps, in order: convert to float32 if needed, clamp the boxes to
    [0.0001, 0.9999], crop with ``distorted_bounding_box_crop``, resize
    bilinearly to ``out_shape``, flip randomly left/right, apply one of four
    colour-distortion orderings, scale by 255 and apply
    ``tf_image.tf_image_whitened`` with the module-level channel means.

    Side effects: writes an ``'image_with_box'`` image summary, calls
    ``tf_image.tf_summary_image`` on the colour-distorted image, and adds
    ``num`` to the ``'EXTRA_LOSSES'`` graph collection.

    Args:
        image: Image tensor; its static rank must be 3.
        labels: Tensor of labels for the boxes.
        bboxes: Tensor of box coordinates (documented upstream as [N, 4]).
        out_shape: Output ``(height, width)`` of the image.
        data_format: Accepted for interface compatibility; not read here.
        scope: Name scope under which the ops are created.

    Returns:
        ``(image, labels, bboxes, num)`` where ``num`` is the fifth value
        returned by ``distorted_bounding_box_crop``.

    Raises:
        ValueError: If the static rank of ``image`` is not 3.
    """
    with tf.name_scope(scope, 'textbox_process_train', [image, labels, bboxes]):
        if image.get_shape().ndims != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        # Convert to float scaled [0, 1].
        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        # NOTE: this value is replaced by the crop helper's output below. The
        # ops are kept so that op numbering (and therefore seeds derived from
        # a graph-level seed) is unchanged.
        num = tf.reduce_sum(tf.cast(labels, tf.int32))
        # Keep the boxes strictly inside the unit square.
        bboxes = tf.minimum(bboxes, 0.9999)
        bboxes = tf.maximum(bboxes, 0.0001)

        # Distort image and bounding boxes.
        image, labels, bboxes, distort_bbox, num = \
            distorted_bounding_box_crop(image, labels, bboxes,
                                        aspect_ratio_range=CROP_RATIO_RANGE)

        # Resize image to output size.
        dst_image = tf_image.resize_image(image, out_shape,
                                          method=tf.image.ResizeMethod.BILINEAR,
                                          align_corners=False)

        # Randomly flip the image horizontally.
        dst_image, bboxes = tf_image.random_flip_left_right(dst_image, bboxes)

        bbox_image = tf.image.draw_bounding_boxes(tf.expand_dims(dst_image, 0),
                                                  tf.expand_dims(bboxes, 0))
        tf.summary.image('image_with_box', bbox_image)
        tf.add_to_collection('EXTRA_LOSSES', num)

        dst_image = tf_image.apply_with_random_selector(
            dst_image,
            lambda x, ordering: tf_image.distort_color_2(x, ordering, True),
            num_cases=4)
        tf_image.tf_summary_image(dst_image, bboxes, 'image_color_distorted')

        # Pin the static shape BEFORE deriving the returned tensor. Static
        # shapes are inferred when an op is created, so setting the shape
        # afterwards is not guaranteed to reach the returned image.
        dst_image.set_shape([out_shape[0], out_shape[1], 3])

        # Rescale to normal range
        image = dst_image * 255.
        image = tf_image.tf_image_whitened(image, [_R_MEAN, _G_MEAN, _B_MEAN])

        return image, labels, bboxes, num
def preprocess_for_train(image,
                         labels,
                         bboxes,
                         out_shape,
                         data_format='NHWC',
                         use_whiten=True,
                         scope='textbox_process_train'):
    """Build the training-time preprocessing ops for one image.

    Steps, in order: convert to float32 if needed, clamp the boxes to
    [0, 1], crop with ``distorted_bounding_box_crop`` using a
    ``min_object_covered`` picked from ``OBJECT_COVERED``, resize bilinearly
    to ``out_shape``, flip randomly left/right, apply one of four
    colour-distortion orderings, scale by 255, optionally whiten and divide
    by 255, and clamp the boxes to [0, 1] again.

    NOTE(review): the ``OBJECT_COVERED`` index comes from
    ``np.random.randint`` in plain Python, so it is drawn once per call of
    this function, not once per example flowing through the graph. Confirm
    this is intended.

    Args:
        image: Image tensor; its static rank must be 3.
        labels: Tensor of labels for the boxes.
        bboxes: Tensor of box coordinates (documented upstream as [N, 4]).
        out_shape: Output ``(height, width)`` of the image.
        data_format: Accepted for interface compatibility; not read here.
        use_whiten: When true, apply ``tf_image.tf_image_whitened`` and then
            divide by 255; when false the image stays multiplied by 255.
        scope: Name scope under which the ops are created.

    Returns:
        ``(image, labels, bboxes, num)`` where ``num`` is the fifth value
        returned by ``distorted_bounding_box_crop``.

    Raises:
        ValueError: If the static rank of ``image`` is not 3.
    """
    with tf.name_scope(scope, 'textbox_process_train',
                       [image, labels, bboxes]):
        rank = image.get_shape().ndims
        if rank != 3:
            raise ValueError('Input must be of size [height, width, C>0]')

        if image.dtype != tf.float32:
            image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        # Replaced by the crop helper's output a few lines below.
        num = tf.reduce_sum(tf.cast(labels, tf.int32))
        bboxes = tf.maximum(tf.minimum(bboxes, 1.0), 0.0)

        # Pick one of the first five coverage thresholds (Python-level draw).
        covered_index = np.random.randint(5)
        coverage = OBJECT_COVERED[covered_index]
        image, labels, bboxes, distort_bbox, num = distorted_bounding_box_crop(
            image, labels, bboxes,
            min_object_covered=coverage,
            aspect_ratio_range=CROP_RATIO_RANGE)

        augmented = tf_image.resize_image(
            image,
            out_shape,
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=False)

        augmented, bboxes = tf_image.random_flip_left_right(augmented, bboxes)

        def _distort(x, ordering):
            # Adapter giving apply_with_random_selector a two-argument callable.
            return tf_image.distort_color_2(x, ordering, False)

        augmented = tf_image.apply_with_random_selector(
            augmented, _distort, num_cases=4)

        # Scale by 255 and pin the static shape of the returned tensor.
        image = augmented * 255
        image.set_shape([out_shape[0], out_shape[1], 3])
        if use_whiten:
            image = tf_image.tf_image_whitened(
                image, [_R_MEAN, _G_MEAN, _B_MEAN])
            image = image / 255.0

        # Crop and flip may have pushed boxes outside [0, 1] again.
        bboxes = tf.maximum(tf.minimum(bboxes, 1.0), 0.0)
        return image, labels, bboxes, num