예제 #1
0
    def preprocess_data(self, example):
        """
        Decode one serialized example and run it through preprocessing.

        The example is parsed with ``image_feature_description``. The image
        is decoded as PNG, the ``bbox`` and ``label`` byte strings are decoded
        as int64, and the boxes are cast to float32, reshaped to ``(-1, 4)``
        and passed through ``to_xyxy``.

        When ``self.augment`` is falsy the sample only goes through
        ``self.random_crop`` and a resize to
        ``(self.origin_height, self.origin_width)``. Otherwise brightness and
        contrast adjustment, optional random cropping, horizontal flipping,
        optional Gaussian blur and ``resize_and_pad_image`` are applied, and
        the normalized boxes are multiplied by the shape that
        ``resize_and_pad_image`` reports.

        Args:
            example: A serialized example accepted by
                ``tf.io.parse_single_example``.

        Returns:
            A tuple ``(image, bbox, label)``. ``bbox`` is passed through
            ``convert_to_xywh`` when ``self.convert_xywh`` is truthy.
        """
        record = tf.io.parse_single_example(example, image_feature_description)
        image = tf.image.decode_png(record["image"])
        raw_boxes = tf.io.decode_raw(record["bbox"], out_type=tf.int64)
        bbox = tf.cast(raw_boxes, dtype=tf.float32)

        label = tf.io.decode_raw(record["label"], out_type=tf.int64)
        bbox = to_xyxy(tf.reshape(bbox, (-1, 4)))

        if self.dynamic_size:
            # Store the first two dimensions of the decoded image on the
            # instance; they are read again further down.
            dims = tf.shape(image)
            self.origin_height = dims[0]
            self.origin_width = dims[1]

        if not self.augment:
            # NOTE(review): a crop is still applied when augmentation is off,
            # and the boxes are not touched by the resize below — confirm
            # that random_crop makes this consistent.
            image, bbox, label = self.random_crop(image, bbox, label)
            target_size = (self.origin_height, self.origin_width)
            image = tf.image.resize(image, target_size)
            if self.convert_xywh:
                bbox = convert_to_xywh(bbox)
            return image, bbox, label

        # Photometric augmentation.
        image = augmentation.random_adjust_brightness(image)
        image = augmentation.random_adjust_contrast(image)

        # Crop whenever a small box is present, otherwise with a coin flip.
        # Per the original author's note, the crop is meant to keep at least
        # one bounding box.
        small_box_present = has_small_bbox(bbox)
        if self.random_cropping and tf.logical_or(
                small_box_present, tf.random.uniform(()) > 0.5):
            image, bbox, label = self.random_crop(image, bbox, label)

        bbox = normalize_bbox(bbox, self.origin_width, self.origin_height)
        image, bbox = augmentation.random_flip_horizontal(image, bbox)

        # Blur is skipped for samples that contain a small box.
        if not small_box_present:
            image = augmentation.random_gaussian_blur(image, 0.5)

        image, image_shape, _ = resize_and_pad_image(image, jitter=None)
        dim0, dim1 = image_shape[0], image_shape[1]

        # Columns 0 and 2 are scaled by image_shape[1], columns 1 and 3 by
        # image_shape[0].
        scaled_columns = [
            bbox[:, 0] * dim1,
            bbox[:, 1] * dim0,
            bbox[:, 2] * dim1,
            bbox[:, 3] * dim0,
        ]
        bbox = tf.stack(scaled_columns, axis=-1)

        if self.convert_xywh:
            bbox = convert_to_xywh(bbox)

        return image, bbox, label
예제 #2
0
def preprocess_data(sample, img_dims=384, pad_flag=True):
    """
    Applies preprocessing step to a single sample.

    Arguments:
      sample: A dict representing a single training sample. The keys read
        here are `image`, `objects` (with `bbox` and `label`), `l_jitter`,
        `u_jitter` and, when `pad_flag` is true, `min_side` and `max_side`.
      img_dims: Side length of the square the image is resized to when
        `pad_flag` is false.
      pad_flag: When true the image is handed to `resize_and_pad_image`;
        otherwise it is resized to `img_dims` x `img_dims` and rescaled with
        `image / 127.5 - 1.0`.

    Returns:
      image: The preprocessed image with random horizontal flipping applied.
      bbox: Bounding boxes after `random_flip_horizontal`, `swap_xy` and
        `convert_to_xywh`; expected to have shape `(num_objects, 4)` with
        each box in the format `[x, y, width, height]`.
      class_id: A tensor representing the class id of the objects, cast to
        int32; expected to have shape `(num_objects,)`.
      img_shp: The image shape reported by `resize_and_pad_image`, or
        `[img_dims, img_dims]` as float32 when `pad_flag` is false.
    """
    jitter = [sample["l_jitter"], sample["u_jitter"]]

    image = _parse_image(sample["image"])
    if not pad_flag:
        image = tf.image.resize(image, [img_dims, img_dims])

    bbox = tf.cast(sample["objects"]["bbox"], tf.float32)
    class_id = tf.cast(sample["objects"]["label"], dtype=tf.int32)

    image, bbox = random_flip_horizontal(image, bbox)
    if pad_flag:
        image, img_shp, _ = resize_and_pad_image(
            image,
            min_side=sample["min_side"],
            max_side=sample["max_side"],
            jitter=jitter)
    else:
        image = image / 127.5 - 1.0
        img_shp = tf.cast([img_dims, img_dims], tf.float32)

    bbox = swap_xy(bbox)
    bbox = convert_to_xywh(bbox)
    # The tensor is returned directly. The previous
    # `tf.constant(bbox.numpy())` round trip yielded the same values in eager
    # mode but raised when this function was traced (e.g. in `Dataset.map`),
    # because symbolic tensors have no `.numpy()`.
    return image, bbox, class_id, img_shp
예제 #3
0
def resize_image(sample):
    """
    Resize a sample's image to a square and convert its annotations.

    Arguments:
      sample: A dict providing `image`, `min_side` and `objects` (with
        `bbox` and `label`).

    Returns:
      image: The parsed image resized to `min_side` x `min_side` and
        rescaled with `image / 127.5 - 1.0`.
      bbox: The boxes after `swap_xy`, a float32 cast and `convert_to_xywh`.
      class_id: The labels cast to int32.
    """
    pixels = _parse_image(sample["image"])
    side = sample["min_side"]
    pixels = tf.image.resize(pixels, [side, side])
    pixels = pixels / 127.5 - 1.0

    swapped = swap_xy(sample["objects"]["bbox"])
    boxes = convert_to_xywh(tf.cast(swapped, tf.float32))

    labels = tf.cast(sample["objects"]["label"], dtype=tf.int32)
    return pixels, boxes, labels
예제 #4
0
def _is_empty_crop(img):
    """Return True when a crop is empty or has a zero-length axis."""
    if len(img) == 0:
        return True
    return any(dim == 0 for dim in getattr(img, "shape", ()))


def prepare_data(datasets, annotations, threthoud_pos, threthoud_neg,
                 save_path):
    """
    Crop face and background patches from every image and write them to disk.

    For each dataset entry the ground-truth ellipses are converted with
    `ellipse_to_rectangle` and `convert_to_xywh`, and each resulting box is
    cropped and saved under `<save_path>1/`. Every candidate yielded by
    `generate_selective_search` is then compared against all ground-truth
    boxes with `IOU_calculator`; it is saved under `<save_path>1/` when its
    best IoU exceeds `threthoud_pos`, under `<save_path>0/` when its best IoU
    is below `threthoud_neg`, and dropped otherwise.

    The module-level counters `COUNT_FACE` and `COUNT_BACKGROUND` are
    incremented for every patch written and are used in the file names.

    Arguments:
      datasets: Sequence of `(image_dir, num_of_faces, gts)` entries.
      annotations: Not used by this function; kept so existing callers keep
        working.
      threthoud_pos: A candidate whose best IoU is above this is a face.
      threthoud_neg: A candidate whose best IoU is below this is background.
      save_path: Path prefix; `1/` and `0/` are appended to it verbatim, so
        it should end with a path separator.

    Returns:
      None. Results are written with `cv2.imwrite` and progress is printed.
    """
    global COUNT_FACE, COUNT_BACKGROUND
    total = len(datasets)
    for i, (image_dir, num_of_faces, gts) in enumerate(datasets):
        gts = convert_to_xywh(ellipse_to_rectangle(num_of_faces, gts))

        # Positive samples taken directly from the ground truth.
        for gt in gts:
            img = crop_image(image_dir, gt)
            if _is_empty_crop(img):
                continue
            COUNT_FACE += 1
            cv2.imwrite(f"{save_path}1/{i}_{COUNT_FACE}.jpg", img)

        # Region proposals, labelled by their best overlap with any face.
        for candidate in generate_selective_search(image_dir):
            x, y, w, h = candidate
            img = crop_image(image_dir, candidate)
            # Zero-sized crops are skipped here as well; previously only
            # ground-truth crops were checked for a zero-length axis.
            if _is_empty_crop(img):
                continue
            # `default=0.0` covers images without any ground-truth box: a
            # candidate then overlaps nothing, where `max([])` used to raise
            # ValueError.
            best_iou = max(
                (IOU_calculator(x + w / 2, y + h / 2, w, h,
                                gt[0] + gt[2] / 2, gt[1] + gt[3] / 2,
                                gt[2], gt[3])
                 for gt in gts),
                default=0.0)
            if best_iou > threthoud_pos:
                COUNT_FACE += 1
                cv2.imwrite(f"{save_path}1/{i}_{COUNT_FACE}.jpg", img)
            elif best_iou < threthoud_neg:
                COUNT_BACKGROUND += 1
                cv2.imwrite(f"{save_path}0/{i}_{COUNT_BACKGROUND}.jpg", img)
        print(
            f"====>>> {i}/{total}: Face: {COUNT_FACE}, Background: {COUNT_BACKGROUND}"
        )
예제 #5
0
 def _encode_sample(self, gt_boxes, cls_ids):
     """Build the box and classification targets for a single sample.

     The ground-truth boxes are cast to float32 and given a leading axis of
     size 1, converted with `utils.convert_to_xywh`, and encoded against
     `self._anchor_box` by `self._compute_box_target`. A column of ones and
     a column of zeros are then appended along axis 1.

     Returns:
       The box targets concatenated with the two class columns.
     """
     anchors = self._anchor_box
     boxes = tf.cast(gt_boxes, dtype=tf.float32)
     boxes = tf.reshape(boxes, ((1, ) + boxes.shape))
     # The float copy of the class ids and the matching results are not
     # consumed below; both calls are kept as in the original.
     _cls_ids_f32 = tf.cast(cls_ids, dtype=tf.float32)
     _matched_idx, _pos_mask, _ignore_mask = self._match_anchor_boxes(
         anchors, boxes)
     boxes_xywh = utils.convert_to_xywh(boxes)
     box_target = self._compute_box_target(anchors, boxes_xywh)
     # Every row is marked as foreground; the background column is derived
     # by comparing that column of ones with zero, so it is all zeros.
     fg_column = tf.ones((self.num_boxes, 1), dtype=tf.float32)
     bg_column = tf.cast(tf.equal(fg_column, 0.), dtype=tf.float32)
     return tf.concat([box_target, fg_column, bg_column], axis=1)
예제 #6
0
    def decode_sample(self, example):
        """
        Decode one serialized example and apply cropping, augmentation,
        resizing and optional mixup.

        Steps, in order:
          1. Parse `example` with `image_feature_description`; decode the PNG
             image, the int64 boxes (cast to float32, reshaped to `(-1, 4)`)
             and the int64 labels.
          2. Pass the image height and width to `self.set_height` /
             `self.set_width`.
          3. Clamp box columns 0 and 1 to be at least 0, column 2 to at most
             the image width and column 3 to at most the image height.
          4. When `has_small_bbox` is true, apply `self.random_crop` with a
             coin flip.
          5. When `self.augment` is truthy, adjust brightness and contrast,
             and apply random hue and random saturation, each only when a
             uniform draw is >= 0.8.
          6. Normalize the boxes, randomly flip horizontally, call
             `resize_and_pad_image`, and multiply the boxes by the shape it
             returns.
          7. With a coin flip, when `self.iterator` is set and `self.is_iter`
             is falsy, fetch a second sample from `self.iterator` and blend
             it in (mixup) if the first two image dimensions match.
          8. Convert boxes with `convert_to_xywh` when `self.convert` is
             truthy and `self.is_iter` is falsy.

        Args:
            example: A serialized example accepted by
                `tf.io.parse_single_example`.

        Returns:
            A tuple `(image, bbox, label)`.
        """
        sample = tf.io.parse_single_example(example, image_feature_description)
        image = tf.image.decode_png(sample["image"])
        bbox = tf.cast(
            tf.io.decode_raw(sample["bbox"], out_type=tf.int64), dtype=tf.float32
        )
        label = tf.io.decode_raw(sample["label"], out_type=tf.int64)
        bbox = tf.reshape(bbox, (-1, 4))

        shape = tf.shape(image)


        # Publish the decoded image size on the instance.
        self.set_height(shape[0])
        self.set_width(shape[1])

        # Float copies are needed for the clamping and normalization below.
        shape = tf.cast(shape, tf.float32)
        width = shape[1]
        height = shape[0]

        # Clamp the boxes to the image: columns 0/1 from below by 0,
        # column 2 from above by width, column 3 from above by height.
        bbox = tf.stack([
            tf.maximum(bbox[:, 0], 0),
            tf.maximum(bbox[:, 1], 0),
            tf.minimum(bbox[:, 2], width),
            tf.minimum(bbox[:, 3], height), 
        ], axis=-1)

        
        # Crop only when a small box is present, and then only half the time.
        if has_small_bbox(width, height, bbox) and tf.random.uniform(()) > 0.5:
            image, bbox, label = self.random_crop(image, bbox, label)
        
        if self.augment:
            image = random_adjust_brightness(image)
            image = random_adjust_contrast(image)

            # Hue and saturation changes are each applied only when the
            # uniform draw is >= 0.8.
            if tf.random.uniform(()) >= 0.8:
                image = tf.image.random_hue(image, 0.1)

            if tf.random.uniform(()) >= 0.8:
                image = tf.image.random_saturation(image, 0.1, 0.5)

        # NOTE(review): width/height were measured before the optional
        # random_crop above — confirm random_crop keeps the boxes in a
        # coordinate frame of that size.
        bbox = normalize_bbox(bbox, width, height)

        image, bbox = random_flip_horizontal(image, bbox, 0.5)
        image, image_shape, _ = resize_and_pad_image(image,
                                                     self.resize,
                                                     self.resize, jitter=None)
        # `w` takes image_shape[0] and `h` takes image_shape[1]; columns 0 and
        # 2 are therefore scaled by image_shape[1], columns 1 and 3 by
        # image_shape[0].
        w, h = image_shape[0], image_shape[1]

        bbox = tf.stack([
            bbox[:, 0] * h,
            bbox[:, 1] * w,
            bbox[:, 2] * h,
            bbox[:, 3] * w,
        ], axis=-1)

        # Optionally mix in a second sample drawn from self.iterator.
        # Skipped when self.is_iter is truthy.
        if self.iterator and not self.is_iter and tf.random.uniform(()) > 0.5:
            image_, bbox_, label_ = self.iterator.get_next()
            shape = tf.shape(image)
            shape_ = tf.shape(image_)

            # mixup: only when the first two dimensions of both images match
            if shape_[0] == shape[0] and shape[1] == shape_[1]:
                # The cast happens even when the second sample has no labels.
                image = tf.cast(image, tf.float32)
                if tf.size(label_) > 0:
                    bbox = tf.concat([bbox, bbox_], axis=0)
                    label = tf.concat([label, label_], axis=0)
                    # Blend weight drawn uniformly from [0.35, 0.65).
                    r = tf.random.uniform((), 0.35, 0.65)
                    image = image * r + image_ * (1 - r)

            # copy-paste (currently disabled)
            # image, bbox, label = self.copy_paste(
            #     image, bbox, label,
            #     image_, bbox_, label_
            # )

        if self.convert and not self.is_iter:
            bbox = convert_to_xywh(bbox)

        return image, bbox, label