Example #1
import random

import numpy as np

# `bytes_to_file`, `read_split_image`, `shift_and_resize_image`, and
# `normalize_image` are project helpers assumed to be defined elsewhere.
def process(img, augment):
    img = bytes_to_file(img)

    try:
        img_A, img_B = read_split_image(img)
        if augment:
            # Augment the image by:
            # 1) enlarging the image, then
            # 2) randomly cropping it back to its original size.
            # NOTE: images A and B must be shifted by the same amount
            # so that they stay aligned.
            w, h = img_A.shape
            multiplier = random.uniform(1.00, 1.20)
            # add one pixel of slack so the random crop offsets stay in a valid range
            nw = int(multiplier * w) + 1
            nh = int(multiplier * h) + 1
            shift_x = int(np.ceil(np.random.uniform(0.01, nw - w)))
            shift_y = int(np.ceil(np.random.uniform(0.01, nh - h)))
            img_A = shift_and_resize_image(img_A, shift_x, shift_y, nw, nh)
            img_B = shift_and_resize_image(img_B, shift_x, shift_y, nw, nh)

        img_A = normalize_image(img_A)
        img_B = normalize_image(img_B)

        # add a channel dimension: 2D matrix -> [H, W, 1]
        img_A = np.reshape(img_A, [img_A.shape[0], img_A.shape[1], 1])
        img_B = np.reshape(img_B, [img_B.shape[0], img_B.shape[1], 1])

        return np.concatenate([img_A, img_B], axis=2)
    finally:
        img.close()
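
A minimal usage sketch, assuming the helpers above and a paired (side-by-side A/B) image stored as raw bytes; the file name and the byte format expected by bytes_to_file are assumptions:

with open("pair_0001.png", "rb") as f:  # hypothetical file
    raw = f.read()

sample = process(raw, augment=True)
print(sample.shape)  # (H, W, 2): channel 0 is image A, channel 1 is image B
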
Example #2
import numpy as np

# `random_flip`, `random_rotate`, `random_brightness`, `normalize_image`,
# and `generate_gt` are project helpers assumed to be defined elsewhere.
def dataloader(images, labels, anchors, batch_size=64, augment=True):
    """
    images: [num_images, image_width, image_height, 3]
    labels: [num_labels, num_gt, 4(tcx, tcy, tw, th)]
    returns: ([batch_size, image_width, image_height, 3],
              [batch_size, num_boxes, 5(conf, tcx, tcy, tw, th)])
    """
    data_keys = np.arange(len(images))
    while True:
        selected_keys = np.random.choice(
            data_keys, replace=False, size=batch_size)

        image_batch = []
        label_batch = []
        for key in selected_keys:
            image = np.array(images[key], dtype=np.float32)
            label = np.array(labels[key], dtype=np.float32)

            # do augmentation
            if augment:
                image, label = random_flip(image, label)
                image, label = random_rotate(image, label)
                image = random_brightness(image)

            image = np.array(image, dtype=np.float32)
            image = normalize_image(image)
            image_batch.append(image)
            label_batch.append(label)

        gt_batch = generate_gt(label_batch, anchors)
        yield (np.array(image_batch, dtype=np.float32),
               np.array(gt_batch, dtype=np.float32))
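
Pulling one batch from the generator might look like the following sketch; `train_images`, `train_labels`, and `anchors` are placeholders whose shapes follow the docstring above:

gen = dataloader(train_images, train_labels, anchors, batch_size=32)
image_batch, gt_batch = next(gen)
# image_batch: (32, image_width, image_height, 3)
# gt_batch:    (32, num_boxes, 5)
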
Example #3
    # Method of a data-parser class; the `self._*` attributes (output sizes,
    # anchor instance, matching thresholds, padding length) are configured
    # elsewhere in the class.
    def _parse_train_data(self, data):
        is_crowds = data['gt_is_crowd']
        classes = data['gt_classes']
        boxes = data['gt_bboxes']
        masks = data['gt_masks']
        image_height = data['height']
        image_width = data['width']

        # Skips annotations with `is_crowd` = True.
        # TODO: Need to understand tf.control_dependencies and tf.gather
        # if self._skip_crowd_during_training and self._is_training:
        #     num_groundtruths = tf.shape(input=classes)[0]
        #     with tf.control_dependencies([num_groundtruths, is_crowds]):
        #         indices = tf.cond(
        #             pred=tf.greater(tf.size(input=is_crowds), 0),
        #             true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
        #             false_fn=lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
        #     classes = tf.gather(classes, indices)
        #     boxes = tf.gather(boxes, indices)
        #     masks = tf.gather(masks, indices)

        # read and normalize the image
        image = data['image']

        # convert image to range [0, 1] to facilitate augmentation
        image = normalize_image(image)

        # the image was already resized when the tfrecord was created
        # image = tf.image.resize(image, [self._output_size, self._output_size])

        # Replace grayscale images (channel count != 3) with a dummy
        # all-ones RGB image so downstream shapes stay consistent.
        image = tf.cond(
            tf.equal(tf.shape(image)[-1], tf.constant(3)),
            true_fn=lambda: image,
            false_fn=lambda: tf.ones([image_height, image_width, 3])
        )

        # resize masks to the prototype output size; nearest neighbor
        # keeps the mask values binary
        masks = tf.expand_dims(masks, axis=-1)
        masks = tf.image.resize(masks, [self._proto_output_size, self._proto_output_size],
                                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        masks = tf.cast(masks + 0.5, tf.int64)  # round, keeping the mask binary
        masks = tf.squeeze(masks)
        masks = tf.cast(masks, tf.float32)

        # TODO: SSD data augmentation (photometric distortion, expand,
        # sample_crop, mirroring), applied randomly
        # image, boxes, masks, classes = augmentation.random_augmentation(image, boxes, masks, self._output_size,
        #                                                                 self._proto_output_size, classes)

        # convert normalized box coordinates back to pixel coordinates
        # in the resized image
        boxes = boxes * self._output_size

        # number of objects in the training sample
        num_obj = tf.size(classes)

        # resized boxes for proto output size
        boxes_norm = boxes * (self._proto_output_size / self._output_size)

        # matching anchors
        cls_targets, box_targets, max_id_for_anchors, match_positiveness = self._anchor_instance.matching(
            self._match_threshold, self._unmatched_threshold, boxes, classes)

        # Pad classes, boxes, and masks to a fixed length [None, num_max_fix_padding, ...]
        # Background --> 0
        num_padding = self._num_max_fix_padding - tf.shape(classes)[0]
        pad_classes = tf.zeros([num_padding], dtype=tf.int64)
        pad_boxes = tf.zeros([num_padding, 4])
        pad_masks = tf.zeros([num_padding, self._proto_output_size, self._proto_output_size])

        # tf.squeeze drops the leading object dimension when there is only
        # one mask, so restore it before padding
        if tf.shape(classes)[0] == 1:
            masks = tf.expand_dims(masks, axis=0)

        masks = tf.concat([masks, pad_masks], axis=0)
        classes = tf.concat([classes, pad_classes], axis=0)
        boxes = tf.concat([boxes, pad_boxes], axis=0)
        boxes_norm = tf.concat([boxes_norm, pad_boxes], axis=0)

        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'bbox': boxes,
            'bbox_for_norm': boxes_norm,
            'positiveness': match_positiveness,
            'classes': classes,
            'num_obj': num_obj,
            'mask_target': masks,
            'max_id_for_anchors': max_id_for_anchors
        }
        return image, labels
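
A hedged sketch of wiring this parser into a tf.data pipeline; `parser` (an instance of the surrounding class), `decode_tfrecord`, and the file name are assumptions, not part of the original code:

import tensorflow as tf

# Hypothetical wiring; decode_tfrecord must yield the dict of 'image',
# 'gt_*', 'height', and 'width' keys this parser expects.
dataset = (tf.data.TFRecordDataset("train.tfrecord")
           .map(decode_tfrecord)
           .map(parser._parse_train_data)
           .batch(8)
           .prefetch(tf.data.AUTOTUNE))
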
Example #4
    # Evaluation counterpart of `_parse_train_data`; uses the same `self._*`
    # configuration attributes.
    def _parse_eval_data(self, data):
        is_crowds = data['gt_is_crowd']
        classes = data['gt_classes']
        boxes = data['gt_bboxes']
        masks = data['gt_masks']
        image_height = data['height']
        image_width = data['width']

        # Skips annotations with `is_crowd` = True.
        # TODO: Need to understand tf.control_dependencies and tf.gather
        if self._skip_crowd_during_training and self._is_training:
            num_groundtruths = tf.shape(input=classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                indices = tf.cond(
                    pred=tf.greater(tf.size(input=is_crowds), 0),
                    true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    false_fn=lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            masks = tf.gather(masks, indices)

        # read and normalize the image
        image = data['image']
        image = normalize_image(image)

        # Replace grayscale images (channel count != 3) with a dummy
        # all-ones RGB image so downstream shapes stay consistent.
        image = tf.cond(
            tf.equal(tf.shape(image)[-1], tf.constant(3)),
            true_fn=lambda: image,
            false_fn=lambda: tf.ones([image_height, image_width, 3]))

        # resize mask
        masks = tf.expand_dims(masks, axis=-1)
        # use nearest neighbor so the resized mask stays binary
        masks = tf.image.resize(
            masks, [self._proto_output_size, self._proto_output_size],
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        masks = tf.cast(masks + 0.5, tf.int64)  # round, keeping the mask binary
        masks = tf.squeeze(tf.cast(masks, tf.float32))

        # convert normalized box coordinates back to pixel coordinates
        # in the resized image
        boxes = boxes * self._output_size

        # number of objects in the sample
        num_obj = tf.size(classes)

        # resized boxes for proto output size
        boxes_norm = boxes * (self._proto_output_size / self._output_size)

        # matching anchors
        cls_targets, box_targets, max_id_for_anchors, match_positiveness = self._anchor_instance.matching(
            self._match_threshold, self._unmatched_threshold, boxes, classes)

        # Pad classes, boxes, and masks to a fixed length [None, num_max_fix_padding, ...]
        num_padding = self._num_max_fix_padding - tf.shape(classes)[0]
        pad_classes = tf.zeros([num_padding], dtype=tf.int64)
        pad_boxes = tf.zeros([num_padding, 4])
        pad_masks = tf.zeros(
            [num_padding, self._proto_output_size, self._proto_output_size])

        # tf.squeeze drops the leading object dimension when there is only
        # one mask, so restore it before padding
        if tf.shape(classes)[0] == 1:
            masks = tf.expand_dims(masks, axis=0)

        masks = tf.concat([masks, pad_masks], axis=0)
        classes = tf.concat([classes, pad_classes], axis=0)
        boxes = tf.concat([boxes, pad_boxes], axis=0)
        boxes_norm = tf.concat([boxes_norm, pad_boxes], axis=0)

        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'bbox': boxes,
            'bbox_for_norm': boxes_norm,
            'positiveness': match_positiveness,
            'classes': classes,
            'num_obj': num_obj,
            'mask_target': masks,
            'max_id_for_anchors': max_id_for_anchors
        }
        return image, labels
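
The evaluation side can be wired the same way, again with `parser` and `decode_tfrecord` assumed as in the sketch after Example #3:

eval_dataset = (tf.data.TFRecordDataset("val.tfrecord")
                .map(decode_tfrecord)
                .map(parser._parse_eval_data)
                .batch(1))
image, labels = next(iter(eval_dataset))  # labels is the dict built above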