def preprocess_data(self, example):
    """Decode one serialized example and run the preprocessing pipeline on it.

    The example is parsed with ``image_feature_description``. The image is
    PNG-decoded; the ``"bbox"`` and ``"label"`` byte strings are decoded as
    int64. Boxes are cast to float32, reshaped to ``(-1, 4)`` and passed
    through ``to_xyxy``.

    Side effect: when ``self.dynamic_size`` is truthy, ``self.origin_width``
    and ``self.origin_height`` are overwritten from the decoded image shape.

    Without augmentation (``self.augment`` falsy) the sample is random-cropped,
    the image is resized to ``(origin_height, origin_width)`` and the result is
    returned immediately. With augmentation the sample goes through brightness
    and contrast jitter, an optional random crop, box normalisation, a random
    horizontal flip, an optional gaussian blur and ``resize_and_pad_image``;
    the boxes are then scaled by the shape that helper reports.

    Args:
        example: A serialized example accepted by
            ``tf.io.parse_single_example``.

    Returns:
        A tuple ``(image, bbox, label)``. ``bbox`` is passed through
        ``convert_to_xywh`` when ``self.convert_xywh`` is truthy.
    """
    parsed = tf.io.parse_single_example(example, image_feature_description)
    image = tf.image.decode_png(parsed["image"])
    raw_boxes = tf.io.decode_raw(parsed["bbox"], out_type=tf.int64)
    bbox = tf.cast(raw_boxes, dtype=tf.float32)
    label = tf.io.decode_raw(parsed["label"], out_type=tf.int64)
    bbox = to_xyxy(tf.reshape(bbox, (-1, 4)))

    if self.dynamic_size:
        decoded_shape = tf.shape(image)
        self.origin_width = decoded_shape[1]
        self.origin_height = decoded_shape[0]

    if not self.augment:
        # NOTE(review): the non-augmenting path still applies a random crop;
        # confirm this is intended.
        image, bbox, label = self.random_crop(image, bbox, label)
        target_size = (self.origin_height, self.origin_width)
        image = tf.image.resize(image, target_size)
        if self.convert_xywh:
            bbox = convert_to_xywh(bbox)
        return image, bbox, label

    # Photometric augmentation.
    image = augmentation.random_adjust_brightness(image)
    image = augmentation.random_adjust_contrast(image)

    # Crop when a small box is present, otherwise crop at random
    # (uniform draw > 0.5).
    has_smallb = has_small_bbox(bbox)
    if self.random_cropping and tf.logical_or(
            has_smallb, tf.random.uniform(()) > 0.5):
        image, bbox, label = self.random_crop(image, bbox, label)

    bbox = normalize_bbox(bbox, self.origin_width, self.origin_height)
    image, bbox = augmentation.random_flip_horizontal(image, bbox)
    if not has_smallb:
        image = augmentation.random_gaussian_blur(image, 0.5)

    image, padded_shape, _ = resize_and_pad_image(image, jitter=None)
    # Columns 0 and 2 are scaled by padded_shape[1], columns 1 and 3 by
    # padded_shape[0].
    # NOTE(review): presumably padded_shape is (height, width) - confirm
    # against resize_and_pad_image.
    dim0, dim1 = padded_shape[0], padded_shape[1]
    column_scales = (dim1, dim0, dim1, dim0)
    bbox = tf.stack(
        [bbox[:, col] * scale for col, scale in enumerate(column_scales)],
        axis=-1,
    )

    if self.convert_xywh:
        bbox = convert_to_xywh(bbox)
    return image, bbox, label
def preprocess_data(sample, img_dims=384, pad_flag=True):
    """Applies the preprocessing steps to a single sample.

    The box tensor is returned directly. It is no longer round-tripped through
    ``.numpy()`` and ``tf.constant``: that produced the same values in eager
    mode but raised on symbolic tensors (for example inside
    ``tf.data.Dataset.map``).

    Arguments:
      sample: A dict representing a single training sample. The keys read
        here are `"image"`, `"objects"` (with `"bbox"` and `"label"`),
        `"l_jitter"` and `"u_jitter"`, plus `"min_side"` and `"max_side"`
        when `pad_flag` is true.
      img_dims: Side length of the square the image is resized to when
        `pad_flag` is false. Unused otherwise.
      pad_flag: When true, the image goes through `resize_and_pad_image`.
        When false, it is resized to `img_dims x img_dims` and rescaled with
        `image / 127.5 - 1.0`.

    Returns:
      image: Resized (and, with `pad_flag`, padded) image with random
        horizontal flipping applied.
      bbox: Bounding boxes with the shape `(num_objects, 4)` where each box is
        of the format `[x, y, width, height]`.
      class_id: An int32 tensor representing the class id of the objects,
        having shape `(num_objects,)`.
      img_shp: With `pad_flag`, the shape reported by `resize_and_pad_image`;
        otherwise the float32 tensor `[img_dims, img_dims]`.
    """
    jitter = [sample["l_jitter"], sample["u_jitter"]]
    image = _parse_image(sample["image"])
    if not pad_flag:
        image = tf.image.resize(image, [img_dims, img_dims])

    bbox = tf.cast(sample["objects"]["bbox"], tf.float32)
    class_id = tf.cast(sample["objects"]["label"], dtype=tf.int32)

    image, bbox = random_flip_horizontal(image, bbox)
    if pad_flag:
        # The third value returned by the helper is not needed here.
        image, img_shp, _ = resize_and_pad_image(
            image,
            min_side=sample["min_side"],
            max_side=sample["max_side"],
            jitter=jitter)
    else:
        # presumably maps 0..255 pixel values onto [-1, 1] - confirm the
        # value range produced by _parse_image.
        image = image / 127.5 - 1.0
        img_shp = tf.cast([img_dims, img_dims], tf.float32)

    bbox = convert_to_xywh(swap_xy(bbox))
    return image, bbox, class_id, img_shp
def resize_image(sample):
    """Resize a sample's image to a square and convert its boxes to xywh.

    Arguments:
      sample: A dict with the keys `"image"`, `"min_side"` and `"objects"`
        (the latter holding `"bbox"` and `"label"`).

    Returns:
      image: The parsed image resized to `min_side x min_side` and rescaled
        with `image / 127.5 - 1.0`.
      bbox: The boxes after `swap_xy`, a float32 cast and `convert_to_xywh`.
      class_id: The labels cast to int32.
    """
    parsed = _parse_image(sample["image"])
    side = sample["min_side"]
    image = tf.image.resize(parsed, [side, side]) / 127.5 - 1.0

    objects = sample["objects"]
    swapped = swap_xy(objects["bbox"])
    bbox = convert_to_xywh(tf.cast(swapped, tf.float32))
    class_id = tf.cast(objects["label"], dtype=tf.int32)
    return image, bbox, class_id
def _is_empty_crop(img):
    """Return True when a crop has no pixels (empty, or any axis of length 0)."""
    return len(img) == 0 or 0 in img.shape


def prepare_data(datasets, annotations, threthoud_pos, threthoud_neg, save_path):
    """Crop face and background patches from every image and write them to disk.

    For each ``(image_dir, num_of_faces, gts)`` entry of ``datasets``:

    1. The ground-truth ellipses are converted to ``[x, y, w, h]`` rectangles
       and each non-empty crop is saved under ``<save_path>1/``.
    2. Every selective-search candidate is cropped and compared against all
       ground-truth boxes. If its best IoU is above ``threthoud_pos`` it is
       saved under ``<save_path>1/``; if below ``threthoud_neg`` it is saved
       under ``<save_path>0/``; otherwise it is discarded.

    An image without ground-truth boxes yields a best IoU of 0 for every
    candidate, so candidates are treated as background rather than raising
    on ``max()`` of an empty sequence. Zero-sized crops are skipped in both
    loops so they never reach ``cv2.imwrite``.

    Side effects: increments the module-level counters ``COUNT_FACE`` and
    ``COUNT_BACKGROUND``, writes JPEG files, and prints one progress line per
    image.

    Args:
        datasets: Sequence of ``(image_dir, num_of_faces, gts)`` triples.
        annotations: Unused; kept for interface compatibility.
        threthoud_pos: A candidate with IoU strictly above this is a face.
        threthoud_neg: A candidate with IoU strictly below this is background.
        save_path: Path prefix, concatenated directly with ``'1/'`` / ``'0/'``
            (so it should already end with a separator).
    """
    global COUNT_FACE, COUNT_BACKGROUND

    for i, (image_dir, num_of_faces, gts) in enumerate(datasets):
        gts = convert_to_xywh(ellipse_to_rectangle(num_of_faces, gts))

        # Ground-truth faces are always positive samples.
        for gt in gts:
            img = crop_image(image_dir, gt)
            if _is_empty_crop(img):
                continue
            COUNT_FACE += 1
            cv2.imwrite(f"{save_path}1/{i}_{COUNT_FACE}.jpg", img)

        # Region proposals are labelled by their best overlap with any face.
        for candidate in generate_selective_search(image_dir):
            x, y, w, h = candidate
            img = crop_image(image_dir, candidate)
            if _is_empty_crop(img):
                continue

            # IOU_calculator is called with centre coordinates plus width and
            # height for both boxes.
            best_iou = max(
                (
                    IOU_calculator(x + w / 2, y + h / 2, w, h,
                                   gt[0] + gt[2] / 2, gt[1] + gt[3] / 2,
                                   gt[2], gt[3])
                    for gt in gts
                ),
                default=0.0,
            )

            if best_iou > threthoud_pos:
                COUNT_FACE += 1
                cv2.imwrite(f"{save_path}1/{i}_{COUNT_FACE}.jpg", img)
            elif best_iou < threthoud_neg:
                COUNT_BACKGROUND += 1
                cv2.imwrite(f"{save_path}0/{i}_{COUNT_BACKGROUND}.jpg", img)

        print(
            f"====>>> {i}/{len(datasets)}: Face: {COUNT_FACE}, Background: {COUNT_BACKGROUND}"
        )
def _encode_sample(self, gt_boxes, cls_ids):
    """Create the box and classification targets for a single sample.

    The ground-truth boxes are cast to float32, given a leading axis of
    size 1, converted with ``utils.convert_to_xywh`` and encoded against
    ``self._anchor_box`` by ``self._compute_box_target``. The returned label
    is the box target concatenated along axis 1 with a column of ones
    followed by a column of zeros.

    Args:
        gt_boxes: Ground-truth boxes of the sample.
        cls_ids: Class ids of the sample. They are cast to float32, but the
            result is not used in the returned label.

    Returns:
        The label tensor ``concat([box_target, ones, zeros], axis=1)``.
    """
    anchors = self._anchor_box

    gt_boxes = tf.cast(gt_boxes, dtype=tf.float32)
    batched_shape = (1, ) + gt_boxes.shape
    gt_boxes = tf.reshape(gt_boxes, batched_shape)
    cls_ids = tf.cast(cls_ids, dtype=tf.float32)

    # The matching results are not consumed below; the call is kept because
    # it may have side effects. NOTE(review): confirm whether it is needed.
    _matched_gt_idx, _positive_mask, _ignore_mask = self._match_anchor_boxes(
        anchors, gt_boxes)

    box_target = self._compute_box_target(
        anchors, utils.convert_to_xywh(gt_boxes))

    foreground = tf.ones((self.num_boxes, 1), dtype=tf.float32)
    # Comparing the all-ones column with 0 gives an all-zeros column.
    background = tf.cast(tf.equal(foreground, 0.), dtype=tf.float32)

    return tf.concat([box_target, foreground, background], axis=1)
def decode_sample(self, example):
    """Decode one serialized example and apply the augmentation pipeline.

    Steps, in order:

    1. Parse the example with ``image_feature_description``, PNG-decode the
       image, decode ``"bbox"`` / ``"label"`` as int64, and reshape the boxes
       (cast to float32) to ``(-1, 4)``.
    2. Report the decoded image size through ``self.set_height`` and
       ``self.set_width``, then clamp the boxes to ``[0, width]`` (columns
       0 and 2) and ``[0, height]`` (columns 1 and 3).
    3. If ``has_small_bbox`` reports a small box and a uniform draw exceeds
       0.5, random-crop the sample.
    4. If ``self.augment`` is set, apply brightness/contrast jitter and,
       each when a uniform draw is at least 0.8, random hue and random
       saturation.
    5. Normalise the boxes, randomly flip horizontally, resize and pad the
       image, and scale the boxes by the shape the resize helper reports.
    6. If ``self.iterator`` is set, ``self.is_iter`` is falsy, and a uniform
       draw exceeds 0.5, mix the image with the next sample of the iterator
       when both images have the same first two dimensions (mixup).

    Args:
        example: A serialized example accepted by
            ``tf.io.parse_single_example``.

    Returns:
        A tuple ``(image, bbox, label)``. ``bbox`` is passed through
        ``convert_to_xywh`` when ``self.convert`` is truthy and
        ``self.is_iter`` is falsy.
    """
    parsed = tf.io.parse_single_example(example, image_feature_description)
    image = tf.image.decode_png(parsed["image"])
    raw_boxes = tf.io.decode_raw(parsed["bbox"], out_type=tf.int64)
    bbox = tf.cast(raw_boxes, dtype=tf.float32)
    label = tf.io.decode_raw(parsed["label"], out_type=tf.int64)
    bbox = tf.reshape(bbox, (-1, 4))

    int_dims = tf.shape(image)
    self.set_height(int_dims[0])
    self.set_width(int_dims[1])
    float_dims = tf.cast(int_dims, tf.float32)
    width = float_dims[1]
    height = float_dims[0]

    # Clamp the box coordinates to the image extent.
    x_lo = tf.maximum(bbox[:, 0], 0)
    y_lo = tf.maximum(bbox[:, 1], 0)
    x_hi = tf.minimum(bbox[:, 2], width)
    y_hi = tf.minimum(bbox[:, 3], height)
    bbox = tf.stack([x_lo, y_lo, x_hi, y_hi], axis=-1)

    if has_small_bbox(width, height, bbox) and tf.random.uniform(()) > 0.5:
        image, bbox, label = self.random_crop(image, bbox, label)

    if self.augment:
        image = random_adjust_brightness(image)
        image = random_adjust_contrast(image)
        if tf.random.uniform(()) >= 0.8:
            image = tf.image.random_hue(image, 0.1)
        if tf.random.uniform(()) >= 0.8:
            image = tf.image.random_saturation(image, 0.1, 0.5)

    # NOTE(review): the boxes are normalised with the pre-crop width and
    # height - confirm that random_crop keeps them in that coordinate frame.
    bbox = normalize_bbox(bbox, width, height)
    image, bbox = random_flip_horizontal(image, bbox, 0.5)
    image, padded_shape, _ = resize_and_pad_image(
        image, self.resize, self.resize, jitter=None)

    # Columns 0 and 2 are scaled by padded_shape[1], columns 1 and 3 by
    # padded_shape[0].
    # NOTE(review): presumably padded_shape is (height, width) - confirm.
    dim0, dim1 = padded_shape[0], padded_shape[1]
    column_scales = (dim1, dim0, dim1, dim0)
    bbox = tf.stack(
        [bbox[:, col] * scale for col, scale in enumerate(column_scales)],
        axis=-1,
    )

    if self.iterator and not self.is_iter and tf.random.uniform(()) > 0.5:
        other_image, other_bbox, other_label = self.iterator.get_next()
        own_dims = tf.shape(image)
        other_dims = tf.shape(other_image)
        # mixup: only blend images whose first two dimensions agree.
        if other_dims[0] == own_dims[0] and own_dims[1] == other_dims[1]:
            image = tf.cast(image, tf.float32)
            if tf.size(other_label) > 0:
                bbox = tf.concat([bbox, other_bbox], axis=0)
                label = tf.concat([label, other_label], axis=0)
            blend = tf.random.uniform((), 0.35, 0.65)
            image = image * blend + other_image * (1 - blend)
        # A copy-paste augmentation (self.copy_paste) was previously called
        # here and is currently disabled.

    if self.convert and not self.is_iter:
        bbox = convert_to_xywh(bbox)
    return image, bbox, label