def test_box_conversions(self, num_boxes):
  """Checks that both box-format conversions keep the [num_boxes, 4] shape."""
  random_boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
  want_shape = np.array([num_boxes, 4])

  # Run both conversions first, then verify each result's shape.
  converted = (
      box_ops.yxyx_to_xcycwh(random_boxes),
      box_ops.xcycwh_to_yxyx(random_boxes),
  )
  for result in converted:
    self.assertAllEqual(tf.shape(result).numpy(), want_shape)
def parse_prediction_path(self, key, inputs):
  """Decodes one raw YOLO output level into flattened detection tensors.

  Args:
    key: lookup key for the per-level settings held on `self` (grid
      generator, anchor count, xy scale, path scale and box type).
    inputs: the raw prediction `Tensor` for that level.

  Returns:
    A tuple `(objectness, boxes, class_scores)` reshaped to `[-1, N]`,
    `[-1, N, 4]` and `[-1, N, num_classes]` respectively, where
    `N = height * width * anchors_per_cell`.
  """
  dynamic_shape = tf.shape(inputs)
  static_shape = inputs.get_shape().as_list()
  batch = dynamic_shape[0]
  rows, cols = static_shape[1], static_shape[2]
  if rows is None or cols is None:
    # The static spatial dims are unknown; fall back to the runtime shape.
    rows, cols = dynamic_shape[1], dynamic_shape[2]

  grid_fn = self._generator[key]
  anchors_per_cell = self._len_mask[key]
  xy_scale = self._scale_xy[key]

  # Unpack the channel axis into (anchors_per_cell, classes + 5).
  preds = tf.reshape(
      inputs, [-1, rows, cols, anchors_per_cell, self._classes + 5])

  # The grid generator supplies the grid points and the formatted anchors.
  grid_centers, grid_anchors = grid_fn(rows, cols, batch, dtype=preds.dtype)

  # Each prediction holds 4 box values, 1 objectness value and the classes.
  raw_boxes, objectness, class_logits = tf.split(
      preds, [4, 1, self._classes], axis=-1)
  num_classes = class_logits.get_shape().as_list()[-1]

  # Only the third value returned by the box decoder is used here.
  _, _, decoded_boxes = loss_utils.get_predicted_box(
      tf.cast(rows, preds.dtype),
      tf.cast(cols, preds.dtype),
      raw_boxes,
      grid_anchors,
      grid_centers,
      xy_scale,
      stride=self._path_scale[key],
      darknet=False,
      box_type=self._box_type[key])

  # Convert from yolo (x, y, w, h) to tensorflow (ymin, xmin, ymax, xmax).
  corner_boxes = box_ops.xcycwh_to_yxyx(decoded_boxes)

  # Activate the objectness map and use it to weight the class scores.
  objectness = tf.math.sigmoid(objectness)
  class_probs = tf.math.sigmoid(class_logits) * objectness

  # Flatten the predictions to [batchsize, N, -1] for non max suppression.
  flat_count = rows * cols * anchors_per_cell
  flat_boxes = tf.reshape(corner_boxes, [-1, flat_count, 4])
  flat_class_probs = tf.reshape(class_probs, [-1, flat_count, num_classes])
  flat_objectness = tf.reshape(objectness, [-1, flat_count])
  return flat_objectness, flat_boxes, flat_class_probs
def fit_preserve_aspect_ratio(image,
                              boxes,
                              width=None,
                              height=None,
                              target_dim=None):
  """Pads the image to a square, resizes it, and adjusts the boxes to match.

  The image is padded to a square whose side is `max(width, height)` and then
  resized to `(target_dim, target_dim)`. The boxes are converted to the
  center/size format, rescaled and shifted to account for the padding, and
  converted back.

  Args:
    image: a `Tensor` representing the image.
    boxes: a `Tensor` representing the boxes.
    width: int for the image width. When either `width` or `height` is None,
      both are read from the image shape: `width` from axis 1 and `height`
      from axis 2 for a rank-4 image, otherwise from axes 0 and 1.
    height: int for the image height (see `width`).
    target_dim: the side length of the resized square output. Defaults to
      the side of the padded square.

  Returns:
    image: a `Tensor` representing the image.
    box: a `Tensor` representing the boxes.
  """
  if width is None or height is None:
    image_shape = tf.shape(image)
    if tf.shape(image_shape)[0] == 4:
      width, height = image_shape[1], image_shape[2]
    else:
      width, height = image_shape[0], image_shape[1]

  square_side = tf.math.maximum(width, height)
  if target_dim is None:
    target_dim = square_side

  pad_width = square_side - width
  pad_height = square_side - height
  image = tf.image.pad_to_bounding_box(
      image, pad_width // 2, pad_height // 2, square_side, square_side)

  # Scale and shift factors relative to the padded square.
  width_ratio = tf.cast(width / square_side, tf.float32)
  height_ratio = tf.cast(height / square_side, tf.float32)
  width_shift = tf.cast((pad_width / square_side) / 2, tf.float32)
  height_shift = tf.cast((pad_height / square_side) / 2, tf.float32)

  x, y, w, h = tf.split(box_ops.yxyx_to_xcycwh(boxes), 4, axis=-1)
  y = y * width_ratio + width_shift
  x = x * height_ratio + height_shift
  h = h * width_ratio
  w = w * height_ratio

  boxes = box_ops.xcycwh_to_yxyx(tf.concat([x, y, w, h], axis=-1))
  image = tf.image.resize(image, (target_dim, target_dim))
  return image, boxes
def testResizeImageBoxes(self, bbox, bbox_result, height, width, target_dim,
                         preserve_aspect_ratio):
  """Checks the resized box values and the resized image's spatial shape."""
  image = tf.random.uniform((height, width, 3))

  # Scale the box entries by (width, height, width, height) in that order.
  scales = (width, height, width, height)
  scaled_bbox = [bbox[i] * scale for i, scale in enumerate(scales)]
  corner_bbox = box_ops.xcycwh_to_yxyx(scaled_bbox)

  resized_image, resized_bbox = yolo_ops.resize_image_and_bboxes(
      image=image,
      bboxes=corner_bbox,
      target_size=target_dim,
      preserve_aspect_ratio=preserve_aspect_ratio)

  self.assertAllClose(resized_bbox, bbox_result)
  self.assertAllEqual(resized_image.shape[:2], target_dim)
def serve(self, images):
  """Casts images to float, runs inference and filters detections with NMS.

  Args:
    images: uint8 Tensor of shape [batch_size, None, None, 3]

  Returns:
    A dict with the keys 'boxes', 'classes' and 'scores' holding the
    detections kept by non-max suppression (at most 20).
  """
  with tf.device('cpu:0'):
    float_images = tf.cast(images, dtype=tf.float32)
    image_spec = tf.TensorSpec(
        shape=self._input_image_size + [3], dtype=tf.float32)
    preprocessed = tf.map_fn(
        self._build_inputs,
        elems=float_images,
        fn_output_signature=image_spec,
        parallel_iterations=32)
    model_inputs = tf.nest.map_structure(tf.identity, preprocessed)

  outputs = self.inference_step(model_inputs)
  predictions = outputs['predictions']

  # Each prediction carries 5 non-class values in its last dimension.
  num_classes = predictions['0'].shape[-1] - 5
  bbox_tensors, _, prob_tensors = yolo_ops.concat_tensor_dict(
      tensor_dict=predictions, num_classes=num_classes)

  all_boxes = tf.squeeze(
      yolo_box_ops.xcycwh_to_yxyx(tf.concat(bbox_tensors, axis=1)))
  best_scores = tf.squeeze(
      tf.math.reduce_max(tf.concat(prob_tensors, axis=1), axis=-1))
  best_classes = tf.argmax(prob_tensors, axis=-1)

  keep = tf.image.non_max_suppression(
      boxes=all_boxes,
      scores=best_scores,
      max_output_size=20,
      iou_threshold=0.5,
      score_threshold=0.25)

  kept_boxes = tf.expand_dims(tf.gather(all_boxes, keep), axis=0)
  kept_boxes = box_ops.normalize_boxes(kept_boxes, self._input_image_size)
  kept_scores = tf.expand_dims(tf.gather(best_scores, keep), axis=0)
  kept_classes = tf.gather(best_classes, keep, axis=1)

  return {'boxes': kept_boxes, 'classes': kept_classes, 'scores': kept_scores}
def _decode_boxes(self, parsed_tensors):
  """Stacks the parsed [x, y, w, h] box fields into a single tensor.

  When `self.is_bbox_in_pixels` is false the coordinates are multiplied by
  the image width/height. When `self.is_xywh` is true the stacked boxes are
  passed through `yolo_box_ops.xcycwh_to_yxyx` before being returned.
  """
  coords = [
      parsed_tensors[name]
      for name in ('bbox/x', 'bbox/y', 'bbox/w', 'bbox/h')
  ]

  if not self.is_bbox_in_pixels:
    image_width = tf.cast(parsed_tensors['image/width'], tf.float32)
    image_height = tf.cast(parsed_tensors['image/height'], tf.float32)
    # x and w scale with the width, y and h with the height.
    factors = (image_width, image_height, image_width, image_height)
    coords = [coord * factor for coord, factor in zip(coords, factors)]

  stacked = tf.stack(coords, axis=-1)
  if self.is_xywh:
    return yolo_box_ops.xcycwh_to_yxyx(stacked)
  return stacked
def parse_prediction_path(self, key, inputs):
  """Splits, activates and thresholds one YOLO output level.

  Args:
    key: lookup key for the number of anchors used at this level.
    inputs: the prediction `Tensor` for that level; its static height and
      width (axes 1 and 2) are used for reshaping.

  Returns:
    A tuple `(objectness, boxes, class_scores)` reshaped to `[-1, N]`,
    `[-1, N, 4]` and `[-1, N, num_classes]` respectively, where
    `N = height * width * anchors_per_cell`.
  """
  static_shape = inputs.get_shape().as_list()
  rows, cols = static_shape[1], static_shape[2]
  anchors_per_cell = self._len_mask[key]

  # Unpack the channel axis into (anchors_per_cell, classes + 5).
  preds = tf.reshape(
      inputs, [-1, rows, cols, anchors_per_cell, self._classes + 5])

  # Each prediction holds 4 box values, 1 objectness value and the classes.
  raw_boxes, objectness, class_logits = tf.split(
      preds, [4, 1, self._classes], axis=-1)
  num_classes = class_logits.get_shape().as_list()[-1]

  # Convert from yolo (x, y, w, h) to tensorflow (ymin, xmin, ymax, xmax).
  corner_boxes = box_ops.xcycwh_to_yxyx(raw_boxes)

  # Activate the objectness map and build a mask of the confident cells.
  objectness = tf.math.sigmoid(objectness)
  confident = tf.cast(objectness > self._thresh, objectness.dtype)

  # Class probabilities, zeroed where either the objectness or the final
  # class score does not exceed the threshold.
  class_probs = tf.math.sigmoid(class_logits) * confident * objectness
  keep = tf.cast(class_probs > self._thresh, class_probs.dtype)
  class_probs = class_probs * keep

  # Flatten the predictions to [batchsize, N, -1] for non max suppression.
  flat_count = rows * cols * anchors_per_cell
  flat_boxes = tf.reshape(corner_boxes, [-1, flat_count, 4])
  flat_class_probs = tf.reshape(class_probs, [-1, flat_count, num_classes])
  flat_objectness = tf.reshape(objectness, [-1, flat_count])
  return flat_objectness, flat_boxes, flat_class_probs
def serve(self, images: tf.Tensor) -> Mapping[str, tf.Tensor]:
  """Casts images to float, runs inference and post-processes each task.

  Every entry of the model output is dispatched on its name:
    * names containing 'classification' are argmax-ed or soft-maxed;
    * names containing 'segmentation' are resized to the input image size,
      optionally argmax-ed, class-counted and colour-mapped;
    * names containing 'yolo' are decoded into boxes / classes / scores and
      filtered with non-max suppression.

  Args:
    images: uint8 Tensor of shape [batch_size, None, None, 3]

  Returns:
    A dict mapping output names (task name plus an optional suffix) to the
    post-processed tensors.

  Raises:
    NotImplementedError: if an output name matches none of the known tasks.
  """
  # A single image skips tf.map_fn, which adds a while node that is not
  # static.
  # NOTE(review): this comparison needs a statically known batch dimension.
  if images.shape[0] > 1:
    with tf.device('cpu:0'):
      images = tf.cast(images, dtype=tf.float32)
      images = tf.nest.map_structure(
          tf.identity,
          tf.map_fn(
              self._build_inputs,
              elems=images,
              fn_output_signature=tf.TensorSpec(
                  shape=self._input_image_size + [3], dtype=tf.float32),
              parallel_iterations=32))
  else:
    images = tf.cast(images, dtype=tf.float32)
    images = tf.squeeze(images)
    images = self._build_inputs(images)
    images = tf.expand_dims(images, axis=0)

  outputs = self.inference_step(images)

  processed_outputs = {}
  for name, output in outputs.items():
    if 'classification' in name:
      if self._argmax_outputs:
        output = tf.math.argmax(output, -1)
      else:
        output = tf.nn.softmax(output)
      processed_outputs[name] = output

    elif 'segmentation' in name:
      num_classes = output.shape[-1]
      if self._class_present_outputs:
        flattened_output = tf.math.argmax(
            tf.reshape(output, [-1, num_classes]), -1)
        # NOTE(review): the one-hot depth is hard-coded to 19 rather than
        # taken from `num_classes` -- confirm this is intended.
        one_hotted = tf.one_hot(flattened_output, 19, axis=0)
        class_counts = tf.reduce_sum(one_hotted, axis=-1)
        processed_outputs[name + '_class_count'] = class_counts

      output = tf.image.resize(
          output, self._input_image_size, method='bilinear')
      if self._argmax_outputs:
        output = tf.math.argmax(output, -1)
      processed_outputs[name] = output

      if self._visualise_outputs and len(output.shape) == 3:
        colormap = get_colormap(cmap_type='cityscapes_int')
        processed_outputs[name + '_visualised'] = tf.gather(
            colormap, tf.cast(tf.squeeze(output), tf.int32))

    elif 'yolo' in name:
      # Each prediction carries 5 non-class values in its last dimension.
      num_classes = output['predictions']['0'].shape[-1] - 5
      bbox_tensors, _, prob_tensors = yolo_ops.concat_tensor_dict(
          tensor_dict=output['predictions'], num_classes=num_classes)

      boxes = tf.concat(bbox_tensors, axis=1)
      boxes = tf.squeeze(yolo_box_ops.xcycwh_to_yxyx(boxes))
      scores = tf.concat(prob_tensors, axis=1)
      scores = tf.squeeze(tf.math.reduce_max(scores, axis=-1))
      classes = tf.squeeze(tf.math.argmax(prob_tensors, axis=-1))

      indices = tf.image.non_max_suppression(
          boxes=boxes,
          scores=scores,
          max_output_size=20,
          iou_threshold=0.25,
          score_threshold=0.25)

      boxes = tf.gather(boxes, indices)
      scores = tf.gather(scores, indices)
      classes = tf.gather(classes, indices)

      processed_outputs[name + 'boxes'] = boxes
      processed_outputs[name + 'classes'] = classes
      processed_outputs[name + 'scores'] = scores

    else:
      # Format the whole message at once: `%` binds tighter than `+`, so
      # applying it to only the second of two concatenated literals raises
      # TypeError instead of the intended NotImplementedError.
      raise NotImplementedError(
          'Task type %s is not implemented. '
          'Try renaming the task routine.' % name)

  return processed_outputs
def _get_anchor_free(self, key, boxes, classes, height, width, stride,
                     center_radius):
  """Find the box assignments in an anchor free paradigm.

  A grid location is assigned to a box when it passes both the box test
  (inside the box, or within the level limits when those are set) and the
  center test (within `center_radius * stride` of the box center).

  Args:
    key: lookup key into `self.anchor_free_level_limits`.
    boxes: the ground truth boxes; they are multiplied by
      `[width, height, width, height] * stride` and converted with
      `box_ops.xcycwh_to_yxyx` (presumably normalized xcycwh -- confirm).
    classes: the class of each box, gathered with the same index as `boxes`.
    height: grid height for this level.
    width: grid width for this level.
    stride: the stride of this level, used to scale grid points and boxes.
    center_radius: radius, in units of `stride`, of the region around each
      box center that counts as "in center".

  Returns:
    indexes: int32 `(y, x, 0)` triplets of the selected grid locations.
    samples: the concatenation `[box, 1, class]` for each selected location,
      where `box` is taken from the unscaled input `boxes`.
  """
  level_limits = self.anchor_free_level_limits[key]
  gen = loss_utils.GridGenerator(anchors=[[1, 1]], scale_anchors=stride)
  grid_points = gen(width, height, 1, boxes.dtype)[0]
  grid_points = tf.squeeze(grid_points, axis=0)
  # keep references to the unscaled inputs; `boxes` is rebound below
  box_list = boxes
  class_list = classes

  # move each grid point to its cell center and scale it by the stride
  grid_points = (grid_points + 0.5) * stride
  x_centers, y_centers = grid_points[..., 0], grid_points[..., 1]
  boxes *= (tf.convert_to_tensor([width, height, width, height]) * stride)

  tlbr_boxes = box_ops.xcycwh_to_yxyx(boxes)

  # reshape so the boxes occupy the third axis
  # (presumably to broadcast against the grid -- confirm)
  boxes = tf.reshape(boxes, [1, 1, -1, 4])
  tlbr_boxes = tf.reshape(tlbr_boxes, [1, 1, -1, 4])
  if self.use_tie_breaker:
    # product of the last two box entries (width * height in xcycwh)
    area = tf.reduce_prod(boxes[..., 2:], axis=-1)

  # check if the box is in the receptive field of this fpn level
  b_t = y_centers - tlbr_boxes[..., 0]
  b_l = x_centers - tlbr_boxes[..., 1]
  b_b = tlbr_boxes[..., 2] - y_centers
  b_r = tlbr_boxes[..., 3] - x_centers
  box_delta = tf.stack([b_t, b_l, b_b, b_r], axis=-1)
  if level_limits is not None:
    # keep locations whose largest edge distance lies within the level limits
    max_reg_targets_per_im = tf.reduce_max(box_delta, axis=-1)
    gt_min = max_reg_targets_per_im >= level_limits[0]
    gt_max = max_reg_targets_per_im <= level_limits[1]
    is_in_boxes = tf.logical_and(gt_min, gt_max)
  else:
    # all four edge distances positive means the point is inside the box
    is_in_boxes = tf.reduce_min(box_delta, axis=-1) > 0.0
  is_in_boxes_all = tf.reduce_any(is_in_boxes, axis=(0, 1), keepdims=True)

  # check if the center is in the receptive field of this fpn level
  c_t = y_centers - (boxes[..., 1] - center_radius * stride)
  c_l = x_centers - (boxes[..., 0] - center_radius * stride)
  c_b = (boxes[..., 1] + center_radius * stride) - y_centers
  c_r = (boxes[..., 0] + center_radius * stride) - x_centers
  centers_delta = tf.stack([c_t, c_l, c_b, c_r], axis=-1)
  is_in_centers = tf.reduce_min(centers_delta, axis=-1) > 0.0
  is_in_centers_all = tf.reduce_any(is_in_centers, axis=(0, 1), keepdims=True)

  # collate all masks to get the final locations
  is_in_index = tf.logical_or(is_in_boxes_all, is_in_centers_all)
  is_in_boxes_and_center = tf.logical_and(is_in_boxes, is_in_centers)
  is_in_boxes_and_center = tf.logical_and(is_in_index, is_in_boxes_and_center)

  if self.use_tie_breaker:
    # per location keep only the candidate(s) with the smallest area;
    # non-candidates are given an area of INF so they can never win, and a
    # location with no candidate gets a minimum of -1.0, which matches nothing
    boxes_all = tf.cast(is_in_boxes_and_center, area.dtype)
    boxes_all = ((boxes_all * area) + ((1 - boxes_all) * INF))
    boxes_min = tf.reduce_min(boxes_all, axis=-1, keepdims=True)
    boxes_min = tf.where(boxes_min == INF, -1.0, boxes_min)
    is_in_boxes_and_center = boxes_all == boxes_min

  # construct the index update grid
  reps = tf.reduce_sum(tf.cast(is_in_boxes_and_center, tf.int16), axis=-1)
  indexes = tf.cast(tf.where(is_in_boxes_and_center), tf.int32)
  y, x, t = tf.split(indexes, 3, axis=-1)

  # gather from the unscaled boxes and classes saved at the top
  boxes = tf.gather_nd(box_list, t)
  classes = tf.cast(tf.gather_nd(class_list, t), boxes.dtype)
  # NOTE(review): `reps` is computed here but is not part of the returned
  # values
  reps = tf.gather_nd(reps, tf.concat([y, x], axis=-1))
  reps = tf.cast(tf.expand_dims(reps, axis=-1), boxes.dtype)
  classes = tf.cast(tf.expand_dims(classes, axis=-1), boxes.dtype)
  conf = tf.ones_like(classes)

  # return the samples and the indexes
  samples = tf.concat([boxes, conf, classes], axis=-1)
  indexes = tf.concat([y, x, tf.zeros_like(t)], axis=-1)
  return indexes, samples
def get_best_anchor(y_true,
                    anchors,
                    stride,
                    width=1,
                    height=1,
                    iou_thresh=0.25,
                    best_match_only=False,
                    use_tie_breaker=True):
  """Get the correct anchor that is associated with each box using IOU.

  When `iou_thresh` is below 1.0 the anchors are ranked by the value returned
  from `box_ops.aggregated_comparitive_iou` and kept when that value is at
  least `iou_thresh`. When `iou_thresh` is 1.0 or more, the anchors are ranked
  by the largest width/height ratio (or its inverse) between box and anchor,
  smallest first, and kept when that ratio is below `iou_thresh`.

  Args:
    y_true: tf.Tensor[] for the list of bounding boxes in the yolo format.
      Only entries `[..., 2:4]` (width and height) are read.
    anchors: list or tensor for the anchor boxes to be used in prediction
      found via Kmeans.
    stride: `int` stride for the anchors; the anchors are divided by it.
    width: int for the image width.
    height: int for the image height.
    iou_thresh: `float` the threshold to use for selecting boxes for each
      level (see above for how it is applied).
    best_match_only: `bool` when True, every match that fails the threshold,
      including the best one, is set to -1. When False the best match is
      always kept.
    use_tie_breaker: `bool` only read when `best_match_only` is False. When
      True, the remaining matches that pass the threshold are kept; when
      False, every match other than the best is set to -1.

  Returns:
    A tuple of two float32 tensors: the anchor indexes for each box, ordered
    by `tf.math.top_k` with rejected entries set to -1, and the matching
    values (IOU or aspect ratio) in the same order.
  """
  with tf.name_scope('get_best_anchor'):
    width = tf.cast(width, dtype=tf.float32)
    height = tf.cast(height, dtype=tf.float32)
    scaler = tf.convert_to_tensor([width, height])

    # scale to the level's width and height
    true_wh = tf.cast(y_true[..., 2:4], dtype=tf.float32) * scaler

    # scale down from large anchor to small anchor type
    anchors = tf.cast(anchors, dtype=tf.float32) / stride

    # number of anchors; every anchor is ranked for every box
    k = tf.shape(anchors)[0]

    # prepend zeros so boxes and anchors share a zero center and only their
    # width and height are compared
    anchors = tf.concat([tf.zeros_like(anchors), anchors], axis=-1)
    truth_comp = tf.concat([tf.zeros_like(true_wh), true_wh], axis=-1)

    if iou_thresh >= 1.0:
      # aspect ratio matching
      anchors = tf.expand_dims(anchors, axis=-2)
      truth_comp = tf.expand_dims(truth_comp, axis=-3)

      aspect = truth_comp[..., 2:4] / anchors[..., 2:4]
      aspect = tf.where(tf.math.is_nan(aspect), tf.zeros_like(aspect), aspect)
      # use whichever of ratio / inverse ratio is larger
      aspect = tf.maximum(aspect, 1 / aspect)
      aspect = tf.where(tf.math.is_nan(aspect), tf.zeros_like(aspect), aspect)
      aspect = tf.reduce_max(aspect, axis=-1)

      # negate so top_k ranks the smallest ratios first, then restore sign
      values, indexes = tf.math.top_k(
          tf.transpose(-aspect, perm=[1, 0]),
          k=tf.cast(k, dtype=tf.int32),
          sorted=True)
      values = -values
      ind_mask = tf.cast(values < iou_thresh, dtype=indexes.dtype)
    else:
      # iou matching
      truth_comp = box_ops.xcycwh_to_yxyx(truth_comp)
      anchors = box_ops.xcycwh_to_yxyx(anchors)
      # NOTE(review): the meaning of iou_type=3 is defined in box_ops --
      # confirm there
      iou_raw = box_ops.aggregated_comparitive_iou(
          truth_comp,
          anchors,
          iou_type=3,
      )
      values, indexes = tf.math.top_k(
          iou_raw, k=tf.cast(k, dtype=tf.int32), sorted=True)
      ind_mask = tf.cast(values >= iou_thresh, dtype=indexes.dtype)

    # pad the indexes such that all rejected values are -1:
    # add one, multiply by the mask to zero all the bad locations, then
    # subtract one, making all the bad locations -1.
    if best_match_only:
      iou_index = ((indexes[..., 0:] + 1) * ind_mask[..., 0:]) - 1
    elif use_tie_breaker:
      # always keep the best match; mask only the remaining ones
      iou_index = tf.concat([
          tf.expand_dims(indexes[..., 0], axis=-1),
          ((indexes[..., 1:] + 1) * ind_mask[..., 1:]) - 1
      ],
                            axis=-1)
    else:
      # keep the best match only; every other entry becomes -1
      iou_index = tf.concat([
          tf.expand_dims(indexes[..., 0], axis=-1),
          tf.zeros_like(indexes[..., 1:]) - 1
      ],
                            axis=-1)

  return tf.cast(iou_index, dtype=tf.float32), tf.cast(values, dtype=tf.float32)