def call(self, inputs): rois = inputs[0] mrcnn_class = inputs[1] mrcnn_bbox = inputs[2] image_meta = inputs[3] # Get windows of images in normalized coordinates. Windows are the area # in the image that excludes the padding. # Use the shape of the first image in the batch to normalize the window # because we know that all images get resized to the same size. m = parse_image_meta_graph(image_meta) image_shape = m['image_shape'][0] window = norm_boxes_graph(m['window'], image_shape[:2]) # Run detection refinement graph on each item in the batch detections_batch = utils.batch_slice( [rois, mrcnn_class, mrcnn_bbox, window], lambda x, y, w, z: refine_detections_graph(x, y, w, z, self.config), self.config.IMAGES_PER_GPU) # Reshape output # [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] in # normalized coordinates return tf.reshape( detections_batch, [self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, 6])
def call(self, inputs): proposals = inputs[0] gt_boxes = inputs[1] # Slice the batch and run a graph for each slice # TODO: Rename target_bbox to target_deltas for clarity names = ["rois", "target_class_ids", "target_bbox"] outputs = utils.batch_slice( [proposals, gt_boxes], lambda x, y: detection_targets_graph( x, y, self.config), self.config.IMAGES_PER_GPU, names=names) return outputs
def call(self, inputs): rois = inputs[0] mrcnn_class = inputs[1] mrcnn_bbox = inputs[2] # Run detection refinement graph on each item in the batch detections_batch = utils.batch_slice( [rois, mrcnn_class, mrcnn_bbox], lambda x, y, z: refine_detections_graph(x, y, z, self.config), self.config.IMAGES_PER_GPU) # Reshape output # [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] in # normalized coordinates return tf.reshape( detections_batch, [self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, 6])
def call(self, inputs): # Box Scores. Use the foreground class confidence. [Batch, num_rois, 1] scores = inputs[0][:, :, 1] # Box deltas [batch, num_rois, 4] deltas = inputs[1] deltas = deltas * np.reshape(self.config.RPN_BBOX_STD_DEV, [1, 1, 4]) # Anchors anchors = inputs[2] # Improve performance by trimming to top anchors by score # and doing the rest on the smaller subset. pre_nms_limit = tf.minimum(self.config.PRE_NMS_LIMIT, tf.shape(anchors)[1]) ix = tf.nn.top_k(scores, pre_nms_limit, sorted=True, name="top_anchors").indices scores = utils.batch_slice([scores, ix], lambda x, y: tf.gather(x, y), self.config.IMAGES_PER_GPU) deltas = utils.batch_slice([deltas, ix], lambda x, y: tf.gather(x, y), self.config.IMAGES_PER_GPU) pre_nms_anchors = utils.batch_slice([anchors, ix], lambda a, x: tf.gather(a, x), self.config.IMAGES_PER_GPU, names=["pre_nms_anchors"]) # Apply deltas to anchors to get refined anchors. # [batch, N, (y1, x1, y2, x2)] boxes = utils.batch_slice([pre_nms_anchors, deltas], lambda x, y: apply_box_deltas_graph(x, y), self.config.IMAGES_PER_GPU, names=["refined_anchors"]) # Clip to image boundaries. Since we're in normalized coordinates, # clip to 0..1 range. [batch, N, (y1, x1, y2, x2)] window = np.array([0, 0, 1, 1], dtype=np.float32) boxes = utils.batch_slice(boxes, lambda x: clip_boxes_graph(x, window), self.config.IMAGES_PER_GPU, names=["refined_anchors_clipped"]) # Filter out small boxes # According to Xinlei Chen's paper, this reduces detection accuracy # for small objects, so we're skipping it. # Non-max suppression def nms(boxes, scores): indices = tf.image.non_max_suppression( boxes, scores, self.proposal_count, self.nms_threshold, name="rpn_non_max_suppression") proposals = tf.gather(boxes, indices) # Pad if needed padding = tf.maximum(self.proposal_count - tf.shape(proposals)[0], 0) proposals = tf.pad(proposals, [(0, padding), (0, 0)]) return proposals proposals = utils.batch_slice([boxes, scores], nms, self.config.IMAGES_PER_GPU) return proposals