Example No. 1
    def fast_rcnn_net(self):
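        """Fast R-CNN box head: two 1024-d fully connected layers followed by a
        classification branch and a box-regression branch.
        :return:
        fast_rcnn_encode_boxes: [N, num_classes * 4] encoded box deltas
        fast_rcnn_scores: [N, num_classes + 1] raw class logits (0 = background)
        """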

        with tf.variable_scope('fast_rcnn_net'):
            with slim.arg_scope([slim.fully_connected],
                                weights_regularizer=slim.l2_regularizer(
                                    self.weight_decay)):

                flatten_rois_features = slim.flatten(
                    self.fast_rcnn_all_level_rois)

                net = slim.fully_connected(flatten_rois_features,
                                           1024,
                                           scope='fc_1')
                if self.use_dropout:
                    net = slim.dropout(net,
                                       keep_prob=0.5,
                                       is_training=self.is_training,
                                       scope='dropout')

                net = slim.fully_connected(net, 1024, scope='fc_2')

                fast_rcnn_scores = slim.fully_connected(net,
                                                        self.num_classes + 1,
                                                        activation_fn=None,
                                                        scope='classifier')

                fast_rcnn_encode_boxes = slim.fully_connected(
                    net,
                    self.num_classes * 4,
                    activation_fn=None,
                    scope='regressor')
            if DEBUG:
                print_tensors(fast_rcnn_encode_boxes, 'fast_rcnn_encode_boxes')

            return fast_rcnn_encode_boxes, fast_rcnn_scores
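
For orientation, here is a minimal NumPy sketch of the tensor shapes this head produces. The sizes (N, roi_size, C, num_classes) are hypothetical placeholders, not values from the original model:

import numpy as np

# Hypothetical sizes for illustration only.
N, roi_size, C, num_classes = 8, 7, 256, 20

rois = np.random.rand(N, roi_size, roi_size, C).astype(np.float32)
flat = rois.reshape(N, -1)                          # slim.flatten -> [N, 7*7*256]
w1 = np.random.rand(flat.shape[1], 1024).astype(np.float32)
fc = np.maximum(flat @ w1, 0.0)                     # fc layer + ReLU (slim's default activation)
scores = fc @ np.random.rand(1024, num_classes + 1).astype(np.float32)  # class logits
boxes = fc @ np.random.rand(1024, num_classes * 4).astype(np.float32)   # per-class box deltas
print(scores.shape, boxes.shape)                    # (8, 21) (8, 80)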
Example No. 2
    def get_rois(self):
        '''
        1) Gather each RPN proposal's features from the matching pyramid level.
        2) Apply ROI align or ROI pooling; ROI align is used here
           (crop_and_resize followed by max pooling).
        :return:
        all_level_rois: [N, 7, 7, C]
        all_level_proposals: [N, 4]
        all_level_proposals[i] corresponds to all_level_rois[i]
        '''
        levels = self.assign_level()

        all_level_roi_list = []
        all_level_proposal_list = []
        if DEBUG:
            print_tensors(levels, 'levels')
        with tf.variable_scope('fast_rcnn_roi'):
            # P6 is not used by the Fast R-CNN detector.
            for i in range(self.min_level, self.max_level + 1):
                level_i_proposal_indices = tf.reshape(
                    tf.where(tf.equal(levels, i)), [-1])
                level_i_proposals = tf.gather(self.rpn_proposals_boxes,
                                              level_i_proposal_indices)

                level_i_proposals = tf.cond(
                    tf.equal(tf.shape(level_i_proposals)[0], 0),
                    lambda: tf.constant([[0, 0, 0, 0]], dtype=tf.float32),
                    lambda: level_i_proposals
                )  # guard against an empty level: a zero-size proposal batch breaks gradient backprop

                all_level_proposal_list.append(level_i_proposals)

                ymin, xmin, ymax, xmax = tf.unstack(level_i_proposals, axis=1)
                img_h, img_w = tf.cast(self.img_shape[1], tf.float32), tf.cast(
                    self.img_shape[2], tf.float32)
                normalize_ymin = ymin / img_h
                normalize_xmin = xmin / img_w
                normalize_ymax = ymax / img_h
                normalize_xmax = xmax / img_w

                level_i_cropped_rois = tf.image.crop_and_resize(
                    self.feature_pyramid['P%d' % i],
                    boxes=tf.transpose(
                        tf.stack([
                            normalize_ymin, normalize_xmin, normalize_ymax,
                            normalize_xmax
                        ])),
                    box_ind=tf.zeros(shape=[
                        tf.shape(level_i_proposals)[0],
                    ],
                                     dtype=tf.int32),
                    crop_size=[self.roi_size, self.roi_size])
                level_i_rois = slim.max_pool2d(
                    level_i_cropped_rois,
                    [self.roi_pool_kernel_size, self.roi_pool_kernel_size],
                    stride=self.roi_pool_kernel_size)
                all_level_roi_list.append(level_i_rois)

            all_level_rois = tf.concat(all_level_roi_list, axis=0)
            all_level_proposals = tf.concat(all_level_proposal_list, axis=0)
            return all_level_rois, all_level_proposals
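
assign_level is not shown in this example. In FPN (Lin et al., 2017) each proposal is routed to a pyramid level with the heuristic k = floor(k0 + log2(sqrt(w*h) / 224)); the sketch below assumes that is what assign_level computes, with hypothetical k0 and level bounds:

import numpy as np

def assign_level_sketch(boxes, k0=4, min_level=2, max_level=5):
    """FPN level heuristic: k = floor(k0 + log2(sqrt(w*h) / 224)),
    clipped to the available pyramid levels."""
    h = boxes[:, 2] - boxes[:, 0]
    w = boxes[:, 3] - boxes[:, 1]
    k = np.floor(k0 + np.log2(np.sqrt(h * w) / 224.0 + 1e-8))
    return np.clip(k, min_level, max_level).astype(np.int32)

boxes = np.array([[0, 0, 32, 32], [0, 0, 224, 224], [0, 0, 480, 640]], np.float32)
print(assign_level_sketch(boxes))  # [2 4 5]: small boxes -> fine levels, large -> coarse

Note that tf.image.crop_and_resize expects box coordinates normalized to [0, 1], which is why the proposals are divided by the image height and width in the code above.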
Example No. 3
        def batch_slice_rcnn_proposals(rpn_proposal_bbox, encode_boxes,
                                       categories, scores, image_window,
                                       config):
            """
            mutilclass NMS
            :param rpn_proposal_bbox: (N, 4)
            :param encode_boxes: (N, 4)
            :param categories:(N, )
            :param scores: (N, )
            :param image_window:(y1, x1, y2, x2) the boundary of image
            :return:
            detection_boxes_scores_labels : (-1, 6)[y1, x1, y2, x2, scores, labels]
            """
            with tf.variable_scope('fast_rcnn_proposals'):
                # trim zero-padded proposals
                rpn_proposal_bbox, non_zeros = boxes_utils.trim_zeros_graph(
                    rpn_proposal_bbox, name="trim_proposals_detection")
                encode_boxes = tf.boolean_mask(encode_boxes, non_zeros)
                categories = tf.boolean_mask(categories, non_zeros)
                scores = tf.boolean_mask(scores, non_zeros)
                fast_rcnn_decode_boxes = encode_and_decode.decode_boxes(
                    encode_boxes=encode_boxes,
                    reference_boxes=rpn_proposal_bbox,
                    dev_factors=config.BBOX_STD_DEV)
                fast_rcnn_decode_boxes = boxes_utils.clip_boxes_to_img_boundaries(
                    fast_rcnn_decode_boxes, image_window)

                # remove the background
                keep = tf.cast(tf.where(categories > 0)[:, 0], tf.int32)
                if DEBUG:
                    print_categories = tf.gather(categories, keep)
                    print_scores = tf.gather(scores, keep)
                    num_item = tf.minimum(tf.shape(print_scores)[0], 50)
                    print_scores_vision, print_index = tf.nn.top_k(
                        print_scores, k=num_item)
                    print_categories_vision = tf.gather(
                        print_categories, print_index)
                    print_tensors(print_categories_vision, "categories")
                    print_tensors(print_scores_vision, "scores")
                # Filter out low confidence boxes
                if config.FINAL_SCORE_THRESHOLD:
                    conf_keep = tf.cast(
                        tf.where(scores >= config.FINAL_SCORE_THRESHOLD)[:, 0],
                        tf.int32)
                    keep = tf.sets.set_intersection(
                        tf.expand_dims(keep, 0), tf.expand_dims(conf_keep, 0))
                    keep = tf.sparse_tensor_to_dense(keep)[0]

                pre_nms_class_ids = tf.gather(categories, keep)
                pre_nms_scores = tf.gather(scores, keep)
                pre_nms_rois = tf.gather(fast_rcnn_decode_boxes, keep)
                unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]

                def nms_keep_map(class_id):
                    """Apply Non-Maximum Suppression on ROIs of the given class."""
                    # Indices of ROIs of the given class
                    ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
                    # Apply NMS
                    class_keep = tf.image.non_max_suppression(
                        tf.gather(pre_nms_rois, ixs),
                        tf.gather(pre_nms_scores, ixs),
                        max_output_size=config.DETECTION_MAX_INSTANCES,
                        iou_threshold=config.FAST_RCNN_NMS_IOU_THRESHOLD)
                    # Map indices back into the pre-NMS keep set
                    class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
                    # Pad with -1 so returned tensors have the same shape
                    gap = config.DETECTION_MAX_INSTANCES - tf.shape(
                        class_keep)[0]
                    class_keep = tf.pad(class_keep, [(0, gap)],
                                        mode='CONSTANT',
                                        constant_values=-1)
                    # Set shape so map_fn() can infer result shape
                    class_keep.set_shape([config.DETECTION_MAX_INSTANCES])
                    return class_keep

                # 2. Map over class IDs
                nms_keep = tf.map_fn(nms_keep_map,
                                     unique_pre_nms_class_ids,
                                     dtype=tf.int32)
                # 3. Merge results into one list, and remove -1 padding
                nms_keep = tf.reshape(nms_keep, [-1])
                nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
                # 4. Compute intersection between keep and nms_keep
                keep = tf.sets.set_intersection(tf.expand_dims(keep, 0),
                                                tf.expand_dims(nms_keep, 0))
                keep = tf.sparse_tensor_to_dense(keep)[0]
                # Keep top detections
                roi_count = config.DETECTION_MAX_INSTANCES
                class_scores_keep = tf.gather(scores, keep)
                num_keep = tf.minimum(
                    tf.shape(class_scores_keep)[0], roi_count)
                top_ids = tf.nn.top_k(class_scores_keep,
                                      k=num_keep,
                                      sorted=True)[1]
                keep = tf.gather(keep, top_ids)

                # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
                # Coordinates are normalized.
                detections = tf.concat([
                    tf.gather(fast_rcnn_decode_boxes, keep),
                    tf.to_float(tf.gather(categories, keep))[..., tf.newaxis],
                    tf.gather(scores, keep)[..., tf.newaxis]
                ],
                                       axis=1)

                # Pad with zeros if detections < DETECTION_MAX_INSTANCES
                gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
                detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT")

                return detections
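
The -1 padding inside nms_keep_map exists so that tf.map_fn can stack a fixed-size vector per class; the padding is stripped afterwards. A NumPy sketch of that merge step, with hypothetical per-class survivor indices:

import numpy as np

def merge_per_class_keeps(keep_per_class, max_instances):
    """Pad each class's surviving indices with -1 to a fixed length
    (mirroring map_fn's uniform-shape requirement), then flatten and
    drop the padding."""
    padded = [np.pad(kept, (0, max_instances - len(kept)), constant_values=-1)
              for kept in keep_per_class]
    merged = np.concatenate(padded)
    return merged[merged > -1]

# Hypothetical NMS survivors for two classes (indices into the pre-NMS set).
print(merge_per_class_keeps([np.array([0, 2]), np.array([5])], max_instances=4))
# -> [0 2 5]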
Example No. 4
    def rpn_losses(self):
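        """Sample an anchor minibatch, draw positive/negative anchors and the
        top-scoring decoded boxes to TensorBoard, then compute the RPN
        smooth-L1 location loss and softmax classification loss.
        :return:
        location_loss, classification_loss (both scalars)
        """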
        with tf.variable_scope('rpn_losses'):
            minibatch_indices, minibatch_anchor_matched_gtboxes, object_mask, minibatch_labels_one_hot = \
                self.make_minibatch(self.anchors)

            minibatch_anchors = tf.gather(self.anchors, minibatch_indices)
            minibatch_encode_boxes = tf.gather(self.rpn_encode_boxes,
                                               minibatch_indices)
            minibatch_boxes_scores = tf.gather(self.rpn_scores,
                                               minibatch_indices)

            # encode gtboxes
            minibatch_encode_gtboxes = encode_and_decode.encode_boxes(
                unencode_boxes=minibatch_anchor_matched_gtboxes,
                reference_boxes=minibatch_anchors,
                scale_factors=self.scale_factors)

            positive_anchors_in_img = draw_box_with_color(
                self.img_batch,
                minibatch_anchors * tf.expand_dims(object_mask, 1),
                text=tf.shape(tf.where(tf.equal(object_mask, 1.0)))[0])

            negative_mask = tf.cast(
                tf.logical_not(tf.cast(object_mask, tf.bool)), tf.float32)
            negative_anchors_in_img = draw_box_with_color(
                self.img_batch,
                minibatch_anchors * tf.expand_dims(negative_mask, 1),
                text=tf.shape(tf.where(tf.equal(object_mask, 0.0)))[0])

            minibatch_decode_boxes = encode_and_decode.decode_boxes(
                encode_boxes=minibatch_encode_boxes,
                reference_boxes=minibatch_anchors,
                scale_factors=self.scale_factors)

            tf.summary.image('/positive_anchors', positive_anchors_in_img)
            tf.summary.image('/negative_anchors', negative_anchors_in_img)
            # scores have shape [num_minibatch_anchors, 2]; visualize up to 30
            # of the highest-scoring foreground boxes
            num_boxes = tf.shape(minibatch_boxes_scores)[0]
            k_shown = tf.minimum(30, num_boxes)
            top_k_scores, top_k_indices = tf.nn.top_k(
                minibatch_boxes_scores[:, 1], k=k_shown)
            print_tensors(minibatch_boxes_scores, "minibatch_boxes_scores")

            top_detections_in_img = draw_box_with_color(
                self.img_batch,
                tf.gather(minibatch_decode_boxes, top_k_indices),
                text=tf.shape(top_k_scores)[0])
            tf.summary.image('/top_k', top_detections_in_img)

            # losses
            with tf.variable_scope('rpn_location_loss'):
                location_loss = losses.l1_smooth_losses(
                    predict_boxes=minibatch_encode_boxes,
                    gtboxes=minibatch_encode_gtboxes,
                    object_weights=object_mask)
                slim.losses.add_loss(
                    location_loss)  # add smooth l1 loss to losses collection

            with tf.variable_scope('rpn_classification_loss'):
                classification_loss = slim.losses.softmax_cross_entropy(
                    logits=minibatch_boxes_scores,
                    onehot_labels=minibatch_labels_one_hot)

            return location_loss, classification_loss
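
losses.l1_smooth_losses is external to this example. The sketch below assumes the standard smooth-L1 loss from Fast R-CNN (0.5*x^2 for |x| < 1, |x| - 0.5 otherwise), masked so only positive anchors contribute; the exact reduction in the original helper may differ:

import numpy as np

def l1_smooth_loss_sketch(predict_boxes, gtboxes, object_weights):
    """Standard smooth-L1: 0.5*x**2 if |x| < 1 else |x| - 0.5, summed over
    the 4 box coordinates and averaged over positive anchors only."""
    diff = np.abs(predict_boxes - gtboxes)
    per_coord = np.where(diff < 1.0, 0.5 * diff ** 2, diff - 0.5)
    per_box = per_coord.sum(axis=1) * object_weights
    return per_box.sum() / max(object_weights.sum(), 1.0)

pred = np.array([[0.1, 0.2, -0.3, 2.0]], np.float32)
gt = np.zeros((1, 4), np.float32)
print(l1_smooth_loss_sketch(pred, gt, object_weights=np.array([1.0])))  # ~1.57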