Example #1
    def softmax_cross_entropy_loss(self):
        """
        Compute the softmax cross entropy loss for box classification.

        Returns:
            scalar Tensor
        """
        self._log_accuracy()
        wsummary.variable_summaries_v2(self.gt_classes, "gt_classes")
        wsummary.variable_summaries_v2(self.pred_class_logits,
                                       "pred_class_logits")
        gt_classes = self.gt_classes
        if self.cfg.MODEL.ROI_HEADS.POS_LABELS_THRESHOLD > 1e-3:
            with tf.name_scope("modify_gtclasses"):
                # Relabel proposals whose matching score is below the
                # threshold as background (class 0).
                threshold = self.cfg.MODEL.ROI_HEADS.POS_LABELS_THRESHOLD
                scores = tf.reshape(self.proposals[ED_SCORES], [-1])
                gt_classes = tf.where(tf.greater(scores, threshold),
                                      gt_classes, tf.zeros_like(gt_classes))
        classes_loss = tf.losses.sparse_softmax_cross_entropy(
            logits=self.pred_class_logits,
            labels=gt_classes,
            loss_collection=None,
            reduction=tf.losses.Reduction.MEAN)

        wsummary.histogram_or_scalar(classes_loss, "fast_rcnn/classes_loss")
        return classes_loss * self.cfg.MODEL.ROI_HEADS.BOX_CLS_LOSS_SCALE
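A minimal NumPy sketch of the thresholding step above: proposals whose matching score falls below POS_LABELS_THRESHOLD are relabeled as background (class 0) before the softmax cross entropy is computed. The logits, scores and threshold values below are made up for illustration.

import numpy as np

def sparse_softmax_ce(logits, labels):
    # Numerically stable softmax cross entropy, averaged over samples.
    shifted = logits - logits.max(axis=-1, keepdims=True)
    log_probs = shifted - np.log(np.exp(shifted).sum(axis=-1, keepdims=True))
    return -log_probs[np.arange(len(labels)), labels].mean()

logits = np.array([[2.0, 0.5, -1.0], [0.1, 1.5, 0.3]])  # [N, num_classes]
gt_classes = np.array([1, 2])                           # 0 == background
scores = np.array([0.9, 0.05])                          # proposal match scores
threshold = 0.1                                         # stands in for POS_LABELS_THRESHOLD
labels = np.where(scores > threshold, gt_classes, 0)    # low-score boxes -> background
print(labels, sparse_softmax_ce(logits, labels))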
Example #2
    def softmax_cross_entropy_loss(self):
        """
        Compute the softmax cross entropy loss for box classification,
        down-weighting proposals whose matching score is close to 0.5.

        Returns:
            scalar Tensor
        """
        self._log_accuracy()
        wsummary.variable_summaries_v2(self.gt_classes, "gt_classes")
        wsummary.variable_summaries_v2(self.pred_class_logits,
                                       "pred_class_logits")
        scores = tf.stop_gradient(tf.reshape(self.proposals[ED_SCORES], [-1]))
        #weights = tf.abs(scores-0.5)*4
        weights = tf.minimum(tf.pow(tf.abs(scores - 0.5), 2) * 100, 1.0)
        weights = tf.stop_gradient(weights)
        wsummary.histogram_or_scalar(weights, "cls_loss_weights")
        gt_classes = self.gt_classes
        if self.cfg.MODEL.ROI_HEADS.POS_LABELS_THRESHOLD > 1e-3:
            with tf.name_scope("modify_gtclasses"):
                # Relabel proposals whose matching score is below the
                # threshold as background (class 0).
                threshold = self.cfg.MODEL.ROI_HEADS.POS_LABELS_THRESHOLD
                gt_classes = tf.where(tf.greater(scores, threshold),
                                      gt_classes, tf.zeros_like(gt_classes))
        classes_loss = tf.losses.sparse_softmax_cross_entropy(
            logits=self.pred_class_logits,
            labels=gt_classes,
            loss_collection=None,
            reduction=tf.losses.Reduction.NONE)

        classes_loss = weights * classes_loss
        classes_loss = tf.reduce_mean(classes_loss)
        wsummary.histogram_or_scalar(classes_loss, "fast_rcnn/classes_loss")
        return classes_loss * self.cfg.MODEL.ROI_HEADS.BOX_CLS_LOSS_SCALE
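A small sketch of the weighting curve above: proposals whose matching score sits near 0.5 (ambiguous matches) have their classification loss down-weighted, while confidently matched or clearly background proposals keep full weight. The score values are illustrative.

import numpy as np

scores = np.array([0.05, 0.3, 0.5, 0.55, 0.7, 0.95])
weights = np.minimum(np.abs(scores - 0.5) ** 2 * 100.0, 1.0)
print(np.round(weights, 3))  # -> [1.   1.   0.   0.25 1.   1.  ]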
Example #3
    def forward(self, boxes, gboxes, glabels, glength, *args, **kwargs):
        '''
        :param boxes: [1,X,4] or [batch_size,X,4] proposal/anchor boxes
        :param gboxes: [batch_size,Y,4] ground-truth boxes
        :param glabels: [batch_size,Y] ground-truth labels
        :param glength: [batch_size] number of valid ground-truth boxes per image
        :return:
        labels: [batch_size,X] label assigned to each box; -1 marks an ignored box
        (excluded from the loss), 0 marks background
        scores: [batch_size,X] IoU between each box and its matched ground-truth box
        indices: [batch_size,X] index of the matched ground-truth box for positive
        boxes, -1 otherwise
        '''
        with tf.name_scope("ATTSMatcher4"):
            iou_matrix = odb.batch_bboxes_pair_wrapv2(gboxes,
                                                      boxes,
                                                      fn=odb.get_iou_matrix,
                                                      len0=glength,
                                                      scope="get_iou_matrix")
            is_center_in_gtboxes = odb.batch_bboxes_pair_wrapv2(
                gboxes,
                boxes,
                fn=odb.is_center_in_boxes,
                len0=glength,
                dtype=tf.bool,
                scope="get_is_center_in_gtbboxes")
            wsummary.variable_summaries_v2(iou_matrix, "iou_matrix")

            with tf.device("/cpu:0"):
                iou_threshold = self.get_threshold(iou_matrix)
                iou_threshold = tf.minimum(iou_threshold, self.thresholds[-1])
                iou_matrix = tf.where(is_center_in_gtboxes, iou_matrix,
                                      tf.zeros_like(iou_matrix))
                scores, index = tf.nn.top_k(tf.transpose(iou_matrix,
                                                         perm=[0, 2, 1]),
                                            k=1)
                B, Y, _ = btf.combined_static_and_dynamic_shape(gboxes)
                index = tf.squeeze(index, axis=-1)
                scores = tf.squeeze(scores, axis=-1)
                threshold = wmlt.batch_gather(iou_threshold, index)
                labels = wmlt.batch_gather(glabels,
                                           index,
                                           name="gather_labels",
                                           parallel_iterations=B,
                                           back_prop=False)
                is_good_score = tf.greater(scores, self.MIN_IOU_THRESHOLD)
                is_good_score = tf.logical_and(is_good_score,
                                               scores >= threshold)
                labels = tf.where(is_good_score, labels, tf.zeros_like(labels))
                index = tf.where(is_good_score, index,
                                 tf.ones_like(index) * -1)

            if self.same_pos_label:
                labels = tf.where(tf.greater(labels, 0),
                                  tf.ones_like(labels) * self.same_pos_label,
                                  labels)

            return tf.stop_gradient(labels), tf.stop_gradient(
                scores), tf.stop_gradient(index)
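`self.get_threshold` is not shown here; ATSS-style matchers typically derive a per-ground-truth IoU threshold from the mean plus the standard deviation of the candidate IoUs. A NumPy sketch under that assumption (the helper name and shapes are assumptions):

import numpy as np

def atss_like_threshold(iou_matrix):
    # iou_matrix: [num_gt, num_anchors] -> per-gt threshold [num_gt, 1]
    mean = iou_matrix.mean(axis=1, keepdims=True)
    std = iou_matrix.std(axis=1, keepdims=True)
    return mean + std

iou = np.array([[0.10, 0.60, 0.70, 0.05],
                [0.20, 0.10, 0.80, 0.30]])
print(atss_like_threshold(iou))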
Example #4
    def get_box_in_a_single_layer(self, datas, num_dets, img_size, K):
        '''
        Decode the corner (top-left/bottom-right) and center heatmaps of a
        single feature level into at most `num_dets` boxes.
        '''
        #wsummary.variable_summaries_v2(datas['heatmaps_tl'],"hm_tl")
        h_tl = tf.nn.sigmoid(datas['heatmaps_tl'])
        h_br = tf.nn.sigmoid(datas['heatmaps_br'])
        h_ct = tf.nn.sigmoid(datas['heatmaps_ct'])
        #wsummary.variable_summaries_v2(h_tl,"hm_a_tl")

        B, H, W, C = wmlt.combined_static_and_dynamic_shape(h_tl)

        h_tl = self.pixel_nms(h_tl)
        h_br = self.pixel_nms(h_br)
        h_ct = self.pixel_nms(h_ct)
        tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = self._topk(h_tl, K=K)
        br_scores, br_inds, br_clses, br_ys, br_xs = self._topk(h_br, K=K)
        ct_scores, ct_inds, ct_clses, ct_ys, ct_xs = self._topk(h_ct, K=K)
        tl_ys = tf.tile(tf.reshape(tl_ys, [B, K, 1]), [1, 1, K])
        tl_xs = tf.tile(tf.reshape(tl_xs, [B, K, 1]), [1, 1, K])
        br_ys = tf.tile(tf.reshape(br_ys, [B, 1, K]), [1, K, 1])
        br_xs = tf.tile(tf.reshape(br_xs, [B, 1, K]), [1, K, 1])
        ct_ys = tf.reshape(ct_ys, [B, K])
        ct_xs = tf.reshape(ct_xs, [B, K])
        ct_scores = tf.reshape(ct_scores, [B, K])
        if 'offset_tl' in datas:
            tl_regr = wmlt.batch_gather(datas['offset_tl'], tl_inds)
            br_regr = wmlt.batch_gather(datas['offset_br'], br_inds)
            ct_regr = wmlt.batch_gather(datas['offset_ct'], ct_inds)
            tl_regr = tf.reshape(tl_regr, [B, K, 1, 2])
            br_regr = tf.reshape(br_regr, [B, 1, K, 2])
            ct_regr = tf.reshape(ct_regr, [B, K, 2])
            tl_xs = tl_xs + tl_regr[..., 0]
            tl_ys = tl_ys + tl_regr[..., 1]
            br_xs = br_xs + br_regr[..., 0]
            br_ys = br_ys + br_regr[..., 1]
            ct_xs = ct_xs + ct_regr[..., 0]
            ct_ys = ct_ys + ct_regr[..., 1]

        bboxes = tf.stack([tl_ys, tl_xs, br_ys, br_xs], axis=-1)
        #bboxes = tf.Print(bboxes,["box0",tf.reduce_max(bboxes),tf.reduce_min(bboxes),W,H],summarize=100)
        #wsummary.detection_image_summary(self.inputs[IMAGE],
        #boxes=odbox.tfabsolutely_boxes_to_relative_boxes(tf.reshape(bboxes,[B,-1,4]),width=W,height=H),
        #name="box0")
        tl_tag = wmlt.batch_gather(datas['tag_tl'], tl_inds)
        br_tag = wmlt.batch_gather(datas['tag_br'], br_inds)
        tl_tag = tf.expand_dims(tl_tag, axis=2)
        br_tag = tf.expand_dims(br_tag, axis=1)
        tl_tag = tf.tile(tl_tag, [1, 1, K, 1])
        br_tag = tf.tile(br_tag, [1, K, 1, 1])
        dists = tf.abs(tl_tag - br_tag)
        dists = tf.squeeze(dists, axis=-1)
        dis_inds = (dists > self.dis_threshold)

        tl_scores = tf.tile(tf.reshape(tl_scores, [B, K, 1]), [1, 1, K])
        br_scores = tf.tile(tf.reshape(br_scores, [B, 1, K]), [1, K, 1])
        scores = (tl_scores + br_scores) / 2

        tl_clses = tf.tile(tf.reshape(tl_clses, [B, K, 1]), [1, 1, K])
        br_clses = tf.tile(tf.reshape(br_clses, [B, 1, K]), [1, K, 1])
        cls_inds = tf.not_equal(tl_clses, br_clses)

        width_inds = (br_xs < tl_xs)
        height_inds = (br_ys < tl_ys)

        all_inds = tf.logical_or(cls_inds, dis_inds)
        all_inds = tf.logical_or(all_inds, width_inds)
        all_inds = tf.logical_or(all_inds, height_inds)
        #all_inds = cls_inds
        scores = tf.where(all_inds, tf.zeros_like(scores), scores)
        scores, inds = tf.nn.top_k(tf.reshape(scores, [B, -1]), num_dets)
        wsummary.variable_summaries_v2(scores, "scores")
        wsummary.variable_summaries_v2(tl_scores, "tl_scores")
        wsummary.variable_summaries_v2(br_scores, "br_scores")

        bboxes = tf.reshape(bboxes, [B, -1, 4])
        bboxes = wmlt.batch_gather(bboxes, inds)
        #bboxes = tf.Print(bboxes,["box1",tf.reduce_max(bboxes),tf.reduce_min(bboxes),W,H],summarize=100)
        #wsummary.detection_image_summary(self.inputs[IMAGE],
        #                                 boxes=odbox.tfabsolutely_boxes_to_relative_boxes(tf.reshape(bboxes,[B,-1,4]),width=W,height=H),
        #                                 name="box1")

        clses = tf.reshape(tl_clses, [B, -1])
        clses = wmlt.batch_gather(clses, inds)
        '''tl_scores = tf.reshape(tl_scores,[B,-1,1])
        tl_scores = wmlt.batch_gather(tl_scores,inds)

        br_scores = tf.reshape(br_scores,[B,-1,1])
        br_scores = wmlt.batch_gather(br_scores,inds)'''

        ct = tf.stack([ct_ys / tf.to_float(H), ct_xs / tf.to_float(W)],
                      axis=-1)
        bboxes = odbox.tfabsolutely_boxes_to_relative_boxes(bboxes,
                                                            width=W,
                                                            height=H)
        sizes = tf.convert_to_tensor(self.size_threshold, dtype=tf.float32)
        relative_size = sizes * tf.rsqrt(
            tf.cast(img_size[0] * img_size[1], tf.float32))
        _, box_nr, _ = wmlt.combined_static_and_dynamic_shape(bboxes)
        length = tf.ones([B], tf.int32) * box_nr
        #bboxes = tf.Print(bboxes,["bboxes",tf.reduce_min(bboxes),tf.reduce_max(bboxes),tf.reduce_min(ct),tf.reduce_max(ct)],summarize=100)
        center_index = tfop.center_boxes_filter(bboxes=bboxes,
                                                bboxes_clses=clses,
                                                center_points=ct,
                                                center_clses=ct_clses,
                                                size_threshold=relative_size,
                                                bboxes_length=length,
                                                nrs=[3, 5])

        def fn(bboxes, scores, clses, ct_score, c_index):
            ct_score = tf.gather(ct_score, tf.nn.relu(c_index))
            scores = (scores * 2 + ct_score) / 3  # average of the three keypoint scores (two corners + center)
            mask = tf.logical_and(tf.greater_equal(c_index, 0),
                                  tf.greater(scores, self.score_threshold))
            mask = tf.logical_and(tf.greater_equal(ct_score, 0.001), mask)
            bboxes = tf.boolean_mask(bboxes, mask)
            scores = tf.boolean_mask(scores, mask)
            clses = tf.boolean_mask(clses, mask)
            valid_nr = tf.reduce_sum(tf.cast(mask, tf.int32))
            bboxes = tf.pad(bboxes, [[0, box_nr - valid_nr], [0, 0]])
            scores = tf.pad(scores, [[0, box_nr - valid_nr]])
            clses = tf.pad(clses, [[0, box_nr - valid_nr]])
            return bboxes, scores, clses, valid_nr

        bboxes, scores, clses, length = tf.map_fn(
            lambda x: fn(x[0], x[1], x[2], x[3], x[4]),
            elems=(bboxes, scores, clses, ct_scores, center_index),
            dtype=(tf.float32, tf.float32, tf.int32, tf.int32))
        #bboxes = tf.Print(bboxes,["box2",tf.reduce_max(bboxes),tf.reduce_min(bboxes),W,H],summarize=100)
        #wsummary.detection_image_summary(self.inputs[IMAGE],
        #                                 boxes=tf.reshape(bboxes,[B,-1,4]),lengths=length,
        #                                 name="box2")
        return bboxes, scores, clses, length
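`self.pixel_nms` is not shown above; in corner/center-keypoint decoders of this kind it usually keeps only the local maxima of each heatmap by comparing it with a 3x3 max-pooled copy. A sketch under that assumption, in TensorFlow 1.x style:

import tensorflow as tf

def pixel_nms(heatmap, kernel=3):
    # heatmap: [B, H, W, C]; zero out every position that is not a local maximum.
    pooled = tf.nn.max_pool(heatmap, ksize=[1, kernel, kernel, 1],
                            strides=[1, 1, 1, 1], padding="SAME")
    keep = tf.cast(tf.equal(pooled, heatmap), heatmap.dtype)
    return heatmap * keep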
Example #5
    def smooth_l1_loss(self):
        """
        Compute the box regression loss. Despite the method name, this
        implementation uses a GIoU loss, scaled by each proposal's IoU with
        its matched ground-truth box.

        Returns:
            scalar Tensor
        """
        with tf.name_scope("box_regression_loss"):
            gt_proposal_deltas = wmlt.batch_gather(
                self.proposals.gt_boxes, tf.nn.relu(self.proposals.indices))
            batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
                gt_proposal_deltas)
            gt_proposal_deltas = tf.reshape(gt_proposal_deltas,
                                            [batch_size * box_nr, box_dim])
            ious = tf.reshape(self.proposals.scores, [batch_size * box_nr])
            proposal_bboxes = tf.reshape(self.proposals.boxes,
                                         [batch_size * box_nr, box_dim])
            cls_agnostic_bbox_reg = self.pred_proposal_deltas.get_shape(
            ).as_list()[-1] == box_dim
            num_classes = self.pred_class_logits.get_shape().as_list()[-1]
            fg_num_classes = num_classes - 1

            # The box regression loss is only computed between the prediction for the
            # gt class k (k > 0, i.e. foreground) and its target; no loss is defined on
            # predictions for other classes or for background proposals.
            # An empty foreground set still yields a valid loss of zero because the
            # per-box losses are summed rather than averaged (a mean over zero
            # elements would produce NaN).
            fg_inds = tf.greater(self.gt_classes, 0)
            gt_proposal_deltas = tf.boolean_mask(gt_proposal_deltas, fg_inds)
            pred_proposal_deltas = tf.boolean_mask(self.pred_proposal_deltas,
                                                   fg_inds)
            proposal_bboxes = tf.boolean_mask(proposal_bboxes, fg_inds)
            gt_logits_i = tf.boolean_mask(self.gt_classes, fg_inds)
            ious = tf.boolean_mask(ious, fg_inds)
            if not cls_agnostic_bbox_reg:
                pred_proposal_deltas = tf.reshape(
                    pred_proposal_deltas, [-1, fg_num_classes, box_dim])
                pred_proposal_deltas = wmlt.select_2thdata_by_index_v2(
                    pred_proposal_deltas, gt_logits_i - 1)

            pred_bboxes = self.box2box_transform.apply_deltas(
                pred_proposal_deltas, boxes=proposal_bboxes)
            loss_box_reg = odl.giou_loss(pred_bboxes, gt_proposal_deltas)
            #neg_scale = self.cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION/(1.0-self.cfg.MODEL.ROI_HEADS.POSITIVE_FRACTION)
            #scale = tf.where(tf.greater(ious,0.5),ious,ious*neg_scale)
            scale = tf.where(tf.greater(ious, 0.5), tf.ones_like(ious), ious)
            scale = tf.stop_gradient(scale)
            wsummary.variable_summaries_v2(scale, "giou_loss_scale")
            loss_box_reg = tf.reduce_sum(loss_box_reg * scale)
            num_samples = wmlt.num_elements(self.gt_classes)
            # The loss is normalized using the total number of regions (R), not the number
            # of foreground regions even though the box regression loss is only defined on
            # foreground regions. Why? Because doing so gives equal training influence to
            # each foreground example. To see how, consider two different minibatches:
            #  (1) Contains a single foreground region
            #  (2) Contains 100 foreground regions
            # If we normalize by the number of foreground regions, the single example in
            # minibatch (1) will be given 100 times as much influence as each foreground
            # example in minibatch (2). Normalizing by the total number of regions, R,
            # means that the single example in minibatch (1) and each of the 100 examples
            # in minibatch (2) are given equal influence.
            loss_box_reg = loss_box_reg / num_samples

        wsummary.histogram_or_scalar(loss_box_reg, "fast_rcnn/box_reg_loss")

        return loss_box_reg * self.cfg.MODEL.ROI_HEADS.BOX_REG_LOSS_SCALE
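`odl.giou_loss` is not shown; the standard GIoU loss it presumably implements is 1 - GIoU, with GIoU = IoU - |C \ (A ∪ B)| / |C| where C is the smallest axis-aligned box enclosing both boxes. A NumPy sketch assuming (ymin, xmin, ymax, xmax) box order:

import numpy as np

def giou_loss(pred, gt):
    # pred, gt: [N, 4] boxes as (ymin, xmin, ymax, xmax); returns per-box loss [N].
    iy0 = np.maximum(pred[:, 0], gt[:, 0])
    ix0 = np.maximum(pred[:, 1], gt[:, 1])
    iy1 = np.minimum(pred[:, 2], gt[:, 2])
    ix1 = np.minimum(pred[:, 3], gt[:, 3])
    inter = np.clip(iy1 - iy0, 0, None) * np.clip(ix1 - ix0, 0, None)
    area_p = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
    area_g = (gt[:, 2] - gt[:, 0]) * (gt[:, 3] - gt[:, 1])
    union = area_p + area_g - inter
    iou = inter / np.maximum(union, 1e-8)
    cy0 = np.minimum(pred[:, 0], gt[:, 0])
    cx0 = np.minimum(pred[:, 1], gt[:, 1])
    cy1 = np.maximum(pred[:, 2], gt[:, 2])
    cx1 = np.maximum(pred[:, 3], gt[:, 3])
    enclose = (cy1 - cy0) * (cx1 - cx0)
    giou = iou - (enclose - union) / np.maximum(enclose, 1e-8)
    return 1.0 - giou

print(giou_loss(np.array([[0.0, 0.0, 1.0, 1.0]]),
                np.array([[0.0, 0.5, 1.0, 1.5]])))  # partially overlapping boxes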
Example #6
    def losses(self):
        """
        Args:
            For `gt_classes` and `gt_anchors_deltas` parameters, see
                :meth:`RetinaNet.get_ground_truth`.
            Their shapes are (N, R) and (N, R, 4), respectively, where R is
            the total number of anchors across levels, i.e. sum(Hi x Wi x A)
            For `pred_class_logits` and `pred_anchor_deltas`, see
                :meth:`RetinaNetHead.forward`.

        Returns:
            dict[str: Tensor]:
                mapping from a named loss to a scalar tensor
                storing the loss. Used during training only. The dict keys are:
                "loss_cls" and "loss_box_reg"
        """
        assert len(self.pred_logits[0].get_shape()) == 4, \
            "pred_logits must be 4-D tensors"
        assert len(self.pred_anchor_deltas[0].get_shape()) == 4, \
            "pred_anchor_deltas must be 4-D tensors"
        gt_classes, gt_anchors_deltas = self._get_ground_truth()
        pred_class_logits, pred_anchor_deltas = permute_all_cls_and_box_to_N_HWA_K_and_concat(
            self.pred_logits, self.pred_anchor_deltas,
            self.num_classes)  # Shapes: (N, R, K) and (N, R, 4), respectively.

        valid_idxs = gt_classes >= 0
        foreground_idxs = (gt_classes > 0)
        num_foreground = tf.reduce_sum(tf.cast(foreground_idxs, tf.int32))
        #num_foreground = tf.Print(num_foreground,[tf.to_float(num_foreground)/tf.to_float(tf.reduce_prod(tf.shape(gt_classes)))*100,"XXX"])

        gt_classes_target = tf.boolean_mask(gt_classes, valid_idxs)
        wsummary.variable_summaries_v2(tf.to_float(gt_classes_target),
                                       "gt_classes_target")
        gt_classes_target = tf.one_hot(gt_classes_target,
                                       depth=self.num_classes + 1)
        # RetinaNet has no background class; background is index 0, so it is
        # dropped after the one-hot encoding.
        gt_classes_target = gt_classes_target[:, 1:]
        pred_class_logits = tf.boolean_mask(pred_class_logits, valid_idxs)

        # logits loss
        loss_cls = tf.reduce_sum(
            wnn.sigmoid_cross_entropy_with_logits_FL(
                labels=gt_classes_target,
                logits=pred_class_logits,
                alpha=self.focal_loss_alpha,
                gamma=self.focal_loss_gamma,
            )) / tf.cast(tf.maximum(1, num_foreground), tf.float32)

        # regression loss
        pred_anchor_deltas = tf.boolean_mask(pred_anchor_deltas,
                                             foreground_idxs)
        gt_anchors_deltas = tf.boolean_mask(gt_anchors_deltas, foreground_idxs)
        loss_box_reg = tf.losses.huber_loss(
            pred_anchor_deltas,
            gt_anchors_deltas,
            loss_collection=None,
            reduction=tf.losses.Reduction.SUM,
        ) / tf.cast(tf.maximum(1, num_foreground), tf.float32)

        loss_cls = loss_cls * self.cfg.BOX_CLS_LOSS_SCALE
        loss_box_reg = loss_box_reg * self.cfg.BOX_REG_LOSS_SCALE

        return {"loss_cls": loss_cls, "loss_box_reg": loss_box_reg}
Example #7
    def forward(self, features):
        """
        Arguments:
            features (list[Tensor]): FPN feature map tensors in high to low resolution.
                Each tensor in the list corresponds to a different feature level.

        Returns:
            logits (list[Tensor]): #lvl tensors, each has shape (N, Hi, Wi, AxK).
                The tensor predicts the classification probability
                at each spatial position for each of the A anchors and K object
                classes.
            bbox_reg (list[Tensor]): #lvl tensors, each has shape (N, Hi, Wi, Ax4).
                The tensor predicts 4-vector (dx,dy,dw,dh) box
                regression values for every anchor. These values are the
                relative offset between the anchor and the ground truth box.
            center_ness (list[Tensor]): #lvl tensors, each has shape (N, Hi, Wi).
                The tensor predicts the center-ness score at each spatial
                position.
        """
        cfg = self.cfg
        num_classes = cfg.NUM_CLASSES
        num_convs = cfg.NUM_CONVS
        prior_prob = cfg.PRIOR_PROB
        bias_value = -math.log((1 - prior_prob) / prior_prob)
        logits = []
        bbox_reg = []
        center_ness = []
        for j, feature in enumerate(features):
            channels = feature.get_shape().as_list()[-1]
            with tf.variable_scope("WeightSharedConvolutionalBoxPredictor",
                                   reuse=tf.AUTO_REUSE):
                net = feature
                with tf.variable_scope("BoxPredictionTower"):
                    for i in range(num_convs):
                        net = slim.conv2d(
                            net,
                            channels, [3, 3],
                            activation_fn=None,
                            normalizer_fn=None,
                            biases_initializer=None if self.normalizer_fn
                            is not None else tf.zeros_initializer(),
                            scope=f"conv2d_{i}")
                        if self.normalizer_fn is not None:
                            with tf.variable_scope(f"conv2d_{i}"):
                                net = self.normalizer_fn(
                                    net,
                                    scope=f'{self.norm_scope_name}/feature_{j}',
                                    **self.norm_params)
                        if self.activation_fn is not None:
                            net = self.activation_fn(net)
                _bbox_reg = slim.conv2d(net,
                                        4, [3, 3],
                                        activation_fn=None,
                                        normalizer_fn=None,
                                        scope="BoxPredictor")
                _bbox_reg = _bbox_reg * wnnl.scale_gradient(
                    tf.get_variable(name=f"gamma_{j}",
                                    shape=(),
                                    initializer=tf.ones_initializer()), 0.2)
                #_bbox_reg = self.clip_exp(_bbox_reg)
                #_bbox_reg = _bbox_reg*math.pow(2,j)
                _bbox_reg = tf.nn.relu(_bbox_reg)
                _bbox_reg = _bbox_reg * math.pow(2, j) * 16
                wsummary.variable_summaries_v2(_bbox_reg, "bbox_reg_net")
                '''net = feature
                with tf.variable_scope("CenterPredictionTower"):
                    for i in range(num_convs):
                        net = slim.conv2d(net,channels,[3,3],
                                          activation_fn=None,
                                          normalizer_fn=None,
                                          biases_initializer=None if self.normalizer_fn is not None else tf.zeros_initializer(),
                                          scope=f"conv2d_{i}")
                        if self.normalizer_fn is not None:
                            with tf.variable_scope(f"conv2d_{i}"):
                                net = self.normalizer_fn(net, scope=f'{self.norm_scope_name}/feature_{j}',**self.norm_params)
                        if self.activation_fn is not None:
                            net = self.activation_fn(net)'''
                _center_ness = slim.conv2d(net,
                                           1, [3, 3],
                                           activation_fn=None,
                                           normalizer_fn=None,
                                           scope="CenterNessPredictor")
                _center_ness = tf.squeeze(_center_ness, axis=-1)

                net = feature
                with tf.variable_scope("ClassPredictionTower"):
                    for i in range(num_convs):
                        net = slim.conv2d(
                            net,
                            channels, [3, 3],
                            activation_fn=None,
                            normalizer_fn=None,
                            biases_initializer=None if self.normalizer_fn
                            is not None else tf.zeros_initializer(),
                            scope=f"conv2d_{i}")
                        if self.normalizer_fn is not None:
                            with tf.variable_scope(f"conv2d_{i}"):
                                net = self.normalizer_fn(
                                    net,
                                    scope=f'{self.norm_scope_name}/feature_{j}',
                                    **self.norm_params)
                        if self.activation_fn is not None:
                            net = self.activation_fn(net)
                _logits = slim.conv2d(
                    net,
                    num_classes, [3, 3],
                    activation_fn=None,
                    normalizer_fn=None,
                    biases_initializer=tf.constant_initializer(
                        value=bias_value),
                    scope="ClassPredictor")

            logits.append(_logits)
            bbox_reg.append(_bbox_reg)
            center_ness.append(_center_ness)
        return logits, bbox_reg, center_ness
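A quick check of the bias initialization used above: setting the class-predictor bias to -log((1 - prior_prob) / prior_prob) makes every location's initial sigmoid score equal prior_prob, which keeps the classification loss stable at the start of training. The value 0.01 below is the usual choice; the actual cfg.PRIOR_PROB here is an assumption.

import math

prior_prob = 0.01  # assumed value of cfg.PRIOR_PROB
bias_value = -math.log((1 - prior_prob) / prior_prob)
print(1.0 / (1.0 + math.exp(-bias_value)))  # -> 0.01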
Example #8
    def forward(self, x, scope="BoxPredictor", fwd_type=BoxesForwardType.ALL):
        """
        Predict classification scores, box regression deltas and (optionally)
        IoU logits from pooled ROI features. `x` may be a single tensor shared
        by all heads or an iterable of per-head tensors; `fwd_type` selects
        which outputs are computed.
        """
        with tf.variable_scope(scope):
            if not isinstance(x,tf.Tensor) and isinstance(x,Iterable):
                if self.cfg.MODEL.ROI_HEADS.PRED_IOU:
                    assert len(x)==3, "error x length."
                else:
                    assert len(x) == 2, "error x length."

                def trans(net):
                    if len(net.get_shape()) > 2:
                        shape = wmlt.combined_static_and_dynamic_shape(net)
                        dim = 1
                        for x in shape[1:]:
                            dim *= x
                        return tf.reshape(net,[shape[0],dim])
                    else:
                        return net
                x = [trans(v) for v in x]
                if fwd_type&BoxesForwardType.CLASSES:
                    scores = slim.fully_connected(x[0],self.num_classes+1,activation_fn=None,
                                                  normalizer_fn=None,scope="cls_score")
                else:
                    scores = None

                if fwd_type&BoxesForwardType.BBOXES:
                    foreground_num_classes = self.num_classes
                    num_bbox_reg_classes = 1 if self.cls_agnostic_bbox_reg else foreground_num_classes
                    proposal_deltas = slim.fully_connected(x[1],self.box_dim*num_bbox_reg_classes,activation_fn=None,
                                                           normalizer_fn=None,scope="bbox_pred")
                else:
                    proposal_deltas = None

                if self.cfg.MODEL.ROI_HEADS.PRED_IOU and fwd_type&BoxesForwardType.IOUS:
                    iou_logits = slim.fully_connected(x[2],1,
                                                      activation_fn=None,
                                                      normalizer_fn=None,
                                                      scope="iou_pred")
                else:
                    iou_logits = None
            else:
                if len(x.get_shape()) > 2:
                    shape = wmlt.combined_static_and_dynamic_shape(x)
                    x = tf.reshape(x,[shape[0],-1])
                if fwd_type&BoxesForwardType.CLASSES:
                    scores = slim.fully_connected(x,self.num_classes+1,activation_fn=None,
                                                  normalizer_fn=None,scope="cls_score")
                else:
                    scores = None

                if fwd_type&BoxesForwardType.BBOXES:
                    foreground_num_classes = self.num_classes
                    num_bbox_reg_classes = 1 if self.cls_agnostic_bbox_reg else foreground_num_classes
                    proposal_deltas = slim.fully_connected(x,self.box_dim*num_bbox_reg_classes,activation_fn=None,
                                                  normalizer_fn=None,scope="bbox_pred")
                else:
                    proposal_deltas = None

                if self.cfg.MODEL.ROI_HEADS.PRED_IOU and fwd_type&BoxesForwardType.IOUS:
                    iou_logits = slim.fully_connected(x,1,
                                                      activation_fn=None,
                                                      normalizer_fn=None,
                                                      scope="iou_pred")
                else:
                    iou_logits = None

            if proposal_deltas is not None:
                wsummary.variable_summaries_v2(proposal_deltas, "proposal_deltas")
            if self.cfg.MODEL.ROI_HEADS.PRED_IOU:
                return scores, proposal_deltas,iou_logits
            else:
                return scores, proposal_deltas
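`BoxesForwardType` is not defined in this snippet; the bitwise tests (fwd_type & BoxesForwardType.CLASSES, etc.) suggest a flag enum along the following lines. This is a sketch of a plausible definition, not the actual one:

from enum import IntFlag

class BoxesForwardType(IntFlag):
    CLASSES = 1
    BBOXES = 2
    IOUS = 4
    ALL = CLASSES | BBOXES | IOUS

fwd_type = BoxesForwardType.CLASSES | BoxesForwardType.BBOXES
print(bool(fwd_type & BoxesForwardType.IOUS))  # -> False: the IoU head is skipped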
Example #9
    def forward(self, x, scope="BoxPredictor"):
        """
        Predict classification scores, box regression deltas and (optionally)
        IoU logits from pooled ROI features. Each input may be a 2-D tensor
        (fed to a fully connected layer) or a 4-D feature map (fed to a 1x1
        conv followed by global average pooling).
        """
        with tf.variable_scope(scope):
            if not isinstance(x,tf.Tensor) and isinstance(x,Iterable):
                if self.cfg.MODEL.ROI_HEADS.PRED_IOU:
                    assert len(x)==3, "error x length."
                else:
                    assert len(x) == 2, "error x length."


                if len(x[0].get_shape()) == 2:
                    scores = slim.fully_connected(x[0],self.num_classes+1,activation_fn=None,
                                              normalizer_fn=None,scope="cls_score")
                else:
                    scores = slim.conv2d(x[0], self.num_classes + 1, [1,1],
                                         activation_fn=None,
                                         normalizer_fn=None, scope="cls_score")
                    scores = tf.reduce_mean(scores,axis=[1,2],keepdims=False,
                                            name="cls_score")
                foreground_num_classes = self.num_classes
                num_bbox_reg_classes = 1 if self.cls_agnostic_bbox_reg else foreground_num_classes

                if len(x[1].get_shape()) == 2:
                    proposal_deltas = slim.fully_connected(x[1],self.box_dim*num_bbox_reg_classes,activation_fn=None,
                                                       normalizer_fn=None,scope="bbox_pred")
                else:
                    proposal_deltas = slim.conv2d(x[1], self.box_dim*num_bbox_reg_classes, [1,1],
                                         activation_fn=None,
                                         normalizer_fn=None, scope="bbox_pred")
                    proposal_deltas = tf.reduce_mean(proposal_deltas,axis=[1,2],keepdims=False,
                                            name="bbox_pred")
                if self.cfg.MODEL.ROI_HEADS.PRED_IOU:
                    if len(x[2].get_shape()) == 2:
                        if btf.channel(x[2]) != 1:
                            iou_logits = slim.fully_connected(x[2], 1,
                                                              activation_fn=None,
                                                              normalizer_fn=None,
                                                              scope="iou_pred")
                        else:
                            iou_logits = x[2]
                    else:
                        iou_logits = slim.conv2d(x[2], 1, [1,1],
                                                      activation_fn=None,
                                                      normalizer_fn=None, scope="iou_pred")
                        iou_logits = tf.reduce_mean(iou_logits,axis=[1,2],
                                                    keepdims=False,
                                                    name="iou_pred")
            else:
                if len(x.get_shape()) > 2:
                    shape = wmlt.combined_static_and_dynamic_shape(x)
                    x = tf.reshape(x,[shape[0],-1])
                scores = slim.fully_connected(x,self.num_classes+1,activation_fn=None,
                                              normalizer_fn=None,scope="cls_score")
                foreground_num_classes = self.num_classes
                num_bbox_reg_classes = 1 if self.cls_agnostic_bbox_reg else foreground_num_classes
                proposal_deltas = slim.fully_connected(x,self.box_dim*num_bbox_reg_classes,activation_fn=None,
                                                       normalizer_fn=None,scope="bbox_pred")
                if self.cfg.MODEL.ROI_HEADS.PRED_IOU:
                    iou_logits = slim.fully_connected(x,1,
                                                      activation_fn=None,
                                                      normalizer_fn=None,
                                                      scope="iou_pred")

            wsummary.variable_summaries_v2(proposal_deltas,"proposal_deltas")
            if self.cfg.MODEL.ROI_HEADS.PRED_IOU:
                return scores, proposal_deltas,iou_logits
            else:
                return scores, proposal_deltas
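The 4-D branches above apply a 1x1 convolution and then average over the spatial dimensions, which is equivalent to a fully connected layer applied to the globally average-pooled feature (both are linear in the per-position features). A minimal TensorFlow 1.x sketch of that pattern using plain tf.layers instead of slim; the function and argument names are illustrative:

import tensorflow as tf

def conv_then_gap(feature, num_outputs, name):
    # feature: [B, H, W, C] -> [B, num_outputs]
    logits = tf.layers.conv2d(feature, num_outputs, kernel_size=1,
                              activation=None, name=name)
    return tf.reduce_mean(logits, axis=[1, 2])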