def softmax_cross_entropy_loss(self):
    """
    Compute the softmax cross entropy loss for box classification.

    When cfg.MODEL.ROI_HEADS.POS_LABELS_THRESHOLD is larger than 1e-3, every
    proposal whose ED_SCORES entry is not greater than that threshold has its
    label replaced by 0 before the loss is evaluated.

    Returns:
        scalar Tensor: mean sparse softmax cross entropy multiplied by
        cfg.MODEL.ROI_HEADS.BOX_CLS_LOSS_SCALE.
    """
    self._log_accuracy()
    wsummary.variable_summaries_v2(self.gt_classes, "gt_classes")
    wsummary.variable_summaries_v2(self.pred_class_logits,
                                   "pred_class_logits")

    target_classes = self.gt_classes
    if self.cfg.MODEL.ROI_HEADS.POS_LABELS_THRESHOLD > 1e-3:
        # NOTE(review): the source formatting was collapsed; the loss op is
        # assumed to be created outside of this name scope -- confirm.
        with tf.name_scope("modify_gtclasses"):
            min_score = self.cfg.MODEL.ROI_HEADS.POS_LABELS_THRESHOLD
            flat_scores = tf.reshape(self.proposals[ED_SCORES], [-1])
            is_confident = tf.greater(flat_scores, min_score)
            target_classes = tf.where(is_confident,
                                      target_classes,
                                      tf.zeros_like(target_classes))

    loss = tf.losses.sparse_softmax_cross_entropy(
        logits=self.pred_class_logits,
        labels=target_classes,
        loss_collection=None,
        reduction=tf.losses.Reduction.MEAN)
    wsummary.histogram_or_scalar(loss, "fast_rcnn/classes_loss")

    loss_scale = self.cfg.MODEL.ROI_HEADS.BOX_CLS_LOSS_SCALE
    return loss * loss_scale
def softmax_cross_entropy_loss(self):
    """
    Compute a per-proposal weighted softmax cross entropy loss.

    Each proposal's loss is weighted by min(100 * (score - 0.5)^2, 1), where
    score is the proposal's ED_SCORES entry; no gradient flows through the
    scores or the weights. When cfg.MODEL.ROI_HEADS.POS_LABELS_THRESHOLD is
    larger than 1e-3, proposals whose score is not greater than the threshold
    are relabelled as class 0 first.

    Returns:
        scalar Tensor: mean of the weighted losses multiplied by
        cfg.MODEL.ROI_HEADS.BOX_CLS_LOSS_SCALE.
    """
    self._log_accuracy()
    wsummary.variable_summaries_v2(self.gt_classes, "gt_classes")
    wsummary.variable_summaries_v2(self.pred_class_logits,
                                   "pred_class_logits")

    flat_scores = tf.reshape(self.proposals[ED_SCORES], [-1])
    scores = tf.stop_gradient(flat_scores)

    # Quadratic weighting that saturates at 1 once |score - 0.5| >= 0.1
    # (an earlier linear variant, abs(score - 0.5) * 4, was disabled).
    distance = tf.abs(scores - 0.5)
    weights = tf.minimum(tf.pow(distance, 2) * 100, 1.0)
    weights = tf.stop_gradient(weights)
    wsummary.histogram_or_scalar(weights, "cls_loss_weights")

    target_classes = self.gt_classes
    if self.cfg.MODEL.ROI_HEADS.POS_LABELS_THRESHOLD > 1e-3:
        # NOTE(review): the source formatting was collapsed; the loss op is
        # assumed to be created outside of this name scope -- confirm.
        with tf.name_scope("modify_gtclasses"):
            min_score = self.cfg.MODEL.ROI_HEADS.POS_LABELS_THRESHOLD
            is_confident = tf.greater(scores, min_score)
            target_classes = tf.where(is_confident,
                                      target_classes,
                                      tf.zeros_like(target_classes))

    per_box_loss = tf.losses.sparse_softmax_cross_entropy(
        logits=self.pred_class_logits,
        labels=target_classes,
        loss_collection=None,
        reduction=tf.losses.Reduction.NONE)

    weighted_loss = weights * per_box_loss
    loss = tf.reduce_mean(weighted_loss)
    wsummary.histogram_or_scalar(loss, "fast_rcnn/classes_loss")

    loss_scale = self.cfg.MODEL.ROI_HEADS.BOX_CLS_LOSS_SCALE
    return loss * loss_scale
def forward(self, boxes, gboxes, glabels, glength, *args, **kwargs):
    '''
    Match proposal boxes to ground-truth boxes with a per-gt IoU threshold.

    A proposal keeps the label of its best-overlapping ground-truth box only
    if its center lies inside that box, its IoU is greater than
    self.MIN_IOU_THRESHOLD, and its IoU reaches the threshold obtained from
    self.get_threshold for that ground-truth box (capped at
    self.thresholds[-1]).

    :param boxes: [1,X,4] or [batch_size,X,4] proposal boxes
    :param gboxes: [batch_size,Y,4] groundtruth boxes
    :param glabels: [batch_size,Y] groundtruth labels
    :param glength: [batch_size] boxes size
    :return:
    labels: the label of boxes, 0 is background. The original note gave the
    shape as [batch_size,X,4] and mentioned -1 for ignored boxes; the code
    below gathers from glabels with a per-proposal index (which suggests
    [batch_size,X]) and only ever writes 0 for rejected boxes -- TODO confirm
    scores: [batch_size,X], the overlap score with boxes' match gt box
    indices: [batch_size,X] the index of matched gt boxes when it's a
    positive anchor box, else it's -1
    All three outputs are wrapped in tf.stop_gradient.
    '''
    with tf.name_scope("ATTSMatcher4"):
        iou_matrix = odb.batch_bboxes_pair_wrapv2(gboxes, boxes, fn=odb.get_iou_matrix, len0=glength, scope="get_iou_matrix")
        is_center_in_gtboxes = odb.batch_bboxes_pair_wrapv2(
            gboxes, boxes, fn=odb.is_center_in_boxes, len0=glength, dtype=tf.bool, scope="get_is_center_in_gtbboxes")
        wsummary.variable_summaries_v2(iou_matrix, "iou_matrix")
        # NOTE(review): the source formatting was collapsed, so the exact
        # extent of this device scope could not be recovered; it is assumed
        # to cover the whole matching computation -- confirm.
        with tf.device("/cpu:0"):
            # Per-gt threshold, never above the last configured threshold.
            iou_threshold = self.get_threshold(iou_matrix)
            iou_threshold = tf.minimum(iou_threshold, self.thresholds[-1])
            # Pairs whose proposal center is outside the gt box get IoU 0.
            iou_matrix = tf.where(is_center_in_gtboxes, iou_matrix, tf.zeros_like(iou_matrix))
            # After the transpose the last axis runs over gt boxes, so top-1
            # picks the best gt box for every proposal.
            scores, index = tf.nn.top_k(tf.transpose(iou_matrix, perm=[0, 2, 1]), k=1)
            B, Y, _ = btf.combined_static_and_dynamic_shape(gboxes)
            index = tf.squeeze(index, axis=-1)
            scores = tf.squeeze(scores, axis=-1)
            # Threshold and label of the gt box each proposal was matched to.
            threshold = wmlt.batch_gather(iou_threshold, index)
            labels = wmlt.batch_gather(glabels, index, name="gather_labels", parallel_iterations=B, back_prop=False)
            is_good_score = tf.greater(scores, self.MIN_IOU_THRESHOLD)
            is_good_score = tf.logical_and(is_good_score, scores >= threshold)
            # Rejected proposals become background (label 0, index -1).
            labels = tf.where(is_good_score, labels, tf.zeros_like(labels))
            index = tf.where(is_good_score, index, tf.ones_like(index) * -1)
        if self.same_pos_label:
            # Collapse every positive label to the single configured value.
            labels = tf.where(tf.greater(labels, 0), tf.ones_like(labels) * self.same_pos_label, labels)
        return tf.stop_gradient(labels), tf.stop_gradient(
            scores), tf.stop_gradient(index)
def get_box_in_a_single_layer(self, datas, num_dets, img_size, K):
    '''
    Decode boxes for one feature level from corner and center heatmaps.

    The K best top-left and bottom-right corners are paired into K*K
    candidate boxes. A pair is rejected when the corner classes differ, the
    embedding distance exceeds self.dis_threshold, or the box has negative
    width/height. The best num_dets pairs are then filtered by
    tfop.center_boxes_filter against the K best center points, and the final
    score is the mean of the two corner scores and the center score.

    :param datas: dict with 'heatmaps_tl', 'heatmaps_br', 'heatmaps_ct',
        'tag_tl', 'tag_br' and optionally 'offset_tl', 'offset_br',
        'offset_ct' (offsets are only applied when 'offset_tl' is present)
    :param num_dets: number of corner pairs kept after pairing
    :param img_size: indexable; img_size[0] * img_size[1] is used to turn
        self.size_threshold into a relative size
    :param K: number of top-scoring points taken from every heatmap
    :return:
    bboxes: relative boxes, zero padded per image up to the candidate count
    scores: box scores, zero padded
    clses: box classes, zero padded
    length: number of valid boxes per image
    '''
    h_tl = tf.nn.sigmoid(datas['heatmaps_tl'])
    h_br = tf.nn.sigmoid(datas['heatmaps_br'])
    h_ct = tf.nn.sigmoid(datas['heatmaps_ct'])
    B, H, W, _ = wmlt.combined_static_and_dynamic_shape(h_tl)

    h_tl = self.pixel_nms(h_tl)
    h_br = self.pixel_nms(h_br)
    h_ct = self.pixel_nms(h_ct)

    tl_scores, tl_inds, tl_clses, tl_ys, tl_xs = self._topk(h_tl, K=K)
    br_scores, br_inds, br_clses, br_ys, br_xs = self._topk(h_br, K=K)
    ct_scores, ct_inds, ct_clses, ct_ys, ct_xs = self._topk(h_ct, K=K)

    # Broadcast corners to a [B, K, K] grid: axis 1 indexes the top-left
    # corner, axis 2 the bottom-right corner.
    tl_ys = tf.tile(tf.reshape(tl_ys, [B, K, 1]), [1, 1, K])
    tl_xs = tf.tile(tf.reshape(tl_xs, [B, K, 1]), [1, 1, K])
    br_ys = tf.tile(tf.reshape(br_ys, [B, 1, K]), [1, K, 1])
    br_xs = tf.tile(tf.reshape(br_xs, [B, 1, K]), [1, K, 1])
    ct_ys = tf.reshape(ct_ys, [B, K])
    ct_xs = tf.reshape(ct_xs, [B, K])
    ct_scores = tf.reshape(ct_scores, [B, K])

    if 'offset_tl' in datas:
        tl_regr = wmlt.batch_gather(datas['offset_tl'], tl_inds)
        br_regr = wmlt.batch_gather(datas['offset_br'], br_inds)
        # Fix: center offsets must be looked up at the center locations
        # (ct_inds); the previous code used br_inds by mistake.
        ct_regr = wmlt.batch_gather(datas['offset_ct'], ct_inds)
        tl_regr = tf.reshape(tl_regr, [B, K, 1, 2])
        br_regr = tf.reshape(br_regr, [B, 1, K, 2])
        ct_regr = tf.reshape(ct_regr, [B, K, 2])
        tl_xs = tl_xs + tl_regr[..., 0]
        tl_ys = tl_ys + tl_regr[..., 1]
        br_xs = br_xs + br_regr[..., 0]
        br_ys = br_ys + br_regr[..., 1]
        ct_xs = ct_xs + ct_regr[..., 0]
        ct_ys = ct_ys + ct_regr[..., 1]

    bboxes = tf.stack([tl_ys, tl_xs, br_ys, br_xs], axis=-1)

    # Embedding distance between every top-left / bottom-right pair.
    tl_tag = wmlt.batch_gather(datas['tag_tl'], tl_inds)
    br_tag = wmlt.batch_gather(datas['tag_br'], br_inds)
    tl_tag = tf.expand_dims(tl_tag, axis=2)
    br_tag = tf.expand_dims(br_tag, axis=1)
    tl_tag = tf.tile(tl_tag, [1, 1, K, 1])
    br_tag = tf.tile(br_tag, [1, K, 1, 1])
    dists = tf.abs(tl_tag - br_tag)
    dists = tf.squeeze(dists, axis=-1)
    dis_inds = (dists > self.dis_threshold)

    tl_scores = tf.tile(tf.reshape(tl_scores, [B, K, 1]), [1, 1, K])
    br_scores = tf.tile(tf.reshape(br_scores, [B, 1, K]), [1, K, 1])
    scores = (tl_scores + br_scores) / 2

    tl_clses = tf.tile(tf.reshape(tl_clses, [B, K, 1]), [1, 1, K])
    br_clses = tf.tile(tf.reshape(br_clses, [B, 1, K]), [1, K, 1])
    cls_inds = tf.not_equal(tl_clses, br_clses)

    width_inds = (br_xs < tl_xs)
    height_inds = (br_ys < tl_ys)

    # A pair is invalid if any single rejection criterion fires.
    all_inds = tf.logical_or(cls_inds, dis_inds)
    all_inds = tf.logical_or(all_inds, width_inds)
    all_inds = tf.logical_or(all_inds, height_inds)
    scores = tf.where(all_inds, tf.zeros_like(scores), scores)
    scores, inds = tf.nn.top_k(tf.reshape(scores, [B, -1]), num_dets)
    wsummary.variable_summaries_v2(scores, "scores")
    wsummary.variable_summaries_v2(tl_scores, "tl_scores")
    wsummary.variable_summaries_v2(br_scores, "br_scores")

    bboxes = tf.reshape(bboxes, [B, -1, 4])
    bboxes = wmlt.batch_gather(bboxes, inds)

    clses = tf.reshape(tl_clses, [B, -1])
    clses = wmlt.batch_gather(clses, inds)

    # Centers and boxes are normalized by the heatmap size.
    ct = tf.stack([ct_ys / tf.to_float(H), ct_xs / tf.to_float(W)], axis=-1)
    bboxes = odbox.tfabsolutely_boxes_to_relative_boxes(bboxes,
                                                        width=W,
                                                        height=H)
    sizes = tf.convert_to_tensor(self.size_threshold, dtype=tf.float32)
    relative_size = sizes * tf.rsqrt(
        tf.cast(img_size[0] * img_size[1], tf.float32))
    _, box_nr, _ = wmlt.combined_static_and_dynamic_shape(bboxes)
    length = tf.ones([B], tf.int32) * box_nr
    center_index = tfop.center_boxes_filter(bboxes=bboxes,
                                            bboxes_clses=clses,
                                            center_points=ct,
                                            center_clses=ct_clses,
                                            size_threshold=relative_size,
                                            bboxes_length=length,
                                            nrs=[3, 5])

    def fn(bboxes, scores, clses, ct_score, c_index):
        # relu clamps negative center indices to 0 so the gather stays in
        # range; those entries are dropped by the c_index >= 0 test below.
        ct_score = tf.gather(ct_score, tf.nn.relu(c_index))
        # Mean over the three keypoints (two corners and the center).
        scores = (scores * 2 + ct_score) / 3
        mask = tf.logical_and(tf.greater_equal(c_index, 0),
                              tf.greater(scores, self.score_threshold))
        mask = tf.logical_and(tf.greater_equal(ct_score, 0.001), mask)
        bboxes = tf.boolean_mask(bboxes, mask)
        scores = tf.boolean_mask(scores, mask)
        clses = tf.boolean_mask(clses, mask)
        num_kept = tf.reduce_sum(tf.cast(mask, tf.int32))
        # Pad back to a fixed size so tf.map_fn can stack per-image results.
        bboxes = tf.pad(bboxes, [[0, box_nr - num_kept], [0, 0]])
        scores = tf.pad(scores, [[0, box_nr - num_kept]])
        clses = tf.pad(clses, [[0, box_nr - num_kept]])
        return bboxes, scores, clses, num_kept

    bboxes, scores, clses, length = tf.map_fn(
        lambda x: fn(x[0], x[1], x[2], x[3], x[4]),
        elems=(bboxes, scores, clses, ct_scores, center_index),
        dtype=(tf.float32, tf.float32, tf.int32, tf.int32))
    return bboxes, scores, clses, length
def smooth_l1_loss(self):
    """
    Compute the box regression loss.

    Despite the method name, the loss evaluated here is odl.giou_loss between
    the boxes decoded from the predicted deltas and the matched ground-truth
    boxes. Only foreground proposals (gt class > 0) contribute, and each one
    is weighted by its matching score unless that score exceeds 0.5, in which
    case the weight is 1.

    Returns:
        scalar Tensor: summed weighted loss divided by the total number of
        proposals, multiplied by cfg.MODEL.ROI_HEADS.BOX_REG_LOSS_SCALE.
    """
    # NOTE(review): the source formatting was collapsed; the whole body is
    # assumed to live inside this name scope -- confirm.
    with tf.name_scope("box_regression_loss"):
        # relu clamps negative (unmatched) indices to 0 so the gather stays
        # valid; those rows are removed by the foreground mask below.
        matched_gt_boxes = wmlt.batch_gather(
            self.proposals.gt_boxes, tf.nn.relu(self.proposals.indices))
        batch_size, box_nr, box_dim = wmlt.combined_static_and_dynamic_shape(
            matched_gt_boxes)

        def flatten_batch(tensor, *trailing_dims):
            # Merge the batch and box axes into one leading axis.
            return tf.reshape(tensor,
                              [batch_size * box_nr] + list(trailing_dims))

        matched_gt_boxes = flatten_batch(matched_gt_boxes, box_dim)
        match_scores = flatten_batch(self.proposals.scores)
        anchor_boxes = flatten_batch(self.proposals.boxes, box_dim)

        deltas_width = self.pred_proposal_deltas.get_shape().as_list()[-1]
        cls_agnostic_bbox_reg = deltas_width == box_dim
        num_classes = self.pred_class_logits.get_shape().as_list()[-1]
        fg_num_classes = num_classes - 1

        # The regression loss is defined on foreground proposals only; there
        # is no loss for background or for non-gt classes.
        is_foreground = tf.greater(self.gt_classes, 0)
        (matched_gt_boxes, pred_deltas, anchor_boxes, fg_classes,
         match_scores) = [
             tf.boolean_mask(tensor, is_foreground)
             for tensor in (matched_gt_boxes, self.pred_proposal_deltas,
                            anchor_boxes, self.gt_classes, match_scores)
         ]

        if not cls_agnostic_bbox_reg:
            # Class-specific regression: keep the deltas predicted for the
            # gt class (class ids are shifted by one because 0 is
            # background).
            pred_deltas = tf.reshape(pred_deltas,
                                     [-1, fg_num_classes, box_dim])
            pred_deltas = wmlt.select_2thdata_by_index_v2(
                pred_deltas, fg_classes - 1)

        decoded_boxes = self.box2box_transform.apply_deltas(
            pred_deltas, boxes=anchor_boxes)
        per_box_loss = odl.giou_loss(decoded_boxes, matched_gt_boxes)

        is_well_matched = tf.greater(match_scores, 0.5)
        loss_weight = tf.where(is_well_matched,
                               tf.ones_like(match_scores),
                               match_scores)
        loss_weight = tf.stop_gradient(loss_weight)
        wsummary.variable_summaries_v2(loss_weight, "giou_loss_scale")
        total_loss = tf.reduce_sum(per_box_loss * loss_weight)

        # Normalize by the total number of regions rather than the number of
        # foreground regions, so that a foreground example has the same
        # influence no matter how many other foreground examples share its
        # minibatch.
        num_samples = wmlt.num_elements(self.gt_classes)
        mean_loss = total_loss / num_samples
        wsummary.histogram_or_scalar(mean_loss, "fast_rcnn/box_reg_loss")

        return mean_loss * self.cfg.MODEL.ROI_HEADS.BOX_REG_LOSS_SCALE
def losses(self):
    """
    Build the classification (focal) and box regression (Huber) losses.

    Ground truth comes from self._get_ground_truth(); predictions from all
    levels are permuted and concatenated by
    permute_all_cls_and_box_to_N_HWA_K_and_concat. Anchors with a negative
    label are excluded from the classification loss, and only anchors with a
    positive label take part in the regression loss. Both sums are divided by
    max(1, number of foreground anchors).

    Returns:
        dict[str: Tensor]: mapping from a named loss to a scalar tensor
        storing the loss. Used during training only. The dict keys are:
        "loss_cls" and "loss_box_reg"
    """
    logits_rank = len(self.pred_logits[0].get_shape())
    assert logits_rank == 4, "error logits dim"
    deltas_rank = len(self.pred_anchor_deltas[0].get_shape())
    assert deltas_rank == 4, "error anchors dim"

    gt_classes, gt_anchors_deltas = self._get_ground_truth()
    all_logits, all_deltas = permute_all_cls_and_box_to_N_HWA_K_and_concat(
        self.pred_logits, self.pred_anchor_deltas, self.num_classes)

    is_valid = tf.greater_equal(gt_classes, 0)
    is_foreground = tf.greater(gt_classes, 0)
    num_foreground = tf.reduce_sum(tf.cast(is_foreground, tf.int32))

    def foreground_normalizer():
        # At least 1 so an image without foreground anchors does not divide
        # by zero.
        return tf.cast(tf.maximum(1, num_foreground), tf.float32)

    valid_classes = tf.boolean_mask(gt_classes, is_valid)
    wsummary.variable_summaries_v2(tf.to_float(valid_classes),
                                   "gt_classes_target")
    one_hot_targets = tf.one_hot(valid_classes, depth=self.num_classes + 1)
    # There is no background output in RetinaNet; background has index 0, so
    # its column is dropped after the one-hot encoding.
    one_hot_targets = one_hot_targets[:, 1:]
    valid_logits = tf.boolean_mask(all_logits, is_valid)

    # Classification loss.
    focal_sum = tf.reduce_sum(
        wnn.sigmoid_cross_entropy_with_logits_FL(
            labels=one_hot_targets,
            logits=valid_logits,
            alpha=self.focal_loss_alpha,
            gamma=self.focal_loss_gamma,
        ))
    loss_cls = focal_sum / foreground_normalizer()

    # Regression loss.
    fg_pred_deltas = tf.boolean_mask(all_deltas, is_foreground)
    fg_gt_deltas = tf.boolean_mask(gt_anchors_deltas, is_foreground)
    huber_sum = tf.losses.huber_loss(
        fg_pred_deltas,
        fg_gt_deltas,
        loss_collection=None,
        reduction=tf.losses.Reduction.SUM,
    )
    loss_box_reg = huber_sum / foreground_normalizer()

    return {
        "loss_cls": loss_cls * self.cfg.BOX_CLS_LOSS_SCALE,
        "loss_box_reg": loss_box_reg * self.cfg.BOX_REG_LOSS_SCALE,
    }
def forward(self, features):
    """
    Run the weight-shared prediction head on every feature level.

    For each level a box tower and a class tower (num_convs 3x3 convolutions
    each, with optional normalization and activation) are applied to the
    feature map. Convolution weights are shared between levels through
    tf.AUTO_REUSE; normalizer scopes and the gamma scalar carry the level
    index j in their names.

    Arguments:
        features (list[Tensor]): FPN feature map tensors in high to low
            resolution. Each tensor in the list correspond to different
            feature levels.

    Returns:
        logits (list[Tensor]): #lvl tensors, output of a 3x3 convolution
            with num_classes channels on top of the class tower.
        bbox_reg (list[Tensor]): #lvl tensors, output of a 3x3 convolution
            with 4 channels on top of the box tower, multiplied by a
            per-level gamma variable, passed through relu and multiplied by
            2**j * 16.
        center_ness (list[Tensor]): #lvl tensors, output of a 1-channel 3x3
            convolution on top of the box tower with the last axis squeezed.
    """
    cfg = self.cfg
    num_classes = cfg.NUM_CLASSES
    num_convs = cfg.NUM_CONVS
    prior_prob = cfg.PRIOR_PROB
    # Bias b such that sigmoid(b) == prior_prob.
    bias_value = -math.log((1 - prior_prob) / prior_prob)
    logits = []
    bbox_reg = []
    center_ness = []
    for j, feature in enumerate(features):
        channels = feature.get_shape().as_list()[-1]
        # NOTE(review): the source formatting was collapsed, so the nesting
        # of the variable scopes below is reconstructed (predictor layers
        # directly under the shared scope, outside the tower scopes). This
        # decides variable names -- confirm against existing checkpoints.
        with tf.variable_scope("WeightSharedConvolutionalBoxPredictor", reuse=tf.AUTO_REUSE):
            net = feature
            with tf.variable_scope("BoxPredictionTower"):
                for i in range(num_convs):
                    # No conv bias when a normalizer follows.
                    net = slim.conv2d(
                        net, channels, [3, 3], activation_fn=None, normalizer_fn=None, biases_initializer=None if self.normalizer_fn is not None else tf.zeros_initializer(), scope=f"conv2d_{i}")
                    if self.normalizer_fn is not None:
                        with tf.variable_scope(f"conv2d_{i}"):
                            # The normalizer scope is named per level (j).
                            net = self.normalizer_fn(
                                net, scope=f'{self.norm_scope_name}/feature_{j}', **self.norm_params)
                    if self.activation_fn is not None:
                        net = self.activation_fn(net)
            _bbox_reg = slim.conv2d(net, 4, [3, 3], activation_fn=None, normalizer_fn=None, scope="BoxPredictor")
            # Per-level scalar gamma_j, initialized to 1; presumably
            # wnnl.scale_gradient scales its gradient by 0.2 -- TODO confirm.
            _bbox_reg = _bbox_reg * wnnl.scale_gradient(
                tf.get_variable(name=f"gamma_{j}", shape=(), initializer=tf.ones_initializer()), 0.2)
            #_bbox_reg = self.clip_exp(_bbox_reg)
            #_bbox_reg = _bbox_reg*math.pow(2,j)
            # NOTE(review): in the collapsed source it is not certain whether
            # the relu line below was live code or part of the disabled lines
            # above -- confirm.
            _bbox_reg = tf.nn.relu(_bbox_reg)
            _bbox_reg = _bbox_reg * math.pow(2, j) * 16
            wsummary.variable_summaries_v2(_bbox_reg, "bbox_reg_net")
            # Disabled separate center-ness tower, kept as a string literal;
            # center-ness is predicted from the box tower output instead.
            '''net = feature with
tf.variable_scope("CenterPredictionTower"): for i in range(num_convs): net = slim.conv2d(net,channels,[3,3], activation_fn=None, normalizer_fn=None, biases_initializer=None if self.normalizer_fn is not None else tf.zeros_initializer(), scope=f"conv2d_{i}") if self.normalizer_fn is not None: with tf.variable_scope(f"conv2d_{i}"): net = self.normalizer_fn(net, scope=f'{self.norm_scope_name}/feature_{j}',**self.norm_params) if self.activation_fn is not None: net = self.activation_fn(net)'''
            _center_ness = slim.conv2d(net, 1, [3, 3], activation_fn=None, normalizer_fn=None, scope="CenterNessPredictor")
            _center_ness = tf.squeeze(_center_ness, axis=-1)
            # The class tower starts again from the raw feature map.
            net = feature
            with tf.variable_scope("ClassPredictionTower"):
                for i in range(num_convs):
                    net = slim.conv2d(
                        net, channels, [3, 3], activation_fn=None, normalizer_fn=None, biases_initializer=None if self.normalizer_fn is not None else tf.zeros_initializer(), scope=f"conv2d_{i}")
                    if self.normalizer_fn is not None:
                        with tf.variable_scope(f"conv2d_{i}"):
                            net = self.normalizer_fn(
                                net, scope=f'{self.norm_scope_name}/feature_{j}', **self.norm_params)
                    if self.activation_fn is not None:
                        net = self.activation_fn(net)
            # Class bias initialized so that the initial sigmoid output
            # equals prior_prob.
            _logits = slim.conv2d(
                net, num_classes, [3, 3], activation_fn=None, normalizer_fn=None, biases_initializer=tf.constant_initializer(
                    value=bias_value), scope="ClassPredictor")
        logits.append(_logits)
        bbox_reg.append(_bbox_reg)
        center_ness.append(_center_ness)
    return logits, bbox_reg, center_ness
def forward(self, x, scope="BoxPredictor", fwd_type=BoxesForwardType.ALL):
    """
    Build the class, box and (optional) IoU prediction layers.

    Arguments:
        x: either a single Tensor shared by all heads, or an iterable of
            per-head inputs ordered (classes, boxes[, ious]). The iterable
            must have 3 entries when cfg.MODEL.ROI_HEADS.PRED_IOU is set and
            2 otherwise. Inputs of rank > 2 are flattened to 2-D.
        scope: variable scope that wraps the prediction layers.
        fwd_type: BoxesForwardType flags selecting which heads are built;
            a head that is not selected yields None.

    Returns:
        (scores, proposal_deltas) or, when cfg.MODEL.ROI_HEADS.PRED_IOU is
        set, (scores, proposal_deltas, iou_logits).
    """
    pred_iou = self.cfg.MODEL.ROI_HEADS.PRED_IOU

    def flatten(net):
        # Collapse every axis after the first into one feature axis.
        if len(net.get_shape()) <= 2:
            return net
        shape = wmlt.combined_static_and_dynamic_shape(net)
        dim = 1
        for size in shape[1:]:
            dim *= size
        return tf.reshape(net, [shape[0], dim])

    # NOTE(review): the source formatting was collapsed; the summary and the
    # return are assumed to sit inside the variable scope -- confirm.
    with tf.variable_scope(scope):
        if not isinstance(x, tf.Tensor) and isinstance(x, Iterable):
            if pred_iou:
                assert len(x) == 3, "error x length."
            else:
                assert len(x) == 2, "error x length."
            inputs = [flatten(v) for v in x]
            cls_input = inputs[0]
            box_input = inputs[1]
            iou_input = inputs[2] if len(inputs) > 2 else None
        else:
            if len(x.get_shape()) > 2:
                shape = wmlt.combined_static_and_dynamic_shape(x)
                x = tf.reshape(x, [shape[0], -1])
            cls_input = box_input = iou_input = x

        scores = None
        if fwd_type & BoxesForwardType.CLASSES:
            # One extra output for the background class.
            scores = slim.fully_connected(cls_input,
                                          self.num_classes + 1,
                                          activation_fn=None,
                                          normalizer_fn=None,
                                          scope="cls_score")

        proposal_deltas = None
        if fwd_type & BoxesForwardType.BBOXES:
            foreground_num_classes = self.num_classes
            num_bbox_reg_classes = (1 if self.cls_agnostic_bbox_reg else
                                    foreground_num_classes)
            proposal_deltas = slim.fully_connected(
                box_input,
                self.box_dim * num_bbox_reg_classes,
                activation_fn=None,
                normalizer_fn=None,
                scope="bbox_pred")

        iou_logits = None
        if pred_iou and fwd_type & BoxesForwardType.IOUS:
            iou_logits = slim.fully_connected(iou_input,
                                              1,
                                              activation_fn=None,
                                              normalizer_fn=None,
                                              scope="iou_pred")

        # Fix: the box head is optional, so there may be nothing to
        # summarize; previously None was handed to the summary helper.
        if proposal_deltas is not None:
            wsummary.variable_summaries_v2(proposal_deltas,
                                           "proposal_deltas")

        if pred_iou:
            return scores, proposal_deltas, iou_logits
        return scores, proposal_deltas
def forward(self, x, scope="BoxPredictor"):
    """
    Build the class, box and (optional) IoU prediction layers.

    Arguments:
        x: either a single Tensor shared by all heads (flattened to 2-D when
            its rank is above 2), or an iterable of per-head inputs ordered
            (classes, boxes[, ious]). In the iterable form a 2-D input goes
            through a fully connected layer, any other input through a 1x1
            convolution followed by a mean over axes 1 and 2. A 2-D IoU
            input that already has a single channel is returned unchanged.
        scope: variable scope that wraps the prediction layers.

    Returns:
        (scores, proposal_deltas) or, when cfg.MODEL.ROI_HEADS.PRED_IOU is
        set, (scores, proposal_deltas, iou_logits).
    """
    with_iou = self.cfg.MODEL.ROI_HEADS.PRED_IOU

    def dense_head(net, num_outputs, name):
        return slim.fully_connected(net, num_outputs, activation_fn=None,
                                    normalizer_fn=None, scope=name)

    def predict(net, num_outputs, name):
        # 2-D inputs use a dense layer, feature maps a 1x1 conv + pooling.
        if len(net.get_shape()) == 2:
            return dense_head(net, num_outputs, name)
        net = slim.conv2d(net, num_outputs, [1, 1], activation_fn=None,
                          normalizer_fn=None, scope=name)
        return tf.reduce_mean(net, axis=[1, 2], keepdims=False, name=name)

    def num_box_outputs():
        foreground_num_classes = self.num_classes
        if self.cls_agnostic_bbox_reg:
            return self.box_dim * 1
        return self.box_dim * foreground_num_classes

    # NOTE(review): the source formatting was collapsed; the summary and the
    # return are assumed to sit inside the variable scope -- confirm.
    with tf.variable_scope(scope):
        if not isinstance(x, tf.Tensor) and isinstance(x, Iterable):
            expected_len = 3 if with_iou else 2
            assert len(x) == expected_len, "error x length."

            scores = predict(x[0], self.num_classes + 1, "cls_score")
            proposal_deltas = predict(x[1], num_box_outputs(), "bbox_pred")
            if with_iou:
                iou_input = x[2]
                already_logits = (len(iou_input.get_shape()) == 2
                                  and btf.channel(iou_input) == 1)
                if already_logits:
                    iou_logits = iou_input
                else:
                    iou_logits = predict(iou_input, 1, "iou_pred")
        else:
            if len(x.get_shape()) > 2:
                in_shape = wmlt.combined_static_and_dynamic_shape(x)
                x = tf.reshape(x, [in_shape[0], -1])
            scores = dense_head(x, self.num_classes + 1, "cls_score")
            proposal_deltas = dense_head(x, num_box_outputs(), "bbox_pred")
            if with_iou:
                iou_logits = dense_head(x, 1, "iou_pred")

        wsummary.variable_summaries_v2(proposal_deltas, "proposal_deltas")

        if with_iou:
            return scores, proposal_deltas, iou_logits
        return scores, proposal_deltas