Exemplo n.º 1
0
def dense(x, fmaps, gain=1, use_wscale=True, lrmul=1, name='dense'):
    """Multiply ``x`` by a weight matrix created inside scope ``name``.

    An input whose static shape has more than two dimensions is first
    reshaped to ``[-1, prod(shape[1:])]``; the flattened width is computed
    from the static shape. No bias is added here.

    Args:
        x: input tensor; its static shape must have at least two dimensions
            (enforced by an assertion for the non-flattening case).
        fmaps: number of output columns of the weight matrix.
        gain: forwarded unchanged to ``get_weight``.
        use_wscale: forwarded unchanged to ``get_weight``.
        lrmul: forwarded unchanged to ``get_weight``.
        name: variable scope name, opened with ``tf.AUTO_REUSE``.

    Returns:
        A tuple ``(output, weight)``: the matmul result and the weight as
        returned by ``get_weight`` (before the cast to ``x.dtype``).
    """
    in_shape = x.get_shape().as_list()
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        if len(in_shape) <= 2:
            assert len(in_shape) == 2
            cin = in_shape[1]
        else:
            # Collapse everything after the leading dimension into one axis.
            cin = np.prod(in_shape[1:])
            x = tf.reshape(x, [-1, cin])
        weight = get_weight([cin, fmaps],
                            gain=gain,
                            use_wscale=use_wscale,
                            lrmul=lrmul,
                            name='W')
        # The weight is cast so the product is computed in the input's dtype.
        projected = tf.matmul(x, tf.cast(weight, x.dtype))
        return projected, weight
Exemplo n.º 2
0
    def build_graph(self, *inputs):
        """Build the detection graph from the positional input tensors.

        The positional ``inputs`` are paired with ``self.input_names``. A
        ``gt_masks_packed`` entry, when present, is replaced by an unpacked
        ``gt_masks`` entry cast to uint8.

        Returns:
            In training mode, the ``total_cost`` tensor (RPN losses + head
            losses + weight-decay cost). In inference mode nothing is
            returned; the graph is only checked for the output tensors
            declared by ``self.get_inference_tensor_names()``.

        Raises:
            KeyError: in inference mode, when a declared output tensor cannot
                be found in the default graph.
        """
        named_inputs = dict(zip(self.input_names, inputs))
        if "gt_masks_packed" in named_inputs:
            packed_masks = named_inputs.pop("gt_masks_packed")
            named_inputs["gt_masks"] = tf.cast(unpackbits_masks(packed_masks),
                                               tf.uint8,
                                               name="gt_masks")

        image = self.preprocess(named_inputs['image'])  # 1CHW
        features = self.backbone(image)

        # Only the inputs whose name starts with 'anchor_' go to the RPN.
        anchor_inputs = {}
        for key, tensor in named_inputs.items():
            if key.startswith('anchor_'):
                anchor_inputs[key] = tensor
        proposals, rpn_losses = self.rpn(image, features, anchor_inputs)

        # Ground-truth entries are collected in a fixed order; absent ones
        # are simply skipped.
        targets = []
        for key in ('gt_boxes', 'gt_labels', 'gt_masks'):
            if key in named_inputs:
                targets.append(named_inputs[key])

        mean_gt_area = tf.reduce_mean(tf_area(named_inputs["gt_boxes"]),
                                      name='mean_gt_box_area')
        add_moving_summary(mean_gt_area)
        head_losses = self.roi_heads(image, features, proposals, targets)

        if self.training:
            wd_cost = regularize_cost('.*/W',
                                      l2_regularizer(cfg.TRAIN.WEIGHT_DECAY),
                                      name='wd_cost')
            all_costs = rpn_losses + head_losses + [wd_cost]
            total_cost = tf.add_n(all_costs, 'total_cost')
            add_moving_summary(total_cost, wd_cost)
            return total_cost

        # Inference: verify that the model defines the tensors it declares.
        # For existing models, they are defined in
        # "fastrcnn_predictions(name_scope='output')".
        graph = tf.get_default_graph()
        scope = graph.get_name_scope()
        for name in self.get_inference_tensor_names()[1]:
            if scope:
                # Look the tensor up under the current name scope.
                name = '/'.join([scope, name])
            try:
                graph.get_tensor_by_name(name + ':0')
            except KeyError:
                raise KeyError(
                    "Your model does not define the tensor '{}' in inference context."
                    .format(name))
        return None
Exemplo n.º 3
0
def mod_conv2d(x,
               y,
               fmaps,
               kernel,
               demodulate=True,
               gain=1,
               use_wscale=True,
               lrmul=1,
               fused_modconv=True,
               eps=1e-8,
               padding='SAME',
               name="mod_conv2d"):
    """Convolve ``x`` with a kernel scaled per sample and per input channel.

    The per-input-channel scale is a learned vector (variable ``bs``) plus
    one, optionally increased by ``dense(y, cin)``. With ``demodulate`` the
    scaled kernel is multiplied by
    ``rsqrt(sum(kernel**2 over axes 1..3) + eps)``.

    Two execution paths produce the result:

    * ``fused_modconv=True``: the batch is folded into the channel axis and
      one NCHW convolution is run with the per-sample kernels stacked along
      the output-channel axis.
    * ``fused_modconv=False``: the activations are scaled instead, convolved
      (NHWC) with the unscaled kernel, and the demodulation factor is
      applied to the output.

    Args:
        x: input tensor, laid out as [n, h, w, c]; ``c`` is read from the
            static shape, and the fused path also uses the static h and w.
        y: optional tensor passed to ``dense`` to produce an additive
            per-channel scale; skipped when ``None``.
        fmaps: number of output feature maps.
        kernel: kernel size (used for both spatial dimensions).
        demodulate: whether to apply the rsqrt normalisation.
        gain: forwarded to ``get_weight`` and ``dense``.
        use_wscale: forwarded to ``get_weight``, ``get_bias`` and ``dense``.
        lrmul: forwarded to ``get_weight``, ``get_bias`` and ``dense``.
        fused_modconv: selects the execution path described above.
        eps: constant added before ``rsqrt``.
        padding: passed to ``tf.nn.conv2d``.
        name: variable scope name, opened with ``tf.AUTO_REUSE``.

    Returns:
        ``tf.identity`` of the NHWC result, with a ``variables`` attribute
        set to a ``VariableHolder`` holding ``W``, ``bs`` and, when ``y`` is
        given, ``Ws``.
    """
    in_shape = x.get_shape().as_list()  # [n, h, w, c]
    cin = in_shape[-1]
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        kernel_w = get_weight([kernel, kernel, cin, fmaps],
                              gain=gain,
                              use_wscale=use_wscale,
                              lrmul=lrmul,
                              name='W')
        # Size-1 leading axis so the kernel broadcasts over the minibatch.
        per_sample_w = kernel_w[tf.newaxis]

        # Modulation scale: learned per-channel offset plus one.
        scale = get_bias(
            cin, base_std=0, use_wscale=use_wscale, lrmul=lrmul,
            name='bs') + 1
        holder = VariableHolder(W=kernel_w, bs=scale)
        # Repeat the scale row once per sample in the (dynamic) batch.
        scale_row = scale[tf.newaxis]
        batch_size = tf.shape(x)[0]
        scale = tf.tile(scale_row, [batch_size, 1])
        if y is not None:
            style_scale, style_w = dense(y,
                                         cin,
                                         gain=gain,
                                         use_wscale=use_wscale,
                                         lrmul=lrmul)
            scale = scale + style_scale
            holder.Ws = style_w
        # Scale the kernel along its input-channel axis.
        kernel_scale = tf.cast(
            scale[:, tf.newaxis, tf.newaxis, :, tf.newaxis], kernel_w.dtype)
        per_sample_w = per_sample_w * kernel_scale

        if demodulate:
            sum_sq = tf.reduce_sum(tf.square(per_sample_w),
                                   axis=[1, 2, 3],
                                   keepdims=True)
            demod = tf.rsqrt(sum_sq + eps)  # scaling factor
            per_sample_w = per_sample_w * demod

        if fused_modconv:
            # Fold the batch into the channel axis: [1, n*cin, h, w].
            x_nchw = tf.transpose(x, [0, 3, 1, 2])
            x = tf.reshape(x_nchw, [1, -1, in_shape[1], in_shape[2]])
            # Stack the per-sample kernels: [k, k, cin, n*cout].
            batch_last_w = tf.transpose(per_sample_w, [1, 2, 3, 0, 4])
            stacked_w = tf.reshape(batch_last_w, [kernel, kernel, cin, -1])
            x = tf.nn.conv2d(x,
                             tf.cast(stacked_w, x.dtype),
                             data_format='NCHW',
                             strides=[1, 1, 1, 1],
                             padding=padding)
            out_shape = x.get_shape().as_list()
            # Unfold the batch again and go back to NHWC.
            unfolded = tf.reshape(x,
                                  [-1, fmaps, out_shape[2], out_shape[3]])
            x = tf.transpose(unfolded, [0, 2, 3, 1])
        else:
            # Scale the activations instead of the kernel.
            x = x * tf.cast(scale[:, tf.newaxis, tf.newaxis, :], x.dtype)
            x = tf.nn.conv2d(x,
                             tf.cast(kernel_w, x.dtype),
                             data_format='NHWC',
                             strides=[1, 1, 1, 1],
                             padding=padding)
            if demodulate:
                out_scale = tf.reshape(demod, [-1, 1, 1, fmaps])
                x = x * tf.cast(out_scale, x.dtype)

        ret = tf.identity(x)
        ret.variables = holder
        return ret
Exemplo n.º 4
0
    def roi_heads(self, image, features, proposals, targets):
        """Build the box head, and optionally the mask head, on five features.

        Args:
            image: image tensor; its dynamic shape from index 2 onward is
                used as the (h, w) clipping size.
            features: sequence of exactly five feature maps; only the first
                four are passed to ``multilevel_roi_align``.
            proposals: object exposing ``boxes``; in training it is replaced
                by the result of ``sample_fast_rcnn_targets``.
            targets: sequence starting with ``gt_boxes, gt_labels``; index 2
                is read as ``gt_masks`` in training when ``cfg.MODE_MASK``
                is set.

        Returns:
            In training, the list returned by the head's ``losses()`` with
            the mask loss appended when ``cfg.MODE_MASK`` is set. In
            inference, an empty list; results are created as named tensors
            ('fastrcnn_all_boxes', 'fastrcnn_all_scores', the 'output' scope,
            and 'output/masks' when ``cfg.MODE_MASK`` is set).
        """
        image_hw = tf.shape(image)[2:]  # h,w
        assert len(features) == 5, "Features have to be P23456!"
        gt_boxes, gt_labels, *_ = targets

        if self.training:
            proposals = sample_fast_rcnn_targets(proposals.boxes, gt_boxes,
                                                 gt_labels)

        box_head_fn = getattr(model_frcnn, cfg.FPN.FRCNN_HEAD_FUNC)
        if cfg.FPN.CASCADE:

            def roi_func(boxes):
                return multilevel_roi_align(features[:4], boxes, 7)

            fastrcnn_head = CascadeRCNNHead(proposals, roi_func, box_head_fn,
                                            (gt_boxes, gt_labels), image_hw,
                                            cfg.DATA.NUM_CATEGORY)
        else:
            box_rois = multilevel_roi_align(features[:4], proposals.boxes, 7)
            box_feature = box_head_fn('fastrcnn', box_rois)
            label_logits, box_logits = fastrcnn_outputs(
                'fastrcnn/outputs', box_feature, cfg.DATA.NUM_CATEGORY)
            reg_weights = tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS,
                                      dtype=tf.float32)
            fastrcnn_head = FastRCNNHead(proposals, box_logits, label_logits,
                                         gt_boxes, reg_weights)

        if not self.training:
            boxes = fastrcnn_head.decoded_output_boxes()
            boxes = clip_boxes(boxes, image_hw, name='fastrcnn_all_boxes')
            scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
            final_boxes, final_scores, final_labels = fastrcnn_predictions(
                boxes, scores, name_scope='output')
            if cfg.MODE_MASK:
                # Cascade inference needs roi transform with refined boxes.
                mask_rois = multilevel_roi_align(features[:4], final_boxes,
                                                 14)
                mask_head_fn = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
                mask_logits = mask_head_fn('maskrcnn', mask_rois,
                                           cfg.DATA.NUM_CATEGORY)
                # For each result pick the mask channel at index label - 1.
                row_ids = tf.range(tf.size(final_labels))
                class_ids = tf.cast(final_labels, tf.int32) - 1
                indices = tf.stack([row_ids, class_ids], axis=1)
                final_mask_logits = tf.gather_nd(mask_logits, indices)
                tf.sigmoid(final_mask_logits, name='output/masks')
            return []

        all_losses = fastrcnn_head.losses()
        if cfg.MODE_MASK:
            gt_masks = targets[2]
            # Mask loss is computed on the foreground boxes only.
            fg_rois = multilevel_roi_align(
                features[:4],
                proposals.fg_boxes(),
                14,
                name_scope='multilevel_roi_align_mask')
            mask_head_fn = getattr(model_mrcnn, cfg.FPN.MRCNN_HEAD_FUNC)
            mask_logits = mask_head_fn('maskrcnn', fg_rois,
                                       cfg.DATA.NUM_CATEGORY)

            # Crop the ground-truth masks to the foreground boxes at size 28.
            fg_mask_targets = crop_and_resize(tf.expand_dims(gt_masks, 1),
                                              proposals.fg_boxes(),
                                              proposals.fg_inds_wrt_gt,
                                              28,
                                              pad_border=False)
            fg_mask_targets = tf.squeeze(fg_mask_targets, 1,
                                         'sampled_fg_mask_targets')
            mask_loss = maskrcnn_loss(mask_logits, proposals.fg_labels(),
                                      fg_mask_targets)
            all_losses.append(mask_loss)
        return all_losses
Exemplo n.º 5
0
    def roi_heads(self, image, features, proposals, targets):
        """Build the box head, and optionally the mask head, on one feature map.

        Only ``features[0]`` is used. Boxes are mapped onto it by multiplying
        with ``1.0 / cfg.RPN.ANCHOR_STRIDE`` before RoI align.

        Args:
            image: image tensor; its dynamic shape from index 2 onward is
                used as the (h, w) clipping size.
            features: sequence whose first element is the feature map.
            proposals: object exposing ``boxes``; in training it is replaced
                by the result of ``sample_fast_rcnn_targets``.
            targets: sequence starting with ``gt_boxes, gt_labels``; index 2
                is read as ``gt_masks`` in training when ``cfg.MODE_MASK``
                is set.

        Returns:
            In training, the list returned by the head's ``losses()`` with
            the mask loss appended when ``cfg.MODE_MASK`` is set. In
            inference, an empty list; results are created as named tensors
            ('fastrcnn_all_boxes', 'fastrcnn_all_scores', the 'output' scope,
            and 'output/masks' when ``cfg.MODE_MASK`` is set).
        """
        image_hw = tf.shape(image)[2:]  # h,w
        featuremap = features[0]
        gt_boxes, gt_labels, *_ = targets

        if self.training:
            # Training runs the heads on sampled proposals; inference keeps
            # all of them.
            proposals = sample_fast_rcnn_targets(proposals.boxes, gt_boxes,
                                                 gt_labels)

        # Factor that maps image coordinates onto the feature map.
        feature_scale = 1.0 / cfg.RPN.ANCHOR_STRIDE
        boxes_on_featuremap = proposals.boxes * feature_scale
        roi_resized = roi_align(featuremap, boxes_on_featuremap, 14)

        num_conv5_blocks = cfg.BACKBONE.RESNET_NUM_BLOCKS[-1]
        # This feature is kept because the training mask branch reuses it.
        feature_fastrcnn = resnet_conv5(roi_resized, num_conv5_blocks)
        feature_gap = GlobalAvgPooling('gap',
                                       feature_fastrcnn,
                                       data_format='channels_first')
        label_logits, box_logits = fastrcnn_outputs('fastrcnn', feature_gap,
                                                    cfg.DATA.NUM_CATEGORY)

        reg_weights = tf.constant(cfg.FRCNN.BBOX_REG_WEIGHTS,
                                  dtype=tf.float32)
        fastrcnn_head = FastRCNNHead(proposals, box_logits, label_logits,
                                     gt_boxes, reg_weights)

        if not self.training:
            boxes = fastrcnn_head.decoded_output_boxes()
            boxes = clip_boxes(boxes, image_hw, name='fastrcnn_all_boxes')
            scores = fastrcnn_head.output_scores(name='fastrcnn_all_scores')
            final_boxes, final_scores, final_labels = fastrcnn_predictions(
                boxes, scores, name_scope='output')

            if cfg.MODE_MASK:
                # The mask branch re-crops the feature map at the final boxes.
                roi_resized = roi_align(featuremap,
                                        final_boxes * feature_scale, 14)
                feature_maskrcnn = resnet_conv5(
                    roi_resized, cfg.BACKBONE.RESNET_NUM_BLOCKS[-1])
                mask_logits = maskrcnn_upXconv_head('maskrcnn',
                                                    feature_maskrcnn,
                                                    cfg.DATA.NUM_CATEGORY, 0)
                # For each result pick the mask channel at index label - 1.
                row_ids = tf.range(tf.size(final_labels))
                class_ids = tf.cast(final_labels, tf.int32) - 1
                indices = tf.stack([row_ids, class_ids], axis=1)
                final_mask_logits = tf.gather_nd(mask_logits, indices)
                tf.sigmoid(final_mask_logits, name='output/masks')
            return []

        all_losses = fastrcnn_head.losses()
        if cfg.MODE_MASK:
            gt_masks = targets[2]
            # In training, the mask branch reuses the box-head feature,
            # restricted to the foreground proposals.
            fg_feature = tf.gather(feature_fastrcnn, proposals.fg_inds())
            mask_logits = maskrcnn_upXconv_head('maskrcnn',
                                                fg_feature,
                                                cfg.DATA.NUM_CATEGORY,
                                                num_convs=0)

            # Crop the ground-truth masks to the foreground boxes at size 14.
            fg_mask_targets = crop_and_resize(tf.expand_dims(gt_masks, 1),
                                              proposals.fg_boxes(),
                                              proposals.fg_inds_wrt_gt,
                                              14,
                                              pad_border=False)
            fg_mask_targets = tf.squeeze(fg_mask_targets, 1,
                                         'sampled_fg_mask_targets')
            mask_loss = maskrcnn_loss(mask_logits, proposals.fg_labels(),
                                      fg_mask_targets)
            all_losses.append(mask_loss)
        return all_losses