def maskrcnn_upXconv_head(feature, num_category, num_convs, norm=None):
    """
    Mask head: `num_convs` 3x3 convs (each optionally followed by GroupNorm),
    then a stride-2 transposed conv and a final 1x1 conv producing the logits.

    Args:
        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
        num_category(int):
        num_convs (int): number of convolution layers
        norm (str or None): either None or 'GN'

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    assert norm in (None, 'GN'), norm
    use_gn = norm is not None

    # c2's MSRAFill is fan_out
    msra_init = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution='normal')

    net = feature
    with argscope([Conv2D, Conv2DTranspose],
                  data_format='channels_first',
                  kernel_initializer=msra_init):
        head_dim = cfg.MRCNN.HEAD_DIM
        for idx in range(num_convs):
            net = Conv2D('fcn{}'.format(idx), net, head_dim, 3, activation=tf.nn.relu)
            if use_gn:
                net = GroupNorm('gn{}'.format(idx), net)
        # The stride-2 transposed conv is what doubles the spatial size (s -> 2s).
        net = Conv2DTranspose('deconv', net, head_dim, 2, strides=2, activation=tf.nn.relu)
        mask_logits = Conv2D('conv', net, num_category, 1)
    return mask_logits
def fastrcnn_Xconv1fc_head(feature, num_convs, norm=None):
    """
    Box head: `num_convs` 3x3 convs (each optionally followed by GroupNorm),
    then a single fully-connected layer.

    Args:
        feature (NCHW):
        num_convs (int): number of conv layers
        norm (str or None): either None or 'GN'

    Returns:
        2D head feature
    """
    assert norm in (None, 'GN'), norm
    use_gn = norm is not None

    conv_init = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution='normal')

    net = feature
    with argscope(Conv2D, data_format='channels_first', kernel_initializer=conv_init):
        for idx in range(num_convs):
            net = Conv2D(
                'conv{}'.format(idx), net, cfg.FPN.FRCNN_CONV_HEAD_DIM, 3,
                activation=tf.nn.relu)
            if use_gn:
                net = GroupNorm('gn{}'.format(idx), net)
        # The FC layer gets its own default variance-scaling initializer;
        # the argscope above only applies to Conv2D.
        head_feature = FullyConnected(
            'fc', net, cfg.FPN.FRCNN_FC_HEAD_DIM,
            kernel_initializer=tf.variance_scaling_initializer(),
            activation=tf.nn.relu)
    return head_feature
def fpn_model(features):
    """
    Build FPN features: 1x1 lateral convs, a top-down pathway that adds the
    2x-upsampled coarser level to each lateral, 3x3 post-hoc convs, and a
    stride-2 subsampling of the last output for p6.

    Args:
        features ([tf.Tensor]): ResNet features c2-c5

    Returns:
        [tf.Tensor]: FPN features p2-p6
    """
    assert len(features) == 4, features
    num_channel = cfg.FPN.NUM_CHANNEL
    use_gn = cfg.FPN.NORM == 'GN'

    def upsample2x(name, x):
        # FixedUnPooling is used here because tf.image.resize is, again, not aligned.
        ones_2x2 = np.ones((2, 2), dtype='float32')
        return FixedUnPooling(
            name, x, 2, unpool_mat=ones_2x2, data_format='channels_first')

    with argscope(Conv2D, data_format='channels_first',
                  activation=tf.identity, use_bias=True,
                  kernel_initializer=tf.variance_scaling_initializer(scale=1.)):
        # Lateral 1x1 convs on c2..c5 (levels numbered from 2).
        laterals = []
        for level, c in enumerate(features, 2):
            laterals.append(Conv2D('lateral_1x1_c{}'.format(level), c, num_channel, 1))
        if use_gn:
            laterals = [GroupNorm('gn_c{}'.format(level), lat)
                        for level, lat in enumerate(laterals, 2)]

        # Top-down pathway, coarsest level first: the coarsest lateral is used
        # as-is, every finer one is summed with the upsampled previous result.
        merged = laterals[-1]
        merged_5432 = [merged]
        for step, lat in enumerate(laterals[-2::-1], 1):
            merged = lat + upsample2x('upsample_lat{}'.format(6 - step), merged)
            merged_5432.append(merged)

        # Post-hoc 3x3 convs, back in fine-to-coarse order.
        outputs = []
        for level, c in enumerate(reversed(merged_5432), 2):
            outputs.append(Conv2D('posthoc_3x3_p{}'.format(level), c, num_channel, 3))
        if use_gn:
            outputs = [GroupNorm('gn_p{}'.format(level), p)
                       for level, p in enumerate(outputs, 2)]

        # p6 is the last output subsampled by 2 (pool_size=1, strides=2).
        p6 = MaxPooling(
            'maxpool_p6', outputs[-1], pool_size=1, strides=2,
            data_format='channels_first', padding='VALID')
        return outputs + [p6]