def aux_branch(name, main_in, up_kernel, up_strides):
    """Build an auxiliary branch: two transposed convs, a scoring conv, dropout.

    This is a closure: ``self`` (for ``nr_types`` / ``nr_classes`` /
    ``type_classification``) and ``is_training`` are taken from the
    enclosing scope.

    Args:
        name: variable scope under which the layers are created.
        main_in: input tensor; its channel count is read from axis 1.
        up_kernel: kernel size of the first transposed convolution.
        up_strides: strides of the first transposed convolution.

    Returns:
        The tensor produced by the final dropout layer.
    """
    depth = main_in.get_shape().as_list()[1]  # NCHW
    with tf.variable_scope(name):
        # preserve the depth
        out = Conv2DTranspose(
            'up', main_in, depth, up_kernel,
            strides=up_strides, padding='same',
            use_bias=True, activation=tf.identity)
        out = Conv2DTranspose(
            'up4', out, depth, 5,
            strides=(1, 1), padding='valid',
            use_bias=True, activation=tf.identity)

        # The output depth follows whichever prediction mode is enabled.
        if self.type_classification:
            nr_out = self.nr_types
        else:
            nr_out = self.nr_classes
        out = Conv2D('conv', out, nr_out, 3,
                     padding='valid', activation=tf.nn.relu)

        # Fixed seed keeps the dropout mask reproducible across runs.
        out = tf.layers.dropout(out, rate=0.5, seed=5, training=is_training)
    return out
def up_branch(name, main_in, aux_in, ch):
    """Up-sample the main path, widen both paths, then fuse them.

    Args:
        name: variable scope under which the layers are created.
        main_in: tensor of the main path; up-sampled by a 2x2 stride-2
            transposed conv and refined by two 3x3 'valid' convs.
        aux_in: tensor of the auxiliary path; only passed through a 5x5
            stride-1 transposed conv.
        ch: number of output channels used by every layer in the branch.

    Returns:
        Result of the 1x1 conv applied to the axis-1 concatenation of both
        paths.
    """
    # Keyword sets shared by several layers below.
    valid_relu = dict(padding='valid', use_bias=True, activation=tf.nn.relu)
    valid_linear = dict(padding='valid', use_bias=True, activation=tf.identity)

    with tf.variable_scope(name):
        main = Conv2DTranspose('up1', main_in, ch, 2, strides=(2, 2),
                               padding='same', use_bias=True,
                               activation=tf.identity)
        main = Conv2D('conv1', main, ch, 3, **valid_relu)
        main = Conv2D('conv2', main, ch, 3, **valid_relu)

        # stride 1 is no different from normal 5x5 conv,
        # 'valid' to gain extrapolated border pixels
        main_wide = Conv2DTranspose('up2', main, ch, 5, strides=(1, 1),
                                    **valid_linear)
        aux_wide = Conv2DTranspose('up3', aux_in, ch, 5, strides=(1, 1),
                                   **valid_linear)

        fused = tf.concat([main_wide, aux_wide], axis=1)
        fused = Conv2D('conv3', fused, ch, 1, padding='same',
                       use_bias=True, activation=tf.nn.relu)
    return fused
def maskrcnn_upXconv_head(feature, num_category, num_convs, norm=None):
    """
    Mask head: ``num_convs`` 3x3 convs, one 2x transposed conv, one 1x1 conv.

    Args:
        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
        num_category(int): number of output channels of the final 1x1 conv.
        num_convs (int): number of convolution layers
        norm (str or None): either None or 'GN'

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    assert norm in (None, 'GN'), norm
    use_gn = norm is not None

    # c2's MSRAFill is fan_out
    msra_init = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution='normal')

    x = feature
    with argscope([Conv2D, Conv2DTranspose],
                  data_format='channels_first',
                  kernel_initializer=msra_init):
        for idx in range(num_convs):
            x = Conv2D('fcn{}'.format(idx), x, cfg.MRCNN.HEAD_DIM, 3,
                       activation=tf.nn.relu)
            if use_gn:
                x = GroupNorm('gn{}'.format(idx), x)
        x = Conv2DTranspose('deconv', x, cfg.MRCNN.HEAD_DIM, 2,
                            strides=2, activation=tf.nn.relu)
        x = Conv2D('conv', x, num_category, 1)
    return x
def maskrcnn_upXconv_head(feature, num_category, seed_gen, num_convs, norm=None, fp16=False):
    """
    Mask head: ``num_convs`` 3x3 convs, one 2x transposed conv, one 1x1 conv.

    Args:
        feature: roi feature maps, Num_boxes x NumChannel x H_roi x W_roi,
        num_category(int): Number of total classes
        seed_gen: object whose ``next()`` is called once for the kernel
            initializer and once for every conv / deconv layer, in creation
            order, to obtain their seeds
        num_convs (int): number of convolution layers
        norm (str or None): either None or 'GN'
        fp16 (bool): when true, the input is cast to float16, GroupNorm is
            computed in float32, and the result is cast back to float32

    Returns:
        mask_logits: Num_boxes x num_category x (2 * H_roi) x (2 * W_roi)
    """
    assert norm in (None, 'GN'), norm

    def _cast_if_fp16(tensor, dtype):
        # Casts are only inserted on the fp16 path.
        return tf.cast(tensor, dtype) if fp16 else tensor

    use_gn = norm is not None
    if get_tf_version_tuple() >= (1, 12):
        distribution = 'untruncated_normal'
    else:
        distribution = 'normal'

    x = _cast_if_fp16(feature, tf.float16)
    with mixed_precision_scope(mixed=fp16):
        # c2's MSRAFill is fan_out
        msra_init = tf.variance_scaling_initializer(
            scale=2.0, mode='fan_out', seed=seed_gen.next(),
            distribution=distribution)
        with argscope([Conv2D, Conv2DTranspose],
                      data_format='channels_first',
                      kernel_initializer=msra_init):
            for idx in range(num_convs):
                x = Conv2D('fcn{}'.format(idx), x, cfg.MRCNN.HEAD_DIM, 3,
                           activation=tf.nn.relu, seed=seed_gen.next())
                if use_gn:
                    # GroupNorm runs in float32, then returns to float16.
                    x = _cast_if_fp16(x, tf.float32)
                    x = GroupNorm('gn{}'.format(idx), x)
                    x = _cast_if_fp16(x, tf.float16)
            # 2x upsampling
            x = Conv2DTranspose('deconv', x, cfg.MRCNN.HEAD_DIM, 2,
                                strides=2, activation=tf.nn.relu,
                                seed=seed_gen.next())
            x = Conv2D('conv', x, num_category, 1, seed=seed_gen.next())
    return _cast_if_fp16(x, tf.float32)
def maskrcnn_upXconv_head(feature, num_class, num_convs):
    """
    Mask head: ``num_convs`` 3x3 convs, one 2x transposed conv, one 1x1 conv.

    Args:
        feature (NxCx s x s): size is 7 in C4 models and 14 in FPN models.
        num_class(int): num_category + 1
        num_convs (int): number of convolution layers

    Returns:
        mask_logits (N x num_category x 2s x 2s):
    """
    # c2's MSRAFill is fan_out
    msra_init = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution='normal')

    x = feature
    with argscope([Conv2D, Conv2DTranspose],
                  data_format='channels_first',
                  kernel_initializer=msra_init):
        for idx in range(num_convs):
            x = Conv2D('fcn{}'.format(idx), x, config.MASKRCNN_HEAD_DIM, 3,
                       activation=tf.nn.relu)
        x = Conv2DTranspose('deconv', x, config.MASKRCNN_HEAD_DIM, 2,
                            strides=2, activation=tf.nn.relu)
        # One output channel per category: num_class - 1 of them.
        x = Conv2D('conv', x, num_class - 1, 1)
    return x
def preresnet_basicblock(
    l: tf.Tensor,
    ch_out: int,
    stride: int,
    preact: str,
    isDownsampling: bool,
    dilation: int = 1,
    withDropout: bool = False,
):
    """Pre-activation residual basic block that can down- or up-sample.

    Args:
        l: input tensor.
        ch_out: number of output channels of both 3x3 layers.
        stride: stride of the first layer and of the shortcut projection.
        preact: pre-activation mode forwarded to ``apply_preactivation``.
        isDownsampling: if true the first layer is a strided ``Conv2D``
            (with ``dilation``); otherwise it is a strided
            ``Conv2DTranspose`` (dilation is not applied to it).
        dilation: dilation rate of the ``Conv2D`` layers.
        withDropout: if true, ``Dropout`` is applied after the first layer.

    Returns:
        Sum of the residual branch and the (possibly projected) shortcut.
    """
    l, shortcut = apply_preactivation("p1", l, preact)

    if isDownsampling:
        l = Conv2D("conv1", l, ch_out, 3, strides=stride, dilation_rate=dilation)
    else:
        # Use the canonical `strides` keyword, like every other layer call
        # here, instead of the legacy `stride` alias.
        l = Conv2DTranspose("tconv1", l, ch_out, 3, strides=stride)

    if withDropout:
        l = Dropout(l)

    # The shortcut returned by the second pre-activation is not needed.
    l, _ = apply_preactivation("p2", l, preact)
    l = Conv2D("conv2", l, ch_out, 3, dilation_rate=dilation)

    return l + resnet_shortcut(shortcut, ch_out, stride, isDownsampling)
def resnet_shortcut(
    l: tf.Tensor,
    n_out: int,
    stride: int,
    isDownsampling: bool,
    activation=tf.identity,
):
    """Return the shortcut tensor, projecting it with a 1x1 layer if needed.

    The input is returned untouched when its channel count already equals
    ``n_out`` and ``stride`` is 1. Otherwise a 1x1 ``Conv2D`` (when
    ``isDownsampling``) or 1x1 ``Conv2DTranspose`` named "convshortcut" is
    applied with the given stride and activation.

    The channel axis is chosen from the ``data_format`` currently set for
    ``Conv2D`` in the active arg scope.
    """
    data_format = get_arg_scope()["Conv2D"]["data_format"]
    if data_format in ["NCHW", "channels_first"]:
        channel_axis = 1
    else:
        channel_axis = 3
    n_in = l.get_shape().as_list()[channel_axis]

    # Nothing to adapt: same width and no resampling.
    if n_in == n_out and stride == 1:
        return l

    # change dimension when channel is not the same
    projection = Conv2D if isDownsampling else Conv2DTranspose
    return projection(
        "convshortcut", l, n_out, 1, strides=stride, activation=activation
    )
def up_conv_block(name, l, shorcut, channel, nr_blks, stride=2):
    """Optionally up-sample ``l``, concatenate the skip tensor, then convolve.

    Args:
        name: variable scope under which the layers are created.
        l: input tensor; its channel count is read from axis 1.
        shorcut: skip tensor concatenated to ``l`` along axis 1.
        channel: number of output channels of each 3x3 conv.
        nr_blks: number of 3x3 'valid' conv layers applied after the concat.
        stride: 1 to skip up-sampling, 2 to up-sample with a 2x2
            transposed conv; any other value fails the assertion.

    Returns:
        Output tensor of the last conv layer.
    """
    with tf.variable_scope(name):
        if stride != 1:
            in_channel = l.get_shape().as_list()[1]  # NCHW
            assert stride == 2, 'U-Net supports stride 2 up-sample only'
            l = Conv2DTranspose('deconv', l, in_channel, 2, strides=2)

        merged = tf.concat([l, shorcut], axis=1)
        for blk in range(nr_blks):
            merged = Conv2D('conv_%d' % blk, merged, channel, 3,
                            padding='valid', strides=1, activation=BNReLU)
    return merged
def maskrcnn_head(feature, num_class):
    """
    Mask head: one 2x transposed conv followed by a 1x1 conv.

    Args:
        feature (NxCx7x7):
        num_class(int): num_category + 1

    Returns:
        mask_logits (N x num_category x 14 x 14):
    """
    # c2's MSRAFill is fan_out
    msra_init = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution='normal')

    with argscope([Conv2D, Conv2DTranspose],
                  data_format='channels_first',
                  kernel_initializer=msra_init):
        upsampled = Conv2DTranspose('deconv', feature, 256, 2,
                                    strides=2, activation=tf.nn.relu)
        # One output channel per category: num_class - 1 of them.
        logits = Conv2D('conv', upsampled, num_class - 1, 1)
    return logits
def network_architecture(self, *args):
    """Build the "refine_net" encoder / resnet / decoder graph.

    Args:
        *args: exactly eight tensors, in this order: ``loss_img``,
            ``diffuse``, ``specular``, ``roughness``, ``normal``, ``depth``,
            ``mask``, ``sgs``. All but ``sgs`` are concatenated along the
            last axis (layers run with ``data_format="channels_last"``).
            ``sgs`` is indexed on axes 1 and 2 and flattened before being
            tiled over the spatial dimensions.

    Returns:
        Tuple ``(diffuse, specular, roughness, normal, depth)`` of refined,
        masked predictions; each is wrapped in ``tf.identity`` so it carries
        the matching name.

    NOTE(review): the scope nesting of the resnet group and of the "output"
    conv was reconstructed from a whitespace-collapsed source; confirm
    against existing checkpoints, since it determines variable names.
    """
    loss_img, diffuse, specular, roughness, normal, depth, mask, sgs = args

    # Not referenced again in this method.
    batch_size = tf.shape(loss_img)[0]

    # Number of stride-2 encoder levels, mirrored by the decoder.
    layers_needed = 3

    with argscope(
        [Conv2D, Conv2DTranspose, BatchNorm], data_format="channels_last"
    ):
        with tf.variable_scope("refine_net"):
            with tf.variable_scope("prepare"):
                # Single-channel tensor of ones with the mask's spatial size,
                # used to broadcast the flattened sgs to every pixel.
                onesTensor = tf.ones_like(mask[:, :, :, 0:1])
                sgs_expanded = tf.reshape(
                    sgs, [-1, 1, 1, sgs.shape[1] * sgs.shape[2]]
                )
                sgs_to_add = onesTensor * sgs_expanded

                # Stack every input along the channel axis.
                brdfInput = tf.concat(
                    [
                        loss_img,
                        diffuse,
                        specular,
                        roughness,
                        normal,
                        depth,
                        sgs_to_add,
                        mask[:, :, :, 0:1],
                    ],
                    axis=-1,
                    name="input_stack",
                )

            with tf.variable_scope("enc"):
                l = brdfInput
                skips = []
                for i in range(layers_needed):
                    # Remember the tensor *before* each down-sampling conv so
                    # the decoder can concatenate it at the same resolution.
                    skips.append(l)
                    # Filter count doubles per level, capped at 512.
                    l = Conv2D(
                        "conv%d" % (i + 1),
                        l,
                        min(self.base_nf * (2 ** i), 512),
                        4,
                        strides=2,
                        activation=INReLU,
                    )

            # ---- Resnet blocks ----
            # Four basic blocks with 256 channels; the trailing `1, True`
            # are presumably stride and isDownsampling (verify against
            # preresnet_group's signature).
            resnet_blocks = 4
            l = preresnet_group(
                "resnet_blocks",
                l,
                preresnet_basicblock,
                256,
                resnet_blocks,
                1,
                True,
            )

            # ---- Start decoding ----
            with tf.variable_scope("dec"):
                for i in range(layers_needed):
                    with tf.variable_scope("up%d" % (i + 1)):
                        # Walk the encoder levels in reverse order.
                        inv_i = layers_needed - i
                        nf = min(self.base_nf * (2 ** (inv_i - 1)), 512)

                        l = Conv2DTranspose(
                            "tconv%d" % (i + 1),
                            l,
                            nf,
                            4,
                            strides=2,
                            activation=INReLU,
                        )
                        # Skip connection from the matching encoder level.
                        l = tf.concat(
                            [l, skips[inv_i - 1]], -1, name="skip%d" % (i + 1)
                        )
                        l = Conv2D("conv%d" % (i + 1), l, nf, 3, activation=INReLU)

            # 11 sigmoid channels: diffuse (3), specular (3), roughness (1),
            # normal (3), depth (1), as sliced below.
            params = Conv2D("output", l, 11, 5, activation=tf.nn.sigmoid)

        with tf.variable_scope("refine_predictions"):
            diffuse = tf.clip_by_value(params[:, :, :, 0:3], 0.0, 1.0)
            specular = tf.identity(
                tf.clip_by_value(params[:, :, :, 3:6], 0.0, 1.0) * mask, "specular"
            )
            # Ensure energy conservation
            diffuse = tf.identity(
                (diffuse * (tf.ones_like(diffuse) - specular)) * mask, "diffuse"
            )
            # Roughness is clipped to a lower bound of 0.004 rather than 0.
            roughness = tf.identity(
                tf.clip_by_value(params[:, :, :, 6:7], 0.004, 1.0)
                * mask[:, :, :, 0:1],
                "roughness",
            )
            normal = tf.identity(
                tf.clip_by_value(params[:, :, :, 7:10], 0.0, 1.0) * mask, "normal"
            )
            depth = tf.identity(
                tf.clip_by_value(params[:, :, :, 10:11], 0.0, 1.0)
                * mask[:, :, :, 0:1],
                "depth",
            )

            return (diffuse, specular, roughness, normal, depth)