def build(self):
    """
    Assemble the ShuffleNet encoder graph under the 'shufflenet_encoder' scope.

    Steps, in order: split the input into three channels along axis 3,
    subtract ``ShuffleNet.MEAN`` per channel and divide by 255 (channels are
    re-concatenated as blue, green, red); a stride-2 3x3 'VALID' convolution;
    one row/column of zero padding followed by a stride-2 3x3 max pool; then
    stages 2, 3 and 4. A 1x1 convolution on stage 4 gives ``self.score_fr``.
    ``self.feed1`` / ``self.feed2`` alias stage 3 / stage 2.
    :return:
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm which statements belong inside each scope.
    print("Building the ShuffleNet..")
    with tf.variable_scope('shufflenet_encoder'):
        with tf.name_scope('Pre_Processing'):
            red, green, blue = tf.split(self.x_input, num_or_size_splits=3, axis=3)
            # Output channel order is blue, green, red; MEAN is indexed in that order.
            normalized_channels = [
                tf.subtract(channel, ShuffleNet.MEAN[idx]) / tf.constant(255.0)
                for idx, channel in enumerate((blue, green, red))
            ]
            preprocessed_input = tf.concat(normalized_channels, 3)

        self.conv1 = conv2d(
            'conv1',
            x=preprocessed_input,
            w=None,
            num_filters=self.output_channels['conv1'],
            kernel_size=(3, 3),
            stride=(2, 2),
            padding='VALID',
            activation=tf.nn.relu,
            bias=self.bias,
            l2_strength=self.wd,
            batchnorm_enabled=self.batchnorm_enabled,
            is_training=self.train_flag)
        _debug(self.conv1)

        # Pad one row at the bottom and one column on the right before pooling.
        bottom_right_pad = [[0, 0], [0, 1], [0, 1], [0, 0]]
        padded = tf.pad(self.conv1, bottom_right_pad, "CONSTANT")
        self.max_pool = max_pool_2d(padded, size=(3, 3), stride=(2, 2), name='max_pool')
        _debug(self.max_pool)

        self.stage2 = self.stage(self.max_pool, stage=2, repeat=3)
        _debug(self.stage2)

        self.stage3 = self.stage(self.stage2, stage=3, repeat=7)
        _debug(self.stage3)

        self.stage4 = self.stage(self.stage3, stage=4, repeat=3)
        _debug(self.stage4)

        # Intermediate feature maps exposed for decoders.
        self.feed1 = self.stage3
        self.feed2 = self.stage2

        # First Experiment is to use the regular conv2d
        self.score_fr = conv2d(
            'conv_1c_1x1',
            self.stage4,
            num_filters=self.num_classes,
            l2_strength=self.wd,
            kernel_size=(1, 1))

        print("\nEncoder ShuffleNet is built successfully\n\n")
def init_network(self):
    """
    Build the network: a MobileNet encoder plus a resize-and-convolve decoder.

    The encoder's class scores are resized to twice their spatial size,
    convolved, and added to a 1x1 scoring of ``encoder.feed1``. That fusion is
    resized 2x, convolved, and added to a 1x1 scoring of ``encoder.feed2``.
    The last fusion is resized 8x and convolved to produce ``self.logits``.
    :return:
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm which statements belong inside each scope.

    def resize_by(tensor, factor):
        # Scale the two middle (axis 1 and 2) static dimensions by `factor`.
        spatial = tensor.shape.as_list()[1:3]
        return tf.image.resize_images(tensor, (factor * spatial[0], factor * spatial[1]))

    # Encoder
    self.encoder = MobileNet(
        x_input=self.x_pl,
        num_classes=self.params.num_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        width_multipler=1.0,
        weight_decay=self.args.weight_decay)
    self.encoder.build()

    # Decoder
    with tf.name_scope('upscore_2s'):
        upscore2_upsample = resize_by(self.encoder.score_fr, 2)
        self.upscore2 = conv2d(
            'upscore2',
            x=upscore2_upsample,
            num_filters=self.params.num_classes,
            l2_strength=self.encoder.wd)
        self.score_feed1 = conv2d(
            'score_feed1',
            x=self.encoder.feed1,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            l2_strength=self.encoder.wd)
        self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

    with tf.name_scope('upscore_4s'):
        upscore4_upsample = resize_by(self.fuse_feed1, 2)
        self.upscore4 = conv2d(
            'upscore4',
            x=upscore4_upsample,
            num_filters=self.params.num_classes,
            l2_strength=self.encoder.wd)
        self.score_feed2 = conv2d(
            'score_feed2',
            x=self.encoder.feed2,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            l2_strength=self.encoder.wd)
        self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

    with tf.name_scope('upscore_8s'):
        upscore8_upsample = resize_by(self.fuse_feed2, 8)
        self.upscore8 = conv2d(
            'upscore8',
            x=upscore8_upsample,
            num_filters=self.params.num_classes,
            l2_strength=self.encoder.wd)

    self.logits = self.upscore8
def init_network(self):
    """
    Build the network: a VGG16 encoder plus a transposed-convolution decoder.

    Class scores are upsampled with a stride-2 transposed convolution to the
    spatial shape of ``encoder.feed1`` and added to a 1x1 scoring of it; the
    same is repeated against ``encoder.feed2``; a final stride-8 transposed
    convolution targets the first three dimensions of ``self.x_pl`` and its
    output becomes ``self.logits``.
    :return:
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm which statements belong inside each scope.

    # Encoder
    self.encoder = VGG16(
        x_input=self.x_pl,
        num_classes=self.params.num_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        reduced_flag=False,
        weight_decay=self.args.weight_decay)
    self.encoder.build()
    _debug(self.encoder.score_fr)

    # Decoder
    with tf.name_scope('upscore_2s'):
        feed1_shape = self.encoder.feed1.shape.as_list()[0:3] + [self.params.num_classes]
        self.upscore2 = conv2d_transpose(
            'upscore2',
            x=self.encoder.score_fr,
            output_shape=feed1_shape,
            kernel_size=(4, 4),
            stride=(2, 2),
            l2_strength=self.encoder.wd)
        _debug(self.upscore2)

        self.score_feed1 = conv2d(
            'score_feed1',
            x=self.encoder.feed1,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            l2_strength=self.encoder.wd)
        _debug(self.score_feed1)

        self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)
        _debug(self.fuse_feed1)

    with tf.name_scope('upscore_4s'):
        feed2_shape = self.encoder.feed2.shape.as_list()[0:3] + [self.params.num_classes]
        self.upscore4 = conv2d_transpose(
            'upscore4',
            x=self.fuse_feed1,
            output_shape=feed2_shape,
            kernel_size=(4, 4),
            stride=(2, 2),
            l2_strength=self.encoder.wd)
        _debug(self.upscore4)

        self.score_feed2 = conv2d(
            'score_feed2',
            x=self.encoder.feed2,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            l2_strength=self.encoder.wd)
        _debug(self.score_feed2)

        self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)
        _debug(self.fuse_feed2)

    with tf.name_scope('upscore_8s'):
        input_shape = self.x_pl.shape.as_list()[0:3] + [self.params.num_classes]
        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.fuse_feed2,
            output_shape=input_shape,
            kernel_size=(16, 16),
            stride=(8, 8),
            l2_strength=self.encoder.wd)
        _debug(self.upscore8)

    self.logits = self.upscore8
def init_network(self):
    """
    Build the network: a ShuffleNet encoder whose stages 3 and 4 are rebuilt
    with dilation.

    After ``encoder.build()``, stage 3 is re-created from ``encoder.stage2``
    with ``dilation=2`` and stage 4 with ``dilation=4``; a 1x1 convolution
    scores the result. When ``self.targets_resize`` is below 8 the scores are
    upsampled with a transposed convolution to the first three dimensions of
    ``self.y_pl``; otherwise the scores are used directly as ``self.logits``.
    :return:
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm which statements belong inside the 'dilation_2' scope.

    # Encoder
    self.encoder = ShuffleNet(
        x_input=self.x_pl,
        num_classes=self.params.num_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        batchnorm_enabled=self.args.batchnorm_enabled,
        num_groups=self.args.num_groups,
        weight_decay=self.args.weight_decay,
        bias=self.args.bias)
    self.encoder.build()

    with tf.name_scope('dilation_2'):
        self.stage3 = self.encoder.stage(self.encoder.stage2, stage=3, repeat=7, dilation=2)
        _debug(self.stage3)

        self.stage4 = self.encoder.stage(self.stage3, stage=4, repeat=3, dilation=4)
        _debug(self.stage4)

        self.score_fr = conv2d(
            'score_fr_dil',
            x=self.stage4,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            l2_strength=self.encoder.wd,
            is_training=self.is_training)
        _debug(self.score_fr)

        if self.targets_resize < 8:
            # NOTE(review): this overwrites self.targets_resize with the
            # upsampling stride; confirm no later reader expects the old value.
            up_stride = 8 // self.targets_resize
            self.targets_resize = up_stride
            target_shape = self.y_pl.shape.as_list()[0:3] + [self.params.num_classes]
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.score_fr,
                output_shape=target_shape,
                kernel_size=(up_stride * 2, up_stride * 2),
                stride=(up_stride, up_stride),
                l2_strength=self.encoder.wd,
                is_training=self.is_training)
            _debug(self.upscore8)
            self.logits = self.upscore8
        else:
            self.logits = self.score_fr
def init_network(self):
    """
    Build the network: a RESNET18 encoder followed by dilated residual blocks.

    Two residual blocks with 256 filters are built on ``encoder.conv3`` (the
    first with ``dilation=2``), then two with 512 filters (the first with
    ``dilation=4``). A 1x1 convolution scores the result, and a stride-8
    transposed convolution targeting the first three dimensions of
    ``self.x_pl`` produces ``self.logits``.
    :return:
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm which statements belong inside each scope.

    # Encoder
    self.encoder = RESNET18(
        x_input=self.x_pl,
        num_classes=self.params.num_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        weight_decay=self.args.weight_decay)
    self.encoder.build()

    # Dilated blocks and scoring head
    residual_block = self.encoder._residual_block
    with tf.name_scope('dilation_2'):
        with tf.variable_scope('conv4_x_dil'):
            self.conv4 = residual_block(
                'conv4_1_dil', self.encoder.conv3, 256,
                pool_first=False, strides=1, dilation=2)
            _debug(self.conv4)
            self.conv4 = residual_block('conv4_2_dil', self.conv4, 256)
            _debug(self.conv4)

        with tf.variable_scope('conv5_x_dil'):
            self.conv5 = residual_block(
                'conv5_1_dil', self.conv4, 512,
                pool_first=False, strides=1, dilation=4)
            _debug(self.conv5)
            self.conv5 = residual_block('conv5_2_dil', self.conv5, 512)
            _debug(self.conv5)

        self.score_fr = conv2d(
            'score_fr_dil',
            x=self.conv5,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            l2_strength=self.encoder.wd,
            is_training=self.is_training)
        _debug(self.score_fr)

        input_shape = self.x_pl.shape.as_list()[0:3] + [self.params.num_classes]
        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.score_fr,
            output_shape=input_shape,
            kernel_size=(16, 16),
            stride=(8, 8),
            l2_strength=self.encoder.wd,
            is_training=self.is_training)
        _debug(self.upscore8)

    self.logits = self.upscore8
def init_network(self):
    """
    Build the network: two ShuffleNet encoders (one on ``self.x_pl`` with
    prefix 'app_', one on ``self.flo_pl`` with prefix 'mot_') and a
    transposed-convolution decoder.

    The two encoders' ``score_fr``, ``feed1`` and ``feed2`` tensors are
    combined by element-wise multiplication. The decoder upsamples the
    combined scores by stride 2 twice, fusing with 1x1 scorings of the
    combined feeds, then by stride 8 to the first three dimensions of
    ``self.x_pl``. Variables found under each decoder layer's scope are added
    to the 'decoding_trainable_vars' collection.
    :return:
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm which statements belong inside each scope.

    def register_decoder_vars(layer_name):
        # Add the variables looked up under `layer_name` to the decoder collection.
        for variable in get_vars_underscope(tf.get_variable_scope().name, layer_name):
            tf.add_to_collection('decoding_trainable_vars', variable)

    # Encoders: same settings, different input, prefix and mean file.
    shared_settings = dict(
        num_classes=self.params.num_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        batchnorm_enabled=self.args.batchnorm_enabled,
        num_groups=self.args.num_groups,
        weight_decay=self.args.weight_decay,
        bias=self.args.bias)
    self.app_encoder = ShuffleNet(
        x_input=self.x_pl,
        prefix='app_',
        mean_path=self.args.data_dir + 'mean.npy',
        **shared_settings)
    self.motion_encoder = ShuffleNet(
        x_input=self.flo_pl,
        prefix='mot_',
        mean_path=self.args.data_dir + 'flo_mean.npy',
        **shared_settings)

    self.app_encoder.build()
    self.motion_encoder.build()

    # Element-wise product of the two streams.
    self.combined_score = tf.multiply(self.app_encoder.score_fr, self.motion_encoder.score_fr)
    self.combined_feed1 = tf.multiply(self.app_encoder.feed1, self.motion_encoder.feed1)
    self.combined_feed2 = tf.multiply(self.app_encoder.feed2, self.motion_encoder.feed2)

    # Decoder
    with tf.name_scope('upscore_2s'):
        feed1_shape = self.combined_feed1.shape.as_list()[0:3] + [self.params.num_classes]
        self.upscore2 = conv2d_transpose(
            'upscore2',
            x=self.combined_score,
            output_shape=feed1_shape,
            batchnorm_enabled=self.args.batchnorm_enabled,
            kernel_size=(4, 4),
            stride=(2, 2),
            l2_strength=self.app_encoder.wd,
            bias=self.args.bias)
        register_decoder_vars('upscore2')

        self.score_feed1 = conv2d(
            'score_feed1',
            x=self.combined_feed1,
            batchnorm_enabled=self.args.batchnorm_enabled,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            bias=self.args.bias,
            l2_strength=self.app_encoder.wd)
        register_decoder_vars('score_feed1')

        self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

    with tf.name_scope('upscore_4s'):
        feed2_shape = self.combined_feed2.shape.as_list()[0:3] + [self.params.num_classes]
        self.upscore4 = conv2d_transpose(
            'upscore4',
            x=self.fuse_feed1,
            output_shape=feed2_shape,
            batchnorm_enabled=self.args.batchnorm_enabled,
            kernel_size=(4, 4),
            stride=(2, 2),
            l2_strength=self.app_encoder.wd,
            bias=self.args.bias)
        register_decoder_vars('upscore4')

        self.score_feed2 = conv2d(
            'score_feed2',
            x=self.combined_feed2,
            batchnorm_enabled=self.args.batchnorm_enabled,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            bias=self.args.bias,
            l2_strength=self.app_encoder.wd)
        register_decoder_vars('score_feed2')

        self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

    with tf.name_scope('upscore_8s'):
        input_shape = self.x_pl.shape.as_list()[0:3] + [self.params.num_classes]
        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.fuse_feed2,
            output_shape=input_shape,
            kernel_size=(16, 16),
            stride=(8, 8),
            l2_strength=self.app_encoder.wd,
            bias=self.args.bias)
        register_decoder_vars('upscore8')

    self.logits = self.upscore8
def load_dense_layer(reduced_flag, x, name, pretrained_weights, num_classes=20, activation=None, dropout=-1,
                     train=False, trainable=True, l2_strength=0.0):
    """
    Build a fully connected layer expressed as a convolution.

    When ``reduced_flag`` is false, the kernel and bias are obtained from
    ``pretrained_weights`` via ``get_dense_weight_reshape`` and ``load_bias``.
    When it is true, no pretrained weights are read and ``conv2d`` is called
    with only a filter count and kernel size.

    The name 'score_fr' is mapped to 'fc8' in both modes. 'fc6' uses a 7x7
    kernel; every other layer uses 1x1.

    :param reduced_flag: selects the non-pretrained (reduced) variant when true
    :param x: input passed to ``conv2d``
    :param name: layer name ('fc6', 'score_fr', or anything else)
    :param pretrained_weights: weight source for the non-reduced variant
    :param num_classes: forwarded to the weight/bias loaders; also the filter
                        count of the reduced 'score_fr' layer
    :param activation: forwarded to ``conv2d``
    :param dropout: forwarded to ``conv2d`` as ``dropout_keep_prob``
    :param train: forwarded to ``conv2d`` as ``is_training``
    :param trainable: forwarded to the weight/bias loaders
    :param l2_strength: forwarded to ``conv2d``
    :return: the result of ``conv2d``
    """
    is_first_dense = name == 'fc6'
    is_score_layer = (not is_first_dense) and name == 'score_fr'
    layer_name = 'fc8' if is_score_layer else name

    if reduced_flag:
        # Reduced variant: no pretrained weights, only sizes are chosen here.
        filter_count = num_classes if is_score_layer else 512
        kernel = (7, 7) if is_first_dense else (1, 1)
        return conv2d(layer_name, x=x, num_filters=filter_count, kernel_size=kernel,
                      l2_strength=l2_strength, activation=activation,
                      dropout_keep_prob=dropout, is_training=train)

    # Full variant: reshape the pretrained dense weights into conv kernels.
    if is_first_dense:
        w = get_dense_weight_reshape(layer_name, pretrained_weights, [7, 7, 512, 4096],
                                     trainable=trainable)
    elif is_score_layer:
        w = get_dense_weight_reshape(layer_name, pretrained_weights, [1, 1, 4096, 1000],
                                     num_classes=num_classes, trainable=trainable)
    else:
        w = get_dense_weight_reshape(layer_name, pretrained_weights, [1, 1, 4096, 4096],
                                     trainable=trainable)
    biases = load_bias(layer_name, pretrained_weights, num_classes=num_classes, trainable=trainable)
    return conv2d(layer_name, x=x, w=w, l2_strength=l2_strength, bias=biases,
                  activation=activation, dropout_keep_prob=dropout, is_training=train)
def encoder_build(self):
    """
    Assemble the MobileNet encoder graph under the 'mobilenet_encoder' scope.

    The input is split into three channels along axis 3, mean-subtracted with
    ``MobileNet.MEAN``, divided by 255 and re-concatenated as blue, green,
    red. A stride-2 3x3 convolution is followed by thirteen depthwise
    separable convolutions ('conv_ds_2' .. 'conv_ds_14'); a 1x1 convolution on
    the last one gives ``self.score_fr``. ``self.feed1`` / ``self.feed2``
    alias ``conv4_2`` / ``conv3_2``.
    :return:
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm which statements belong inside each scope.

    def separable(scope_name, inputs, filters, stride):
        # All depthwise-separable layers share everything but these four values.
        return depthwise_separable_conv2d(
            scope_name, inputs,
            width_multiplier=self.width_multiplier,
            num_filters=filters,
            kernel_size=(3, 3),
            padding='SAME',
            stride=stride,
            activation=tf.nn.relu6,
            batchnorm_enabled=True,
            is_training=self.train_flag,
            l2_strength=self.wd)

    keep, halve = (1, 1), (2, 2)

    print("Building the MobileNet..")
    with tf.variable_scope('mobilenet_encoder'):
        with tf.name_scope('Pre_Processing'):
            red, green, blue = tf.split(self.x_input, num_or_size_splits=3, axis=3)
            # Output channel order is blue, green, red; MEAN is indexed in that order.
            preprocessed_input = tf.concat(
                [(channel - MobileNet.MEAN[idx]) / 255.0
                 for idx, channel in enumerate((blue, green, red))],
                3)

        self.conv1_1 = conv2d(
            'conv_1', preprocessed_input,
            num_filters=int(round(32 * self.width_multiplier)),
            kernel_size=(3, 3),
            padding='SAME',
            stride=halve,
            activation=tf.nn.relu6,
            batchnorm_enabled=True,
            is_training=self.train_flag,
            l2_strength=self.wd)
        self._debug(self.conv1_1)

        self.conv2_1 = separable('conv_ds_2', self.conv1_1, 64, keep)
        self._debug(self.conv2_1)
        self.conv2_2 = separable('conv_ds_3', self.conv2_1, 128, halve)
        self._debug(self.conv2_2)

        self.conv3_1 = separable('conv_ds_4', self.conv2_2, 128, keep)
        self._debug(self.conv3_1)
        self.conv3_2 = separable('conv_ds_5', self.conv3_1, 256, halve)
        self._debug(self.conv3_2)

        self.conv4_1 = separable('conv_ds_6', self.conv3_2, 256, keep)
        self._debug(self.conv4_1)
        self.conv4_2 = separable('conv_ds_7', self.conv4_1, 512, halve)
        self._debug(self.conv4_2)

        self.conv5_1 = separable('conv_ds_8', self.conv4_2, 512, keep)
        self._debug(self.conv5_1)
        self.conv5_2 = separable('conv_ds_9', self.conv5_1, 512, keep)
        self._debug(self.conv5_2)
        self.conv5_3 = separable('conv_ds_10', self.conv5_2, 512, keep)
        self._debug(self.conv5_3)
        self.conv5_4 = separable('conv_ds_11', self.conv5_3, 512, keep)
        self._debug(self.conv5_4)
        self.conv5_5 = separable('conv_ds_12', self.conv5_4, 512, keep)
        self._debug(self.conv5_5)
        self.conv5_6 = separable('conv_ds_13', self.conv5_5, 1024, halve)
        self._debug(self.conv5_6)

        self.conv6_1 = separable('conv_ds_14', self.conv5_6, 1024, keep)
        self._debug(self.conv6_1)

        # Pooling is removed.
        self.score_fr = conv2d(
            'conv_1c_1x1', self.conv6_1,
            num_filters=self.num_classes,
            l2_strength=self.wd,
            kernel_size=(1, 1))
        self._debug(self.score_fr)

        # Intermediate feature maps exposed for decoders.
        self.feed1 = self.conv4_2
        self.feed2 = self.conv3_2

        print("\nEncoder MobileNet is built successfully\n\n")
def init_network(self):
    """
    Build the network: a MobileNet encoder whose layers after ``conv4_1`` are
    rebuilt with atrous convolutions in place of the stride-2 layers.

    'conv_ds_7_dil' and 'conv_ds_13_dil' are atrous convolutions (dilation
    rates 2 and 4); the layers between and after them are stride-1 depthwise
    separable convolutions. A 1x1 convolution scores the result. When
    ``self.targets_resize`` is below 8 the scores are upsampled with a
    transposed convolution to the first three dimensions of ``self.y_pl``;
    otherwise the scores are used directly as ``self.logits``.
    :return:
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm which statements belong inside the 'dilation_2' scope.

    def separable(scope_name, inputs, filters):
        # Stride-1 depthwise separable layer with the settings shared below.
        return depthwise_separable_conv2d(
            scope_name, inputs,
            width_multiplier=self.encoder.width_multiplier,
            num_filters=filters,
            kernel_size=(3, 3),
            padding='SAME',
            stride=(1, 1),
            activation=tf.nn.relu,
            batchnorm_enabled=True,
            is_training=self.is_training,
            l2_strength=self.wd)

    def dilated(scope_name, inputs, filters, rate):
        # Atrous 3x3 convolution with the settings shared below.
        return atrous_conv2d(
            scope_name, inputs,
            num_filters=filters,
            kernel_size=(3, 3),
            padding='SAME',
            activation=tf.nn.relu,
            dilation_rate=rate,
            batchnorm_enabled=True,
            is_training=self.is_training,
            l2_strength=self.wd)

    # Encoder
    self.encoder = MobileNet(
        x_input=self.x_pl,
        num_classes=self.params.num_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        width_multipler=1.0,
        weight_decay=self.args.weight_decay)
    self.encoder.build()

    # Dilated trunk and scoring head
    with tf.name_scope('dilation_2'):
        self.conv4_2 = dilated('conv_ds_7_dil', self.encoder.conv4_1, 512, 2)
        _debug(self.conv4_2)

        self.conv5_1 = separable('conv_ds_8_dil', self.conv4_2, 512)
        _debug(self.conv5_1)
        self.conv5_2 = separable('conv_ds_9_dil', self.conv5_1, 512)
        _debug(self.conv5_2)
        self.conv5_3 = separable('conv_ds_10_dil', self.conv5_2, 512)
        _debug(self.conv5_3)
        self.conv5_4 = separable('conv_ds_11_dil', self.conv5_3, 512)
        _debug(self.conv5_4)
        self.conv5_5 = separable('conv_ds_12_dil', self.conv5_4, 512)
        _debug(self.conv5_5)

        self.conv5_6 = dilated('conv_ds_13_dil', self.conv5_5, 1024, 4)
        _debug(self.conv5_6)

        self.conv6_1 = separable('conv_ds_14_dil', self.conv5_6, 1024)
        _debug(self.conv6_1)

        # Pooling is removed.
        self.score_fr = conv2d(
            'conv_1c_1x1_dil', self.conv6_1,
            num_filters=self.params.num_classes,
            l2_strength=self.wd,
            kernel_size=(1, 1))
        _debug(self.score_fr)

        if self.targets_resize < 8:
            # NOTE(review): this overwrites self.targets_resize with the
            # upsampling stride; confirm no later reader expects the old value.
            up_stride = 8 // self.targets_resize
            self.targets_resize = up_stride
            target_shape = self.y_pl.shape.as_list()[0:3] + [self.params.num_classes]
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.score_fr,
                output_shape=target_shape,
                kernel_size=(up_stride * 2, up_stride * 2),
                stride=(up_stride, up_stride),
                l2_strength=self.encoder.wd,
                is_training=self.is_training)
            _debug(self.upscore8)
            self.logits = self.upscore8
        else:
            self.logits = self.score_fr
def init_network(self):
    """
    Build the network: a VGG16 encoder plus a five-stage upscaling decoder
    with additive skip connections.

    Each stage upsamples with a stride-2 transposed convolution to the shape
    of an encoder activation (conv4_3, conv3_3, conv2_2, conv1_2, conv1_1 in
    turn), adds that activation, and applies two 3x3 convolutions with the
    activation's channel count. A final 1x1 convolution produces
    ``self.logits``.
    :return:
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm which statements belong inside each scope.

    def upsample_to(scope_name, inputs, skip):
        # Stride-2 transposed convolution targeting the full shape of `skip`.
        return conv2d_transpose(
            scope_name,
            x=inputs,
            output_shape=skip.shape.as_list(),
            kernel_size=(4, 4),
            stride=(2, 2),
            l2_strength=self.encoder.wd)

    def refine(scope_name, inputs, skip):
        # 3x3 convolution with as many filters as `skip` has channels.
        return conv2d(
            scope_name,
            x=inputs,
            num_filters=skip.shape.as_list()[3],
            kernel_size=(3, 3),
            l2_strength=self.encoder.wd)

    # Encoder
    self.encoder = VGG16(
        x_input=self.x_pl,
        num_classes=self.params.num_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        reduced_flag=False,
        weight_decay=self.args.weight_decay)
    self.encoder.build()

    # Decoder
    with tf.name_scope('upscale_1'):
        # The scope name 'upscale0' is intentional here: it is the runtime
        # variable-scope string and must stay as is.
        self.upscale1 = upsample_to('upscale0', self.encoder.conv5_3, self.encoder.conv4_3)
        _debug(self.upscale1)
        self.concat1 = tf.add(self.upscale1, self.encoder.conv4_3)
        _debug(self.concat1)
        self.expand11 = refine('expand1_1', self.concat1, self.encoder.conv4_3)
        _debug(self.expand11)
        self.expand12 = refine('expand1_2', self.expand11, self.encoder.conv4_3)
        _debug(self.expand12)

    with tf.name_scope('upscale_2'):
        self.upscale2 = upsample_to('upscale2', self.expand12, self.encoder.conv3_3)
        _debug(self.upscale2)
        self.concat2 = tf.add(self.upscale2, self.encoder.conv3_3)
        _debug(self.concat2)
        self.expand21 = refine('expand2_1', self.concat2, self.encoder.conv3_3)
        _debug(self.expand21)
        self.expand22 = refine('expand2_2', self.expand21, self.encoder.conv3_3)
        _debug(self.expand22)

    with tf.name_scope('upscale_3'):
        self.upscale3 = upsample_to('upscale3', self.expand22, self.encoder.conv2_2)
        _debug(self.upscale3)
        self.concat3 = tf.add(self.upscale3, self.encoder.conv2_2)
        _debug(self.concat3)
        self.expand31 = refine('expand3_1', self.concat3, self.encoder.conv2_2)
        _debug(self.expand31)
        self.expand32 = refine('expand3_2', self.expand31, self.encoder.conv2_2)
        _debug(self.expand32)

    with tf.name_scope('upscale_4'):
        self.upscale4 = upsample_to('upscale4', self.expand32, self.encoder.conv1_2)
        _debug(self.upscale4)
        self.concat4 = tf.add(self.upscale4, self.encoder.conv1_2)
        _debug(self.concat4)
        self.expand41 = refine('expand4_1', self.concat4, self.encoder.conv1_2)
        _debug(self.expand41)
        self.expand42 = refine('expand4_2', self.expand41, self.encoder.conv1_2)
        _debug(self.expand42)

    with tf.name_scope('upscale_5'):
        self.upscale5 = upsample_to('upscale5', self.expand42, self.encoder.conv1_1)
        _debug(self.upscale5)
        self.concat5 = tf.add(self.upscale5, self.encoder.conv1_1)
        _debug(self.concat5)
        self.expand51 = refine('expand5_1', self.concat5, self.encoder.conv1_1)
        _debug(self.expand51)
        self.expand52 = refine('expand5_2', self.expand51, self.encoder.conv1_1)
        _debug(self.expand52)

    with tf.name_scope('final_score'):
        self.fscore = conv2d(
            'fscore',
            x=self.expand52,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            l2_strength=self.encoder.wd)
        _debug(self.fscore)

    self.logits = self.fscore
def init_network(self):
    """
    Build the network: a MobileNet encoder plus the FCN8s decoder.

    Class scores are upsampled with a stride-2 transposed convolution to the
    spatial shape of ``encoder.feed1`` and added to a 1x1 scoring of it; the
    same is repeated against ``encoder.feed2``; a final stride-8 transposed
    convolution targets the first three dimensions of ``self.x_pl`` and its
    output becomes ``self.logits``. The last layer is built without the
    batch-norm and training-flag arguments that the earlier layers receive.
    :return:
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm which statements belong inside each scope.

    # Encoder
    self.encoder = MobileNet(
        x_input=self.x_pl,
        num_classes=self.params.num_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        width_multipler=1.0,
        weight_decay=self.args.weight_decay)
    self.encoder.build()

    print("Building the Decoder FCN8s..")

    # Decoder
    with tf.name_scope('upscore_2s'):
        feed1_shape = self.encoder.feed1.shape.as_list()[0:3] + [self.params.num_classes]
        self.upscore2 = conv2d_transpose(
            'upscore2',
            x=self.encoder.score_fr,
            output_shape=feed1_shape,
            kernel_size=(4, 4),
            stride=(2, 2),
            batchnorm_enabled=self.args.batchnorm_enabled,
            is_training=self.is_training,
            l2_strength=self.encoder.wd)
        self._debug(self.upscore2)

        self.score_feed1 = conv2d(
            'score_feed1',
            x=self.encoder.feed1,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            batchnorm_enabled=self.args.batchnorm_enabled,
            is_training=self.is_training,
            l2_strength=self.encoder.wd)
        self._debug(self.score_feed1)

        self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)
        self._debug(self.fuse_feed1)

    with tf.name_scope('upscore_4s'):
        feed2_shape = self.encoder.feed2.shape.as_list()[0:3] + [self.params.num_classes]
        self.upscore4 = conv2d_transpose(
            'upscore4',
            x=self.fuse_feed1,
            output_shape=feed2_shape,
            kernel_size=(4, 4),
            stride=(2, 2),
            batchnorm_enabled=self.args.batchnorm_enabled,
            is_training=self.is_training,
            l2_strength=self.encoder.wd)
        self._debug(self.upscore4)

        self.score_feed2 = conv2d(
            'score_feed2',
            x=self.encoder.feed2,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            batchnorm_enabled=self.args.batchnorm_enabled,
            is_training=self.is_training,
            l2_strength=self.encoder.wd)
        self._debug(self.score_feed2)

        self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)
        self._debug(self.fuse_feed2)

    with tf.name_scope('upscore_8s'):
        input_shape = self.x_pl.shape.as_list()[0:3] + [self.params.num_classes]
        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.fuse_feed2,
            output_shape=input_shape,
            kernel_size=(16, 16),
            stride=(8, 8),
            l2_strength=self.encoder.wd)
        self._debug(self.upscore8)

    self.logits = self.upscore8
    print("\nDecoder FCN8s is built successfully\n\n")
def init_network(self):
    """
    Build the network: two MobileNet encoders (one on ``self.x_pl`` with
    prefix 'app_', one on ``self.flo_pl`` with prefix 'mot_'), a shared trunk
    built on their combined ``conv3_2`` activations, and a
    transposed-convolution decoder.

    The two ``conv3_2`` tensors are multiplied element-wise into
    ``self.feed2``. Nine depthwise separable convolutions ('conv_ds_6_1' ..
    'conv_ds_14_1') follow, then a 1x1 scoring convolution. ``self.feed1`` is
    the trunk's ``conv4_2``. The decoder upsamples by stride 2 twice, fusing
    with 1x1 scorings of ``feed1`` and ``feed2``, then by stride 8 to the
    first three dimensions of ``self.x_pl``.
    :return:
    """
    # NOTE(review): block nesting was reconstructed from a flattened listing;
    # confirm which statements belong inside each scope.

    def separable(scope_name, inputs, filters, stride):
        # All trunk layers share everything but these four values.
        return depthwise_separable_conv2d(
            scope_name, inputs,
            width_multiplier=self.width_multiplier,
            num_filters=filters,
            kernel_size=(3, 3),
            padding='SAME',
            stride=stride,
            activation=tf.nn.relu6,
            batchnorm_enabled=True,
            is_training=self.is_training,
            l2_strength=self.args.weight_decay)

    keep, halve = (1, 1), (2, 2)

    # Encoders: same settings, different input, prefix and mean file.
    self.app_encoder = MobileNet(
        x_input=self.x_pl,
        num_classes=self.params.num_classes,
        prefix='app_',
        pretrained_path=self.args.pretrained_path,
        mean_path=self.args.data_dir + 'mean.npy',
        train_flag=self.is_training,
        width_multipler=1.0,
        weight_decay=self.args.weight_decay)
    self.motion_encoder = MobileNet(
        x_input=self.flo_pl,
        num_classes=self.params.num_classes,
        prefix='mot_',
        pretrained_path=self.args.pretrained_path,
        mean_path=self.args.data_dir + 'flo_mean.npy',
        train_flag=self.is_training,
        width_multipler=1.0,
        weight_decay=self.args.weight_decay)

    self.app_encoder.build()
    self.motion_encoder.build()

    # Shared trunk on the element-wise product of the two streams.
    self.feed2 = tf.multiply(self.app_encoder.conv3_2, self.motion_encoder.conv3_2)
    self.width_multiplier = 1.0

    self.conv4_1 = separable('conv_ds_6_1', self.feed2, 256, keep)
    _debug(self.conv4_1)
    self.conv4_2 = separable('conv_ds_7_1', self.conv4_1, 512, halve)
    _debug(self.conv4_2)

    self.conv5_1 = separable('conv_ds_8_1', self.conv4_2, 512, keep)
    _debug(self.conv5_1)
    self.conv5_2 = separable('conv_ds_9_1', self.conv5_1, 512, keep)
    _debug(self.conv5_2)
    self.conv5_3 = separable('conv_ds_10_1', self.conv5_2, 512, keep)
    _debug(self.conv5_3)
    self.conv5_4 = separable('conv_ds_11_1', self.conv5_3, 512, keep)
    _debug(self.conv5_4)
    self.conv5_5 = separable('conv_ds_12_1', self.conv5_4, 512, keep)
    _debug(self.conv5_5)
    self.conv5_6 = separable('conv_ds_13_1', self.conv5_5, 1024, halve)
    _debug(self.conv5_6)

    self.conv6_1 = separable('conv_ds_14_1', self.conv5_6, 1024, keep)
    _debug(self.conv6_1)

    # Pooling is removed.
    self.score_fr = conv2d(
        'conv_1c_1x1_1', self.conv6_1,
        num_filters=self.params.num_classes,
        l2_strength=self.args.weight_decay,
        kernel_size=(1, 1))
    self.feed1 = self.conv4_2

    # Decoder
    with tf.name_scope('upscore_2s'):
        feed1_shape = self.feed1.shape.as_list()[0:3] + [self.params.num_classes]
        self.upscore2 = conv2d_transpose(
            'upscore2',
            x=self.score_fr,
            output_shape=feed1_shape,
            kernel_size=(4, 4),
            stride=(2, 2),
            batchnorm_enabled=self.args.batchnorm_enabled,
            is_training=self.is_training,
            l2_strength=self.args.weight_decay,
            bias=self.args.bias)
        _debug(self.upscore2)

        self.score_feed1 = conv2d(
            'score_feed1',
            x=self.feed1,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            batchnorm_enabled=self.args.batchnorm_enabled,
            is_training=self.is_training,
            bias=self.args.bias,
            l2_strength=self.args.weight_decay)
        _debug(self.score_feed1)

        self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

    with tf.name_scope('upscore_4s'):
        feed2_shape = self.feed2.shape.as_list()[0:3] + [self.params.num_classes]
        self.upscore4 = conv2d_transpose(
            'upscore4',
            x=self.fuse_feed1,
            output_shape=feed2_shape,
            kernel_size=(4, 4),
            stride=(2, 2),
            batchnorm_enabled=self.args.batchnorm_enabled,
            is_training=self.is_training,
            l2_strength=self.args.weight_decay,
            bias=self.args.bias)
        _debug(self.upscore4)

        self.score_feed2 = conv2d(
            'score_feed2',
            x=self.feed2,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            batchnorm_enabled=self.args.batchnorm_enabled,
            is_training=self.is_training,
            bias=self.args.bias,
            l2_strength=self.args.weight_decay)
        _debug(self.score_feed2)

        self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

    with tf.name_scope('upscore_8s'):
        input_shape = self.x_pl.shape.as_list()[0:3] + [self.params.num_classes]
        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.fuse_feed2,
            output_shape=input_shape,
            is_training=self.is_training,
            kernel_size=(16, 16),
            stride=(8, 8),
            l2_strength=self.args.weight_decay,
            bias=self.args.bias)
        _debug(self.upscore8)

    self.logits = self.upscore8
def init_network(self):
    """
    Assemble the segmentation graph: a ShuffleNet encoder followed by a
    decoder made of five transposed-convolution steps. After each of the
    first four steps an encoder feature map (stage3, stage2, max_pool,
    conv1) is added to the decoder output. The final 1x1 convolution is
    stored in self.logits.
    :return:
    """
    training = self.is_training

    # Encoder: instantiate ShuffleNet, then let it build its own graph
    self.encoder = ShuffleNet(
        x_input=self.x_pl,
        num_classes=self.params.num_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=training,
        batchnorm_enabled=self.args.batchnorm_enabled,
        num_groups=self.args.num_groups,
        weight_decay=self.args.weight_decay,
        bias=self.args.bias,
    )
    self.encoder.build()

    enc = self.encoder
    # Keyword sets repeated by the decoder layers (all of them use batch norm)
    regular = dict(batchnorm_enabled=True, is_training=training,
                   l2_strength=enc.wd)
    squeeze = dict(kernel_size=(1, 1), **regular)
    grow = dict(kernel_size=(4, 4), stride=(2, 2), **regular)
    # Pads one extra element at the end of axes 1 and 2
    edge_pad = [[0, 0], [0, 1], [0, 1], [0, 0]]

    # Decoder
    with tf.name_scope('upscale_1'):
        skip = enc.stage3
        skip_dims = skip.shape.as_list()
        skip_channels = skip_dims[3]
        self.expand11 = conv2d('expand1_1', x=enc.stage4,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand11)
        self.upscale1 = conv2d_transpose('upscale1', x=self.expand11,
                                         output_shape=skip_dims, **grow)
        self._debug(self.upscale1)
        self.add1 = tf.add(self.upscale1, skip)
        self._debug(self.add1)
        self.expand12 = conv2d('expand1_2', x=self.add1,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand12)

    with tf.name_scope('upscale_2'):
        skip = enc.stage2
        skip_dims = skip.shape.as_list()
        skip_channels = skip_dims[3]
        self.expand21 = conv2d('expand2_1', x=self.expand12,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand21)
        self.upscale2 = conv2d_transpose('upscale2', x=self.expand21,
                                         output_shape=skip_dims, **grow)
        self._debug(self.upscale2)
        self.add2 = tf.add(self.upscale2, skip)
        self._debug(self.add2)
        self.expand22 = conv2d('expand2_2', x=self.add2,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand22)

    with tf.name_scope('upscale_3'):
        skip = enc.max_pool
        skip_dims = skip.shape.as_list()
        skip_channels = skip_dims[3]
        self.expand31 = conv2d('expand3_1', x=self.expand22,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand31)
        # Target is one element larger than max_pool along axes 1 and 2;
        # the skip tensor is padded below to the same size before the add.
        self.upscale3 = conv2d_transpose(
            'upscale3', x=self.expand31,
            output_shape=[skip.shape[0], skip_dims[1] + 1,
                          skip_dims[2] + 1, skip_channels],
            **grow)
        self._debug(self.upscale3)
        padded = tf.pad(skip, edge_pad, "CONSTANT")
        self.add3 = tf.add(self.upscale3, padded)
        self._debug(self.add3)
        self.expand32 = conv2d('expand3_2', x=self.add3,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand32)

    with tf.name_scope('upscale_4'):
        skip = enc.conv1
        skip_dims = skip.shape.as_list()
        skip_channels = skip_dims[3]
        self.expand41 = conv2d('expand4_1', x=self.expand32,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand41)
        # Same +1 target size / padded skip arrangement as in upscale_3
        self.upscale4 = conv2d_transpose(
            'upscale4', x=self.expand41,
            output_shape=[skip.shape[0], skip_dims[1] + 1,
                          skip_dims[2] + 1, skip_channels],
            **grow)
        self._debug(self.upscale4)
        padded2 = tf.pad(skip, edge_pad, "CONSTANT")
        self.add4 = tf.add(self.upscale4, padded2)
        self._debug(self.add4)
        self.expand42 = conv2d('expand4_2', x=self.add4,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand42)

    with tf.name_scope('upscale_5'):
        stem_channels = enc.conv1.shape.as_list()[3]
        # Last step: first three dims come from the input placeholder,
        # no skip connection is added afterwards.
        self.upscale5 = conv2d_transpose(
            'upscale5', x=self.expand42,
            output_shape=self.x_pl.shape.as_list()[0:3] + [stem_channels],
            **grow)
        self._debug(self.upscale5)
        self.expand5 = conv2d('expand5', x=self.upscale5,
                              num_filters=stem_channels,
                              dropout_keep_prob=0.5, **squeeze)
        self._debug(self.expand5)

    with tf.name_scope('final_score'):
        # Plain 1x1 convolution down to one channel per class
        self.fscore = conv2d('fscore', x=self.expand5,
                             num_filters=self.params.num_classes,
                             kernel_size=(1, 1), l2_strength=enc.wd)
        self._debug(self.fscore)

    self.logits = self.fscore
def init_network(self):
    """
    Build the two-stream ShuffleNet graph.

    An appearance encoder (fed from self.x_pl) and a motion encoder (fed
    from self.flo_pl) are built; their stage2 outputs are multiplied
    element-wise, stages 3 and 4 are then built on the fused map with the
    appearance encoder's stage() method, and a three-step upscore decoder
    (upscore_2s / 4s / 8s) produces self.logits.
    :return:
    """
    args = self.args
    training = self.is_training
    num_classes = self.params.num_classes

    # Constructor arguments the two encoders have in common
    encoder_common = dict(
        num_classes=num_classes,
        pretrained_path=args.pretrained_path,
        train_flag=training,
        batchnorm_enabled=args.batchnorm_enabled,
        num_groups=args.num_groups,
        weight_decay=args.weight_decay,
        bias=args.bias,
    )
    self.app_encoder = ShuffleNet(x_input=self.x_pl, prefix='app_',
                                  mean_path=args.data_dir + 'mean.npy',
                                  **encoder_common)
    self.motion_encoder = ShuffleNet(x_input=self.flo_pl, prefix='mot_',
                                     mean_path=args.data_dir + 'flo_mean.npy',
                                     **encoder_common)

    # Encoding part
    self.app_encoder.build()
    self.motion_encoder.build()

    # Fuse the two streams by element-wise multiplication.
    # (A disabled alternative concatenated the two stage2 maps on axis 3
    # and reduced them with a 1x1 conv2d named 'combined_score' with 240
    # filters.)
    self.combined_score = tf.multiply(self.app_encoder.stage2,
                                      self.motion_encoder.stage2)

    self.stage3 = self.app_encoder.stage(self.combined_score,
                                         stage=3, repeat=7)
    _debug(self.stage3)
    self.stage4 = self.app_encoder.stage(self.stage3, stage=4, repeat=3)
    _debug(self.stage4)

    # Skip connections consumed by the decoder
    self.feed1 = self.stage3
    self.feed2 = self.combined_score

    # First Experiment is to use the regular conv2d
    self.score_fr = conv2d('combined_conv_1c_1x1', self.stage4,
                           num_filters=num_classes,
                           l2_strength=args.weight_decay,
                           kernel_size=(1, 1))
    _debug(self.score_fr)

    # Keyword sets shared by the decoder layers
    layer_common = dict(is_training=training,
                        l2_strength=args.weight_decay,
                        bias=args.bias)
    bn_common = dict(batchnorm_enabled=args.batchnorm_enabled,
                     **layer_common)
    double = dict(kernel_size=(4, 4), stride=(2, 2), **bn_common)
    score = dict(num_filters=num_classes, kernel_size=(1, 1), **bn_common)

    # Decoding part
    with tf.name_scope('upscore_2s'):
        target = self.feed1.shape.as_list()[0:3] + [num_classes]
        self.upscore2 = conv2d_transpose('upscore2', x=self.score_fr,
                                         output_shape=target, **double)
        _debug(self.upscore2)
        self.score_feed1 = conv2d('score_feed1', x=self.feed1, **score)
        _debug(self.score_feed1)
        self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

    with tf.name_scope('upscore_4s'):
        target = self.feed2.shape.as_list()[0:3] + [num_classes]
        self.upscore4 = conv2d_transpose('upscore4', x=self.fuse_feed1,
                                         output_shape=target, **double)
        _debug(self.upscore4)
        self.score_feed2 = conv2d('score_feed2', x=self.feed2, **score)
        _debug(self.score_feed2)
        self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

    with tf.name_scope('upscore_8s'):
        # Final step: first three dims taken from the input placeholder;
        # batchnorm_enabled is not passed to this layer.
        target = self.x_pl.shape.as_list()[0:3] + [num_classes]
        self.upscore8 = conv2d_transpose('upscore8', x=self.fuse_feed2,
                                         output_shape=target,
                                         kernel_size=(16, 16),
                                         stride=(8, 8), **layer_common)
        _debug(self.upscore8)

    self.logits = self.upscore8
def init_network(self):
    """
    Build the two-stream VGG16 network and its three-step upscore decoder.

    An appearance encoder (fed from self.x_pl) and a motion encoder (fed
    from self.flo_pl) are built separately. Their score_fr outputs are
    multiplied element-wise, and at each skip level (feed1, feed2) the two
    streams' ReLU-activated 1x1 scores are multiplied and added to the
    upsampled decoder output. The result is stored in self.logits.

    Every decoder layer is given is_training=self.is_training, matching the
    decoder of the two-stream ShuffleNet network in this file. Without it,
    layers created with batchnorm_enabled fall back to the layer helpers'
    default mode — presumably independent of the train/test phase; confirm
    against the helper definitions.
    :return:
    """
    num_classes = self.params.num_classes
    training = self.is_training
    bn_enabled = self.args.batchnorm_enabled
    bias = self.args.bias

    # Init a VGG16 as an encoder (one per stream)
    self.app_encoder = VGG16(x_input=self.x_pl, prefix='app_',
                             num_classes=num_classes,
                             pretrained_path=self.args.pretrained_path,
                             train_flag=training, reduced_flag=False,
                             weight_decay=self.args.weight_decay,
                             mean_path=self.args.data_dir + 'mean.npy')
    self.motion_encoder = VGG16(x_input=self.flo_pl, prefix='mot_',
                                num_classes=num_classes,
                                pretrained_path=self.args.pretrained_path,
                                train_flag=training, reduced_flag=False,
                                weight_decay=self.args.weight_decay,
                                mean_path=self.args.data_dir + 'flo_mean.npy')

    # Build Encoding part
    self.app_encoder.build()
    self.motion_encoder.build()

    # Fuse the coarsest class scores of both streams
    self.combined_score = tf.multiply(self.app_encoder.score_fr,
                                      self.motion_encoder.score_fr)

    # Build Decoding part
    with tf.name_scope('upscore_2s'):
        self.upscore2 = conv2d_transpose(
            'upscore2', x=self.combined_score,
            output_shape=self.app_encoder.feed1.shape.as_list()[0:3] + [num_classes],
            batchnorm_enabled=bn_enabled, is_training=training,
            kernel_size=(4, 4), stride=(2, 2),
            l2_strength=self.app_encoder.wd, bias=bias)
        self.app_score_feed1 = conv2d(
            'app_score_feed1', x=self.app_encoder.feed1,
            batchnorm_enabled=bn_enabled, is_training=training,
            num_filters=num_classes, kernel_size=(1, 1), bias=bias,
            l2_strength=self.app_encoder.wd)
        self.app_score_feed1 = tf.nn.relu(self.app_score_feed1)
        self.mot_score_feed1 = conv2d(
            'mot_score_feed1', x=self.motion_encoder.feed1,
            batchnorm_enabled=bn_enabled, is_training=training,
            num_filters=num_classes, kernel_size=(1, 1), bias=bias,
            l2_strength=self.motion_encoder.wd)
        self.mot_score_feed1 = tf.nn.relu(self.mot_score_feed1)
        # Skip score = product of both streams, then added to the decoder path
        self.score_feed1 = tf.multiply(self.app_score_feed1,
                                       self.mot_score_feed1)
        self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

    with tf.name_scope('upscore_4s'):
        self.upscore4 = conv2d_transpose(
            'upscore4', x=self.fuse_feed1,
            output_shape=self.app_encoder.feed2.shape.as_list()[0:3] + [num_classes],
            batchnorm_enabled=bn_enabled, is_training=training,
            kernel_size=(4, 4), stride=(2, 2),
            l2_strength=self.app_encoder.wd, bias=bias)
        self.app_score_feed2 = conv2d(
            'app_score_feed2', x=self.app_encoder.feed2,
            batchnorm_enabled=bn_enabled, is_training=training,
            num_filters=num_classes, kernel_size=(1, 1), bias=bias,
            l2_strength=self.app_encoder.wd)
        self.app_score_feed2 = tf.nn.relu(self.app_score_feed2)
        self.mot_score_feed2 = conv2d(
            'mot_score_feed2', x=self.motion_encoder.feed2,
            batchnorm_enabled=bn_enabled, is_training=training,
            num_filters=num_classes, kernel_size=(1, 1), bias=bias,
            l2_strength=self.motion_encoder.wd)
        self.mot_score_feed2 = tf.nn.relu(self.mot_score_feed2)
        self.score_feed2 = tf.multiply(self.app_score_feed2,
                                       self.mot_score_feed2)
        self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

    with tf.name_scope('upscore_8s'):
        # Final step: first three dims taken from the input placeholder;
        # batchnorm_enabled is not passed to this layer.
        self.upscore8 = conv2d_transpose(
            'upscore8', x=self.fuse_feed2,
            output_shape=self.x_pl.shape.as_list()[0:3] + [num_classes],
            is_training=training,
            kernel_size=(16, 16), stride=(8, 8),
            l2_strength=self.app_encoder.wd, bias=bias)

    self.logits = self.upscore8
def init_network(self):
    """
    Assemble the segmentation graph: a MobileNet encoder followed by a
    decoder made of five transposed-convolution steps. After each of the
    first four steps the matching encoder feature map (conv5_5, conv4_1,
    conv3_1, conv2_1) is added to the decoder output. The final 1x1
    convolution is stored in self.logits.
    :return:
    """
    training = self.is_training

    # Encoder: instantiate MobileNet, then let it build its own graph
    self.encoder = MobileNet(
        x_input=self.x_pl,
        num_classes=self.params.num_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=training,
        width_multipler=1.0,
        weight_decay=self.args.weight_decay,
    )
    self.encoder.build()

    enc = self.encoder
    # Keyword sets repeated by the decoder layers (all of them use batch norm)
    regular = dict(batchnorm_enabled=True, is_training=training,
                   l2_strength=enc.wd)
    squeeze = dict(kernel_size=(1, 1), **regular)
    grow = dict(kernel_size=(4, 4), stride=(2, 2), **regular)

    # Decoder
    with tf.name_scope('upscale_1'):
        skip = enc.conv5_5
        skip_dims = skip.shape.as_list()
        skip_channels = skip_dims[3]
        self.expand11 = conv2d('expand1_1', x=enc.conv5_6,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand11)
        self.upscale1 = conv2d_transpose('upscale1', x=self.expand11,
                                         output_shape=skip_dims, **grow)
        self._debug(self.upscale1)
        self.add1 = tf.add(self.upscale1, skip)
        self._debug(self.add1)
        self.expand12 = conv2d('expand1_2', x=self.add1,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand12)

    with tf.name_scope('upscale_2'):
        skip = enc.conv4_1
        skip_dims = skip.shape.as_list()
        skip_channels = skip_dims[3]
        self.expand21 = conv2d('expand2_1', x=self.expand12,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand21)
        self.upscale2 = conv2d_transpose('upscale2', x=self.expand21,
                                         output_shape=skip_dims, **grow)
        self._debug(self.upscale2)
        self.add2 = tf.add(self.upscale2, skip)
        self._debug(self.add2)
        self.expand22 = conv2d('expand2_2', x=self.add2,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand22)

    with tf.name_scope('upscale_3'):
        skip = enc.conv3_1
        skip_dims = skip.shape.as_list()
        skip_channels = skip_dims[3]
        self.expand31 = conv2d('expand3_1', x=self.expand22,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand31)
        self.upscale3 = conv2d_transpose('upscale3', x=self.expand31,
                                         output_shape=skip_dims, **grow)
        self._debug(self.upscale3)
        self.add3 = tf.add(self.upscale3, skip)
        self._debug(self.add3)
        self.expand32 = conv2d('expand3_2', x=self.add3,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand32)

    with tf.name_scope('upscale_4'):
        skip = enc.conv2_1
        skip_dims = skip.shape.as_list()
        skip_channels = skip_dims[3]
        self.expand41 = conv2d('expand4_1', x=self.expand32,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand41)
        self.upscale4 = conv2d_transpose('upscale4', x=self.expand41,
                                         output_shape=skip_dims, **grow)
        self._debug(self.upscale4)
        self.add4 = tf.add(self.upscale4, skip)
        self._debug(self.add4)
        self.expand42 = conv2d('expand4_2', x=self.add4,
                               num_filters=skip_channels, **squeeze)
        self._debug(self.expand42)

    with tf.name_scope('upscale_5'):
        # Last step: first three dims come from the input placeholder and
        # the channel count from conv2_1; no skip connection is added.
        full_res = (self.x_pl.shape.as_list()[0:3]
                    + [enc.conv2_1.shape.as_list()[3]])
        self.upscale5 = conv2d_transpose('upscale5', x=self.expand42,
                                         output_shape=full_res, **grow)
        self._debug(self.upscale5)
        self.expand5 = conv2d('expand5', x=self.upscale5,
                              num_filters=enc.conv1_1.shape.as_list()[3],
                              dropout_keep_prob=0.5, **squeeze)
        self._debug(self.expand5)

    with tf.name_scope('final_score'):
        # Plain 1x1 convolution down to one channel per class
        self.fscore = conv2d('fscore', x=self.expand5,
                             num_filters=self.params.num_classes,
                             kernel_size=(1, 1), l2_strength=enc.wd)
        self._debug(self.fscore)

    self.logits = self.fscore
def init_network(self):
    """
    Build a VGG16 encoder and a dilated head on top of its conv4_2 output:
    one regular 3x3 conv, three atrous 3x3 convs (dilation 2), an atrous
    7x7 'fc6' (dilation 4), a 1x1 'fc7' and a 1x1 class-score layer. A
    single transposed convolution (kernel 16, stride 8) then produces
    self.logits.
    :return:
    """
    training = self.is_training
    num_classes = self.params.num_classes

    # Encoder: instantiate VGG16, then let it build its own graph
    self.encoder = VGG16(
        x_input=self.x_pl,
        num_classes=num_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=training,
        reduced_flag=False,
        weight_decay=self.args.weight_decay,
    )
    self.encoder.build()

    decay = self.encoder.wd
    # Keywords shared by every ReLU layer of the dilated head
    relu_layer = dict(activation=tf.nn.relu, l2_strength=decay,
                      is_training=training)

    # Decoding part
    with tf.name_scope('dilation_2'):
        self.conv4_3_dil = conv2d('conv4_3_dil', x=self.encoder.conv4_2,
                                  num_filters=512, kernel_size=(3, 3),
                                  **relu_layer)

        # Three identical atrous layers chained one after another; each
        # result is also exposed as an attribute named after the layer.
        current = self.conv4_3_dil
        for layer_name in ('conv5_1_dil', 'conv5_2_dil', 'conv5_3_dil'):
            current = atrous_conv2d(layer_name, x=current, num_filters=512,
                                    kernel_size=(3, 3), dilation_rate=2,
                                    **relu_layer)
            setattr(self, layer_name, current)

        self.fc6_dil = atrous_conv2d('fc6_dil', x=self.conv5_3_dil,
                                     num_filters=1024, kernel_size=(7, 7),
                                     dilation_rate=4, dropout_keep_prob=0.5,
                                     **relu_layer)
        self.fc7_dil = conv2d('fc7_dil', x=self.fc6_dil, num_filters=1024,
                              kernel_size=(1, 1), dropout_keep_prob=0.5,
                              **relu_layer)
        # Class scores: no activation is passed to this layer
        self.score_fr = conv2d('score_fr_dil', x=self.fc7_dil,
                               num_filters=num_classes, kernel_size=(1, 1),
                               l2_strength=decay, is_training=training)

    with tf.name_scope('upscore_8s'):
        # First three dims are taken from the input placeholder
        target = self.x_pl.shape.as_list()[0:3] + [num_classes]
        self.upscore8 = conv2d_transpose('upscore8', x=self.score_fr,
                                         output_shape=target,
                                         kernel_size=(16, 16),
                                         stride=(8, 8),
                                         l2_strength=decay)

    self.logits = self.upscore8