def init_network(self):
    """
    Build the segmentation graph: a VGG16 encoder plus a skip-fusing decoder.

    The decoder chains three transposed convolutions (strides 2, 2 and 8).
    After each of the first two, a 1x1 scoring of an encoder feature map
    (``feed1`` then ``feed2``) is added to the upsampled scores. The last
    transposed convolution takes its spatial size from ``self.x_pl`` and
    is stored as ``self.logits``.

    :return: None
    """
    n_classes = self.params.num_classes

    # ---- Encoder ----
    self.encoder = VGG16(
        x_input=self.x_pl,
        num_classes=n_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        reduced_flag=False,
        weight_decay=self.args.weight_decay,
    )
    self.encoder.build()
    _debug(self.encoder.score_fr)

    # Read after build(), which is where the decoder first needs it.
    decay = self.encoder.wd

    # ---- Decoder ----
    with tf.name_scope('upscore_2s'):
        skip1 = self.encoder.feed1
        self.upscore2 = conv2d_transpose(
            'upscore2',
            x=self.encoder.score_fr,
            output_shape=skip1.shape.as_list()[0:3] + [n_classes],
            kernel_size=(4, 4),
            stride=(2, 2),
            l2_strength=decay,
        )
        _debug(self.upscore2)

        self.score_feed1 = conv2d(
            'score_feed1',
            x=skip1,
            num_filters=n_classes,
            kernel_size=(1, 1),
            l2_strength=decay,
        )
        _debug(self.score_feed1)

        self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)
        _debug(self.fuse_feed1)

    with tf.name_scope('upscore_4s'):
        skip2 = self.encoder.feed2
        self.upscore4 = conv2d_transpose(
            'upscore4',
            x=self.fuse_feed1,
            output_shape=skip2.shape.as_list()[0:3] + [n_classes],
            kernel_size=(4, 4),
            stride=(2, 2),
            l2_strength=decay,
        )
        _debug(self.upscore4)

        self.score_feed2 = conv2d(
            'score_feed2',
            x=skip2,
            num_filters=n_classes,
            kernel_size=(1, 1),
            l2_strength=decay,
        )
        _debug(self.score_feed2)

        self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)
        _debug(self.fuse_feed2)

    with tf.name_scope('upscore_8s'):
        # Final upsampling is sized from the input placeholder.
        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.fuse_feed2,
            output_shape=self.x_pl.shape.as_list()[0:3] + [n_classes],
            kernel_size=(16, 16),
            stride=(8, 8),
            l2_strength=decay,
        )
        _debug(self.upscore8)

    self.logits = self.upscore8
def init_network(self):
    """
    Build the segmentation graph: a ShuffleNet encoder with dilated stages.

    Stages 3 and 4 are created here through ``self.encoder.stage`` on top
    of ``self.encoder.stage2`` with ``dilation=2`` and ``dilation=4``, then
    scored with a 1x1 convolution.

    When ``self.targets_resize`` is below 8 it is overwritten in place with
    ``8 // self.targets_resize``; that value is used as the stride (and
    twice it as the kernel size) of a transposed convolution sized from
    ``self.y_pl``. Otherwise the 1x1 scores are used directly as logits.

    :return: None
    """
    n_classes = self.params.num_classes

    # ---- Encoder ----
    self.encoder = ShuffleNet(
        x_input=self.x_pl,
        num_classes=n_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        batchnorm_enabled=self.args.batchnorm_enabled,
        num_groups=self.args.num_groups,
        weight_decay=self.args.weight_decay,
        bias=self.args.bias,
    )
    self.encoder.build()

    with tf.name_scope('dilation_2'):
        build_stage = self.encoder.stage

        self.stage3 = build_stage(self.encoder.stage2, stage=3, repeat=7, dilation=2)
        _debug(self.stage3)
        self.stage4 = build_stage(self.stage3, stage=4, repeat=3, dilation=4)
        _debug(self.stage4)

        self.score_fr = conv2d(
            'score_fr_dil',
            x=self.stage4,
            num_filters=n_classes,
            kernel_size=(1, 1),
            l2_strength=self.encoder.wd,
            is_training=self.is_training,
        )
        _debug(self.score_fr)

        # No learned upsampling requested: the scores are the logits.
        if self.targets_resize >= 8:
            self.logits = self.score_fr
            return

        # The attribute is deliberately overwritten, as in the original code.
        factor = 8 // self.targets_resize
        self.targets_resize = factor

        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.score_fr,
            output_shape=self.y_pl.shape.as_list()[0:3] + [n_classes],
            kernel_size=(factor * 2, factor * 2),
            stride=(factor, factor),
            l2_strength=self.encoder.wd,
            is_training=self.is_training,
        )
        _debug(self.upscore8)
        self.logits = self.upscore8
def init_network(self):
    """
    Build the segmentation graph: a RESNET18 encoder with dilated blocks.

    Two residual blocks are stacked under variable scope ``conv4_x_dil``
    (first one with ``dilation=2``) and two under ``conv5_x_dil`` (first
    one with ``dilation=4``), starting from ``self.encoder.conv3``. The
    result is scored with a 1x1 convolution and passed through a stride-8
    transposed convolution sized from ``self.x_pl`` to produce
    ``self.logits``.

    :return: None
    """
    n_classes = self.params.num_classes

    # ---- Encoder (RESNET18) ----
    self.encoder = RESNET18(
        x_input=self.x_pl,
        num_classes=n_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        weight_decay=self.args.weight_decay,
    )
    self.encoder.build()

    # ---- Dilated blocks + decoder ----
    residual_block = self.encoder._residual_block

    with tf.name_scope('dilation_2'):
        with tf.variable_scope('conv4_x_dil'):
            self.conv4 = residual_block(
                'conv4_1_dil', self.encoder.conv3, 256,
                pool_first=False, strides=1, dilation=2)
            _debug(self.conv4)
            self.conv4 = residual_block('conv4_2_dil', self.conv4, 256)
            _debug(self.conv4)

        with tf.variable_scope('conv5_x_dil'):
            self.conv5 = residual_block(
                'conv5_1_dil', self.conv4, 512,
                pool_first=False, strides=1, dilation=4)
            _debug(self.conv5)
            self.conv5 = residual_block('conv5_2_dil', self.conv5, 512)
            _debug(self.conv5)

        # Scoring and upsampling sit outside both variable scopes.
        self.score_fr = conv2d(
            'score_fr_dil',
            x=self.conv5,
            num_filters=n_classes,
            kernel_size=(1, 1),
            l2_strength=self.encoder.wd,
            is_training=self.is_training,
        )
        _debug(self.score_fr)

        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.score_fr,
            output_shape=self.x_pl.shape.as_list()[0:3] + [n_classes],
            kernel_size=(16, 16),
            stride=(8, 8),
            l2_strength=self.encoder.wd,
            is_training=self.is_training,
        )
        _debug(self.upscore8)
        self.logits = self.upscore8
def init_network(self):
    """
    Build a two-stream segmentation graph on two ShuffleNet encoders.

    One encoder reads ``self.x_pl`` (prefix ``app_``) and the other reads
    ``self.flo_pl`` (prefix ``mot_``). Their ``score_fr``, ``feed1`` and
    ``feed2`` tensors are multiplied element-wise, and the products feed a
    decoder of three transposed convolutions (strides 2, 2 and 8) with two
    additive skip fusions.

    Variables found under each decoder layer's scope are added to the
    ``'decoding_trainable_vars'`` collection.

    :return: None
    """
    n_classes = self.params.num_classes

    def _register_decoder_vars(layer_name):
        # Add the variables of one decoder layer to the decoder collection.
        for var in get_vars_underscope(tf.get_variable_scope().name, layer_name):
            tf.add_to_collection('decoding_trainable_vars', var)

    # ---- Encoders (same settings apart from input, prefix and mean file) ----
    shared_encoder_opts = dict(
        num_classes=n_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        batchnorm_enabled=self.args.batchnorm_enabled,
        num_groups=self.args.num_groups,
        weight_decay=self.args.weight_decay,
        bias=self.args.bias,
    )
    self.app_encoder = ShuffleNet(
        x_input=self.x_pl,
        prefix='app_',
        mean_path=self.args.data_dir + 'mean.npy',
        **shared_encoder_opts
    )
    self.motion_encoder = ShuffleNet(
        x_input=self.flo_pl,
        prefix='mot_',
        mean_path=self.args.data_dir + 'flo_mean.npy',
        **shared_encoder_opts
    )
    self.app_encoder.build()
    self.motion_encoder.build()

    # ---- Stream fusion by element-wise product ----
    app, motion = self.app_encoder, self.motion_encoder
    self.combined_score = tf.multiply(app.score_fr, motion.score_fr)
    self.combined_feed1 = tf.multiply(app.feed1, motion.feed1)
    self.combined_feed2 = tf.multiply(app.feed2, motion.feed2)

    # Options shared by the batch-norm-aware decoder layers.
    layer_opts = dict(
        batchnorm_enabled=self.args.batchnorm_enabled,
        l2_strength=self.app_encoder.wd,
        bias=self.args.bias,
    )

    # ---- Decoder ----
    with tf.name_scope('upscore_2s'):
        self.upscore2 = conv2d_transpose(
            'upscore2',
            x=self.combined_score,
            output_shape=self.combined_feed1.shape.as_list()[0:3] + [n_classes],
            kernel_size=(4, 4),
            stride=(2, 2),
            **layer_opts
        )
        _register_decoder_vars('upscore2')

        self.score_feed1 = conv2d(
            'score_feed1',
            x=self.combined_feed1,
            num_filters=n_classes,
            kernel_size=(1, 1),
            **layer_opts
        )
        _register_decoder_vars('score_feed1')

        self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

    with tf.name_scope('upscore_4s'):
        self.upscore4 = conv2d_transpose(
            'upscore4',
            x=self.fuse_feed1,
            output_shape=self.combined_feed2.shape.as_list()[0:3] + [n_classes],
            kernel_size=(4, 4),
            stride=(2, 2),
            **layer_opts
        )
        _register_decoder_vars('upscore4')

        self.score_feed2 = conv2d(
            'score_feed2',
            x=self.combined_feed2,
            num_filters=n_classes,
            kernel_size=(1, 1),
            **layer_opts
        )
        _register_decoder_vars('score_feed2')

        self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

    with tf.name_scope('upscore_8s'):
        # The last layer is built without the batchnorm_enabled argument.
        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.fuse_feed2,
            output_shape=self.x_pl.shape.as_list()[0:3] + [n_classes],
            kernel_size=(16, 16),
            stride=(8, 8),
            l2_strength=self.app_encoder.wd,
            bias=self.args.bias,
        )
        _register_decoder_vars('upscore8')

    self.logits = self.upscore8
def init_network(self):
    """
    Build the segmentation graph: a MobileNet encoder with a dilated tail.

    Starting from ``self.encoder.conv4_1``, the tail is rebuilt with
    stride-1 layers: an atrous convolution (rate 2), five depthwise
    separable convolutions with 512 filters, an atrous convolution
    (rate 4, 1024 filters) and one more depthwise separable convolution,
    followed by a 1x1 scoring convolution.

    When ``self.targets_resize`` is below 8 it is overwritten in place with
    ``8 // self.targets_resize``; that value is used as the stride (and
    twice it as the kernel size) of a transposed convolution sized from
    ``self.y_pl``. Otherwise the 1x1 scores are used directly as logits.

    :return: None
    """
    n_classes = self.params.num_classes

    # ---- Encoder ----
    # 'width_multipler' is the keyword spelling used by the original call.
    self.encoder = MobileNet(
        x_input=self.x_pl,
        num_classes=n_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        width_multipler=1.0,
        weight_decay=self.args.weight_decay,
    )
    self.encoder.build()

    with tf.name_scope('dilation_2'):
        # Options common to both atrous convolutions.
        atrous_opts = dict(
            kernel_size=(3, 3),
            padding='SAME',
            activation=tf.nn.relu,
            batchnorm_enabled=True,
            is_training=self.is_training,
            l2_strength=self.wd,
        )

        self.conv4_2 = atrous_conv2d(
            'conv_ds_7_dil', self.encoder.conv4_1,
            num_filters=512, dilation_rate=2, **atrous_opts)
        _debug(self.conv4_2)

        # Options common to every depthwise separable convolution below.
        separable_opts = dict(
            width_multiplier=self.encoder.width_multiplier,
            kernel_size=(3, 3),
            padding='SAME',
            stride=(1, 1),
            activation=tf.nn.relu,
            batchnorm_enabled=True,
            is_training=self.is_training,
            l2_strength=self.wd,
        )

        self.conv5_1 = depthwise_separable_conv2d(
            'conv_ds_8_dil', self.conv4_2, num_filters=512, **separable_opts)
        _debug(self.conv5_1)
        self.conv5_2 = depthwise_separable_conv2d(
            'conv_ds_9_dil', self.conv5_1, num_filters=512, **separable_opts)
        _debug(self.conv5_2)
        self.conv5_3 = depthwise_separable_conv2d(
            'conv_ds_10_dil', self.conv5_2, num_filters=512, **separable_opts)
        _debug(self.conv5_3)
        self.conv5_4 = depthwise_separable_conv2d(
            'conv_ds_11_dil', self.conv5_3, num_filters=512, **separable_opts)
        _debug(self.conv5_4)
        self.conv5_5 = depthwise_separable_conv2d(
            'conv_ds_12_dil', self.conv5_4, num_filters=512, **separable_opts)
        _debug(self.conv5_5)

        self.conv5_6 = atrous_conv2d(
            'conv_ds_13_dil', self.conv5_5,
            num_filters=1024, dilation_rate=4, **atrous_opts)
        _debug(self.conv5_6)

        self.conv6_1 = depthwise_separable_conv2d(
            'conv_ds_14_dil', self.conv5_6, num_filters=1024, **separable_opts)
        _debug(self.conv6_1)

        # Pooling is removed.
        self.score_fr = conv2d(
            'conv_1c_1x1_dil', self.conv6_1,
            num_filters=n_classes,
            l2_strength=self.wd,
            kernel_size=(1, 1),
        )
        _debug(self.score_fr)

        # No learned upsampling requested: the scores are the logits.
        if self.targets_resize >= 8:
            self.logits = self.score_fr
            return

        # The attribute is deliberately overwritten, as in the original code.
        factor = 8 // self.targets_resize
        self.targets_resize = factor

        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.score_fr,
            output_shape=self.y_pl.shape.as_list()[0:3] + [n_classes],
            kernel_size=(factor * 2, factor * 2),
            stride=(factor, factor),
            l2_strength=self.encoder.wd,
            is_training=self.is_training,
        )
        _debug(self.upscore8)
        self.logits = self.upscore8
def init_network(self):
    """
    Build the segmentation graph: a MobileNet encoder with an
    additive-skip decoder.

    Four decoder stages each (1) apply a 1x1 convolution to match the
    channel count of an encoder tensor, (2) apply a stride-2 transposed
    convolution to that tensor's shape, (3) add the encoder tensor, and
    (4) apply another 1x1 convolution. The encoder tensors used are
    ``conv5_5``, ``conv4_1``, ``conv3_1`` and ``conv2_1``. A fifth stage
    upsamples to the spatial size of ``self.x_pl`` and a final 1x1
    convolution produces ``self.logits``.

    :return: None
    """
    # ---- Encoder ----
    # 'width_multipler' is the keyword spelling used by the original call.
    self.encoder = MobileNet(
        x_input=self.x_pl,
        num_classes=self.params.num_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        width_multipler=1.0,
        weight_decay=self.args.weight_decay,
    )
    self.encoder.build()

    enc = self.encoder
    # Options shared by every batch-normalised decoder layer.
    bn_opts = dict(
        batchnorm_enabled=True,
        is_training=self.is_training,
        l2_strength=enc.wd,
    )

    # ---- Decoder ----
    with tf.name_scope('upscale_1'):
        skip = enc.conv5_5
        skip_channels = skip.shape.as_list()[3]

        self.expand11 = conv2d(
            'expand1_1', x=enc.conv5_6,
            num_filters=skip_channels, kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand11)
        self.upscale1 = conv2d_transpose(
            'upscale1', x=self.expand11,
            output_shape=skip.shape.as_list(),
            kernel_size=(4, 4), stride=(2, 2), **bn_opts)
        self._debug(self.upscale1)
        self.add1 = tf.add(self.upscale1, skip)
        self._debug(self.add1)
        self.expand12 = conv2d(
            'expand1_2', x=self.add1,
            num_filters=skip_channels, kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand12)

    with tf.name_scope('upscale_2'):
        skip = enc.conv4_1
        skip_channels = skip.shape.as_list()[3]

        self.expand21 = conv2d(
            'expand2_1', x=self.expand12,
            num_filters=skip_channels, kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand21)
        self.upscale2 = conv2d_transpose(
            'upscale2', x=self.expand21,
            output_shape=skip.shape.as_list(),
            kernel_size=(4, 4), stride=(2, 2), **bn_opts)
        self._debug(self.upscale2)
        self.add2 = tf.add(self.upscale2, skip)
        self._debug(self.add2)
        self.expand22 = conv2d(
            'expand2_2', x=self.add2,
            num_filters=skip_channels, kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand22)

    with tf.name_scope('upscale_3'):
        skip = enc.conv3_1
        skip_channels = skip.shape.as_list()[3]

        self.expand31 = conv2d(
            'expand3_1', x=self.expand22,
            num_filters=skip_channels, kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand31)
        self.upscale3 = conv2d_transpose(
            'upscale3', x=self.expand31,
            output_shape=skip.shape.as_list(),
            kernel_size=(4, 4), stride=(2, 2), **bn_opts)
        self._debug(self.upscale3)
        self.add3 = tf.add(self.upscale3, skip)
        self._debug(self.add3)
        self.expand32 = conv2d(
            'expand3_2', x=self.add3,
            num_filters=skip_channels, kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand32)

    with tf.name_scope('upscale_4'):
        skip = enc.conv2_1
        skip_channels = skip.shape.as_list()[3]

        self.expand41 = conv2d(
            'expand4_1', x=self.expand32,
            num_filters=skip_channels, kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand41)
        self.upscale4 = conv2d_transpose(
            'upscale4', x=self.expand41,
            output_shape=skip.shape.as_list(),
            kernel_size=(4, 4), stride=(2, 2), **bn_opts)
        self._debug(self.upscale4)
        self.add4 = tf.add(self.upscale4, skip)
        self._debug(self.add4)
        self.expand42 = conv2d(
            'expand4_2', x=self.add4,
            num_filters=skip_channels, kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand42)

    with tf.name_scope('upscale_5'):
        # No skip here: spatial size from the input placeholder, channel
        # count from conv2_1.
        full_res_shape = (self.x_pl.shape.as_list()[0:3]
                          + [enc.conv2_1.shape.as_list()[3]])
        self.upscale5 = conv2d_transpose(
            'upscale5', x=self.expand42,
            output_shape=full_res_shape,
            kernel_size=(4, 4), stride=(2, 2), **bn_opts)
        self._debug(self.upscale5)
        self.expand5 = conv2d(
            'expand5', x=self.upscale5,
            num_filters=enc.conv1_1.shape.as_list()[3],
            kernel_size=(1, 1),
            dropout_keep_prob=0.5,
            **bn_opts)
        self._debug(self.expand5)

    with tf.name_scope('final_score'):
        self.fscore = conv2d(
            'fscore', x=self.expand5,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            l2_strength=enc.wd)
        self._debug(self.fscore)

    self.logits = self.fscore
def init_network(self):
    """
    Build the segmentation graph: a VGG16 encoder with an additive-skip
    decoder.

    Five decoder stages each apply a stride-2 transposed convolution to
    the shape of an encoder tensor, add that tensor, then apply two 3x3
    convolutions with the same channel count. The encoder tensors used
    are ``conv4_3``, ``conv3_3``, ``conv2_2``, ``conv1_2`` and
    ``conv1_1``. A final 1x1 convolution produces ``self.logits``.

    The ``concatN`` attributes hold the result of ``tf.add``, not a
    concatenation.

    :return: None
    """
    # ---- Encoder ----
    self.encoder = VGG16(
        x_input=self.x_pl,
        num_classes=self.params.num_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        reduced_flag=False,
        weight_decay=self.args.weight_decay,
    )
    self.encoder.build()

    enc = self.encoder
    decay = enc.wd
    up_opts = dict(kernel_size=(4, 4), stride=(2, 2), l2_strength=decay)
    conv_opts = dict(kernel_size=(3, 3), l2_strength=decay)

    # ---- Decoder ----
    with tf.name_scope('upscale_1'):
        skip = enc.conv4_3
        width = skip.shape.as_list()[3]

        # The first layer is named 'upscale0' in the original graph.
        self.upscale1 = conv2d_transpose(
            'upscale0', x=enc.conv5_3,
            output_shape=skip.shape.as_list(), **up_opts)
        _debug(self.upscale1)
        self.concat1 = tf.add(self.upscale1, skip)
        _debug(self.concat1)
        self.expand11 = conv2d(
            'expand1_1', x=self.concat1, num_filters=width, **conv_opts)
        _debug(self.expand11)
        self.expand12 = conv2d(
            'expand1_2', x=self.expand11, num_filters=width, **conv_opts)
        _debug(self.expand12)

    with tf.name_scope('upscale_2'):
        skip = enc.conv3_3
        width = skip.shape.as_list()[3]

        self.upscale2 = conv2d_transpose(
            'upscale2', x=self.expand12,
            output_shape=skip.shape.as_list(), **up_opts)
        _debug(self.upscale2)
        self.concat2 = tf.add(self.upscale2, skip)
        _debug(self.concat2)
        self.expand21 = conv2d(
            'expand2_1', x=self.concat2, num_filters=width, **conv_opts)
        _debug(self.expand21)
        self.expand22 = conv2d(
            'expand2_2', x=self.expand21, num_filters=width, **conv_opts)
        _debug(self.expand22)

    with tf.name_scope('upscale_3'):
        skip = enc.conv2_2
        width = skip.shape.as_list()[3]

        self.upscale3 = conv2d_transpose(
            'upscale3', x=self.expand22,
            output_shape=skip.shape.as_list(), **up_opts)
        _debug(self.upscale3)
        self.concat3 = tf.add(self.upscale3, skip)
        _debug(self.concat3)
        self.expand31 = conv2d(
            'expand3_1', x=self.concat3, num_filters=width, **conv_opts)
        _debug(self.expand31)
        self.expand32 = conv2d(
            'expand3_2', x=self.expand31, num_filters=width, **conv_opts)
        _debug(self.expand32)

    with tf.name_scope('upscale_4'):
        skip = enc.conv1_2
        width = skip.shape.as_list()[3]

        self.upscale4 = conv2d_transpose(
            'upscale4', x=self.expand32,
            output_shape=skip.shape.as_list(), **up_opts)
        _debug(self.upscale4)
        self.concat4 = tf.add(self.upscale4, skip)
        _debug(self.concat4)
        self.expand41 = conv2d(
            'expand4_1', x=self.concat4, num_filters=width, **conv_opts)
        _debug(self.expand41)
        self.expand42 = conv2d(
            'expand4_2', x=self.expand41, num_filters=width, **conv_opts)
        _debug(self.expand42)

    with tf.name_scope('upscale_5'):
        skip = enc.conv1_1
        width = skip.shape.as_list()[3]

        self.upscale5 = conv2d_transpose(
            'upscale5', x=self.expand42,
            output_shape=skip.shape.as_list(), **up_opts)
        _debug(self.upscale5)
        self.concat5 = tf.add(self.upscale5, skip)
        _debug(self.concat5)
        self.expand51 = conv2d(
            'expand5_1', x=self.concat5, num_filters=width, **conv_opts)
        _debug(self.expand51)
        self.expand52 = conv2d(
            'expand5_2', x=self.expand51, num_filters=width, **conv_opts)
        _debug(self.expand52)

    with tf.name_scope('final_score'):
        self.fscore = conv2d(
            'fscore', x=self.expand52,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            l2_strength=decay)
        _debug(self.fscore)

    self.logits = self.fscore
def init_network(self):
    """
    Build the segmentation graph: a MobileNet encoder plus the decoder
    the progress messages call "FCN8s".

    The decoder chains three transposed convolutions (strides 2, 2 and 8).
    After each of the first two, a 1x1 scoring of an encoder feature map
    (``feed1`` then ``feed2``) is added to the upsampled scores. The last
    transposed convolution is sized from ``self.x_pl`` and stored as
    ``self.logits``. Progress messages are printed before and after the
    decoder is built.

    :return: None
    """
    n_classes = self.params.num_classes

    # ---- Encoder ----
    # 'width_multipler' is the keyword spelling used by the original call.
    self.encoder = MobileNet(
        x_input=self.x_pl,
        num_classes=n_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        width_multipler=1.0,
        weight_decay=self.args.weight_decay,
    )
    self.encoder.build()

    print("Building the Decoder FCN8s..")

    enc = self.encoder
    # Options shared by the batch-norm-aware decoder layers.
    layer_opts = dict(
        batchnorm_enabled=self.args.batchnorm_enabled,
        is_training=self.is_training,
        l2_strength=enc.wd,
    )

    # ---- Decoder ----
    with tf.name_scope('upscore_2s'):
        self.upscore2 = conv2d_transpose(
            'upscore2',
            x=enc.score_fr,
            output_shape=enc.feed1.shape.as_list()[0:3] + [n_classes],
            kernel_size=(4, 4),
            stride=(2, 2),
            **layer_opts
        )
        self._debug(self.upscore2)

        self.score_feed1 = conv2d(
            'score_feed1',
            x=enc.feed1,
            num_filters=n_classes,
            kernel_size=(1, 1),
            **layer_opts
        )
        self._debug(self.score_feed1)

        self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)
        self._debug(self.fuse_feed1)

    with tf.name_scope('upscore_4s'):
        self.upscore4 = conv2d_transpose(
            'upscore4',
            x=self.fuse_feed1,
            output_shape=enc.feed2.shape.as_list()[0:3] + [n_classes],
            kernel_size=(4, 4),
            stride=(2, 2),
            **layer_opts
        )
        self._debug(self.upscore4)

        self.score_feed2 = conv2d(
            'score_feed2',
            x=enc.feed2,
            num_filters=n_classes,
            kernel_size=(1, 1),
            **layer_opts
        )
        self._debug(self.score_feed2)

        self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)
        self._debug(self.fuse_feed2)

    with tf.name_scope('upscore_8s'):
        # The last layer is built with weight decay only.
        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.fuse_feed2,
            output_shape=self.x_pl.shape.as_list()[0:3] + [n_classes],
            kernel_size=(16, 16),
            stride=(8, 8),
            l2_strength=enc.wd,
        )
        self._debug(self.upscore8)

    self.logits = self.upscore8
    print("\nDecoder FCN8s is built successfully\n\n")
def init_network(self):
    """
    Build a two-stream segmentation graph on two MobileNet encoders that
    are fused at ``conv3_2``.

    One encoder reads ``self.x_pl`` (prefix ``app_``) and the other reads
    ``self.flo_pl`` (prefix ``mot_``). Their ``conv3_2`` outputs are
    multiplied element-wise; a single stack of depthwise separable
    convolutions (``conv_ds_6_1`` ... ``conv_ds_14_1``) and a 1x1 scoring
    convolution are built on the product. The decoder then chains three
    transposed convolutions (strides 2, 2 and 8), with additive skip
    fusions from ``self.feed1`` (``conv4_2``) and ``self.feed2`` (the
    fused ``conv3_2`` product).

    :return: None
    """
    n_classes = self.params.num_classes
    decay = self.args.weight_decay

    # ---- Encoders (same settings apart from input, prefix and mean file) ----
    # 'width_multipler' is the keyword spelling used by the original calls.
    shared_encoder_opts = dict(
        num_classes=n_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        width_multipler=1.0,
        weight_decay=decay,
    )
    self.app_encoder = MobileNet(
        x_input=self.x_pl,
        prefix='app_',
        mean_path=self.args.data_dir + 'mean.npy',
        **shared_encoder_opts
    )
    self.motion_encoder = MobileNet(
        x_input=self.flo_pl,
        prefix='mot_',
        mean_path=self.args.data_dir + 'flo_mean.npy',
        **shared_encoder_opts
    )
    self.app_encoder.build()
    self.motion_encoder.build()

    # ---- Stream fusion ----
    self.feed2 = tf.multiply(self.app_encoder.conv3_2,
                             self.motion_encoder.conv3_2)
    self.width_multiplier = 1.0

    # ---- Shared trunk on the fused features ----
    separable_opts = dict(
        width_multiplier=self.width_multiplier,
        kernel_size=(3, 3),
        padding='SAME',
        activation=tf.nn.relu6,
        batchnorm_enabled=True,
        is_training=self.is_training,
        l2_strength=decay,
    )

    self.conv4_1 = depthwise_separable_conv2d(
        'conv_ds_6_1', self.feed2,
        num_filters=256, stride=(1, 1), **separable_opts)
    _debug(self.conv4_1)
    self.conv4_2 = depthwise_separable_conv2d(
        'conv_ds_7_1', self.conv4_1,
        num_filters=512, stride=(2, 2), **separable_opts)
    _debug(self.conv4_2)
    self.conv5_1 = depthwise_separable_conv2d(
        'conv_ds_8_1', self.conv4_2,
        num_filters=512, stride=(1, 1), **separable_opts)
    _debug(self.conv5_1)
    self.conv5_2 = depthwise_separable_conv2d(
        'conv_ds_9_1', self.conv5_1,
        num_filters=512, stride=(1, 1), **separable_opts)
    _debug(self.conv5_2)
    self.conv5_3 = depthwise_separable_conv2d(
        'conv_ds_10_1', self.conv5_2,
        num_filters=512, stride=(1, 1), **separable_opts)
    _debug(self.conv5_3)
    self.conv5_4 = depthwise_separable_conv2d(
        'conv_ds_11_1', self.conv5_3,
        num_filters=512, stride=(1, 1), **separable_opts)
    _debug(self.conv5_4)
    self.conv5_5 = depthwise_separable_conv2d(
        'conv_ds_12_1', self.conv5_4,
        num_filters=512, stride=(1, 1), **separable_opts)
    _debug(self.conv5_5)
    self.conv5_6 = depthwise_separable_conv2d(
        'conv_ds_13_1', self.conv5_5,
        num_filters=1024, stride=(2, 2), **separable_opts)
    _debug(self.conv5_6)
    self.conv6_1 = depthwise_separable_conv2d(
        'conv_ds_14_1', self.conv5_6,
        num_filters=1024, stride=(1, 1), **separable_opts)
    _debug(self.conv6_1)

    # Pooling is removed.
    self.score_fr = conv2d(
        'conv_1c_1x1_1', self.conv6_1,
        num_filters=n_classes,
        l2_strength=decay,
        kernel_size=(1, 1),
    )
    self.feed1 = self.conv4_2

    # Options shared by the batch-norm-aware decoder layers.
    layer_opts = dict(
        batchnorm_enabled=self.args.batchnorm_enabled,
        is_training=self.is_training,
        l2_strength=decay,
        bias=self.args.bias,
    )

    # ---- Decoder ----
    with tf.name_scope('upscore_2s'):
        self.upscore2 = conv2d_transpose(
            'upscore2',
            x=self.score_fr,
            output_shape=self.feed1.shape.as_list()[0:3] + [n_classes],
            kernel_size=(4, 4),
            stride=(2, 2),
            **layer_opts
        )
        _debug(self.upscore2)

        self.score_feed1 = conv2d(
            'score_feed1',
            x=self.feed1,
            num_filters=n_classes,
            kernel_size=(1, 1),
            **layer_opts
        )
        _debug(self.score_feed1)

        self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

    with tf.name_scope('upscore_4s'):
        self.upscore4 = conv2d_transpose(
            'upscore4',
            x=self.fuse_feed1,
            output_shape=self.feed2.shape.as_list()[0:3] + [n_classes],
            kernel_size=(4, 4),
            stride=(2, 2),
            **layer_opts
        )
        _debug(self.upscore4)

        self.score_feed2 = conv2d(
            'score_feed2',
            x=self.feed2,
            num_filters=n_classes,
            kernel_size=(1, 1),
            **layer_opts
        )
        _debug(self.score_feed2)

        self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

    with tf.name_scope('upscore_8s'):
        # The last layer is built without the batchnorm_enabled argument.
        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.fuse_feed2,
            output_shape=self.x_pl.shape.as_list()[0:3] + [n_classes],
            is_training=self.is_training,
            kernel_size=(16, 16),
            stride=(8, 8),
            l2_strength=decay,
            bias=self.args.bias,
        )
        _debug(self.upscore8)

    self.logits = self.upscore8
def init_network(self):
    """
    Build the segmentation graph: a ShuffleNet encoder with an
    additive-skip decoder.

    Four decoder stages each (1) apply a 1x1 convolution to match the
    channel count of an encoder tensor, (2) apply a stride-2 transposed
    convolution, (3) add the encoder tensor, and (4) apply another 1x1
    convolution. The encoder tensors used are ``stage3``, ``stage2``,
    ``max_pool`` and ``conv1``.

    For ``max_pool`` and ``conv1`` the transposed convolution targets a
    shape one larger in height and width than the encoder tensor, and the
    encoder tensor is padded by one row and one column at the end before
    the addition. A fifth stage upsamples to the spatial size of
    ``self.x_pl`` and a final 1x1 convolution produces ``self.logits``.

    :return: None
    """
    # Padding spec: one extra row and one extra column at the end.
    pad_bottom_right = [[0, 0], [0, 1], [0, 1], [0, 0]]

    # ---- Encoder ----
    self.encoder = ShuffleNet(
        x_input=self.x_pl,
        num_classes=self.params.num_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        batchnorm_enabled=self.args.batchnorm_enabled,
        num_groups=self.args.num_groups,
        weight_decay=self.args.weight_decay,
        bias=self.args.bias,
    )
    self.encoder.build()

    enc = self.encoder
    # Options shared by every batch-normalised decoder layer.
    bn_opts = dict(
        batchnorm_enabled=True,
        is_training=self.is_training,
        l2_strength=enc.wd,
    )

    # ---- Decoder ----
    with tf.name_scope('upscale_1'):
        skip = enc.stage3
        skip_channels = skip.shape.as_list()[3]

        self.expand11 = conv2d(
            'expand1_1', x=enc.stage4,
            num_filters=skip_channels, kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand11)
        self.upscale1 = conv2d_transpose(
            'upscale1', x=self.expand11,
            output_shape=skip.shape.as_list(),
            kernel_size=(4, 4), stride=(2, 2), **bn_opts)
        self._debug(self.upscale1)
        self.add1 = tf.add(self.upscale1, skip)
        self._debug(self.add1)
        self.expand12 = conv2d(
            'expand1_2', x=self.add1,
            num_filters=skip_channels, kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand12)

    with tf.name_scope('upscale_2'):
        skip = enc.stage2
        skip_channels = skip.shape.as_list()[3]

        self.expand21 = conv2d(
            'expand2_1', x=self.expand12,
            num_filters=skip_channels, kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand21)
        self.upscale2 = conv2d_transpose(
            'upscale2', x=self.expand21,
            output_shape=skip.shape.as_list(),
            kernel_size=(4, 4), stride=(2, 2), **bn_opts)
        self._debug(self.upscale2)
        self.add2 = tf.add(self.upscale2, skip)
        self._debug(self.add2)
        self.expand22 = conv2d(
            'expand2_2', x=self.add2,
            num_filters=skip_channels, kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand22)

    with tf.name_scope('upscale_3'):
        pool = enc.max_pool
        pool_dims = pool.shape.as_list()

        self.expand31 = conv2d(
            'expand3_1', x=self.expand22,
            num_filters=pool_dims[3], kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand31)
        # The batch entry is taken from ``shape[0]`` as in the original code.
        self.upscale3 = conv2d_transpose(
            'upscale3', x=self.expand31,
            output_shape=[pool.shape[0],
                          pool_dims[1] + 1,
                          pool_dims[2] + 1,
                          pool_dims[3]],
            kernel_size=(4, 4), stride=(2, 2), **bn_opts)
        self._debug(self.upscale3)
        padded = tf.pad(pool, pad_bottom_right, "CONSTANT")
        self.add3 = tf.add(self.upscale3, padded)
        self._debug(self.add3)
        self.expand32 = conv2d(
            'expand3_2', x=self.add3,
            num_filters=pool_dims[3], kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand32)

    with tf.name_scope('upscale_4'):
        stem = enc.conv1
        stem_dims = stem.shape.as_list()

        self.expand41 = conv2d(
            'expand4_1', x=self.expand32,
            num_filters=stem_dims[3], kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand41)
        self.upscale4 = conv2d_transpose(
            'upscale4', x=self.expand41,
            output_shape=[stem.shape[0],
                          stem_dims[1] + 1,
                          stem_dims[2] + 1,
                          stem_dims[3]],
            kernel_size=(4, 4), stride=(2, 2), **bn_opts)
        self._debug(self.upscale4)
        padded2 = tf.pad(stem, pad_bottom_right, "CONSTANT")
        self.add4 = tf.add(self.upscale4, padded2)
        self._debug(self.add4)
        self.expand42 = conv2d(
            'expand4_2', x=self.add4,
            num_filters=stem_dims[3], kernel_size=(1, 1), **bn_opts)
        self._debug(self.expand42)

    with tf.name_scope('upscale_5'):
        # No skip here: spatial size from the input placeholder, channel
        # count from conv1.
        stem_channels = enc.conv1.shape.as_list()[3]
        self.upscale5 = conv2d_transpose(
            'upscale5', x=self.expand42,
            output_shape=self.x_pl.shape.as_list()[0:3] + [stem_channels],
            kernel_size=(4, 4), stride=(2, 2), **bn_opts)
        self._debug(self.upscale5)
        self.expand5 = conv2d(
            'expand5', x=self.upscale5,
            num_filters=stem_channels,
            kernel_size=(1, 1),
            dropout_keep_prob=0.5,
            **bn_opts)
        self._debug(self.expand5)

    with tf.name_scope('final_score'):
        self.fscore = conv2d(
            'fscore', x=self.expand5,
            num_filters=self.params.num_classes,
            kernel_size=(1, 1),
            l2_strength=enc.wd)
        self._debug(self.fscore)

    self.logits = self.fscore
def init_network(self):
    """
    Assemble the two-stream (appearance + motion) ShuffleNet segmentation graph.

    One ShuffleNet is created over ``self.x_pl`` (prefix ``app_``) and one over
    ``self.flo_pl`` (prefix ``mot_``). Their ``stage2`` outputs are multiplied
    element-wise and the appearance encoder's ``stage`` builder is applied to the
    product to create stages 3 and 4. A 1x1 conv turns stage 4 into class scores,
    which are upsampled x2, fused with scored ``feed1`` (stage 3), upsampled x2
    again, fused with scored ``feed2`` (the stage-2 product) and finally upsampled
    x8 to the spatial shape of ``self.x_pl``.

    :return: None. Intermediate tensors are stored on ``self``; the final scores
        are stored in ``self.logits``.
    """
    args = self.args
    n_classes = self.params.num_classes

    # Constructor settings that are identical for both encoder streams.
    stream_kwargs = dict(
        num_classes=n_classes,
        pretrained_path=args.pretrained_path,
        train_flag=self.is_training,
        batchnorm_enabled=args.batchnorm_enabled,
        num_groups=args.num_groups,
        weight_decay=args.weight_decay,
        bias=args.bias,
    )
    self.app_encoder = ShuffleNet(
        x_input=self.x_pl,
        prefix='app_',
        mean_path=args.data_dir + 'mean.npy',
        **stream_kwargs)
    self.motion_encoder = ShuffleNet(
        x_input=self.flo_pl,
        prefix='mot_',
        mean_path=args.data_dir + 'flo_mean.npy',
        **stream_kwargs)

    # Encoding part: build both streams up to stage 2.
    self.app_encoder.build()
    self.motion_encoder.build()

    # Fuse the streams by element-wise product of their stage-2 outputs.
    # (An alternative fusion -- channel concat followed by a 1x1 conv2d named
    # 'combined_score' with 240 filters -- was left disabled in the original.)
    self.combined_score = tf.multiply(self.app_encoder.stage2,
                                      self.motion_encoder.stage2)

    # The remaining stages are created through the appearance encoder's builder.
    self.stage3 = self.app_encoder.stage(self.combined_score, stage=3, repeat=7)
    _debug(self.stage3)
    self.stage4 = self.app_encoder.stage(self.stage3, stage=4, repeat=3)
    _debug(self.stage4)

    # Skip-connection sources for the decoder.
    self.feed1 = self.stage3
    self.feed2 = self.combined_score

    # First experiment: score stage 4 with a regular 1x1 conv2d.
    self.score_fr = conv2d(
        'combined_conv_1c_1x1',
        self.stage4,
        num_filters=n_classes,
        l2_strength=args.weight_decay,
        kernel_size=(1, 1))
    _debug(self.score_fr)

    # Keyword sets shared by the two x2 transposed convs and the two 1x1 skip convs.
    upsample_x2_kwargs = dict(
        batchnorm_enabled=args.batchnorm_enabled,
        is_training=self.is_training,
        kernel_size=(4, 4),
        stride=(2, 2),
        l2_strength=args.weight_decay,
        bias=args.bias,
    )
    skip_score_kwargs = dict(
        batchnorm_enabled=args.batchnorm_enabled,
        is_training=self.is_training,
        num_filters=n_classes,
        kernel_size=(1, 1),
        bias=args.bias,
        l2_strength=args.weight_decay,
    )

    # Decoding part
    with tf.name_scope('upscore_2s'):
        feed1_shape = self.feed1.shape.as_list()[0:3] + [n_classes]
        self.upscore2 = conv2d_transpose(
            'upscore2',
            x=self.score_fr,
            output_shape=feed1_shape,
            **upsample_x2_kwargs)
        _debug(self.upscore2)
        self.score_feed1 = conv2d('score_feed1', x=self.feed1, **skip_score_kwargs)
        _debug(self.score_feed1)
        self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

    with tf.name_scope('upscore_4s'):
        feed2_shape = self.feed2.shape.as_list()[0:3] + [n_classes]
        self.upscore4 = conv2d_transpose(
            'upscore4',
            x=self.fuse_feed1,
            output_shape=feed2_shape,
            **upsample_x2_kwargs)
        _debug(self.upscore4)
        self.score_feed2 = conv2d('score_feed2', x=self.feed2, **skip_score_kwargs)
        _debug(self.score_feed2)
        self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

    with tf.name_scope('upscore_8s'):
        # Final x8 upsampling back to the spatial shape of the input placeholder.
        input_shape = self.x_pl.shape.as_list()[0:3] + [n_classes]
        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.fuse_feed2,
            output_shape=input_shape,
            is_training=self.is_training,
            kernel_size=(16, 16),
            stride=(8, 8),
            l2_strength=args.weight_decay,
            bias=args.bias)
        _debug(self.upscore8)

    self.logits = self.upscore8
def init_network(self):
    """
    Assemble the two-stream (appearance + motion) VGG16 segmentation graph.

    One VGG16 is created over ``self.x_pl`` (prefix ``app_``) and one over
    ``self.flo_pl`` (prefix ``mot_``). Their ``score_fr`` outputs are multiplied
    element-wise. The product is upsampled x2 and added to the product of the
    ReLU-activated 1x1 scores of both streams' ``feed1``; the same is repeated
    with ``feed2``; a final x8 transposed conv brings the result to the spatial
    shape of ``self.x_pl``.

    Every decoder layer receives ``is_training=self.is_training`` so that, when
    ``self.args.batchnorm_enabled`` is set, batch normalisation is told whether
    the graph is in the training or inference phase.
    NOTE(review): the default of ``is_training`` inside ``conv2d`` /
    ``conv2d_transpose`` is not visible from here -- presumably it is the
    training phase, which is what the previous code silently relied on; confirm.

    :return: None. Intermediate tensors are stored on ``self``; the final scores
        are stored in ``self.logits``.
    """
    args = self.args
    n_classes = self.params.num_classes

    # Init two VGG16 encoders: appearance (image) and motion (flow).
    self.app_encoder = VGG16(
        x_input=self.x_pl,
        prefix='app_',
        num_classes=n_classes,
        pretrained_path=args.pretrained_path,
        train_flag=self.is_training,
        reduced_flag=False,
        weight_decay=args.weight_decay,
        mean_path=args.data_dir + 'mean.npy')
    self.motion_encoder = VGG16(
        x_input=self.flo_pl,
        prefix='mot_',
        num_classes=n_classes,
        pretrained_path=args.pretrained_path,
        train_flag=self.is_training,
        reduced_flag=False,
        weight_decay=args.weight_decay,
        mean_path=args.data_dir + 'flo_mean.npy')

    # Build Encoding part
    self.app_encoder.build()
    self.motion_encoder.build()

    # Fuse the coarse class scores of both streams by element-wise product.
    self.combined_score = tf.multiply(self.app_encoder.score_fr,
                                      self.motion_encoder.score_fr)

    # Build Decoding part
    with tf.name_scope('upscore_2s'):
        self.upscore2 = conv2d_transpose(
            'upscore2',
            x=self.combined_score,
            output_shape=self.app_encoder.feed1.shape.as_list()[0:3] + [n_classes],
            batchnorm_enabled=args.batchnorm_enabled,
            is_training=self.is_training,
            kernel_size=(4, 4),
            stride=(2, 2),
            l2_strength=self.app_encoder.wd,
            bias=args.bias)

        self.app_score_feed1 = conv2d(
            'app_score_feed1',
            x=self.app_encoder.feed1,
            batchnorm_enabled=args.batchnorm_enabled,
            is_training=self.is_training,
            num_filters=n_classes,
            kernel_size=(1, 1),
            bias=args.bias,
            l2_strength=self.app_encoder.wd)
        self.app_score_feed1 = tf.nn.relu(self.app_score_feed1)

        self.mot_score_feed1 = conv2d(
            'mot_score_feed1',
            x=self.motion_encoder.feed1,
            batchnorm_enabled=args.batchnorm_enabled,
            is_training=self.is_training,
            num_filters=n_classes,
            kernel_size=(1, 1),
            bias=args.bias,
            l2_strength=self.motion_encoder.wd)
        self.mot_score_feed1 = tf.nn.relu(self.mot_score_feed1)

        # Skip connection: product of the two streams' scores, added to the
        # upsampled coarse prediction.
        self.score_feed1 = tf.multiply(self.app_score_feed1, self.mot_score_feed1)
        self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

    with tf.name_scope('upscore_4s'):
        self.upscore4 = conv2d_transpose(
            'upscore4',
            x=self.fuse_feed1,
            output_shape=self.app_encoder.feed2.shape.as_list()[0:3] + [n_classes],
            batchnorm_enabled=args.batchnorm_enabled,
            is_training=self.is_training,
            kernel_size=(4, 4),
            stride=(2, 2),
            l2_strength=self.app_encoder.wd,
            bias=args.bias)

        self.app_score_feed2 = conv2d(
            'app_score_feed2',
            x=self.app_encoder.feed2,
            batchnorm_enabled=args.batchnorm_enabled,
            is_training=self.is_training,
            num_filters=n_classes,
            kernel_size=(1, 1),
            bias=args.bias,
            l2_strength=self.app_encoder.wd)
        self.app_score_feed2 = tf.nn.relu(self.app_score_feed2)

        self.mot_score_feed2 = conv2d(
            'mot_score_feed2',
            x=self.motion_encoder.feed2,
            batchnorm_enabled=args.batchnorm_enabled,
            is_training=self.is_training,
            num_filters=n_classes,
            kernel_size=(1, 1),
            bias=args.bias,
            l2_strength=self.motion_encoder.wd)
        self.mot_score_feed2 = tf.nn.relu(self.mot_score_feed2)

        self.score_feed2 = tf.multiply(self.app_score_feed2, self.mot_score_feed2)
        self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

    with tf.name_scope('upscore_8s'):
        # Final x8 upsampling back to the spatial shape of the input placeholder.
        # is_training is forwarded here as well, mirroring the two-stream
        # ShuffleNet builder's 'upscore8' layer.
        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.fuse_feed2,
            output_shape=self.x_pl.shape.as_list()[0:3] + [n_classes],
            is_training=self.is_training,
            kernel_size=(16, 16),
            stride=(8, 8),
            l2_strength=self.app_encoder.wd,
            bias=args.bias)

    self.logits = self.upscore8
def template(self, x, action, lstm_state):
    """
    Build the conv encoder -> action LSTM -> transposed-conv decoder graph.

    Four stride-2 conv stages (each followed by batch norm, ReLU and dropout)
    encode ``x``. The flattened code is passed, together with ``lstm_state`` and
    ``action``, to ``actionlstm_cell``. The LSTM output is projected back to the
    flattened code size, reshaped to the last encoder feature-map shape and
    decoded by four stride-2 transposed-conv stages plus a final 3x3 conv with
    2 output channels. When ``self.config.predict_reward`` is set, a small dense
    head on the ``decoder_2`` activations predicts a scalar reward.

    :param x: input tensor fed to the first conv layer. The original notes give
        its shape as [None, truncated_time_steps] + self.config.state_size
        (not verifiable from this method -- confirm against the caller).
    :param action: action tensor forwarded to ``actionlstm_cell``; per the
        original notes shaped [None, truncated_time_steps, action_dim].
    :param lstm_state: LSTM state forwarded to ``actionlstm_cell``; per the
        original notes shaped [2, lstm_size, lstm_size].
    :return: tuple ``(next_state_out, next_state_out_softmax, reward_out,
        lstm_new_state)``; ``reward_out`` is None when reward prediction is off.
    """
    cfg = self.config
    training = self.is_training

    def _bn_relu_dropout(pre_activation):
        # Shared tail of every stage: batch norm -> ReLU -> dropout.
        normalized = tf.layers.batch_normalization(pre_activation, training=training)
        return tf.layers.dropout(tf.nn.relu(normalized),
                                 rate=cfg.dropout_rate,
                                 training=training,
                                 name='dropout')

    def _decoder_stage(scope_name, layer_name, inputs, height, width, channels, kernel):
        # One stride-2 transposed conv followed by the shared tail.
        with tf.name_scope(scope_name):
            deconv = conv2d_transpose(
                layer_name,
                inputs,
                output_shape=[cfg.batch_size, height, width, channels],
                kernel_size=kernel,
                stride=(2, 2),
            )
            return _bn_relu_dropout(deconv)

    # ---- encoder: (scope name, filters, kernel size), all with stride 2 ----
    encoder_layout = (
        ('encoder_1', 64, (8, 8)),
        ('encoder_2', 32, (6, 6)),
        ('encoder_3', 32, (6, 6)),
        ('encoder_4', 32, (4, 4)),
    )
    features = x
    for scope_name, filters, kernel in encoder_layout:
        with tf.name_scope(scope_name):
            conv = tf.layers.conv2d(
                features,
                filters,
                kernel_size=kernel,
                strides=(2, 2),
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                padding='SAME')
            features = _bn_relu_dropout(conv)
    encoder_out = features

    with tf.name_scope('flatten_1'):
        encoded = tf.contrib.layers.flatten(encoder_out)
        # the size of the encoded vector
        encoded_vector_size = encoded.get_shape()[1]

    with tf.name_scope('lstm_layer'):
        lstm_out, lstm_new_state = actionlstm_cell(
            encoded,
            lstm_state,
            action,
            cfg.lstm_size,
            cfg.action_dim,
            initializer=tf.contrib.layers.xavier_initializer(),
            activation=tf.tanh,
            scope='lstm_layer')

    with tf.name_scope('hidden_layer_1'):
        # Project the LSTM output back to the flattened encoder size.
        projected = tf.layers.dense(
            lstm_out,
            encoded_vector_size,
            kernel_initializer=tf.contrib.layers.xavier_initializer())
        hidden = _bn_relu_dropout(projected)

    with tf.name_scope('reshape_1'):
        # Give the projection the shape of the last encoder conv layer output.
        deconv_init_shape = encoder_out.get_shape().as_list()
        decoder_in = tf.reshape(hidden, [-1] + deconv_init_shape[1:])

    # ---- decoder ----
    state_h = cfg.state_size[0]
    state_w = cfg.state_size[1]
    dec1 = _decoder_stage('decoder_1', 'decoder1', decoder_in,
                          state_h // 8, state_w // 8, 32, (4, 4))
    dec2 = _decoder_stage('decoder_2', 'decoder2', dec1,
                          state_h // 4, state_w // 4, 32, (6, 6))
    dec3 = _decoder_stage('decoder_3', 'decoder3', dec2,
                          state_h // 2, state_w // 2, 32, (6, 6))
    dec4 = _decoder_stage('decoder_4', 'decoder4', dec3,
                          state_h, state_w, 64, (8, 8))

    with tf.name_scope('decoder_5'):
        next_state_out = tf.layers.conv2d(
            dec4,
            2,
            kernel_size=(3, 3),
            strides=(1, 1),
            kernel_initializer=tf.contrib.layers.xavier_initializer(),
            padding='SAME')
        next_state_out_softmax = tf.nn.softmax(next_state_out)

    # ---- optional reward head, branching off the decoder_2 activations ----
    reward_out = None
    if cfg.predict_reward:
        with tf.name_scope('reward_flatten'):
            reward_features = tf.contrib.layers.flatten(dec2)

        with tf.name_scope('reward_hidden_layer_2'):
            reward_hidden = tf.layers.dense(
                reward_features,
                128,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            reward_hidden = tf.layers.dropout(tf.nn.relu(reward_hidden),
                                              rate=cfg.dropout_rate,
                                              training=training,
                                              name='dropout')

        with tf.name_scope('reward_output_layer'):
            reward_out = tf.layers.dense(
                reward_hidden,
                1,
                activation=None,
                kernel_initializer=tf.contrib.layers.xavier_initializer())

    return next_state_out, next_state_out_softmax, reward_out, lstm_new_state
def init_network(self):
    """
    Assemble the dilated VGG16 segmentation graph.

    A VGG16 encoder is built over ``self.x_pl``. Starting from its ``conv4_2``
    output, a new head is stacked: a regular 3x3 conv (``conv4_3_dil``), three
    3x3 atrous convs with dilation rate 2 (``conv5_*_dil``), a 7x7 atrous conv
    with dilation rate 4 (``fc6_dil``), a 1x1 conv (``fc7_dil``) and a 1x1
    scoring conv with ``num_classes`` filters. The scores are then upsampled x8
    by one transposed conv to the spatial shape of ``self.x_pl``.

    :return: None. Intermediate tensors are stored on ``self``; the final scores
        are stored in ``self.logits``.
    """
    n_classes = self.params.num_classes

    # Init a VGG16 as an encoder
    self.encoder = VGG16(
        x_input=self.x_pl,
        num_classes=n_classes,
        pretrained_path=self.args.pretrained_path,
        train_flag=self.is_training,
        reduced_flag=False,
        weight_decay=self.args.weight_decay)

    # Build Encoding part
    self.encoder.build()

    # Read only after build(), exactly where the original first accessed it.
    wd = self.encoder.wd

    # Keyword arguments shared by the three rate-2 atrous layers of block 5.
    block5_kwargs = dict(
        num_filters=512,
        kernel_size=(3, 3),
        dilation_rate=2,
        activation=tf.nn.relu,
        l2_strength=wd,
        is_training=self.is_training,
    )

    # Build Decoding part
    with tf.name_scope('dilation_2'):
        self.conv4_3_dil = conv2d(
            'conv4_3_dil',
            x=self.encoder.conv4_2,
            num_filters=512,
            kernel_size=(3, 3),
            activation=tf.nn.relu,
            l2_strength=wd,
            is_training=self.is_training)

        self.conv5_1_dil = atrous_conv2d('conv5_1_dil', x=self.conv4_3_dil, **block5_kwargs)
        self.conv5_2_dil = atrous_conv2d('conv5_2_dil', x=self.conv5_1_dil, **block5_kwargs)
        self.conv5_3_dil = atrous_conv2d('conv5_3_dil', x=self.conv5_2_dil, **block5_kwargs)

        # fc6 / fc7 both use dropout_keep_prob=0.5.
        self.fc6_dil = atrous_conv2d(
            'fc6_dil',
            x=self.conv5_3_dil,
            num_filters=1024,
            kernel_size=(7, 7),
            dilation_rate=4,
            activation=tf.nn.relu,
            l2_strength=wd,
            dropout_keep_prob=0.5,
            is_training=self.is_training)
        self.fc7_dil = conv2d(
            'fc7_dil',
            x=self.fc6_dil,
            num_filters=1024,
            kernel_size=(1, 1),
            activation=tf.nn.relu,
            dropout_keep_prob=0.5,
            l2_strength=wd,
            is_training=self.is_training)

        # Per-class scores.
        self.score_fr = conv2d(
            'score_fr_dil',
            x=self.fc7_dil,
            num_filters=n_classes,
            kernel_size=(1, 1),
            l2_strength=wd,
            is_training=self.is_training)

    with tf.name_scope('upscore_8s'):
        # Single x8 upsampling back to the spatial shape of the input placeholder.
        full_res_shape = self.x_pl.shape.as_list()[0:3] + [n_classes]
        self.upscore8 = conv2d_transpose(
            'upscore8',
            x=self.score_fr,
            output_shape=full_res_shape,
            kernel_size=(16, 16),
            stride=(8, 8),
            l2_strength=wd)

    self.logits = self.upscore8