def init_network(self):
        """
        Building the Network here
        :return:
        """

        # Init a VGG16 as an encoder
        self.encoder = VGG16(x_input=self.x_pl,
                             num_classes=self.params.num_classes,
                             pretrained_path=self.args.pretrained_path,
                             train_flag=self.is_training,
                             reduced_flag=False,
                             weight_decay=self.args.weight_decay)

        # Build Encoding part
        self.encoder.build()
        _debug(self.encoder.score_fr)
        # Build Decoding part
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d_transpose('upscore2', x=self.encoder.score_fr,
                                             output_shape=self.encoder.feed1.shape.as_list()[0:3] + [
                                                 self.params.num_classes],
                                             kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd)
            _debug(self.upscore2)
            self.score_feed1 = conv2d('score_feed1', x=self.encoder.feed1,
                                      num_filters=self.params.num_classes, kernel_size=(1, 1),
                                      l2_strength=self.encoder.wd)
            _debug(self.score_feed1)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)
            _debug(self.fuse_feed1)

        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d_transpose('upscore4', x=self.fuse_feed1,
                                             output_shape=self.encoder.feed2.shape.as_list()[0:3] + [
                                                 self.params.num_classes],
                                             kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd)
            _debug(self.upscore4)
            self.score_feed2 = conv2d('score_feed2', x=self.encoder.feed2,
                                      num_filters=self.params.num_classes, kernel_size=(1, 1),
                                      l2_strength=self.encoder.wd)
            _debug(self.score_feed2)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)
            _debug(self.fuse_feed2)

        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d_transpose('upscore8', x=self.fuse_feed2,
                                             output_shape=self.x_pl.shape.as_list()[0:3] + [self.params.num_classes],
                                             kernel_size=(16, 16), stride=(8, 8), l2_strength=self.encoder.wd)
            _debug(self.upscore8)

        self.logits = self.upscore8
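
# Minimal sketch of the shape arithmetic behind the FCN-8s decoder above, assuming a
# hypothetical input size: score_fr sits at 1/32 of the input resolution, feed1 at 1/16
# and feed2 at 1/8, so two stride-2 transposed convs plus one stride-8 transposed conv
# recover the input resolution.
input_h = 512                                   # assumed input height
score_fr_h = input_h // 32                      # encoder score map (1/32)
feed1_h = input_h // 16                         # first skip connection (1/16)
feed2_h = input_h // 8                          # second skip connection (1/8)
assert score_fr_h * 2 == feed1_h                # upscore2: stride (2, 2)
assert feed1_h * 2 == feed2_h                   # upscore4: stride (2, 2)
assert feed2_h * 8 == input_h                   # upscore8: stride (8, 8)
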
    def init_network(self):
        """
        Building the Network here
        :return:
        """

        # Init ShuffleNet as an encoder
        self.encoder = ShuffleNet(
            x_input=self.x_pl,
            num_classes=self.params.num_classes,
            pretrained_path=self.args.pretrained_path,
            train_flag=self.is_training,
            batchnorm_enabled=self.args.batchnorm_enabled,
            num_groups=self.args.num_groups,
            weight_decay=self.args.weight_decay,
            bias=self.args.bias)

        # Build Encoding part
        self.encoder.build()
        with tf.name_scope('dilation_2'):
            self.stage3 = self.encoder.stage(self.encoder.stage2,
                                             stage=3,
                                             repeat=7,
                                             dilation=2)
            _debug(self.stage3)
            self.stage4 = self.encoder.stage(self.stage3,
                                             stage=4,
                                             repeat=3,
                                             dilation=4)
            _debug(self.stage4)

            self.score_fr = conv2d('score_fr_dil',
                                   x=self.stage4,
                                   num_filters=self.params.num_classes,
                                   kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd,
                                   is_training=self.is_training)
            _debug(self.score_fr)

        if self.targets_resize < 8:
            self.targets_resize = 8 // self.targets_resize
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.score_fr,
                output_shape=self.y_pl.shape.as_list()[0:3] +
                [self.params.num_classes],
                kernel_size=(self.targets_resize * 2, self.targets_resize * 2),
                stride=(self.targets_resize, self.targets_resize),
                l2_strength=self.encoder.wd,
                is_training=self.is_training)

            _debug(self.upscore8)
            self.logits = self.upscore8
        else:
            self.logits = self.score_fr
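
# Minimal sketch of the targets_resize logic above, assuming hypothetical values: when
# the labels are kept at a reduced resolution, the decoder only bridges the remaining
# gap between the 1/8-resolution score map and the label map, with kernel = 2 * stride.
targets_resize = 4                              # assumed: labels at 1/4 of the input size
if targets_resize < 8:
    factor = 8 // targets_resize                # here: 2
    kernel_size = (factor * 2, factor * 2)      # (4, 4)
    stride = (factor, factor)                   # (2, 2)
else:
    factor = 1                                  # logits stay at the score_fr resolution
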
    def init_network(self):
        """
        Building the Network here
        :return:
        """

        # Init RESNET18 as an encoder
        self.encoder = RESNET18(x_input=self.x_pl,
                                num_classes=self.params.num_classes,
                                pretrained_path=self.args.pretrained_path,
                                train_flag=self.is_training,
                                weight_decay=self.args.weight_decay)

        # Build Encoding part
        self.encoder.build()

        # Build Decoding part
        with tf.name_scope('dilation_2'):
            with tf.variable_scope('conv4_x_dil'):
                self.conv4 = self.encoder._residual_block('conv4_1_dil', self.encoder.conv3, 256, pool_first=False, strides=1, dilation=2)
                _debug(self.conv4)
                self.conv4 = self.encoder._residual_block('conv4_2_dil', self.conv4, 256)
                _debug(self.conv4)

            with tf.variable_scope('conv5_x_dil'):
                self.conv5 = self.encoder._residual_block('conv5_1_dil', self.conv4, 512, pool_first=False, strides=1, dilation=4)
                _debug(self.conv5)
                self.conv5 = self.encoder._residual_block('conv5_2_dil', self.conv5, 512)
                _debug(self.conv5)

            self.score_fr = conv2d('score_fr_dil', x=self.conv5, num_filters=self.params.num_classes,
                                   kernel_size=(1, 1), l2_strength=self.encoder.wd,
                                   is_training=self.is_training)
            _debug(self.score_fr)

            self.upscore8 = conv2d_transpose('upscore8', x=self.score_fr,
                                             output_shape=self.x_pl.shape.as_list()[0:3] + [self.params.num_classes],
                                             kernel_size=(16, 16), stride=(8, 8), l2_strength=self.encoder.wd, is_training=self.is_training)
            _debug(self.upscore8)

        self.logits = self.upscore8
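
# Minimal sketch of why dilation replaces the strides in conv4_x_dil/conv5_x_dil above:
# with stride 1 the feature map stays at 1/8 resolution, while dilation rates 2 and 4
# keep the receptive field growing as the strided blocks would have.
effective_kernel = lambda k, d: d * (k - 1) + 1     # spatial extent of a dilated kernel
assert effective_kernel(3, 1) == 3                  # plain 3x3 convolution
assert effective_kernel(3, 2) == 5                  # 3x3 convolution, dilation 2
assert effective_kernel(3, 4) == 9                  # 3x3 convolution, dilation 4
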
    def init_network(self):
        """
        Building the Network here
        :return:
        """

        # Init ShuffleNet as an encoder
        self.app_encoder = ShuffleNet(
            x_input=self.x_pl,
            num_classes=self.params.num_classes,
            prefix='app_',
            pretrained_path=self.args.pretrained_path,
            train_flag=self.is_training,
            batchnorm_enabled=self.args.batchnorm_enabled,
            num_groups=self.args.num_groups,
            weight_decay=self.args.weight_decay,
            bias=self.args.bias,
            mean_path=self.args.data_dir + 'mean.npy')

        self.motion_encoder = ShuffleNet(
            x_input=self.flo_pl,
            num_classes=self.params.num_classes,
            prefix='mot_',
            pretrained_path=self.args.pretrained_path,
            train_flag=self.is_training,
            batchnorm_enabled=self.args.batchnorm_enabled,
            num_groups=self.args.num_groups,
            weight_decay=self.args.weight_decay,
            bias=self.args.bias,
            mean_path=self.args.data_dir + 'flo_mean.npy')

        # Build Encoding part
        self.app_encoder.build()
        self.motion_encoder.build()
        self.combined_score = tf.multiply(self.app_encoder.score_fr,
                                          self.motion_encoder.score_fr)
        self.combined_feed1 = tf.multiply(self.app_encoder.feed1,
                                          self.motion_encoder.feed1)
        self.combined_feed2 = tf.multiply(self.app_encoder.feed2,
                                          self.motion_encoder.feed2)

        # Build Decoding part
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d_transpose(
                'upscore2',
                x=self.combined_score,
                output_shape=self.combined_feed1.shape.as_list()[0:3] +
                [self.params.num_classes],
                batchnorm_enabled=self.args.batchnorm_enabled,
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.app_encoder.wd,
                bias=self.args.bias)
            currvars = get_vars_underscope(tf.get_variable_scope().name,
                                           'upscore2')
            for v in currvars:
                tf.add_to_collection('decoding_trainable_vars', v)

            self.score_feed1 = conv2d(
                'score_feed1',
                x=self.combined_feed1,
                batchnorm_enabled=self.args.batchnorm_enabled,
                num_filters=self.params.num_classes,
                kernel_size=(1, 1),
                bias=self.args.bias,
                l2_strength=self.app_encoder.wd)
            currvars = get_vars_underscope(tf.get_variable_scope().name,
                                           'score_feed1')
            for v in currvars:
                tf.add_to_collection('decoding_trainable_vars', v)

            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d_transpose(
                'upscore4',
                x=self.fuse_feed1,
                output_shape=self.combined_feed2.shape.as_list()[0:3] +
                [self.params.num_classes],
                batchnorm_enabled=self.args.batchnorm_enabled,
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.app_encoder.wd,
                bias=self.args.bias)
            currvars = get_vars_underscope(tf.get_variable_scope().name,
                                           'upscore4')
            for v in currvars:
                tf.add_to_collection('decoding_trainable_vars', v)

            self.score_feed2 = conv2d(
                'score_feed2',
                x=self.combined_feed2,
                batchnorm_enabled=self.args.batchnorm_enabled,
                num_filters=self.params.num_classes,
                kernel_size=(1, 1),
                bias=self.args.bias,
                l2_strength=self.app_encoder.wd)
            currvars = get_vars_underscope(tf.get_variable_scope().name,
                                           'score_feed2')
            for v in currvars:
                tf.add_to_collection('decoding_trainable_vars', v)

            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.fuse_feed2,
                output_shape=self.x_pl.shape.as_list()[0:3] +
                [self.params.num_classes],
                kernel_size=(16, 16),
                stride=(8, 8),
                l2_strength=self.app_encoder.wd,
                bias=self.args.bias)
            currvars = get_vars_underscope(tf.get_variable_scope().name,
                                           'upscore8')
            for v in currvars:
                tf.add_to_collection('decoding_trainable_vars', v)

        self.logits = self.upscore8
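
# Minimal sketch of how the 'decoding_trainable_vars' collection built above could be
# used to train only the decoder while both pretrained encoders stay frozen; the
# variable and loss below are hypothetical stand-ins.
import tensorflow as tf

w = tf.get_variable('decoder_w', shape=[4, 4, 21, 21])        # assumed decoder weight
tf.add_to_collection('decoding_trainable_vars', w)
loss = tf.reduce_sum(tf.square(w))                            # dummy loss for illustration
decoder_vars = tf.get_collection('decoding_trainable_vars')
train_decoder_op = tf.train.AdamOptimizer(1e-4).minimize(loss, var_list=decoder_vars)
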
    def init_network(self):
        """
        Building the Network here
        :return:
        """
        # Init MobileNet as an encoder
        self.encoder = MobileNet(x_input=self.x_pl,
                                 num_classes=self.params.num_classes,
                                 pretrained_path=self.args.pretrained_path,
                                 train_flag=self.is_training,
                                 width_multipler=1.0,
                                 weight_decay=self.args.weight_decay)

        # Build Encoding part
        self.encoder.build()

        # Build Decoding part
        with tf.name_scope('dilation_2'):
            self.conv4_2 = atrous_conv2d('conv_ds_7_dil',
                                         self.encoder.conv4_1,
                                         num_filters=512,
                                         kernel_size=(3, 3),
                                         padding='SAME',
                                         activation=tf.nn.relu,
                                         dilation_rate=2,
                                         batchnorm_enabled=True,
                                         is_training=self.is_training,
                                         l2_strength=self.wd)
            _debug(self.conv4_2)
            self.conv5_1 = depthwise_separable_conv2d(
                'conv_ds_8_dil',
                self.conv4_2,
                width_multiplier=self.encoder.width_multiplier,
                num_filters=512,
                kernel_size=(3, 3),
                padding='SAME',
                stride=(1, 1),
                activation=tf.nn.relu,
                batchnorm_enabled=True,
                is_training=self.is_training,
                l2_strength=self.wd)
            _debug(self.conv5_1)
            self.conv5_2 = depthwise_separable_conv2d(
                'conv_ds_9_dil',
                self.conv5_1,
                width_multiplier=self.encoder.width_multiplier,
                num_filters=512,
                kernel_size=(3, 3),
                padding='SAME',
                stride=(1, 1),
                activation=tf.nn.relu,
                batchnorm_enabled=True,
                is_training=self.is_training,
                l2_strength=self.wd)
            _debug(self.conv5_2)
            self.conv5_3 = depthwise_separable_conv2d(
                'conv_ds_10_dil',
                self.conv5_2,
                width_multiplier=self.encoder.width_multiplier,
                num_filters=512,
                kernel_size=(3, 3),
                padding='SAME',
                stride=(1, 1),
                activation=tf.nn.relu,
                batchnorm_enabled=True,
                is_training=self.is_training,
                l2_strength=self.wd)
            _debug(self.conv5_3)
            self.conv5_4 = depthwise_separable_conv2d(
                'conv_ds_11_dil',
                self.conv5_3,
                width_multiplier=self.encoder.width_multiplier,
                num_filters=512,
                kernel_size=(3, 3),
                padding='SAME',
                stride=(1, 1),
                activation=tf.nn.relu,
                batchnorm_enabled=True,
                is_training=self.is_training,
                l2_strength=self.wd)
            _debug(self.conv5_4)
            self.conv5_5 = depthwise_separable_conv2d(
                'conv_ds_12_dil',
                self.conv5_4,
                width_multiplier=self.encoder.width_multiplier,
                num_filters=512,
                kernel_size=(3, 3),
                padding='SAME',
                stride=(1, 1),
                activation=tf.nn.relu,
                batchnorm_enabled=True,
                is_training=self.is_training,
                l2_strength=self.wd)
            _debug(self.conv5_5)
            self.conv5_6 = atrous_conv2d('conv_ds_13_dil',
                                         self.conv5_5,
                                         num_filters=1024,
                                         kernel_size=(3, 3),
                                         padding='SAME',
                                         activation=tf.nn.relu,
                                         dilation_rate=4,
                                         batchnorm_enabled=True,
                                         is_training=self.is_training,
                                         l2_strength=self.wd)
            _debug(self.conv5_6)
            self.conv6_1 = depthwise_separable_conv2d(
                'conv_ds_14_dil',
                self.conv5_6,
                width_multiplier=self.encoder.width_multiplier,
                num_filters=1024,
                kernel_size=(3, 3),
                padding='SAME',
                stride=(1, 1),
                activation=tf.nn.relu,
                batchnorm_enabled=True,
                is_training=self.is_training,
                l2_strength=self.wd)
            _debug(self.conv6_1)
            # Pooling is removed.
            self.score_fr = conv2d('conv_1c_1x1_dil',
                                   self.conv6_1,
                                   num_filters=self.params.num_classes,
                                   l2_strength=self.wd,
                                   kernel_size=(1, 1))
            _debug(self.score_fr)

            if self.targets_resize < 8:
                self.targets_resize = 8 // self.targets_resize
                self.upscore8 = conv2d_transpose(
                    'upscore8',
                    x=self.score_fr,
                    output_shape=self.y_pl.shape.as_list()[0:3] +
                    [self.params.num_classes],
                    kernel_size=(self.targets_resize * 2,
                                 self.targets_resize * 2),
                    stride=(self.targets_resize, self.targets_resize),
                    l2_strength=self.encoder.wd,
                    is_training=self.is_training)
                _debug(self.upscore8)
                self.logits = self.upscore8
            else:
                self.logits = self.score_fr
    def init_network(self):
        """
        Building the Network here
        :return:
        """

        # Init MobileNet as an encoder
        self.encoder = MobileNet(x_input=self.x_pl,
                                 num_classes=self.params.num_classes,
                                 pretrained_path=self.args.pretrained_path,
                                 train_flag=self.is_training,
                                 width_multipler=1.0,
                                 weight_decay=self.args.weight_decay)

        # Build Encoding part
        self.encoder.build()

        # Build Decoding part
        with tf.name_scope('upscale_1'):
            self.expand11 = conv2d(
                'expand1_1',
                x=self.encoder.conv5_6,
                batchnorm_enabled=True,
                is_training=self.is_training,
                num_filters=self.encoder.conv5_5.shape.as_list()[3],
                kernel_size=(1, 1),
                l2_strength=self.encoder.wd)
            self._debug(self.expand11)
            self.upscale1 = conv2d_transpose(
                'upscale1',
                x=self.expand11,
                is_training=self.is_training,
                output_shape=self.encoder.conv5_5.shape.as_list(),
                batchnorm_enabled=True,
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.encoder.wd)
            self._debug(self.upscale1)
            self.add1 = tf.add(self.upscale1, self.encoder.conv5_5)
            self._debug(self.add1)
            self.expand12 = conv2d(
                'expand1_2',
                x=self.add1,
                batchnorm_enabled=True,
                is_training=self.is_training,
                num_filters=self.encoder.conv5_5.shape.as_list()[3],
                kernel_size=(1, 1),
                l2_strength=self.encoder.wd)
            self._debug(self.expand12)

        with tf.name_scope('upscale_2'):
            self.expand21 = conv2d(
                'expand2_1',
                x=self.expand12,
                batchnorm_enabled=True,
                is_training=self.is_training,
                num_filters=self.encoder.conv4_1.shape.as_list()[3],
                kernel_size=(1, 1),
                l2_strength=self.encoder.wd)
            self._debug(self.expand21)
            self.upscale2 = conv2d_transpose(
                'upscale2',
                x=self.expand21,
                is_training=self.is_training,
                output_shape=self.encoder.conv4_1.shape.as_list(),
                batchnorm_enabled=True,
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.encoder.wd)
            self._debug(self.upscale2)
            self.add2 = tf.add(self.upscale2, self.encoder.conv4_1)
            self._debug(self.add2)
            self.expand22 = conv2d(
                'expand2_2',
                x=self.add2,
                batchnorm_enabled=True,
                is_training=self.is_training,
                num_filters=self.encoder.conv4_1.shape.as_list()[3],
                kernel_size=(1, 1),
                l2_strength=self.encoder.wd)
            self._debug(self.expand22)

        with tf.name_scope('upscale_3'):
            self.expand31 = conv2d(
                'expand3_1',
                x=self.expand22,
                batchnorm_enabled=True,
                is_training=self.is_training,
                num_filters=self.encoder.conv3_1.shape.as_list()[3],
                kernel_size=(1, 1),
                l2_strength=self.encoder.wd)
            self._debug(self.expand31)
            self.upscale3 = conv2d_transpose(
                'upscale3',
                x=self.expand31,
                batchnorm_enabled=True,
                is_training=self.is_training,
                output_shape=self.encoder.conv3_1.shape.as_list(),
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.encoder.wd)
            self._debug(self.upscale3)
            self.add3 = tf.add(self.upscale3, self.encoder.conv3_1)
            self._debug(self.add3)
            self.expand32 = conv2d(
                'expand3_2',
                x=self.add3,
                batchnorm_enabled=True,
                is_training=self.is_training,
                num_filters=self.encoder.conv3_1.shape.as_list()[3],
                kernel_size=(1, 1),
                l2_strength=self.encoder.wd)
            self._debug(self.expand32)

        with tf.name_scope('upscale_4'):
            self.expand41 = conv2d(
                'expand4_1',
                x=self.expand32,
                batchnorm_enabled=True,
                is_training=self.is_training,
                num_filters=self.encoder.conv2_1.shape.as_list()[3],
                kernel_size=(1, 1),
                l2_strength=self.encoder.wd)
            self._debug(self.expand41)
            self.upscale4 = conv2d_transpose(
                'upscale4',
                x=self.expand41,
                batchnorm_enabled=True,
                is_training=self.is_training,
                output_shape=self.encoder.conv2_1.shape.as_list(),
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.encoder.wd)
            self._debug(self.upscale4)
            self.add4 = tf.add(self.upscale4, self.encoder.conv2_1)
            self._debug(self.add4)
            self.expand42 = conv2d(
                'expand4_2',
                x=self.add4,
                batchnorm_enabled=True,
                is_training=self.is_training,
                num_filters=self.encoder.conv2_1.shape.as_list()[3],
                kernel_size=(1, 1),
                l2_strength=self.encoder.wd)
            self._debug(self.expand42)

        with tf.name_scope('upscale_5'):
            self.upscale5 = conv2d_transpose(
                'upscale5',
                x=self.expand42,
                batchnorm_enabled=True,
                is_training=self.is_training,
                output_shape=self.x_pl.shape.as_list()[0:3] +
                [self.encoder.conv2_1.shape.as_list()[3]],
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.encoder.wd)
            self._debug(self.upscale5)
            self.expand5 = conv2d(
                'expand5',
                x=self.upscale5,
                batchnorm_enabled=True,
                is_training=self.is_training,
                num_filters=self.encoder.conv1_1.shape.as_list()[3],
                kernel_size=(1, 1),
                dropout_keep_prob=0.5,
                l2_strength=self.encoder.wd)
            self._debug(self.expand5)

        with tf.name_scope('final_score'):
            self.fscore = conv2d('fscore',
                                 x=self.expand5,
                                 num_filters=self.params.num_classes,
                                 kernel_size=(1, 1),
                                 l2_strength=self.encoder.wd)
            self._debug(self.fscore)

        self.logits = self.fscore
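
# Minimal sketch of the skip-fusion pattern used in the decoder above, with hypothetical
# shapes: tf.add needs matching tensors, so each upscale step first projects the decoder
# channels to the skip's channel count with a 1x1 'expand' conv, then upsamples by 2.
import tensorflow as tf

skip = tf.zeros([1, 32, 64, 256])                             # assumed skip feature
decoder = tf.zeros([1, 16, 32, 512])                          # assumed decoder feature
projected = tf.layers.conv2d(decoder, filters=256, kernel_size=1)      # match channels
upsampled = tf.layers.conv2d_transpose(projected, filters=256, kernel_size=4,
                                       strides=2, padding='same')      # match spatial dims
fused = tf.add(upsampled, skip)                               # [1, 32, 64, 256]
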
    def init_network(self):
        """
        Building the Network here
        :return:
        """

        # Init a VGG16 as an encoder
        self.encoder = VGG16(x_input=self.x_pl,
                             num_classes=self.params.num_classes,
                             pretrained_path=self.args.pretrained_path,
                             train_flag=self.is_training,
                             reduced_flag=False,
                             weight_decay=self.args.weight_decay)

        # Build Encoding part
        self.encoder.build()

        # Build Decoding part
        with tf.name_scope('upscale_1'):
            self.upscale1 = conv2d_transpose(
                'upscale0',
                x=self.encoder.conv5_3,
                output_shape=self.encoder.conv4_3.shape.as_list(),
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.encoder.wd)
            _debug(self.upscale1)
            self.concat1 = tf.add(self.upscale1, self.encoder.conv4_3)
            _debug(self.concat1)
            self.expand11 = conv2d(
                'expand1_1',
                x=self.concat1,
                num_filters=self.encoder.conv4_3.shape.as_list()[3],
                kernel_size=(3, 3),
                l2_strength=self.encoder.wd)
            _debug(self.expand11)
            self.expand12 = conv2d(
                'expand1_2',
                x=self.expand11,
                num_filters=self.encoder.conv4_3.shape.as_list()[3],
                kernel_size=(3, 3),
                l2_strength=self.encoder.wd)
            _debug(self.expand12)
        with tf.name_scope('upscale_2'):
            self.upscale2 = conv2d_transpose(
                'upscale2',
                x=self.expand12,
                output_shape=self.encoder.conv3_3.shape.as_list(),
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.encoder.wd)
            _debug(self.upscale2)
            self.concat2 = tf.add(self.upscale2, self.encoder.conv3_3)
            _debug(self.concat2)
            self.expand21 = conv2d(
                'expand2_1',
                x=self.concat2,
                num_filters=self.encoder.conv3_3.shape.as_list()[3],
                kernel_size=(3, 3),
                l2_strength=self.encoder.wd)
            _debug(self.expand21)
            self.expand22 = conv2d(
                'expand2_2',
                x=self.expand21,
                num_filters=self.encoder.conv3_3.shape.as_list()[3],
                kernel_size=(3, 3),
                l2_strength=self.encoder.wd)
            _debug(self.expand22)
        with tf.name_scope('upscale_3'):
            self.upscale3 = conv2d_transpose(
                'upscale3',
                x=self.expand22,
                output_shape=self.encoder.conv2_2.shape.as_list(),
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.encoder.wd)
            _debug(self.upscale3)
            self.concat3 = tf.add(self.upscale3, self.encoder.conv2_2)
            _debug(self.concat3)
            self.expand31 = conv2d(
                'expand3_1',
                x=self.concat3,
                num_filters=self.encoder.conv2_2.shape.as_list()[3],
                kernel_size=(3, 3),
                l2_strength=self.encoder.wd)
            _debug(self.expand31)
            self.expand32 = conv2d(
                'expand3_2',
                x=self.expand31,
                num_filters=self.encoder.conv2_2.shape.as_list()[3],
                kernel_size=(3, 3),
                l2_strength=self.encoder.wd)
            _debug(self.expand32)
        with tf.name_scope('upscale_4'):
            self.upscale4 = conv2d_transpose(
                'upscale4',
                x=self.expand32,
                output_shape=self.encoder.conv1_2.shape.as_list(),
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.encoder.wd)
            _debug(self.upscale4)
            self.concat4 = tf.add(self.upscale4, self.encoder.conv1_2)
            _debug(self.concat4)
            self.expand41 = conv2d(
                'expand4_1',
                x=self.concat4,
                num_filters=self.encoder.conv1_2.shape.as_list()[3],
                kernel_size=(3, 3),
                l2_strength=self.encoder.wd)
            _debug(self.expand41)
            self.expand42 = conv2d(
                'expand4_2',
                x=self.expand41,
                num_filters=self.encoder.conv1_2.shape.as_list()[3],
                kernel_size=(3, 3),
                l2_strength=self.encoder.wd)
            _debug(self.expand42)
        with tf.name_scope('upscale_5'):
            self.upscale5 = conv2d_transpose(
                'upscale5',
                x=self.expand42,
                output_shape=self.encoder.conv1_1.shape.as_list(),
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.encoder.wd)
            _debug(self.upscale5)
            self.concat5 = tf.add(self.upscale5, self.encoder.conv1_1)
            _debug(self.concat5)
            self.expand51 = conv2d(
                'expand5_1',
                x=self.concat5,
                num_filters=self.encoder.conv1_1.shape.as_list()[3],
                kernel_size=(3, 3),
                l2_strength=self.encoder.wd)
            _debug(self.expand51)
            self.expand52 = conv2d(
                'expand5_2',
                x=self.expand51,
                num_filters=self.encoder.conv1_1.shape.as_list()[3],
                kernel_size=(3, 3),
                l2_strength=self.encoder.wd)
            _debug(self.expand52)

        with tf.name_scope('final_score'):
            self.fscore = conv2d('fscore',
                                 x=self.expand52,
                                 num_filters=self.params.num_classes,
                                 kernel_size=(1, 1),
                                 l2_strength=self.encoder.wd)
            _debug(self.fscore)

        self.logits = self.fscore
    def init_network(self):
        """
        Building the Network here
        :return:
        """

        # Init MobileNet as an encoder
        self.encoder = MobileNet(x_input=self.x_pl,
                                 num_classes=self.params.num_classes,
                                 pretrained_path=self.args.pretrained_path,
                                 train_flag=self.is_training,
                                 width_multipler=1.0,
                                 weight_decay=self.args.weight_decay)

        # Build Encoding part
        self.encoder.build()

        print("Building the Decoder FCN8s..")
        # Build Decoding part
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d_transpose(
                'upscore2',
                x=self.encoder.score_fr,
                output_shape=self.encoder.feed1.shape.as_list()[0:3] +
                [self.params.num_classes],
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.encoder.wd)
            self._debug(self.upscore2)
            self.score_feed1 = conv2d(
                'score_feed1',
                x=self.encoder.feed1,
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                num_filters=self.params.num_classes,
                kernel_size=(1, 1),
                l2_strength=self.encoder.wd)
            self._debug(self.score_feed1)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)
            self._debug(self.fuse_feed1)

        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d_transpose(
                'upscore4',
                x=self.fuse_feed1,
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                output_shape=self.encoder.feed2.shape.as_list()[0:3] +
                [self.params.num_classes],
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.encoder.wd)
            self._debug(self.upscore4)
            self.score_feed2 = conv2d(
                'score_feed2',
                x=self.encoder.feed2,
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                num_filters=self.params.num_classes,
                kernel_size=(1, 1),
                l2_strength=self.encoder.wd)
            self._debug(self.score_feed2)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)
            self._debug(self.fuse_feed2)

        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.fuse_feed2,
                output_shape=self.x_pl.shape.as_list()[0:3] +
                [self.params.num_classes],
                kernel_size=(16, 16),
                stride=(8, 8),
                l2_strength=self.encoder.wd)
            self._debug(self.upscore8)

        self.logits = self.upscore8
        print("\nDecoder FCN8s is built successfully\n\n")
    def init_network(self):
        """
        Building the Network here
        :return:
        """

        # Init MobileNet as an encoder
        self.app_encoder = MobileNet(x_input=self.x_pl,
                                     num_classes=self.params.num_classes,
                                     prefix='app_',
                                     pretrained_path=self.args.pretrained_path,
                                     mean_path=self.args.data_dir + 'mean.npy',
                                     train_flag=self.is_training,
                                     width_multipler=1.0,
                                     weight_decay=self.args.weight_decay)
        self.motion_encoder = MobileNet(
            x_input=self.flo_pl,
            num_classes=self.params.num_classes,
            prefix='mot_',
            pretrained_path=self.args.pretrained_path,
            mean_path=self.args.data_dir + 'flo_mean.npy',
            train_flag=self.is_training,
            width_multipler=1.0,
            weight_decay=self.args.weight_decay)

        # Build Encoding part
        self.app_encoder.build()
        self.motion_encoder.build()
        self.feed2 = tf.multiply(self.app_encoder.conv3_2,
                                 self.motion_encoder.conv3_2)
        self.width_multiplier = 1.0
        self.conv4_1 = depthwise_separable_conv2d(
            'conv_ds_6_1',
            self.feed2,
            width_multiplier=self.width_multiplier,
            num_filters=256,
            kernel_size=(3, 3),
            padding='SAME',
            stride=(1, 1),
            activation=tf.nn.relu6,
            batchnorm_enabled=True,
            is_training=self.is_training,
            l2_strength=self.args.weight_decay)
        _debug(self.conv4_1)
        self.conv4_2 = depthwise_separable_conv2d(
            'conv_ds_7_1',
            self.conv4_1,
            width_multiplier=self.width_multiplier,
            num_filters=512,
            kernel_size=(3, 3),
            padding='SAME',
            stride=(2, 2),
            activation=tf.nn.relu6,
            batchnorm_enabled=True,
            is_training=self.is_training,
            l2_strength=self.args.weight_decay)
        _debug(self.conv4_2)
        self.conv5_1 = depthwise_separable_conv2d(
            'conv_ds_8_1',
            self.conv4_2,
            width_multiplier=self.width_multiplier,
            num_filters=512,
            kernel_size=(3, 3),
            padding='SAME',
            stride=(1, 1),
            activation=tf.nn.relu6,
            batchnorm_enabled=True,
            is_training=self.is_training,
            l2_strength=self.args.weight_decay)
        _debug(self.conv5_1)
        self.conv5_2 = depthwise_separable_conv2d(
            'conv_ds_9_1',
            self.conv5_1,
            width_multiplier=self.width_multiplier,
            num_filters=512,
            kernel_size=(3, 3),
            padding='SAME',
            stride=(1, 1),
            activation=tf.nn.relu6,
            batchnorm_enabled=True,
            is_training=self.is_training,
            l2_strength=self.args.weight_decay)
        _debug(self.conv5_2)
        self.conv5_3 = depthwise_separable_conv2d(
            'conv_ds_10_1',
            self.conv5_2,
            width_multiplier=self.width_multiplier,
            num_filters=512,
            kernel_size=(3, 3),
            padding='SAME',
            stride=(1, 1),
            activation=tf.nn.relu6,
            batchnorm_enabled=True,
            is_training=self.is_training,
            l2_strength=self.args.weight_decay)
        _debug(self.conv5_3)
        self.conv5_4 = depthwise_separable_conv2d(
            'conv_ds_11_1',
            self.conv5_3,
            width_multiplier=self.width_multiplier,
            num_filters=512,
            kernel_size=(3, 3),
            padding='SAME',
            stride=(1, 1),
            activation=tf.nn.relu6,
            batchnorm_enabled=True,
            is_training=self.is_training,
            l2_strength=self.args.weight_decay)
        _debug(self.conv5_4)
        self.conv5_5 = depthwise_separable_conv2d(
            'conv_ds_12_1',
            self.conv5_4,
            width_multiplier=self.width_multiplier,
            num_filters=512,
            kernel_size=(3, 3),
            padding='SAME',
            stride=(1, 1),
            activation=tf.nn.relu6,
            batchnorm_enabled=True,
            is_training=self.is_training,
            l2_strength=self.args.weight_decay)
        _debug(self.conv5_5)
        self.conv5_6 = depthwise_separable_conv2d(
            'conv_ds_13_1',
            self.conv5_5,
            width_multiplier=self.width_multiplier,
            num_filters=1024,
            kernel_size=(3, 3),
            padding='SAME',
            stride=(2, 2),
            activation=tf.nn.relu6,
            batchnorm_enabled=True,
            is_training=self.is_training,
            l2_strength=self.args.weight_decay)
        _debug(self.conv5_6)
        self.conv6_1 = depthwise_separable_conv2d(
            'conv_ds_14_1',
            self.conv5_6,
            width_multiplier=self.width_multiplier,
            num_filters=1024,
            kernel_size=(3, 3),
            padding='SAME',
            stride=(1, 1),
            activation=tf.nn.relu6,
            batchnorm_enabled=True,
            is_training=self.is_training,
            l2_strength=self.args.weight_decay)
        _debug(self.conv6_1)
        # Pooling is removed.
        self.score_fr = conv2d('conv_1c_1x1_1',
                               self.conv6_1,
                               num_filters=self.params.num_classes,
                               l2_strength=self.args.weight_decay,
                               kernel_size=(1, 1))

        self.feed1 = self.conv4_2

        # Build Decoding part
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d_transpose(
                'upscore2',
                x=self.score_fr,
                output_shape=self.feed1.shape.as_list()[0:3] +
                [self.params.num_classes],
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.args.weight_decay,
                bias=self.args.bias)
            _debug(self.upscore2)

            self.score_feed1 = conv2d(
                'score_feed1',
                x=self.feed1,
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                num_filters=self.params.num_classes,
                kernel_size=(1, 1),
                bias=self.args.bias,
                l2_strength=self.args.weight_decay)
            _debug(self.score_feed1)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d_transpose(
                'upscore4',
                x=self.fuse_feed1,
                output_shape=self.feed2.shape.as_list()[0:3] +
                [self.params.num_classes],
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.args.weight_decay,
                bias=self.args.bias)
            _debug(self.upscore4)
            self.score_feed2 = conv2d(
                'score_feed2',
                x=self.feed2,
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                num_filters=self.params.num_classes,
                kernel_size=(1, 1),
                bias=self.args.bias,
                l2_strength=self.args.weight_decay)
            _debug(self.score_feed2)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.fuse_feed2,
                output_shape=self.x_pl.shape.as_list()[0:3] +
                [self.params.num_classes],
                is_training=self.is_training,
                kernel_size=(16, 16),
                stride=(8, 8),
                l2_strength=self.args.weight_decay,
                bias=self.args.bias)
            _debug(self.upscore8)
        self.logits = self.upscore8
    def init_network(self):
        """
        Building the Network here
        :return:
        """

        # Init ShuffleNet as an encoder
        self.encoder = ShuffleNet(x_input=self.x_pl, num_classes=self.params.num_classes,
                                  pretrained_path=self.args.pretrained_path, train_flag=self.is_training,
                                  batchnorm_enabled=self.args.batchnorm_enabled, num_groups=self.args.num_groups,
                                  weight_decay=self.args.weight_decay, bias=self.args.bias)
        # Build Encoding part
        self.encoder.build()

        # Build Decoding part
        with tf.name_scope('upscale_1'):
            self.expand11 = conv2d('expand1_1', x=self.encoder.stage4, batchnorm_enabled=True,
                                   is_training=self.is_training,
                                   num_filters=self.encoder.stage3.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand11)
            self.upscale1 = conv2d_transpose('upscale1', x=self.expand11, is_training=self.is_training,
                                             output_shape=self.encoder.stage3.shape.as_list(), batchnorm_enabled=True,
                                             kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd)
            self._debug(self.upscale1)
            self.add1 = tf.add(self.upscale1, self.encoder.stage3)
            self._debug(self.add1)
            self.expand12 = conv2d('expand1_2', x=self.add1, batchnorm_enabled=True, is_training=self.is_training,
                                   num_filters=self.encoder.stage3.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand12)

        with tf.name_scope('upscale_2'):
            self.expand21 = conv2d('expand2_1', x=self.expand12, batchnorm_enabled=True, is_training=self.is_training,
                                   num_filters=self.encoder.stage2.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand21)
            self.upscale2 = conv2d_transpose('upscale2', x=self.expand21, is_training=self.is_training,
                                             output_shape=self.encoder.stage2.shape.as_list(), batchnorm_enabled=True,
                                             kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd)
            self._debug(self.upscale2)
            self.add2 = tf.add(self.upscale2, self.encoder.stage2)
            self._debug(self.add2)
            self.expand22 = conv2d('expand2_2', x=self.add2, batchnorm_enabled=True, is_training=self.is_training,
                                   num_filters=self.encoder.stage2.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand22)

        with tf.name_scope('upscale_3'):
            self.expand31 = conv2d('expand3_1', x=self.expand22, batchnorm_enabled=True, is_training=self.is_training,
                                   num_filters=self.encoder.max_pool.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand31)
            self.upscale3 = conv2d_transpose('upscale3', x=self.expand31, batchnorm_enabled=True,
                                             is_training=self.is_training,
                                             output_shape=[self.encoder.max_pool.shape[0],
                                                           self.encoder.max_pool.shape.as_list()[1] + 1,
                                                           self.encoder.max_pool.shape.as_list()[2] + 1,
                                                           self.encoder.max_pool.shape.as_list()[3]],
                                             kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd)
            self._debug(self.upscale3)
            padded = tf.pad(self.encoder.max_pool, [[0, 0], [0, 1], [0, 1], [0, 0]], "CONSTANT")
            self.add3 = tf.add(self.upscale3, padded)
            self._debug(self.add3)
            self.expand32 = conv2d('expand3_2', x=self.add3, batchnorm_enabled=True, is_training=self.is_training,
                                   num_filters=self.encoder.max_pool.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand32)

        with tf.name_scope('upscale_4'):
            self.expand41 = conv2d('expand4_1', x=self.expand32, batchnorm_enabled=True, is_training=self.is_training,
                                   num_filters=self.encoder.conv1.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand41)
            self.upscale4 = conv2d_transpose('upscale4', x=self.expand41, batchnorm_enabled=True,
                                             is_training=self.is_training,
                                             output_shape=[self.encoder.conv1.shape[0],
                                                           self.encoder.conv1.shape.as_list()[1] + 1,
                                                           self.encoder.conv1.shape.as_list()[2] + 1,
                                                           self.encoder.conv1.shape.as_list()[3]],
                                             kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd)
            self._debug(self.upscale4)
            padded2 = tf.pad(self.encoder.conv1, [[0, 0], [0, 1], [0, 1], [0, 0]], "CONSTANT")
            self.add4 = tf.add(self.upscale4, padded2)
            self._debug(self.add4)
            self.expand42 = conv2d('expand4_2', x=self.add4, batchnorm_enabled=True, is_training=self.is_training,
                                   num_filters=self.encoder.conv1.shape.as_list()[3], kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd)
            self._debug(self.expand42)

        with tf.name_scope('upscale_5'):
            self.upscale5 = conv2d_transpose('upscale5', x=self.expand42, batchnorm_enabled=True,
                                             is_training=self.is_training,
                                             output_shape=self.x_pl.shape.as_list()[0:3] + [
                                                 self.encoder.conv1.shape.as_list()[3]],
                                             kernel_size=(4, 4), stride=(2, 2), l2_strength=self.encoder.wd)
            self._debug(self.upscale5)
            self.expand5 = conv2d('expand5', x=self.upscale5, batchnorm_enabled=True, is_training=self.is_training,
                                  num_filters=self.encoder.conv1.shape.as_list()[3], kernel_size=(1, 1),
                                  dropout_keep_prob=0.5,
                                  l2_strength=self.encoder.wd)
            self._debug(self.expand5)

        with tf.name_scope('final_score'):
            self.fscore = conv2d('fscore', x=self.expand5,
                                 num_filters=self.params.num_classes, kernel_size=(1, 1),
                                 l2_strength=self.encoder.wd)
            self._debug(self.fscore)

        self.logits = self.fscore
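
# Minimal sketch of the padding trick used in upscale_3/upscale_4 above, with
# hypothetical sizes: when a downsampling stage yields an odd spatial size, the 2x
# transposed conv targets a map one pixel larger, so the skip tensor is zero-padded on
# the bottom/right before the element-wise addition.
import tensorflow as tf

skip = tf.zeros([1, 57, 57, 24])                              # assumed odd-sized skip
up = tf.zeros([1, 58, 58, 24])                                # assumed decoder output
padded = tf.pad(skip, [[0, 0], [0, 1], [0, 1], [0, 0]], "CONSTANT")
fused = tf.add(up, padded)                                    # shapes match: [1, 58, 58, 24]
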
    def init_network(self):
        """
        Building the Network here
        :return:
        """

        # Init ShuffleNet as an encoder
        self.app_encoder = ShuffleNet(
            x_input=self.x_pl,
            num_classes=self.params.num_classes,
            prefix='app_',
            pretrained_path=self.args.pretrained_path,
            train_flag=self.is_training,
            batchnorm_enabled=self.args.batchnorm_enabled,
            num_groups=self.args.num_groups,
            weight_decay=self.args.weight_decay,
            bias=self.args.bias,
            mean_path=self.args.data_dir + 'mean.npy')

        self.motion_encoder = ShuffleNet(
            x_input=self.flo_pl,
            num_classes=self.params.num_classes,
            prefix='mot_',
            pretrained_path=self.args.pretrained_path,
            train_flag=self.is_training,
            batchnorm_enabled=self.args.batchnorm_enabled,
            num_groups=self.args.num_groups,
            weight_decay=self.args.weight_decay,
            bias=self.args.bias,
            mean_path=self.args.data_dir + 'flo_mean.npy')

        # Build Encoding part
        self.app_encoder.build()
        self.motion_encoder.build()
        self.combined_score = tf.multiply(self.app_encoder.stage2,
                                          self.motion_encoder.stage2)
        #        self.combined_score= tf.concat((self.app_encoder.stage2, self.motion_encoder.stage2), axis=3)
        #        _debug(self.combined_score)
        #        self.combined_score = conv2d('combined_score', self.combined_score, num_filters= 240, l2_strength=self.args.weight_decay,
        #                               kernel_size=(1, 1))

        self.stage3 = self.app_encoder.stage(self.combined_score,
                                             stage=3,
                                             repeat=7)
        _debug(self.stage3)

        self.stage4 = self.app_encoder.stage(self.stage3, stage=4, repeat=3)
        _debug(self.stage4)

        self.feed1 = self.stage3
        self.feed2 = self.combined_score
        # First experiment: use a regular conv2d for the score layer
        self.score_fr = conv2d('combined_conv_1c_1x1',
                               self.stage4,
                               num_filters=self.params.num_classes,
                               l2_strength=self.args.weight_decay,
                               kernel_size=(1, 1))
        _debug(self.score_fr)

        # Build Decoding part
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d_transpose(
                'upscore2',
                x=self.score_fr,
                output_shape=self.feed1.shape.as_list()[0:3] +
                [self.params.num_classes],
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.args.weight_decay,
                bias=self.args.bias)
            _debug(self.upscore2)

            self.score_feed1 = conv2d(
                'score_feed1',
                x=self.feed1,
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                num_filters=self.params.num_classes,
                kernel_size=(1, 1),
                bias=self.args.bias,
                l2_strength=self.args.weight_decay)
            _debug(self.score_feed1)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d_transpose(
                'upscore4',
                x=self.fuse_feed1,
                output_shape=self.feed2.shape.as_list()[0:3] +
                [self.params.num_classes],
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.args.weight_decay,
                bias=self.args.bias)
            _debug(self.upscore4)
            self.score_feed2 = conv2d(
                'score_feed2',
                x=self.feed2,
                batchnorm_enabled=self.args.batchnorm_enabled,
                is_training=self.is_training,
                num_filters=self.params.num_classes,
                kernel_size=(1, 1),
                bias=self.args.bias,
                l2_strength=self.args.weight_decay)
            _debug(self.score_feed2)
            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.fuse_feed2,
                output_shape=self.x_pl.shape.as_list()[0:3] +
                [self.params.num_classes],
                is_training=self.is_training,
                kernel_size=(16, 16),
                stride=(8, 8),
                l2_strength=self.args.weight_decay,
                bias=self.args.bias)
            _debug(self.upscore8)
        self.logits = self.upscore8
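
# Minimal sketch of the two-stream fusion used above, with hypothetical shapes: the
# appearance and motion encoders are fused by element-wise multiplication, which can be
# read as one stream gating the other and requires identical shapes at the fusion point.
import tensorflow as tf

app_feat = tf.zeros([1, 28, 28, 240])                         # assumed appearance feature
mot_feat = tf.zeros([1, 28, 28, 240])                         # assumed motion feature
fused = tf.multiply(app_feat, mot_feat)                       # same shape as the inputs
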
    def init_network(self):
        """
        Building the Network here
        :return:
        """

        # Init a VGG16 as an encoder
        self.app_encoder = VGG16(x_input=self.x_pl,
                                 prefix='app_',
                                 num_classes=self.params.num_classes,
                                 pretrained_path=self.args.pretrained_path,
                                 train_flag=self.is_training,
                                 reduced_flag=False,
                                 weight_decay=self.args.weight_decay,
                                 mean_path=self.args.data_dir + 'mean.npy')
        self.motion_encoder = VGG16(x_input=self.flo_pl,
                                    prefix='mot_',
                                    num_classes=self.params.num_classes,
                                    pretrained_path=self.args.pretrained_path,
                                    train_flag=self.is_training,
                                    reduced_flag=False,
                                    weight_decay=self.args.weight_decay,
                                    mean_path=self.args.data_dir +
                                    'flo_mean.npy')

        # Build Encoding part
        self.app_encoder.build()
        self.motion_encoder.build()
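        # Fuse the appearance and motion score maps by element-wise
        # multiplication, acting as a soft gating between the two streams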
        self.combined_score = tf.multiply(self.app_encoder.score_fr,
                                          self.motion_encoder.score_fr)
        # Build Decoding part
        with tf.name_scope('upscore_2s'):
            self.upscore2 = conv2d_transpose(
                'upscore2',
                x=self.combined_score,
                output_shape=self.app_encoder.feed1.shape.as_list()[0:3] +
                [self.params.num_classes],
                batchnorm_enabled=self.args.batchnorm_enabled,
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.app_encoder.wd,
                bias=self.args.bias)

            self.app_score_feed1 = conv2d(
                'app_score_feed1',
                x=self.app_encoder.feed1,
                batchnorm_enabled=self.args.batchnorm_enabled,
                num_filters=self.params.num_classes,
                kernel_size=(1, 1),
                bias=self.args.bias,
                l2_strength=self.app_encoder.wd)
            self.app_score_feed1 = tf.nn.relu(self.app_score_feed1)
            self.mot_score_feed1 = conv2d(
                'mot_score_feed1',
                x=self.motion_encoder.feed1,
                batchnorm_enabled=self.args.batchnorm_enabled,
                num_filters=self.params.num_classes,
                kernel_size=(1, 1),
                bias=self.args.bias,
                l2_strength=self.motion_encoder.wd)
            self.mot_score_feed1 = tf.nn.relu(self.mot_score_feed1)
            self.score_feed1 = tf.multiply(self.app_score_feed1,
                                           self.mot_score_feed1)
            self.fuse_feed1 = tf.add(self.score_feed1, self.upscore2)

        with tf.name_scope('upscore_4s'):
            self.upscore4 = conv2d_transpose(
                'upscore4',
                x=self.fuse_feed1,
                output_shape=self.app_encoder.feed2.shape.as_list()[0:3] +
                [self.params.num_classes],
                batchnorm_enabled=self.args.batchnorm_enabled,
                kernel_size=(4, 4),
                stride=(2, 2),
                l2_strength=self.app_encoder.wd,
                bias=self.args.bias)

            self.app_score_feed2 = conv2d(
                'app_score_feed2',
                x=self.app_encoder.feed2,
                batchnorm_enabled=self.args.batchnorm_enabled,
                num_filters=self.params.num_classes,
                kernel_size=(1, 1),
                bias=self.args.bias,
                l2_strength=self.app_encoder.wd)
            self.app_score_feed2 = tf.nn.relu(self.app_score_feed2)
            self.mot_score_feed2 = conv2d(
                'mot_score_feed2',
                x=self.motion_encoder.feed2,
                batchnorm_enabled=self.args.batchnorm_enabled,
                num_filters=self.params.num_classes,
                kernel_size=(1, 1),
                bias=self.args.bias,
                l2_strength=self.motion_encoder.wd)
            self.mot_score_feed2 = tf.nn.relu(self.mot_score_feed2)
            self.score_feed2 = tf.multiply(self.app_score_feed2,
                                           self.mot_score_feed2)

            self.fuse_feed2 = tf.add(self.score_feed2, self.upscore4)

        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.fuse_feed2,
                output_shape=self.x_pl.shape.as_list()[0:3] +
                [self.params.num_classes],
                kernel_size=(16, 16),
                stride=(8, 8),
                l2_strength=self.app_encoder.wd,
                bias=self.args.bias)

        self.logits = self.upscore8
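
A minimal usage sketch under assumptions not stated in the snippet: `model` is an instance of the class, `image_batch` and `flow_batch` are preprocessed NumPy arrays fed to the RGB (`x_pl`) and optical-flow (`flo_pl`) placeholders, and any `is_training` placeholder is handled as the rest of the codebase expects.

import tensorflow as tf

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())   # in practice the pretrained encoder weights would be restored instead
    logits_val = sess.run(model.logits,
                          feed_dict={model.x_pl: image_batch,
                                     model.flo_pl: flow_batch})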
Example #13
    def template(self, x, action, lstm_state):
        """
        :param x: input tensor of shape [None, truncated_time_steps] + self.config.state_size
        :param action: input tensor of shape [None, truncated_time_steps, action_dim]
        :param lstm_state: input tensor of shape [2, lstm_size, lstm_size]
        :return: the next-state prediction (raw and softmax), the reward prediction (or None), and the new LSTM state
        """

        with tf.name_scope('encoder_1'):
            h1 = tf.layers.conv2d(
                x,
                64,
                kernel_size=(8, 8),
                strides=(2, 2),
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                padding='SAME')
            bn1 = tf.layers.batch_normalization(h1, training=self.is_training)
            drp1 = tf.layers.dropout(tf.nn.relu(bn1),
                                     rate=self.config.dropout_rate,
                                     training=self.is_training,
                                     name='dropout')

        with tf.name_scope('encoder_2'):
            h2 = tf.layers.conv2d(
                drp1,
                32,
                kernel_size=(6, 6),
                strides=(2, 2),
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                padding='SAME')
            bn2 = tf.layers.batch_normalization(h2, training=self.is_training)
            drp2 = tf.layers.dropout(tf.nn.relu(bn2),
                                     rate=self.config.dropout_rate,
                                     training=self.is_training,
                                     name='dropout')

        with tf.name_scope('encoder_3'):
            h3 = tf.layers.conv2d(
                drp2,
                32,
                kernel_size=(6, 6),
                strides=(2, 2),
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                padding='SAME')
            bn3 = tf.layers.batch_normalization(h3, training=self.is_training)
            drp3 = tf.layers.dropout(tf.nn.relu(bn3),
                                     rate=self.config.dropout_rate,
                                     training=self.is_training,
                                     name='dropout')

        with tf.name_scope('encoder_4'):
            h4 = tf.layers.conv2d(
                drp3,
                32,
                kernel_size=(4, 4),
                strides=(2, 2),
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                padding='SAME')
            bn4 = tf.layers.batch_normalization(h4, training=self.is_training)
            drp4 = tf.layers.dropout(tf.nn.relu(bn4),
                                     rate=self.config.dropout_rate,
                                     training=self.is_training,
                                     name='dropout')

        with tf.name_scope('flatten_1'):
            encoded = tf.contrib.layers.flatten(drp4)

        # the size of the encoded vector
        encoded_vector_size = encoded.get_shape()[1]

        with tf.name_scope('lstm_layer') as scope:
            lstm_out, lstm_new_state = actionlstm_cell(
                encoded,
                lstm_state,
                action,
                self.config.lstm_size,
                self.config.action_dim,
                initializer=tf.contrib.layers.xavier_initializer(),
                activation=tf.tanh,
                scope='lstm_layer')

        with tf.name_scope('hidden_layer_1'):
            h5 = tf.layers.dense(
                lstm_out,
                encoded_vector_size,
                kernel_initializer=tf.contrib.layers.xavier_initializer())
            bn5 = tf.layers.batch_normalization(h5, training=self.is_training)
            drp5 = tf.layers.dropout(tf.nn.relu(bn5),
                                     rate=self.config.dropout_rate,
                                     training=self.is_training,
                                     name='dropout')

        with tf.name_scope('reshape_1'):
            # the last encoder conv layer shape
            deconv_init_shape = drp4.get_shape().as_list()
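            # map the dense output (drp5) back onto the spatial feature-map
            # shape of the last encoder layer (drp4) before deconvolution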
            reshaped_drp4 = tf.reshape(drp5, [-1] + deconv_init_shape[1:])
        with tf.name_scope('decoder_1'):
            h6 = conv2d_transpose(
                'decoder1',
                reshaped_drp4,
                output_shape=[
                    self.config.batch_size, self.config.state_size[0] // 8,
                    self.config.state_size[1] // 8, 32
                ],
                kernel_size=(4, 4),
                stride=(2, 2),
            )
            bn6 = tf.layers.batch_normalization(h6, training=self.is_training)
            drp6 = tf.layers.dropout(tf.nn.relu(bn6),
                                     rate=self.config.dropout_rate,
                                     training=self.is_training,
                                     name='dropout')

        with tf.name_scope('decoder_2'):

            h7 = conv2d_transpose(
                'decoder2',
                drp6,
                output_shape=[
                    self.config.batch_size, self.config.state_size[0] // 4,
                    self.config.state_size[1] // 4, 32
                ],
                kernel_size=(6, 6),
                stride=(2, 2),
            )
            bn7 = tf.layers.batch_normalization(h7, training=self.is_training)
            drp7 = tf.layers.dropout(tf.nn.relu(bn7),
                                     rate=self.config.dropout_rate,
                                     training=self.is_training,
                                     name='dropout')

        with tf.name_scope('decoder_3'):
            h8 = conv2d_transpose(
                'decoder3',
                drp7,
                output_shape=[
                    self.config.batch_size, self.config.state_size[0] // 2,
                    self.config.state_size[1] // 2, 32
                ],
                kernel_size=(6, 6),
                stride=(2, 2),
            )
            bn8 = tf.layers.batch_normalization(h8, training=self.is_training)
            drp8 = tf.layers.dropout(tf.nn.relu(bn8),
                                     rate=self.config.dropout_rate,
                                     training=self.is_training,
                                     name='dropout')

        with tf.name_scope('decoder_4'):
            h9 = conv2d_transpose(
                'decoder4',
                x=drp8,
                output_shape=[
                    self.config.batch_size, self.config.state_size[0],
                    self.config.state_size[1], 64
                ],
                kernel_size=(8, 8),
                stride=(2, 2),
            )
            bn9 = tf.layers.batch_normalization(h9, training=self.is_training)
            drp9 = tf.layers.dropout(tf.nn.relu(bn9),
                                     rate=self.config.dropout_rate,
                                     training=self.is_training,
                                     name='dropout')

        with tf.name_scope('decoder_5'):
            next_state_out = tf.layers.conv2d(
                drp9,
                2,
                kernel_size=(3, 3),
                strides=(1, 1),
                kernel_initializer=tf.contrib.layers.xavier_initializer(),
                padding='SAME')
            next_state_out_softmax = tf.nn.softmax(next_state_out)

        if self.config.predict_reward:
            with tf.name_scope('reward_flatten'):
                flattened_drp7 = tf.contrib.layers.flatten(drp7)

            with tf.name_scope('reward_hidden_layer_2'):
                h7_2 = tf.layers.dense(
                    flattened_drp7,
                    128,
                    kernel_initializer=tf.contrib.layers.xavier_initializer())
                drp7_2 = tf.layers.dropout(tf.nn.relu(h7_2),
                                           rate=self.config.dropout_rate,
                                           training=self.is_training,
                                           name='dropout')

            with tf.name_scope('reward_output_layer'):
                reward_out = tf.layers.dense(
                    drp7_2,
                    1,
                    activation=None,
                    kernel_initializer=tf.contrib.layers.xavier_initializer())
        else:
            reward_out = None

        return next_state_out, next_state_out_softmax, reward_out, lstm_new_state
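
For orientation only, a shape-tracing sketch with hypothetical values: the four stride-2 encoder convolutions above shrink each spatial dimension by 16x, and the four stride-2 transposed convolutions restore the full `state_size` resolution (assuming it is divisible by 16).

H, W = 128, 96              # hypothetical self.config.state_size[:2]
enc4 = (H // 16, W // 16)   # spatial size of drp4 after the four stride-2 encoders
dec1 = (H // 8, W // 8)     # decoder_1 output, matching state_size // 8 above
dec4 = (H, W)               # decoder_4 restores the original resolution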
Example #14
    def init_network(self):
        """
        Building the Network here
        :return:
        """

        # Init a VGG16 as an encoder
        self.encoder = VGG16(x_input=self.x_pl,
                             num_classes=self.params.num_classes,
                             pretrained_path=self.args.pretrained_path,
                             train_flag=self.is_training,
                             reduced_flag=False,
                             weight_decay=self.args.weight_decay)

        # Build Encoding part
        self.encoder.build()

        # Build Decoding part
        with tf.name_scope('dilation_2'):
            self.conv4_3_dil = conv2d('conv4_3_dil',
                                      x=self.encoder.conv4_2,
                                      num_filters=512,
                                      kernel_size=(3, 3),
                                      activation=tf.nn.relu,
                                      l2_strength=self.encoder.wd,
                                      is_training=self.is_training)

            self.conv5_1_dil = atrous_conv2d('conv5_1_dil',
                                             x=self.conv4_3_dil,
                                             num_filters=512,
                                             kernel_size=(3, 3),
                                             dilation_rate=2,
                                             activation=tf.nn.relu,
                                             l2_strength=self.encoder.wd,
                                             is_training=self.is_training)

            self.conv5_2_dil = atrous_conv2d('conv5_2_dil',
                                             x=self.conv5_1_dil,
                                             num_filters=512,
                                             kernel_size=(3, 3),
                                             dilation_rate=2,
                                             activation=tf.nn.relu,
                                             l2_strength=self.encoder.wd,
                                             is_training=self.is_training)

            self.conv5_3_dil = atrous_conv2d('conv5_3_dil',
                                             x=self.conv5_2_dil,
                                             num_filters=512,
                                             kernel_size=(3, 3),
                                             dilation_rate=2,
                                             activation=tf.nn.relu,
                                             l2_strength=self.encoder.wd,
                                             is_training=self.is_training)

            self.fc6_dil = atrous_conv2d('fc6_dil',
                                         x=self.conv5_3_dil,
                                         num_filters=1024,
                                         kernel_size=(7, 7),
                                         dilation_rate=4,
                                         activation=tf.nn.relu,
                                         l2_strength=self.encoder.wd,
                                         dropout_keep_prob=0.5,
                                         is_training=self.is_training)

            self.fc7_dil = conv2d('fc7_dil',
                                  x=self.fc6_dil,
                                  num_filters=1024,
                                  kernel_size=(1, 1),
                                  activation=tf.nn.relu,
                                  dropout_keep_prob=0.5,
                                  l2_strength=self.encoder.wd,
                                  is_training=self.is_training)

            self.score_fr = conv2d('score_fr_dil',
                                   x=self.fc7_dil,
                                   num_filters=self.params.num_classes,
                                   kernel_size=(1, 1),
                                   l2_strength=self.encoder.wd,
                                   is_training=self.is_training)

        with tf.name_scope('upscore_8s'):
            self.upscore8 = conv2d_transpose(
                'upscore8',
                x=self.score_fr,
                output_shape=self.x_pl.shape.as_list()[0:3] +
                [self.params.num_classes],
                kernel_size=(16, 16),
                stride=(8, 8),
                l2_strength=self.encoder.wd)

        self.logits = self.upscore8
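
For reference, the resolution-preserving atrous layers above trade stride for dilation; their effective receptive field follows k_eff = k + (k - 1) * (rate - 1). A small helper (illustrative, not part of the snippet) makes the numbers concrete.

def effective_kernel(k, rate):
    """Effective kernel size of a dilated (atrous) convolution."""
    return k + (k - 1) * (rate - 1)

print(effective_kernel(3, 2))   # 5  -> conv5_*_dil layers
print(effective_kernel(7, 4))   # 25 -> fc6_dil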