示例#1
0
    def build_models(self, image):
        """Build the image discriminator graph and store its output on ``self``.

        All ops are created inside the ``img_discriminator`` variable scope.
        Four ``op.conv2d`` stages with 64, 128, 256 and 512 output filters are
        stacked; each is followed by ``op.lrelu``, and stages 2-4 additionally
        pass through an ``op.batch_norm`` object before the activation.  The
        last feature map is reshaped to ``[self.batch_size, -1]`` and fed to
        ``op.linear`` with a single output unit.

        Args:
            image: input image tensor (the original author notes 64x64x3;
                not checked here).

        Returns:
            None.  Results are exposed as attributes: ``img``, ``l1``..``l4``,
            ``l2_bn``..``l4_bn`` and ``score``.
        """
        base_filters = 64
        with tf.variable_scope('img_discriminator'):
            self.img = image  # author's note: size 64, 64, 3

            # Stage 1: convolution + activation only.  The batch-norm variant
            # of this stage is disabled in the original code.
            self.l1 = op.lrelu(op.conv2d(self.img, base_filters, name='l1'))

            # Stages 2-4: convolution -> batch norm -> activation, with the
            # filter count doubling each time.  Attribute and scope names
            # ('l2', 'l2_bn0', ...) match the hand-unrolled version.
            features = self.l1
            for stage, width_mult in ((2, 2), (3, 4), (4, 8)):
                tag = 'l%d' % stage
                conv_out = op.conv2d(features, base_filters * width_mult,
                                     name=tag)
                bn = op.batch_norm(name=tag + '_bn0')
                setattr(self, tag + '_bn', bn)
                features = op.lrelu(bn(conv_out, train=self.train))
                setattr(self, tag, features)

            # Final scoring layer (the original author notes that this
            # differs from the paper being reproduced).
            flat = tf.reshape(self.l4, [self.batch_size, -1])
            self.score = op.linear(flat, 1, 'final')
示例#2
0
    def discriminator(self, image, is_training, reuse=False):
        """Build the discriminator and return ``(sigmoid(logits), logits)``.

        Ops live in the ``discriminator`` variable scope; when ``reuse`` is
        truthy the scope is switched to variable reuse before any layer is
        created.

        Args:
            image: input tensor fed to the first convolution.
            is_training: forwarded to every ``batch_norm`` call.
            reuse: if truthy, reuse the variables of the enclosing scope.

        Returns:
            A 2-tuple ``(tf.sigmoid(logits), logits)`` where ``logits`` is the
            single-unit output of the final ``fc`` layer.
        """
        with tf.variable_scope("discriminator"):
            if reuse:
                tf.get_variable_scope().reuse_variables()

            base_dim = self.discriminator_dim

            # The shape notes below are carried over from the original author
            # and are not verified here (they depend on conv2d's defaults).
            # [batch,256,256,1] -> [batch,128,128,64]
            conv0 = conv2d(image, base_dim, scope="d_h0_conv")
            h0 = lrelu(conv0)

            # [batch,128,128,64] -> [batch,64,64,64*2]
            conv1 = conv2d(h0, base_dim * 2, scope="d_h1_conv")
            h1 = lrelu(batch_norm(conv1, is_training, scope="d_bn_1"))

            # [batch,64,64,64*2] -> [batch,32,32,64*4]
            conv2 = conv2d(h1, base_dim * 4, scope="d_h2_conv")
            h2 = lrelu(batch_norm(conv2, is_training, scope="d_bn_2"))

            # [batch,32,32,64*4] -> [batch,31,31,64*8] per the original note;
            # this stage overrides sh/sw to 1 (presumably the strides).
            conv3 = conv2d(h2, base_dim * 8, sh=1, sw=1, scope="d_h3_conv")
            h3 = lrelu(batch_norm(conv3, is_training, scope="d_bn_3"))

            # Real-or-fake head: flatten per sample, project to one unit.
            flat = tf.reshape(h3, [self.batch_size, -1])
            logits = fc(flat, 1, scope="d_fc1")

            return tf.sigmoid(logits), logits
示例#3
0
    def discriminator(self, image, is_training, reuse=False):
        """Build the discriminator and return ``(sigmoid(logits), logits)``.

        Ops live in the ``discriminator`` variable scope; when ``reuse`` is
        truthy the scope is switched to variable reuse first.  The network is
        one plain convolution stage followed by three convolution +
        batch-norm stages, then two ``fc`` layers (8 units, then 1 unit).

        Args:
            image: input tensor fed to the first convolution.
            is_training: forwarded to every ``batch_norm`` call.
            reuse: if truthy, reuse the variables of the enclosing scope.

        Returns:
            A 2-tuple ``(tf.nn.sigmoid(logits), logits)`` where ``logits`` is
            the output of the last ``fc`` layer.
        """
        with tf.variable_scope("discriminator"):
            if reuse:
                tf.get_variable_scope().reuse_variables()

            base_dim = self.discriminator_dim

            # Stage 0 has no batch norm.
            features = lrelu(conv2d(image, base_dim, scope="d_h0_conv"))

            # Stages 1-3 use 2x, 4x and 8x the base filter count.  Two deeper
            # stages (h4/h5) exist only as disabled code in the original.
            for stage, mult in enumerate((2, 4, 8), start=1):
                conv = conv2d(features,
                              base_dim * mult,
                              scope="d_h%d_conv" % stage)
                normed = batch_norm(conv, is_training,
                                    scope="d_bn_%d" % stage)
                features = lrelu(normed)

            # Real-or-fake head: flatten per sample, then 8 -> 1 units.
            flat = tf.reshape(features, [self.batch_size, -1])
            hidden = fc(flat, 8, scope="d_fc1")
            logits = fc(hidden, 1, scope="d_fc2")

            return tf.nn.sigmoid(logits), logits
示例#4
0
    def build_models(self, image, sentence_vec):
        """Build the text-conditioned image discriminator graph.

        All ops are created inside the ``img_discriminator`` variable scope.
        The sentence vector is projected to 128 units, the image goes through
        four ``op.conv2d`` stages (64/128/256/512 filters), the projected
        sentence vector is tiled 4x4 and concatenated to the image features
        along axis 3, and a fifth convolution plus a single-unit ``op.linear``
        layer produce ``self.score``.

        Args:
            image: input image tensor (the original author notes 64x64x3;
                not checked here).
            sentence_vec: conditioning vector fed to ``op.linear``.

        Returns:
            None.  Results are exposed as attributes: ``img``,
            ``sentence_vec`` (the tiled projection), ``l1``..``l5``,
            ``l2_bn``..``l5_bn`` and ``score``.
        """
        with tf.variable_scope('img_discriminator'):
            self.img = image  # author's note: size 64, 64, 3

            # Projected conditioning vector; author's note: [batch, 128].
            text_embedding = op.lrelu(
                op.linear(sentence_vec, 128, 'conditional_vec'))

            # Channel counts of the four conv stages, and the spatial size
            # the author expects after the fourth one (used for tiling).
            l1_c, l2_c, l3_c, l4_c = 64, 128, 256, 512
            l4_h, l4_w = 4, 4

            # Stage 1: convolution + activation only (the batch-norm variant
            # is disabled in the original code).
            self.l1 = op.lrelu(op.conv2d(self.img, l1_c, name='l1'))

            # Stage 2
            conv2_out = op.conv2d(self.l1, l2_c, name='l2')
            self.l2_bn = op.batch_norm(name='l2_bn0')
            self.l2 = op.lrelu(self.l2_bn(conv2_out, train=self.train))

            # Stage 3
            conv3_out = op.conv2d(self.l2, l3_c, name='l3')
            self.l3_bn = op.batch_norm(name='l3_bn0')
            self.l3 = op.lrelu(self.l3_bn(conv3_out, train=self.train))

            # Stage 4
            conv4_out = op.conv2d(self.l3, l4_c, name='l4')
            self.l4_bn = op.batch_norm(name='l4_bn0')
            image_features = op.lrelu(self.l4_bn(conv4_out, train=self.train))

            # Insert two singleton axes and tile so the text features can be
            # attached at every spatial position of the image features.
            text_embedding = tf.expand_dims(
                tf.expand_dims(text_embedding, 1), 2)
            self.sentence_vec = tf.tile(text_embedding, [1, l4_h, l4_w, 1])

            # NOTE(review): axis-first argument order is the pre-1.0
            # TensorFlow signature of tf.concat -- confirm the targeted
            # TensorFlow version before changing it.
            self.l4 = tf.concat(3, [image_features, self.sentence_vec])

            # Stage 5: the four positional 1s are forwarded unchanged
            # (presumably a 1x1 kernel with stride 1 -- confirm against
            # op.conv2d).
            conv5_out = op.conv2d(self.l4, l4_c, 1, 1, 1, 1, name='l5')
            self.l5_bn = op.batch_norm(name='l5_bn0')
            self.l5 = op.lrelu(self.l5_bn(conv5_out, train=self.train))

            # Final scoring layer (the original author notes that this
            # differs from the paper being reproduced).
            flat = tf.reshape(self.l5, [self.batch_size, -1])
            self.score = op.linear(flat, 1, 'final')
示例#5
0
    def encoder(self, images, is_training, reuse=False):
        """Build the eight-stage encoder under the ``generator`` scope.

        Stage 1 is a bare ``conv2d``.  Stages 2-8 each apply ``lrelu``, then
        ``conv2d``, then ``batch_norm``; their filter counts are 2x, 4x and
        then 8x ``self.generator_dim``.

        Args:
            images: input tensor fed to the first convolution.
            is_training: forwarded to every ``batch_norm`` call.
            reuse: if truthy, reuse the variables of the enclosing scope.

        Returns:
            A 2-tuple ``(e8, encode_layers)``: the output of the last stage
            and a dict mapping ``"e1"``..``"e8"`` to each stage's output.
        """
        with tf.variable_scope("generator"):
            if reuse:
                tf.get_variable_scope().reuse_variables()

            base_dim = self.generator_dim
            encode_layers = {}

            # Stage 1: convolution only, no activation or normalisation.
            current = conv2d(images, base_dim, scope="g_e1_conv")
            encode_layers["e1"] = current

            # Stages 2-8: activation -> convolution -> batch norm.
            for layer, mult in enumerate((2, 4, 8, 8, 8, 8, 8), start=2):
                conv = conv2d(lrelu(current),
                              output_filters=base_dim * mult,
                              scope="g_e%d_conv" % layer)
                current = batch_norm(conv,
                                     is_training,
                                     scope="g_e%d_bn" % layer)
                encode_layers["e%d" % layer] = current

            return current, encode_layers
示例#6
0
 def encode_layer(x, output_filters, layer):
     """Apply ``lrelu`` -> ``conv2d`` -> ``batch_norm`` to ``x``.

     ``layer`` is formatted into the scope names ``d_e<layer>_conv`` and
     ``d_e<layer>_bn``.  ``is_training`` is not a parameter: it is a free
     variable that must be provided by the enclosing scope.
     """
     convolved = conv2d(lrelu(x),
                        output_filters=output_filters,
                        scope="d_e%d_conv" % layer)
     return batch_norm(convolved, is_training, scope="d_e%d_bn" % layer)
示例#7
0
    def discriminator(self, images, is_training, reuse=False):
        """Build an encoder/decoder discriminator and return its output.

        Ops live in the ``discriminator`` variable scope.  Eight encoder
        stages are followed by eight ``deconv2d`` decoder stages whose target
        widths grow from ``output_width / 128`` back to ``output_width``; the
        final decoder output is passed through ``tf.nn.tanh``.

        Args:
            images: input tensor fed to the first convolution.
            is_training: forwarded to every ``batch_norm`` call.
            reuse: if truthy, reuse the variables of the enclosing scope.

        Returns:
            The ``tanh`` of the last decoder stage.
        """
        with tf.variable_scope("discriminator"):
            if reuse:
                tf.get_variable_scope().reuse_variables()

            base_dim = self.generator_dim

            # Encoder: stage 1 is a bare convolution; stages 2-8 apply
            # activation -> convolution -> batch norm.
            code = conv2d(images, base_dim, scope="d_e1_conv")
            for layer, mult in enumerate((2, 4, 8, 8, 8, 8, 8), start=2):
                conv = conv2d(lrelu(code),
                              output_filters=base_dim * mult,
                              scope="d_e%d_conv" % layer)
                code = batch_norm(conv,
                                  is_training,
                                  scope="d_e%d_bn" % layer)

            # Decoder: per-stage square output width and filter count.  The
            # last width is the raw attribute value, not an int() of it.
            s = self.output_width
            widths = [int(s / div) for div in (128, 64, 32, 16, 8, 4, 2)]
            widths.append(s)
            filter_counts = [base_dim * 8] * 4
            filter_counts += [base_dim * 4, base_dim * 2, base_dim,
                              self.output_filters]

            decoded = code
            stages = enumerate(zip(widths, filter_counts), start=1)
            for layer, (width, n_filters) in stages:
                decoded = deconv2d(
                    tf.nn.relu(decoded),
                    [self.batch_size, width, width, n_filters],
                    scope="d_d%d_deconv" % layer)
                # The final stage (8) skips batch norm.
                if layer != 8:
                    decoded = batch_norm(decoded,
                                         is_training,
                                         scope="d_d%d_bn" % layer)
                # Only the first three decoder stages apply dropout; it is
                # called unconditionally, independent of is_training.
                if layer <= 3:
                    decoded = tf.nn.dropout(decoded, 0.5)

            return tf.nn.tanh(decoded)  # scale to (-1, 1)
示例#8
0
    def build_model(self,
                    train_input_frames,
                    train_target_frames,
                    test_input_frames,
                    test_target_frames,
                    ):
        """Build the multi-scale generator for train and test inputs.

        The per-scale channel lists and kernel sizes are read from
        ``self.info['MODEL_PARAMETER_G']``.  For each scale, inputs and
        targets are resized by ``1 / 2 ** (num_scales - 1 - scale_num)`` (so
        scale 0 is the smallest), the previous scale's prediction is resized
        and concatenated along axis 3 when there is one, and a stack of
        ``ops.conv2d`` layers is applied.  The last layer of each stack uses
        the ``'tanh'`` activation, all others ``'leaky_relu'``.

        Args:
            train_input_frames: training input tensor.
            train_target_frames: training target tensor.
            test_input_frames: test input tensor.
            test_target_frames: test target tensor.

        Returns:
            A 4-tuple of lists with one entry per scale:
            ``(train_preds, train_targets, test_preds, test_targets)``.
        """
        generator_cfg = self.info['MODEL_PARAMETER_G']
        self.scale_channels = generator_cfg['SCALE_CHANNELS']
        self.scale_kernel_sizes = generator_cfg['SCALE_KERNEL_SIZES']
        self.num_scale_nets = len(self.scale_channels)

        with tf.variable_scope(self.gen_name):
            with tf.variable_scope(self.gen_name + '_data'):
                # NOTE(review): height is read from the input frames but
                # width from the target frames, exactly as in the original;
                # presumably both share the same spatial size -- confirm.
                train_height = train_input_frames.get_shape().as_list()[1]
                train_width = train_target_frames.get_shape().as_list()[2]
                test_height = test_input_frames.get_shape().as_list()[1]
                test_width = test_target_frames.get_shape().as_list()[2]

            train_scale_preds, train_scale_targets = [], []
            test_scale_preds, test_scale_targets = [], []

            for scale_num in range(self.num_scale_nets):
                scope_name = self.gen_name + '_scale' + str(scale_num)
                with tf.variable_scope(scope_name), \
                        tf.variable_scope(scope_name + 'convolution'):
                    # Spatial size handled by this scale.
                    scale_factor = 1. / 2 ** (
                        (self.num_scale_nets - 1) - scale_num)
                    train_size = [int(train_height * scale_factor),
                                  int(train_width * scale_factor)]
                    test_size = [int(test_height * scale_factor),
                                 int(test_width * scale_factor)]

                    # Resize ops are created in the original order:
                    # train input, test input, train target, test target.
                    train_features = tf.image.resize_images(
                        train_input_frames, train_size)
                    test_features = tf.image.resize_images(
                        test_input_frames, test_size)
                    scale_train_target = tf.image.resize_images(
                        train_target_frames, train_size)
                    scale_test_target = tf.image.resize_images(
                        test_target_frames, test_size)

                    if scale_num > 0:
                        # Feed the previous scale's prediction, resized to
                        # this scale, as extra channels.
                        upscaled_train_pred = tf.image.resize_images(
                            train_scale_preds[-1], train_size)
                        upscaled_test_pred = tf.image.resize_images(
                            test_scale_preds[-1], test_size)
                        train_features = tf.concat(
                            [train_features, upscaled_train_pred], 3)
                        test_features = tf.concat(
                            [test_features, upscaled_test_pred], 3)

                    kernel_sizes = self.scale_kernel_sizes[scale_num]
                    channels = self.scale_channels[scale_num]
                    final_layer = len(kernel_sizes) - 1
                    for i, kernel in enumerate(kernel_sizes):
                        # ops.conv2d takes the train and test tensors
                        # together and returns the transformed pair.
                        if i == final_layer:
                            activation = 'tanh'
                        else:
                            activation = 'leaky_relu'
                        train_features, test_features = ops.conv2d(
                            input=train_features,
                            test_input=test_features,
                            filter_size=[kernel, kernel,
                                         channels[i], channels[i + 1]],
                            b_size=[channels[i + 1]],
                            strides=[1, 1, 1, 1],
                            padding='SAME',
                            dtype=tf.float32,
                            activate=activation
                        )

                    train_scale_preds.append(train_features)
                    test_scale_preds.append(test_features)
                    train_scale_targets.append(scale_train_target)
                    test_scale_targets.append(scale_test_target)

            return (train_scale_preds, train_scale_targets,
                    test_scale_preds, test_scale_targets)