Example #1
File: model_gan.py  Project: huawang123/GAN
def generator(z, is_training=True, reuse=False):
    # Network architecture is exactly the same as in infoGAN (https://arxiv.org/abs/1606.03657)
    with tf.variable_scope("generator", reuse=reuse):
        net = tf.nn.relu(
            bn(linear(z, 1024, scope='g_fc1'),
               is_training=is_training,
               scope='linear1'))
        net = tf.nn.relu(
            bn(linear(net, 128 * 7 * 7, scope='g_fc2'),
               is_training=is_training,
               scope='linear2'))
        net = tf.reshape(net, [-1, 7, 7, 128])
        batch_size = net.get_shape().as_list()[0]
        net = deconv2d(net,
                       output_size=14,
                       output_channel=64,
                       kernel=(4, 4),
                       stride=(2, 2),
                       activation='relu',
                       use_bn=True,
                       is_training=is_training,  # pass the training flag through (was hardcoded to True)
                       name='d_conv1')
        out = deconv2d(net,
                       output_size=28,
                       output_channel=1,
                       kernel=(4, 4),
                       stride=(2, 2),
                       activation='sigmoid',
                       name='gen_images')
        return out
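For reference, the two deconv2d calls above boil down to plain tf.nn.conv2d_transpose ops. Below is a minimal, self-contained sketch of the first upsampling step (7x7x128 -> 14x14x64), assuming TF 1.x; the variable name 'g_dc_w' and the initializer are illustrative, not taken from the project above.

import tensorflow as tf  # TF 1.x

x = tf.placeholder(tf.float32, [None, 7, 7, 128])
# filter shape for conv2d_transpose is [height, width, out_channels, in_channels]
w = tf.get_variable('g_dc_w', [4, 4, 64, 128],
                    initializer=tf.truncated_normal_initializer(stddev=0.02))
batch = tf.shape(x)[0]
# stride 2 with 'SAME' padding doubles the spatial size: 7x7 -> 14x14
y = tf.nn.conv2d_transpose(x, w,
                           output_shape=tf.stack([batch, 14, 14, 64]),
                           strides=[1, 2, 2, 1], padding='SAME')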
Example #2
 def generator(self, z):
     s_h, s_w = self.image_h, self.image_w
     s_h2, s_w2 = utils.compute_size(s_h, 2), utils.compute_size(s_w, 2)
     s_h4, s_w4 = utils.compute_size(s_h2, 2), utils.compute_size(s_w2, 2)
     s_h8, s_w8 = utils.compute_size(s_h4, 2), utils.compute_size(s_w4, 2)
     s_h16, s_w16 = utils.compute_size(s_h8, 2), utils.compute_size(s_w8, 2)
     fmap_dim = self.fmap_dim_g
     batch_size = self.batch_size
     with tf.variable_scope("generator") as scope:
         z_ = utils.fc(z, s_h16 * s_w16 * 8 * fmap_dim, name='g_l0_fc')
         gl0 = utils.lrelu(
             self.g_bn_l0(
                 tf.reshape(z_, [batch_size, s_h16, s_w16, fmap_dim * 8])))
         gl1 = utils.lrelu(
             self.g_bn_l1(
                 utils.deconv2d(gl0, [batch_size, s_h8, s_w8, fmap_dim * 4],
                                name='g_l1_deconv')))
         gl2 = utils.lrelu(
             self.g_bn_l2(
                 utils.deconv2d(gl1, [batch_size, s_h4, s_w4, fmap_dim * 2],
                                name='g_l2_deconv')))
         gl3 = utils.lrelu(
             self.g_bn_l3(
                 utils.deconv2d(gl2, [batch_size, s_h2, s_w2, fmap_dim * 1],
                                name='g_l3_deconv')))
         gl4 = utils.deconv2d(gl3, [batch_size, s_h, s_w, 3],
                              name='g_l4_deconv')
     return tf.nn.tanh(gl4)
Example #3
    def generator(self, noise, caption):
        s = self.image_size
        s2, s4, s8, s16 = int(s / 2), int(s / 4), int(s / 8), int(s / 16)

        reduced_caption = utils.lrelu(
            utils.linear(caption, self.reduced_text_dim, 'g_embedding'))
        noise_concat = tf.concat([noise, reduced_caption], 1)
        new_noise = utils.linear(noise_concat,
                                 self.channel_dim * 8 * s16 * s16, 'g_h0_lin')

        h0 = tf.reshape(new_noise, [-1, s16, s16, self.channel_dim * 8])
        h0 = tf.nn.relu(self.g_bn0(h0))

        h1 = utils.deconv2d(h0,
                            [self.batch_size, s8, s8, self.channel_dim * 4],
                            name='g_h1')
        h1 = tf.nn.relu(self.g_bn1(h1))

        h2 = utils.deconv2d(h1,
                            [self.batch_size, s4, s4, self.channel_dim * 2],
                            name='g_h2')
        h2 = tf.nn.relu(self.g_bn2(h2))

        h3 = utils.deconv2d(h2, [self.batch_size, s2, s2, self.channel_dim],
                            name='g_h3')
        h3 = tf.nn.relu(self.g_bn3(h3))

        h4 = utils.deconv2d(h3, [self.batch_size, s, s, 3], name='g_h4')

        return (tf.tanh(h4) / 2. + 0.5)
Example #4
 def __init__(self,
              n_channels,
              n_channels_sm,
              n_branches,
              ksize,
              fmap_size,
              use_batchnorm=False):
     super(DecSwitchedDeconv, self).__init__()
     self.n_branches = n_branches
     self.deconvs = nn.ModuleList([
         nn.Sequential(
             U.deconv2d(
                 n_channels, n_channels_sm, ksize, 1, out_h_or_w=fmap_size),
             nn.BatchNorm2d(n_channels_sm), nn.ReLU(),
             U.deconv2d(
                 n_channels_sm, n_channels, ksize, 1, out_h_or_w=fmap_size),
             nn.BatchNorm2d(n_channels))
         if use_batchnorm else nn.Sequential(
             U.deconv2d(
                 n_channels, n_channels_sm, ksize, 1, out_h_or_w=fmap_size),
             nn.ReLU(),
             U.deconv2d(
                 n_channels_sm, n_channels, ksize, 1, out_h_or_w=fmap_size))
         for _ in range(n_branches)
     ])
Example #5
    def generator(self, cond):
        with tf.variable_scope("gen"):
            feature = conf.conv_channel_base
            e1 = conv2d(cond, feature, name="e1")
            e2 = batch_norm(conv2d(lrelu(e1), feature*2, name="e2"), "e2")
            e3 = batch_norm(conv2d(lrelu(e2), feature*4, name="e3"), "e3")
            e4 = batch_norm(conv2d(lrelu(e3), feature*8, name="e4"), "e4")
            e5 = batch_norm(conv2d(lrelu(e4), feature*8, name="e5"), "e5")
            e6 = batch_norm(conv2d(lrelu(e5), feature*8, name="e6"), "e6")
            e7 = batch_norm(conv2d(lrelu(e6), feature*8, name="e7"), "e7")
            e8 = batch_norm(conv2d(lrelu(e7), feature*8, name="e8"), "e8")

            size = conf.img_size
            # num[i] is the spatial size of decoder layer d_i, obtained by
            # repeatedly halving img_size (e.g. img_size=256 gives
            # num = [0, 2, 4, 8, 16, 32, 64, 128, 256]).
            num = [0] * 9
            for i in range(1, 9):
                num[9 - i] = size
                size = (size + 1) // 2  # integer division; '/' would yield floats in Python 3

            # tf.concat takes (values, axis) in TF >= 1.0
            d1 = deconv2d(tf.nn.relu(e8), [1, num[1], num[1], feature*8], name="d1")
            d1 = tf.concat([tf.nn.dropout(batch_norm(d1, "d1"), 0.5), e7], 3)
            d2 = deconv2d(tf.nn.relu(d1), [1, num[2], num[2], feature*8], name="d2")
            d2 = tf.concat([tf.nn.dropout(batch_norm(d2, "d2"), 0.5), e6], 3)
            d3 = deconv2d(tf.nn.relu(d2), [1, num[3], num[3], feature*8], name="d3")
            d3 = tf.concat([tf.nn.dropout(batch_norm(d3, "d3"), 0.5), e5], 3)
            d4 = deconv2d(tf.nn.relu(d3), [1, num[4], num[4], feature*8], name="d4")
            d4 = tf.concat([batch_norm(d4, "d4"), e4], 3)
            d5 = deconv2d(tf.nn.relu(d4), [1, num[5], num[5], feature*4], name="d5")
            d5 = tf.concat([batch_norm(d5, "d5"), e3], 3)
            d6 = deconv2d(tf.nn.relu(d5), [1, num[6], num[6], feature*2], name="d6")
            d6 = tf.concat([batch_norm(d6, "d6"), e2], 3)
            d7 = deconv2d(tf.nn.relu(d6), [1, num[7], num[7], feature], name="d7")
            d7 = tf.concat([batch_norm(d7, "d7"), e1], 3)
            d8 = deconv2d(tf.nn.relu(d7), [1, num[8], num[8], conf.img_channel], name="d8")

            return tf.nn.tanh(d8)
Example #6
    def generator(self, cond):
        with tf.variable_scope("gen"):
            feature = conf.conv_channel_base
            e1 = conv2d(cond, feature, name="e1")
            e2 = batch_norm(conv2d(lrelu(e1), feature*2, name="e2"), "e2")
            e3 = batch_norm(conv2d(lrelu(e2), feature*4, name="e3"), "e3")
            e4 = batch_norm(conv2d(lrelu(e3), feature*8, name="e4"), "e4")
            e5 = batch_norm(conv2d(lrelu(e4), feature*8, name="e5"), "e5")
            e6 = batch_norm(conv2d(lrelu(e5), feature*8, name="e6"), "e6")
            e7 = batch_norm(conv2d(lrelu(e6), feature*8, name="e7"), "e7")
            e8 = batch_norm(conv2d(lrelu(e7), feature*8, name="e8"), "e8")

            # tf.concat takes (values, axis) in TF >= 1.0
            d1 = deconv2d(tf.nn.relu(e8), [1, 2, 2, feature*8], name="d1")
            d1 = tf.concat([tf.nn.dropout(batch_norm(d1, "d1"), 0.5), e7], 3)
            d2 = deconv2d(tf.nn.relu(d1), [1, 4, 4, feature*8], name="d2")
            d2 = tf.concat([tf.nn.dropout(batch_norm(d2, "d2"), 0.5), e6], 3)
            d3 = deconv2d(tf.nn.relu(d2), [1, 8, 8, feature*8], name="d3")
            d3 = tf.concat([tf.nn.dropout(batch_norm(d3, "d3"), 0.5), e5], 3)
            d4 = deconv2d(tf.nn.relu(d3), [1, 16, 16, feature*8], name="d4")
            d4 = tf.concat([batch_norm(d4, "d4"), e4], 3)
            d5 = deconv2d(tf.nn.relu(d4), [1, 32, 32, feature*4], name="d5")
            d5 = tf.concat([batch_norm(d5, "d5"), e3], 3)
            d6 = deconv2d(tf.nn.relu(d5), [1, 64, 64, feature*2], name="d6")
            d6 = tf.concat([batch_norm(d6, "d6"), e2], 3)
            d7 = deconv2d(tf.nn.relu(d6), [1, 128, 128, feature], name="d7")
            d7 = tf.concat([batch_norm(d7, "d7"), e1], 3)
            d8 = deconv2d(tf.nn.relu(d7), [1, 256, 256, conf.img_channel], name="d8")

            return tf.nn.tanh(d8)
Example #7
    def __init__(self, factors):
        super(Decoder, self).__init__()
        self.factors = factors

        self.factor_embeds = nn.ParameterDict({
            'color': nn.Parameter(torch.randn(self.factors['color'], N_FACTOR_DIMS)),
            'shape': nn.Parameter(torch.randn(self.factors['shape'], N_FACTOR_DIMS)),
            'size': nn.Parameter(torch.randn(self.factors['size'], N_FACTOR_DIMS)),
            'camera': nn.Parameter(torch.randn(self.factors['camera'], N_FACTOR_DIMS)),
            'background': nn.Parameter(torch.randn(self.factors['background'], N_FACTOR_DIMS)),
            'horizontal': nn.Parameter(torch.randn(self.factors['horizontal'], N_FACTOR_DIMS)),
            'vertical': nn.Parameter(torch.randn(self.factors['vertical'], N_FACTOR_DIMS))
        })

        n_dims = N_EMBED_DIMS + N_FACTOR_DIMS
        self.input_color = nn.Linear(n_dims, 512)
        self.input_shape = nn.Linear(n_dims, 512)
        self.input_size = nn.Linear(n_dims, 512)
        self.path_col_shp_siz = nn.Sequential(nn.ReLU(), nn.Linear(512, 1024))

        self.input_horizontal = nn.Linear(n_dims, 512)
        self.input_vertical = nn.Linear(n_dims, 512)
        self.path_hor_ver = nn.Sequential(nn.ReLU(), nn.Linear(512, 1024))

        self.input_camera = nn.Sequential(nn.Linear(n_dims, 512), nn.ReLU(), nn.Linear(512, 1024))
        self.input_background = nn.Sequential(nn.Linear(n_dims, 512), nn.ReLU(), nn.Linear(512, 1024))

        self.path_shallow = nn.Sequential(nn.ReLU(), nn.Linear(1024, 1024),
                                          U.Lambda(lambda x: x.reshape(-1, 16, 8, 8)),  # 16 x 8 x 8
                                          nn.ReLU(), U.deconv2d(16, 64, 1, 1, True, 8))  # 64 x 8 x 8
        self.path_deep = nn.Sequential(nn.ReLU(), nn.Linear(1024, 1024),
                                       U.Lambda(lambda x: x.reshape(-1, 64, 4, 4)),  # 64 x 4 x 4
                                       nn.ReLU(), U.deconv2d(64, 64, 4, 1, True, 4),  # 64 x 4 x 4
                                       nn.ReLU(), U.deconv2d(64, 64, 4, 2, True, 8))  # 64 x 8 x 8
        self.path_base = nn.Sequential(nn.ReLU(), U.deconv2d(64, 16, 4, 2, True, 16),  # 16 x 16 x 16
                                       nn.ReLU(), U.deconv2d(16, 3, 6, 4, True, 64))  # 3 x 64 x 64

        self.path_shallow2 = nn.Sequential(nn.ReLU(), nn.Linear(1024, 1024),
                                           U.Lambda(lambda x: x.reshape(-1, 16, 8, 8)),  # 16 x 8 x 8
                                           nn.ReLU(), U.deconv2d(16, 64, 2, 2, True, 16))  # 64 x 16 x 16
        self.path_deep2 = nn.Sequential(nn.ReLU(), nn.Linear(1024, 1024),
                                        U.Lambda(lambda x: x.reshape(-1, 64, 4, 4)),  # 64 x 4 x 4
                                        nn.ReLU(), U.deconv2d(64, 64, 4, 2, True, 8),  # 64 x 8 x 8
                                        nn.ReLU(), U.deconv2d(64, 64, 4, 2, True, 16))  # 64 x 16 x 16
        self.path_base2 = nn.Sequential(nn.ReLU(), U.deconv2d(64, 16, 2, 2, True, 32),  # 16 x 32 x 32
                                        nn.ReLU(), U.deconv2d(16, 3, 2, 2, True, 64))  # 3 x 64 x 64
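The per-layer shape comments in this example follow the usual transposed-convolution arithmetic, H_out = (H_in - 1) * stride - 2 * padding + kernel_size + output_padding. A self-contained sketch with plain nn.ConvTranspose2d (presumably close to what the U.deconv2d helper wraps; how it chooses padding to hit out_h_or_w is an assumption, not confirmed by the source):

import torch
import torch.nn as nn

# H_out = (H_in - 1) * stride - 2 * padding + kernel_size
deconv = nn.ConvTranspose2d(64, 16, kernel_size=4, stride=2, padding=1)
x = torch.randn(1, 64, 8, 8)
print(deconv(x).shape)  # torch.Size([1, 16, 16, 16]): (8 - 1) * 2 - 2 + 4 = 16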
Example #8
    def set_up(self):

        with tf.variable_scope('conv1'):
            network = conv2d(self.input, [7, 7], 32, scope='conv1_1')
            network = conv2d(network, [3, 3], 32, scope='conv1_2')
            network = max_pool(network, 'pool1')  # downsample

        # variable scopes must be distinct, or TF raises a "variable already exists" error
        with tf.variable_scope('conv2'):
            network = conv2d(network, [3, 3], 64, scope='conv2_1')
            network = conv2d(network, [3, 3], 64, scope='conv2_2')
            network = max_pool(network, 'pool2')  # downsample

        with tf.variable_scope('conv3'):
            network = conv2d(network, [3, 3], 128, scope='conv3_1')
            network = conv2d(network, [3, 3], 128, scope='conv3_2')

        with tf.variable_scope('deconv1'):
            network = deconv2d(network, [3, 3], 64,
                               scope='deconv1_1')  # upsample
            network = deconv2d(network, [3, 3],
                               64,
                               stride=1,
                               scope='deconv1_2')

        with tf.variable_scope('deconv2'):
            network = deconv2d(network, [3, 3], 32,
                               scope='deconv2_1')  # upsample
            network = deconv2d(network, [3, 3],
                               32,
                               stride=1,
                               scope='deconv2_2')

        with tf.variable_scope('out_class'):
            logits = conv2d(network, [3, 3],
                            2,
                            bn=False,
                            relu=False,
                            scope='logits')

        self.pred_prob = tf.nn.softmax(logits, name='predictions')[:, :, :, 1]
        self.pred = tf.argmax(logits, 3)
        self.loss = iou_loss(self.pred_prob, self.label)
        self.train_score = iou_loss(tf.cast(self.pred, tf.float32), self.label)
        self.optimizer = tf.train.AdamOptimizer(
            learning_rate=self.learning_rate, epsilon=1e-4).minimize(self.loss)
Example #9
    def generator(self, cond):
        with tf.variable_scope("gen"):
            e1 = batch_norm(conv2d(cond, 64, f=4, name="e1"),
                            'e1')  ##128x128x64
            e10 = tf.nn.elu(e1)
            e1a = Identity_block_for_G(e10, [16, 16, 64], stage='Gstge1a')
            e1b = Identity_block_for_G(e1a, [16, 16, 64], stage='Gstge1b')
            e1c = Identity_block_for_G(e1b, [16, 16, 64], stage='Gstge1c')
            e2 = batch_norm(conv2d(e1c, 128, f=4, name="e2"), "e2")  #64x64x128
            e20 = tf.nn.elu(e2)
            e2a = Identity_block_for_G(e20, [32, 32, 128], stage='Gstge2a')
            e2b = Identity_block_for_G(e2a, [32, 32, 128], stage='Gstge2b')
            e2c = Identity_block_for_G(e2b, [32, 32, 128], stage='Gstge2c')
            e3 = batch_norm(conv2d(e2c, 256, f=4, name="e3"), "e3")  #32x32x256
            e30 = tf.nn.elu(e3)
            e3a = Identity_block_for_G(e30, [64, 64, 256], stage='Gstge3a')
            e3b = Identity_block_for_G(e3a, [64, 64, 256], stage='Gstge3b')
            e3c = Identity_block_for_G(e3b, [64, 64, 256], stage='Gstge3c')
            e4 = batch_norm(conv2d(e3c, 512, f=4, name="e4"), "e4")  #16x16x512
            e40 = tf.nn.elu(e4)
            e4a = Identity_block_for_G(e40, [128, 128, 512], stage='Gstge4a')
            e4b = Identity_block_for_G(e4a, [128, 128, 512], stage='Gstge4b')
            e4c = Identity_block_for_G(e4b, [128, 128, 512], stage='Gstge4c')

            e5 = batch_norm(conv2d(e4c, 512, f=4, name="e5"), "e5")  #8x8x512
            e50 = tf.nn.elu(e5)
            e5a = Identity_block_for_G(e50, [128, 128, 512], stage='Gstge5a')
            e5b = Identity_block_for_G(e5a, [128, 128, 512], stage='Gstge5b')
            e6 = batch_norm(conv2d(e5b, 512, f=4, name="e6"), "e6")  #4x4x512
            e60 = tf.nn.elu(e6)

            d1 = batch_norm(deconv2d(e60, [1, 8, 8, 512], name="d1"), 'd1')
            d10 = tf.nn.elu(tf.add(d1, e5))
            d1a = Identity_block_for_G(d10, [128, 128, 512], stage='Gstge6a')
            d1b = Identity_block_for_G(d1a, [128, 128, 512], stage='Gstge6b')

            d2 = batch_norm(deconv2d(d1b, [1, 16, 16, 512], name="d2"), 'd2')
            d20 = tf.nn.elu(tf.add(e4, d2))
            d2a = Identity_block_for_G(d20, [128, 128, 512], stage='Gstge7a')
            d2b = Identity_block_for_G(d2a, [128, 128, 512], stage='Gstge7b')
            d2c = Identity_block_for_G(d2b, [128, 128, 512], stage='Gstge7c')
            d3 = batch_norm(deconv2d(d2c, [1, 32, 32, 256], name="d3"), 'd3')
            d30 = tf.nn.elu(tf.add(e3, d3))
            d3a = Identity_block_for_G(d30, [64, 64, 256], stage='Gstge8a')
            d3b = Identity_block_for_G(d3a, [64, 64, 256], stage='Gstge8b')
            d3c = Identity_block_for_G(d3b, [64, 64, 256], stage='Gstge8c')
            d4 = batch_norm(deconv2d(d3c, [1, 64, 64, 128], name="d4"), 'd4')
            d40 = tf.nn.elu(tf.add(e2, d4))
            d4a = Identity_block_for_G(d40, [32, 32, 128], stage='Gstge9a')
            d4b = Identity_block_for_G(d4a, [32, 32, 128], stage='Gstge9b')
            d4c = Identity_block_for_G(d4b, [32, 32, 128], stage='Gstge9c')
            d5 = batch_norm(deconv2d(d4c, [1, 128, 128, 64], name="d5"), 'd5')
            d50 = tf.nn.elu(tf.add(e1, d5))
            d5a = Identity_block_for_G(d50, [16, 16, 64], stage='Gstge10a')
            d5b = Identity_block_for_G(d5a, [16, 16, 64], stage='Gstge10b')
            d5c = Identity_block_for_G(d5b, [16, 16, 64], stage='Gstge10c')
            d6 = deconv2d(d5c, [1, 256, 256, 1], name="d6")

            return tf.tanh(d6)
Example #10
    def __call__(self, z, y=None, is_training=True, reuse=False):
        with tf.variable_scope(self.name, reuse=reuse):
            batch_size = z.get_shape().as_list()[0]
            if y is not None:
                z = tf.concat([z, y], 1)  # [bz,zdim+10]

            net = tf.nn.relu(
                bn(dense(z, 1024, name='g_fc1'), is_training, name='g_bn1'))
            net = tf.nn.relu(
                bn(dense(net, 128 * 7 * 7, name='g_fc2'),
                   is_training,
                   name='g_bn2'))
            net = tf.reshape(net, [batch_size, 7, 7, 128])
            # [bz, 14, 14, 64]
            net = tf.nn.relu(
                bn(deconv2d(net, 64, 4, 4, 2, 2, padding='SAME', name='g_dc3'),
                   is_training,
                   name='g_bn3'))
            # [bz, 28, 28, 1]
            out = tf.nn.sigmoid(
                deconv2d(net, 1, 4, 4, 2, 2, padding='SAME', name='g_dc4'))
            return out
Example #11
def VAE(input_shape=[None, 784],
        n_filters=[64, 64, 64],
        filter_sizes=[4, 4, 4],
        n_hidden=32,
        n_code=2,
        activation=tf.nn.tanh,
        dropout=False,
        denoising=False,
        convolutional=False,
        variational=False,
        on_cloud=0):
    """(Variational) (Convolutional) (Denoising) Autoencoder.

    Uses tied weights.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input to the network. e.g. for MNIST: [None, 784].
    n_filters : list, optional
        Number of filters for each layer.
        If convolutional=True, this is the number of output filters for each
        convolutional layer, given as a list with one entry per layer.
        If convolutional=False, this is the number of neurons for each layer
        of a fully connected network.
    filter_sizes : list, optional
        Only applied when convolutional=True.  This refers to the ksize (height
        and width) of each convolutional layer.
    n_hidden : int, optional
        Only applied when variational=True.  This refers to the first fully
        connected layer prior to the variational embedding, directly after
        the encoding.  After the variational embedding, another fully connected
        layer is created with the same size prior to decoding.  Set to 0 to
        not use an additional hidden layer.
    n_code : int, optional
        Only applied when variational=True.  This refers to the number of
        latent Gaussians to sample for creating the innermost encoding.
    activation : function, optional
        Activation function to apply to each layer, e.g. tf.nn.relu
    dropout : bool, optional
        Whether or not to apply dropout.  If using dropout, you must feed a
        value for 'keep_prob', as returned in the dictionary.  1.0 means no
        dropout is used; 0.0 means every connection is dropped.  Sensible
        values are between 0.5 and 0.8.
    denoising : bool, optional
        Whether or not to apply denoising.  If using denoising, you must feed
        a value for 'corrupt_prob', as returned in the dictionary.  1.0 means
        no corruption is used; 0.0 means every feature is corrupted.  Sensible
        values are between 0.5 and 0.8.
    convolutional : bool, optional
        Whether or not to use a convolutional network; otherwise a fully
        connected network is created.  This affects the meaning of the
        n_filters parameter.
    variational : bool, optional
        Whether or not to create a variational embedding layer.  This will
        create a fully connected layer after the encoding; if `n_hidden` is
        greater than 0, it will then create a multivariate Gaussian sampling
        layer, followed by another fully connected layer.  The sizes of the
        fully connected layers are determined by `n_hidden`, and the size of
        the sampling layer is determined by `n_code`.

    Returns
    -------
    model : dict
        {
            'cost': Tensor to optimize.
            'Ws': All weights of the encoder.
            'x': Input Placeholder
            'z': Innermost encoding Tensor (latent features)
            'y': Reconstruction of the Decoder
            'keep_prob': Amount to keep when using Dropout
            'corrupt_prob': Amount to corrupt when using Denoising
            'train': Set to True when training/Applies to Batch Normalization.
        }
    """
    # network input / placeholders for train (bn) and dropout
    x = tf.placeholder(tf.float32, input_shape, 'x')
    phase_train = tf.placeholder(tf.bool, name='phase_train')
    keep_prob = tf.placeholder(tf.float32, name='keep_prob')
    corrupt_prob = tf.placeholder(tf.float32, [1])

    # apply noise if denoising
    x_ = (utils.corrupt(x) * corrupt_prob + x *
          (1 - corrupt_prob)) if denoising else x

    # 2d -> 4d if convolution
    x_tensor = utils.to_tensor(x_) if convolutional else x_
    current_input = x_tensor

    Ws = []
    shapes = []

    # Build the encoder
    for layer_i, n_output in enumerate(n_filters):
        with tf.variable_scope('encoder/{}'.format(layer_i)):
            shapes.append(current_input.get_shape().as_list())
            if convolutional:
                h, W = utils.conv2d(x=current_input,
                                    n_output=n_output,
                                    k_h=filter_sizes[layer_i],
                                    k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            Ws.append(W)
            current_input = h

    shapes.append(current_input.get_shape().as_list())

    with tf.variable_scope('variational'):
        if variational:
            dims = current_input.get_shape().as_list()
            flattened = utils.flatten(current_input)

            if n_hidden:
                h = utils.linear(flattened, n_hidden, name='W_fc')[0]
                h = activation(batch_norm(h, phase_train, 'fc/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = flattened

            z_mu = utils.linear(h, n_code, name='mu')[0]
            z_log_sigma = 0.5 * utils.linear(h, n_code, name='log_sigma')[0]

            # Sample from noise distribution p(eps) ~ N(0, 1)
            epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_code]))

            # Sample from posterior
            z = z_mu + tf.multiply(epsilon, tf.exp(z_log_sigma))

            if n_hidden:
                h = utils.linear(z, n_hidden, name='fc_t')[0]
                h = activation(batch_norm(h, phase_train, 'fc_t/bn'))
                if dropout:
                    h = tf.nn.dropout(h, keep_prob)
            else:
                h = z

            size = dims[1] * dims[2] * dims[3] if convolutional else dims[1]
            h = utils.linear(h, size, name='fc_t2')[0]
            current_input = activation(batch_norm(h, phase_train, 'fc_t2/bn'))
            if dropout:
                current_input = tf.nn.dropout(current_input, keep_prob)

            if convolutional:
                current_input = tf.reshape(
                    current_input,
                    tf.stack([
                        tf.shape(current_input)[0], dims[1], dims[2], dims[3]
                    ]))
        else:
            z = current_input

    shapes.reverse()
    n_filters.reverse()
    Ws.reverse()

    n_filters += [input_shape[-1]]

    # %%
    # Decoding layers
    for layer_i, n_output in enumerate(n_filters[1:]):
        with tf.variable_scope('decoder/{}'.format(layer_i)):
            shape = shapes[layer_i + 1]
            if convolutional:
                h, W = utils.deconv2d(x=current_input,
                                      n_output_h=shape[1],
                                      n_output_w=shape[2],
                                      n_output_ch=shape[3],
                                      n_input_ch=shapes[layer_i][3],
                                      k_h=filter_sizes[layer_i],
                                      k_w=filter_sizes[layer_i])
            else:
                h, W = utils.linear(x=current_input, n_output=n_output)
            h = activation(batch_norm(h, phase_train, 'dec/bn' + str(layer_i)))
            if dropout:
                h = tf.nn.dropout(h, keep_prob)
            current_input = h

    y = current_input
    x_flat = utils.flatten(x)
    y_flat = utils.flatten(y)

    # l2 loss
    loss_x = tf.reduce_sum(tf.squared_difference(x_flat, y_flat), 1)

    if variational:
        # variational lower bound, kl-divergence
        loss_z = -0.5 * tf.reduce_sum(
            1.0 + 2.0 * z_log_sigma - tf.square(z_mu) -
            tf.exp(2.0 * z_log_sigma), 1)

        # add l2 loss
        cost = tf.reduce_mean(loss_x + loss_z)
    else:
        # just optimize l2 loss
        cost = tf.reduce_mean(loss_x)

    return {
        'cost': cost,
        'Ws': Ws,
        'x': x,
        'z': z,
        'y': y,
        'keep_prob': keep_prob,
        'corrupt_prob': corrupt_prob,
        'train': phase_train
    }
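A minimal training sketch against the returned dictionary, assuming the VAE above is defined and TF 1.x is available; the random batch is a stand-in for real data. Note that the function reverses its default n_filters list in place, so repeated calls in one process should pass an explicit list.

import numpy as np
import tensorflow as tf  # TF 1.x

model = VAE(variational=True, convolutional=True, dropout=True)
train_op = tf.train.AdamOptimizer(1e-3).minimize(model['cost'])

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    batch_x = np.random.rand(64, 784).astype(np.float32)  # stand-in data
    _, cost = sess.run(
        [train_op, model['cost']],
        feed_dict={model['x']: batch_x,
                   model['train']: True,        # batch norm in training mode
                   model['keep_prob']: 0.8,     # dropout keep probability
                   model['corrupt_prob']: [1.0]})  # 1.0 = no corruption (unused when denoising=False)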
Example #12
def VAE(input_shape=[None, 784],
        n_filters=[64, 64, 64],
        filter_sizes=[4, 4, 4],
        encoderNum=0,
        n_hidden=32,
        n_code=2,
        activation=tf.nn.tanh):
    '''
    Parameters
    ----------
    input_shape : list, optional
        Shape of the input to the network, e.g. for MNIST: [None, 784].

    n_filters : list, optional
        Number of output filters to create for each layer, given as a list
        with one entry per layer.

    filter_sizes : list, optional
        The ksize (height and width) of each convolutional layer.

    encoderNum : int, optional
        Suffix appended to placeholder and scope names so that several
        encoders can coexist in one graph.

    n_hidden : int, optional
        Size of the first fully connected layer prior to the variational
        embedding, directly after the encoding.  After the variational
        embedding, another fully connected layer of the same size is created
        prior to decoding.

    n_code : int, optional
        Number of latent Gaussians to sample for creating the innermost
        encoding.

    activation : function, optional
        Activation function to apply to each layer, e.g. tf.nn.relu.

    Returns
    -------
    model : dict
        {
            'cost': Tensor to optimize.
            'Ws': All weights of the encoder.
            'x': Input Placeholder
            'x_output_final': Decoder output added to the original input
                              (the predicted next frame)
            'z': Innermost encoding Tensor (latent features)
            'y': Target image placeholder
            'keep_prob': Amount to keep when using Dropout
            'train': Set to True when training / Applies to Batch Normalization
        }
    '''

    #network input placeholders
    x = tf.placeholder(tf.float32, input_shape, 'x' + str(encoderNum))
    y = tf.placeholder(tf.float32, input_shape, 'y' + str(encoderNum))

    phase_train = tf.placeholder(tf.bool, name='phase_train' + str(encoderNum))
    keep_prob = tf.placeholder(tf.float32, name='keep_prob' + str(encoderNum))

    x_tensor = x
    current_input = x_tensor

    #lists to hold the weights and shapes of each layer of the encoder
    Ws = []
    shapes = []

    #Build the encoder
    for layer_i, n_output in enumerate(n_filters):
        with tf.variable_scope(str(layer_i) + str(encoderNum)):
            shapes.append(current_input.get_shape().as_list())

            #produce weights and values through convolution
            h, W = utils.conv2d(x=current_input,
                                n_output=n_output,
                                k_h=filter_sizes[layer_i],
                                k_w=filter_sizes[layer_i],
                                name='conv2d' + str(layer_i) + str(encoderNum))

            #pass normalised batch through the activation function
            h = activation(
                batch_norm.Batch_norm(h, phase_train,
                                      'bn' + str(layer_i) + str(encoderNum)))

            #for dropout
            h = tf.nn.dropout(h, keep_prob)

            #add the weights to the weights list
            Ws.append(W)
            #input for next layer is output for this layer
            current_input = h

    shapes.append(current_input.get_shape().as_list())

    #variational section
    with tf.variable_scope('variational' + str(encoderNum)):

        dims = current_input.get_shape().as_list()

        if len(dims) == 4:
            flattened = tf.reshape(current_input,
                                   shape=[-1, dims[1] * dims[2] * dims[3]])
        elif len(dims) == 2 or len(dims) == 1:
            flattened = current_input

        #linear fully connected layer at the centre of the encoder
        h = utils.linear(flattened, n_hidden, name='W_fc')[0]
        h = activation(batch_norm.Batch_norm(h, phase_train, 'fc/bn'))
        h = tf.nn.dropout(h, keep_prob)

        z_mu = utils.linear(h, n_code, name='mu')[0]
        z_log_sigma = 0.5 * utils.linear(h, n_code, name='log_sigma')[0]

        #Sample from noise distribution
        epsilon = tf.random_normal(tf.stack([tf.shape(x)[0], n_code]))

        #Sample from posterior
        z = z_mu + tf.multiply(epsilon, tf.exp(z_log_sigma))

        h = utils.linear(z, n_hidden, name='fc_t')[0]
        h = activation(batch_norm.Batch_norm(h, phase_train, 'fc_t/bn'))
        h = tf.nn.dropout(h, keep_prob)

        size = dims[1] * dims[2] * dims[3]
        h = utils.linear(h, size, name='fc_t2')[0]
        current_input = activation(
            batch_norm.Batch_norm(h, phase_train, 'fc_t2/bn'))

        current_input = tf.reshape(
            current_input,
            tf.stack([tf.shape(current_input)[0], dims[1], dims[2], dims[3]]))

    #reverse the shapes filters and weights to undo the encoding
    shapes.reverse()
    n_filters.reverse()
    Ws.reverse()

    n_filters += [input_shape[-1]]

    ###Decoding-------------------
    for layer_i, n_output in enumerate(n_filters[1:]):
        with tf.variable_scope('decoder/{}'.format(layer_i) + str(encoderNum)):
            shape = shapes[layer_i + 1]

            #convolve
            h, W = utils.deconv2d(x=current_input,
                                  n_output_h=shape[1],
                                  n_output_w=shape[2],
                                  n_output_ch=shape[3],
                                  n_input_ch=shapes[layer_i][3],
                                  k_h=filter_sizes[layer_i],
                                  k_w=filter_sizes[layer_i])

            h = activation(
                batch_norm.Batch_norm(h, phase_train, 'dec/bn' + str(layer_i)))

            #for dropout
            h = tf.nn.dropout(h, keep_prob)

            current_input = h

    #the output from the final decoding layer is the output of the graph
    x_output = current_input

    #flatten the target image
    y_flat = utils.flatten(y)

    ##make the model learn an output which when added to the original input
    ##produces the next frame

    #flatten the input image
    x_original_flat = utils.flatten(x)

    #flatten the graph output
    dims1 = x_output.get_shape().as_list()
    if len(dims1) == 4:
        x_output_flat = tf.reshape(x_output,
                                   shape=[-1, dims1[1] * dims1[2] * dims1[3]])
    elif len(dims1) == 2 or len(dims1) == 1:
        x_output_flat = x_output

    #the ultimate output is the graph output added to the original input
    x_output_final = x_original_flat + x_output_flat

    #l2 loss
    #difference between final output and target image
    loss_x = tf.reduce_sum(tf.squared_difference(y_flat, x_output_final), 1)

    #penalizing latent vectors
    loss_z = -0.5 * tf.reduce_sum(
        1.0 + 2.0 * z_log_sigma - tf.square(z_mu) - tf.exp(2.0 * z_log_sigma),
        1)

    #total cost is the sum of the image loss and the latent loss
    cost = tf.reduce_mean(loss_x + loss_z)

    return {
        'cost': cost,
        'Ws': Ws,
        'x': x,
        'x_output_final': x_output_final,
        'z': z,
        'y': y,
        'keep_prob': keep_prob,
        'train': phase_train
    }
Example #13
    def __init__(self):
        super(Decoder, self).__init__()
        self.factors = {
            'color': 4,
            'shape': 4,
            'size': 2,
            'camera': 3,
            'background': 3,
            'horizontal': 40,
            'vertical': 40
        }

        self.mu = nn.ParameterDict({
            'color':
            nn.Parameter(torch.randn(self.factors['color'], 128)),
            'shape':
            nn.Parameter(torch.randn(self.factors['shape'], 128)),
            'size':
            nn.Parameter(torch.randn(self.factors['size'], 128)),
            'camera':
            nn.Parameter(torch.randn(self.factors['camera'], 128)),
            'background':
            nn.Parameter(torch.randn(self.factors['background'], 128)),
            'horizontal':
            nn.Parameter(torch.randn(self.factors['horizontal'], 128)),
            'vertical':
            nn.Parameter(torch.randn(self.factors['vertical'], 128))
        })
        self.logvar = nn.ParameterDict({
            'color':
            nn.Parameter(torch.zeros(self.factors['color'], 128)),
            'shape':
            nn.Parameter(torch.zeros(self.factors['shape'], 128)),
            'size':
            nn.Parameter(torch.zeros(self.factors['size'], 128)),
            'camera':
            nn.Parameter(torch.zeros(self.factors['camera'], 128)),
            'background':
            nn.Parameter(torch.zeros(self.factors['background'], 128)),
            'horizontal':
            nn.Parameter(torch.zeros(self.factors['horizontal'], 128)),
            'vertical':
            nn.Parameter(torch.zeros(self.factors['vertical'], 128))
        })

        self.input_color = nn.Linear(160, 256)
        self.input_shape = nn.Linear(160, 256)
        self.input_size = nn.Linear(160, 256)
        self.path_col_shp_siz = nn.Sequential(nn.ReLU(), nn.Linear(256, 1024))

        self.input_horizontal = nn.Linear(160, 256)
        self.input_vertical = nn.Linear(160, 256)
        self.path_hor_ver = nn.Sequential(nn.ReLU(), nn.Linear(256, 1024))

        self.input_camera = nn.Linear(160, 1024)
        self.input_background = nn.Linear(160, 1024)

        self.path_shallow = nn.Sequential(
            nn.ReLU(),
            nn.Linear(1024, 1024),
            U.Lambda(lambda x: x.reshape(-1, 16, 8, 8)),  # 16 x 8 x 8
            nn.ReLU(),
            U.deconv2d(16, 64, 1, 1, True, 8))  # 64 x 8 x 8
        self.path_deep = nn.Sequential(
            nn.ReLU(),
            nn.Linear(1024, 1024),
            U.Lambda(lambda x: x.reshape(-1, 64, 4, 4)),  # 64 x 4 x 4
            nn.ReLU(),
            U.deconv2d(64, 64, 4, 1, True, 4),  # 64 x 4 x 4
            nn.ReLU(),
            U.deconv2d(64, 64, 4, 2, True, 8))  # 64 x 8 x 8
        self.path_base = nn.Sequential(
            nn.ReLU(),
            U.deconv2d(64, 16, 4, 2, True, 16),  # 16 x 16 x 16
            nn.ReLU(),
            U.deconv2d(16, 3, 6, 4, True, 64))  # 3 x 64 x 64
Example #14
def create_conv_network(x,
                        channels_x,
                        channels_y,
                        layers=3,
                        feature_base=64,
                        filter_size=5,
                        pool_size=2,
                        keep_prob=0.8,
                        create_summary=True):
    """
    :param x: input_tensor, shape should be [None, n, m, channels_x]
    :param channels_x: number of channels in the input image. For Mri, input has 4 channels.
    :param channels_y: number of channels in the output image. For Mri, output has 2 channels.
    :param layers: number of layers in u-net architecture.
    :param feature_base: Neurons in first layer of cnn. Next layers have twice the number of neurons in previous layers.
    :param filter_size: size of convolution filter
    :param pool_size: size of pooling layer
    :create_summary: Creates Tensorboard summary if True
    """

    logging.info(
        "Layers: {layers}, features: {features}, filter size {fill_size}x{fill_size}, pool size {pool_size}x{pool_size},"
        "input channels {in_channels}, output channels {out_channels}".format(
            layers=layers,
            features=feature_base,
            fill_size=filter_size,
            pool_size=pool_size,
            in_channels=channels_x,
            out_channels=channels_y))

    #placeholder for input image
    with tf.name_scope("input_image"):
        n = tf.shape(x)[1]
        m = tf.shape(x)[2]

        x_image = tf.reshape(x, tf.stack([-1, n, m, channels_x]))
        input_node = x_image

    weights = []
    biases = []
    convs = []
    pools = OrderedDict()
    deconv = OrderedDict()
    dw_h_convs = OrderedDict()
    up_h_convs = OrderedDict()

    # down layers
    for layer in range(layers):
        with tf.name_scope("down_conv_layer{}".format(str(layer))):
            features = (2**layer) * feature_base
            std_dev = np.sqrt(2. / (filter_size * filter_size * features))

            if layer == 0:
                w1 = utils.weight_variable(
                    [filter_size, filter_size, channels_x, features], std_dev,
                    "w1")
            else:
                w1 = utils.weight_variable(
                    [filter_size, filter_size, features // 2, features],
                    std_dev, "w1")

            w2 = utils.weight_variable(
                [filter_size, filter_size, features, features], std_dev, "w2")
            b1 = utils.bias_variable([features], "b1")
            b2 = utils.bias_variable([features], "b2")

            conv_1 = utils.conv2d(input_node, w1, b1, keep_prob)
            conv_2 = utils.conv2d(tf.nn.relu(conv_1), w2, b2, keep_prob)
            dw_h_convs[layer] = tf.nn.relu(conv_2)

            weights.append((w1, w2))
            biases.append((b1, b2))
            convs.append((conv_1, conv_2))

            # do max pooling if not the last layer
            if layer < layers - 1:
                pools[layer] = utils.max_pool(dw_h_convs[layer], pool_size)
                input_node = pools[layer]

    input_node = dw_h_convs[layers - 1]

    #up layers
    for layer in range(layers - 2, -1, -1):
        with tf.name_scope("up_conv_layer{}".format(str(layer))):
            features = (2**(layer + 1)) * feature_base
            std_dev = np.sqrt(2. / (filter_size * filter_size * features))

            wd = utils.weight_variable_devonc(
                [pool_size, pool_size, features // 2, features], std_dev, "wd")
            bd = utils.bias_variable([features // 2], "bd")

            h_deconv = tf.nn.relu(
                utils.deconv2d(input_node, wd, pool_size) + bd)
            h_deconv_concat = tf.concat([dw_h_convs[layer], h_deconv], 3)

            deconv[layer] = h_deconv_concat

            w1 = utils.weight_variable(
                [filter_size, filter_size, features, features // 2], std_dev,
                "w1")
            w2 = utils.weight_variable(
                [filter_size, filter_size, features // 2, features // 2],
                std_dev, "w2")
            b1 = utils.bias_variable([features // 2], "b1")
            b2 = utils.bias_variable([features // 2], "b2")

            conv_1 = utils.conv2d(h_deconv_concat, w1, b1, keep_prob)
            conv_2 = utils.conv2d(tf.nn.relu(conv_1), w2, b2, keep_prob)

            input_node = tf.nn.relu(conv_2)
            up_h_convs[layer] = input_node

            weights.append((w1, w2))
            biases.append((b1, b2))
            convs.append((conv_1, conv_2))

    #Output image
    with tf.name_scope("output_image"):
        weight = utils.weight_variable([1, 1, feature_base, channels_y],
                                       std_dev, "out_weight")
        bias = utils.bias_variable([channels_y], "out_bias")
        # residual output: the add requires channels_x == channels_y
        output_image = tf.add(
            utils.conv2d(input_node, weight, bias, tf.constant(1.0)), x_image)
        up_h_convs["out"] = output_image

    # Create Summaries
    if create_summary:
        with tf.name_scope("summaries"):
            for i, (c1, c2) in enumerate(convs):
                tf.summary.image("summary_conv_{:02}_01".format(i),
                                 utils.get_image_summary(c1))
                tf.summary.image("summary_conv_{:02}_02".format(i),
                                 utils.get_image_summary(c2))

            for k in pools.keys():
                tf.summary.image("summary_pool_{:02}".format(k),
                                 utils.get_image_summary(pools[k]))

            for k in deconv.keys():
                tf.summary.image("summary_deconv_concat_{:02}".format(k),
                                 utils.get_image_summary(deconv[k]))

            for k in dw_h_convs.keys():
                tf.summary.histogram(
                    "dw_convolution_{:02}/activations".format(k),
                    dw_h_convs[k])

            for k in up_h_convs.keys():
                tf.summary.histogram("up_convolution_{}/activations".format(k),
                                     up_h_convs[k])

    variables = []
    for w1, w2 in weights:
        variables.append(w1)
        variables.append(w2)

    for b1, b2 in biases:
        variables.append(b1)
        variables.append(b2)

    return output_image, variables
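Hypothetical wiring for the network above, assuming TF 1.x; the placeholder shapes are illustrative. Because the final residual add (output_image = conv + x_image) only type-checks when channels_x == channels_y, this sketch uses matching channel counts.

import tensorflow as tf  # TF 1.x

x = tf.placeholder(tf.float32, [None, 256, 256, 2], name='x')
y = tf.placeholder(tf.float32, [None, 256, 256, 2], name='y')
output_image, variables = create_conv_network(x, channels_x=2, channels_y=2,
                                              create_summary=False)
loss = tf.reduce_mean(tf.squared_difference(output_image, y))
train_op = tf.train.AdamOptimizer(1e-4).minimize(loss, var_list=variables)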
Example #15
    def _build_fcn(self, input_op, reuse=False, is_training=True):
        row, col = self.input_shape[0], self.input_shape[1]
        row_p1, col_p1 = int(row / 2), int(col / 2)
        row_p2, col_p2 = int(row_p1 / 2), int(col_p1 / 2)

        with tf.variable_scope('FCNN', reuse=reuse):
            conv1_1 = conv2d_relu(input_op,
                                  n_out=64,
                                  name='conv1_1',
                                  is_training=is_training)
            conv1_2 = conv2d_relu(conv1_1,
                                  n_out=64,
                                  name='conv1_2',
                                  is_training=is_training)
            pool_1 = pooling(conv1_2, name='pool_1')

            conv2_1 = conv2d_relu(pool_1,
                                  n_out=128,
                                  name='conv2_1',
                                  is_training=is_training)
            conv2_2 = conv2d_relu(conv2_1,
                                  n_out=128,
                                  name='conv2_2',
                                  is_training=is_training)
            pool_2 = pooling(conv2_2, name='pool_2')

            conv3_1 = dilated_block(pool_2,
                                    n_out=256,
                                    is_training=is_training,
                                    name='conv3_1')
            conv3_2 = dilated_block(conv3_1,
                                    n_out=256,
                                    is_training=is_training,
                                    name='conv3_2')
            conv3_3 = dilated_block(conv3_2,
                                    n_out=256,
                                    is_training=is_training,
                                    name='conv3_3')
            pool_3 = pooling(conv3_3, name='pool_3')

            conv4_1 = dilated_block(pool_3,
                                    n_out=512,
                                    is_training=is_training,
                                    name='conv4_1')
            conv4_2 = dilated_block(conv4_1,
                                    n_out=512,
                                    is_training=is_training,
                                    name='conv4_2')
            conv4_3 = dilated_block(conv4_2,
                                    n_out=512,
                                    is_training=is_training,
                                    name='conv4_3')
            deconv_1 = deconv2d(
                conv4_3,
                output_shape=[self.batch_size, row_p2, col_p2, 256],
                name='deconv_1')

            concat_1 = tf.concat([conv3_3, deconv_1], axis=3, name='concat_1')
            conv5_1 = dilated_block(concat_1,
                                    n_out=256,
                                    is_training=is_training,
                                    name='conv5_1')
            conv5_2 = dilated_block(conv5_1,
                                    n_out=256,
                                    is_training=is_training,
                                    name='conv5_2')
            conv5_3 = dilated_block(conv5_2,
                                    n_out=256,
                                    is_training=is_training,
                                    name='conv5_3')
            deconv_2 = deconv2d(
                conv5_3,
                output_shape=[self.batch_size, row_p1, col_p1, 128],
                name='deconv_2')

            concat_2 = tf.concat([conv2_2, deconv_2], axis=3, name='concat_2')
            conv6_1 = conv2d_relu(concat_2,
                                  n_out=151,
                                  name='conv6_1',
                                  is_training=is_training)
            conv6_2 = conv2d_relu(conv6_1,
                                  n_out=151,
                                  name='conv6_2',
                                  is_training=is_training)
            deconv_3 = deconv2d(conv6_2,
                                output_shape=[self.batch_size, row, col, 64],
                                name='deconv_3')

            concat_3 = tf.concat([conv1_2, deconv_3], axis=3, name='concat_3')
            conv7_1 = conv2d_relu(concat_3,
                                  n_out=151,
                                  name='conv7_1',
                                  is_training=is_training)
            conv7_2 = conv2d(conv7_1, n_out=151, name='conv7_2')
            return tf.nn.softmax(conv7_2, axis=3), conv7_2