Exemplo n.º 1
0
    def outputOp(self, memory, vecQuestions, images, imageInDim):
        """Assemble the feature vector handed to the output classifier.

        Starts from the memory state and, depending on the configuration,
        merges in a projection of the question vector and/or appends a
        projection of the linearized image features.

        Args:
            memory: memory state; tracked here as config.memDim features.
            vecQuestions: question vector, projected from config.ctrlDim to
                config.memDim when config.outQuestion is set.
            images: image features, used only when config.outImage is set.
            imageInDim: not read by the body (self.imageInDim is used
                instead); kept so existing callers keep working.

        Returns:
            A (features, dim) pair: the combined tensor and the feature
            count tracked for it.
        """
        outFeatures, outDim = memory, config.memDim

        with tf.variable_scope("outputUnit"):
            if config.outQuestion:
                # Project the question and merge it with the memory.
                questionProj = ops.linear(vecQuestions, config.ctrlDim,
                                          config.memDim, name="outQuestion")
                outFeatures, outDim = ops.concat(outFeatures, questionProj,
                                                 config.memDim,
                                                 mul=config.outQuestionMul)

            if config.outImage:
                # The second return value (the linearized size) is not used.
                flatImages, _ = ops.linearizeFeatures(
                    images, self.H, self.W, self.imageInDim,
                    outputDim=config.outImageDim)
                # NOTE(review): the input size given here is config.memDim
                # although the tensor was requested with
                # outputDim=config.outImageDim just above -- confirm against
                # ops.linear / ops.linearizeFeatures.
                imageProj = ops.linear(flatImages, config.memDim,
                                       config.outImageDim, name="outImage")
                outFeatures = tf.concat([outFeatures, imageProj], axis=-1)
                outDim += config.outImageDim

        return outFeatures, outDim
def decoder_block(img, skip, is_t):
    """Upsample `img` through five stages, merging one skip tensor per stage.

    Every stage resizes bilinearly to a multiple of the input's dynamic
    height/width, applies a 3x3 separable convolution and `bn`, and merges
    the result with an entry of `skip` via `concat`. Stages after the first
    are preceded by a `conv2d` that sets the channel count. A final
    conv -> bn -> relu -> 1x1 conv head produces 3 output channels.

    Args:
        img: input tensor; axes 1 and 2 are read as height and width.
        skip: indexable collection of skip tensors; entries 4, 3, 2, 1, 0
            are consumed in that order.
        is_t: forwarded to every `bn` call as `is_t`.

    Returns:
        The output of the final 'output_3' convolution.
    """
    # (channels, upsample factor, resize op name, skip index) per stage.
    # NOTE(review): the resize names do not follow the scale progression
    # ('upsample_32' appears twice); they are kept exactly as they were so
    # that graph op names do not change.
    stages = (
        (512, 2, 'upsample_16', 4),
        (256, 4, 'upsample_32', 3),
        (128, 8, 'upsample_64', 2),
        (64, 16, 'upsample_32', 1),
        (32, 32, 'upsample_128', 0),
    )

    with tf.variable_scope('gen_upsample'), \
            slim.arg_scope([slim.separable_conv2d], depth_multiplier=1):
        dyn_shape = tf.shape(img)
        h, w = dyn_shape[1], dyn_shape[2]

        im = img
        for stage_no, (channels, factor, resize_name, skip_idx) in \
                enumerate(stages):
            if stage_no > 0:
                # Set the channel count before upsampling.
                im = conv2d(im, output_dim=channels,
                            name='conv_%d' % channels)
            im = tf.image.resize_bilinear(im, [h * factor, w * factor],
                                          name=resize_name)
            im = slim.separable_conv2d(im, channels, [3, 3],
                                       scope='conv_sp_%d' % channels)
            im = bn(im, is_t=is_t, name='bn_sp_%d' % channels)
            im = concat(im, skip[skip_idx], name='cat_%d' % channels)

        # Output head.
        im = conv2d(im, output_dim=16, name='output_16')
        im = tf.nn.relu(bn(im, is_t=is_t, name='output_bn_16'))
        return conv2d(im, output_dim=3, k_h=1, k_w=1, name='output_3')
Exemplo n.º 3
0
    def discriminator(self, image, y=None, reuse=False):
        """Build the discriminator graph inside the 'discriminator' scope.

        When self.y_dim is falsy, an unconditional stack of four
        convolutions followed by a linear layer is built. Otherwise `y` is
        injected after every layer of a shallower conditional network.

        Args:
            image: input image batch.
            y: conditioning labels; only read when self.y_dim is truthy.
            reuse: when true, variables of the scope are reused.

        Returns:
            A (sigmoid(logits), logits) pair.
        """
        with tf.variable_scope('discriminator') as scope:
            if reuse:
                scope.reuse_variables()

            if self.y_dim:
                # Conditional variant: labels are broadcast over the spatial
                # axes and concatenated to the feature maps.
                label_map = tf.reshape(
                    y, [self.batch_size, 1, 1, self.y_dim])
                net = ops.conv_cond_concat(image, label_map)

                net = ops.conv2d(net, self.c_dim + self.y_dim,
                                 name='d_h0_conv')
                net = ops.conv_cond_concat(ops.lrelu(net), label_map)

                net = ops.conv2d(net, self.df_dim + self.y_dim,
                                 name='d_h1_conv')
                net = ops.lrelu(self.d_bn1(net))
                net = tf.reshape(net, [self.batch_size, -1])
                net = ops.concat([net, y], 1)

                net = ops.linear(net, self.dfc_dim, 'd_h2_lin')
                net = ops.lrelu(self.d_bn2(net))
                net = ops.concat([net, y], 1)

                logits = ops.linear(net, 1, 'd_h3_lin')
                return tf.nn.sigmoid(logits), logits

            # Unconditional variant: the filter count doubles at each layer.
            net = ops.lrelu(ops.conv2d(image, self.df_dim, name='d_h0_conv'))
            stages = (
                (self.d_bn1, 2, 'd_h1_conv'),
                (self.d_bn2, 4, 'd_h2_conv'),
                (self.d_bn3, 8, 'd_h3_conv'),
            )
            for batch_norm, width_mult, conv_name in stages:
                conv = ops.conv2d(net, self.df_dim * width_mult,
                                  name=conv_name)
                net = ops.lrelu(batch_norm(conv))

            flat = tf.reshape(net, [self.batch_size, -1])
            logits = ops.linear(flat, 1, 'd_h4_lin')
            return tf.nn.sigmoid(logits), logits
Exemplo n.º 4
0
    def _discriminator(x, y, reuse_vars=False):
        """Build the conditional discriminator and expose its activations.

        Three convolution stages (with params.dis_filters, x2 and x4
        filters) are each followed by optional batch norm, lrelu and a
        concatenation with `y`; a fully connected layer then produces a
        single output per example.

        Args:
            x: discriminator input.
            y: conditioning tensor, concatenated to the input and to every
                hidden layer through ops.concat.
            reuse_vars: passed to tf.variable_scope as `reuse`.

        Returns:
            A (h4, layers) pair where h4 is the fully connected output and
            layers maps 'h0', 'h1', 'h1_pure', 'h2', 'h3', 'h4' to the
            corresponding tensors ('h1_pure' is the first convolution
            output before batch norm / activation).
        """
        with tf.variable_scope(params.dis_scope, reuse=reuse_vars):
            h0 = ops.concat(x, y)

            filter_counts = (params.dis_filters,
                             params.dis_filters * 2,
                             params.dis_filters * 4)

            hidden = h0
            h1_pure = None
            conditioned = []
            for layer_no, n_filters in enumerate(filter_counts, start=1):
                hidden = ops.convolution(hidden,
                                         params.dis_filters_size,
                                         n_filters,
                                         name='h%d' % layer_no)
                if layer_no == 1:
                    # Keep the raw output of the first convolution.
                    h1_pure = hidden
                if params.use_batch_norm:
                    hidden = ops.batch_norm(hidden, name='bn%d' % layer_no)
                hidden = ops.concat(ops.lrelu(hidden), y)
                conditioned.append(hidden)
            h1, h2, h3 = conditioned

            flat = tf.reshape(h3, [params.batch_size, -1])
            h4 = ops.fully_connected(flat, 1, 'h4')

            layers = {
                'h0': h0,
                'h1': h1,
                'h1_pure': h1_pure,
                'h2': h2,
                'h3': h3,
                'h4': h4
            }
            return h4, layers
Exemplo n.º 5
0
    def stn_to_pixel_coords(stn_coords, img_size):
        """Convert (sx, sy, tx, ty) transformer parameters to (y, x, h, w).

        Args:
            stn_coords: tf.Tensor or array-like whose last axis is split
                into four parts read as (sx, sy, tx, ty). Anything that is
                not a tf.Tensor is converted with np.asarray.
            img_size: indexable; element 0 is used for the y axis and
                element 1 for the x axis.

        Returns:
            The (y, x, h, w) values concatenated along the last axis.
        """
        coords = stn_coords
        if not isinstance(coords, tf.Tensor):
            coords = np.asarray(coords)

        scale_x, scale_y, shift_x, shift_y = ops.split(coords, 4, axis=-1)

        # Each axis is converted independently by the per-axis helper.
        top, height = SpatialTransformer.stn_to_pixel_coord(
            scale_y, shift_y, img_size[0])
        left, width = SpatialTransformer.stn_to_pixel_coord(
            scale_x, shift_x, img_size[1])

        return ops.concat((top, left, height, width), -1)
Exemplo n.º 6
0
    def discriminator(self, image, y=None, reuse=False):
        """Define the D network inside the "discriminator" variable scope.

        Args:
            image: input image batch.
            y: conditioning labels; only read when self.y_dim is truthy.
            reuse: when true, the scope's variables are reused instead of
                created.

        Returns:
            A (sigmoid(logits), logits) pair.
        """
        with tf.variable_scope("discriminator") as scope:
            if reuse:
                scope.reuse_variables()

            if self.y_dim:
                # Conditional network: the labels are concatenated to the
                # input and to every intermediate representation.
                yb = tf.reshape(y, [self.batch_size, 1, 1, self.y_dim])
                joined = conv_cond_concat(image, yb)

                conv0 = conv2d(joined, self.c_dim + self.y_dim,
                               name='d_h0_conv')
                act0 = conv_cond_concat(lrelu(conv0), yb)

                conv1 = conv2d(act0, self.df_dim + self.y_dim,
                               name='d_h1_conv')
                act1 = lrelu(self.d_bn1(conv1))
                act1 = tf.reshape(act1, [self.batch_size, -1])
                act1 = concat([act1, y], 1)

                dense2 = linear(act1, self.dfc_dim, 'd_h2_lin')
                act2 = concat([lrelu(self.d_bn2(dense2)), y], 1)

                logits = linear(act2, 1, 'd_h3_lin')
            else:
                # Unconditional network: four convolutions, then a linear
                # layer on the flattened feature maps.
                conv0 = conv2d(image, self.df_dim, name='d_h0_conv')
                act0 = lrelu(conv0)

                conv1 = conv2d(act0, self.df_dim * 2, name='d_h1_conv')
                act1 = lrelu(self.d_bn1(conv1))

                conv2 = conv2d(act1, self.df_dim * 4, name='d_h2_conv')
                act2 = lrelu(self.d_bn2(conv2))

                conv3 = conv2d(act2, self.df_dim * 8, name='d_h3_conv')
                act3 = lrelu(self.d_bn3(conv3))

                flat = tf.reshape(act3, [self.batch_size, -1])
                logits = linear(flat, 1, 'd_h4_lin')

            return tf.nn.sigmoid(logits), logits
Exemplo n.º 7
0
def gaussian_blur(image, kernel, kernel_size, cdim=3):
    """Blur every channel of `image` with a separable 1-D kernel.

    The image is reflect-padded by (kernel_size - 1) // 2 on both spatial
    axes, then each channel is convolved with the kernel laid out as a
    [1, kernel_size] filter followed by a [kernel_size, 1] filter, both
    with 'VALID' padding. The per-channel results are concatenated along
    axis 3.

    Args:
        image: 4-D tensor whose last axis holds the channels.
        kernel: tensor with kernel_size elements; it may be a placeholder
            so the kernel values can change between runs.
        kernel_size: number of kernel elements.
        cdim: number of channels to process.

    Returns:
        The blurred channels concatenated along axis 3.
    """
    pad_w = (kernel_size - 1) // 2
    padded = tf.pad(image, [[0, 0], [pad_w, pad_w], [pad_w, pad_w], [0, 0]],
                    mode='REFLECT')

    # Both filter layouts are independent of the channel, so they are built
    # once instead of adding two reshape ops per channel to the graph.
    row_filter = tf.reshape(kernel, [1, kernel_size, 1, 1])
    col_filter = tf.reshape(kernel, [kernel_size, 1, 1, 1])
    strides = [1, 1, 1, 1]

    outputs = []
    for channel_idx in range(cdim):
        # Slice (not index) so the channel axis is kept for conv2d.
        data_c = padded[:, :, :, channel_idx:(channel_idx + 1)]
        data_c = tf.nn.conv2d(data_c, row_filter, strides, 'VALID')
        data_c = tf.nn.conv2d(data_c, col_filter, strides, 'VALID')
        outputs.append(data_c)
    return concat(outputs, axis=3)
Exemplo n.º 8
0
    def pixel_to_stn_coords(yxhw, img_size):
        """Convert (y, x, h, w) boxes to (sx, sy, tx, ty) transformer params.

        Args:
            yxhw: tf.Tensor or array-like; along the last axis the first two
                entries are read as (y, x) and the rest as (h, w). Anything
                that is not a tf.Tensor is converted to a float32 array.
            img_size: image size as (height, width); converted to float32
                and given leading singleton axes until its rank matches
                that of `yxhw`.

        Returns:
            (sx, sy, tx, ty) concatenated along the last axis.
        """
        img_size = np.asarray(img_size).astype(np.float32)
        if not isinstance(yxhw, tf.Tensor):
            yxhw = np.asarray(yxhw).astype(np.float32)

        # Prepend singleton axes so img_size broadcasts against yxhw.
        missing_axes = len(yxhw.shape) - len(img_size.shape)
        if missing_axes > 0:
            img_size = img_size.reshape((1,) * missing_axes + img_size.shape)

        # NOTE(review): scale divides by (img_size + 1) while shift divides
        # by (img_size - 1.) -- confirm against stn_to_pixel_coord that this
        # asymmetry is intended.
        scale = yxhw[..., 2:] / (img_size + 1)
        shift = 2 * yxhw[..., :2] / (img_size - 1.) + scale - 1.

        scale_y, scale_x = ops.split(scale, 2, -1)
        shift_y, shift_x = ops.split(shift, 2, -1)
        return ops.concat((scale_x, scale_y, shift_x, shift_y), -1)
Exemplo n.º 9
0
def gaussian_blur_adaptive(image, sigma, eps=0.01, img_width=32, cdim=3):
    """Blur `image` with a kernel obtained from `gauss_kernel`.

    Returns `image` untouched when sigma equals 0. Otherwise the image is
    reflect-padded by (kernel length - 1) // 2 and every channel is
    convolved with the kernel laid out first as a [1, k, 1, 1] filter and
    then as a [k, 1, 1, 1] filter, both with 'VALID' padding.

    Args:
        image: 4-D tensor whose last axis holds the channels.
        sigma: forwarded to gauss_kernel; 0 disables the blur.
        eps: forwarded to gauss_kernel.
        img_width: gauss_kernel receives img_width - 1 as third argument.
        cdim: number of channels to process.

    Returns:
        The blurred channels concatenated along axis 3, or `image` itself
        when sigma == 0.
    """
    if sigma == 0:
        return image

    kernel = gauss_kernel(sigma, eps, img_width - 1)
    pad_w = (kernel.shape[0] - 1) // 2
    padded = tf.pad(image, [[0, 0], [pad_w, pad_w], [pad_w, pad_w], [0, 0]],
                    mode='REFLECT')

    def _as_filter(first_axis):
        # Insert the kernel's partner axis, then the two channel axes.
        expanded = np.expand_dims(kernel, first_axis)
        expanded = np.expand_dims(expanded, axis=2)
        return np.expand_dims(expanded, axis=3)

    row_filter = _as_filter(0)
    col_filter = _as_filter(1)
    unit_strides = [1, 1, 1, 1]

    def _blur_channel(channel_idx):
        # Slice (not index) so the channel axis is kept for conv2d.
        plane = padded[:, :, :, channel_idx:(channel_idx + 1)]
        plane = tf.nn.conv2d(plane, row_filter, unit_strides, 'VALID')
        return tf.nn.conv2d(plane, col_filter, unit_strides, 'VALID')

    blurred = [_blur_channel(channel_idx) for channel_idx in range(cdim)]
    return concat(blurred, axis=3)
Exemplo n.º 10
0
    def forward(self, word_indices: list[Tensor]) -> Tensor:
        """Run the forward pass of the feed-forward language model.

        Each context position is embedded, the embeddings are joined with
        ops.concat, and the result goes through the hidden layer, a tanh,
        and the output layer.

        Args:
            word_indices: list of [batch_size] tensors, one per context
                position (the list length is the number of previous
                tokens); each holds the token indices at that position.

        Returns:
            [batch_size, vocab_size] tensor of logits (pre-softmax scores)
            over the vocabulary for each example in the batch.
        """
        embeddings = [self.embedding(position) for position in word_indices]
        # ops.concat takes the tensors as separate positional arguments.
        # Result: [batch_size, num_words * embedding_size]
        context = ops.concat(*embeddings)
        hidden = ops.tanh(self.fc(context))
        return self.output(hidden)
Exemplo n.º 11
0
    def generator(self, z, y=None):
        """Build the generator graph inside the "generator" variable scope.

        When self.y_dim is None, `z` is projected, reshaped and upsampled
        by four transposed convolutions, with a tanh on the output. The
        intermediate tensors and layer weights are stored on `self`
        (z_, h0, h1 and the h*_w / h*_b attributes). Otherwise a
        conditional network is built that concatenates `y` after every
        layer and ends with a sigmoid.

        Args:
            z: latent input batch.
            y: conditioning labels; only read when self.y_dim is not None.

        Returns:
            The generated image tensor.
        """
        with tf.variable_scope("generator"):
            out_h, out_w = self.output_height, self.output_width

            if self.y_dim is not None:
                half_h, quarter_h = out_h // 2, out_h // 4
                half_w, quarter_w = out_w // 2, out_w // 4

                # Labels shaped so they broadcast over the spatial axes.
                label_map = tf.reshape(
                    y, [self.batch_size, 1, 1, self.y_dim])
                net = concat([z, y], 1)

                net = linear(net, self.gfc_dim, 'g_h0_lin')
                net = tf.nn.relu(self.g_bn0(net))
                net = concat([net, y], 1)

                net = linear(net, self.gf_dim * 2 * quarter_h * quarter_w,
                             'g_h1_lin')
                net = tf.nn.relu(self.g_bn1(net))
                net = tf.reshape(
                    net,
                    [self.batch_size, quarter_h, quarter_w, self.gf_dim * 2])
                net = conv_cond_concat(net, label_map)

                net = deconv2d(
                    net,
                    [self.batch_size, half_h, half_w, self.gf_dim * 2],
                    name='g_h2')
                net = tf.nn.relu(self.g_bn2(net))
                net = conv_cond_concat(net, label_map)

                logits = deconv2d(
                    net, [self.batch_size, out_h, out_w, self.c_dim],
                    name='g_h3')
                return tf.nn.sigmoid(logits)

            # Spatial sizes after 0..4 successive stride-2 reductions.
            dims = [(out_h, out_w)]
            for _ in range(4):
                prev_h, prev_w = dims[-1]
                dims.append((conv_out_size_same(prev_h, 2),
                             conv_out_size_same(prev_w, 2)))
            _, (h2, w2), (h4, w4), (h8, w8), (h16, w16) = dims

            # Project `z` and reshape it into the smallest feature map.
            self.z_, self.h0_w, self.h0_b = linear(
                z, self.gf_dim * 8 * h16 * w16, 'g_h0_lin', with_w=True)
            self.h0 = tf.reshape(self.z_, [-1, h16, w16, self.gf_dim * 8])
            net = tf.nn.relu(self.g_bn0(self.h0))

            self.h1, self.h1_w, self.h1_b = deconv2d(
                net, [self.batch_size, h8, w8, self.gf_dim * 4],
                name='g_h1', with_w=True)
            net = tf.nn.relu(self.g_bn1(self.h1))

            pre_act, self.h2_w, self.h2_b = deconv2d(
                net, [self.batch_size, h4, w4, self.gf_dim * 2],
                name='g_h2', with_w=True)
            net = tf.nn.relu(self.g_bn2(pre_act))

            pre_act, self.h3_w, self.h3_b = deconv2d(
                net, [self.batch_size, h2, w2, self.gf_dim * 1],
                name='g_h3', with_w=True)
            net = tf.nn.relu(self.g_bn3(pre_act))

            pre_act, self.h4_w, self.h4_b = deconv2d(
                net, [self.batch_size, out_h, out_w, self.c_dim],
                name='g_h4', with_w=True)

            return tf.nn.tanh(pre_act)
Exemplo n.º 12
0
    def sampler(self, z, y=None):
        """Rebuild the generator for sampling, reusing its variables.

        The graph mirrors the generator's layer names inside the
        "generator" scope (marked for variable reuse) but calls every batch
        norm layer with train=False.

        Args:
            z: latent input batch.
            y: conditioning labels; only read when self.y_dim is not None.

        Returns:
            The generated image tensor (tanh output when self.y_dim is
            None, sigmoid output otherwise).
        """
        with tf.variable_scope("generator") as scope:
            scope.reuse_variables()

            out_h, out_w = self.output_height, self.output_width

            if self.y_dim is not None:
                half_h, quarter_h = out_h // 2, out_h // 4
                half_w, quarter_w = out_w // 2, out_w // 4

                # Labels shaped so they broadcast over the spatial axes.
                label_map = tf.reshape(
                    y, [self.batch_size, 1, 1, self.y_dim])
                net = concat([z, y], 1)

                net = linear(net, self.gfc_dim, 'g_h0_lin')
                net = tf.nn.relu(self.g_bn0(net, train=False))
                net = concat([net, y], 1)

                net = linear(net, self.gf_dim * 2 * quarter_h * quarter_w,
                             'g_h1_lin')
                net = tf.nn.relu(self.g_bn1(net, train=False))
                net = tf.reshape(
                    net,
                    [self.batch_size, quarter_h, quarter_w, self.gf_dim * 2])
                net = conv_cond_concat(net, label_map)

                net = deconv2d(
                    net,
                    [self.batch_size, half_h, half_w, self.gf_dim * 2],
                    name='g_h2')
                net = tf.nn.relu(self.g_bn2(net, train=False))
                net = conv_cond_concat(net, label_map)

                logits = deconv2d(
                    net, [self.batch_size, out_h, out_w, self.c_dim],
                    name='g_h3')
                return tf.nn.sigmoid(logits)

            # Spatial sizes after 0..4 successive stride-2 reductions.
            dims = [(out_h, out_w)]
            for _ in range(4):
                prev_h, prev_w = dims[-1]
                dims.append((conv_out_size_same(prev_h, 2),
                             conv_out_size_same(prev_w, 2)))
            _, (h2, w2), (h4, w4), (h8, w8), (h16, w16) = dims

            # Project `z` and reshape it into the smallest feature map.
            projected = linear(z, self.gf_dim * 8 * h16 * w16, 'g_h0_lin')
            net = tf.reshape(projected, [-1, h16, w16, self.gf_dim * 8])
            net = tf.nn.relu(self.g_bn0(net, train=False))

            net = deconv2d(
                net, [self.batch_size, h8, w8, self.gf_dim * 4],
                name='g_h1')
            net = tf.nn.relu(self.g_bn1(net, train=False))

            net = deconv2d(
                net, [self.batch_size, h4, w4, self.gf_dim * 2],
                name='g_h2')
            net = tf.nn.relu(self.g_bn2(net, train=False))

            net = deconv2d(
                net, [self.batch_size, h2, w2, self.gf_dim * 1],
                name='g_h3')
            net = tf.nn.relu(self.g_bn3(net, train=False))

            logits = deconv2d(
                net, [self.batch_size, out_h, out_w, self.c_dim],
                name='g_h4')
            return tf.nn.tanh(logits)
Exemplo n.º 13
0
    def generator(self, z, g_inputs, y=None, sampler=False):
        """Build the image-conditioned generator graph.

        `g_inputs` is encoded by four convolutions and a linear layer into
        a flat feature vector (squashed with tanh), concatenated with `z`,
        projected to the smallest feature map and upsampled by four
        transposed convolutions. The result is concatenated with `g_inputs`
        along the last axis and merged by two 1x1 transposed convolutions.

        Args:
            z: latent input batch.
            g_inputs: conditioning input batch; it is concatenated with the
                upsampled features, so its spatial size has to match
                (args.output_height, args.output_width).
            y: not read by this method.
            sampler: when true, the scope's variables are reused and batch
                norm layers are built with train=False.

        Returns:
            sigmoid of the final 1x1 transposed convolution.
        """
        with tf.variable_scope("generator") as scope:
            if sampler:
                scope.reuse_variables()
            do_train = not sampler

            bs = self.args.batch_size

            def conv_out_size_same(h, w, stride):
                # Size of each axis after a stride-`stride` reduction.
                return [int(math.ceil(s / stride)) for s in [h, w]]

            s_h, s_w = self.args.output_height, self.args.output_width
            s_h2, s_w2 = conv_out_size_same(s_h, s_w, 2)
            s_h4, s_w4 = conv_out_size_same(s_h2, s_w2, 2)
            s_h8, s_w8 = conv_out_size_same(s_h4, s_w4, 2)
            s_h16, s_w16 = conv_out_size_same(s_h8, s_w8, 2)

            # *** First layers: g_inputs => g_flat *** #
            gi = ops.lrelu(
                ops.conv2d(g_inputs, self.args.df_dim, name='g_gi0_conv'))
            for idx in range(1, 4):
                conv = ops.conv2d(gi,
                                  self.args.df_dim * (2**idx),
                                  name="g_gi" + str(idx) + "_conv")
                # NOTE(review): unlike the other layers, this batch norm
                # name has no "g_" prefix -- confirm it is still picked up
                # wherever generator variables are selected by name.
                gi = ops.lrelu(
                    ops.bn_layer(conv,
                                 train=do_train,
                                 name="gi" + str(idx) + "_bn"))
            gi_flat = ops.linear(tf.reshape(gi, [bs, -1]),
                                 self.args.g_feature_dim, 'g_gi4_lin')

            # *** Map gi_flat to [-1,1] to be more similar to z: *** #
            gi_flat = tf.nn.tanh(gi_flat)

            # *** Layers from flat (z and gi_flat) to full size: *** #
            z0 = ops.concat([gi_flat, z], -1)

            gd0 = ops.linear(z0, self.args.gf_dim * 8 * s_h16 * s_w16,
                             'g_h0_lin')
            gd0 = tf.reshape(gd0, [bs, s_h16, s_w16, self.args.gf_dim * 8])
            gd0 = tf.nn.relu(ops.bn_layer(gd0, train=do_train, name="g_bn0"))

            # (height, width, gf_dim multiplier) of each upsampling layer.
            # Height and width are tracked separately so that non-square
            # outputs get consistent shapes; using the height for both
            # axes only works when the output is square.
            upsample_targets = [
                (s_h8, s_w8, 4),
                (s_h4, s_w4, 2),
                (s_h2, s_w2, 2),
                (s_h, s_w, 2),
            ]
            gd = gd0
            for idx, (out_h, out_w, mult) in enumerate(upsample_targets,
                                                       start=1):
                deconv = ops.deconv2d(
                    gd, [bs, out_h, out_w, self.args.gf_dim * mult],
                    name="g_h" + str(idx))
                gd = tf.nn.relu(
                    ops.bn_layer(deconv,
                                 train=do_train,
                                 name="g_bn" + str(idx)))
            gd4 = ops.concat([gd, g_inputs], -1)

            # *** 2 Layers to merge gd and g_inputs: *** #
            gd5 = ops.deconv2d(gd4, [bs, s_h, s_w, self.args.gf_dim],
                               k_h=1,
                               k_w=1,
                               d_h=1,
                               d_w=1,
                               name='g_h5')
            gd5 = tf.nn.relu(gd5)
            gd6 = ops.deconv2d(gd5, [bs, s_h, s_w, self.args.c_dim],
                               k_h=1,
                               k_w=1,
                               d_h=1,
                               d_w=1,
                               name='g_h6')

            return tf.nn.sigmoid(gd6)
Exemplo n.º 14
0
    def _generator(z, zy):
        """Build the conditional generator and expose its activations.

        `z` and `zy` are concatenated, projected by a fully connected layer
        to a feature map 1/8 of the image size, and upsampled by three
        deconvolutions. Every hidden layer goes through optional batch
        norm, lrelu, dropout and a concatenation with `zy`.

        Args:
            z: latent input batch.
            zy: conditioning tensor, concatenated to the input and to every
                hidden layer.

        Returns:
            A (tanh(h4), layers) pair where layers maps 'h0', 'h1', 'h2',
            'h3', 'h3_pure', 'h4' to the corresponding tensors ('h3_pure'
            is the third layer's raw deconvolution output and 'h4' the
            pre-tanh output).
        """

        def _activate(tensor, layer_no):
            # Shared tail of every hidden layer: lrelu -> dropout -> + zy.
            # `training` is resolved from the enclosing scope.
            tensor = ops.lrelu(tensor)
            tensor = ops.dropout(tensor,
                                 training=training,
                                 keep=params.gen_keep_dropout,
                                 name='dropout%d' % layer_no)
            return ops.concat(tensor, zy)

        with tf.variable_scope(params.gen_scope):
            imh, imw = params.dataset.image_size, params.dataset.image_size

            # Three upsampling layers follow, so start at 1/2**3 the size.
            hidden_layers_num = 3
            imdiv = 2**hidden_layers_num
            base_h, base_w = imh // imdiv, imw // imdiv

            h0 = tf.concat([z, zy], axis=1)

            h1 = ops.fully_connected(
                h0, base_h * base_w * params.gen_filters * 4, 'h1')
            if params.use_batch_norm:
                h1 = ops.batch_norm(h1, name='bn1')
            h1 = tf.reshape(h1, [-1, base_h, base_w, params.gen_filters * 4])
            h1 = _activate(h1, 1)

            h2 = ops.deconvolution(h1,
                                   params.gen_filters_size,
                                   params.gen_filters * 2,
                                   name='h2')
            if params.use_batch_norm:
                h2 = ops.batch_norm(h2, name='bn2')
            h2 = _activate(h2, 2)

            h3_pure = ops.deconvolution(h2,
                                        params.gen_filters_size,
                                        params.gen_filters,
                                        name='h3')
            h3 = h3_pure
            if params.use_batch_norm:
                h3 = ops.batch_norm(h3, name='bn3')
            h3 = _activate(h3, 3)

            h4 = ops.deconvolution(h3,
                                   params.gen_filters_size,
                                   params.dataset.channels_size,
                                   name='h4')

            layers = {
                'h0': h0,
                'h1': h1,
                'h2': h2,
                'h3': h3,
                'h3_pure': h3_pure,
                'h4': h4
            }
            return tf.nn.tanh(h4), layers
Exemplo n.º 15
0
    def zero_state(self, batchSize, dtype=tf.float32):
        """Create the initial cell state and reset per-run bookkeeping.

        Besides returning the initial (control, memory) state, this resets
        the attention / auto-encoder-loss containers, seeds the control,
        memory and info histories, optionally projects the knowledge base
        (merging the question into it when no image-feature mode is
        selected), prepares the question words used by the control unit,
        and optionally creates the memory variational-dropout mask.

        Args:
            batchSize: batch size forwarded to the state initializers and
                used for the dropout-mask shape.
            dtype: not read by this method.

        Returns:
            MACCellTuple(initialControl, initialMemory).
        """
        ## initialize data-structures
        self.attentions = {"kb": [], "question": [], "self": [], "gate": []}
        self.autoEncLosses = {
            "control": tf.constant(0.0),
            "memory": tf.constant(0.0)
        }

        ## initialize state
        initialControl = self.initState("initCtrl", config.ctrlDim,
                                        config.initCtrl, batchSize)
        if self.memSameSizeWithKB:
            # NOTE(review): 100 is hard-coded here and in the dropout-mask
            # shape below; presumably the number of knowledge-base entries
            # -- confirm.
            initialMemory = self.initmemState("initMem", (100, config.memDim),
                                              config.initMem, batchSize)
        else:
            initialMemory = self.initState("initMem", config.memDim,
                                           config.initMem, batchSize)

        # histories start with a single step: the initial state
        self.controls = tf.expand_dims(initialControl, axis=1)
        self.memories = tf.expand_dims(initialMemory, axis=1)
        self.infos = tf.expand_dims(initialMemory, axis=1)

        self.contControl = initialControl

        ## initialize knowledge base
        # Pick the input and its size for the "initKB" projection; only the
        # fallback branch merges the question into the knowledge base.
        if config.initKBwithQ != "NON":
            kbInput = self.knowledgeBase
            if config.imageEnsembleFeatures:
                kbInDim = self.kbDim
            elif config.imageObjects:
                kbInDim = config.imageDims[-1]
            elif config.imageObjectsAndGrid:
                print("self.knowledgeBase", self.knowledgeBase.shape,
                      "config.imageDims", config.imageDims)
                kbInDim = config.imageDims[-1] + 4
            elif config.imageSceneGraph:
                print("self.knowledgeBase", self.knowledgeBase.shape,
                      "config.imageDims", config.imageDims)
                # hard-coded scene-graph feature size
                kbInDim = 900
            else:
                questionProj = ops.linear(self.vecQuestions,
                                          config.ctrlDim,
                                          config.memDim,
                                          name="questions")
                useMul = (config.initKBwithQ == "MUL")
                kbInput, kbInDim = ops.concat(self.knowledgeBase,
                                              questionProj,
                                              config.memDim,
                                              mul=useMul,
                                              expandY=True)

            self.knowledgeBase = ops.linear(kbInput,
                                            kbInDim,
                                            config.memDim,
                                            name="initKB")

        ## initialize question words
        # choose question words to work with (original embeddings or encoder outputs)
        if config.controlContextual:
            words = self.questionCntxWords
        else:
            words = self.questionWords

        # optionally add a parametric "null" word to all questions
        if config.addNullWord:
            words, self.questionLengths = self.addNullWord(
                words, self.questionLengths)

        # optional dropout before projecting the words
        if config.controlPreDropout < 1.0:
            words = tf.nn.dropout(words, self.dropouts["controlPre"])

        self.inWords = words
        self.outWords = words
        if config.controlInWordsProj or config.controlOutWordsProj:
            # one shared projection, used for either or both word sets
            projected = ops.linear(words,
                                   config.wrdQEmbDim,
                                   config.ctrlDim,
                                   name="wordsProj")
            if config.controlInWordsProj:
                self.inWords = projected
            if config.controlOutWordsProj:
                self.outWords = projected

        ## initialize memory variational dropout mask
        if config.memoryVariationalDropout:
            if self.memSameSizeWithKB:
                maskShape = (batchSize, 100, config.memDim)
            else:
                maskShape = (batchSize, config.memDim)
            self.memDpMask = ops.generateVarDpMask(maskShape,
                                                   self.dropouts["memory"])

        return MACCellTuple(initialControl, initialMemory)
Exemplo n.º 16
0
    def write(self,
              memory,
              info,
              control,
              contControl=None,
              name="",
              reuse=None):
        """Compute the new memory state from the previous memory and `info`.

        Depending on the configuration, the new information is projected
        and passed through a nonlinearity, combined with the previous
        memory (replace / sum / concatenate), extended with a
        self-attention summary over earlier memories and/or with the
        control state, projected back to config.memDim, gated against the
        previous memory and batch-normalized.

        Side effects: appends to self.attentions["self"] when
        config.writeSelfAtt is set and to self.attentions["gate"] when
        config.writeGate is set.

        Args:
            memory: previous memory state (config.memDim features).
            info: newly retrieved information (config.memDim features).
            control: current control state (config.ctrlDim features).
            contControl: control used for self-attention when
                config.writeSelfAttMod == "CONT"; has to be provided in
                that mode.
            name: suffix appended to the "write" variable-scope name.
            reuse: forwarded to tf.variable_scope.

        Returns:
            The new memory state.
        """
        with tf.variable_scope("write" + name, reuse=reuse):

            # optionally project info
            if config.writeInfoProj:
                info = ops.linear(info,
                                  config.memDim,
                                  config.memDim,
                                  name="info")

            # optional info nonlinearity
            info = ops.activations[config.writeInfoAct](info)

            # compute self-attention vector based on previous controls and memories
            if config.writeSelfAtt:
                print("using self attention")
                selfControl = control
                if config.writeSelfAttMod == "CONT":
                    selfControl = contControl
                selfControl = ops.linear(selfControl,
                                         config.ctrlDim,
                                         config.ctrlDim,
                                         name="ctrlProj")

                # compare the projected control with every stored control
                interactions = self.controls * tf.expand_dims(selfControl,
                                                              axis=1)

                attention = ops.inter2att(interactions,
                                          config.ctrlDim,
                                          name="selfAttention")
                self.attentions["self"].append(attention)
                selfSmry = ops.att2Smry(attention, self.memories)

            # get write unit inputs: previous memory, the new info, optionally self-attention / control
            # `dim` tracks the feature size of newMemory for the projection below
            newMemory, dim = memory, config.memDim
            if config.writeInputs == "INFO":
                newMemory = info
            elif config.writeInputs == "SUM":
                newMemory += info
            elif config.writeInputs == "BOTH":
                newMemory, dim = ops.concat(newMemory,
                                            info,
                                            dim,
                                            mul=config.writeConcatMul)
            # else: MEM

            if config.writeSelfAtt:
                newMemory = tf.concat([newMemory, selfSmry], axis=-1)
                dim += config.memDim

            if config.writeMergeCtrl:
                newMemory = tf.concat([newMemory, control], axis=-1)
                # control carries config.ctrlDim features (see the linear
                # layers applied to it in this method), so that is what the
                # concatenation adds.
                dim += config.ctrlDim

            # project memory back to memory dimension
            if config.writeMemProj or (dim != config.memDim):
                newMemory = ops.linear(newMemory,
                                       dim,
                                       config.memDim,
                                       name="newMemory")

            # optional memory nonlinearity
            newMemory = ops.activations[config.writeMemAct](newMemory)

            # write unit gate
            if config.writeGate:
                # a shared gate uses one scalar for all memory features
                gateDim = config.memDim
                if config.writeGateShared:
                    gateDim = 1

                z = tf.sigmoid(
                    ops.linear(control,
                               config.ctrlDim,
                               gateDim,
                               name="gate",
                               bias=config.writeGateBias))

                self.attentions["gate"].append(z)

                # interpolate between the candidate and the previous memory
                newMemory = newMemory * z + memory * (1 - z)

            # optional batch normalization
            if config.memoryBN:
                newMemory = tf.contrib.layers.batch_norm(
                    newMemory,
                    decay=config.bnDecay,
                    center=config.bnCenter,
                    scale=config.bnScale,
                    is_training=self.train,
                    updates_collections=None)

        return newMemory
Exemplo n.º 17
0
    def zero_state(self, batchSize, dtype=tf.float32):
        """Reset the cell's per-run bookkeeping and build its initial state.

        Besides returning the initial state, this method (re)assigns several
        attributes on the cell:

        * ``self.attentions`` and ``self.autoEncLosses`` -- empty collectors.
        * ``self.controls``, ``self.memories``, ``self.infos`` -- the initial
          control / memory vectors with an extra axis inserted at position 1.
        * ``self.contControl`` -- the initial control vector.
        * ``self.knowledgeBase`` -- re-projected together with the question
          vector unless ``config.initKBwithQ == "NON"``.
        * ``self.inWords`` / ``self.outWords`` -- the question words, each
          optionally passed through a shared linear projection.
        * ``self.memDpMask`` -- only when ``config.memoryVariationalDropout``.

        Args:
            batchSize: batch size, forwarded to ``self.initState`` and used as
                the leading dimension of the variational dropout mask.
            dtype: accepted but not used by this method.

        Returns:
            ``MACCellTuple(initialControl, initialMemory)``.
        """
        ## initialize data-structures
        self.attentions = {"kb": [], "question": [], "self": [], "gate": []}
        self.autoEncLosses = {
            "control": tf.constant(0.0),
            "memory": tf.constant(0.0)
        }

        ## initialize state
        initialControl = self.initState("initCtrl", config.ctrlDim,
                                        config.initCtrl, batchSize)
        initialMemory = self.initState("initMem", config.memDim,
                                       config.initMem, batchSize)

        # histories start with a single entry: the initial vectors, with a new
        # axis inserted at position 1
        self.controls = tf.expand_dims(initialControl, axis=1)
        self.memories = tf.expand_dims(initialMemory, axis=1)
        # infos is seeded from the initial memory as well
        self.infos = tf.expand_dims(initialMemory, axis=1)

        self.contControl = initialControl
        # self.contControls = tf.expand_dims(initialControl, axis = 1)
        # self.postControls = tf.expand_dims(initialControl, axis = 1)

        ## initialize knowledge base
        # optionally merge question into knowledge base representation
        if config.initKBwithQ != "NON":
            iVecQuestions = ops.linear(self.vecQuestions,
                                       config.ctrlDim,
                                       config.memDim,
                                       name="questions")

            # "MUL" additionally requests the multiplicative term from ops.concat
            concatMul = (config.initKBwithQ == "MUL")
            cnct, dim = ops.concat(self.knowledgeBase,
                                   iVecQuestions,
                                   config.memDim,
                                   mul=concatMul,
                                   expandY=True)
            # project the merged representation back to memDim
            self.knowledgeBase = ops.linear(cnct,
                                            dim,
                                            config.memDim,
                                            name="initKB")

        ## initialize question words
        # choose question words to work with (original embeddings or encoder outputs)
        words = self.questionCntxWords if config.controlContextual else self.questionWords

        # optionally add a parametric "null" word to all questions
        if config.addNullWord:
            # The lengths are read from the instance: this method has no local
            # `questionLengths`, and reading one here raised UnboundLocalError
            # whenever addNullWord was enabled.
            # NOTE(review): assumes the cell keeps the per-question lengths in
            # self.questionLengths -- confirm against the constructor. The
            # lengths returned by addNullWord are discarded (they were never
            # used here); verify whether later masking should see them.
            words, _ = self.addNullWord(words, self.questionLengths)

        # project words
        self.inWords = self.outWords = words
        if config.controlInWordsProj or config.controlOutWordsProj:
            # a single projection is computed and shared by whichever of
            # inWords / outWords asked for it
            pWords = ops.linear(words,
                                config.ctrlDim,
                                config.ctrlDim,
                                name="wordsProj")
            self.inWords = pWords if config.controlInWordsProj else words
            self.outWords = pWords if config.controlOutWordsProj else words

        # if config.controlCoverage:
        #     self.coverage = tf.zeros((batchSize, tf.shape(words)[1]), dtype = tf.float32)
        #     self.coverageBias = tf.get_variable("coverageBias", shape = (),
        #         initializer = config.controlCoverageBias)

        ## initialize memory variational dropout mask
        if config.memoryVariationalDropout:
            self.memDpMask = ops.generateVarDpMask((batchSize, config.memDim),
                                                   self.dropouts["memory"])

        return MACCellTuple(initialControl, initialMemory)