def outputOp(self, memory, vecQuestions, images, imageInDim):
    """Assemble the feature vector that is handed to the output classifier.

    Starts from ``memory`` and, depending on the configuration, merges in a
    projection of the question vector (``config.outQuestion``) and a
    projection of linearized image features (``config.outImage``).

    Args:
        memory: memory state used as the base features; its size is taken
            to be ``config.memDim``.
        vecQuestions: question vector, passed through ``ops.linear`` with
            sizes ``(config.ctrlDim, config.memDim)`` before merging.
        images: image features handed to ``ops.linearizeFeatures``.
        imageInDim: not read by this method (``self.imageInDim`` is used
            instead); kept for interface compatibility.

    Returns:
        Tuple ``(features, dim)``: the merged features and the feature
        size tracked alongside them.
    """
    with tf.variable_scope("outputUnit"):
        outFeatures, outDim = memory, config.memDim

        if config.outQuestion:
            projQuestions = ops.linear(
                vecQuestions, config.ctrlDim, config.memDim,
                name="outQuestion")
            outFeatures, outDim = ops.concat(
                outFeatures, projQuestions, config.memDim,
                mul=config.outQuestionMul)

        if config.outImage:
            # The size returned by linearizeFeatures is not used afterwards.
            flatImages, _ = ops.linearizeFeatures(
                images, self.H, self.W, self.imageInDim,
                outputDim=config.outImageDim)
            # NOTE(review): the input size given to ops.linear is
            # config.memDim rather than the size returned above -- confirm
            # this is intended.
            imageFeatures = ops.linear(
                flatImages, config.memDim, config.outImageDim,
                name="outImage")
            outFeatures = tf.concat([outFeatures, imageFeatures], axis=-1)
            outDim += config.outImageDim

    return outFeatures, outDim
def decoder_block(img, skip, is_t):
    """Upsampling decoder that merges skip features at every stage.

    Five stages resize the running feature map to 2x, 4x, 8x, 16x and 32x
    the spatial size of ``img`` (bilinear), apply a 3x3 separable
    convolution and batch norm, then merge one entry of ``skip``
    (``skip[4]`` first, ``skip[0]`` last). Every stage except the first is
    preceded by a ``conv2d`` that sets the channel count. A small output
    head follows.

    Args:
        img: input feature map; axes 1 and 2 of its dynamic shape are used
            as height and width.
        skip: indexable holding the five skip tensors ``skip[0..4]``.
        is_t: forwarded to ``bn`` as ``is_t`` (presumably the training
            flag -- confirm against ``bn``).

    Returns:
        Output of the final 1x1 ``conv2d`` with ``output_dim=3``.
    """
    # (channels, upsample factor, resize op name, skip index) per stage.
    # Resize op names are reproduced exactly as they were, including the
    # repeated 'upsample_32' and the 'upsample_128' on the last stage.
    stages = (
        (512, 2, 'upsample_16', 4),
        (256, 4, 'upsample_32', 3),
        (128, 8, 'upsample_64', 2),
        (64, 16, 'upsample_32', 1),
        (32, 32, 'upsample_128', 0),
    )

    with tf.variable_scope('gen_upsample'):
        with slim.arg_scope([slim.separable_conv2d], depth_multiplier=1):
            in_shape = tf.shape(img)
            h, w = in_shape[1], in_shape[2]

            im = img
            for stage_no, (channels, factor, resize_name, skip_idx) in \
                    enumerate(stages):
                if stage_no > 0:
                    # Set the channel count before upsampling.
                    im = conv2d(im, output_dim=channels,
                                name='conv_%d' % channels)
                im = tf.image.resize_bilinear(
                    im, [h * factor, w * factor], name=resize_name)
                im = slim.separable_conv2d(
                    im, channels, [3, 3], scope='conv_sp_%d' % channels)
                im = bn(im, is_t=is_t, name='bn_sp_%d' % channels)
                im = concat(im, skip[skip_idx], name='cat_%d' % channels)

            # output head
            im = conv2d(im, output_dim=16, name='output_16')
            im = tf.nn.relu(bn(im, is_t=is_t, name='output_bn_16'))
            im = conv2d(im, output_dim=3, k_h=1, k_w=1, name='output_3')
            return im
def discriminator(self, image, y=None, reuse=False):
    """Build the discriminator under the 'discriminator' variable scope.

    Two architectures are supported: an unconditional convolutional stack
    (when ``self.y_dim`` is falsy) and a label-conditioned network that
    re-attaches ``y`` after every layer.

    Args:
        image: input image batch.
        y: conditioning labels; only read when ``self.y_dim`` is truthy.
        reuse: when true, variables of the scope are reused.

    Returns:
        Tuple ``(sigmoid(logits), logits)``.
    """
    with tf.variable_scope('discriminator') as scope:
        if reuse:
            scope.reuse_variables()

        if self.y_dim:
            # Conditional path: labels are merged in at every layer.
            label_map = tf.reshape(y, [self.batch_size, 1, 1, self.y_dim])
            conv_in = ops.conv_cond_concat(image, label_map)

            conv0 = ops.conv2d(
                conv_in, self.c_dim + self.y_dim, name='d_h0_conv')
            act0 = ops.conv_cond_concat(ops.lrelu(conv0), label_map)

            conv1 = ops.conv2d(
                act0, self.df_dim + self.y_dim, name='d_h1_conv')
            act1 = ops.lrelu(self.d_bn1(conv1))
            flat1 = tf.reshape(act1, [self.batch_size, -1])
            flat1 = ops.concat([flat1, y], 1)

            fc2 = ops.linear(flat1, self.dfc_dim, 'd_h2_lin')
            act2 = ops.concat([ops.lrelu(self.d_bn2(fc2)), y], 1)

            logits = ops.linear(act2, 1, 'd_h3_lin')
            return tf.nn.sigmoid(logits), logits

        # Unconditional path: conv stack with growing channel counts.
        feat = ops.lrelu(ops.conv2d(image, self.df_dim, name='d_h0_conv'))
        conv_stack = (
            (self.d_bn1, 2, 'd_h1_conv'),
            (self.d_bn2, 4, 'd_h2_conv'),
            (self.d_bn3, 8, 'd_h3_conv'),
        )
        for batch_norm, multiplier, conv_name in conv_stack:
            conv = ops.conv2d(feat, self.df_dim * multiplier, name=conv_name)
            feat = ops.lrelu(batch_norm(conv))

        flat = tf.reshape(feat, [self.batch_size, -1])
        logits = ops.linear(flat, 1, 'd_h4_lin')
        return tf.nn.sigmoid(logits), logits
def _discriminator(x, y, reuse_vars=False):
    """Build the conditional discriminator under ``params.dis_scope``.

    Three convolution layers (``params.dis_filters`` x1, x2, x4) are each
    followed by optional batch norm, leaky ReLU and a merge with ``y``; a
    fully connected layer then produces a single output per example.

    Args:
        x: input batch.
        y: conditioning input merged with ``x`` and with every hidden layer.
        reuse_vars: forwarded to ``tf.variable_scope`` as ``reuse``.

    Returns:
        Tuple ``(h4, layers)`` where ``h4`` is the fully connected output
        and ``layers`` maps 'h0'..'h4' and 'h1_pure' (the first convolution
        before normalization/activation) to their tensors.
    """
    def _activate(layer, bn_name):
        # Optional batch norm, leaky ReLU, then re-attach the condition.
        if params.use_batch_norm:
            layer = ops.batch_norm(layer, name=bn_name)
        return ops.concat(ops.lrelu(layer), y)

    kernel = params.dis_filters_size
    with tf.variable_scope(params.dis_scope, reuse=reuse_vars):
        h0 = ops.concat(x, y)

        h1_pure = ops.convolution(h0, kernel, params.dis_filters, name='h1')
        h1 = _activate(h1_pure, 'bn1')

        conv2 = ops.convolution(
            h1, kernel, params.dis_filters * 2, name='h2')
        h2 = _activate(conv2, 'bn2')

        conv3 = ops.convolution(
            h2, kernel, params.dis_filters * 4, name='h3')
        h3 = _activate(conv3, 'bn3')

        flattened = tf.reshape(h3, [params.batch_size, -1])
        h4 = ops.fully_connected(flattened, 1, 'h4')

        layers = {
            'h0': h0,
            'h1': h1,
            'h1_pure': h1_pure,
            'h2': h2,
            'h3': h3,
            'h4': h4
        }
        return h4, layers
def stn_to_pixel_coords(stn_coords, img_size):
    """Convert spatial-transformer coordinates to pixel coordinates.

    Args:
        stn_coords: ``tf.Tensor`` or array-like whose last axis is split
            into four parts, read in the order (sx, sy, tx, ty).
        img_size: indexable; element 0 is used for the y/h conversion and
            element 1 for the x/w conversion.

    Returns:
        The converted values merged along the last axis in the order
        (y, x, h, w).
    """
    if not isinstance(stn_coords, tf.Tensor):
        stn_coords = np.asarray(stn_coords)

    scale_x, scale_y, shift_x, shift_y = ops.split(stn_coords, 4, axis=-1)

    to_pixels = SpatialTransformer.stn_to_pixel_coord
    y, h = to_pixels(scale_y, shift_y, img_size[0])
    x, w = to_pixels(scale_x, shift_x, img_size[1])

    return ops.concat((y, x, h, w), -1)
def discriminator(self, image, y=None, reuse=False):
    """Defines the D network structure.

    When ``self.y_dim`` is falsy an unconditional four-layer convolutional
    network is built; otherwise a label-conditioned network is built in
    which ``y`` is merged back in after each layer.

    Args:
        image: input image batch.
        y: conditioning labels; only read in the conditional case.
        reuse: when true, the scope's variables are reused.

    Returns:
        Tuple ``(sigmoid(logits), logits)``.
    """
    with tf.variable_scope("discriminator") as scope:
        if reuse:
            scope.reuse_variables()

        if not self.y_dim:
            conv0 = conv2d(image, self.df_dim, name='d_h0_conv')
            act0 = lrelu(conv0)

            conv1 = conv2d(act0, self.df_dim * 2, name='d_h1_conv')
            act1 = lrelu(self.d_bn1(conv1))

            conv2 = conv2d(act1, self.df_dim * 4, name='d_h2_conv')
            act2 = lrelu(self.d_bn2(conv2))

            conv3 = conv2d(act2, self.df_dim * 8, name='d_h3_conv')
            act3 = lrelu(self.d_bn3(conv3))

            flat = tf.reshape(act3, [self.batch_size, -1])
            logits = linear(flat, 1, 'd_h4_lin')
        else:
            # Labels reshaped so they can be merged with feature maps.
            yb = tf.reshape(y, [self.batch_size, 1, 1, self.y_dim])
            conditioned = conv_cond_concat(image, yb)

            conv0 = conv2d(
                conditioned, self.c_dim + self.y_dim, name='d_h0_conv')
            act0 = conv_cond_concat(lrelu(conv0), yb)

            conv1 = conv2d(act0, self.df_dim + self.y_dim, name='d_h1_conv')
            act1 = lrelu(self.d_bn1(conv1))
            act1 = tf.reshape(act1, [self.batch_size, -1])
            act1 = concat([act1, y], 1)

            fc2 = linear(act1, self.dfc_dim, 'd_h2_lin')
            act2 = lrelu(self.d_bn2(fc2))
            act2 = concat([act2, y], 1)

            logits = linear(act2, 1, 'd_h3_lin')

        return tf.nn.sigmoid(logits), logits
def gaussian_blur(image, kernel, kernel_size, cdim=3):
    """Blur every channel of ``image`` with two 1-D convolutions.

    The image is reflect-padded on axes 1 and 2 by ``(kernel_size - 1) // 2``
    and each of the first ``cdim`` channels is convolved with ``kernel``
    reshaped to ``[1, kernel_size, 1, 1]`` and then to
    ``[kernel_size, 1, 1, 1]`` using 'VALID' padding.

    Args:
        image: 4-D tensor whose last axis holds the channels.
        kernel: kernel values holding ``kernel_size`` elements; it is meant
            to be fed as a placeholder/variable so it can change.
        kernel_size: number of elements in ``kernel``.
        cdim: number of channels to process.

    Returns:
        The blurred channels merged along axis 3.
    """
    unit_strides = [1, 1, 1, 1]
    border = (kernel_size - 1) // 2
    padded = tf.pad(
        image,
        [[0, 0], [border, border], [border, border], [0, 0]],
        mode='REFLECT')

    def _blur_channel(idx):
        # Keep the channel axis so the slice stays 4-D for conv2d.
        plane = padded[:, :, :, idx:(idx + 1)]
        row_filter = tf.reshape(kernel, [1, kernel_size, 1, 1])
        plane = tf.nn.conv2d(plane, row_filter, unit_strides, 'VALID')
        col_filter = tf.reshape(kernel, [kernel_size, 1, 1, 1])
        return tf.nn.conv2d(plane, col_filter, unit_strides, 'VALID')

    blurred = [_blur_channel(idx) for idx in range(cdim)]
    return concat(blurred, axis=3)
def pixel_to_stn_coords(yxhw, img_size):
    """Convert pixel coordinates to spatial-transformer coordinates.

    Args:
        yxhw: ``tf.Tensor`` or array-like; the first two entries of the
            last axis are used for the shift and the remaining entries for
            the scale (read as y, x, h, w).
        img_size: image size, converted to a float32 array and broadcast
            against ``yxhw``.

    Returns:
        Values merged along the last axis in the order (sx, sy, tx, ty).
    """
    size = np.asarray(img_size).astype(np.float32)
    if not isinstance(yxhw, tf.Tensor):
        yxhw = np.asarray(yxhw).astype(np.float32)

    # Prepend singleton axes until the size has as many dimensions as the
    # coordinates, so the arithmetic below broadcasts.
    missing_axes = len(yxhw.shape) - len(size.shape)
    if missing_axes > 0:
        size = size.reshape((1,) * missing_axes + size.shape)

    # NOTE(review): the scale divides by (size + 1) while the shift divides
    # by (size - 1.) -- confirm against the inverse conversion that this
    # asymmetry is intended.
    extent = yxhw[..., 2:]
    scale = extent / (size + 1)
    origin = yxhw[..., :2]
    shift = 2 * origin / (size - 1.) + scale - 1.

    sy, sx = ops.split(scale, 2, -1)
    ty, tx = ops.split(shift, 2, -1)
    return ops.concat((sx, sy, tx, ty), -1)
def gaussian_blur_adaptive(image, sigma, eps=0.01, img_width=32, cdim=3):
    """Blur ``image`` with a kernel obtained from ``gauss_kernel``.

    Args:
        image: 4-D tensor whose last axis holds the channels.
        sigma: forwarded to ``gauss_kernel``; ``0`` returns ``image``
            unchanged.
        eps: forwarded to ``gauss_kernel``.
        img_width: ``img_width - 1`` is forwarded to ``gauss_kernel`` as its
            third argument.
        cdim: number of channels to process.

    Returns:
        ``image`` itself when ``sigma == 0``; otherwise the blurred channels
        merged along axis 3.
    """
    if sigma == 0:
        return image

    kernel = gauss_kernel(sigma, eps, img_width - 1)

    def _as_filter(kernel_axis):
        # Add singleton axes so the kernel can serve as a conv2d filter;
        # for a 1-D kernel of length K this yields (1, K, 1, 1) for
        # kernel_axis=0 and (K, 1, 1, 1) for kernel_axis=1.
        filt = np.expand_dims(kernel, kernel_axis)
        filt = np.expand_dims(filt, axis=2)
        return np.expand_dims(filt, axis=3)

    row_filter = _as_filter(0)
    col_filter = _as_filter(1)

    border = (kernel.shape[0] - 1) // 2
    padded = tf.pad(
        image,
        [[0, 0], [border, border], [border, border], [0, 0]],
        mode='REFLECT')

    unit_strides = [1, 1, 1, 1]
    blurred = []
    for idx in range(cdim):
        # Keep the channel axis so the slice stays 4-D for conv2d.
        plane = padded[:, :, :, idx:(idx + 1)]
        plane = tf.nn.conv2d(plane, row_filter, unit_strides, 'VALID')
        plane = tf.nn.conv2d(plane, col_filter, unit_strides, 'VALID')
        blurred.append(plane)

    return concat(blurred, axis=3)
def forward(self, word_indices: list[Tensor]) -> Tensor:
    """Run the forward pass of the feed-forward language model.

    Args:
        word_indices: list of [batch_size] tensors, one per context
            position (length = number of previous characters / n-gram
            length); each holds the indices of the symbols at that
            position.

    Returns:
        [batch_size, vocab_size] Tensor of logits (pre-softmax scores, not
        probabilities) over the vocabulary for each example in the batch.
    """
    # One embedding lookup per context position.
    embeddings = [self.embedding(position) for position in word_indices]

    # ops.concat takes the tensors as separate positional arguments.
    # [batch_size, num_words * embedding_size]
    context = ops.concat(*embeddings)

    hidden = ops.tanh(self.fc(context))
    return self.output(hidden)
def generator(self, z, y=None):
    """Build the generator under the 'generator' variable scope.

    With ``self.y_dim`` set to ``None`` an unconditional network is built
    (linear projection followed by four ``deconv2d`` layers, tanh output)
    and intermediate tensors, weights and biases are stored on ``self``.
    Otherwise a label-conditioned network is built (two linear layers and
    two ``deconv2d`` layers, sigmoid output).

    Args:
        z: latent input batch.
        y: conditioning labels; only read in the conditional case.

    Returns:
        Generated batch shaped
        ``[batch_size, output_height, output_width, c_dim]``.
    """
    with tf.variable_scope("generator"):
        s_h, s_w = self.output_height, self.output_width

        if self.y_dim is not None:
            # ---- conditional generator ----
            s_h2, s_h4 = s_h // 2, s_h // 4
            s_w2, s_w4 = s_w // 2, s_w // 4

            # Labels reshaped so they can be merged with feature maps.
            yb = tf.reshape(y, [self.batch_size, 1, 1, self.y_dim])
            z = concat([z, y], 1)

            fc0 = linear(z, self.gfc_dim, 'g_h0_lin')
            h0 = concat([tf.nn.relu(self.g_bn0(fc0)), y], 1)

            fc1 = linear(h0, self.gf_dim * 2 * s_h4 * s_w4, 'g_h1_lin')
            h1 = tf.nn.relu(self.g_bn1(fc1))
            h1 = tf.reshape(
                h1, [self.batch_size, s_h4, s_w4, self.gf_dim * 2])
            h1 = conv_cond_concat(h1, yb)

            up2 = deconv2d(
                h1, [self.batch_size, s_h2, s_w2, self.gf_dim * 2],
                name='g_h2')
            h2 = conv_cond_concat(tf.nn.relu(self.g_bn2(up2)), yb)

            up3 = deconv2d(
                h2, [self.batch_size, s_h, s_w, self.c_dim], name='g_h3')
            return tf.nn.sigmoid(up3)

        # ---- unconditional generator ----
        # Spatial sizes of the successive layers, from output down to h0.
        sizes = [(s_h, s_w)]
        for _ in range(4):
            prev_h, prev_w = sizes[-1]
            sizes.append((conv_out_size_same(prev_h, 2),
                          conv_out_size_same(prev_w, 2)))
        _, (s_h2, s_w2), (s_h4, s_w4), (s_h8, s_w8), (s_h16, s_w16) = sizes

        # project `z` and reshape
        self.z_, self.h0_w, self.h0_b = linear(
            z, self.gf_dim*8*s_h16*s_w16, 'g_h0_lin', with_w=True)
        self.h0 = tf.reshape(self.z_, [-1, s_h16, s_w16, self.gf_dim * 8])
        act0 = tf.nn.relu(self.g_bn0(self.h0))

        self.h1, self.h1_w, self.h1_b = deconv2d(
            act0, [self.batch_size, s_h8, s_w8, self.gf_dim*4],
            name='g_h1', with_w=True)
        act1 = tf.nn.relu(self.g_bn1(self.h1))

        up2, self.h2_w, self.h2_b = deconv2d(
            act1, [self.batch_size, s_h4, s_w4, self.gf_dim*2],
            name='g_h2', with_w=True)
        act2 = tf.nn.relu(self.g_bn2(up2))

        up3, self.h3_w, self.h3_b = deconv2d(
            act2, [self.batch_size, s_h2, s_w2, self.gf_dim*1],
            name='g_h3', with_w=True)
        act3 = tf.nn.relu(self.g_bn3(up3))

        up4, self.h4_w, self.h4_b = deconv2d(
            act3, [self.batch_size, s_h, s_w, self.c_dim],
            name='g_h4', with_w=True)
        return tf.nn.tanh(up4)
def sampler(self, z, y=None):
    """Build a sampling copy of the generator that reuses its variables.

    Mirrors the generator architecture under the 'generator' scope with
    variable reuse enabled; every batch-norm layer is called with
    ``train=False``.

    Args:
        z: latent input batch.
        y: conditioning labels; only read when ``self.y_dim`` is not None.

    Returns:
        Generated batch shaped
        ``[batch_size, output_height, output_width, c_dim]`` (tanh output
        in the unconditional case, sigmoid in the conditional case).
    """
    with tf.variable_scope("generator") as scope:
        scope.reuse_variables()

        s_h, s_w = self.output_height, self.output_width

        if self.y_dim is not None:
            # ---- conditional sampler ----
            s_h2, s_h4 = s_h // 2, s_h // 4
            s_w2, s_w4 = s_w // 2, s_w // 4

            # Labels reshaped so they can be merged with feature maps.
            yb = tf.reshape(y, [self.batch_size, 1, 1, self.y_dim])
            z = concat([z, y], 1)

            fc0 = linear(z, self.gfc_dim, 'g_h0_lin')
            h0 = tf.nn.relu(self.g_bn0(fc0, train=False))
            h0 = concat([h0, y], 1)

            fc1 = linear(h0, self.gf_dim * 2 * s_h4 * s_w4, 'g_h1_lin')
            h1 = tf.nn.relu(self.g_bn1(fc1, train=False))
            h1 = tf.reshape(
                h1, [self.batch_size, s_h4, s_w4, self.gf_dim * 2])
            h1 = conv_cond_concat(h1, yb)

            up2 = deconv2d(
                h1, [self.batch_size, s_h2, s_w2, self.gf_dim * 2],
                name='g_h2')
            h2 = tf.nn.relu(self.g_bn2(up2, train=False))
            h2 = conv_cond_concat(h2, yb)

            up3 = deconv2d(
                h2, [self.batch_size, s_h, s_w, self.c_dim], name='g_h3')
            return tf.nn.sigmoid(up3)

        # ---- unconditional sampler ----
        s_h2, s_w2 = (conv_out_size_same(s_h, 2),
                      conv_out_size_same(s_w, 2))
        s_h4, s_w4 = (conv_out_size_same(s_h2, 2),
                      conv_out_size_same(s_w2, 2))
        s_h8, s_w8 = (conv_out_size_same(s_h4, 2),
                      conv_out_size_same(s_w4, 2))
        s_h16, s_w16 = (conv_out_size_same(s_h8, 2),
                        conv_out_size_same(s_w8, 2))

        # project `z` and reshape
        projected = linear(z, self.gf_dim * 8 * s_h16 * s_w16, 'g_h0_lin')
        net = tf.reshape(projected, [-1, s_h16, s_w16, self.gf_dim * 8])
        net = tf.nn.relu(self.g_bn0(net, train=False))

        # (batch norm, output height, output width, channel multiplier, name)
        hidden_layers = (
            (self.g_bn1, s_h8, s_w8, 4, 'g_h1'),
            (self.g_bn2, s_h4, s_w4, 2, 'g_h2'),
            (self.g_bn3, s_h2, s_w2, 1, 'g_h3'),
        )
        for batch_norm, out_h, out_w, multiplier, layer_name in hidden_layers:
            net = deconv2d(
                net,
                [self.batch_size, out_h, out_w, self.gf_dim * multiplier],
                name=layer_name)
            net = tf.nn.relu(batch_norm(net, train=False))

        out = deconv2d(
            net, [self.batch_size, s_h, s_w, self.c_dim], name='g_h4')
        return tf.nn.tanh(out)
def generator(self, z, g_inputs, y=None, sampler=False):
    """Build the image-conditioned generator under the 'generator' scope.

    ``g_inputs`` is encoded by a small convolutional stack into a flat
    feature vector, merged with ``z``, decoded back to full resolution by
    four ``deconv2d`` layers, merged with ``g_inputs`` again and reduced to
    ``c_dim`` channels by two 1x1 ``deconv2d`` layers.

    Args:
        z: latent input batch.
        g_inputs: conditioning image batch; it is merged with the decoded
            features on the last axis, so its spatial size has to match the
            configured output size.
        y: not read by this method; kept for interface compatibility.
        sampler: when true, variables are reused and batch norm is run
            with ``train=False``.

    Returns:
        Sigmoid output shaped
        ``[batch_size, output_height, output_width, c_dim]``.
    """
    with tf.variable_scope("generator") as scope:
        if sampler:
            scope.reuse_variables()
        do_train = not sampler
        bs = self.args.batch_size

        def conv_out_size_same(h, w, stride):
            # Size of each dimension after a stride-`stride` step.
            return [int(math.ceil(s / stride)) for s in [h, w]]

        s_h, s_w = self.args.output_height, self.args.output_width
        s_h2, s_w2 = conv_out_size_same(s_h, s_w, 2)
        s_h4, s_w4 = conv_out_size_same(s_h2, s_w2, 2)
        s_h8, s_w8 = conv_out_size_same(s_h4, s_w4, 2)
        s_h16, s_w16 = conv_out_size_same(s_h8, s_w8, 2)

        # *** First layers: g_inputs => g_flat *** #
        # Variable/scope names below are left exactly as they were so that
        # previously saved variables keep matching.
        gi = ops.lrelu(
            ops.conv2d(g_inputs, self.args.df_dim, name='g_gi0_conv'))
        for idx in range(1, 4):
            conv = ops.conv2d(gi, self.args.df_dim * (2**idx),
                              name="g_gi" + str(idx) + "_conv")
            gi = ops.lrelu(
                ops.bn_layer(conv, train=do_train,
                             name="gi" + str(idx) + "_bn"))
        gi_flat = ops.linear(tf.reshape(gi, [bs, -1]),
                             self.args.g_feature_dim, 'g_gi4_lin')

        # *** Map gi_flat to [-1,1] to be more similar to z: *** #
        gi_flat = tf.nn.tanh(gi_flat)

        # *** Layers from flat (z and gi_flat) to full size: *** #
        z0 = ops.concat([gi_flat, z], -1)
        gd0 = ops.linear(z0, self.args.gf_dim * 8 * s_h16 * s_w16,
                         'g_h0_lin')
        gd0 = tf.reshape(gd0, [bs, s_h16, s_w16, self.args.gf_dim * 8])
        gd0 = tf.nn.relu(ops.bn_layer(gd0, train=do_train, name="g_bn0"))

        gd = gd0
        # Target (height, width) and channel multiplier of each deconv
        # layer. Height and width are tracked separately: using the height
        # for both dimensions is only correct for square outputs.
        sizes = [None, (s_h8, s_w8), (s_h4, s_w4), (s_h2, s_w2), (s_h, s_w)]
        m = [None, 4, 2, 2, 2]
        for idx in range(1, 5):
            out_h, out_w = sizes[idx]
            deconv = ops.deconv2d(
                gd, [bs, out_h, out_w, self.args.gf_dim * m[idx]],
                name="g_h" + str(idx))
            gd = tf.nn.relu(
                ops.bn_layer(deconv, train=do_train,
                             name="g_bn" + str(idx)))

        gd4 = ops.concat([gd, g_inputs], -1)

        # *** 2 Layers to merge gd and g_inputs: *** #
        gd5 = ops.deconv2d(gd4, [bs, s_h, s_w, self.args.gf_dim],
                           k_h=1, k_w=1, d_h=1, d_w=1, name='g_h5')
        gd5 = tf.nn.relu(gd5)
        gd6 = ops.deconv2d(gd5, [bs, s_h, s_w, self.args.c_dim],
                           k_h=1, k_w=1, d_h=1, d_w=1, name='g_h6')

        return tf.nn.sigmoid(gd6)
def _generator(z, zy):
    """Build the conditional generator under ``params.gen_scope``.

    A fully connected layer is reshaped to a feature map 8x smaller than
    the target image per side (three upsampling layers follow) and three
    ``ops.deconvolution`` layers produce the image. After each hidden layer
    the condition ``zy`` is merged back in.

    Note: ``training`` is not a parameter of this function; it is resolved
    from the enclosing module scope when dropout is applied.

    Args:
        z: latent input batch.
        zy: conditioning input merged with ``z`` and every hidden layer.

    Returns:
        Tuple ``(tanh(h4), layers)`` where ``layers`` maps 'h0'..'h4' and
        'h3_pure' (the third layer before normalization/activation) to
        their tensors.
    """
    def _maybe_bn(layer, bn_name):
        # Batch norm is applied only when enabled in the parameters.
        if params.use_batch_norm:
            return ops.batch_norm(layer, name=bn_name)
        return layer

    def _act_drop_cat(layer, dropout_name):
        # Leaky ReLU, dropout, then re-attach the condition.
        layer = ops.lrelu(layer)
        layer = ops.dropout(layer, training=training,
                            keep=params.gen_keep_dropout, name=dropout_name)
        return ops.concat(layer, zy)

    with tf.variable_scope(params.gen_scope):
        img_h, img_w = params.dataset.image_size, params.dataset.image_size
        hidden_layers_num = 3
        shrink = 2**hidden_layers_num
        base_h, base_w = img_h // shrink, img_w // shrink
        base_filters = params.gen_filters * 4

        h0 = tf.concat([z, zy], axis=1)

        h1 = ops.fully_connected(h0, base_h * base_w * base_filters, 'h1')
        h1 = _maybe_bn(h1, 'bn1')
        h1 = tf.reshape(h1, [-1, base_h, base_w, base_filters])
        h1 = _act_drop_cat(h1, 'dropout1')

        h2 = ops.deconvolution(h1, params.gen_filters_size,
                               params.gen_filters * 2, name='h2')
        h2 = _act_drop_cat(_maybe_bn(h2, 'bn2'), 'dropout2')

        h3_pure = ops.deconvolution(h2, params.gen_filters_size,
                                    params.gen_filters, name='h3')
        h3 = _act_drop_cat(_maybe_bn(h3_pure, 'bn3'), 'dropout3')

        h4 = ops.deconvolution(h3, params.gen_filters_size,
                               params.dataset.channels_size, name='h4')

        layers = {
            'h0': h0,
            'h1': h1,
            'h2': h2,
            'h3': h3,
            'h3_pure': h3_pure,
            'h4': h4
        }
        return tf.nn.tanh(h4), layers
def zero_state(self, batchSize, dtype=tf.float32):
    """Create the initial cell state and reset per-run bookkeeping.

    Besides returning the initial state this resets the attention and
    auto-encoder-loss containers, seeds the control/memory/info histories,
    optionally projects the knowledge base (merging the question into it
    when no image-feature mode is enabled), selects and optionally
    projects the question words, and creates the memory
    variational-dropout mask.

    Args:
        batchSize: batch size forwarded to the state initializers and used
            for the dropout-mask shape.
        dtype: not read by this method.

    Returns:
        ``MACCellTuple(initialControl, initialMemory)``.
    """
    ## initialize data-structures
    self.attentions = {
        key: [] for key in ("kb", "question", "self", "gate")
    }
    self.autoEncLosses = {
        "control": tf.constant(0.0),
        "memory": tf.constant(0.0)
    }

    ## initialize state
    initialControl = self.initState("initCtrl", config.ctrlDim,
                                    config.initCtrl, batchSize)
    if self.memSameSizeWithKB:
        initialMemory = self.initmemState("initMem", (100, config.memDim),
                                          config.initMem, batchSize)
    else:
        initialMemory = self.initState("initMem", config.memDim,
                                       config.initMem, batchSize)

    # Histories get an extra axis at position 1.
    self.controls = tf.expand_dims(initialControl, axis=1)
    self.memories = tf.expand_dims(initialMemory, axis=1)
    self.infos = tf.expand_dims(initialMemory, axis=1)

    self.contControl = initialControl

    ## initialize knowledge base
    # Pick the tensor to project and its input size; only the fallback
    # branch merges the question into the knowledge base.
    if config.initKBwithQ != "NON":
        kbInput = self.knowledgeBase
        if config.imageEnsembleFeatures:
            kbInDim = self.kbDim
        elif config.imageObjects:
            kbInDim = config.imageDims[-1]
        elif config.imageObjectsAndGrid:
            print("self.knowledgeBase", self.knowledgeBase.shape,
                  "config.imageDims", config.imageDims)
            kbInDim = config.imageDims[-1] + 4
        elif config.imageSceneGraph:
            print("self.knowledgeBase", self.knowledgeBase.shape,
                  "config.imageDims", config.imageDims)
            kbInDim = 900
        else:
            iVecQuestions = ops.linear(self.vecQuestions, config.ctrlDim,
                                       config.memDim, name="questions")
            concatMul = (config.initKBwithQ == "MUL")
            kbInput, kbInDim = ops.concat(self.knowledgeBase, iVecQuestions,
                                          config.memDim, mul=concatMul,
                                          expandY=True)

        self.knowledgeBase = ops.linear(kbInput, kbInDim, config.memDim,
                                        name="initKB")

    ## initialize question words
    # choose question words to work with (original embeddings or encoder
    # outputs)
    if config.controlContextual:
        words = self.questionCntxWords
    else:
        words = self.questionWords

    # optionally add a parametric "null" word to all questions
    if config.addNullWord:
        words, self.questionLengths = self.addNullWord(
            words, self.questionLengths)

    # optional dropout before projecting the words
    if config.controlPreDropout < 1.0:
        words = tf.nn.dropout(words, self.dropouts["controlPre"])

    # project words
    self.inWords, self.outWords = words, words
    if config.controlInWordsProj or config.controlOutWordsProj:
        pWords = ops.linear(words, config.wrdQEmbDim, config.ctrlDim,
                            name="wordsProj")
        if config.controlInWordsProj:
            self.inWords = pWords
        if config.controlOutWordsProj:
            self.outWords = pWords

    ## initialize memory variational dropout mask
    if config.memoryVariationalDropout:
        if self.memSameSizeWithKB:
            maskShape = (batchSize, 100, config.memDim)
        else:
            maskShape = (batchSize, config.memDim)
        self.memDpMask = ops.generateVarDpMask(maskShape,
                                               self.dropouts["memory"])

    return MACCellTuple(initialControl, initialMemory)
def write(self, memory, info, control, contControl=None, name="", reuse=None):
    """Write unit: merge newly retrieved information into the memory.

    Steps, each controlled by ``config``: optionally project ``info`` and
    apply its activation; optionally compute a self-attention summary over
    previous memories using previous controls; combine memory/info (and
    optionally the summary and the control) into the write input; project
    back to ``config.memDim`` if needed; apply the memory activation, an
    optional gate against the previous memory and optional batch norm.

    Args:
        memory: previous memory state (size ``config.memDim``).
        info: newly retrieved information (size ``config.memDim``).
        control: current control state (size ``config.ctrlDim``).
        contControl: alternative control used for self-attention when
            ``config.writeSelfAttMod == "CONT"``.
        name: suffix appended to the "write" variable-scope name.
        reuse: forwarded to ``tf.variable_scope``.

    Returns:
        The new memory state.
    """
    with tf.variable_scope("write" + name, reuse=reuse):
        # optionally project info
        if config.writeInfoProj:
            info = ops.linear(info, config.memDim, config.memDim,
                              name="info")

        # optional info nonlinearity
        info = ops.activations[config.writeInfoAct](info)

        # compute self-attention vector based on previous controls and
        # memories
        if config.writeSelfAtt:
            print("using self attention")
            selfControl = control
            if config.writeSelfAttMod == "CONT":
                selfControl = contControl
            selfControl = ops.linear(selfControl, config.ctrlDim,
                                     config.ctrlDim, name="ctrlProj")

            # Compare the projected control with every stored control.
            interactions = self.controls * tf.expand_dims(selfControl,
                                                          axis=1)
            attention = ops.inter2att(interactions, config.ctrlDim,
                                      name="selfAttention")
            self.attentions["self"].append(attention)
            selfSmry = ops.att2Smry(attention, self.memories)

        # get write unit inputs: previous memory, the new info, optionally
        # self-attention / control
        newMemory, dim = memory, config.memDim
        if config.writeInputs == "INFO":
            newMemory = info
        elif config.writeInputs == "SUM":
            # Explicit (non in-place) add: newMemory aliases `memory` here
            # and `memory` is needed again by the gate below.
            newMemory = newMemory + info
        elif config.writeInputs == "BOTH":
            newMemory, dim = ops.concat(newMemory, info, dim,
                                        mul=config.writeConcatMul)
        # else: MEM

        if config.writeSelfAtt:
            newMemory = tf.concat([newMemory, selfSmry], axis=-1)
            dim += config.memDim

        if config.writeMergeCtrl:
            newMemory = tf.concat([newMemory, control], axis=-1)
            # The control has ctrlDim features (see the gate and ctrlProj
            # projections), so that is what the concatenation adds. This
            # is identical to the previous memDim bookkeeping whenever
            # ctrlDim == memDim.
            dim += config.ctrlDim

        # project memory back to memory dimension
        if config.writeMemProj or (dim != config.memDim):
            newMemory = ops.linear(newMemory, dim, config.memDim,
                                   name="newMemory")

        # optional memory nonlinearity
        newMemory = ops.activations[config.writeMemAct](newMemory)

        # write unit gate
        if config.writeGate:
            # A shared gate uses a single value for all memory entries.
            gateDim = 1 if config.writeGateShared else config.memDim

            z = tf.sigmoid(
                ops.linear(control, config.ctrlDim, gateDim, name="gate",
                           bias=config.writeGateBias))

            self.attentions["gate"].append(z)

            # Interpolate between the candidate and the previous memory.
            newMemory = newMemory * z + memory * (1 - z)

        # optional batch normalization
        if config.memoryBN:
            newMemory = tf.contrib.layers.batch_norm(
                newMemory,
                decay=config.bnDecay,
                center=config.bnCenter,
                scale=config.bnScale,
                is_training=self.train,
                updates_collections=None)

    return newMemory
def zero_state(self, batchSize, dtype=tf.float32):
    """Create the initial cell state and reset per-run bookkeeping.

    Besides returning the initial state this resets the attention and
    auto-encoder-loss containers, seeds the control/memory/info histories,
    optionally merges the question into the knowledge base, selects and
    optionally projects the question words, and creates the memory
    variational-dropout mask.

    Args:
        batchSize: batch size forwarded to the state initializers and used
            for the dropout-mask shape.
        dtype: not read by this method.

    Returns:
        ``MACCellTuple(initialControl, initialMemory)``.
    """
    ## initialize data-structures
    self.attentions = {"kb": [], "question": [], "self": [], "gate": []}
    self.autoEncLosses = {
        "control": tf.constant(0.0),
        "memory": tf.constant(0.0)
    }

    ## initialize state
    initialControl = self.initState("initCtrl", config.ctrlDim,
                                    config.initCtrl, batchSize)
    initialMemory = self.initState("initMem", config.memDim,
                                   config.initMem, batchSize)

    # Histories get an extra axis at position 1.
    self.controls = tf.expand_dims(initialControl, axis=1)
    self.memories = tf.expand_dims(initialMemory, axis=1)
    self.infos = tf.expand_dims(initialMemory, axis=1)

    self.contControl = initialControl

    ## initialize knowledge base
    # optionally merge question into knowledge base representation
    if config.initKBwithQ != "NON":
        iVecQuestions = ops.linear(self.vecQuestions, config.ctrlDim,
                                   config.memDim, name="questions")

        concatMul = (config.initKBwithQ == "MUL")
        cnct, dim = ops.concat(self.knowledgeBase, iVecQuestions,
                               config.memDim, mul=concatMul, expandY=True)
        self.knowledgeBase = ops.linear(cnct, dim, config.memDim,
                                        name="initKB")

    ## initialize question words
    # choose question words to work with (original embeddings or encoder
    # outputs)
    words = (self.questionCntxWords if config.controlContextual
             else self.questionWords)

    # optionally add a parametric "null" word to all questions
    if config.addNullWord:
        # The lengths live on the instance; reading a bare local here
        # raised UnboundLocalError because the name is assigned in this
        # same statement.
        words, self.questionLengths = self.addNullWord(
            words, self.questionLengths)

    # project words
    self.inWords = self.outWords = words
    if config.controlInWordsProj or config.controlOutWordsProj:
        pWords = ops.linear(words, config.ctrlDim, config.ctrlDim,
                            name="wordsProj")
        self.inWords = pWords if config.controlInWordsProj else words
        self.outWords = pWords if config.controlOutWordsProj else words

    ## initialize memory variational dropout mask
    if config.memoryVariationalDropout:
        self.memDpMask = ops.generateVarDpMask((batchSize, config.memDim),
                                               self.dropouts["memory"])

    return MACCellTuple(initialControl, initialMemory)