def _att(self, a, b, keep_prob, scope, q_len=None):
    """b attends to a."""
    with tf.variable_scope(scope):
        # kernel trick here?
        with tf.variable_scope("ah"):
            ah = lib.conv(
                a, [1, 1, a.get_shape().as_list()[-1], g_channel],
                w_fn=lib.xavieru_conv_w, b_fn=lib.zero_b, act_fn=g_act)
            ah = tf.nn.dropout(ah, keep_prob, [self.batch_size, 1, 1, g_channel])
        with tf.variable_scope("bh"):
            bh = lib.conv(
                b, [1, 1, b.get_shape().as_list()[-1], g_channel],
                w_fn=lib.xavieru_conv_w, b_fn=lib.zero_b, act_fn=g_act)
            bh = tf.nn.dropout(bh, keep_prob, [self.batch_size, 1, 1, g_channel])
        with tf.variable_scope("alpha"):
            alpha = ah * bh
        with tf.variable_scope("conv1x1"):
            alpha = lib.conv(alpha, [1, 1, g_channel, 1],
                             w_fn=lib.xavieru_conv_w, b_fn=None, act_fn=g_linear)
        if q_len is not None:
            with tf.variable_scope("separate_softmax_attention"):
                attended_list = []
                for i in xrange(self.batch_size):
                    # restrict the softmax to the valid positions of example i
                    sliced_alpha = alpha[i][:q_len[i], :, :]
                    sliced_alpha = lib.softmax(sliced_alpha, 0)
                    sliced_a = a[i][:q_len[i], :, :]
                    with tf.variable_scope("attended"):
                        attended_a = tf.reduce_sum(sliced_a * sliced_alpha, 0, True)
                    attended_list.append(attended_a)
                attended_a = tf.stack(attended_list, 0)
        else:
            with tf.variable_scope("softmax"):
                alpha = lib.softmax(alpha, 1)
            with tf.variable_scope("attended"):
                attended_a = tf.reduce_sum(a * alpha, 1, True)
    return attended_a
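# A minimal vectorized sketch, not part of the original model: the per-example
# loop in _att above can also be expressed as a single length-masked softmax
# using tf.sequence_mask. The helper name and shapes are assumptions: `alpha`
# is taken to be the (batch, L, 1, 1) attention logits and `a` the
# (batch, L, 1, C) features from _att; `lib.softmax` is this module's helper.
def _masked_att_sketch(a, alpha, q_len):
    max_len = tf.shape(alpha)[1]
    mask = tf.sequence_mask(q_len, max_len, dtype=tf.float32)  # (batch, L)
    mask = tf.expand_dims(tf.expand_dims(mask, 2), 3)          # (batch, L, 1, 1)
    # push padded positions towards -inf so they receive ~zero attention weight
    masked_alpha = alpha + (1.0 - mask) * (-1e9)
    weights = lib.softmax(masked_alpha, 1)                     # softmax over the length axis
    return tf.reduce_sum(a * weights, 1, True)                 # (batch, 1, 1, C)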
def forward_graph(self, qid_input=None, v_input=None, q_input=None, q_len_input=None):
    with tf.variable_scope(self.var_scope_str):
        # use the provided input tensors when given, otherwise create placeholders
        if qid_input is not None:
            qid = qid_input
        else:
            qid = tf.placeholder(tf.int32, [self.batch_size, 1])
        if v_input is not None:
            v_ph = tf.reshape(v_input, [self.batch_size, 14, 14, 2048])
        else:
            v_ph = tf.placeholder(tf.float32, [self.batch_size, 14, 14, 2048], "v_ph")
        if q_input is not None:
            q_ph = q_input
        else:
            q_ph = tf.placeholder(tf.int32, [self.batch_size, None], "q_ph")
        if q_len_input is not None:
            q_len = tf.reshape(q_len_input, [self.batch_size])
        else:
            q_len = tf.placeholder(tf.int32, [self.batch_size])
        keep_prob_ph = tf.placeholder(tf.float32, [], "keep_prob_ph")

        v_model = Visual("visual", self.batch_size)
        v = v_model(v_ph, keep_prob_ph)

        embed = Embed("q_embed", self.q_vocab_size, self.embed_size, self.batch_size)
        q_0 = embed(q_ph)

        q_model = Question("question", self.batch_size)
        q = q_model(q_0, keep_prob_ph, q_len)

        att_model_1 = Attention("attention_1", self.batch_size)
        att_1 = att_model_1(v, q, keep_prob_ph, q_len)

        ans_model = Answer("answer", self.batch_size)
        logit = ans_model(att_1, self.a_vocab_size, keep_prob_ph)

        pred_idx = tf.argmax(logit, 1)
        softmax_logit = lib.softmax(logit)

        #v_model.summarize_tensor()
        #embed.summarize_embed()
        #q_model.summarize_tensor()
        att_model_1.summarize_tensor()
        #ans_model.summarize_tensor()

        regu_vars = []
        regu_vars += [w for w in embed.weights]
        regu_vars += [w for w in v_model.weights]
        regu_vars += [w for w in q_model.weights]
        regu_vars += [w for w in att_model_1.weights]
        regu_vars += [w for w in ans_model.weights]
        # exclude biases and batch-norm parameters from regularization
        regu_vars = [
            w for w in regu_vars
            if "bias" not in w.name and "gamma" not in w.name and "beta" not in w.name
        ]

        self.embed_model = embed
        self.q_model = q_model
        self.v_model = v_model
        self.att_model = att_model_1
        self.ans_model = ans_model
        self.regu_vars = regu_vars

        self.qid = qid
        self.v_ph = v_ph
        self.q_ph = q_ph
        self.q_len = q_len
        self.logit = logit
        self.pred_idx = pred_idx
        self.softmax_logit = softmax_logit
        self.keep_prob_ph = keep_prob_ph
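# A hedged usage sketch, not from the original repo: it assumes forward_graph
# belongs to a model class constructed elsewhere as `model`, and that numpy
# batches v_batch, q_batch and q_len_batch are available; those names are
# illustrative only.
def _run_inference_sketch(sess, model, v_batch, q_batch, q_len_batch):
    # no dropout at inference time, so keep_prob is fed as 1.0
    return sess.run(model.pred_idx, feed_dict={
        model.v_ph: v_batch,        # (batch_size, 14, 14, 2048) image features
        model.q_ph: q_batch,        # (batch_size, max_q_len) question token ids
        model.q_len: q_len_batch,   # (batch_size,) question lengths
        model.keep_prob_ph: 1.0,
    })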
def co_att(self, v, q, q_len, dim, var_scope_str):
    # v (batch_size, 196, 1, 2048)
    # q (batch_size, 27, 1, 256)
    v_dim = v.get_shape().as_list()[-1]
    q_dim = q.get_shape().as_list()[-1]
    with tf.variable_scope(var_scope_str):
        with tf.variable_scope("C"):
            b_c_q = lib.zero_b("bias_q", [q_dim])
            b_c_v = lib.zero_b("bias_v", [v_dim])
            with tf.variable_scope("QW"):
                QW = lib.conv(q + b_c_q, [1, 1, q_dim, v_dim],
                              b_fn=None, act_fn=g_linear)  # (b, 27, 1, 2048)
            QW = tf.squeeze(QW, 2)  # (b, 27, 2048)
            V = tf.squeeze(v + b_c_v, 2)  # (b, 196, 2048); squeeze only axis 2
            QWV = tf.matmul(QW, V, transpose_b=True)  # (b, 27, 196)
            C = g_act(QWV)  # (b, 27, 196)
        with tf.variable_scope("WQ"):
            WQ = lib.conv(q, [1, 1, q_dim, dim],
                          b_fn=lib.zero_b, act_fn=g_linear)  # (b, 27, 1, dim)
            WQ = tf.transpose(tf.squeeze(WQ, 2), [0, 2, 1])  # (b, dim, 27)
        with tf.variable_scope("WV"):
            WV = lib.conv(v, [1, 1, v_dim, dim],
                          b_fn=lib.zero_b, act_fn=g_linear)  # (b, 196, 1, dim)
            WV = tf.transpose(tf.squeeze(WV, 2), [0, 2, 1])  # (b, dim, 196)
        with tf.variable_scope("H_v"):
            WQC = tf.matmul(WQ, C)  # (b, dim, 196)
            WV_WQC = tf.add(WV, WQC)  # (b, dim, 196)
            H_v = g_act(WV_WQC)  # (b, dim, 196)
        with tf.variable_scope("v_attention"):
            H_v = tf.expand_dims(tf.transpose(H_v, [0, 2, 1]), 2)  # (b, 196, 1, dim)
            wH_v = lib.conv(H_v, [1, 1, dim, 1], act_fn=g_linear)
            wH_v = tf.squeeze(wH_v, [2, 3])  # (b, 196)
            a_v = lib.softmax(wH_v)  # (b, 196)
        with tf.variable_scope("attended_v"):
            v_hat = tf.multiply(tf.squeeze(v, 2), tf.expand_dims(a_v, 2))  # (b, 196, v_dim)
            v_hat = tf.reduce_sum(v_hat, 1)  # (b, v_dim)
        with tf.variable_scope("H_q"):
            WVC = tf.matmul(WV, C, transpose_b=True)  # (b, dim, 27)
            WQ_WVC = tf.add(WQ, WVC)  # (b, dim, 27)
            H_q = g_act(WQ_WVC)  # (b, dim, 27)
        with tf.variable_scope("q_attention"):
            H_q = tf.expand_dims(tf.transpose(H_q, [0, 2, 1]), 2)  # (b, 27, 1, dim)
            wH_q = lib.conv(H_q, [1, 1, dim, 1], act_fn=g_linear)  # (b, 27, 1, 1)
            wH_q = tf.squeeze(wH_q, [2, 3])  # (b, 27)
        with tf.variable_scope("split_softmax_attention_concat"):
            q_hat = []
            for i in xrange(self.batch_size):
                # softmax only over the valid question positions of example i
                sliced_q = q[i, :q_len[i], :]  # (q_len[i], 1, q_dim)
                sliced_wH_q = lib.softmax(wH_q[i, :q_len[i]])  # (q_len[i],)
                attention = tf.multiply(tf.squeeze(sliced_q, 1),
                                        tf.expand_dims(sliced_wH_q, 1))  # (q_len[i], q_dim)
                attention = tf.reduce_sum(attention, 0)  # (q_dim,)
                q_hat.append(attention)
            q_hat = tf.stack(q_hat, 0)  # (b, q_dim)
    return v_hat, q_hat
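# For reference, a summary of what co_att above computes; it appears to mirror
# the parallel co-attention pattern (cf. Lu et al., 2016). With Q the question
# features (b, 27, q_dim), V the visual features (b, 196, v_dim), and g_act the
# nonlinearity:
#   C   = g_act(((Q + b_q) W_c) (V + b_v)^T)        affinity matrix, (b, 27, 196)
#   H_v = g_act(W_v V^T + (W_q Q^T) C)              (b, dim, 196)
#   H_q = g_act(W_q Q^T + (W_v V^T) C^T)            (b, dim, 27)
#   a_v = softmax(w_hv^T H_v),                      v_hat = sum_n a_v[n] * V[:, n, :]
#   a_q = softmax of w_hq^T H_q over the first q_len positions,
#                                                   q_hat = sum_t a_q[t] * Q[:, t, :]
# where W_c, W_q, W_v and the row vectors w_hv, w_hq are realized as 1x1
# convolutions in the code.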