def double_linear_logits(args, size, bias, bias_start=0.0, scope=None, mask=None, wd=0.0, input_keep_prob=1.0, is_train=None): with tf.variable_scope(scope or "Double_Linear_Logits"): first = tf.tanh( linear(args, size, bias, bias_start=bias_start, scope='first', wd=wd, input_keep_prob=input_keep_prob, is_train=is_train)) second = linear(first, 1, bias, bias_start=bias_start, squeeze=True, scope='second', wd=wd, input_keep_prob=input_keep_prob, is_train=is_train) if mask is not None: second = exp_mask(second, mask) return second
def linear_logits(args, bias, bias_start=0.0, scope=None, mask=None, wd=0.0, input_keep_prob=1.0, is_train=None): with tf.variable_scope(scope or "Linear_Logits"): logits = linear(args, 1, bias, bias_start=bias_start, squeeze=True, scope='first', wd=wd, input_keep_prob=input_keep_prob, is_train=is_train) if mask is not None: logits = exp_mask(logits, mask) return logits
def __call__(self, inputs, state, scope=None): """ :param inputs: [N, d + JQ + JQ * d] :param state: [N, d] :param scope: :return: """ with tf.variable_scope(scope or self.__class__.__name__): c_prev, h_prev = state x = tf.slice(inputs, [0, 0], [-1, self._input_size]) q_mask = tf.slice(inputs, [0, self._input_size], [-1, self._q_len]) # [N, JQ] qs = tf.slice(inputs, [0, self._input_size + self._q_len], [-1, -1]) qs = tf.reshape(qs, [-1, self._q_len, self._input_size]) # [N, JQ, d] x_tiled = tf.tile(tf.expand_dims(x, 1), [1, self._q_len, 1]) # [N, JQ, d] h_prev_tiled = tf.tile(tf.expand_dims(h_prev, 1), [1, self._q_len, 1]) # [N, JQ, d] f = tf.tanh( linear([qs, x_tiled, h_prev_tiled], self._input_size, True, scope='f')) # [N, JQ, d] a = tf.nn.softmax( exp_mask(linear(f, 1, True, squeeze=True, scope='a'), q_mask)) # [N, JQ] q = tf.reduce_sum(qs * tf.expand_dims(a, -1), 1) z = tf.concat(1, [x, q]) # [N, 2d] return self._cell(z, state)
def softmax(logits, mask=None, scope=None): with tf.name_scope(scope or "Softmax"): if mask is not None: logits = exp_mask(logits, mask) flat_logits = flatten(logits, 1) flat_out = tf.nn.softmax(flat_logits) out = reconstruct(flat_out, logits, 1) return out
def double_linear_logits(args, size, bias, bias_start=0.0, scope=None, mask=None, wd=0.0, input_keep_prob=1.0, is_train=None): with tf.variable_scope(scope or "Double_Linear_Logits"): first = tf.tanh(linear(args, size, bias, bias_start=bias_start, scope='first', wd=wd, input_keep_prob=input_keep_prob, is_train=is_train)) second = linear(first, 1, bias, bias_start=bias_start, squeeze=True, scope='second', wd=wd, input_keep_prob=input_keep_prob, is_train=is_train) if mask is not None: second = exp_mask(second, mask) return second
def sum_logits(args, mask=None, name=None): with tf.name_scope(name or "sum_logits"): if args is None or (nest.is_sequence(args) and not args): raise ValueError("`args` must be specified") if not nest.is_sequence(args): args = [args] rank = len(args[0].get_shape()) logits = sum(tf.reduce_sum(arg, rank - 1) for arg in args) if mask is not None: logits = exp_mask(logits, mask) return logits
def sum_logits(args, mask=None, name=None): with tf.name_scope(name or "sum_logits"): if args is None or (nest.is_sequence(args) and not args): raise ValueError("`args` must be specified") if not nest.is_sequence(args): args = [args] rank = len(args[0].get_shape()) logits = sum(tf.reduce_sum(arg, rank-1) for arg in args) if mask is not None: logits = exp_mask(logits, mask) return logits
def __call__(self, inputs, state, scope=None): """ :param inputs: [N*B, I + B] :param state: [N*B, d] :param scope: :return: [N*B, d] """ with tf.variable_scope(scope or self.__class__.__name__): d = self.state_size x = tf.slice(inputs, [0, 0], [-1, self._input_size]) # [N*B, I] mask = tf.slice(inputs, [0, self._input_size], [-1, -1]) # [N*B, B] B = tf.shape(mask)[1] prev_state = tf.expand_dims(tf.reshape(state, [-1, B, d]), 1) # [N, B, d] -> [N, 1, B, d] mask = tf.tile(tf.expand_dims(tf.reshape(mask, [-1, B, B]), -1), [1, 1, 1, d]) # [N, B, B, d] # prev_state = self._reduce_func(tf.tile(prev_state, [1, B, 1, 1]), 2) prev_state = self._reduce_func(exp_mask(prev_state, mask), 2) # [N, B, d] prev_state = tf.reshape(prev_state, [-1, d]) # [N*B, d] return self._cell(x, prev_state)
def __call__(self, inputs, state, scope=None): """ :param inputs: [N, d + JQ + JQ * d] :param state: [N, d] :param scope: :return: """ with tf.variable_scope(scope or self.__class__.__name__): c_prev, h_prev = state x = tf.slice(inputs, [0, 0], [-1, self._input_size]) q_mask = tf.slice(inputs, [0, self._input_size], [-1, self._q_len]) # [N, JQ] qs = tf.slice(inputs, [0, self._input_size + self._q_len], [-1, -1]) qs = tf.reshape(qs, [-1, self._q_len, self._input_size]) # [N, JQ, d] x_tiled = tf.tile(tf.expand_dims(x, 1), [1, self._q_len, 1]) # [N, JQ, d] h_prev_tiled = tf.tile(tf.expand_dims(h_prev, 1), [1, self._q_len, 1]) # [N, JQ, d] f = tf.tanh(linear([qs, x_tiled, h_prev_tiled], self._input_size, True, scope='f')) # [N, JQ, d] a = tf.nn.softmax(exp_mask(linear(f, 1, True, squeeze=True, scope='a'), q_mask)) # [N, JQ] q = tf.reduce_sum(qs * tf.expand_dims(a, -1), 1) z = tf.concat(1, [x, q]) # [N, 2d] return self._cell(z, state)
def _build_forward(self): config = self.config N, M, JX, JQ, VW, d, dc, W = \ config.batch_size, config.max_num_sents, config.max_sent_size, \ config.max_ques_size, config.word_vocab_size, config.hidden_size, \ config.char_emb_size, config.max_word_size H = config.max_tree_height x_mask = self.x > 0 q_mask = self.q > 0 tx_mask = self.tx > 0 # [N, M, H, JX] with tf.variable_scope("char_emb"): char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float') Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx) # [N, M, JX, W, dc] Acq = tf.nn.embedding_lookup(char_emb_mat, self.cq) # [N, JQ, W, dc] filter = tf.get_variable("filter", shape=[1, config.char_filter_height, dc, d], dtype='float') bias = tf.get_variable("bias", shape=[d], dtype='float') strides = [1, 1, 1, 1] Acx = tf.reshape(Acx, [-1, JX, W, dc]) Acq = tf.reshape(Acq, [-1, JQ, W, dc]) xxc = tf.nn.conv2d(Acx, filter, strides, "VALID") + bias # [N*M, JX, W/filter_stride, d] qqc = tf.nn.conv2d(Acq, filter, strides, "VALID") + bias # [N, JQ, W/filter_stride, d] xxc = tf.reshape(tf.reduce_max(tf.nn.relu(xxc), 2), [-1, M, JX, d]) qqc = tf.reshape(tf.reduce_max(tf.nn.relu(qqc), 2), [-1, JQ, d]) with tf.variable_scope("word_emb"): if config.mode == 'train': word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, config.word_emb_size], initializer=get_initializer(config.emb_mat)) else: word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, config.word_emb_size], dtype='float') Ax = tf.nn.embedding_lookup(word_emb_mat, self.x) # [N, M, JX, d] Aq = tf.nn.embedding_lookup(word_emb_mat, self.q) # [N, JQ, d] # Ax = linear([Ax], d, False, scope='Ax_reshape') # Aq = linear([Aq], d, False, scope='Aq_reshape') xx = tf.concat(3, [xxc, Ax]) # [N, M, JX, 2d] qq = tf.concat(2, [qqc, Aq]) # [N, JQ, 2d] D = d + config.word_emb_size with tf.variable_scope("pos_emb"): pos_emb_mat = tf.get_variable("pos_emb_mat", shape=[config.pos_vocab_size, d], dtype='float') Atx = tf.nn.embedding_lookup(pos_emb_mat, self.tx) # [N, M, H, JX, d] cell = BasicLSTMCell(D, state_is_tuple=True) cell = SwitchableDropoutWrapper(cell, self.is_train, input_keep_prob=config.input_keep_prob) x_len = tf.reduce_sum(tf.cast(x_mask, 'int32'), 2) # [N, M] q_len = tf.reduce_sum(tf.cast(q_mask, 'int32'), 1) # [N] with tf.variable_scope("rnn"): (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='start') # [N, M, JX, 2d] tf.get_variable_scope().reuse_variables() (fw_us, bw_us), (_, (fw_u, bw_u)) = bidirectional_dynamic_rnn(cell, cell, qq, q_len, dtype='float', scope='start') # [N, J, d], [N, d] u = (fw_u + bw_u) / 2.0 h = (fw_h + bw_h) / 2.0 with tf.variable_scope("h"): no_op_cell = NoOpCell(D) tree_rnn_cell = TreeRNNCell(no_op_cell, d, tf.reduce_max) initial_state = tf.reshape(h, [N*M*JX, D]) # [N*M*JX, D] inputs = tf.concat(4, [Atx, tf.cast(self.tx_edge_mask, 'float')]) # [N, M, H, JX, d+JX] inputs = tf.reshape(tf.transpose(inputs, [0, 1, 3, 2, 4]), [N*M*JX, H, d + JX]) # [N*M*JX, H, d+JX] length = tf.reshape(tf.reduce_sum(tf.cast(tx_mask, 'int32'), 2), [N*M*JX]) # length = tf.reshape(tf.reduce_sum(tf.cast(tf.transpose(tx_mask, [0, 1, 3, 2]), 'float'), 3), [-1]) h, _ = dynamic_rnn(tree_rnn_cell, inputs, length, initial_state=initial_state) # [N*M*JX, H, D] h = tf.transpose(tf.reshape(h, [N, M, JX, H, D]), [0, 1, 3, 2, 4]) # [N, M, H, JX, D] u = tf.expand_dims(tf.expand_dims(tf.expand_dims(u, 1), 1), 1) # [N, 1, 1, 1, 4d] dot = linear(h * u, 1, True, squeeze=True, scope='dot') # [N, M, H, JX] # self.logits = tf.reshape(dot, [N, M * H * JX]) self.logits = tf.reshape(exp_mask(dot, tx_mask), [N, M * H * JX]) # [N, M, H, JX] self.yp = tf.reshape(tf.nn.softmax(self.logits), [N, M, H, JX])
def _build_forward(self): config = self.config N, M, JX, JQ, VW, VC, d, dc, W = \ config.batch_size, config.max_num_sents, config.max_sent_size, \ config.max_ques_size, config.word_vocab_size, config.char_vocab_size, config.hidden_size, \ config.char_emb_size, config.max_word_size H = config.max_tree_height x_mask = self.x > 0 q_mask = self.q > 0 tx_mask = self.tx > 0 # [N, M, H, JX] with tf.variable_scope("char_emb"): char_emb_mat = tf.get_variable("char_emb_mat", shape=[VC, dc], dtype='float') Acx = tf.nn.embedding_lookup(char_emb_mat, self.cx) # [N, M, JX, W, dc] Acq = tf.nn.embedding_lookup(char_emb_mat, self.cq) # [N, JQ, W, dc] filter = tf.get_variable("filter", shape=[1, config.char_filter_height, dc, d], dtype='float') bias = tf.get_variable("bias", shape=[d], dtype='float') strides = [1, 1, 1, 1] Acx = tf.reshape(Acx, [-1, JX, W, dc]) Acq = tf.reshape(Acq, [-1, JQ, W, dc]) xxc = tf.nn.conv2d(Acx, filter, strides, "VALID") + bias # [N*M, JX, W/filter_stride, d] qqc = tf.nn.conv2d(Acq, filter, strides, "VALID") + bias # [N, JQ, W/filter_stride, d] xxc = tf.reshape(tf.reduce_max(tf.nn.relu(xxc), 2), [-1, M, JX, d]) qqc = tf.reshape(tf.reduce_max(tf.nn.relu(qqc), 2), [-1, JQ, d]) with tf.variable_scope("word_emb"): if config.mode == 'train': word_emb_mat = tf.get_variable("word_emb_mat", dtype='float', shape=[VW, config.word_emb_size], initializer=get_initializer(config.emb_mat)) else: word_emb_mat = tf.get_variable("word_emb_mat", shape=[VW, config.word_emb_size], dtype='float') Ax = tf.nn.embedding_lookup(word_emb_mat, self.x) # [N, M, JX, d] Aq = tf.nn.embedding_lookup(word_emb_mat, self.q) # [N, JQ, d] # Ax = linear([Ax], d, False, scope='Ax_reshape') # Aq = linear([Aq], d, False, scope='Aq_reshape') xx = tf.concat(3, [xxc, Ax]) # [N, M, JX, 2d] qq = tf.concat(2, [qqc, Aq]) # [N, JQ, 2d] D = d + config.word_emb_size with tf.variable_scope("pos_emb"): pos_emb_mat = tf.get_variable("pos_emb_mat", shape=[config.pos_vocab_size, d], dtype='float') Atx = tf.nn.embedding_lookup(pos_emb_mat, self.tx) # [N, M, H, JX, d] cell = BasicLSTMCell(D, state_is_tuple=True) cell = SwitchableDropoutWrapper(cell, self.is_train, input_keep_prob=config.input_keep_prob) x_len = tf.reduce_sum(tf.cast(x_mask, 'int32'), 2) # [N, M] q_len = tf.reduce_sum(tf.cast(q_mask, 'int32'), 1) # [N] with tf.variable_scope("rnn"): (fw_h, bw_h), _ = bidirectional_dynamic_rnn(cell, cell, xx, x_len, dtype='float', scope='start') # [N, M, JX, 2d] tf.get_variable_scope().reuse_variables() (fw_us, bw_us), (_, (fw_u, bw_u)) = bidirectional_dynamic_rnn(cell, cell, qq, q_len, dtype='float', scope='start') # [N, J, d], [N, d] u = (fw_u + bw_u) / 2.0 h = (fw_h + bw_h) / 2.0 with tf.variable_scope("h"): no_op_cell = NoOpCell(D) tree_rnn_cell = TreeRNNCell(no_op_cell, d, tf.reduce_max) initial_state = tf.reshape(h, [N*M*JX, D]) # [N*M*JX, D] inputs = tf.concat(4, [Atx, tf.cast(self.tx_edge_mask, 'float')]) # [N, M, H, JX, d+JX] inputs = tf.reshape(tf.transpose(inputs, [0, 1, 3, 2, 4]), [N*M*JX, H, d + JX]) # [N*M*JX, H, d+JX] length = tf.reshape(tf.reduce_sum(tf.cast(tx_mask, 'int32'), 2), [N*M*JX]) # length = tf.reshape(tf.reduce_sum(tf.cast(tf.transpose(tx_mask, [0, 1, 3, 2]), 'float'), 3), [-1]) h, _ = dynamic_rnn(tree_rnn_cell, inputs, length, initial_state=initial_state) # [N*M*JX, H, D] h = tf.transpose(tf.reshape(h, [N, M, JX, H, D]), [0, 1, 3, 2, 4]) # [N, M, H, JX, D] u = tf.expand_dims(tf.expand_dims(tf.expand_dims(u, 1), 1), 1) # [N, 1, 1, 1, 4d] dot = linear(h * u, 1, True, squeeze=True, scope='dot') # [N, M, H, JX] # self.logits = tf.reshape(dot, [N, M * H * JX]) self.logits = tf.reshape(exp_mask(dot, tx_mask), [N, M * H * JX]) # [N, M, H, JX] self.yp = tf.reshape(tf.nn.softmax(self.logits), [N, M, H, JX])