def block(Q, K, V, Q_lengths=None, K_lengths=None, attention_type='dot',
          is_layer_norm=True, is_mask=True, mask_value=-2**32 + 1,
          drop_prob=None, q_time=None, k_time=None, use_len=False,
          no_mask=False, init=None):
    '''Add a block unit from https://arxiv.org/pdf/1706.03762.pdf.

    Args:
        Q: a tensor with shape [batch, Q_time, Q_dimension]
        K: a tensor with shape [batch, time, K_dimension]
        V: a tensor with shape [batch, time, V_dimension]
        Q_lengths: a tensor with shape [batch]
        K_lengths: a tensor with shape [batch]

    Returns:
        a tensor with shape [batch, Q_time, Q_dimension]

    Raises:
    '''
    # Dot-product attention over (Q, K, V), optionally masked by the lengths.
    att = attention(Q, K, V, Q_lengths, K_lengths,
                    attention_type='dot',
                    is_mask=is_mask, mask_value=mask_value,
                    drop_prob=drop_prob,
                    q_time=q_time, k_time=k_time,
                    use_len=use_len, init=init)
    # Residual connection around the attention sub-layer, then layer norm.
    if is_layer_norm:
        with tf.variable_scope('attention_layer_norm'):
            y = op.layer_norm_debug(Q + att)
    else:
        y = Q + att

    # Position-wise feed-forward sub-layer with its own residual + layer norm.
    z = FFN(y, init=init)
    if is_layer_norm:
        with tf.variable_scope('FFN_layer_norm'):
            w = op.layer_norm_debug(y + z)
    else:
        w = y + z
    return w
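# For reference: the call above uses attention_type='dot', i.e. dot-product
# attention in the spirit of the cited paper. The sketch below only
# illustrates that formula; it is NOT this repo's `attention` helper, whose
# exact masking, scaling, dropout, and extra attention types may differ.
# The function name and the K_mask argument here are hypothetical.
def scaled_dot_attention_sketch(Q, K, V, K_mask=None, mask_value=-2**32 + 1):
    d_k = tf.cast(tf.shape(K)[-1], tf.float32)
    # Similarity logits: [batch, Q_time, time].
    logits = tf.matmul(Q, K, transpose_b=True) / tf.sqrt(d_k)
    if K_mask is not None:
        # K_mask: [batch, time], 1.0 for real tokens, 0.0 for padding;
        # padded positions get a large negative logit before the softmax.
        logits += (1.0 - tf.expand_dims(K_mask, 1)) * mask_value
    weights = tf.nn.softmax(logits)   # softmax over the last axis
    return tf.matmul(weights, V)      # [batch, Q_time, V_dimension]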
def block(Q, K, V, Q_lengths, K_lengths, attention_type='dot',
          is_layer_norm=True, is_mask=True, mask_value=-2**32 + 1,
          drop_prob=None):
    '''Add a block unit from https://arxiv.org/pdf/1706.03762.pdf.

    Args:
        Q: a tensor with shape [batch, Q_time, Q_dimension]
        K: a tensor with shape [batch, time, K_dimension]
        V: a tensor with shape [batch, time, V_dimension]
        Q_lengths: a tensor with shape [batch]
        K_lengths: a tensor with shape [batch]

    Returns:
        a tensor with shape [batch, Q_time, Q_dimension]

    Raises:
    '''
    # att.shape = (batch_size, max_turn_len, emb_size)
    att = attention(Q, K, V, Q_lengths, K_lengths,
                    attention_type='dot',
                    is_mask=is_mask, mask_value=mask_value,
                    drop_prob=drop_prob)
    if is_layer_norm:
        with tf.variable_scope('attention_layer_norm', reuse=tf.AUTO_REUSE):
            y = op.layer_norm_debug(Q + att)
    else:
        y = Q + att

    z = FFN(y)
    if is_layer_norm:
        with tf.variable_scope('FFN_layer_norm', reuse=tf.AUTO_REUSE):
            w = op.layer_norm_debug(y + z)
    else:
        w = y + z
    # w.shape = (batch_size, max_turn_len, emb_size)
    return w
def block(Q, K, V, Q_lengths, K_lengths, attention_type='dot',
          is_layer_norm=True, is_mask=True, mask_value=-2**32 + 1,
          drop_prob=None):
    '''Add a block unit from https://arxiv.org/pdf/1706.03762.pdf.

    Args:
        Q: a tensor with shape [batch, Q_time, Q_dimension]
        K: a tensor with shape [batch, time, K_dimension]
        V: a tensor with shape [batch, time, V_dimension], e.g. Hu: [256, 50, 200]
        Q_lengths: a tensor with shape [batch]
        K_lengths: a tensor with shape [batch]

    Returns:
        a tensor with shape [batch, Q_time, Q_dimension]

    Raises:
    '''
    att = attention(Q, K, V, Q_lengths, K_lengths,
                    attention_type='dot',
                    is_mask=is_mask, mask_value=mask_value,
                    drop_prob=drop_prob)
    if is_layer_norm:
        with tf.variable_scope('attention_layer_norm'):
            y = op.layer_norm_debug(Q + att)
    else:
        y = Q + att  # [256, 50, 200]

    z = FFN(y)  # [256, 50, 200]
    if is_layer_norm:
        with tf.variable_scope('FFN_layer_norm'):
            w = op.layer_norm_debug(y + z)
    else:
        w = y + z
    return w  # [256, 50, 200]
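# A minimal usage sketch of the block above (assumptions: TensorFlow 1.x,
# with `attention`, `FFN`, and `op.layer_norm_debug` from this repo in scope;
# the placeholder names Hu / Hu_len and the [50, 200] sizes are illustrative,
# taken from the shape comments above, not required by the function).
Hu = tf.placeholder(tf.float32, shape=[None, 50, 200])   # [batch, max_turn_len, emb_size]
Hu_len = tf.placeholder(tf.int32, shape=[None])          # true token count per example
with tf.variable_scope('self_attention_block'):
    # Self-attention: Q = K = V = Hu, so the output keeps Hu's shape.
    Hu_hat = block(Hu, Hu, Hu, Q_lengths=Hu_len, K_lengths=Hu_len)
# Hu_hat.shape = [batch, 50, 200]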
def gated_cnn(inputs, shape, conv_type, dilation, residual=True, is_layer_norm=True):
    # Gated linear unit: element-wise product of a linear conv and a sigmoid gate.
    conv_w = conv_op(inputs, shape, conv_type, dilation, "linear")
    conv_v = conv_op(inputs, shape, conv_type, dilation, "gated")
    conv = conv_w * tf.sigmoid(conv_v)
    if residual:
        # add shortcut on last operation
        conv = tf.add(inputs, conv)
    if is_layer_norm:
        with tf.variable_scope('residule_layer_norm'):
            conv = op.layer_norm_debug(conv)
    return conv
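# A self-contained sketch of the gating pattern used above (a gated linear
# unit: linear convolution multiplied by a sigmoid gate, plus a residual
# shortcut). It uses the standard tf.layers.conv1d as a stand-in for this
# repo's `conv_op`; the kernel size, filter count, and 'same' padding are
# illustrative assumptions, not the repo's actual settings.
def glu_conv_sketch(inputs, filters=200, kernel_size=3, dilation=1):
    # Two parallel 1-D convolutions over [batch, time, channels].
    linear = tf.layers.conv1d(inputs, filters, kernel_size,
                              padding='same', dilation_rate=dilation,
                              name='glu_linear')
    gate = tf.layers.conv1d(inputs, filters, kernel_size,
                            padding='same', dilation_rate=dilation,
                            name='glu_gate')
    # Element-wise gate, then a residual shortcut back to the input
    # (valid here because `filters` matches the input channel count).
    return inputs + linear * tf.sigmoid(gate)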