Example #1
def block(Q,
          K,
          V,
          Q_lengths=None,
          K_lengths=None,
          attention_type='dot',
          is_layer_norm=True,
          is_mask=True,
          mask_value=-2**32 + 1,
          drop_prob=None,
          q_time=None,
          k_time=None,
          use_len=False,
          no_mask=False,
          init=None):
    '''Add a block unit from https://arxiv.org/pdf/1706.03762.pdf.
    Args:
        Q: a tensor with shape [batch, Q_time, Q_dimension]
        K: a tensor with shape [batch, time, K_dimension]
        V: a tensor with shape [batch, time, V_dimension]

        Q_lengths: a tensor with shape [batch]
        K_lengths: a tensor with shape [batch]

    Returns:
        a tensor with shape [batch, Q_time, Q_dimension]

    Raises:
    '''
    att = attention(Q,
                    K,
                    V,
                    Q_lengths,
                    K_lengths,
                    attention_type=attention_type,
                    is_mask=is_mask,
                    mask_value=mask_value,
                    drop_prob=drop_prob,
                    q_time=q_time,
                    k_time=k_time,
                    use_len=use_len,
                    init=init)
    if is_layer_norm:
        with tf.variable_scope('attention_layer_norm'):
            y = op.layer_norm_debug(Q + att)
    else:
        y = Q + att

    z = FFN(y, init=init)
    if is_layer_norm:
        with tf.variable_scope('FFN_layer_norm'):
            w = op.layer_norm_debug(y + z)
    else:
        w = y + z
    return w
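Note: the `attention` helper called in these blocks is not shown on this page. As a rough illustration only, here is a minimal sketch of a dot-product attention with length-based key masking that matches the call signature used above; the masking scheme and variable names are assumptions rather than the project's actual implementation, and the extra keyword arguments from Example #1 (q_time, k_time, use_len, init) are simply absorbed by **kwargs.

import tensorflow as tf

def attention(Q, K, V, Q_lengths, K_lengths,
              attention_type='dot', is_mask=True,
              mask_value=-2**32 + 1, drop_prob=None, **kwargs):
    # Scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V.
    d_k = tf.cast(tf.shape(K)[-1], tf.float32)
    logits = tf.matmul(Q, K, transpose_b=True) / tf.sqrt(d_k)   # [batch, Q_time, K_time]

    if is_mask and K_lengths is not None:
        # Push padded key positions to a large negative value so softmax nearly ignores them.
        mask = tf.sequence_mask(K_lengths, maxlen=tf.shape(K)[1], dtype=tf.float32)
        mask = tf.expand_dims(mask, axis=1)                     # [batch, 1, K_time]
        logits = logits * mask + (1.0 - mask) * mask_value

    weights = tf.nn.softmax(logits)                             # softmax over K_time
    if drop_prob is not None:
        weights = tf.nn.dropout(weights, keep_prob=1.0 - drop_prob)
    return tf.matmul(weights, V)                                # [batch, Q_time, V_dimension]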
Example #2
def block(Q,
          K,
          V,
          Q_lengths,
          K_lengths,
          attention_type='dot',
          is_layer_norm=True,
          is_mask=True,
          mask_value=-2**32 + 1,
          drop_prob=None):
    '''Add a block unit from https://arxiv.org/pdf/1706.03762.pdf.
    Args:
        Q: a tensor with shape [batch, Q_time, Q_dimension]
        K: a tensor with shape [batch, time, K_dimension]
        V: a tensor with shape [batch, time, V_dimension]

        Q_lengths: a tensor with shape [batch]
        K_lengths: a tensor with shape [batch]

    Returns:
        a tensor with shape [batch, Q_time, Q_dimension]

    Raises:
    '''
    # att.shape = (batch_size, max_turn_len, emb_size)
    att = attention(Q,
                    K,
                    V,
                    Q_lengths,
                    K_lengths,
                    attention_type=attention_type,
                    is_mask=is_mask,
                    mask_value=mask_value,
                    drop_prob=drop_prob)
    if is_layer_norm:
        with tf.variable_scope('attention_layer_norm', reuse=tf.AUTO_REUSE):
            y = op.layer_norm_debug(Q + att)
    else:
        y = Q + att

    z = FFN(y)
    if is_layer_norm:
        with tf.variable_scope('FFN_layer_norm', reuse=tf.AUTO_REUSE):
            w = op.layer_norm_debug(y + z)
    else:
        w = y + z
    # w.shape = (batch_size, max_turn_len, emb_size)
    return w
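`FFN` is also not defined in these snippets; in the referenced paper it is the position-wise feed-forward sub-layer (two linear maps with a ReLU in between). A minimal sketch under that assumption, keeping the output width equal to the input so the residual `y + z` stays shape-compatible; the scope name and hidden width here are illustrative, and the `init` argument passed in Example #1 is accepted but ignored.

import tensorflow as tf

def FFN(x, hidden_units=None, init=None, name='FFN'):
    # Position-wise feed-forward: ReLU(x W1 + b1) W2 + b2, applied along the last axis.
    # init: optional initializer from Example #1; ignored in this sketch.
    out_units = x.get_shape().as_list()[-1]
    hidden_units = hidden_units or out_units
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        h = tf.layers.dense(x, hidden_units, activation=tf.nn.relu, name='inner')
        return tf.layers.dense(h, out_units, name='outer')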
Example #3
def block(
    Q, K, V, 
    Q_lengths, K_lengths, 
    attention_type='dot', 
    is_layer_norm=True, 
    is_mask=True, mask_value=-2**32+1,
    drop_prob=None):
    '''Add a block unit from https://arxiv.org/pdf/1706.03762.pdf.
    Args:
        Q: a tensor with shape [batch, Q_time, Q_dimension]
        K: a tensor with shape [batch, time, K_dimension]
        V: a tensor with shape [batch, time, V_dimension]
        e.g. Hu has shape [256, 50, 200]
        Q_lengths: a tensor with shape [batch]
        K_lengths: a tensor with shape [batch]

    Returns:
        a tensor with shape [batch, Q_time, Q_dimension]

    Raises:
    '''
    att = attention(Q, K, V, 
                    Q_lengths, K_lengths, 
                    attention_type=attention_type,
                    is_mask=is_mask, mask_value=mask_value,
                    drop_prob=drop_prob)
    if is_layer_norm:
        with tf.variable_scope('attention_layer_norm'):
            y = op.layer_norm_debug(Q + att)
    else:
        y = Q + att  # [256, 50, 200]

    z = FFN(y)  # [256, 50, 200]
    if is_layer_norm:
        with tf.variable_scope('FFN_layer_norm'):
            w = op.layer_norm_debug(y + z)
    else:
        w = y + z
    return w  # [256, 50, 200]
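`op.layer_norm_debug` is used by all three block variants and appears to be a standard layer normalization over the last (feature) axis. A minimal sketch of such a function, assuming learned scale/bias variables created inside the caller's variable scope; the variable names and epsilon are illustrative.

import tensorflow as tf

def layer_norm_debug(x, epsilon=1e-6):
    # Normalize over the feature (last) axis, then apply a learned scale and bias.
    dim = x.get_shape().as_list()[-1]
    mean, variance = tf.nn.moments(x, axes=[-1], keep_dims=True)
    scale = tf.get_variable('scale', shape=[dim], initializer=tf.ones_initializer())
    bias = tf.get_variable('bias', shape=[dim], initializer=tf.zeros_initializer())
    return scale * (x - mean) / tf.sqrt(variance + epsilon) + bias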
Example #4
def gated_cnn(inputs,
              shape,
              conv_type,
              dilation,
              residual=True,
              is_layer_norm=True):
    conv_w = conv_op(inputs, shape, conv_type, dilation, "linear")
    conv_v = conv_op(inputs, shape, conv_type, dilation, "gated")
    conv = conv_w * tf.sigmoid(conv_v)

    if residual:  # residual shortcut: add the input back onto the gated conv output
        conv = tf.add(inputs, conv)
    if is_layer_norm:
        with tf.variable_scope('residule_layer_norm'):
            conv = op.layer_norm_debug(conv)
    return conv
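`conv_op` in Example #4 is likewise undefined here. In a gated CNN (GLU-style) block, conv_w * sigmoid(conv_v) gates a linear convolution, so both branches are typically same-padded 1-D convolutions whose output shape matches the input (otherwise the residual tf.add would fail). A minimal sketch under that interpretation; the meaning of `shape` and `conv_type` is assumed, not taken from the source.

import tensorflow as tf

def conv_op(inputs, shape, conv_type, dilation, name):
    # Assumed layout: shape = [kernel_size, in_channels, out_channels].
    # conv_type (e.g. 'normal' vs. 'causal') is ignored in this sketch; a
    # same-padded dilated 1-D convolution keeps the time dimension intact.
    kernel_size, _, out_channels = shape
    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        return tf.layers.conv1d(inputs,
                                filters=out_channels,
                                kernel_size=kernel_size,
                                dilation_rate=dilation,
                                padding='same')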