Example #1
    def __call__(self, inputs, state, scope=None):
        with tf.variable_scope(scope,
                               default_name="gru_cell",
                               values=[inputs, state]):
            if not isinstance(inputs, (list, tuple)):
                inputs = [inputs]

            # Gates are computed from the current inputs and the previous state.
            all_inputs = list(inputs) + [state]
            r = tf.nn.sigmoid(
                linear(all_inputs,
                       self._num_units,
                       False,
                       False,
                       scope="reset_gate"))
            u = tf.nn.sigmoid(
                linear(all_inputs,
                       self._num_units,
                       False,
                       False,
                       scope="update_gate"))
            # The candidate activation uses the reset-gated previous state.
            all_inputs = list(inputs) + [r * state]
            c = linear(all_inputs,
                       self._num_units,
                       True,
                       False,
                       scope="candidate")

            # Interpolate between the previous state and the candidate.
            new_state = (1.0 - u) * state + u * tf.tanh(c)

        return new_state, new_state
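
All three examples call a linear helper that is not shown on this page. Judging from the call sites (two trailing booleans in Example #1, a single True in Example #2), it appears to take a tensor or a list of tensors, an output size, a bias flag and an optional concat flag. Below is a minimal sketch of such a helper, written against the same TF 1.x variable-scope API; the signature and the concat behaviour are assumptions inferred from the calls, not the original implementation.

import tensorflow as tf  # TF 1.x API (tf.compat.v1 in TF 2)


def linear(inputs, output_size, bias, concat=True, dtype=None, scope=None):
    """Hypothetical linear layer matching the call sites above.

    This is an assumption inferred from the examples, not the original code.
    """
    if not isinstance(inputs, (list, tuple)):
        inputs = [inputs]

    input_sizes = [x.get_shape().as_list()[-1] for x in inputs]

    with tf.variable_scope(scope, default_name="linear", values=inputs):
        if concat:
            # One weight matrix applied to the concatenation of all inputs.
            w = tf.get_variable("matrix", [sum(input_sizes), output_size],
                                dtype=dtype)
            output = tf.matmul(tf.concat(inputs, -1), w)
        else:
            # One weight matrix per input tensor, results summed.
            outputs = []
            for i, (x, size) in enumerate(zip(inputs, input_sizes)):
                w = tf.get_variable("matrix_%d" % i, [size, output_size],
                                    dtype=dtype)
                outputs.append(tf.matmul(x, w))
            output = tf.add_n(outputs)

        if bias:
            b = tf.get_variable("bias", [output_size], dtype=dtype,
                                initializer=tf.zeros_initializer())
            output = tf.nn.bias_add(output, b)

    return output

Under this reading, the positional False, False arguments in Example #1 would mean no bias term and a separate weight matrix per input tensor, while Example #2 asks for a single biased projection of the concatenated [inputs, h].
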
Example #2
    def __call__(self, inputs, state, scope=None):
        with tf.variable_scope(scope or "lstm_cell"):
            c, h = state
            concat = linear([inputs, h], 4 * self._num_units, True,
                            scope="gates")

            # Split into input gate, candidate (j), forget gate and output gate.
            i, j, f, o = tf.split(concat, 4, 1)

            j = self._activation(j)
            new_c = c * tf.nn.sigmoid(f) + tf.nn.sigmoid(i) * j

            if not self._output_activation:
                new_h = new_c * tf.nn.sigmoid(o)
            else:
                new_h = self._output_activation(new_c) * tf.nn.sigmoid(o)

            new_state = (new_c, new_h)

        return new_h, new_state
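
To see the gate arithmetic of Example #2 without the cell plumbing and the external linear helper, here is a self-contained restatement of one LSTM step as a pure function over explicit weight tensors. The function name, the single fused weight matrix and the tanh activations are illustrative choices (the example leaves self._activation and self._output_activation configurable), so treat this as a sketch of the same computation rather than the original cell.

import tensorflow as tf


def lstm_step(x, c, h, w, b):
    """One step of the LSTM from Example #2, with linear() and variable
    scopes replaced by an explicit matmul (illustrative sketch).

    x: [batch, input_size]                        current input
    c: [batch, num_units]                         previous cell state
    h: [batch, num_units]                         previous hidden state
    w: [input_size + num_units, 4 * num_units]    fused gate weights
    b: [4 * num_units]                            gate biases
    """
    # Same role as linear([inputs, h], 4 * num_units, True) in the example.
    concat = tf.matmul(tf.concat([x, h], axis=1), w) + b

    # Split into input gate, candidate, forget gate and output gate.
    i, j, f, o = tf.split(concat, 4, axis=1)

    new_c = c * tf.sigmoid(f) + tf.sigmoid(i) * tf.tanh(j)
    new_h = tf.tanh(new_c) * tf.sigmoid(o)  # assumes a tanh output activation
    return new_h, (new_c, new_h)

Note that in the original example the output activation is optional: with self._output_activation unset, new_h is simply new_c * sigmoid(o).
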
Example #3
def attention(query,
              memories,
              bias,
              hidden_size,
              cache=None,
              reuse=None,
              dtype=None,
              scope=None):
    """ Standard attention layer

    Args:
        query: A tensor with shape [batch, key_size]
        memories: A tensor with shape [batch, memory_size, key_size]
        bias: A tensor with shape [batch, memory_size]
        hidden_size: An integer
        cache: A dictionary of precomputed values
        reuse: A boolean value, whether to reuse the scope
        dtype: An optional instance of tf.DType
        scope: An optional string, the scope of this layer

    Returns:
        A dictionary with a "value" entry of shape [batch, key_size]
        (the attended memory) and a "weight" entry of shape
        [batch, memory_size] (the attention weights). When query is None
        and cache is None, only the projected keys are returned
        as {"key": ...}
    """

    with tf.variable_scope(scope or "attention",
                           reuse=reuse,
                           values=[query, memories, bias],
                           dtype=dtype):
        mem_shape = tf.shape(memories)
        key_size = memories.get_shape().as_list()[-1]

        if cache is None:
            k = tf.reshape(memories, [-1, key_size])
            k = linear(k, hidden_size, False, False, scope="k_transform")

            if query is None:
                return {"key": k}
        else:
            k = cache["key"]

        q = linear(query, hidden_size, False, False, scope="q_transform")
        k = tf.reshape(k, [mem_shape[0], mem_shape[1], hidden_size])

        hidden = tf.tanh(q[:, None, :] + k)
        hidden = tf.reshape(hidden, [-1, hidden_size])

        # Shape: [batch * memory_size, 1] -> [batch, memory_size]
        logits = linear(hidden, 1, False, False, scope="logits")
        logits = tf.reshape(logits, [-1, mem_shape[1]])

        if bias is not None:
            logits = logits + bias

        alpha = tf.nn.softmax(logits)

        outputs = {
            "value": tf.reduce_sum(alpha[:, :, None] * memories, axis=1),
            "weight": alpha
        }

    return outputs
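
Example #3 is additive (Bahdanau-style) attention: the query and the memories are projected into a shared hidden space, combined with tanh, reduced to one logit per memory slot, shifted by the optional bias (usually a padding mask), softmaxed, and used to take a weighted sum of the memories. The cache path exists so that the key projection of the memories can be computed once and reused across decoding steps. The sketch below restates the score/read computation with explicit weight tensors instead of the linear helper; the function and parameter names are illustrative, not part of the original API.

import tensorflow as tf


def additive_attention_step(query, memories, bias, w_q, w_k, v):
    """The score/read computation from the attention() example, with the
    linear() calls replaced by explicit matmuls (illustrative sketch).

    query:    [batch, key_size]
    memories: [batch, memory_size, key_size]
    bias:     [batch, memory_size] additive logit bias (e.g. mask), or None
    w_q, w_k: [key_size, hidden_size]
    v:        [hidden_size, 1]
    """
    # q_transform and k_transform in the example.
    q = tf.matmul(query, w_q)                               # [batch, hidden]
    k = tf.einsum("bmk,kh->bmh", memories, w_k)             # [batch, memory, hidden]

    # Additive scores: v^T tanh(W_q q + W_k m).
    hidden = tf.tanh(q[:, None, :] + k)                     # [batch, memory, hidden]
    logits = tf.squeeze(tf.einsum("bmh,ho->bmo", hidden, v), -1)  # [batch, memory]

    if bias is not None:
        logits += bias

    alpha = tf.nn.softmax(logits)                           # attention weights
    value = tf.reduce_sum(alpha[:, :, None] * memories, axis=1)   # [batch, key_size]
    return {"value": value, "weight": alpha}

In the original function, calling it with query=None and cache=None returns just {"key": k}; that dictionary can then be passed back as cache on later calls so the memory projection is not recomputed at every step.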