Example #1
    def __init__(self,
                 num_units,
                 use_peepholes=False,
                 initializer=None,
                 num_proj=None,
                 proj_clip=None,
                 num_unit_shards=1,
                 num_proj_shards=1,
                 forget_bias=1.0,
                 state_is_tuple=False,
                 activation=math_ops.tanh):
        """Initialize the parameters for an LSTM cell.

    Args:
      num_units: int, The number of units in the LSTM cell
      use_peepholes: bool, set True to enable diagonal/peephole connections.
      initializer: (optional) The initializer to use for the weight and
        projection matrices.
      num_proj: (optional) int, The output dimensionality for the projection
        matrices.  If None, no projection is performed.
      proj_clip: (optional) A float value.  If `num_proj > 0` and `proj_clip` is
        provided, then the projected values are clipped elementwise to within
        `[-proj_clip, proj_clip]`.
      num_unit_shards: How to split the weight matrix.  If >1, the weight
        matrix is stored across num_unit_shards.
      num_proj_shards: How to split the projection matrix.  If >1, the
        projection matrix is stored across num_proj_shards.
      forget_bias: Biases of the forget gate are initialized by default to 1
        in order to reduce the scale of forgetting at the beginning of
        the training.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  By default (False), they are concatenated
        along the column axis.  This default behavior will soon be deprecated.
      activation: Activation function of the inner states.
    """
        if not state_is_tuple:
            logging.warn(
                "%s: Using a concatenated state is slower and will soon be "
                "deprecated.  Use state_is_tuple=True." % self)
        self._num_units = num_units
        self._use_peepholes = use_peepholes
        self._initializer = initializer
        self._num_proj = num_proj
        self._proj_clip = proj_clip
        self._num_unit_shards = num_unit_shards
        self._num_proj_shards = num_proj_shards
        self._forget_bias = forget_bias
        self._state_is_tuple = state_is_tuple
        self._activation = activation

        if num_proj:
            self._state_size = (rnn_cell.LSTMStateTuple(num_units, num_proj)
                                if state_is_tuple else num_units + num_proj)
            self._output_size = num_proj
        else:
            self._state_size = (rnn_cell.LSTMStateTuple(num_units, num_units)
                                if state_is_tuple else 2 * num_units)
            self._output_size = num_units
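
A minimal usage sketch of the num_proj / state_is_tuple behaviour documented above, assuming the stock TF 1.x tf.nn.rnn_cell.LSTMCell rather than this exact class:

import tensorflow as tf

# With state_is_tuple=True the state is an LSTMStateTuple(c, h); with num_proj
# set, h (and the cell output) has size num_proj while c keeps size num_units.
cell = tf.nn.rnn_cell.LSTMCell(num_units=128, num_proj=64, state_is_tuple=True)
print(cell.state_size)   # LSTMStateTuple(c=128, h=64)
print(cell.output_size)  # 64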
Example #2
    def __init__(self,
                 cell_size,
                 num_copies,
                 input_keys=1,
                 output_keys=1,
                 initializer=None,
                 num_proj=None,
                 forget_bias=1.0,
                 state_is_tuple=True,
                 activation=None,
                 reuse=None,
                 name=None):
        """Initialize the parameters for an Associative LSTM cell.

    Args:
      cell_size: int, The number of units per copy in the ALSTM cell
      num_copies: int, The number of memory copies in the ALSTM cell
      input_keys: int, The number of inputs to be used.
      output_keys: int, The number of outputs to be used.
      initializer: (optional) The initializer to use for the weight and
        projection matrices.
      num_proj: (optional) int, The output dimensionality for the projection
        matrices.  If None, no projection is performed.
      forget_bias: Biases of the forget gate are initialized by default to 1
        in order to reduce the scale of forgetting at the beginning of
        the training.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.
      activation: Activation function of the inner states.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.
      name: (optional) String, the name of the layer.
    """
        super(AssociativeLSTMCell, self).__init__(_reuse=reuse, name=name)

        if cell_size % 2 != 0:
            raise ValueError("cell_size must be an even number")

        self._cell_size = cell_size
        self._num_copies = num_copies
        self._input_keys = input_keys
        self._output_keys = output_keys
        self._initializer = initializer
        self._num_proj = num_proj

        # Generating key permutations for each copy.
        self._permutations = np.array([
            permutation(self._cell_size // 2) for _ in range(self._num_copies)
        ])
        self._permutations = tf.concat(
            [self._permutations, self._permutations + self._cell_size // 2],
            axis=1,
            name='concat6')

        if num_proj:
            if num_proj % output_keys != 0:
                raise ValueError("num_proj must be divisible by output_keys")
            self._state_size = (rnn_cell.LSTMStateTuple(cell_size, num_proj))
            self._output_size = num_proj
        else:
            num_proj = cell_size * output_keys
            self._state_size = (rnn_cell.LSTMStateTuple(cell_size, cell_size))
            self._output_size = cell_size
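
For reference, a small NumPy sketch of the permutation construction above, assuming `permutation` is numpy.random.permutation (np.concatenate stands in for the tf.concat call): each copy gets an independent permutation of the first half of the cell, and the same permutation shifted by cell_size // 2 is appended so both halves are permuted identically.

import numpy as np

cell_size, num_copies = 6, 2
perms = np.array([np.random.permutation(cell_size // 2) for _ in range(num_copies)])
perms = np.concatenate([perms, perms + cell_size // 2], axis=1)
print(perms.shape)  # (2, 6): one full-width permutation per memory copy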
Example #3
    def testStateTupleDictConversion(self):
        """Test `state_tuple_to_dict` and `dict_to_state_tuple`."""
        cell_sizes = [5, 3, 7]
        # A MultiRNNCell of LSTMCells is both a common choice and an interesting
        # test case, because it has two levels of nesting, with an inner class that
        # is not a plain tuple.
        cell = rnn_cell.MultiRNNCell(
            [rnn_cell.LSTMCell(i) for i in cell_sizes])
        state_dict = {
            dynamic_rnn_estimator._get_state_name(i):
            array_ops.expand_dims(math_ops.range(cell_size), 0)
            for i, cell_size in enumerate([5, 5, 3, 3, 7, 7])
        }
        expected_state = (rnn_cell.LSTMStateTuple(
            np.reshape(np.arange(5), [1, -1]),
            np.reshape(np.arange(5), [1, -1])),
                          rnn_cell.LSTMStateTuple(
                              np.reshape(np.arange(3), [1, -1]),
                              np.reshape(np.arange(3), [1, -1])),
                          rnn_cell.LSTMStateTuple(
                              np.reshape(np.arange(7), [1, -1]),
                              np.reshape(np.arange(7), [1, -1])))
        actual_state = dynamic_rnn_estimator.dict_to_state_tuple(
            state_dict, cell)
        flattened_state = dynamic_rnn_estimator.state_tuple_to_dict(
            actual_state)

        with self.cached_session() as sess:
            (state_dict_val, actual_state_val, flattened_state_val) = sess.run(
                [state_dict, actual_state, flattened_state])

        def _recursive_assert_equal(x, y):
            self.assertEqual(type(x), type(y))
            if isinstance(x, (list, tuple)):
                self.assertEqual(len(x), len(y))
                for i, _ in enumerate(x):
                    _recursive_assert_equal(x[i], y[i])
            elif isinstance(x, np.ndarray):
                np.testing.assert_array_equal(x, y)
            else:
                self.fail('Unexpected type: {}'.format(type(x)))

        for k in state_dict_val.keys():
            np.testing.assert_array_almost_equal(
                state_dict_val[k],
                flattened_state_val[k],
                err_msg='Wrong value for state component {}.'.format(k))
        _recursive_assert_equal(expected_state, actual_state_val)
Example #4
  def testBahdanauNotNormalized(self):
    create_attention_mechanism = wrapper.BahdanauAttentionV2
    create_attention_kwargs = {"kernel_initializer": "ones"}
    expected_final_output = basic_decoder.BasicDecoderOutput(
        rnn_output=ResultSummary(
            shape=(5, 3, 6), dtype=np.dtype(np.float32), mean=4.8290324),
        sample_id=ResultSummary(shape=(5, 3), dtype=np.dtype(np.int32), mean=0))
    expected_final_state = wrapper.AttentionWrapperState(
        cell_state=rnn_cell.LSTMStateTuple(
            c=ResultSummary(
                shape=(5, 9), dtype=np.dtype(np.float32), mean=1.6432636),
            h=ResultSummary(
                shape=(5, 9), dtype=np.dtype(np.float32), mean=0.75866824)),
        attention=ResultSummary(
            shape=(5, 6), dtype=np.dtype(np.float32), mean=6.7445569),
        time=3,
        alignments=ResultSummary(
            shape=(5, 8), dtype=np.dtype(np.float32), mean=0.125),
        attention_state=ResultSummary(
            shape=(5, 8), dtype=np.dtype(np.float32), mean=0.125),
        alignment_history=())
    expected_final_alignment_history = ResultSummary(
        shape=(3, 5, 8), dtype=np.dtype(np.float32), mean=0.125)

    self._testWithAttention(
        create_attention_mechanism,
        expected_final_output,
        expected_final_state,
        alignment_history=True,
        create_query_layer=True,
        expected_final_alignment_history=expected_final_alignment_history,
        create_attention_kwargs=create_attention_kwargs)
Example #5
    def DoOneIter(prev_word, prev_c, prev_h):
      # lookup embedding
      prev_embed = tf.nn.embedding_lookup(self._word_embeddings, prev_word)
      prev_embed = tf.expand_dims(prev_embed, 0)

      if params.use_softmax_adaptation:
        prev_embed = prev_embed[:, params.context_embed_size:]

      # one iteration of recurrent layer
      state = rnn_cell.LSTMStateTuple(prev_c, prev_h)
      with vs.variable_scope('RNN', reuse=True):
        result, (next_c, next_h) = self.cell(prev_embed, state)
      proj_result = tf.matmul(result, self.linear_proj)
      
      if params.use_softmax_adaptation:
        proj_result = tf.concat(axis=1, values=[self.final_context_embed, proj_result])

      # softmax layer
      bias = self.base_bias
      if params.use_hash_table:
        hval = self.hash_func(self.all_ids, self.context_placeholders)
        bias += hval
      logits = tf.matmul(proj_result, self._word_embeddings, transpose_b=True) + bias
      next_prob = tf.nn.softmax(logits / self.temperature)

      cumsum = tf.cumsum(next_prob, exclusive=True, axis=1)
      idx = tf.less(cumsum, tf.random_uniform([1]))
      selected = tf.reduce_max(tf.where(idx)) 
      #selected = tf.squeeze(tf.argmax(next_prob, 1))
      #selected.set_shape(())
      selected_p = tf.nn.embedding_lookup(tf.transpose(next_prob), selected)

      return next_prob, selected, selected_p, next_c, next_h
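
For reference, a minimal NumPy sketch (not part of the original code) of the cumsum-based sampling used above: with an exclusive cumulative sum c[k] = sum(p[:k]), the largest index k with c[k] < u for u ~ Uniform[0, 1) is drawn with probability p[k], i.e. inverse-CDF sampling over the softmax distribution.

import numpy as np

p = np.array([0.1, 0.2, 0.3, 0.4])
u = np.random.uniform()
# Exclusive cumsum, mirroring tf.cumsum(next_prob, exclusive=True, axis=1).
cumsum = np.concatenate([[0.0], np.cumsum(p)[:-1]])
selected = np.max(np.where(cumsum < u)[0])  # sampled index, P(selected=k) = p[k]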
Example #6
    def __call__(self, inputs, state, scope=None):
        with vs.variable_scope(scope or "nmts_decoder_cell"):
            states, encoder_hs = state
            cur_inp = inputs
            new_states = []

            with vs.variable_scope("cell_0"):
                cur_inp, cur_state = self._cells[0](cur_inp, states[0])
                if self._attention == "luong":
                    c_t = attention_luong(cur_inp, encoder_hs)
                elif self._attention == "nmts":
                    c_t = attention_nmts_fast(cur_inp, encoder_hs)
                else:
                    raise ValueError("Unknown attention type: {}".format(self._attention))

            new_states.append(cur_state)
            states = states[1:]
            for i, cell in enumerate(self._cells[1:]):
                with vs.variable_scope("cell_{}".format(i+1)):
                    cur_state = states[i]
                    prev_inp = cur_inp

                    h_dim = cur_inp.get_shape().with_rank(2)[1].value
                    Wp = vs.get_variable("Wp", [2*h_dim, h_dim])
                    bp = vs.get_variable("bp", [h_dim])
                    cur_inp = math_ops.matmul(array_ops.concat(1, [cur_inp, c_t]), Wp) + bp
                    cur_state = rnn_cell.LSTMStateTuple(cur_state.c, cur_inp)

                    next_inp, new_state = cell(cur_inp, cur_state)
                    cur_inp = prev_inp + next_inp if i < len(self._cells[1:]) - 1 else next_inp
                    new_states.append(new_state)
        new_states = tuple(new_states)
        return cur_inp, (new_states, encoder_hs)
Example #7
    def testMaskedLSTMCell(self):
        expected_num_masks = 1
        expected_num_rows = 2 * self.dim
        expected_num_cols = 4 * self.dim
        with self.test_session():
            inputs = variables.Variable(
                random_ops.random_normal([self.batch_size, self.dim]))
            c = variables.Variable(
                random_ops.random_normal([self.batch_size, self.dim]))
            h = variables.Variable(
                random_ops.random_normal([self.batch_size, self.dim]))
            state = tf_rnn_cells.LSTMStateTuple(c, h)
            lstm_cell = rnn_cells.MaskedLSTMCell(self.dim)
            lstm_cell(inputs, state)
            self.assertEqual(len(pruning.get_masks()), expected_num_masks)
            self.assertEqual(len(pruning.get_masked_weights()),
                             expected_num_masks)
            self.assertEqual(len(pruning.get_thresholds()), expected_num_masks)
            self.assertEqual(len(pruning.get_weights()), expected_num_masks)

            for mask in pruning.get_masks():
                self.assertEqual(mask.shape,
                                 (expected_num_rows, expected_num_cols))
            for weight in pruning.get_weights():
                self.assertEqual(weight.shape,
                                 (expected_num_rows, expected_num_cols))
Example #8
  def testLuongScaled(self):
    create_attention_mechanism = wrapper.LuongAttentionV2
    create_attention_kwargs = {"scale": True}

    expected_final_output = basic_decoder.BasicDecoderOutput(
        rnn_output=ResultSummary(
            shape=(5, 3, 6), dtype=np.dtype("float32"), mean=2.6605489),
        sample_id=ResultSummary(
            shape=(5, 3), dtype=np.dtype("int32"), mean=0.0))
    expected_final_state = wrapper.AttentionWrapperState(
        cell_state=rnn_cell.LSTMStateTuple(
            c=ResultSummary(
                shape=(5, 9), dtype=np.dtype("float32"), mean=0.88403547),
            h=ResultSummary(
                shape=(5, 9), dtype=np.dtype("float32"), mean=0.37819088)),
        attention=ResultSummary(
            shape=(5, 6), dtype=np.dtype("float32"), mean=4.0846314),
        time=3,
        alignments=ResultSummary(
            shape=(5, 8), dtype=np.dtype("float32"), mean=0.125),
        attention_state=ResultSummary(
            shape=(5, 8), dtype=np.dtype("float32"), mean=0.125),
        alignment_history=())

    self._testWithAttention(
        create_attention_mechanism,
        expected_final_output,
        expected_final_state,
        attention_mechanism_depth=9,
        create_attention_kwargs=create_attention_kwargs)
Example #9
  def testLuongMonotonicScaled(self):
    create_attention_mechanism = wrapper.LuongMonotonicAttentionV2
    create_attention_kwargs = {"scale": True}

    expected_final_output = basic_decoder.BasicDecoderOutput(
        rnn_output=ResultSummary(
            shape=(5, 3, 6), dtype=np.dtype("float32"), mean=3.159497),
        sample_id=ResultSummary(
            shape=(5, 3), dtype=np.dtype("int32"), mean=0.0))
    expected_final_state = wrapper.AttentionWrapperState(
        cell_state=rnn_cell.LSTMStateTuple(
            c=ResultSummary(
                shape=(5, 9), dtype=np.dtype("float32"), mean=1.072384),
            h=ResultSummary(
                shape=(5, 9), dtype=np.dtype("float32"), mean=0.50331038)),
        attention=ResultSummary(
            shape=(5, 6), dtype=np.dtype("float32"), mean=5.3079605),
        time=3,
        alignments=ResultSummary(
            shape=(5, 8), dtype=np.dtype("float32"), mean=0.11467695),
        attention_state=ResultSummary(
            shape=(5, 8), dtype=np.dtype("float32"), mean=0.11467695),
        alignment_history=())
    expected_final_alignment_history = ResultSummary(
        shape=(3, 5, 8), dtype=np.dtype("float32"), mean=0.11899644)

    self._testWithAttention(
        create_attention_mechanism,
        expected_final_output,
        expected_final_state,
        attention_mechanism_depth=9,
        alignment_history=True,
        expected_final_alignment_history=expected_final_alignment_history,
        create_attention_kwargs=create_attention_kwargs)
Example #10
  def testBahdanauMonotonicNotNormalized(self):
    create_attention_mechanism = wrapper.BahdanauMonotonicAttentionV2
    create_attention_kwargs = {"kernel_initializer": "ones"}

    expected_final_output = basic_decoder.BasicDecoderOutput(
        rnn_output=ResultSummary(
            shape=(5, 3, 6), dtype=np.dtype("float32"), mean=5.9850435),
        sample_id=ResultSummary(
            shape=(5, 3), dtype=np.dtype("int32"), mean=0.0))
    expected_final_state = wrapper.AttentionWrapperState(
        cell_state=rnn_cell.LSTMStateTuple(
            c=ResultSummary(
                shape=(5, 9), dtype=np.dtype("float32"), mean=1.6752492),
            h=ResultSummary(
                shape=(5, 9), dtype=np.dtype("float32"), mean=0.76052248)),
        attention=ResultSummary(
            shape=(5, 6), dtype=np.dtype("float32"), mean=8.361186),
        time=3,
        alignments=ResultSummary(
            shape=(5, 8), dtype=np.dtype("float32"), mean=0.10989678),
        attention_state=ResultSummary(
            shape=(5, 8), dtype=np.dtype("float32"), mean=0.10989678),
        alignment_history=())
    expected_final_alignment_history = ResultSummary(
        shape=(3, 5, 8), dtype=np.dtype("float32"), mean=0.117412611)

    self._testWithAttention(
        create_attention_mechanism,
        expected_final_output,
        expected_final_state,
        alignment_history=True,
        expected_final_alignment_history=expected_final_alignment_history,
        create_query_layer=True,
        create_attention_kwargs=create_attention_kwargs)
Example #11
  def testBahdanauMonotonicNormalized(self):
    create_attention_mechanism = wrapper.BahdanauMonotonicAttentionV2
    create_attention_kwargs = {"kernel_initializer": "ones",
                               "normalize": True}
    expected_final_output = basic_decoder.BasicDecoderOutput(
        rnn_output=ResultSummary(
            shape=(5, 3, 6), dtype=np.dtype("float32"), mean=4.5706983),
        sample_id=ResultSummary(
            shape=(5, 3), dtype=np.dtype("int32"), mean=0.0))
    expected_final_state = wrapper.AttentionWrapperState(
        cell_state=rnn_cell.LSTMStateTuple(
            c=ResultSummary(
                shape=(5, 9), dtype=np.dtype("float32"), mean=1.6005473),
            h=ResultSummary(
                shape=(5, 9), dtype=np.dtype("float32"), mean=0.77863038)),
        attention=ResultSummary(
            shape=(5, 6), dtype=np.dtype("float32"), mean=7.3326721),
        time=3,
        alignments=ResultSummary(
            shape=(5, 8), dtype=np.dtype("float32"), mean=0.12258384),
        attention_state=ResultSummary(
            shape=(5, 8), dtype=np.dtype("float32"), mean=0.12258384),
        alignment_history=())
    expected_final_alignment_history = ResultSummary(
        shape=(3, 5, 8), dtype=np.dtype("float32"), mean=0.12258384)

    self._testWithAttention(
        create_attention_mechanism,
        expected_final_output,
        expected_final_state,
        alignment_history=True,
        expected_final_alignment_history=expected_final_alignment_history,
        create_query_layer=True,
        create_attention_kwargs=create_attention_kwargs)
Example #12
    def __call__(self, inputs, state, scope=None):
        """LSTM cell with layer normalization and recurrent dropout."""

        with vs.variable_scope(scope or type(self).__name__) as scope:  # LayerNormBasicLSTMCell  # pylint: disable=unused-variable
            c, h = state
            args = array_ops.concat(1, [inputs, h])
            concat = self._linear(args)

            i, j, f, o = array_ops.split(1, 4, concat)
            if self._layer_norm:
                i = self._norm(i, "input")
                j = self._norm(j, "transform")
                f = self._norm(f, "forget")
                o = self._norm(o, "output")

            g = self._activation(j)
            if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1:
                g = nn_ops.dropout(g, self._keep_prob, seed=self._seed)

            new_c = (c * math_ops.sigmoid(f + self._forget_bias) +
                     math_ops.sigmoid(i) * g)
            if self._layer_norm:
                new_c = self._norm(new_c, "state")
            new_h = self._activation(new_c) * math_ops.sigmoid(o)

            new_state = rnn_cell.LSTMStateTuple(new_c, new_h)
            return new_h, new_state
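
A usage sketch, assuming the stock tf.contrib.rnn.LayerNormBasicLSTMCell in TF 1.x (whose __call__ matches the logic shown above):

import tensorflow as tf

cell = tf.contrib.rnn.LayerNormBasicLSTMCell(num_units=64, dropout_keep_prob=0.9)
inputs = tf.placeholder(tf.float32, [None, 10, 32])  # [batch, time, features]
outputs, state = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
# `state` is an LSTMStateTuple(c, h), each of shape [batch, 64].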
Example #13
  def testNotUseAttentionLayer(self):
    create_attention_mechanism = wrapper.BahdanauAttentionV2
    create_attention_kwargs = {"kernel_initializer": "ones"}

    expected_final_output = basic_decoder.BasicDecoderOutput(
        rnn_output=ResultSummary(
            shape=(5, 3, 10), dtype=np.dtype("float32"), mean=0.072406612),
        sample_id=ResultSummary(
            shape=(5, 3), dtype=np.dtype("int32"), mean=3.86666666))
    expected_final_state = wrapper.AttentionWrapperState(
        cell_state=rnn_cell.LSTMStateTuple(
            c=ResultSummary(
                shape=(5, 9), dtype=np.dtype("float32"), mean=1.032002),
            h=ResultSummary(
                shape=(5, 9), dtype=np.dtype("float32"), mean=0.61177742)),
        attention=ResultSummary(
            shape=(5, 10), dtype=np.dtype("float32"), mean=0.011346335),
        time=3,
        alignments=ResultSummary(
            shape=(5, 8), dtype=np.dtype("float32"), mean=0.125),
        attention_state=ResultSummary(
            shape=(5, 8), dtype=np.dtype("float32"), mean=0.125),
        alignment_history=())

    self._testWithAttention(
        create_attention_mechanism,
        expected_final_output,
        expected_final_state,
        attention_layer_size=None,
        create_query_layer=True,
        create_attention_kwargs=create_attention_kwargs)
Example #14
 def loop_fn(i):
   loop_inputs = [
       array_ops.expand_dims(array_ops.gather(x, i), 0) for x in inputs
   ]
   loop_init_state = rnn_cell.LSTMStateTuple(
       *[array_ops.expand_dims(array_ops.gather(x, i), 0) for x in init_state])
   return model_fn(loop_inputs, loop_init_state)
Example #15
  def testBahdanauNormalized(self):
    create_attention_mechanism = wrapper.BahdanauAttentionV2
    create_attention_kwargs = {"kernel_initializer": "ones", "normalize": True}

    expected_final_output = basic_decoder.BasicDecoderOutput(
        rnn_output=ResultSummary(
            shape=(5, 3, 6), dtype=np.dtype("float32"), mean=3.9548259),
        sample_id=ResultSummary(
            shape=(5, 3), dtype=np.dtype("int32"), mean=0.0))
    expected_final_state = wrapper.AttentionWrapperState(
        cell_state=rnn_cell.LSTMStateTuple(
            c=ResultSummary(
                shape=(5, 9), dtype=np.dtype("float32"), mean=1.4652209),
            h=ResultSummary(
                shape=(5, 9), dtype=np.dtype("float32"), mean=0.70997983)),
        attention=ResultSummary(
            shape=(5, 6), dtype=np.dtype("float32"), mean=6.3075728),
        time=3,
        alignments=ResultSummary(
            shape=(5, 8), dtype=np.dtype("float32"), mean=0.125),
        attention_state=ResultSummary(
            shape=(5, 8), dtype=np.dtype("float32"), mean=0.125),
        alignment_history=())

    self._testWithAttention(
        create_attention_mechanism,
        expected_final_output,
        expected_final_state,
        create_query_layer=True,
        create_attention_kwargs=create_attention_kwargs)
Example #16
def multi_rnn(inputs,
              layer_sizes,
              sequence_length,
              dropout_keep_prob=1.0,
              attn_length=0,
              base_cell=tf.contrib.rnn.BasicLSTMCell,
              initial_state=None):
    if initial_state is not None:
        batch_size = inputs.shape[0]
        initial_state = tuple([
            rnn_cell.LSTMStateTuple(tf.zeros([batch_size, size]),
                                    initial_state) for size in layer_sizes
        ])
    cells = make_rnn_cells(layer_sizes,
                           dropout_keep_prob=dropout_keep_prob,
                           attn_length=attn_length,
                           base_cell=base_cell)
    cell = tf.contrib.rnn.MultiRNNCell(cells, state_is_tuple=True)
    outputs, states = tf.nn.dynamic_rnn(cell,
                                        inputs,
                                        initial_state=initial_state,
                                        sequence_length=sequence_length,
                                        dtype=tf.float32)
    if attn_length:
        return tf.reduce_sum(outputs, 1)
        # Unreachable alternatives kept for reference:
        # return tf.reduce_sum(outputs, 1) / \
        #     tf.reshape(tf.cast(sequence_length, tf.float32), [-1, 1])
        # return tf.concat([states[0][0].h, states[0][1]], 1)
    return states[-1].h
    # Unreachable alternatives kept for reference:
    # return tf.concat([states[-1].c, states[-1].h], 1)
    # return tf.concat([states[0].c, states[0].h], 1)
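
The helper make_rnn_cells is referenced above but not shown; a plausible sketch, assuming it wraps each layer's base_cell with the contrib attention and dropout wrappers (the actual implementation may differ):

import tensorflow as tf

def make_rnn_cells(layer_sizes,
                   dropout_keep_prob=1.0,
                   attn_length=0,
                   base_cell=tf.contrib.rnn.BasicLSTMCell):
    cells = []
    for size in layer_sizes:
        cell = base_cell(size)
        if attn_length:
            cell = tf.contrib.rnn.AttentionCellWrapper(
                cell, attn_length, state_is_tuple=True)
        if dropout_keep_prob < 1.0:
            cell = tf.contrib.rnn.DropoutWrapper(
                cell, output_keep_prob=dropout_keep_prob)
        cells.append(cell)
    return cells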
Example #17
    def CreateDecodingGraph(self, params):
        """Construct the part of the graph used for decoding."""

        out_embeddings = self.word_embedder.GetAllEmbeddings()

        # placeholders for decoder
        self.prev_word = tf.placeholder(tf.int32, (), name='prev_word')
        self.prev_c = tf.get_variable(
            'prev_c', [1, params.cell_size],
            dtype=tf.float32,
            collections=[tf.GraphKeys.LOCAL_VARIABLES])
        self.prev_h = tf.get_variable(
            'prev_h', [1, params.cell_size],
            dtype=tf.float32,
            collections=[tf.GraphKeys.LOCAL_VARIABLES])
        self.temperature = tf.placeholder_with_default([1.0], [1])

        # lookup embedding
        prev_embed = tf.nn.embedding_lookup(out_embeddings, self.prev_word)
        prev_embed = tf.expand_dims(prev_embed, 0)

        if params.use_softmax_adaptation:
            prev_embed = prev_embed[:, self.context_size:]

        # one iteration of recurrent layer
        state = rnn_cell.LSTMStateTuple(self.prev_c, self.prev_h)
        with tf.variable_scope('RNN', reuse=True):
            result, (self.next_c, self.next_h) = self.cell(prev_embed, state)

        proj_result = tf.matmul(result, self.linear_proj)
        if params.use_softmax_adaptation:
            proj_result = tf.concat(
                axis=1, values=[self.final_context_embed, proj_result])

        # softmax layer
        bias = self.base_bias
        if params.use_hash_table or params.use_context_dependent_bias:
            hval = self.hash_func(self.all_ids, self.context_placeholders)
            bias += hval

        self.beam_size = tf.placeholder_with_default(1, (), name='beam_size')
        logits = tf.matmul(proj_result, out_embeddings,
                           transpose_b=True) + bias
        self.next_prob = tf.nn.softmax(logits / self.temperature)
        #self.selected = tf.multinomial(logits / self.temperature, self.beam_size)
        self.selected = tf.squeeze(
            tf.multinomial(logits / self.temperature, self.beam_size))
        self.selected, _ = tf.unique(self.selected)
        self.selected_p = tf.nn.embedding_lookup(tf.transpose(self.next_prob),
                                                 self.selected)

        assign1 = self.prev_c.assign(self.next_c)
        assign2 = self.prev_h.assign(self.next_h)
        self.assign_op = tf.group(assign1, assign2)

        # reset state
        assign1 = self.prev_c.assign(tf.zeros_like(self.prev_c))
        assign2 = self.prev_h.assign(tf.zeros_like(self.prev_h))
        self.reset_state = tf.group(assign1, assign2)
Example #18
def _PopnnLSTM(x, h, c):
    lstm_cell = ipu.ops.rnn_ops.PopnnLSTM(
        num_hidden,
        dtype=dataType,
        weights_initializer=init_ops.zeros_initializer(dtype=dataType),
        bias_initializer=init_ops.zeros_initializer(dtype=dataType))
    state = rnn_cell.LSTMStateTuple(c, h)
    return lstm_cell(x, initial_state=state, training=False)
Example #19
  def call(self, inputs, initial_state=None, training=True):
    """Runs the forward step for the LSTM model.

    Args:
      inputs: 3-D tensor with shape [time_len, batch_size, input_size].
      initial_state: An `LSTMStateTuple` of state tensors, each shaped
        `[batch_size, num_units]`. If not provided, the state is
        initialized to zeros.
        DEPRECATED: a plain tuple of tensors (input_h_state, input_c_state),
        each of shape [batch_size, num_units], is also accepted.
      training: whether this operation will be used in training or inference.

    Returns:
      tuple of output and output states:

      * output: a tensor of shape [time_len, batch_size, num_units].
      * output_states: An `LSTMStateTuple` of the same shape and structure as
          initial_state. If the initial state used the deprecated behaviour of
          not passing `LSTMStateTuple`, then a tuple
          (output_h_state, output_c_state) is returned.

    Raises:
      ValueError: if initial_state is not valid.

    """

    dtype = self.dtype
    inputs = ops.convert_to_tensor(inputs, dtype=dtype)

    batch_size = array_ops.shape(inputs)[1]

    uses_old_api = False
    if initial_state is not None and not isinstance(initial_state,
                                                    rnn_cell.LSTMStateTuple):
      if isinstance(initial_state, tuple):
        logging.warning(
            "Passing a tuple as a `initial_state` to PopnnLSTM is "
            "deprecated and will be removed in the future. Pass an "
            "`LSTMStateTuple` instead.")
        initial_state = rnn_cell.LSTMStateTuple(initial_state[1],
                                                initial_state[0])
        uses_old_api = True
      else:
        raise ValueError("Invalid initial_state type: `%s`, expecting "
                         "`LSTMStateTuple`." % type(initial_state))

    if initial_state is None:
      # Create a zero state.
      initial_state = self._zero_state(batch_size)

    c, h = initial_state
    h = ops.convert_to_tensor(h, dtype=dtype)
    c = ops.convert_to_tensor(c, dtype=dtype)
    outputs, state = self._forward(inputs, h, c, self.kernel, self.biases,
                                   training)
    if uses_old_api:
      state = (state.h, state.c)
    return outputs, state
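
The deprecated path above only reorders a plain (h, c) tuple into the canonical LSTMStateTuple(c, h); a small self-contained illustration of that swap (shapes are arbitrary):

import numpy as np
import tensorflow as tf

h = np.zeros((4, 16), dtype=np.float32)  # [batch_size, num_units]
c = np.ones((4, 16), dtype=np.float32)
legacy_state = (h, c)  # deprecated ordering: (input_h_state, input_c_state)
state = tf.nn.rnn_cell.LSTMStateTuple(legacy_state[1], legacy_state[0])
assert state.c is c and state.h is h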
Example #20
 def encode(self, x):
     """Probabilistic encoder from inputs to latent distribution parameters;
     a.k.a. inference network q(z|x)
     """
     # np.array -> [float, float]
     feed_dict = {self.input_placeholder: x}
     return self.sess.run(rnn_cell.LSTMStateTuple(self.encoding_cell,
                                                  self.encoding_hidden),
                          feed_dict=feed_dict)
Example #21
 def _forward(self, inputs, h, c, kernel, biases, training):
     output, output_h, output_c, _ = gen_popnn_ops.popnn_lstm_layer(
         inputs=inputs,
         num_channels=self._num_units,
         kernel=kernel,
         biases=biases,
         input_h_state=h,
         input_c_state=c,
         is_training=training,
         partials_dtype=self._partials_dtype,
         name=self._name)
     return output, rnn_cell.LSTMStateTuple(output_c, output_h)
Example #22
def _tfLSTM(x, h, c):
    lstm_cell = rnn_cell.LSTMCell(
        num_hidden,
        name='basic_lstm_cell',
        forget_bias=0.,
        initializer=init_ops.zeros_initializer(dtype=dataType))
    state = rnn_cell.LSTMStateTuple(c, h)
    return rnn.dynamic_rnn(lstm_cell,
                           x,
                           dtype=dataType,
                           initial_state=state,
                           time_major=True)
Example #23
def _PopnnLSTM(x, h, c, y):
    lstm_cell = popnn_rnn.PopnnLSTM(
        num_hidden,
        dtype=dataType,
        weights_initializer=init_ops.zeros_initializer(dtype=dataType),
        bias_initializer=init_ops.zeros_initializer(dtype=dataType))
    state = rnn_cell.LSTMStateTuple(c, h)
    outputs, _ = lstm_cell(x, initial_state=state, training=True)
    softmax = nn.softmax_cross_entropy_with_logits_v2(
        logits=outputs[-1], labels=array_ops.stop_gradient(y))
    loss = math_ops.reduce_mean(softmax)
    train = gradient_descent.GradientDescentOptimizer(lr).minimize(loss)
    return [loss, train]
Example #24
    def call(self, inputs, state):
        """Long short-term memory cell (LSTM) with masks for pruning.

    Args:
      inputs: `2-D` tensor with shape `[batch_size, input_size]`.
      state: An `LSTMStateTuple` of state tensors, each shaped
        `[batch_size, self.state_size]`, if `state_is_tuple` has been set to
        `True`.  Otherwise, a `Tensor` shaped
        `[batch_size, 2 * self.state_size]`.

    Returns:
      A pair containing the new hidden state, and the new state (either a
        `LSTMStateTuple` or a concatenated state, depending on
        `state_is_tuple`).
    """
        sigmoid = math_ops.sigmoid
        one = constant_op.constant(1, dtype=dtypes.int32)
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(value=state, num_or_size_splits=2, axis=one)

        gate_inputs = math_ops.matmul(array_ops.concat([inputs, h], 1),
                                      self._masked_kernel)
        gate_inputs = nn_ops.bias_add(gate_inputs, self._bias)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(value=gate_inputs,
                                     num_or_size_splits=4,
                                     axis=one)

        forget_bias_tensor = constant_op.constant(self._forget_bias,
                                                  dtype=f.dtype)
        # Note that using `add` and `multiply` instead of `+` and `*` gives a
        # performance improvement. So using those at the cost of readability.
        add = math_ops.add
        multiply = math_ops.multiply
        new_c = add(multiply(c, sigmoid(add(f, forget_bias_tensor))),
                    multiply(sigmoid(i), self._activation(j)))
        new_h = multiply(self._activation(new_c), sigmoid(o))

        if self._state_is_tuple:
            new_state = tf_rnn.LSTMStateTuple(new_c, new_h)
        else:
            new_state = array_ops.concat([new_c, new_h], 1)
        return new_h, new_state
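
For reference, a minimal NumPy sketch of the gate arithmetic implemented by call above (the pruning mask is omitted; names and shapes are illustrative):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

batch, input_size, num_units, forget_bias = 2, 3, 4, 1.0
rng = np.random.RandomState(0)
x = rng.randn(batch, input_size)
c = rng.randn(batch, num_units)
h = rng.randn(batch, num_units)
kernel = rng.randn(input_size + num_units, 4 * num_units)
bias = np.zeros(4 * num_units)

gates = np.concatenate([x, h], axis=1) @ kernel + bias
i, j, f, o = np.split(gates, 4, axis=1)          # input, new input, forget, output
new_c = c * sigmoid(f + forget_bias) + sigmoid(i) * np.tanh(j)
new_h = np.tanh(new_c) * sigmoid(o)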
Example #25
 def _LSTMLayerCPU(self, inputs, weights_value, initial_state, forget_bias,
                   training, name):
     with ops.device("/device:CPU:0"):
         lstm_cell = rnn_cell.LSTMCell(
             num_channels,
             name='basic_lstm_cell',
             forget_bias=forget_bias,
             initializer=init_ops.constant_initializer(weights_value,
                                                       dtype=dataType),
             reuse=variable_scope.AUTO_REUSE)
         state = rnn_cell.LSTMStateTuple(initial_state[1], initial_state[0])
         outputs, states = rnn.dynamic_rnn(lstm_cell,
                                           inputs,
                                           dtype=dataType,
                                           initial_state=state,
                                           time_major=True)
         return outputs
Example #26
    def __call__(self, inputs, state, scope=None, reuse=None):
        with tf.variable_scope("hyper_lstm_cell", reuse=reuse):
            # Parameters of gates are concatenated into one multiply for efficiency.
            c, h = state
            the_input = tf.concat(axis=1, values=[inputs, h])

            result = tf.matmul(the_input, self.W)

            if self.lowrank_adaptation:
                input_expanded = tf.expand_dims(the_input, 1)
                intermediate = tf.matmul(input_expanded, self.left_adapt)
                final = tf.matmul(intermediate, self.right_adapt)
                result += tf.squeeze(final)
            if self.mikilov_adapt:
                result += self.delta

            result += self.bias

            # j = new_input, f = forget_gate, o = output_gate
            j, f, o = tf.split(axis=1, num_or_size_splits=3, value=result)

            def Norm(inputs, gamma, beta):
                # layer norm helper function
                m, v = tf.nn.moments(inputs, [1], keep_dims=True)
                normalized_input = (inputs - m) / tf.sqrt(v + 1e-5)
                return normalized_input * gamma + beta

            if self.layer_norm:
                j = Norm(j, self.gammas[0], self.betas[0])
                f = Norm(f, self.gammas[1], self.betas[1])
                o = Norm(o, self.gammas[2], self.betas[2])

            g = self._activation(j)

            # recurrent dropout without memory loss
            if (not isinstance(self._keep_prob, float)) or self._keep_prob < 1:
                g = tf.nn.dropout(g, self._keep_prob)

            forget_gate = tf.sigmoid(f + self._forget_bias)
            input_gate = 1.0 - forget_gate  # input and forget gates are coupled

            new_c = (c * forget_gate + input_gate * g)
            new_h = self._activation(new_c) * tf.sigmoid(o)

            new_state = rnn_cell.LSTMStateTuple(new_c, new_h)
            return new_h, new_state
Example #27
def _tfLSTM(x, h, c, y):
    lstm_cell = rnn_cell.LSTMCell(
        num_hidden,
        name='basic_lstm_cell',
        forget_bias=0.,
        initializer=init_ops.zeros_initializer(dtype=dataType))
    state = rnn_cell.LSTMStateTuple(c, h)
    outputs, _ = rnn.dynamic_rnn(lstm_cell,
                                 x,
                                 dtype=dataType,
                                 initial_state=state,
                                 time_major=True)
    softmax = nn.softmax_cross_entropy_with_logits_v2(
        logits=outputs[-1], labels=array_ops.stop_gradient(y))
    loss = math_ops.reduce_mean(softmax)
    train = gradient_descent.GradientDescentOptimizer(lr).minimize(loss)
    return [loss, train]
Example #28
    def _build(self, incoming, state, *args, **kwargs):
        """Long short-term memory cell (LSTM)."""
        self._declare_dependencies()
        activation = getters.get_activation(self.activation)
        inner_activation = getters.get_activation(self.inner_activation)
        # Parameters of gates are concatenated into one multiply for efficiency.
        if self._state_is_tuple:
            c, h = state
        else:
            c, h = array_ops.split(axis=1, num_or_size_splits=2, value=state)
        concat = _linear([incoming, h], 4 * self._num_units, True, 0.,
                         self.weights_init, self.trainable, self.restore)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = array_ops.split(axis=1,
                                     num_or_size_splits=4,
                                     value=concat)

        # apply batch normalization to inner state and gates
        if self.batch_norm:
            i = self._batch_norm_i(i)
            j = self._batch_norm_j(j)
            f = self._batch_norm_f(f)
            o = self._batch_norm_o(o)

        new_c = (c * inner_activation(f + self._forget_bias) +
                 inner_activation(i) * activation(j))

        # hidden-to-hidden batch normalization
        if self.batch_norm:
            batch_norm_new_c = self._batch_norm_c(new_c)
            new_h = activation(batch_norm_new_c) * inner_activation(o)
        else:
            new_h = activation(new_c) * inner_activation(o)

        if self._state_is_tuple:
            new_state = rnn_cell.LSTMStateTuple(new_c, new_h)
        else:
            new_state = tf.concat(values=[new_c, new_h], axis=1)

        # Retrieve RNN Variables
        with get_variable_scope(scope='Linear', reuse=True):
            self._w = tf.get_variable('w')
            self._b = tf.get_variable('b')

        return new_h, new_state
Example #29
    def __call__(self, inputs, parent_state, cyc_state, scope=None):
        """Modified Long short-term memory for tree structure"""
        with vs.variable_scope(scope or type(self).__name__):   # "BasicTreeLSTMCell"
            # parameters of gates are concatenated into one multiply for efficiency
            parent_c, parent_h = parent_state
            cyc_c, cyc_h = cyc_state
            c = rnn.linear([parent_c, cyc_c], self._num_units, True)
            concat = rnn.linear([inputs, parent_h, cyc_h],
                                4 * self._num_units, True)

            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
            i, j, f, o = array_ops.split(1, 4, concat)

            new_c = (c * rnn_cell.sigmoid(f + self._forget_bias)
                     + rnn_cell.sigmoid(i) * self._activation(j))
            new_h = self._activation(new_c) * rnn_cell.sigmoid(o)

            new_state = rnn_cell.LSTMStateTuple(new_c, new_h)

            return new_h, new_state
Example #30
    def __call__(self, inputs, state, scope=None):
        with vs.variable_scope(scope or "nmts_decoder_cell"):
            states, c_t = state
            cur_inp = inputs
            new_states = []
            for i, cell in enumerate(self._cells):
                with vs.variable_scope("cell_{}".format(i)):
                    cur_state = states[i]
                    prev_inp = cur_inp

                    h_dim = cur_inp.get_shape().with_rank(2)[1].value
                    Wp = vs.get_variable("Wp", [2*h_dim, h_dim])
                    bp = vs.get_variable("bp", [h_dim])
                    cur_inp = math_ops.matmul(array_ops.concat(1, [cur_inp, c_t]), Wp) + bp
                    cur_state = rnn_cell.LSTMStateTuple(cur_state.c, cur_inp)

                    next_inp, new_state = cell(cur_inp, cur_state)
                    cur_inp = prev_inp + next_inp if i < len(self._cells) - 1 else next_inp
                    new_states.append(new_state)
        new_states = tuple(new_states)
        return cur_inp, (new_states, c_t)