Example #1
def native_lstm2(x,
                 h_0,
                 c_0,
                 mask,
                 W_f,
                 W_r,
                 b,
                 n_time,
                 n_batch,
                 n_in_dim,
                 n_cells,
                 start=0,
                 step=1,
                 name="native_lstm2"):
    """
  :param tf.Tensor x: (n_time, n_batch, n_in_dim)
  :param tf.Tensor h_0: (n_batch, n_cells)
  :param tf.Tensor c_0: (n_batch, n_cells)
  :param tf.Tensor mask: (n_time, n_batch)
  :param tf.Tensor W_f: (n_in_dim, n_cells * 4)
  :param tf.Tensor W_r: (n_in_dim, n_cells * 4)
  :param tf.Tensor b: (n_cells * 4,)
  :param int n_time:
  :param int n_batch:
  :param int n_in_dim:
  :param int n_cells:
  :param int start:
  :param int step:
  :param str name:
  :return: h, c
  :rtype: (tf.Tensor, tf.Tensor)
  """
    assert n_time > 0
    assert 0 <= start < n_time
    assert step != 0
    x = tf.convert_to_tensor(x)
    h_0 = tf.convert_to_tensor(h_0)
    c_0 = tf.convert_to_tensor(c_0)
    mask = tf.convert_to_tensor(mask)
    W_f = tf.convert_to_tensor(W_f)
    W_r = tf.convert_to_tensor(W_r)
    b = tf.convert_to_tensor(b)
    x.set_shape(tf.TensorShape((n_time, n_batch, n_in_dim)))
    h_0.set_shape(tf.TensorShape((n_batch, n_cells)))
    c_0.set_shape(tf.TensorShape((n_batch, n_cells)))
    mask.set_shape(tf.TensorShape((n_time, n_batch)))
    W_f.set_shape(tf.TensorShape((n_in_dim, n_cells * 4)))
    W_r.set_shape(tf.TensorShape((n_cells, n_cells * 4)))
    b.set_shape(tf.TensorShape((n_cells * 4, )))
    op = make_op(NativeOp.NativeLstm2)
    from TFUtil import dot, expand_multiple_dims
    intern = dot(x, W_f)  # (n_time, n_batch, n_cells * 4): input contribution for all four gates
    intern.set_shape(tf.TensorShape((n_time, n_batch, n_cells * 4)))
    intern += expand_multiple_dims(b, (0, 1))  # broadcast bias over time and batch
    intern.set_shape(tf.TensorShape((n_time, n_batch, n_cells * 4)))
    y, c, _, d = op(intern, W_r, h_0, c_0, mask, start, step)
    y.set_shape(tf.TensorShape((n_time, n_batch, n_cells)))
    c.set_shape(tf.TensorShape((n_time, n_batch, n_cells)))
    d.set_shape(tf.TensorShape((n_batch, n_cells)))
    return y, c, d
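
A minimal usage sketch for the function above. It assumes the surrounding RETURNN module context (which provides make_op and NativeOp, so the native op is actually compiled) and a TF1-style graph; the concrete shapes and variable names below are illustrative only.

import numpy
import tensorflow as tf

n_time, n_batch, n_in, n_cells = 5, 2, 3, 4
x = tf.placeholder(tf.float32, (n_time, n_batch, n_in))
mask = tf.ones((n_time, n_batch))  # 1.0 for valid frames, 0.0 for padding
h_0 = tf.zeros((n_batch, n_cells))
c_0 = tf.zeros((n_batch, n_cells))
W_f = tf.get_variable("W_f", shape=(n_in, n_cells * 4))
W_r = tf.get_variable("W_r", shape=(n_cells, n_cells * 4))
b = tf.get_variable("b", shape=(n_cells * 4,), initializer=tf.zeros_initializer())
y, c, d = native_lstm2(x, h_0, c_0, mask, W_f, W_r, b,
                       n_time=n_time, n_batch=n_batch, n_in_dim=n_in, n_cells=n_cells)
# y: per-step outputs (n_time, n_batch, n_cells), c: per-step cell states, d: final cell state

with tf.Session() as session:
  session.run(tf.global_variables_initializer())
  out = session.run(y, feed_dict={x: numpy.random.randn(n_time, n_batch, n_in).astype("float32")})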
Example #2
  def __init__(self, activation, with_bias=True, **kwargs):
    super(LinearLayer, self).__init__(**kwargs)

    self.activation = activation
    self.with_bias = with_bias

    input_data = self.input_data
    n_in = input_data.dim
    n_out = self.output.dim
    assert n_in and n_out, "%r and %r" % (input_data, self.output)

    W = self.add_param(
      tf.Variable(
        name="W",
        initial_value=tf.contrib.layers.xavier_initializer(seed=self.network.random.randint(2**31))(
          shape=(n_in, n_out))))

    if self.with_bias:
      b = self.add_param(tf.Variable(
        name="b",
        initial_value=tf.constant_initializer(value=0, dtype=tf.float32)(
          shape=(n_out,))))
    else:
      b = None

    with tf.name_scope("linear"):
      from TFUtil import dot
      x = input_data.placeholder
      ndim = x.get_shape().ndims

      if self.input_data.sparse:
        x = tf.nn.embedding_lookup(W, x)  # sparse input: W acts as an embedding matrix, adds a feature axis
        ndim += 1
      else:
        x = dot(x, W)  # dense input: matrix product over the feature axis
      assert x.get_shape().ndims == ndim

      if self.with_bias:
        x = tf.add(x, b, name="add_bias")
        assert x.get_shape().ndims == ndim

    if self.activation:
      from TFUtil import get_activation_function
      act_func = get_activation_function(self.activation)
      self.output_before_activation = OutputWithActivation(x, act_func=act_func)
    else:
      self.output_before_activation = OutputWithActivation(x)
    x = self.output_before_activation.y

    self.output.batch_dim_axis = self.input_data.batch_dim_axis
    self.output.time_dim_axis = self.input_data.time_dim_axis
    self.output.placeholder = x
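
The dense (non-sparse) path of this layer reduces to act(x · W + b). Below is a standalone sketch with made-up shapes, and tanh standing in for whatever activation was configured; it illustrates the computation, it is not the RETURNN code itself.

import tensorflow as tf

n_in, n_out = 16, 32
x = tf.placeholder(tf.float32, (None, None, n_in))  # (batch, time, n_in)
W = tf.get_variable("W_demo", shape=(n_in, n_out))
b = tf.get_variable("b_demo", shape=(n_out,), initializer=tf.zeros_initializer())
y = tf.tanh(tf.tensordot(x, W, axes=1) + b)  # (batch, time, n_out); bias broadcasts over batch and time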
Example #3
 def __init__(self, unit="lstm", bidirectional=False, direction=None, input_projection=True, **kwargs):
   """
   :param str unit: the RNNCell/etc name, e.g. "nativelstm". see comment below
   :param bool bidirectional: whether we should combine a forward and backward cell
   :param int|None direction: None|1 -> forward, -1 -> backward
   :param bool input_projection: True -> input is multiplied with a projection matrix. False only works if the input dim already matches the cell input dim
   :param dict[str] kwargs: passed on to base class
   """
   super(RecLayer, self).__init__(**kwargs)
   from tensorflow.python.ops import rnn, rnn_cell
   import tensorflow.contrib.rnn as rnn_contrib
   import TFNativeOp
   from TFUtil import swapaxes, dot, sequence_mask_time_major, directed
   if unit in ["lstmp", "lstm"]:
     # Some possible LSTM implementations are:
     # * BasicLSTM, via official TF, pure TF implementation
     # * LSTMBlockFused, via tf.contrib.rnn (both CPU and GPU); should be much faster than BasicLSTM
     # * NativeLSTM, our own native LSTM (both CPU and GPU); should be faster than LSTMBlockFused
     # We default to the fastest one, i.e. NativeLSTM.
     # Note that they are currently not compatible with each other, i.e. in the way the parameters are represented.
     unit = "nativelstm"
   if direction is not None:
     assert not bidirectional
     assert direction in [-1, 1]
   if not self._rnn_cells_dict:
     self._create_rnn_cells_dict()
   rnn_cell_class = self._rnn_cells_dict[unit.lower()]
   with tf.variable_scope(
         "rec",
         initializer=tf.contrib.layers.xavier_initializer(
           seed=self.network.random.randint(2**31))) as scope:
     assert isinstance(scope, tf.VariableScope)
     scope_name_prefix = scope.name + "/"  # e.g. "layer1/rec/"
     n_hidden = self.output.dim
     if bidirectional:
       assert n_hidden % 2 == 0
       n_hidden //= 2
     cell_fw = rnn_cell_class(n_hidden)
     assert isinstance(cell_fw, (rnn_cell.RNNCell, rnn_contrib.FusedRNNCell, TFNativeOp.RecSeqCellOp))  # e.g. BasicLSTMCell
     if bidirectional:
       cell_bw = rnn_cell_class(n_hidden)
     else:
       cell_bw = None
     x = self.input_data.placeholder  # (batch,time,dim) or (time,batch,dim)
     if not self.input_data.is_time_major:
       assert self.input_data.batch_dim_axis == 0
       assert self.input_data.time_dim_axis == 1
       x = swapaxes(x, 0, 1)   # (time,batch,[dim])
     seq_len = self.input_data.size_placeholder[0]
     if isinstance(cell_fw, (rnn_cell.RNNCell, rnn_contrib.FusedRNNCell)):
       assert not self.input_data.sparse
       assert input_projection
       if direction == -1:
         x = tf.reverse_sequence(x, seq_lengths=seq_len, batch_dim=1, seq_dim=0)
       if isinstance(cell_fw, rnn_cell.RNNCell):  # e.g. BasicLSTMCell
         if bidirectional:
           # Will get (time,batch,ydim/2).
           (y_fw, y_bw), _ = rnn.bidirectional_dynamic_rnn(
             cell_fw=cell_fw, cell_bw=cell_bw,
             inputs=x, time_major=True, sequence_length=seq_len,
             dtype=tf.float32)
           y = tf.concat((y_fw, y_bw), axis=2)  # (time,batch,ydim)
         else:
           # Will get (time,batch,ydim).
           y, _ = rnn.dynamic_rnn(cell=cell_fw, inputs=x, time_major=True, sequence_length=seq_len, dtype=tf.float32)
       elif isinstance(cell_fw, rnn_contrib.FusedRNNCell):  # e.g. LSTMBlockFusedCell
         if bidirectional:
           raise NotImplementedError
         # Will get (time,batch,ydim).
         y, _ = cell_fw(inputs=x, sequence_length=seq_len, dtype=tf.float32)
       else:
         raise Exception("invalid type: %s" % type(cell_fw))
       if direction == -1:
         y = tf.reverse_sequence(y, seq_lengths=seq_len, batch_dim=1, seq_dim=0)
     elif isinstance(cell_fw, TFNativeOp.RecSeqCellOp):
       assert not bidirectional
       if input_projection:
         W = tf.get_variable(name="W", shape=(self.input_data.dim, cell_fw.n_input_dim), dtype=tf.float32)
         if self.input_data.sparse:
           x = tf.nn.embedding_lookup(W, x)
         else:
           x = dot(x, W)
       else:
         assert not self.input_data.sparse
         assert self.input_data.dim == cell_fw.n_input_dim
       b = tf.get_variable(name="b", shape=(cell_fw.n_input_dim,), dtype=tf.float32, initializer=tf.constant_initializer(0.0))
       x += b
       index = sequence_mask_time_major(seq_len, maxlen=tf.shape(x)[0])
       y = cell_fw(inputs=directed(x, direction), index=directed(index, direction))
       y = directed(y, direction)
     else:
       raise Exception("invalid type: %s" % type(cell_fw))
     self.output.time_dim_axis = 0
     self.output.batch_dim_axis = 1
     self.output.placeholder = y
     params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=scope_name_prefix)
     assert params
     self.params.update({p.name[len(scope_name_prefix):-2]: p for p in params})
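
The index argument passed to the native cell above is a time-major 0/1 mask over the valid frames of each sequence. A plain-TF sketch of how such a mask can be built (this mimics what TFUtil.sequence_mask_time_major provides; the actual implementation may differ):

import tensorflow as tf

def sequence_mask_time_major_demo(seq_len, maxlen):
  """
  :param tf.Tensor seq_len: (batch,) sequence lengths
  :param tf.Tensor|int maxlen: number of time frames
  :return: (time, batch) float mask, 1.0 for valid frames, 0.0 for padding
  :rtype: tf.Tensor
  """
  mask = tf.sequence_mask(seq_len, maxlen=maxlen, dtype=tf.float32)  # (batch, time)
  return tf.transpose(mask)  # (time, batch)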