modifiedLSTM.py

# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
# # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Module implementing RNN Cells.
This module provides a number of basic commonly used RNN cells, such as LSTM
(Long Short Term Memory) or GRU (Gated Recurrent Unit), and a number of
operators that allow adding dropouts, projections, or embeddings for inputs.
Constructing multi-layer cells is supported by the class `MultiRNNCell`, or by
calling the `rnn` ops several times.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import hashlib
import numbers

from tensorflow.python.eager import context
from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.framework import tensor_util
from tensorflow.python.layers import base as base_layer
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import clip_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import partitioned_variables
from tensorflow.python.ops import random_ops
from tensorflow.python.ops import tensor_array_ops
from tensorflow.python.ops import variable_scope as vs
from tensorflow.python.ops import variables as tf_variables
from tensorflow.python.platform import tf_logging as logging
#from tensorflow.python.training import checkpointable
from tensorflow.python.util import nest
from tensorflow.python.util.tf_export import tf_export
from tensorflow.contrib.rnn import RNNCell
from tensorflow.contrib.rnn import LSTMStateTuple
from tensorflow.contrib.rnn import BasicLSTMCell

_BIAS_VARIABLE_NAME = "bias"
_WEIGHTS_VARIABLE_NAME = "kernel"

class LayerRNNCell(RNNCell):
  """Subclass of RNNCells that act like proper `tf.Layer` objects.
  For backwards compatibility purposes, most `RNNCell` instances allow their
  `call` methods to instantiate variables via `tf.get_variable`.  The underlying
  variable scope thus keeps track of any variables, and returning cached
  versions.  This is atypical of `tf.layer` objects, which separate this
  part of layer building into a `build` method that is only called once.
  Here we provide a subclass for `RNNCell` objects that act exactly as
  `Layer` objects do.  They must provide a `build` method and their
  `call` methods do not access Variables `tf.get_variable`.
  """

  def __call__(self, inputs, state, scope=None, *args, **kwargs):
    """Run this RNN cell on inputs, starting from the given state.
    Args:
      inputs: `2-D` tensor with shape `[batch_size, input_size]`.
      state: if `self.state_size` is an integer, this should be a `2-D Tensor`
        with shape `[batch_size, self.state_size]`.  Otherwise, if
        `self.state_size` is a tuple of integers, this should be a tuple
        with shapes `[batch_size, s] for s in self.state_size`.
      scope: optional cell scope.
      *args: Additional positional arguments.
      **kwargs: Additional keyword arguments.
    Returns:
      A pair containing:
      - Output: A `2-D` tensor with shape `[batch_size, self.output_size]`.
      - New state: Either a single `2-D` tensor, or a tuple of tensors matching
        the arity and shapes of `state`.
    """
    # Bypass RNNCell's variable capturing semantics for LayerRNNCell.
    # Instead, it is up to subclasses to provide a proper build
    # method.  See the class docstring for more details.
    return base_layer.Layer.__call__(self, inputs, state, scope=scope,
                                     *args, **kwargs)


# @tf_export("nn.rnn_cell.BasicLSTMCell")
class BasicNeatCell(LayerRNNCell):
  """Basic Neat recurrent network cell.
  The implementation is based on: http://arxiv.org/abs/1409.2329.
  We add forget_bias (default: 1) to the biases of the forget gate in order to
  reduce the scale of forgetting in the beginning of the training.
  It does not allow cell clipping, a projection layer, and does not
  use peep-hole connections: it is the basic baseline.
  For advanced models, please use the full @{tf.nn.rnn_cell.LSTMCell}
  that follows.
  """

  def __init__(self,
               num_units,
               forget_bias=1.0,
               state_is_tuple=True,
               activation=None,
               reuse=None,
               name=None,
               dtype=None):
    """Initialize the basic Neat cell.
    Args:
      num_units: int, The number of units in the Neat cell.
      forget_bias: float, The bias added to forget gates (see above).
        Must set to `0.0` manually when restoring from CudnnLSTM-trained
        checkpoints.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  If False, they are concatenated
        along the column axis.  The latter behavior will soon be deprecated.
      activation: Activation function of the inner states.  Default: `tanh`.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.
      name: String, the name of the layer. Layers with the same name will
        share weights, but to avoid mistakes we require reuse=True in such
        cases.
      dtype: Default dtype of the layer (default of `None` means use the type
        of the first input). Required when `build` is called before `call`.
      When restoring from CudnnLSTM-trained checkpoints, must use
      `CudnnCompatibleLSTMCell` instead.
    """
    super(BasicNeatCell, self).__init__(_reuse=reuse, name=name, dtype=dtype)
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated.  Use state_is_tuple=True.", self)

    # Inputs must be 2-dimensional.
    self.input_spec = base_layer.InputSpec(ndim=2)

    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation or math_ops.tanh

  @property
  def state_size(self):
    return (LSTMStateTuple(self._num_units, self._num_units)
            if self._state_is_tuple else 2 * self._num_units)

  @property
  def output_size(self):
    return self._num_units

  def build(self, inputs_shape):
    if inputs_shape[1].value is None:
      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                       % inputs_shape)

    input_depth = inputs_shape[1].value
    h_depth = self._num_units
    self._kernel = self.add_variable(
        _WEIGHTS_VARIABLE_NAME,
        shape=[input_depth + h_depth, 8 * self._num_units])
    self._bias = self.add_variable(
        _BIAS_VARIABLE_NAME,
        shape=[8 * self._num_units],
        initializer=init_ops.zeros_initializer(dtype=self.dtype))

    self.built = True

  def call(self, inputs, state):
    """Long short-term memory cell (Neat).
    Args:
      inputs: `2-D` tensor with shape `[batch_size, input_size]`.
      state: An `LSTMStateTuple` of state tensors, each shaped
        `[batch_size, num_units]`, if `state_is_tuple` has been set to
        `True`.  Otherwise, a `Tensor` shaped
        `[batch_size, 2 * num_units]`.
    Returns:
      A pair containing the new hidden state, and the new state (either a
        `LSTMStateTuple` or a concatenated state, depending on
        `state_is_tuple`).
    """
    sigmoid = math_ops.sigmoid
    zero = constant_op.constant(0, dtype=dtypes.int32)
    one = constant_op.constant(1, dtype=dtypes.int32)
    # Parameters of gates are concatenated into one multiply for efficiency.
    if self._state_is_tuple:
      c, h = state
    else:
      c, h = array_ops.split(value=state, num_or_size_splits=2,
              axis=one,name="c_h_-_split")

    # print("c = \n{}\nh = \n{}\n".format(c.get_shape(),h.get_shape()))
    # print("i = \n{}\n".format(inputs.get_shape()))


    input_depth = int(inputs.get_shape()[1])
    shape = int (self._kernel.get_shape()[1])
    ratio =  [self._num_units * 1, self._num_units * 7]

    # print("w = \n{}\n".format(self._kernel.get_shape()))
    # W_fi [5,4] W_fh [5,28]
    W_f,W_r= array_ops.split(
        value=self._kernel, num_or_size_splits=ratio, axis=one,
        name="W-f_W-r_-_split_-kernel")
    # print("w_f = \n{}\nw_r = \n{}\n".format(W_f.get_shape(),W_r.get_shape()))

    # W_fi [1,4] W_fh [4,4]
    W_fi,W_fh = array_ops.split(
        value=W_f, num_or_size_splits=[input_depth,self._num_units],
        axis=zero,name="W-fi_W-fh_-_split_W-f")
    # print("w_fi = \n{}\nw_fh = \n{}\n".format(W_fi.get_shape(),W_fh.get_shape()))
    
    #print("b = \n{}\n".format(self._bias.get_shape()))

    # b_f [_num_units,] b_f [_num_units*7,]
    b_f,b_r = array_ops.split(
        value=self._bias, num_or_size_splits=ratio, axis=zero,
        name="b-f_b-r_-_split_-bias")
    # print("b_f = \n{}\nb_r = \n{}\n".format(b_f.get_shape(),b_r.get_shape()))


    # a [?,_num_units]
    a = math_ops.multiply(math_ops.matmul(h,W_fh), math_ops.matmul(inputs,W_fi))
    # print("a = \n{}\n".format(a.get_shape()))

    a = nn_ops.bias_add(value = a, bias = b_f,name="a")
    # print("a = \n{}\n".format(a.get_shape()))

    # W_ri [input_depth,_num_units*7] W_rh [_num_units,_num_units*7]
    W_ri,W_rh =  array_ops.split(
        value=W_r, num_or_size_splits=[input_depth,self._num_units],
        axis=zero,name="W-ri_W-rh_-_split_W-r")
    # print("w_ri = \n{}\nw_rh = \n{}\n".format(W_ri.get_shape(),W_rh.get_shape()))
    
    # bh [?,_num_units*7]
    bh = math_ops.add(math_ops.matmul(h,W_rh), math_ops.matmul(inputs,W_ri))
    # print("bh = \n{}\n".format(bh.get_shape()))

    bh = nn_ops.bias_add(bh, b_r)
    # print("bh = \n{}\n".format(bh.get_shape()))
    b,c2,d,e,f,g,h = array_ops.split(
        value=bh, num_or_size_splits=7, axis=one,name="b_c2_d_e_f_g_h")

    add = math_ops.add
    multiply = math_ops.multiply
    tanh = math_ops.tanh 
    relu = nn_ops.relu
    identity = array_ops.identity


    #Nas cell 1
    new_c = multiply(tanh(add(c,tanh(multiply(relu(h),sigmoid(g))))),tanh(add(sigmoid(d),relu(a))))
    new_h = tanh(multiply(identity(new_c),tanh(add(tanh(multiply(sigmoid(e),tanh(f))),sigmoid(add(sigmoid(b),tanh(c2)))))))


    if self._state_is_tuple:
      new_state = LSTMStateTuple(new_c, new_h)
    else:
      new_state = array_ops.concat([new_c, new_h], 1)
    return new_h, new_state


# @tf_export("nn.rnn_cell.BasicLSTMCell")
class AdvancedNeatCell(LayerRNNCell):
  """Basic Neat recurrent network cell.
  The implementation is based on: http://arxiv.org/abs/1409.2329.
  We add forget_bias (default: 1) to the biases of the forget gate in order to
  reduce the scale of forgetting in the beginning of the training.
  It does not allow cell clipping, a projection layer, and does not
  use peep-hole connections: it is the basic baseline.
  For advanced models, please use the full @{tf.nn.rnn_cell.LSTMCell}
  that follows.
  """

  def __init__(self,
               num_units,
               forget_bias=1.0,
               state_is_tuple=True,
               activation=None,
               reuse=None,
               name=None,
               dtype=None):
    """Initialize the basic Neat cell.
    Args:
      num_units: int, The number of units in the Neat cell.
      forget_bias: float, The bias added to forget gates (see above).
        Must set to `0.0` manually when restoring from CudnnLSTM-trained
        checkpoints.
      state_is_tuple: If True, accepted and returned states are 2-tuples of
        the `c_state` and `m_state`.  If False, they are concatenated
        along the column axis.  The latter behavior will soon be deprecated.
      activation: Activation function of the inner states.  Default: `tanh`.
      reuse: (optional) Python boolean describing whether to reuse variables
        in an existing scope.  If not `True`, and the existing scope already has
        the given variables, an error is raised.
      name: String, the name of the layer. Layers with the same name will
        share weights, but to avoid mistakes we require reuse=True in such
        cases.
      dtype: Default dtype of the layer (default of `None` means use the type
        of the first input). Required when `build` is called before `call`.
      When restoring from CudnnLSTM-trained checkpoints, must use
      `CudnnCompatibleLSTMCell` instead.
    """
    super(AdvancedNeatCell, self).__init__(_reuse=reuse, name=name, dtype=dtype)
    if not state_is_tuple:
      logging.warn("%s: Using a concatenated state is slower and will soon be "
                   "deprecated.  Use state_is_tuple=True.", self)

    # Inputs must be 2-dimensional.
    self.input_spec = base_layer.InputSpec(ndim=2)

    self._num_units = num_units
    self._forget_bias = forget_bias
    self._state_is_tuple = state_is_tuple
    self._activation = activation or math_ops.tanh

  @property
  def state_size(self):
    return (LSTMStateTuple(self._num_units, self._num_units)
            if self._state_is_tuple else 2 * self._num_units)

  @property
  def output_size(self):
    return self._num_units

  def build(self, inputs_shape):
    if inputs_shape[1].value is None:
      raise ValueError("Expected inputs.shape[-1] to be known, saw shape: %s"
                       % inputs_shape)

    input_depth = inputs_shape[1].value
    h_depth = self._num_units
    self._kernel = self.add_variable(
        _WEIGHTS_VARIABLE_NAME,
        shape=[input_depth + h_depth, 8 * self._num_units])
    self._bias = self.add_variable(
        _BIAS_VARIABLE_NAME,
        shape=[8 * self._num_units],
        initializer=init_ops.zeros_initializer(dtype=self.dtype))

    self.built = True

  def call(self, inputs, state):
    """Long short-term memory cell (Neat).
    Args:
      inputs: `2-D` tensor with shape `[batch_size, input_size]`.
      state: An `LSTMStateTuple` of state tensors, each shaped
        `[batch_size, num_units]`, if `state_is_tuple` has been set to
        `True`.  Otherwise, a `Tensor` shaped
        `[batch_size, 2 * num_units]`.
    Returns:
      A pair containing the new hidden state, and the new state (either a
        `LSTMStateTuple` or a concatenated state, depending on
        `state_is_tuple`).
    """
    sigmoid = math_ops.sigmoid
    zero = constant_op.constant(0, dtype=dtypes.int32)
    one = constant_op.constant(1, dtype=dtypes.int32)
    # Parameters of gates are concatenated into one multiply for efficiency.
    if self._state_is_tuple:
      c, h = state
    else:
      c, h = array_ops.split(value=state, num_or_size_splits=2,
              axis=one,name="c_h_-_split")

    # print("c = \n{}\nh = \n{}\n".format(c.get_shape(),h.get_shape()))
    # print("i = \n{}\n".format(inputs.get_shape()))


    input_depth = int(inputs.get_shape()[1])
    shape = int (self._kernel.get_shape()[1])
    ratio =  [self._num_units * 5, self._num_units * 3]

    # print("w = \n{}\n".format(self._kernel.get_shape()))
    # W_fi [5,4] W_fh [5,28]
    W_f,W_r= array_ops.split(
        value=self._kernel, num_or_size_splits=ratio, axis=one,
        name="W-f_W-r_-_split_-kernel")
    # print("w_f = \n{}\nw_r = \n{}\n".format(W_f.get_shape(),W_r.get_shape()))

    # W_fi [1,4] W_fh [4,4]
    W_fi,W_fh = array_ops.split(
        value=W_f, num_or_size_splits=[input_depth,self._num_units],
        axis=zero,name="W-fi_W-fh_-_split_W-f")
    # print("w_fi = \n{}\nw_fh = \n{}\n".format(W_fi.get_shape(),W_fh.get_shape()))
    
    #print("b = \n{}\n".format(self._bias.get_shape()))

    # b_f [_num_units,] b_f [_num_units*7,]
    b_f,b_r = array_ops.split(
        value=self._bias, num_or_size_splits=ratio, axis=zero,
        name="b-f_b-r_-_split_-bias")
    # print("b_f = \n{}\nb_r = \n{}\n".format(b_f.get_shape(),b_r.get_shape()))


    # a [?,_num_units]
    sw = math_ops.add(math_ops.matmul(h,W_fh), math_ops.matmul(inputs,W_fi))
    # print("a = \n{}\n".format(a.get_shape()))

    sw = nn_ops.bias_add(value = sw, bias = b_f)
    # print("a = \n{}\n".format(a.get_shape()))
    s,t,u,v,w = array_ops.split(
        value=sw, num_or_size_splits=5, axis=one,name="s_t_v_u_w_-_split_sw")

    # W_ri [input_depth,_num_units*7] W_rh [_num_units,_num_units*7]
    W_ri,W_rh =  array_ops.split(
        value=W_r, num_or_size_splits=[input_depth,self._num_units],
        axis=zero,name="W-ri_W-rh_-_split_W-r")
    # print("w_ri = \n{}\nw_rh = \n{}\n".format(W_ri.get_shape(),W_rh.get_shape()))
    
    # bh [?,_num_units*7]
    xz = gen_math_ops.maximum(math_ops.matmul(h,W_rh), math_ops.matmul(inputs,W_ri))
    # print("bh = \n{}\n".format(bh.get_shape()))

    xz = nn_ops.bias_add(xz, b_r)
    # print("bh = \n{}\n".format(bh.get_shape()))

    # b,...,h [?,_num_units]
    x,y,z = array_ops.split(
        value=xz, num_or_size_splits=3, axis=one,name="x_y_z_-_split_xz")

    add = math_ops.add
    multiply = math_ops.multiply
    tanh = math_ops.tanh 
    relu = nn_ops.relu
    identity = array_ops.identity

    #Nas cell 2
    new_c = multiply(identity(add(identity(add(c,tanh(z))),identity(y))),sigmoid(add(relu(v),tanh(s))))
    new_h = tanh(multiply(identity(new_c),sigmoid(multiply(sigmoid(add(tanh(x),tanh(w))),sigmoid(add(identity(u),tanh(t)))))))


    if self._state_is_tuple:
      new_state = LSTMStateTuple(new_c, new_h)
    else:
      new_state = array_ops.concat([new_c, new_h], 1)
    return new_h, new_state