def __call__(self, inputs, state, scope=None):
    """JZS3, mutant 3 with n units cells."""
    with tf.device("/gpu:" + str(self._gpu_for_layer)):
      with tf.variable_scope(scope or type(self).__name__):  # "JZS3Cell"
        with tf.variable_scope("Zinput"):  # Reset gate and update gate.
          # We start with bias of 1.0 to not reset and not update.
          '''equation 1'''

          z = tf.sigmoid(lfe.enhanced_linear([inputs, tf.tanh(state)], 
                            self._num_units, True, 1.0, weight_initializer = self._weight_initializer))

        with tf.variable_scope("Rinput"):  # Reset gate.
          # Equation 2: r = sigm(W_xr * x_t + W_hr * h_{t-1} + b_r)
          r = tf.sigmoid(lfe.enhanced_linear([inputs, state],
                            self._num_units, True, 1.0, weight_initializer=self._weight_initializer))
        with tf.variable_scope("Candidate"):
          # Equation 3: h_t = tanh(W_hh * (r * h_{t-1}) + W_xh * x_t + b_h) * z
          #                   + h_{t-1} * (1 - z)
          component_0 = linear.linear([state * r, inputs],
                            self._num_units, True)
          component_2 = tf.tanh(component_0) * z
          component_3 = state * (1 - z)

        h_t = component_2 + component_3

      return h_t, h_t  # The output and the new state are identical; there is only one hidden state to keep track of.
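
  # A minimal single-step usage sketch (hedged: the class name `JZS3Cell`, its
  # constructor arguments, and `batch_size` are illustrative assumptions, not
  # taken from this file):
  #
  #   cell = JZS3Cell(num_units=128, gpu_for_layer=0)
  #   state = tf.zeros([batch_size, 128])  # a single hidden state of size n
  #   output, state = cell(x_t, state)     # output == state for this cell
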
  def __call__(self, inputs, state, scope=None):
    """JZS1, mutant 1 with n units cells."""
    with tf.device("/gpu:" + str(self._gpu_for_layer)):
      with tf.variable_scope(scope or type(self).__name__):  # "JZS1Cell"
        with tf.variable_scope("Zinput"):  # Reset gate and update gate.
          # We start with bias of 1.0 to not reset and not update.
          '''equation 1 z = sigm(WxzXt+Bz), x_t is inputs'''

          z = tf.sigmoid(lfe.enhanced_linear([inputs], 
                            self._num_units, True, 1.0, weight_initializer = self._weight_initializer)) 

        with tf.variable_scope("Rinput"):
          '''equation 2 r = sigm(WxrXt+Whrht+Br), h_t is the previous state'''

          r = tf.sigmoid(lfe.enhanced_linear([inputs,state],
                            self._num_units, True, 1.0, weight_initializer = self._weight_initializer))
        with tf.variable_scope("Candidate"):
          # Equation 3: h_t = tanh(W_hh * (r * h_{t-1}) + tanh(x_t) + b_h) * z
          #                   + h_{t-1} * (1 - z)
          component_0 = linear.linear([r * state],
                            self._num_units, True)
          component_1 = tf.tanh(tf.tanh(inputs) + component_0)
          component_2 = component_1 * z
          component_3 = state * (1 - z)

        h_t = component_2 + component_3

      return h_t, h_t  # The output and the new state are identical; there is only one hidden state to keep track of.
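
  # A minimal multi-step unroll sketch (hedged: `cell`, `inputs_sequence`, and
  # `initial_state` are illustrative assumptions, not taken from this file;
  # variable reuse follows the standard variable_scope pattern):
  #
  #   state = initial_state
  #   outputs = []
  #   with tf.variable_scope("RNN") as vs:
  #     for t, x_t in enumerate(inputs_sequence):
  #       if t > 0:
  #         vs.reuse_variables()
  #       output, state = cell(x_t, state)
  #       outputs.append(output)
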
  def __call__(self, inputs, state, scope=None):
    """Long short-term memory cell (LSTM)."""
    with tf.device("/gpu:" + str(self._gpu_for_layer)):
      with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
        # Parameters of gates are concatenated into one multiply for efficiency.
        c, h = tf.split(1, 2, state)  # Split the state into cell state c and hidden state h.
        concat = lfe.enhanced_linear([inputs, h], 4 * self._num_units, True, weight_initializer = self._weight_initializer)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(1, 4, concat)

        new_c = c * tf.sigmoid(f + self._forget_bias) + tf.sigmoid(i) * tf.tanh(j)
        new_h = tf.tanh(new_c) * tf.sigmoid(o)

      # Important: the second return value is the full state. Because it
      # concatenates c and h, an LSTM with n cells has a state of dimension 2n.
      return new_h, tf.concat(1, [new_c, new_h])
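
  # A minimal state-construction sketch (hedged: the class name `BasicLSTMCell`,
  # its constructor arguments, and `batch_size` are illustrative assumptions):
  #
  #   cell = BasicLSTMCell(num_units=128, gpu_for_layer=0)
  #   state = tf.zeros([batch_size, 2 * 128])  # [c, h] concatenated along dim 1
  #   output, state = cell(x_t, state)         # returns new_h and [new_c, new_h]
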
  def __call__(self, inputs, state, scope=None):
    """Gated recurrent unit (GRU) with n units cells."""
    with tf.device("/gpu:" + str(self._gpu_for_layer)):
      with tf.variable_scope(scope or type(self).__name__):  # "GRUCell"
        with tf.variable_scope("Gates"):  # Reset gate and update gate.
          # We start with bias of 1.0 to not reset and not udpate.
          r, u = tf.split(1, 2, lfe.enhanced_linear([inputs, state],
                                              2 * self._num_units, True, 1.0, weight_initializer = self._weight_initializer))
          r, u = tf.sigmoid(r), tf.sigmoid(u)
        with tf.variable_scope("Candidate"): #you need a different one because you're doing a new linear
          #notice they have the activation/non-linear step right here! 
          c = tf.tanh(linear.linear([inputs, r * state], self._num_units, True))
        new_h = u * state + (1 - u) * c
      # Note that for the GRU, the output and the hidden state are the same tensor.
      return new_h, new_h
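
  # For reference, the update this method implements (the weights and biases come
  # from the enhanced_linear/linear calls above):
  #
  #   r = sigm(W_xr * x_t + W_hr * h_{t-1} + b_r)        # reset gate
  #   u = sigm(W_xu * x_t + W_hu * h_{t-1} + b_u)        # update gate
  #   c = tanh(W_xc * x_t + W_hc * (r * h_{t-1}) + b_c)  # candidate state
  #   h_t = u * h_{t-1} + (1 - u) * c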