def __call__(self, inputs, state, timestep=0, scope=None):
    '''Run the stacked highway transition for one step.

    Every one of the `self.num_highway_layers` layers builds a tanh
    candidate and a sigmoid gate, then blends the candidate with the
    running state: `state = candidate * gate + state * (1 - gate)`.

    Args:
        inputs: input tensor for this step.
        state: previous hidden state tensor.
        timestep: not used by this method.
        scope: not used by this method; variable scopes are named per
            highway layer instead.

    Returns:
        A pair `(state, state)`: the state after the last highway layer
        is returned as both the output and the new state.
    '''
    hidden = state
    for depth in range(self.num_highway_layers):
        # The first layer always sees the inputs; deeper layers only do
        # so when use_inputs_on_each_layer is set.
        feed_inputs = self.use_inputs_on_each_layer or depth == 0

        with tf.variable_scope('highway_factor_{}'.format(depth)):
            if feed_inputs:
                candidate_logits = multiplicative_integration(
                    [inputs, hidden], self._num_units)
            else:
                # State-only layers use a layer-normalised linear map.
                candidate_logits = layer_norm(
                    linear([hidden], self._num_units, True))
            highway_factor = tf.tanh(candidate_logits)

        with tf.variable_scope(
                'gate_for_highway_factor_{}'.format(depth)):
            # Gate bias starts at -3.0 (sigmoid(-3) is about 0.05),
            # presumably so the cell begins by mostly carrying the
            # previous state through.
            if feed_inputs:
                gate_logits = multiplicative_integration(
                    [inputs, hidden], self._num_units,
                    initial_bias_value=-3.0)
            else:
                gate_logits = linear(
                    [hidden], self._num_units, True, -3.0)
            transform_gate = tf.sigmoid(gate_logits)
            carry_gate = 1 - transform_gate

            # Recurrent dropout touches only the candidate, never the
            # carried state.
            if self.use_recurrent_dropout and self.is_training:
                highway_factor = tf.nn.dropout(
                    highway_factor, self.recurrent_dropout_factor)

        hidden = highway_factor * transform_gate + hidden * carry_gate

    return hidden, hidden
def __call__(self, inputs, state, timestep=0, scope=None):
    '''Gated recurrent unit (GRU) step built on multiplicative integration.

    Computes a reset gate `r` and an update gate `u`, forms a tanh
    candidate from the inputs and the reset-scaled state, and blends it
    with the previous state: `new_h = u * state + (1 - u) * candidate`.

    Args:
        inputs: input tensor for this step.
        state: previous hidden state tensor.
        timestep: not used by this method.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        A pair `(new_h, new_h)`: the new hidden state is both the output
        and the next state.
    '''
    with tf.variable_scope(scope or type(self).__name__):  # 'GRUCell'
        with tf.variable_scope('Gates'):
            # Reset gate and update gate.
            # We start with bias of 1.0 to not reset and not update.
            r, u = tf.split(
                value=tf.sigmoid(
                    multiplicative_integration(
                        [inputs, state],
                        output_size=self._num_units * 2,
                        initial_bias_value=1.0)),
                num_or_size_splits=2,
                axis=1)

        # The candidate gets its own scope because it builds a separate
        # set of weights from the gates.
        with tf.variable_scope('Candidate'):
            # The reset gate scales the previous state before it enters
            # the candidate. Previously `r` was computed but never used,
            # so the cell could not reset its state.
            c = tf.tanh(multiplicative_integration(
                [inputs, r * state], self._num_units, 0.0))

        # Recurrent dropout is applied to the candidate only, so the
        # carried-over state is never dropped.
        if self.use_recurrent_dropout and self.is_training:
            input_contribution = tf.nn.dropout(
                c, self.recurrent_dropout_factor)
        else:
            input_contribution = c

        new_h = u * state + (1 - u) * input_contribution

    return new_h, new_h
def __call__(self, inputs, state, timestep=0, scope=None):
    '''Highway recurrence: refine the state through several gated layers.

    For each of the `self.num_highway_layers` layers a tanh candidate and
    a sigmoid gate are computed and mixed with the running state:
    `state = candidate * gate + state * (1 - gate)`.

    Args:
        inputs: input tensor for this step.
        state: previous hidden state tensor.
        timestep: not used by this method.
        scope: not used by this method; variable scopes are named per
            highway layer instead.

    Returns:
        A pair `(state, state)`: the state after the last highway layer,
        used as both output and new state.
    '''
    running_state = state
    for layer_idx in range(self.num_highway_layers):
        # Layer 0 always reads the inputs; later layers read them only
        # when use_inputs_on_each_layer is enabled.
        sees_inputs = self.use_inputs_on_each_layer or layer_idx == 0

        with tf.variable_scope('highway_factor_{}'.format(layer_idx)):
            if sees_inputs:
                candidate_logits = multiplicative_integration(
                    [inputs, running_state],
                    output_size=self._num_units)
            else:
                candidate_logits = linear(
                    [running_state],
                    output_size=self._num_units,
                    bias=True)
            highway_factor = tf.tanh(candidate_logits)

        with tf.variable_scope(
                'gate_for_highway_factor_{}'.format(layer_idx)):
            # A bias of -3.0 gives sigmoid(-3), about 0.05, presumably
            # so the cell starts out mostly carrying the old state.
            if sees_inputs:
                gate_logits = multiplicative_integration(
                    [inputs, running_state], self._num_units,
                    initial_bias_value=-3.0)
            else:
                gate_logits = linear(
                    [running_state],
                    output_size=self._num_units,
                    bias=True,
                    bias_start=-3.0)
            gate_for_highway_factor = tf.sigmoid(gate_logits)
            gate_for_hidden_factor = 1 - gate_for_highway_factor

            # Dropout is applied to the candidate only.
            if self.use_recurrent_dropout and self.is_training:
                highway_factor = tf.nn.dropout(
                    highway_factor,
                    keep_prob=self.recurrent_dropout_factor)

        running_state = (highway_factor * gate_for_highway_factor +
                         running_state * gate_for_hidden_factor)

    return running_state, running_state
def __call__(self, inputs, state, timestep=0, scope=None):
    '''Long short-term memory (LSTM) step using multiplicative integration.

    Args:
        inputs: input tensor for this step.
        state: previous state, holding `h` and `c` concatenated along
            axis 1 with `h` first.
        timestep: not used by this method.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        A pair `(new_h, new_state)` where `new_state` is `new_h` and
        `new_c` concatenated along axis 1 (again `h` first).
    '''
    with tf.variable_scope(scope or type(self).__name__):  # 'BasicLSTMCell'
        prev_h, prev_c = tf.split(value=state, num_or_size_splits=2, axis=1)

        # All four gate pre-activations come out of a single call for
        # efficiency and are sliced apart afterwards.
        gate_block = multiplicative_integration(
            [inputs, prev_h],
            output_size=self._num_units * 4,
            initial_bias_value=0.0)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(value=gate_block, num_or_size_splits=4, axis=1)

        # Recurrent dropout hits only the new-input contribution, not
        # the carried cell state.
        input_contribution = tf.tanh(j)
        if self.use_recurrent_dropout and self.is_training:
            input_contribution = tf.nn.dropout(
                input_contribution,
                keep_prob=self.recurrent_dropout_factor)

        new_c = (prev_c * tf.sigmoid(f + self._forget_bias) +
                 tf.sigmoid(i) * input_contribution)
        new_h = tf.tanh(new_c) * tf.sigmoid(o)

    # State layout is purposely (h, c) rather than (c, h).
    return new_h, tf.concat(axis=1, values=[new_h, new_c])
def __call__(self, inputs, state, timestep=0, scope=None):
    '''Single-gate recurrent step.

    One sigmoid gate decides how much of an input-only tanh candidate
    replaces the previous state:
    `new_h = (1 - gate) * state + gate * candidate`.

    Args:
        inputs: input tensor for this step.
        state: previous hidden state tensor.
        timestep: not used by this method.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        A pair `(new_h, new_h)`: the new hidden state is both the output
        and the next state.
    '''
    with tf.variable_scope(scope or type(self).__name__):
        # TODO: the gate bias comes from self.forget_bias_initialization;
        # an earlier note says it starts as 1.0 -- double check that
        # this is correct.
        with tf.variable_scope('Gates'):
            if self.use_multiplicative_integration:
                gate_logits = multiplicative_integration(
                    [inputs, state], self._num_units,
                    self.forget_bias_initialization)
            else:
                gate_logits = linear(
                    [inputs, state], self._num_units, True,
                    self.forget_bias_initialization)
            gated_factor = tf.sigmoid(gate_logits)

        with tf.variable_scope('Candidate'):
            # The candidate is built from the inputs alone; the previous
            # state enters only through the final blend.
            input_contribution = tf.tanh(
                linear([inputs], self._num_units, True, 0.0))
            if self.use_recurrent_dropout and self.is_training:
                input_contribution = tf.nn.dropout(
                    input_contribution, self.recurrent_dropout_factor)

        keep_old = 1 - gated_factor
        new_h = keep_old * state + gated_factor * input_contribution

    return new_h, new_h
def __call__(self, inputs, state, timestep=0, scope=None):
    '''Basic RNN step: output = new_state = tanh(pre-activation).

    The pre-activation is produced by `multiplicative_integration` from
    the inputs and the previous state, rather than by the plain
    `W * input + U * state + B` sum of the textbook cell.

    Args:
        inputs: input tensor for this step.
        state: previous hidden state tensor.
        timestep: not used by this method.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        A pair `(output, output)`: the activation is both the output and
        the new state.
    '''
    cell_scope = scope or type(self).__name__  # 'BasicRNNCell'
    with tf.variable_scope(cell_scope):
        pre_activation = multiplicative_integration(
            [inputs, state], self._num_units)
        output = tf.tanh(pre_activation)
    return output, output
def __call__(self, inputs, state, timestep=0, scope=None):
    '''Gated recurrent unit (GRU) step with layer normalisation.

    The input and state projections are layer-normalised separately and
    then combined by `multiplicative_integration` (with
    `weights_already_calculated=True`) for both the gates and the
    candidate. The result is
    `new_h = u * state + (1 - u) * candidate`.

    Args:
        inputs: input tensor for this step.
        state: previous hidden state tensor.
        timestep: not used by this method.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        A pair `(new_h, new_h)`: the new hidden state is both the output
        and the next state.
    '''
    with tf.variable_scope(scope or type(self).__name__):  # 'GRUCell'
        with tf.variable_scope('Inputs'):
            # bias=False here, so the trailing 1.0 (bias start) is
            # presumably ignored by `linear`; the 1.0 gate bias is
            # supplied by layer_norm / multiplicative_integration.
            gate_inputs = linear(
                [inputs], self._num_units * 2, False, 1.0)
            gate_inputs = layer_norm(
                gate_inputs,
                num_variables_in_tensor=2,
                initial_bias_value=1.0)

        with tf.variable_scope('Hidden_State'):
            gate_hidden = linear([state], self._num_units * 2, False)
            gate_hidden = layer_norm(
                gate_hidden, num_variables_in_tensor=2)

            # NOTE(review): the indentation of this method was
            # reconstructed; confirm the gate computation belongs inside
            # 'Hidden_State', since the scope affects the names of any
            # variables multiplicative_integration creates.
            gates = tf.sigmoid(
                multiplicative_integration(
                    [gate_inputs, gate_hidden],
                    output_size=2 * self._num_units,
                    initial_bias_value=1.0,
                    weights_already_calculated=True))
            # r = reset gate, u = update gate
            r, u = tf.split(value=gates, num_or_size_splits=2, axis=1)

        with tf.variable_scope('Candidate'):
            with tf.variable_scope('input_portion'):
                input_portion = layer_norm(
                    linear([inputs],
                           output_size=self._num_units,
                           bias=False))
            with tf.variable_scope('reset_portion'):
                # The reset gate scales the normalised state projection.
                reset_portion = r * layer_norm(
                    linear([state], self._num_units, False))

            candidate_logits = multiplicative_integration(
                [input_portion, reset_portion],
                output_size=self._num_units,
                initial_bias_value=0.0,
                weights_already_calculated=True)
            c = tf.tanh(candidate_logits)

        new_h = u * state + (1 - u) * c

    return new_h, new_h
def __call__(self, inputs, state, timestep=0, scope=None):
    '''Long short-term memory (LSTM) step with layer normalisation.

    The idea with iteration would be to run different batch norm mean and
    variance stats on timestep greater than 10; `timestep` is not used by
    this method as written.

    Args:
        inputs: input tensor for this step.
        state: previous state, holding `h` and `c` concatenated along
            axis 1 with `h` first.
        timestep: not used by this method.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        A pair `(new_h, new_state)` where `new_state` is `new_h` and
        `new_c` concatenated along axis 1 (`h` first).
    '''
    with tf.variable_scope(scope or type(self).__name__):  # 'BasicLSTMCell'
        prev_h, prev_c = tf.split(value=state, num_or_size_splits=2, axis=1)

        # Parameters of gates are concatenated into one multiply for
        # efficiency. The projections carry no bias of their own because
        # the normalisation bias is added later.
        # NOTE(review): the indentation of this method was
        # reconstructed; confirm each layer_norm call sits inside its
        # weight-matrix scope.
        with tf.variable_scope('inputs_weight_matrix'):
            inputs_concat = linear(
                [inputs], output_size=4 * self._num_units, bias=False)
            inputs_concat = layer_norm(
                inputs_concat,
                num_variables_in_tensor=4,
                scope='inputs_concat_layer_norm')

        with tf.variable_scope('state_weight_matrix'):
            h_concat = linear([prev_h], 4 * self._num_units, False)
            h_concat = layer_norm(
                h_concat,
                num_variables_in_tensor=4,
                scope='h_concat_layer_norm')

        gate_block = multiplicative_integration(
            [inputs_concat, h_concat],
            output_size=4 * self._num_units,
            initial_bias_value=0.0,
            weights_already_calculated=True)
        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        i, j, f, o = tf.split(value=gate_block, num_or_size_splits=4, axis=1)

        new_c = (prev_c * tf.sigmoid(f + self._forget_bias) +
                 tf.sigmoid(i) * tf.tanh(j))

        # Apply layer norm to the hidden state transition; the stored
        # cell state itself stays un-normalised.
        with tf.variable_scope('layer_norm_hidden_state'):
            new_h = tf.tanh(layer_norm(new_c)) * tf.sigmoid(o)

    # State layout is (h, c): reversed relative to the usual (c, h).
    return new_h, tf.concat(axis=1, values=[new_h, new_c])
def __call__(self, inputs, state, timestep=0, scope=None):
    '''LSTM step with several parallel memory arrays.

    The state holds one hidden vector followed by
    `self.num_memory_arrays` cell vectors, all concatenated along axis 1.
    Each memory array gets its own set of four gates; the per-array
    hidden contributions are summed into a single new hidden state.

    Args:
        inputs: input tensor for this step.
        state: previous state, `h` followed by every `c`, concatenated
            along axis 1 in equal-sized pieces.
        timestep: not used by this method.
        scope: optional variable scope name; defaults to the class name.

    Returns:
        A pair `(new_h, new_state)` where `new_state` is `new_h` followed
        by every new cell state, concatenated along axis 1.
    '''
    with tf.variable_scope(scope or type(self).__name__):  # 'BasicLSTMCell'
        num_arrays = self.num_memory_arrays
        state_pieces = tf.split(
            state, num_or_size_splits=num_arrays + 1, axis=1)
        h, c_list = state_pieces[0], state_pieces[1:]

        # One very large matrix multiplication covers the gates of every
        # memory array; the result is sliced apart further down.
        gate_width = self._num_units * 4 * num_arrays
        if self.use_multiplicative_integration:
            concat = multiplicative_integration(
                [inputs, h], gate_width, 0.0)
        else:
            concat = linear([inputs, h], gate_width, True)

        # NOTE(review): this layer_norm call and the per-array ones in
        # the loop below all run in the same variable scope; if
        # layer_norm creates fixed-name variables they may collide --
        # confirm against its implementation.
        if self.use_layer_normalization:
            concat = layer_norm(
                concat, num_variables_in_tensor=4 * num_arrays)

        # i = input_gate, j = new_input, f = forget_gate, o = output_gate
        # -- they come in consecutive sets of four, one set per array.
        all_vars_list = tf.split(
            concat, num_or_size_splits=4 * num_arrays, axis=1)

        new_c_list, new_h_list = [], []
        for array_counter in range(num_arrays):
            base = 4 * array_counter
            i, j, f, o = all_vars_list[base:base + 4]

            # Recurrent dropout hits only the new-input contribution.
            input_contribution = tf.tanh(j)
            if self.use_recurrent_dropout and self.is_training:
                input_contribution = tf.nn.dropout(
                    input_contribution, self.recurrent_dropout_factor)

            updated_c = (
                c_list[array_counter] * tf.sigmoid(f + self._forget_bias) +
                tf.sigmoid(i) * input_contribution)
            new_c_list.append(updated_c)

            # Only the value feeding the hidden state is normalised; the
            # stored cell state stays raw.
            if self.use_layer_normalization:
                new_c = layer_norm(updated_c)
            else:
                new_c = updated_c
            new_h_list.append(tf.tanh(new_c) * tf.sigmoid(o))

        # Sum all new_h components -- could instead do a mean, but
        # investigate that later.
        new_h = tf.add_n(new_h_list)

    # State layout is purposely h first, then the cell states.
    return new_h, tf.concat([new_h] + new_c_list, axis=1)