import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, LSTMCell


class LSTM(Model):
    def __init__(self, opts):
        super(LSTM, self).__init__()
        self.cell = LSTMCell(opts.lstm_units)
        # Output layer.
        self.__out_layer = Dense(opts.output_size, activation=None, use_bias=True)
        # Interface layer.
        self.__interface = Dense(opts.interface_size, activation=None, use_bias=True)

    def __call__(self, inputs, state):
        hidden, state = self.cell(inputs, state)
        ctrl_output = {
            'output': self.__out_layer(hidden),
            'interface': self.__interface(hidden)
        }
        return ctrl_output, state

    def initialize(self, batch_size):
        return self.cell.get_initial_state(batch_size=batch_size, dtype=tf.float32)
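# A minimal usage sketch for the controller above. Assumptions (not from
# the source): the SimpleNamespace-based `opts`, the sizes, and the dummy
# input are illustrative only; TF 2.x eager Keras semantics are assumed.
from types import SimpleNamespace

opts = SimpleNamespace(lstm_units=64, output_size=10, interface_size=38)
controller = LSTM(opts)

batch_size = 2
state = controller.initialize(batch_size)   # [h, c], each [batch, lstm_units]
x = tf.zeros([batch_size, 16])              # dummy input for one time step
ctrl_output, state = controller(x, state)
print(ctrl_output['output'].shape)          # (2, 10)
print(ctrl_output['interface'].shape)       # (2, 38)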
import tensorflow as tf
import tensorx as tx
from tensorflow.keras.layers import LSTMCell


def test_lstm_cell():
    n_inputs = 3
    n_units = 4
    batch_size = 1

    inputs = tx.Input(n_units=n_inputs)
    lstm0 = tx.LSTMCell(
        inputs, n_units,
        activation=tf.tanh,
        gate_activation=tf.sigmoid,
        forget_bias_init=tf.initializers.ones(),
    )
    lstm1 = LSTMCell(n_units,
                     activation='tanh',
                     recurrent_activation='sigmoid',
                     unit_forget_bias=True,
                     implementation=2)

    state0 = [s() for s in lstm0.previous_state]
    # get_initial_state from keras returns either a tuple or a single
    # state (see `test_rnn_cell`), but the __call__ API requires an iterable
    state1 = lstm1.get_initial_state(inputs, batch_size=1)
    assert tx.tensor_equal(state1, state0)

    inputs.value = tf.ones([batch_size, n_inputs])
    res1 = lstm1(inputs, state0)
    res1_ = lstm1(inputs, state0)
    for r1, r2 in zip(res1, res1_):
        assert tx.tensor_equal(r1, r2)

    # the only difference is that keras kernels are fused together
    kernel = tf.concat([w.weights.value() for w in lstm0.layer_state.w], axis=-1)
    w_i, _, _, _ = tf.split(kernel, 4, axis=1)
    assert tx.tensor_equal(w_i, lstm0.w[0].weights.value())

    recurrent_kernel = tf.concat([u.weights for u in lstm0.layer_state.u], axis=-1)
    bias = tf.concat([w.bias for w in lstm0.layer_state.w], axis=-1)

    assert tx.tensor_equal(tf.shape(kernel), tf.shape(lstm1.kernel))
    assert tx.tensor_equal(tf.shape(recurrent_kernel), tf.shape(lstm1.recurrent_kernel))
    assert tx.tensor_equal(tf.shape(bias), tf.shape(lstm1.bias))

    lstm1.kernel = kernel
    lstm1.recurrent_kernel = recurrent_kernel
    lstm1.bias = bias

    res2 = lstm1(inputs, state0)
    for i in range(len(res1)):
        assert not tx.tensor_equal(res1[i], res2[i])

    res0 = lstm0()
    assert tx.tensor_equal(res0, res2[0])
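# A short sketch of the kernel fusion the test exploits: keras stores the
# four gate matrices fused column-wise in a single kernel, in i, f, c, o
# order. Assumption: the sizes below are illustrative; TF 2.x Keras API.
import tensorflow as tf
from tensorflow.keras.layers import LSTMCell

cell = LSTMCell(4)
cell.build(tf.TensorShape([None, 3]))        # kernel has shape [3, 4 * 4]
w_i, w_f, w_c, w_o = tf.split(cell.kernel, 4, axis=1)
print(w_i.shape)                             # (3, 4) -- input-gate block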
import tensorflow as tf
from tensorflow.keras.layers import Bidirectional, LSTM, LSTMCell


class Encoder(object):
    def __init__(self, n_neurons=128, batch_size=4, seq_length=10):
        # Parameter settings
        self.n_neurons = n_neurons
        self.batch_size = batch_size
        self.seq_length = seq_length
        # Define the recurrent cell
        self.enc_rec_cell = LSTMCell(self.n_neurons)

    # Network definition
    # To contrast with the Decoder, the recurrence is deliberately written
    # as an explicit loop (rather than as an LSTM layer).
    def build_model(self, inputs):
        # Insert a Bi-directional LSTM layer
        inputs = Bidirectional(LSTM(self.n_neurons, return_sequences=True),
                               merge_mode='concat')(inputs)
        input_list = tf.transpose(inputs, [1, 0, 2])

        enc_outputs, enc_states = [], []
        state = self._get_initial_state()
        for step_input in tf.unstack(input_list, axis=0):
            # Feed one time step through the recurrent cell
            output, state = self.enc_rec_cell(step_input, state)
            enc_outputs.append(output)
            enc_states.append(state)

        # Accumulate the outputs
        enc_outputs = tf.stack(enc_outputs, axis=0)
        enc_outputs = tf.transpose(enc_outputs, [1, 0, 2])
        enc_state = enc_states[-1]
        return enc_outputs, enc_state

    def _get_initial_state(self):
        state = self.enc_rec_cell.get_initial_state(inputs=None,
                                                    batch_size=self.batch_size,
                                                    dtype=tf.float32)
        return state
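# Sketch: the explicit loop in build_model above is equivalent (up to how
# intermediate states are collected) to wrapping the same cell in a keras
# RNN layer. Names and sizes here are assumptions for illustration only.
import tensorflow as tf

cell = tf.keras.layers.LSTMCell(128)
rnn = tf.keras.layers.RNN(cell, return_sequences=True, return_state=True)
x = tf.random.normal([4, 10, 256])           # [batch, seq_length, features]
outputs, h, c = rnn(x)                       # outputs: [4, 10, 128]; h, c: [4, 128]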
import tensorflow as tf
from tensorflow.keras.layers import Bidirectional, LSTM, LSTMCell


class Encoder(object):
    def __init__(self, n_neurons=128, batch_size=4, user=6):
        """Actor Encoder class

        Args:
            n_neurons: int
                Hidden layer size of the LSTM
            user: int
                Number of users
        Outputs:
            enc_outputs: 3D tensor [batch, user, n_neurons]
                Whole-sequence outputs
            enc_state: 1D list [tensor, tensor]
                enc_state[0]: 2D tensor [batch, n_neurons], final memory state
                enc_state[1]: 2D tensor [batch, n_neurons], final carry state
        """
        self.n_neurons = n_neurons
        self.batch_size = batch_size
        self.user = user
        # Define the recurrent cell
        self.enc_rec_cell = LSTMCell(self.n_neurons)

    def build_model(self, inputs):
        # Insert a Bi-directional LSTM layer
        inputs = Bidirectional(LSTM(self.n_neurons, return_sequences=True),
                               merge_mode='concat')(inputs)
        # Reshape to [user, batch_size, n_neurons*2]
        input_list = tf.transpose(inputs, [1, 0, 2])

        # Set up the LSTMCell's initial state for this batch size
        state = self._get_initial_state()
        enc_outputs, enc_states = [], []
        for step_input in tf.unstack(input_list, axis=0):
            # step_input is one time step of the sequence, shape [batch, n_neurons*2]
            output, state = self.enc_rec_cell(step_input, state)
            enc_outputs.append(output)
            enc_states.append(state)

        # Stack & transpose back to [batch, user, n_neurons]
        enc_outputs = tf.stack(enc_outputs, axis=0)
        enc_outputs = tf.transpose(enc_outputs, [1, 0, 2])
        enc_state = enc_states[-1]
        return enc_outputs, enc_state

    def _get_initial_state(self):
        state = self.enc_rec_cell.get_initial_state(inputs=None,
                                                    batch_size=self.batch_size,
                                                    dtype=tf.float32)
        return state
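# A minimal eager-mode driver for the Encoder above. Assumptions (not from
# the source): the input feature size of 32 is illustrative, and the
# get_initial_state(inputs=..., dtype=...) signature implies TF 2.x Keras 2.
import tensorflow as tf

encoder = Encoder(n_neurons=128, batch_size=4, user=6)
dummy_inputs = tf.random.normal([4, 6, 32])    # [batch, user, features]
enc_outputs, enc_state = encoder.build_model(dummy_inputs)
print(enc_outputs.shape)                       # (4, 6, 128)
print(enc_state[0].shape, enc_state[1].shape)  # (4, 128) (4, 128)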