def test_recurrence_larger():
    """larger recurrence"""
    sequence = InputLayer((None, None, 3), name='input sequence')
    initial_cell = InputLayer((None, 20), name='lstm cell zero tick')

    # step
    inp = InputLayer((None, 3))
    prev_rnn = InputLayer((None, 10))
    rnn = RNNCell(prev_rnn, inp, name='rnn')

    prev_lstm_cell = InputLayer((None, 20))  # lstm cell
    prev_lstm_hid = InputLayer((None, 20))  # lstm output
    lstm_cell, lstm_hid = LSTMCell(prev_lstm_cell, prev_lstm_hid,
                                   input_or_inputs=rnn)

    lstm_hid = DropoutLayer(lstm_hid, p=0.5)  # dropout hid, but not cell. Just to check it works

    from collections import OrderedDict  # one can use a regular dict, but that causes a warning

    rec = agentnet.Recurrence(
        input_sequences={inp: sequence},
        state_variables=OrderedDict([(rnn, prev_rnn),
                                     (lstm_hid, prev_lstm_hid),
                                     (lstm_cell, prev_lstm_cell)]),
        state_init={lstm_cell: initial_cell},  # defaults to zeros
        unroll_scan=False)

    weights = get_all_params(rec)

    rnn_states = rec[rnn]
    lstm_cell_states = rec[lstm_cell]
    lstm_hid_states = rec[lstm_hid]

    run = theano.function(
        [sequence.input_var, initial_cell.input_var],
        get_output([rnn_states, lstm_cell_states, lstm_hid_states]),
        # if any randomness is used and unroll_scan=False, one has to pass the automatic updates
        updates=rec.get_automatic_updates())

    out = run(np.random.randn(5, 25, 3), np.random.randn(5, 20))

    assert tuple(out[0].shape) == (5, 25, 10)  # rnn
    assert tuple(out[1].shape) == (5, 25, 20)  # lstm cell
    assert tuple(out[2].shape) == (5, 25, 20)  # lstm hid (aka output)
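    # --- Hedged add-on (not part of the original test): if only the final tick is needed,
    # the per-tick state sequences can be sliced with a standard lasagne layer
    # (assuming SliceLayer is imported from lasagne.layers like the other layers above).
    last_lstm_hid = SliceLayer(lstm_hid_states, -1, axis=1)  # shape (batch, 20)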
def __init__(self, input_or_inputs, num_units=None, *args, name=None, **kwargs):
    self.p_cell = L.InputLayer(
        (None, num_units),
        name="previous cell state" if name is None else name + " previous cell state")
    self.p_out = L.InputLayer(
        (None, num_units),
        name="previous out state" if name is None else name + " previous out state")
    self.cell, self.out = LSTMCell(self.p_cell, self.p_out, input_or_inputs,
                                   num_units, *args, name=name, **kwargs)
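# --- Hedged usage sketch (not from the original code) ---
# Assumes the __init__ above belongs to a small convenience wrapper class; the name
# `AutoLSTMCell` below is hypothetical and only stands in for that class. The point is
# that the wrapper creates its own "previous state" InputLayers (.p_cell / .p_out), so
# pairing new states with previous states in a Recurrence becomes a one-liner:
#
# lstm = AutoLSTMCell(input_or_inputs=L.InputLayer((None, 8)), num_units=20, name='lstm')
# state_variables = OrderedDict([(lstm.cell, lstm.p_cell),
#                                (lstm.out, lstm.p_out)])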
def __init__(self, vocab, enc):
    # Define inputs of the decoder at each time step.
    self.prev_cell = InputLayer((None, Config.N_LSTM_UNITS), name='cell')
    self.prev_hid = InputLayer((None, Config.N_LSTM_UNITS), name='hid')
    self.input_word = InputLayer((None,))
    self.encoder_lstm = InputLayer((None, Config.N_LSTM_UNITS), name='encoder')

    # Embed the input word, reusing the same embeddings as in the encoder.
    self.word_embedding = EmbeddingLayer(self.input_word,
                                         vocab.n_tokens,
                                         Config.EMB_SIZE,
                                         W=enc.l_emb.W,
                                         name='emb')

    # Note: LSTMCell, not LSTMLayer -- the *Cell variant is used for one-tick networks.
    self.new_cell, self.new_hid = LSTMCell(
        self.prev_cell,
        self.prev_hid,
        input_or_inputs=[self.word_embedding, self.encoder_lstm],
        name='decoder_lstm',
        peepholes=False)

    # Define layers for next-word prediction. The bottleneck is a hack to reduce time complexity.
    self.bottleneck = DenseLayer(self.new_hid,
                                 Config.BOTTLENECK_UNITS,
                                 nonlinearity=T.tanh,
                                 name='decoder intermediate')
    self.next_word_probs = DenseLayer(
        self.bottleneck,
        vocab.n_tokens,
        nonlinearity=lambda probs: T.nnet.softmax(probs / Config.TEMPERATURE),
        name='decoder next word probas')
    self.next_words = ProbabilisticResolver(self.next_word_probs,
                                            assume_normalized=True)
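# --- Hedged sketch (not part of the original class): one decoding tick could be compiled
# as below. `decoder` is a hypothetical instance of the class whose __init__ is defined
# above; everything else uses only standard lasagne/theano calls.
#
# word_ix = T.ivector('previous word ids')
# prev_cell_var, prev_hid_var = T.matrix('prev cell'), T.matrix('prev hid')
# enc_var = T.matrix('encoder lstm state')
# new_cell, new_hid, word_probs = get_output(
#     [decoder.new_cell, decoder.new_hid, decoder.next_word_probs],
#     {decoder.prev_cell: prev_cell_var, decoder.prev_hid: prev_hid_var,
#      decoder.input_word: word_ix, decoder.encoder_lstm: enc_var})
# decode_step = theano.function([word_ix, prev_cell_var, prev_hid_var, enc_var],
#                               [new_cell, new_hid, word_probs])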
def test_memory(game_title='SpaceInvaders-v0',
                n_parallel_games=3,
                replay_seq_len=2):
    """
    :param game_title: name of atari game in Gym
    :param n_parallel_games: how many games we run in parallel
    :param replay_seq_len: how long is one replay session from a batch
    """
    atari = gym.make(game_title)
    atari.reset()

    # Game parameters
    n_actions = atari.action_space.n
    observation_shape = (None,) + atari.observation_space.shape
    action_names = atari.get_action_meanings()
    del atari

    # ##### Agent observations

    # image observation at the current tick goes here
    observation_layer = InputLayer(observation_shape, name="images input")

    # reshape to [batch, color, x, y] so that convolutional layers work correctly
    observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2))

    # Agent memory states
    memory_dict = OrderedDict([])

    # ### Window
    window_size = 3

    # prev state input
    prev_window = InputLayer(
        (None, window_size) + tuple(observation_reshape.output_shape[1:]),
        name="previous window state")

    # our window
    window = WindowAugmentation(observation_reshape,
                                prev_window,
                                name="new window state")

    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(window,
                                 lambda a: a.max(axis=1),
                                 output_shape=(None,) + window.output_shape[2:])

    memory_dict[window] = prev_window

    # ### Stack
    # prev stack
    stack_w, stack_h = 4, 5
    stack_inputs = DenseLayer(observation_reshape, stack_w, name="stack inputs")
    stack_controls = DenseLayer(observation_reshape,
                                3,
                                nonlinearity=lasagne.nonlinearities.softmax,
                                name="stack controls")
    prev_stack = InputLayer((None, stack_h, stack_w),
                            name="previous stack state")
    stack = StackAugmentation(stack_inputs, prev_stack, stack_controls)
    memory_dict[stack] = prev_stack

    stack_top = lasagne.layers.SliceLayer(stack, 0, 1)

    # ### RNN preset
    prev_rnn = InputLayer((None, 16), name="previous RNN state")
    new_rnn = RNNCell(prev_rnn, observation_reshape)
    memory_dict[new_rnn] = prev_rnn

    # ### GRU preset
    prev_gru = InputLayer((None, 16), name="previous GRUcell state")
    new_gru = GRUCell(prev_gru, observation_reshape)
    memory_dict[new_gru] = prev_gru

    # ### GRUMemoryLayer
    prev_gru1 = InputLayer((None, 15), name="previous GRUMemoryLayer state")
    new_gru1 = GRUMemoryLayer(15, observation_reshape, prev_gru1)
    memory_dict[new_gru1] = prev_gru1

    # LSTM with peepholes
    prev_lstm0_cell = InputLayer(
        (None, 13), name="previous LSTMCell hidden state [with peepholes]")
    prev_lstm0_out = InputLayer(
        (None, 13), name="previous LSTMCell output state [with peepholes]")
    new_lstm0_cell, new_lstm0_out = LSTMCell(
        prev_lstm0_cell,
        prev_lstm0_out,
        input_or_inputs=observation_reshape,
        peepholes=True,
        name="newLSTM0 [with peepholes]")
    memory_dict[new_lstm0_cell] = prev_lstm0_cell
    memory_dict[new_lstm0_out] = prev_lstm0_out

    # LSTM without peepholes
    prev_lstm1_cell = InputLayer(
        (None, 14), name="previous LSTMCell hidden state [no peepholes]")
    prev_lstm1_out = InputLayer(
        (None, 14), name="previous LSTMCell output state [no peepholes]")
    new_lstm1_cell, new_lstm1_out = LSTMCell(
        prev_lstm1_cell,
        prev_lstm1_out,
        input_or_inputs=observation_reshape,
        peepholes=False,
        name="newLSTM1 [no peepholes]")
    memory_dict[new_lstm1_cell] = prev_lstm1_cell
    memory_dict[new_lstm1_out] = prev_lstm1_out

    # concatenate everything
    for i in [flatten(window_max), stack_top, new_rnn, new_gru, new_gru1]:
        print(i.output_shape)

    all_memory = concat([
        flatten(window_max),
        stack_top,
        new_rnn,
        new_gru,
        new_gru1,
        new_lstm0_out,
        new_lstm1_out,
    ])

    # ##### Neural network body
    # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc.
    # a simple lasagne network (try replacing it with any other lasagne network and see what works best)
    nn = DenseLayer(all_memory, num_units=50, name='dense0')

    # Agent policy and action picking
    q_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.linear,
                        name="QEvaluator")

    # resolver
    resolver = EpsilonGreedyResolver(q_eval, epsilon=0.1, name="resolver")

    # agent
    agent = Agent(observation_layer, memory_dict, q_eval, resolver)

    # Since it's a single lasagne network, one can get its weights, output, etc.
    weights = lasagne.layers.get_all_params(resolver, trainable=True)

    # Agent step function
    print('compiling react')
    applier_fun = agent.get_react_function()

    # a nice pythonic interface
    def step(observation, prev_memories='zeros', batch_size=n_parallel_games):
        """ returns actions and new states given observation and previous state.
        Previous state in the default setup should be [prev window,]"""
        # default to zeros
        if prev_memories == 'zeros':
            prev_memories = [
                np.zeros((batch_size,) + tuple(mem.output_shape[1:]),
                         dtype='float32') for mem in agent.agent_states
            ]
        res = applier_fun(np.array(observation), *prev_memories)
        action = res[0]
        memories = res[1:]
        return action, memories

    # ## Create and manage a pool of atari sessions to play with
    pool = GamePool(game_title, n_parallel_games)

    observation_log, action_log, reward_log, _, _, _ = pool.interact(step, 50)

    print(np.array(action_names)[np.array(action_log)[:3, :5]])

    # ## Experience replay pool
    # Create an environment with all default parameters
    env = SessionPoolEnvironment(observations=observation_layer,
                                 actions=resolver,
                                 agent_memories=agent.agent_states)

    def update_pool(env, pool, n_steps=100):
        """ a function that creates new sessions and adds them into the pool,
        throwing the old ones away entirely for simplicity"""
        preceding_memory_states = list(pool.prev_memory_states)

        # get interaction sessions
        observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact(
            step, n_steps=n_steps)

        # load them into the experience replay environment
        env.load_sessions(observation_tensor, action_tensor, reward_tensor,
                          is_alive_tensor, preceding_memory_states)

    # load first sessions
    update_pool(env, pool, replay_seq_len)

    # A more sophisticated way of training is to store a large pool of sessions
    # and train on random batches of them.
    # ### Training via experience replay

    # get the agent's Q-values obtained via experience replay
    _env_states, _observations, _memories, _imagined_actions, q_values_sequence = agent.get_sessions(
        env,
        session_length=replay_seq_len,
        batch_size=env.batch_size,
        optimize_experience_replay=True,
    )

    # Evaluating the loss function
    scaled_reward_seq = env.rewards
    # For SpaceInvaders, however, leaving rewards unscaled at least works

    elwise_mse_loss = qlearning.get_elementwise_objective(
        q_values_sequence,
        env.actions[0],
        scaled_reward_seq,
        env.is_alive,
        gamma_or_gammas=0.99,
    )

    # compute mean over "alive" fragments
    mse_loss = elwise_mse_loss.sum() / env.is_alive.sum()

    # regularize network weights
    reg_l2 = regularize_network_params(resolver, l2) * 10**-4

    loss = mse_loss + reg_l2

    # Compute weight updates
    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01)

    # mean session reward
    mean_session_reward = env.rewards.sum(axis=1).mean()

    # ## Compile train and evaluation functions
    print('compiling')
    train_fun = theano.function([], [loss, mean_session_reward],
                                updates=updates)
    evaluation_fun = theano.function(
        [], [loss, mse_loss, reg_l2, mean_session_reward])
    print("I've compiled!")

    # ## Training loop
    for epoch_counter in range(10):
        update_pool(env, pool, replay_seq_len)
        loss, avg_reward = train_fun()
        full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun()

        print("epoch %i, loss %.5f, rewards: %.5f" %
              (epoch_counter, full_loss, avg_reward_current))
        print("rec %.3f reg %.3f" % (q_loss, l2_penalty))
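    # --- Hedged add-on (not part of the original test): checkpoint the learned weights
    # with plain lasagne parameter utilities; the file name below is illustrative.
    import pickle
    with open("qlearning_agent_weights.pkl", "wb") as f:
        pickle.dump(lasagne.layers.get_all_param_values(resolver), f)
    # ...and they can later be restored with:
    # with open("qlearning_agent_weights.pkl", "rb") as f:
    #     lasagne.layers.set_all_param_values(resolver, pickle.load(f))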
prev_token = L.InputLayer([None])
prev_rnn = L.InputLayer([None, rnn_num_units])
prev_rnn1 = L.InputLayer([None, rnn_num_units])

# convert character id into an embedding
prev_token_emb = L.EmbeddingLayer(prev_token, n_tokens, embedding_size)

# concatenate x embedding and previous h state
# rnn_input = L.ConcatLayer([prev_token_emb, prev_rnn])

# compute next state given x_and_h
# new_rnn = L.DenseLayer(rnn_input, rnn_num_units, nonlinearity=T.tanh)
new_rnn, new_rnn1 = LSTMCell(prev_rnn, prev_rnn1, prev_token_emb)
# GRUCell(prev_rnn1, [new_rnn])

# get probabilities for the language model P(x_next | h_next)
next_token_logits = L.DenseLayer(new_rnn1, n_tokens, nonlinearity=None)
# L.ConcatLayer([new_rnn, new_rnn1])
next_token_probs = L.NonlinearityLayer(next_token_logits, T.nnet.softmax)
next_token_logprobs = L.NonlinearityLayer(next_token_logits, log_softmax)

input_sequence = T.imatrix("input tokens [time, batch]")
batch_size = input_sequence.shape[1]

predicted_probas = []
h0 = T.zeros([batch_size, rnn_num_units])  # initial hidden state
h1 = T.zeros([batch_size, rnn_num_units])
probas0 = T.zeros([batch_size, n_tokens])
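# --- Hedged sketch (not from the original snippet): compile a single tick of the network
# above, so next-token probabilities can be computed from a previous token and the two LSTM
# state matrices. It assumes `L` is lasagne.layers and `T` is theano.tensor, as used above.
import theano

token_ix = T.ivector("prev token ids")
prev_h0 = T.matrix("previous cell state")
prev_h1 = T.matrix("previous hidden state")

step_cell, step_hid, step_probs = L.get_output(
    [new_rnn, new_rnn1, next_token_probs],
    {prev_token: token_ix, prev_rnn: prev_h0, prev_rnn1: prev_h1})

# given (token, cell, hid), returns (new cell, new hid, P(next token))
rnn_one_step = theano.function([token_ix, prev_h0, prev_h1],
                               [step_cell, step_hid, step_probs],
                               allow_input_downcast=True)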
def test_memory_cells(batch_size=3, seq_len=50, input_dim=8, n_hidden=16):
    # lasagne way
    l_in = InputLayer(
        (None, seq_len, input_dim),
        input_var=theano.shared(
            np.random.normal(size=[batch_size, seq_len, input_dim])),
        name='input seq')

    l_lstm0 = LSTMLayer(l_in, n_hidden, name='lstm')
    l_gru0 = GRULayer(l_in, n_hidden, name='gru')

    f_predict0 = theano.function([], get_output([l_lstm0, l_gru0]))

    # agentnet way
    s_in = InputLayer((None, input_dim), name='in')

    s_prev_cell = InputLayer((None, n_hidden), name='cell')
    s_prev_hid = InputLayer((None, n_hidden), name='hid')
    s_lstm_cell, s_lstm_hid = LSTMCell(s_prev_cell, s_prev_hid, s_in, name='lstm')

    s_prev_gru = InputLayer((None, n_hidden), name='gru prev')
    s_gru = GRUCell(s_prev_gru, s_in, name='gru')

    rec = Recurrence(state_variables=OrderedDict([(s_lstm_cell, s_prev_cell),
                                                  (s_lstm_hid, s_prev_hid),
                                                  (s_gru, s_prev_gru)]),
                     input_sequences={s_in: l_in},
                     unroll_scan=False)

    state_seqs, _ = rec.get_sequence_layers()

    l_lstm1 = state_seqs[s_lstm_hid]
    l_gru1 = state_seqs[s_gru]

    f_predict1 = theano.function([], get_output([l_lstm1, l_gru1]))

    # lstm param transfer
    old_params = sorted(get_all_params(l_lstm0, trainable=True),
                        key=lambda p: p.name)
    new_params = sorted(get_all_params(s_lstm_hid, trainable=True),
                        key=lambda p: p.name)

    for old, new in zip(old_params, new_params):
        print(old.name, '<-', new.name)
        assert tuple(old.shape.eval()) == tuple(new.shape.eval())
        old.set_value(new.get_value())

    # gru param transfer
    old_params = sorted(get_all_params(l_gru0, trainable=True),
                        key=lambda p: p.name)
    new_params = sorted(get_all_params(s_gru, trainable=True),
                        key=lambda p: p.name)

    for old, new in zip(old_params, new_params):
        print(old.name, '<-', new.name)
        assert tuple(old.shape.eval()) == tuple(new.shape.eval())
        old.set_value(new.get_value())

    lstm0_out, gru0_out = f_predict0()
    lstm1_out, gru1_out = f_predict1()

    assert np.allclose(lstm0_out, lstm1_out)
    assert np.allclose(gru0_out, gru1_out)