Example #1
def test_recurrence_larger():
    """larger recurrence"""
    sequence = InputLayer((None, None, 3), name='input sequence')
    initial_cell = InputLayer((None, 20), name='lstm cell zero tick')

    # step
    inp = InputLayer((None, 3))
    prev_rnn = InputLayer((None, 10))
    rnn = RNNCell(prev_rnn, inp, name='rnn')

    prev_lstm_cell = InputLayer((None, 20))  #lstm cell
    prev_lstm_hid = InputLayer((None, 20))  #lstm output
    lstm_cell, lstm_hid = LSTMCell(prev_lstm_cell,
                                   prev_lstm_hid,
                                   input_or_inputs=rnn)

    lstm_hid = DropoutLayer(
        lstm_hid, p=0.5)  # dropout on hid but not on cell, just to check that it works

    from collections import OrderedDict  # a plain dict works too but triggers a warning

    rec = agentnet.Recurrence(
        input_sequences={inp: sequence},
        state_variables=OrderedDict({
            rnn: prev_rnn,
            lstm_hid: prev_lstm_hid,
            lstm_cell: prev_lstm_cell
        }),
        state_init={lstm_cell: initial_cell},  # defaults to zeros
        unroll_scan=False)

    weights = get_all_params(rec)

    rnn_states = rec[rnn]
    lstm_cell_states = rec[lstm_cell]
    lstm_hid_states = rec[lstm_hid]

    run = theano.function(
        [sequence.input_var, initial_cell.input_var],
        get_output([rnn_states, lstm_cell_states, lstm_hid_states]),
        # if the step uses randomness (dropout here) and unroll_scan=False,
        # the automatic updates collected by scan must be passed to theano.function
        updates=rec.get_automatic_updates())

    out = run(np.random.randn(5, 25, 3), np.random.randn(5, 20))

    assert tuple(out[0].shape) == (5, 25, 10)  #rnn
    assert tuple(out[1].shape) == (5, 25, 20)  #lstm cell
    assert tuple(out[2].shape) == (5, 25, 20)  #lstm hid (aka output)
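
For orientation, the same Recurrence pattern stripped down to a single GRU state is sketched below. This sketch is not part of the original test; the agentnet import paths are assumptions.

import numpy as np
import theano
from lasagne.layers import InputLayer, get_output
from agentnet.memory import GRUCell  # assumed import path
import agentnet

# sequence of 3-dimensional observations: [batch, time, features]
sequence = InputLayer((None, None, 3), name='input sequence')

# one-tick graph: previous GRU state + current input -> new GRU state
inp = InputLayer((None, 3))
prev_gru = InputLayer((None, 16))
gru = GRUCell(prev_gru, inp, name='gru')

# roll the one-tick graph along the time axis
rec = agentnet.Recurrence(input_sequences={inp: sequence},
                          state_variables={gru: prev_gru})  # a plain dict triggers a warning

gru_states = rec[gru]  # [batch, time, 16]

run = theano.function([sequence.input_var],
                      get_output(gru_states),
                      updates=rec.get_automatic_updates())

print(run(np.random.randn(5, 25, 3).astype(theano.config.floatX)).shape)  # (5, 25, 16)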
Example #2
    def __init__(self,
                 input_or_inputs,
                 num_units=None,
                 *args,
                 name=None,
                 **kwargs):
        self.p_cell = L.InputLayer(
            (None, num_units),
            name="previous cell state" if name is None
            else name + " previous cell state")
        self.p_out = L.InputLayer(
            (None, num_units),
            name="previous out state" if name is None
            else name + " previous out state")
        self.cell, self.out = LSTMCell(self.p_cell,
                                       self.p_out,
                                       input_or_inputs,
                                       num_units,
                                       *args,
                                       name=name,
                                       **kwargs)
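
The constructor above bundles an LSTMCell with its own previous-state placeholders (p_cell, p_out). Below is a sketch of how such a wrapper could be wired into a Recurrence; the class name LSTMCellWrapper and the imports are assumptions, since the snippet shows only __init__.

from collections import OrderedDict
import lasagne.layers as L
import agentnet

sequence = L.InputLayer((None, None, 8), name='input sequence')
inp = L.InputLayer((None, 8))

lstm = LSTMCellWrapper(inp, num_units=32, name='lstm')  # hypothetical class name

# each new-state layer maps to the prev-state placeholder the wrapper created for it
rec = agentnet.Recurrence(
    input_sequences={inp: sequence},
    state_variables=OrderedDict([(lstm.cell, lstm.p_cell),
                                 (lstm.out, lstm.p_out)]))

lstm_out_seq = rec[lstm.out]  # [batch, time, 32]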
Example #3
    def __init__(self, vocab, enc):
        # Define inputs of decoder at each time step.
        self.prev_cell = InputLayer((None, Config.N_LSTM_UNITS), name='cell')
        self.prev_hid = InputLayer((None, Config.N_LSTM_UNITS), name='hid')
        self.input_word = InputLayer((None, ))
        self.encoder_lstm = InputLayer((None, Config.N_LSTM_UNITS),
                                       name='encoder')

        # Embed input word and use the same embeddings as in the encoder.
        self.word_embedding = EmbeddingLayer(self.input_word,
                                             vocab.n_tokens,
                                             Config.EMB_SIZE,
                                             W=enc.l_emb.W,
                                             name='emb')

        # Note: this uses LSTMCell rather than LSTMLayer; the *Cell variants build one-tick (single-step) networks.
        self.new_cell, self.new_hid = LSTMCell(
            self.prev_cell,
            self.prev_hid,
            input_or_inputs=[self.word_embedding, self.encoder_lstm],
            name='decoder_lstm',
            peepholes=False)

        # Layers for predicting the next word. The bottleneck is a trick to cut the cost of the large output projection.
        self.bottleneck = DenseLayer(self.new_hid,
                                     Config.BOTTLENECK_UNITS,
                                     nonlinearity=T.tanh,
                                     name='decoder intermediate')

        self.next_word_probs = DenseLayer(
            self.bottleneck,
            vocab.n_tokens,
            nonlinearity=lambda probs: T.nnet.softmax(probs / Config.TEMPERATURE),
            name='decoder next word probas')

        self.next_words = ProbabilisticResolver(self.next_word_probs,
                                                assume_normalized=True)
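
To make the one-tick nature of this decoder concrete, here is a sketch of compiling a single decoding step; the class name Decoder and the vocab/enc objects are assumptions on top of the snippet above.

import theano
import theano.tensor as T
from lasagne.layers import get_output

dec = Decoder(vocab, enc)  # assumed class name and constructor arguments

# symbolic inputs for one tick
word_ids = T.ivector('word ids')
prev_cell = T.matrix('prev cell')
prev_hid = T.matrix('prev hid')
enc_state = T.matrix('encoder state')

# previous (cell, hid), current word id and encoder state -> next-word probs + new states
probs, new_cell, new_hid = get_output(
    [dec.next_word_probs, dec.new_cell, dec.new_hid],
    {dec.input_word: word_ids,
     dec.prev_cell: prev_cell,
     dec.prev_hid: prev_hid,
     dec.encoder_lstm: enc_state})

decode_step = theano.function([word_ids, prev_cell, prev_hid, enc_state],
                              [probs, new_cell, new_hid])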
Example #4
def test_memory(
    game_title='SpaceInvaders-v0',
    n_parallel_games=3,
    replay_seq_len=2,
):
    """
    :param game_title: name of atari game in Gym
    :param n_parallel_games: how many games we run in parallel
    :param replay_seq_len: how long is one replay session from a batch
    """

    atari = gym.make(game_title)
    atari.reset()

    # Game Parameters
    n_actions = atari.action_space.n
    observation_shape = (None, ) + atari.observation_space.shape
    action_names = atari.get_action_meanings()
    del atari
    # ##### Agent observations

    # image observation at current tick goes here
    observation_layer = InputLayer(observation_shape, name="images input")

    # reshape to [batch, color, x, y] to allow for convolutional layers to work correctly
    observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2))

    # Agent memory states

    memory_dict = OrderedDict([])

    ###Window
    window_size = 3

    # prev state input
    prev_window = InputLayer(
        (None, window_size) + tuple(observation_reshape.output_shape[1:]),
        name="previous window state")

    # our window
    window = WindowAugmentation(observation_reshape,
                                prev_window,
                                name="new window state")

    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(window,
                                 lambda a: a.max(axis=1),
                                 output_shape=(None, ) +
                                 window.output_shape[2:])

    memory_dict[window] = prev_window

    ###Stack
    # stack inputs and controls are computed from the current observation
    stack_w, stack_h = 4, 5
    stack_inputs = DenseLayer(observation_reshape, stack_w, name="stack input")
    stack_controls = DenseLayer(observation_reshape,
                                3,
                                nonlinearity=lasagne.nonlinearities.softmax,
                                name="stack controls")
    prev_stack = InputLayer((None, stack_h, stack_w),
                            name="previous stack state")
    stack = StackAugmentation(stack_inputs, prev_stack, stack_controls)
    memory_dict[stack] = prev_stack

    stack_top = lasagne.layers.SliceLayer(stack, 0, 1)

    ###RNN preset

    prev_rnn = InputLayer((None, 16), name="previous RNN state")
    new_rnn = RNNCell(prev_rnn, observation_reshape)
    memory_dict[new_rnn] = prev_rnn

    ###GRU preset
    prev_gru = InputLayer((None, 16), name="previous GRUcell state")
    new_gru = GRUCell(prev_gru, observation_reshape)
    memory_dict[new_gru] = prev_gru

    ###GRUmemorylayer
    prev_gru1 = InputLayer((None, 15), name="previous GRUcell state")
    new_gru1 = GRUMemoryLayer(15, observation_reshape, prev_gru1)
    memory_dict[new_gru1] = prev_gru1

    #LSTM with peepholes
    prev_lstm0_cell = InputLayer(
        (None, 13), name="previous LSTMCell hidden state [with peepholes]")

    prev_lstm0_out = InputLayer(
        (None, 13), name="previous LSTMCell output state [with peepholes]")

    new_lstm0_cell, new_lstm0_out = LSTMCell(
        prev_lstm0_cell,
        prev_lstm0_out,
        input_or_inputs=observation_reshape,
        peepholes=True,
        name="newLSTM1 [with peepholes]")

    memory_dict[new_lstm0_cell] = prev_lstm0_cell
    memory_dict[new_lstm0_out] = prev_lstm0_out

    #LSTM without peepholes
    prev_lstm1_cell = InputLayer(
        (None, 14), name="previous LSTMCell hidden state [no peepholes]")

    prev_lstm1_out = InputLayer(
        (None, 14), name="previous LSTMCell output state [no peepholes]")

    new_lstm1_cell, new_lstm1_out = LSTMCell(
        prev_lstm1_cell,
        prev_lstm1_out,
        input_or_inputs=observation_reshape,
        peepholes=False,
        name="newLSTM1 [no peepholes]")

    memory_dict[new_lstm1_cell] = prev_lstm1_cell
    memory_dict[new_lstm1_out] = prev_lstm1_out

    ##concat everything

    for i in [flatten(window_max), stack_top, new_rnn, new_gru, new_gru1]:
        print(i.output_shape)
    all_memory = concat([
        flatten(window_max),
        stack_top,
        new_rnn,
        new_gru,
        new_gru1,
        new_lstm0_out,
        new_lstm1_out,
    ])

    # ##### Neural network body
    # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc

    # a simple lasagne network (try replacing with any other lasagne network and see what works best)
    nn = DenseLayer(all_memory, num_units=50, name='dense0')

    # Agent policy and action picking
    q_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.linear,
                        name="QEvaluator")

    # resolver
    resolver = EpsilonGreedyResolver(q_eval, epsilon=0.1, name="resolver")

    # agent
    agent = Agent(observation_layer, memory_dict, q_eval, resolver)

    # Since it's a single lasagne network, one can get its weights, output, etc.
    weights = lasagne.layers.get_all_params(resolver, trainable=True)

    # Agent step function
    print('compiling react')
    applier_fun = agent.get_react_function()

    # a nice pythonic interface
    def step(observation, prev_memories='zeros', batch_size=n_parallel_games):
        """ returns actions and new states given observation and prev state
        Prev state in default setup should be [prev window,]"""
        # default to zeros
        if prev_memories == 'zeros':
            prev_memories = [
                np.zeros((batch_size, ) + tuple(mem.output_shape[1:]),
                         dtype='float32') for mem in agent.agent_states
            ]
        res = applier_fun(np.array(observation), *prev_memories)
        action = res[0]
        memories = res[1:]
        return action, memories

    # # Create and manage a pool of atari sessions to play with

    pool = GamePool(game_title, n_parallel_games)

    observation_log, action_log, reward_log, _, _, _ = pool.interact(step, 50)

    print(np.array(action_names)[np.array(action_log)[:3, :5]])

    # # experience replay pool
    # Create an environment with all default parameters
    env = SessionPoolEnvironment(observations=observation_layer,
                                 actions=resolver,
                                 agent_memories=agent.agent_states)

    def update_pool(env, pool, n_steps=100):
        """ a function that creates new sessions and ads them into the pool
        throwing the old ones away entirely for simplicity"""

        preceding_memory_states = list(pool.prev_memory_states)

        # get interaction sessions
        observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact(
            step, n_steps=n_steps)

        # load them into experience replay environment
        env.load_sessions(observation_tensor, action_tensor, reward_tensor,
                          is_alive_tensor, preceding_memory_states)

    # load the first sessions
    update_pool(env, pool, replay_seq_len)

    # ### Training via experience replay
    # A more sophisticated way of training is to store a large pool of sessions and train on random batches of them.

    # get agent's Q-values obtained via experience replay
    _env_states, _observations, _memories, _imagined_actions, q_values_sequence = agent.get_sessions(
        env,
        session_length=replay_seq_len,
        batch_size=env.batch_size,
        optimize_experience_replay=True,
    )

    # Evaluating loss function

    scaled_reward_seq = env.rewards
    # rewards could be rescaled here; for SpaceInvaders, raw rewards work well enough

    elwise_mse_loss = qlearning.get_elementwise_objective(
        q_values_sequence,
        env.actions[0],
        scaled_reward_seq,
        env.is_alive,
        gamma_or_gammas=0.99,
    )

    # compute mean over "alive" fragments
    mse_loss = elwise_mse_loss.sum() / env.is_alive.sum()

    # regularize network weights
    reg_l2 = regularize_network_params(resolver, l2) * 10**-4

    loss = mse_loss + reg_l2

    # Compute weight updates
    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01)

    # mean session reward
    mean_session_reward = env.rewards.sum(axis=1).mean()

    # # Compile train and evaluation functions

    print('compiling')
    train_fun = theano.function([], [loss, mean_session_reward],
                                updates=updates)
    evaluation_fun = theano.function(
        [], [loss, mse_loss, reg_l2, mean_session_reward])
    print("I've compiled!")

    # # Training loop

    for epoch_counter in range(10):
        update_pool(env, pool, replay_seq_len)
        loss, avg_reward = train_fun()
        full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun()

        print("epoch %i,loss %.5f, rewards: %.5f " %
              (epoch_counter, full_loss, avg_reward_current))
        print("rec %.3f reg %.3f" % (q_loss, l2_penalty))
Example #5
prev_token = L.InputLayer([None])
prev_rnn = L.InputLayer([None, rnn_num_units])
prev_rnn1 = L.InputLayer([None, rnn_num_units])

# convert character id into embedding

prev_token_emb = L.EmbeddingLayer(prev_token, n_tokens, embedding_size)

# earlier plain-RNN step, kept commented out for reference:
# concatenate the token embedding with the previous hidden state,
# then compute the next state from both
#rnn_input = L.ConcatLayer([prev_token_emb, prev_rnn])
#new_rnn = L.DenseLayer(rnn_input, rnn_num_units, nonlinearity=T.tanh)

# compute the next LSTM state given the token embedding and the previous states
(new_rnn, new_rnn1) = LSTMCell(prev_rnn, prev_rnn1,
                               prev_token_emb)  #GRUCell(prev_rnn1,[new_rnn])

# get probabilities for language model P(x_next|h_next)
next_token_logits = L.DenseLayer(
    new_rnn1, n_tokens, nonlinearity=None)  #L.ConcatLayer([new_rnn,new_rnn1])

next_token_probs = L.NonlinearityLayer(next_token_logits, T.nnet.softmax)
next_token_logprobs = L.NonlinearityLayer(next_token_logits, log_softmax)

input_sequence = T.imatrix("input tokens [time, batch]")
batch_size = input_sequence.shape[1]

predicted_probas = []
h0 = T.zeros([batch_size, rnn_num_units])  #initial hidden state
h1 = T.zeros([batch_size, rnn_num_units])
probas0 = T.zeros([batch_size, n_tokens])
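
The snippet above stops right before the unroll: predicted_probas and the zero initial states are set up but the loop itself is missing. Below is a sketch of the step-by-step unroll such a setup typically feeds into; the fixed MAX_LENGTH constant is an assumption, and probas0 is left unused here although the original may use it to seed the list.

MAX_LENGTH = 50  # assumed fixed sequence length

for t in range(MAX_LENGTH):
    # token ids at step t for the whole batch, shape [batch]
    x_t = input_sequence[t]

    # run the one-tick graph defined above with explicit inputs
    probs_t, h0, h1 = L.get_output(
        [next_token_probs, new_rnn, new_rnn1],
        {prev_token: x_t, prev_rnn: h0, prev_rnn1: h1})

    predicted_probas.append(probs_t)

# [time, batch, n_tokens]
predicted_probas = T.stack(predicted_probas, axis=0)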
Example #6
def test_memory_cells(batch_size=3, seq_len=50, input_dim=8, n_hidden=16):
    # lasagne way
    l_in = InputLayer(
        (None, seq_len, input_dim),
        input_var=theano.shared(
            np.random.normal(size=[batch_size, seq_len, input_dim])),
        name='input seq')

    l_lstm0 = LSTMLayer(l_in, n_hidden, name='lstm')
    l_gru0 = GRULayer(l_in, n_hidden, name='gru')

    f_predict0 = theano.function([], get_output([l_lstm0, l_gru0]))

    # agentnet way
    s_in = InputLayer((None, input_dim), name='in')

    s_prev_cell = InputLayer((None, n_hidden), name='cell')
    s_prev_hid = InputLayer((None, n_hidden), name='hid')
    s_lstm_cell, s_lstm_hid = LSTMCell(s_prev_cell,
                                       s_prev_hid,
                                       s_in,
                                       name='lstm')

    s_prev_gru = InputLayer((None, n_hidden), name='hid')
    s_gru = GRUCell(s_prev_gru, s_in, name='gru')

    rec = Recurrence(state_variables=OrderedDict({
        s_lstm_cell: s_prev_cell,
        s_lstm_hid: s_prev_hid,
        s_gru: s_prev_gru
    }),
                     input_sequences={s_in: l_in},
                     unroll_scan=False)

    state_seqs, _ = rec.get_sequence_layers()

    l_lstm1 = state_seqs[s_lstm_hid]
    l_gru1 = state_seqs[s_gru]

    f_predict1 = theano.function([], get_output([l_lstm1, l_gru1]))

    # lstm param transfer
    old_params = sorted(get_all_params(l_lstm0, trainable=True),
                        key=lambda p: p.name)
    new_params = sorted(get_all_params(s_lstm_hid, trainable=True),
                        key=lambda p: p.name)

    for old, new in zip(old_params, new_params):
        print(old.name, '<-', new.name)
        assert tuple(old.shape.eval()) == tuple(new.shape.eval())
        old.set_value(new.get_value())

    # gru param transfer
    old_params = sorted(get_all_params(l_gru0, trainable=True),
                        key=lambda p: p.name)
    new_params = sorted(get_all_params(s_gru, trainable=True),
                        key=lambda p: p.name)

    for old, new in zip(old_params, new_params):
        print(old.name, '<-', new.name)
        assert tuple(old.shape.eval()) == tuple(new.shape.eval())
        old.set_value(new.get_value())

    lstm0_out, gru0_out = f_predict0()
    lstm1_out, gru1_out = f_predict1()

    assert np.allclose(lstm0_out, lstm1_out)
    assert np.allclose(gru0_out, gru1_out)