Exemplo n.º 1
0
    def test_soak(self):
        """Soak-test ReplayMemory with many randomly sized episodes.

        Builds a ReplayMemory with a 10000-entry buffer, then 10000 times:
        adds one episode of 5, 7, 9, 10 or 15 steps whose states are random
        arrays of shape ``state_shape``, prints ``rm.current_stats()``, and
        fetches the batch at indices 0-9.

        There are no explicit assertions; the test passes as long as none of
        these calls raises.
        """
        # Kept as a function-scope import: the file's top-level import block
        # is not touched by this method.
        import random

        state_shape = (50, 50, 6)
        rm = ReplayMemory(self.sess,
                          buffer_size=10000,
                          state_shape=state_shape,
                          action_dim=2,
                          load_factor=1.5)
        # NOTE(review): tf.initialize_all_variables is deprecated in later
        # TensorFlow releases in favour of tf.global_variables_initializer;
        # left as-is because the targeted TF version is not visible here.
        self.sess.run(tf.initialize_all_variables())

        def s_for(i):
            # The id is accepted so call sites read naturally, but it is
            # ignored: every state is just fresh random data.
            return np.random.random(state_shape)

        # Running id for the next event: the initial state of an episode
        # takes id `i`, its steps take i+1 .. i+episode_len.
        i = 0
        for _episode in range(10000):
            # Add an episode to rm.
            episode_len = random.choice([5, 7, 9, 10, 15])
            initial_state = s_for(i)
            # Use a dedicated loop variable: reusing `i` here would clobber
            # the running id and make the increment below double-count.
            action_reward_state = [
                ((step * 10) + 7, (step * 10) + 8, s_for(step))
                for step in range(i + 1, i + episode_len + 1)
            ]
            rm.add_episode(initial_state, action_reward_state)
            # Skip past the initial state plus every step just added.
            i += episode_len + 1
            # Dump stats; single-argument print(...) behaves the same under
            # the Python 2 print statement and the Python 3 function.
            print(rm.current_stats())
            # Fetch the batch at indices 0-9 and discard it; this only
            # exercises the read path. list(...) keeps idxs a real list on
            # both Python 2 and 3.
            rm.batch(idxs=list(range(10)))