    # assumed module-level imports for this test file (not shown in the
    # snippet): random, numpy as np, tensorflow as tf, plus the project's
    # ReplayMemory and StopWatch helpers.
    def test_large_var(self):
        # run just this test with:
        #   python replay_memory_test.py TestReplayMemory.test_large_var

        s = StopWatch()

        state_shape = (50, 50, 6)
        s.reset()
        rm = ReplayMemory(self.sess,
                          buffer_size=10000,
                          state_shape=state_shape,
                          action_dim=2,
                          load_factor=1.5)
        self.sess.run(tf.initialize_all_variables())
        print "cstr_and_init", s.time()

        bs1, bs1i, bs2, bs2i = rm.batch_ops()

        # build a simple, useless net that uses the state_1 & state_2 indices.
        # we want it to reduce to a single scalar so as little data as
        # possible comes back from the GPU.
        added_states = bs1 + bs2
        total_value = tf.reduce_sum(added_states)

        def random_s():
            return np.random.random(state_shape)

        idx = 0  # running state index, carried across episodes
        for _ in xrange(10):
            # add an episode to rm
            episode_len = random.choice([5, 7, 9, 10, 15])
            initial_state = random_s()
            action_reward_state = []
            for j in range(idx + 1, idx + episode_len + 1):
                a, r, s2 = (j * 10) + 7, (j * 10) + 8, random_s()
                action_reward_state.append((a, r, s2))
            s.reset()
            rm.add_episode(initial_state, action_reward_state)
            t = s.time()
            num_states = len(action_reward_state) + 1
            print "add_episode_time", t, "#states=", num_states, "=> s/state", t / num_states
            idx += episode_len + 1

            # sample a random batch from the replay memory
            b = rm.batch(batch_size=128)
            s.reset()
            x = self.sess.run(total_value,
                              feed_dict={
                                  bs1i: b.state_1_idx,
                                  bs2i: b.state_2_idx
                              })
            print "fetch_and_run", x, s.time()