# Exemplo n.º 1
    def get_state_range(self, verbose=True, episodes=15):
        """Estimate observation and reward ranges by random rollouts.

        Runs ``episodes`` episodes with uniformly sampled actions, records
        every transition in a temporary replay buffer, and derives empirical
        min/max for observations and rewards.

        Args:
            verbose: If True, print the sampled vs. manual ranges.
            episodes: Number of random episodes to sample (default 15,
                matching the previous hard-coded value).

        Returns:
            Tuple ``(low, high, r_low, r_high)`` where ``low``/``high`` are
            the (currently hand-tuned) per-feature observation bounds and
            ``r_low``/``r_high`` are the observed reward extremes.
        """
        env = self.env

        # Temporary buffer sized generously; discarded before returning.
        temp_memory = Memory(500000, self.n_state, 1, 1, infinite=True)
        # TODO: support the other "get means" strategies besides random rollout.
        for _ in range(episodes):
            done = False
            s = env.reset()
            while not done:
                a = env.action_space.sample()
                s_next, reward, done, _ = env.step(a)
                temp_memory.push(s, a, reward, s_next)
                s = s_next

        # push order is (state, action, reward, next_state); action is unused here.
        s, _, r, s_next = temp_memory.get_all_sample()

        # Empirical per-feature extremes over all sampled states.
        s_low = np.min(s, axis=0).flatten()
        s_high = np.max(s, axis=0).flatten()
        low = env.observation_space.low
        high = env.observation_space.high

        r_high = np.max(r)
        r_low = np.min(r)

        # Alternative: clamp only the infinite declared bounds to observed ones.
        # high[high == np.inf] = s_high[high == np.inf]
        # low[low == -np.inf] = s_low[low == -np.inf]

        # NOTE(review): hand-tuned bounds override both the declared and the
        # observed ranges; assumes a specific 5-feature environment — confirm
        # before reusing with a different env.
        high = np.array([0.407, 0.25, -0.96, 2, 8])
        low = np.array([-0.407, -0.25, -1, -2, -8])

        if verbose:
            print("using ", temp_memory.counter,
                  "samples to build means for features")
            print("original high and low", env.observation_space.high,
                  env.observation_space.low)
            print("observed high and low", s_high, s_low)
            print("manually high and low", high, low)
            print("reward high and low", r_high, r_low)

        # Release the large sample arrays before returning to cap peak memory.
        del temp_memory, s
        return low, high, r_low, r_high