def test_create_dict(self):
    env_dict = create_env_dict(self.env)

    self.assertIn("obs", env_dict)
    self.assertIn("act0", env_dict)
    self.assertIn("act1", env_dict)
    self.assertIn("act2", env_dict)
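# What kind of environment produces these keys (a standalone sketch, assuming
# gym's algorithmic Copy-v0 is available; it is not taken from the test file):
# a Discrete observation space yields a single "obs" entry, while a Tuple
# action space of three Discrete spaces is flattened by create_env_dict into
# "act0", "act1" and "act2".

import gym
from cpprb import create_env_dict

env = gym.make("Copy-v0")
# Copy-v0: observation_space Discrete(6),
#          action_space      Tuple(Discrete(2), Discrete(2), Discrete(5))
env_dict = create_env_dict(env)
print(sorted(k for k in env_dict if k.startswith("act")))  # ['act0', 'act1', 'act2']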
def __init__(self, max_length, seed_number, env):
    env_dict = create_env_dict(env)

    # Override the observation entries in the replay memory:
    # store flat 17-dimensional float32 observations instead of the
    # environment's native observation shape.
    env_dict['obs'] = {"dtype": numpy.float32, "shape": (17,)}
    env_dict['next_obs'] = {"dtype": numpy.float32, "shape": (17,)}

    self.before_add = create_before_add_func(env)
    self.storage = ReplayBuffer(max_length, env_dict)
def test_add(self):
    env_dict = create_env_dict(self.env)
    before_add_func = create_before_add_func(self.env)
    rb = ReplayBuffer(256, env_dict)

    obs = self.env.reset()

    for i in range(100):
        act = self.env.action_space.sample()
        next_obs, rew, done, _ = self.env.step(act)

        rb.add(**before_add_func(obs, act, next_obs, rew, done))

        if done:
            obs = self.env.reset()
        else:
            obs = next_obs
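    # Illustrative follow-up checks (an addition, not part of the original
    # test): cpprb's ReplayBuffer exposes get_stored_size() and
    # sample(batch_size), so the stored count and the keys of a sampled
    # batch can be verified as well.
    self.assertEqual(rb.get_stored_size(), 100)

    sample = rb.sample(32)
    self.assertIn("obs", sample)
    self.assertIn("rew", sample)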
def __init__(self, max_length, seed_number, env):
    env_dict = create_env_dict(env)

    self.before_add = create_before_add_func(env)
    self.storage = ReplayBuffer(max_length, env_dict)
# q.put(w)


# %% run_cell
if __name__ == "__main__":
    ENV_ID = 'pong'
    THREADS = 4

    mp.set_start_method('spawn')
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    # device = 'cpu'
    params = data.params[ENV_ID]

    test_env = createEnv(params.env)
    shape = test_env.observation_space.shape
    actions = test_env.action_space.n
    env_dict = create_env_dict(test_env)

    net = model.DDQN(shape, actions).to(device)
    net.share_memory()
    tgt_net = ptan.agent.TargetNet(net)
    selector = ptan.actions.ArgmaxActionSelector()
    agent = ptan.agent.DQNAgent(net, selector, device=device, preprocessor=preprocess)
    optimizer = torch.optim.Adam(net.parameters(), lr=params.lr)

    done_training = mp.Event()
    done_training.clear()
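# `preprocess`, passed to DQNAgent above, is defined elsewhere in this script.
# Below is a minimal sketch of such a preprocessor (an assumption about its
# contract, not the original implementation): ptan calls it with the batch of
# current observations and forwards whatever it returns to the network, moving
# tensor outputs to the agent's device.

import numpy
import torch


def preprocess_sketch(states):
    # Stack the batch of frame observations into one float32 array and
    # wrap it as a tensor for the DDQN network.
    np_states = numpy.asarray(states, dtype=numpy.float32)
    return torch.as_tensor(np_states)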
# create_buffer_with_helper_func.py
#
# Create `ReplayBuffer` for non-simple space `gym.Env` with helper functions.

import gym

from cpprb import ReplayBuffer, create_env_dict, create_before_add_func

env = gym.make("Blackjack-v0")
# https://github.com/openai/gym/blob/master/gym/envs/toy_text/blackjack.py
# BlackjackEnv
#   observation_space: Tuple(Discrete(32), Discrete(11), Discrete(2))
#   action_space     : Discrete(2)

env_dict = create_env_dict(env)
# >>> env_dict
# {'act':       {'add_shape': array([-1, 1]), 'dtype': numpy.int32,   'shape': 1},
#  'done':      {'add_shape': array([-1, 1]), 'dtype': numpy.float32, 'shape': 1},
#  'next_obs0': {'add_shape': array([-1, 1]), 'dtype': numpy.int32,   'shape': 1},
#  'next_obs1': {'add_shape': array([-1, 1]), 'dtype': numpy.int32,   'shape': 1},
#  'next_obs2': {'add_shape': array([-1, 1]), 'dtype': numpy.int32,   'shape': 1},
#  'obs0':      {'add_shape': array([-1, 1]), 'dtype': numpy.int32,   'shape': 1},
#  'obs1':      {'add_shape': array([-1, 1]), 'dtype': numpy.int32,   'shape': 1},
#  'obs2':      {'add_shape': array([-1, 1]), 'dtype': numpy.int32,   'shape': 1},
#  'rew':       {'add_shape': array([-1, 1]), 'dtype': numpy.float32, 'shape': 1}}

rb = ReplayBuffer(256, env_dict)
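# Continuing the example (a sketch, not part of the original file):
# create_before_add_func(env) returns a converter that splits the tuple
# observations into the obs0/obs1/obs2 and next_obs0/next_obs1/next_obs2
# entries expected by env_dict, so a plain rollout loop can feed rb.add.

before_add = create_before_add_func(env)

obs = env.reset()
for _ in range(64):
    act = env.action_space.sample()
    next_obs, rew, done, _ = env.step(act)

    rb.add(**before_add(obs, act, next_obs, rew, done))

    obs = env.reset() if done else next_obs

sample = rb.sample(32)
# `sample` is a dict of numpy arrays keyed like env_dict ("obs0", "act", "rew", ...).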