def evaluate_actor_critic(params, path):
    # Load a trained ActorCritic checkpoint and average its score over 100 CarRacing episodes.
    model = ActorCritic(params.stack_size, get_action_space())
    model.load_state_dict(torch.load(path))
    model.eval()

    env = gym.make('CarRacing-v0')
    env_wrapper = EnvironmentWrapper(env, params.stack_size)

    total_reward = 0
    num_of_episodes = 100
    for episode in range(num_of_episodes):
        state = env_wrapper.reset()
        state = torch.Tensor([state])
        done = False
        score = 0
        while not done:
            probs, _, _ = model(state)
            action = get_actions(probs)
            state, reward, done = env_wrapper.step(action[0])
            print(probs.detach().numpy(), "\n", action, reward)
            state = torch.Tensor([state])
            score += reward
            env_wrapper.render()
        print('Episode: {0} Score: {1:.2f}'.format(episode, score))
        total_reward += score
    return total_reward / num_of_episodes
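# Hedged usage sketch (not from the original source): one way evaluate_actor_critic
# might be called from a small entry point. The Params fields and the checkpoint
# filename 'actor_critic_checkpoint.pt' are illustrative assumptions, not the
# project's actual configuration.
from types import SimpleNamespace

if __name__ == '__main__':
    params = SimpleNamespace(stack_size=4, lr=1e-4, steps_per_update=5)  # assumed values
    average_score = evaluate_actor_critic(params, 'actor_critic_checkpoint.pt')  # assumed path
    print('Average score over 100 episodes: {:.2f}'.format(average_score))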
def __init__(self, params, model_path):
    # A3C master: builds the shared global model that all worker processes update.
    self.params = params
    self.model_path = model_path
    self.num_of_processes = mp.cpu_count()
    self.global_model = ActorCritic(self.params.stack_size, get_action_space())
    self.global_model.share_memory()
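# Hedged sketch (assumption, not the project's confirmed code): a master class with
# the constructor above typically exposes a run() method along these lines, spawning
# one worker per CPU around the shared global model and saving the shared weights
# once every process has joined. Worker's constructor signature is taken from the
# worker __init__ shown below; the rest is illustrative.
def run(self):
    workers = []
    for process_num in range(self.num_of_processes):
        worker = Worker(process_num, self.global_model, self.params)
        worker.start()            # mp.Process subclass: kicks off the worker's training loop
        workers.append(worker)
    for worker in workers:
        worker.join()             # wait for every training process to finish
    torch.save(self.global_model.state_dict(), self.model_path)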
def __init__(self, process_num, global_model, params, autosave=False):  # CHANGE: autosave flag added
    super().__init__()
    self.process_num = process_num
    self.global_model = global_model
    self.params = params
    env = gym.make('CarRacing-v0')
    self.environment = EnvironmentWrapper(env, self.params.stack_size)
    self.model = ActorCritic(self.params.stack_size, get_action_space())
    self.optimizer = Adam(self.global_model.parameters(), lr=self.params.lr)
    self.storage = Storage(self.params.steps_per_update)
    self.current_observation = torch.zeros(
        1, *self.environment.get_state_shape())
    # NEW: learning-rate bookkeeping, autosave flag and logging buffers
    self.lr = self.params.lr
    self.autosave = autosave
    self.log_loss = []
    self.log_tmp = np.array([])
    self.log_reward = np.array([])
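# Hedged sketch (assumption): the asynchronous update a worker like this usually
# performs. The loss is backpropagated through the local copy, the resulting
# gradients are handed to the shared global model, the shared Adam optimizer steps,
# and the local copy is re-synchronized. _async_update and its loss argument are
# hypothetical names, not taken from the original source.
def _async_update(self, loss):
    self.model.zero_grad()                     # clear gradients on the local copy
    loss.backward()                            # gradients land on self.model's parameters
    for local_param, global_param in zip(self.model.parameters(),
                                         self.global_model.parameters()):
        global_param.grad = local_param.grad   # transfer gradients to the shared model
    self.optimizer.step()                      # Adam was built over the global parameters
    self.model.load_state_dict(self.global_model.state_dict())  # pull updated weights back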
def __init__(self, process_num, global_model, params):
    super().__init__()
    self.process_num = process_num
    self.global_model = global_model
    self.params = params
    env = gym.make('CarRacing-v0')
    self.environment = EnvironmentWrapper(env, self.params.stack_size)
    self.model = ActorCritic(self.params.stack_size, get_action_space())
    self.optimizer = Adam(self.global_model.parameters(), lr=self.params.lr)
    self.storage = Storage(self.params.steps_per_update)
    self.current_observation = torch.zeros(
        1, *self.environment.get_state_shape())
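# Hedged sketch (standard n-step return math, not the project's Storage code): how
# the discounted returns for a rollout of length steps_per_update are usually
# computed before the actor-critic loss. rewards, dones and last_value are
# hypothetical inputs; the 0.99 discount factor is an assumption.
def compute_returns(rewards, dones, last_value, discount=0.99):
    returns = []
    running_return = last_value
    # walk the rollout backwards, cutting the bootstrap at episode boundaries
    for reward, done in zip(reversed(rewards), reversed(dones)):
        running_return = reward + discount * running_return * (1.0 - done)
        returns.append(running_return)
    returns.reverse()
    return returns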
def __init__(self, params, model_path):
    # Synchronous A2C trainer: one model, one optimizer, a batch of parallel environments.
    self.params = params
    self.model_path = model_path
    self.num_of_processes = multiprocessing.cpu_count()
    self.parallel_environments = ParallelEnvironments(
        self.params.stack_size, number_of_processes=self.num_of_processes)
    self.actor_critic = ActorCritic(self.params.stack_size, get_action_space())
    self.optimizer = Adam(self.actor_critic.parameters(), lr=self.params.lr)
    self.storage = Storage(self.params.steps_per_update, self.num_of_processes)
    self.current_observations = torch.zeros(
        self.num_of_processes, *self.parallel_environments.get_state_shape())
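# Hedged sketch (assumption): the standard A2C objective such a trainer typically
# minimizes: a policy-gradient term weighted by the advantage, a value-regression
# term and an entropy bonus. The tensor names and the 0.5 / 0.01 coefficients are
# illustrative, not taken from the original source.
def a2c_loss(log_probs, values, returns, entropies,
             value_coef=0.5, entropy_coef=0.01):
    advantages = returns - values
    policy_loss = -(log_probs * advantages.detach()).mean()   # actor term
    value_loss = advantages.pow(2).mean()                      # critic term
    entropy_bonus = entropies.mean()                           # exploration bonus
    return policy_loss + value_coef * value_loss - entropy_coef * entropy_bonus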
def actor_critic_inference(params, path):
    # Run a single CarRacing episode with a trained ActorCritic model and return its score.
    model = ActorCritic(params.stack_size, get_action_space())
    model.load_state_dict(torch.load(path))
    model.eval()

    env = gym.make('CarRacing-v0')
    env_wrapper = EnvironmentWrapper(env, params.stack_size)

    state = env_wrapper.reset()
    state = torch.Tensor([state])
    done = False
    total_score = 0
    while not done:
        probs, _, _ = model(state)
        action = get_actions(probs)
        print(action)
        state, reward, done = env_wrapper.step(action[0])
        state = torch.Tensor([state])
        total_score += reward
        env_wrapper.render()
    return total_score
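# Hedged note (assumption): if the checkpoint at `path` was saved from a GPU run,
# torch.load needs an explicit map_location to restore it on a CPU-only machine.
# This alternative loading step is illustrative and not part of the original code.
def load_for_cpu_inference(model, path):
    state_dict = torch.load(path, map_location=torch.device('cpu'))
    model.load_state_dict(state_dict)
    model.eval()
    return model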