# NOTE(review): this line is a collapsed extraction — the original file's
# newlines and indentation were lost, so it is left byte-identical below.
# Structural caveat: the `else:` mid-line pairs with an `if` whose header is
# OUTSIDE this chunk (presumably a flag selecting a rendered/"visual" env over
# a headless one — TODO confirm against the full file).
#
# Logical content, in order:
#   1) Build a VecEnv of `num_envs` './scenarios/deathmatch_maze.cfg' envs,
#      via make_visual_env(...) in the cut-off `if` branch, or
#      make_env(0, ...) in the `else` branch.
#   2) actions = range(envs.action_space_shape) — the discrete action ids;
#      the commented-out button lists document the original 3-button intent
#      (MOVE_LEFT, MOVE_RIGHT, ATTACK).
#   3) Infinite episode loop: each episode runs up to 1000 steps, sampling one
#      uniformly random action per env (choice(actions) — works because a
#      range supports len() and indexing); on `done` it reads env 0's game
#      variables and prints index 2 (labelled 'Kills' by the print — assumes
#      the scenario declares KILLCOUNT at that slot; verify), then breaks,
#      resets all envs, and sleeps briefly between steps/episodes for viewing.
# NOTE(review): `if done:` assumes VecEnv.step returns a scalar-truthy `done`
# for the whole vector — confirm against the VecEnv implementation.
envs = VecEnv([ make_visual_env('./scenarios/deathmatch_maze.cfg') for i in range(num_envs) ]) else: envs = VecEnv([ make_env(0, './scenarios/deathmatch_maze.cfg') for i in range(num_envs) ]) # Define some actions. Each list entry corresponds to declared buttons: # MOVE_LEFT, MOVE_RIGHT, ATTACK # 5 more combinations are naturally possible but only 3 are included for transparency when watching. # actions = [[True, False, False], [False, True, False], [False, False, True]] actions = range(envs.action_space_shape) episode_num = 0 while True: print('Episode #', episode_num) for j in range(1000): action_array = [choice(actions) for i in range(num_envs)] # print (action_array) obs, reward, done, info = envs.step(action_array) if done: game_vars = envs.get_game_variables(0) print('Kills : ', game_vars[2]) episode_num += 1 break # print ('Reward:', reward) sleep(0.01) envs.reset() sleep(0.1)
# NOTE(review): collapsed extraction — original newlines/indentation lost, so
# the line is kept byte-identical below. The nesting of the trailing
# statements (the torch.load/eval pair, the two average prints, envs.close())
# relative to the `while` and `if done:` bodies cannot be recovered from this
# view; as written linearly, the model appears to be re-loaded after every
# episode reset, which looks like an artifact of the collapse — confirm
# against the original file before refactoring.
#
# Logical content: evaluate a trained actor-critic for `num_episodes`
# episodes on a single env (index 0 of a vectorized wrapper):
#   - Each step: print the policy's action probabilities, pick a
#     deterministic action via actor_critic.act(..., deterministic=True)
#     (pre-0.4 PyTorch API: Variable(current_obs, volatile=True) disables
#     autograd for inference), step the env with that one action, and add
#     reward[0] to the running episode_reward.
#   - On `done`: fold episode_reward into total_reward, bump episode_cnt,
#     read env 0's game variables and (when present) add index 2 to
#     total_kills — labelled kills by the final print; presumably the
#     scenario's KILLCOUNT slot, verify against the scenario config. Then
#     reset the env and refresh current_obs via update_current_obs(obs).
#   - `!= None` is non-idiomatic (PEP 8 says `is not None`) — left unchanged
#     because this doc pass must not alter code tokens.
#   - After the loop: print average reward (rounded) and average kills over
#     num_episodes, then close the envs.
while episode_cnt < num_episodes: # sleep(0.01) print(actor_critic.get_probs(Variable(current_obs, volatile=True))) value, action = actor_critic.act(Variable(current_obs, volatile=True), deterministic=True) cpu_actions = action.data.cpu().numpy() print('Action:', [cpu_actions[0]]) # Obser reward and next obs obs, reward, done, _ = envs.step([cpu_actions[0]]) episode_reward += reward[0] if done: total_reward += episode_reward episode_cnt += 1 episode_reward = 0.0 episode_game_variables = envs.get_game_variables(0) if episode_game_variables != None: total_kills += episode_game_variables[2] obs = envs.reset() actor_critic = torch.load( os.path.join(args.load_dir, args.env_name + ".pt")) actor_critic.eval() update_current_obs(obs) print('Avg reward:', round(total_reward / num_episodes)) print('Avg kills:', (total_kills / num_episodes)) envs.close()