def displayImage(image, step, reward, value):
    """Render one game frame in the live matplotlib figure.

    Clears the previous notebook output, sets a title showing the current
    step, reward and the critic's value estimate, pushes `image` into the
    pre-created image artist `im`, and redraws.

    Args:
        image: frame to display (passed straight to `im.set_data`).
        step: current step counter, used only in the title.
        reward: reward value shown in the title.
        value: critic output; `value[0][0]` is assumed to be a 1x1
            value estimate (batch of one) — TODO confirm against the
            model's forward() return shape.

    NOTE(review): relies on module-level `clear_output`, `plt`, `im`,
    and `fig` created elsewhere in the file — not visible in this chunk.
    """
    clear_output(True)
    title = "step " + str(step) + " reward: " + str(reward) + " Value: " + str(value[0][0])
    plt.title(title)
    im.set_data(image)
    fig.canvas.draw()
    plt.pause(0.1)  # brief pause so the GUI event loop can repaint


# init environment
# `mode` is a module-level variable defined outside this chunk; frame_cap
# limits an episode to 1000 frames.
env = MiniPacman(mode=mode, frame_cap=1000)

# load model
# Checkpoint filename encodes the game mode, e.g. "actor_critic_pacman_<mode>".
agentPath = "actor_critic_pacman_" + mode
actor_critic = ActorCritic(env.observation_space.shape, env.action_space.n)
pretrained_dict = torch.load(agentPath)
actor_critic.load_state_dict(pretrained_dict)
if USE_CUDA:
    actor_critic = actor_critic.cuda()

# init game
done = False
state = env.reset()
total_reward = 0
step = 1

#while not done:
# NOTE(review): loop deliberately ignores `done` and runs forever;
# its body continues beyond this chunk.
while True:
    # Add a leading batch dimension: state -> (1, ...) float tensor.
    current_state = torch.FloatTensor(state).unsqueeze(0)
# --- A2C setup and (optional) training ------------------------------------
# NOTE(review): this chunk starts mid-script: `actor_critic`, `rollout`,
# `state`, `num_envs`, `num_steps`, `envs`, `arg`, `mode`, `LABEL`,
# `new_writer`, `process_reward`, and `MODE_REWARDS` are all defined
# elsewhere in the file. Line breaks/indentation below are reconstructed
# from syntax — confirm against the original layout.
if USE_CUDA:
    actor_critic = actor_critic.cuda()
    rollout.cuda()

# Seed the rollout storage with the initial environment observation(s).
rollout.states[0].copy_(state)

# Per-environment running reward for the current episode, and the last
# completed-episode reward (both shape (num_envs, 1)).
episode_rewards = torch.zeros(num_envs, 1)
final_rewards = torch.zeros(num_envs, 1)

writer = new_writer(LABEL, arg)

# Checkpoint path keyed by game mode and seed; if it exists we load it
# instead of retraining.
a2c_model_path = './trained_models/tmp_a2c_{}_{}'.format(
    arg.mode, arg.global_seed)
if os.path.exists(a2c_model_path):
    print('Load A2C model from ', a2c_model_path)
    actor_critic.load_state_dict(torch.load(a2c_model_path))
else:
    print('Start training A2C model')
    for i_update in tqdm(range(arg.num_frames)):
        # Collect one rollout of `num_steps` transitions.
        for step in range(num_steps):
            # NOTE(review): unconditional .cuda() here — will fail on a
            # CPU-only machine even though USE_CUDA is checked above.
            action = actor_critic.act(state.cuda())
            next_state, reward, done, _ = envs.step(
                action.squeeze(1).cpu().data.numpy())
            # Reshape raw env rewards via the per-mode reward table.
            reward = process_reward(reward, MODE_REWARDS[mode])
            reward = torch.FloatTensor(reward).unsqueeze(1)
            episode_rewards += reward
            # masks is 0 where an env finished, 1 otherwise; zeroing
            # final_rewards for finished envs before (presumably) copying
            # in the episode total — loop body continues past this chunk.
            masks = torch.FloatTensor(1 - np.array(done)).unsqueeze(1)
            final_rewards *= masks