def run_agent(player: MarioPlayer, env: Wrapper, record: bool, vids_path: str, index):
    """Run a single episode with ``player`` in ``env`` and return its run info.

    The episode lasts at most ``steps_limit`` steps (module-level setting) and
    stops early when the environment reports ``done`` or when
    ``info['flag_get']`` is truthy.

    Args:
        player: Agent that provides ``act``, ``update_info``, ``update_reward``,
            ``calculate_fitness`` and ``get_run_info``.
        env: Environment exposing ``reset``, ``step`` and ``render``.
        record: When true, every step is captured into a video file.
        vids_path: Directory in which ``"<index>.mp4"`` is written when
            ``record`` is true.
        index: Identifier of this run; used as the video file name and stored
            in the returned mapping under ``'index'``.

    Returns:
        The mapping returned by ``player.get_run_info()`` with an extra
        ``'index'`` entry.
    """
    rec = None
    if record:
        rec_output_path = os.path.join(vids_path, "{name}.mp4".format(name=index))
        rec = monitor.video_recorder.VideoRecorder(env, path=rec_output_path)

    try:
        state = env.reset()
        for _ in range(steps_limit):
            action = player.act(state)
            state, reward, done, info = env.step(action)
            env.render()
            if rec is not None:
                rec.capture_frame()
            player.update_info(info)
            player.update_reward(reward)
            # Reaching the flag ends the run even if the env did not set done.
            if info['flag_get'] or done:
                break
    finally:
        # Always release the recorder so a failing step does not leak it or
        # leave an unfinalised video file behind.
        if rec is not None:
            rec.close()

    player.calculate_fitness()
    outcome = player.get_run_info()
    outcome['index'] = index
    return outcome
def _test(id: int, env: gym.Wrapper, model: TD3Network, render: bool = False,
          recording_path=None, save_video=False):
    """Play one evaluation episode with ``model`` and report its outcome.

    On every decision the actor proposes an action, ``critic_1`` is evaluated
    on ``(state, action)`` and the argmax over dimension 1 of its output picks
    an entry of ``model.action_repeats``; the action is then applied that many
    times, or fewer if the episode ends first.

    Args:
        id: Episode identifier, used in the GIF file name ``ep_<id>.gif``.
        env: Environment exposing ``reset``, ``step`` and ``render``.
        model: Network bundle providing ``actor``, ``critic_1`` and
            ``action_repeats``.
        render: Render the environment before each step.
        recording_path: Directory for the GIF; required when both ``render``
            and ``save_video`` are set.
        save_video: With ``render`` set, collect ``rgb_array`` frames and
            write them out through ``write_gif`` instead of rendering in
            ``'human'`` mode.

    Returns:
        A tuple ``(total_reward, action_repeats)``: the sum of all per-step
        rewards and the list of repeat counts chosen at each decision.

    Raises:
        TypeError: If a video is requested but ``recording_path`` is ``None``.
    """
    capture_video = render and save_video
    if capture_video and recording_path is None:
        # Fail before running the episode rather than after it, inside
        # os.path.join (which raises the same exception type).
        raise TypeError(
            "recording_path must be provided when render and save_video are set")

    episode_rewards = []
    action_repeats = []
    episode_images = []

    state = env.reset()
    done = False
    while not done:
        # Pick the action and its repeat count. Gradients are never needed
        # during evaluation, so skip autograd bookkeeping.
        state = torch.FloatTensor(state).unsqueeze(0)
        with torch.no_grad():
            action = model.actor(state)
            repeat_q = model.critic_1(state, action)
        repeat_idx = repeat_q.argmax(1).item()
        action = action.data.cpu().numpy()[0]
        repeat = model.action_repeats[repeat_idx]
        action_repeats.append(repeat)

        for _ in range(repeat):
            if render:
                if save_video:
                    episode_images.append(env.render(mode='rgb_array'))
                else:
                    env.render(mode='human')

            # Apply the same action again; stop repeating once the episode ends.
            state, reward, done, info = env.step(action)
            episode_rewards.append(reward)
            if done:
                break

    if capture_video:
        write_gif(episode_images, action_repeats, episode_rewards,
                  os.path.join(recording_path, 'ep_{}.gif'.format(id)))

    return sum(episode_rewards), action_repeats