def run(config): model_path = (Path('./models') / config.env_id / config.model_name / ('run%i' % config.run_num)) if config.incremental is not None: model_path = model_path / 'incremental' / ('model_ep%i.pt' % config.incremental) else: model_path = model_path / 'model.pt' if config.save_gifs: gif_path = model_path.parent / 'gifs' if not config.mixed_policies else model_path.parent / 'gifs_mixed' gif_path.mkdir(exist_ok=True) torch.manual_seed(config.seed) np.random.seed(config.seed) if config.mixed_policies: maddpg = MADDPG.init_from_directory( Path('./models') / config.env_id / config.model_name) else: maddpg = MADDPG.init_from_save(model_path) env = make_env(config.env_id, benchmark=True, discrete_action=maddpg.discrete_action) env.world.seed(config.seed) maddpg.prep_rollouts(device='cpu') ifi = 1 / config.fps # inter-frame interval all_infos = np.empty( (config.n_episodes, config.episode_length, maddpg.nagents, 10)) all_positions = np.zeros( (config.n_episodes, config.episode_length, maddpg.nagents, 2)) for ep_i in range(config.n_episodes): print("Episode %i of %i" % (ep_i + 1, config.n_episodes)) obs = env.reset() if config.save_gifs: frames = [] frames.append(env.render('rgb_array')[0]) env.render('human') for t_i in range(config.episode_length): calc_start = time.time() # rearrange observations to be per agent, and convert to torch Variable torch_obs = [ Variable(torch.Tensor(obs[i]).view(1, -1), requires_grad=False) if not obs[i].ndim == 4 else Variable(torch.Tensor(obs[i]), requires_grad=False) for i in range(maddpg.nagents) ] all_positions[ep_i, t_i] = env.get_positions() # get actions as torch Variables torch_actions = maddpg.step(torch_obs, explore=False) # convert actions to numpy arrays actions = [ac.data.numpy().flatten() for ac in torch_actions] obs, rewards, dones, infos = env.step(actions) if config.save_gifs: frames.append(env.render('rgb_array')[0]) # frames.append(env.world.viewers[0].render(return_rgb_array = True)) uncomment if local views visible calc_end = time.time() elapsed = calc_end - calc_start if elapsed < ifi: time.sleep(ifi - elapsed) env.render('human') if len(np.array(infos['n']).shape) < 4: all_infos[ep_i, t_i, :, :len(infos['n'][-1])] = np.array(infos['n']) if config.save_gifs: gif_num = 0 while (gif_path / ('%i_%i.gif' % (gif_num, ep_i))).exists(): gif_num += 1 imageio.mimsave(str(gif_path / ('%i_%i.gif' % (gif_num, ep_i))), frames, duration=ifi) env.close() if config.save_stats: stats_path = model_path.parent / 'stats' if not config.mixed_policies else model_path.parent / 'stats_mixed' stats_path.mkdir(exist_ok=True) save(f'{stats_path}/all_infos.npy', all_infos) save(f'{stats_path}/all_positions.npy', all_positions)
def run(config): model_path = (Path('../models') / config.env_id / config.model_name / ('run%i' % config.run_num)) if config.incremental is not None: model_path = model_path / 'incremental' / ('model_ep%i.pt' % config.incremental) else: model_path = model_path / 'model.pt' gif_path = model_path.parent / 'stats' if not config.mixed_policies else model_path.parent / 'stats_mixed' gif_path.mkdir(exist_ok=True) torch.manual_seed(config.seed) np.random.seed(config.seed) if config.mixed_policies: maddpg = MADDPG.init_from_directory( Path('../models') / config.env_id / config.model_name) else: maddpg = MADDPG.init_from_save(model_path) env = make_env(config.env_id, benchmark=True, discrete_action=maddpg.discrete_action) env.seed(config.seed) maddpg.prep_rollouts(device='cpu') ifi = 1 / config.fps # inter-frame interval all_infos = np.empty( (config.n_episodes, config.episode_length, maddpg.nagents, 10)) n_movable_agents = sum([1 if a.movable else 0 for a in env.agents]) n_speaking_agents = sum([0 if a.silent else 1 for a in env.agents]) all_positions = np.zeros((config.n_episodes, config.episode_length, n_movable_agents, env.world.dim_p)) all_communications = np.zeros((config.n_episodes, config.episode_length, n_speaking_agents, env.world.dim_c)) all_actions = np.zeros((config.n_episodes, config.episode_length, len(env.agents), env.world.dim_c)) obs_space = sum([obsp.shape[0] for obsp in env.observation_space]) all_obs = np.zeros((config.n_episodes, config.episode_length, obs_space)) for ep_i in range(config.n_episodes): print("Episode %i of %i" % (ep_i + 1, config.n_episodes)) obs = env.reset() # env.agents[1].state.p_pos = np.array([0., 0.]) for t_i in range(config.episode_length): calc_start = time.time() # rearrange observations to be per agent, and convert to torch Variable torch_obs = [ Variable(torch.Tensor(obs[i]).view(1, -1), requires_grad=False) if not obs[i].ndim == 4 else Variable(torch.Tensor(obs[i]), requires_grad=False) for i in range(maddpg.nagents) ] all_positions[ep_i, t_i] = env.get_positions() all_communications[ep_i, t_i] = env.get_communications() # get actions as torch Variables torch_actions = maddpg.step(torch_obs, explore=False) # convert actions to numpy arrays actions = [ac.data.numpy().flatten() for ac in torch_actions] # actions[0] = np.array([0., 0., 0., 0., 0.], dtype=np.float32) # actions[0][ep_i] = 1. obs, rewards, dones, infos = env.step(actions) all_actions[ep_i, t_i, :, :] = actions all_obs[ep_i, t_i, :] = np.concatenate(np.asarray(obs)) calc_end = time.time() elapsed = calc_end - calc_start if elapsed < ifi: time.sleep(ifi - elapsed) if len(np.array(infos['n']).shape) < 4: all_infos[ep_i, t_i, :, :len(infos['n'][-1])] = np.array(infos['n']) env.close() if config.save_stats: stats_path = model_path.parent / 'stats' if not config.mixed_policies else model_path.parent / 'stats_mixed' stats_path.mkdir(exist_ok=True) save(f'{stats_path}/all_infos.npy', all_infos) save(f'{stats_path}/all_positions.npy', all_positions) save(f'{stats_path}/all_communications.npy', all_communications) save(f'{stats_path}/all_actions.npy', all_actions) save(f'{stats_path}/all_observations.npy', all_obs)