import argparse

from multiagent.environment import MultiAgentEnv
from multiagent.policy import InteractivePolicy
import multiagent.scenarios as scenarios

if __name__ == '__main__':
    # parse arguments
    parser = argparse.ArgumentParser(description=None)
    parser.add_argument('-s', '--scenario', default='simple.py',
                        help='Path of the scenario Python script.')
    args = parser.parse_args()

    # load scenario from script
    scenario = scenarios.load(args.scenario).Scenario()
    # create world
    world = scenario.make_world()
    # create multiagent environment
    env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                        scenario.observation, info_callback=None,
                        shared_viewer=False)
    # render call to create viewer window (necessary only for interactive policies)
    env.render()
    # create interactive policies for each agent
    policies = [InteractivePolicy(env, i) for i in range(env.n)]
    # execution loop
    obs_n = env.reset()
    while True:
        # query for action from each agent's policy
        act_n = []
        for i, policy in enumerate(policies):
            act_n.append(policy.action(obs_n[i]))
        # step environment
        obs_n, reward_n, done_n, _ = env.step(act_n)
        # render all agent views
        env.render()
        # display rewards
        #for agent in env.world.agents:
        #    print(agent.name + " reward: %0.3f" % env._get_reward(agent))
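# A minimal scripted-rollout sketch against the same MultiAgentEnv API, useful for
# smoke-testing a scenario without a keyboard. The one-hot action encoding below is
# an assumption: it matches MultiAgentEnv's default discrete-action handling, but a
# scenario configured differently may expect another action format.
import numpy as np

def random_rollout(scenario_path='simple.py', steps=100):
    scenario = scenarios.load(scenario_path).Scenario()
    env = MultiAgentEnv(scenario.make_world(), scenario.reset_world,
                        scenario.reward, scenario.observation)
    obs_n = env.reset()
    for _ in range(steps):
        act_n = []
        for space in env.action_space:
            onehot = np.zeros(space.n)  # assumes Discrete per-agent action spaces
            onehot[np.random.randint(space.n)] = 1.0
            act_n.append(onehot)
        obs_n, reward_n, done_n, _ = env.step(act_n)
        env.render()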
import time
from pathlib import Path

import imageio
import torch
from torch.autograd import Variable

# Project-local helpers (DirectoryManager, load_dict_from_json, set_seeds,
# init_from_save, make_env, EpisodeRecorder and the scripted policies) are
# assumed to be importable from this repository's utility modules.


def evaluate(config):
    DirectoryManager.root = Path(config.root)
    if config.seed_num is None:
        all_seeds = list(
            (DirectoryManager.root / config.storage_name /
             f"experiment{config.experiment_num}").iterdir())
        # take the numeric suffix of e.g. "seed1" (str.strip('seed') would strip
        # any leading/trailing 's', 'e', 'd' characters, not the prefix "seed")
        config.seed_num = all_seeds[0].stem[len('seed'):]

    # Creates paths and directories
    seed_path = (DirectoryManager.root / config.storage_name /
                 f"experiment{config.experiment_num}" / f"seed{config.seed_num}")
    dir_manager = DirectoryManager.init_from_seed_path(seed_path)

    if config.incremental is not None:
        model_path = dir_manager.incrementals_dir / f'model_ep{config.incremental}.pt'
    elif config.last_model:
        last_models = [
            path for path in dir_manager.seed_dir.iterdir()
            if path.suffix == ".pt" and not path.stem.endswith('best')
        ]
        assert len(last_models) == 1
        model_path = last_models[0]
    else:
        best_models = [
            path for path in dir_manager.seed_dir.iterdir()
            if path.suffix == ".pt" and path.stem.endswith('best')
        ]
        assert len(best_models) == 1
        model_path = best_models[0]

    # Retrieves world_params if there were any (see make_world function in multiagent.scenarios)
    if (dir_manager.seed_dir / 'world_params.json').exists():
        world_params = load_dict_from_json(str(dir_manager.seed_dir / 'world_params.json'))
    else:
        world_params = {}

    # Overwrites world_params if specified
    if config.shuffle_landmarks is not None:
        world_params['shuffle_landmarks'] = config.shuffle_landmarks
    if config.color_objects is not None:
        world_params['color_objects'] = config.color_objects
    if config.small_agents is not None:
        world_params['small_agents'] = config.small_agents
    if config.individual_reward is not None:
        world_params['individual_reward'] = config.individual_reward
    if config.use_dense_rewards is not None:
        world_params['use_dense_rewards'] = config.use_dense_rewards

    # Retrieves env_params (see multiagent.environment.MultiAgentEnv)
    if (dir_manager.seed_dir / 'env_params.json').exists():
        env_params = load_dict_from_json(str(dir_manager.seed_dir / 'env_params.json'))
    else:
        env_params = {}
    env_params['use_max_speed'] = False

    # Initializes model and environment
    set_seeds(config.rollout_seed)
    algorithm = init_from_save(model_path)
    env = make_env(scenario_name=env_params['env_name'],
                   use_discrete_action=algorithm.use_discrete_action,
                   use_max_speed=env_params['use_max_speed'],
                   world_params=world_params)

    if config.render:
        env.render()

    if config.runner_prey:
        # makes sure the environment involves a prey
        assert config.env_name.endswith('tag')
        runner_policy = RunnerPolicy()
        for agent in env.world.agents:
            if agent.adversary:
                agent.action_callback = runner_policy.action

    if config.rusher_predators:
        # makes sure the environment involves predators
        assert config.env_name.endswith('tag')
        rusher_policy = RusherPolicy()
        for agent in env.world.agents:
            if not agent.adversary:
                agent.action_callback = rusher_policy.action

    if config.pendulum_agent is not None:
        # makes sure the agent to be controlled has a valid id
        assert config.pendulum_agent in list(range(len(env.world.agents)))
        pendulum_policy = DoublePendulumPolicy()
        env.world.agents[config.pendulum_agent].action_callback = pendulum_policy.action

    if config.interactive_agent is not None:
        # makes sure the agent to be controlled has a valid id
        assert config.interactive_agent in list(range(len(env.world.agents)))
        interactive_policy = InteractivePolicy(env, viewer_id=0)
        env.world.agents[config.interactive_agent].action_callback = interactive_policy.action

    algorithm.prep_rollouts(device='cpu')
    ifi = 1 / config.fps  # inter-frame interval
    total_reward = []

    # EPISODES LOOP
    for ep_i in range(config.n_episodes):
        ep_recorder = EpisodeRecorder(stuff_to_record=['reward'])

        # Resets the environment
        obs = env.reset()
        if config.save_gifs:
            # accumulate frames across episodes into a single gif
            frames = [] if ep_i == 0 else frames
            frames.append(env.render('rgb_array')[0])
        if config.render:
            env.render('human')

        if not algorithm.soft:
            # Resets exploration noise
            algorithm.scale_noise(config.noise_scale)
            algorithm.reset_noise()

        # STEPS LOOP
        for t_i in range(config.episode_length):
            calc_start = time.time()

            # rearrange observations to be per agent, and convert to torch Variable
            torch_obs = [
                Variable(torch.Tensor(obs[i]).view(1, -1), requires_grad=False)
                for i in range(algorithm.nagents)
            ]
            # get actions as torch Variables
            torch_actions = algorithm.select_action(
                torch_obs, is_exploring=config.noise_scale is not None)
            # convert actions to numpy arrays
            actions = [ac.data.numpy().flatten() for ac in torch_actions]
            # steps forward in the environment
            obs, rewards, done, infos = env.step(actions)
            ep_recorder.add_step(None, None, rewards, None)

            # record frames
            if config.save_gifs:
                frames.append(env.render('rgb_array')[0])

            if config.render or config.save_gifs:
                # Enforces the fps config
                calc_end = time.time()
                elapsed = calc_end - calc_start
                if elapsed < ifi:
                    time.sleep(ifi - elapsed)
                env.render('human', close=False)

            # done is a per-agent list, so check all agents rather than its truthiness
            if all(done) and config.interrupt_episode:
                if config.render:
                    time.sleep(2)
                break

        total_reward.append(ep_recorder.get_total_reward())

    # Saves gif of all the episodes
    if config.save_gifs:
        gif_path = dir_manager.storage_dir / 'gifs'
        gif_path.mkdir(exist_ok=True)
        gif_num = 0
        while (gif_path /
               f"{env_params['env_name']}__experiment{config.experiment_num}"
               f"_seed{config.seed_num}_{gif_num}.gif").exists():
            gif_num += 1
        imageio.mimsave(
            str(gif_path /
                f"{env_params['env_name']}__experiment{config.experiment_num}"
                f"_seed{config.seed_num}_{gif_num}.gif"),
            frames, duration=ifi)
    env.close()

    return total_reward
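# A minimal driver sketch for evaluate(): every config field the function reads is
# listed explicitly so nothing is missing at attribute-access time. All values are
# illustrative assumptions, not the project's actual defaults.
from argparse import Namespace

eval_config = Namespace(
    root='save',                 # DirectoryManager root directory
    storage_name='experiments',  # <root>/<storage_name>/experiment<N>/seed<M>
    experiment_num=1,
    seed_num=None,               # None -> pick the first seed directory found
    rollout_seed=1,
    incremental=None,            # e.g. 100 -> load incrementals/model_ep100.pt
    last_model=False,            # False -> load the '*best.pt' checkpoint
    shuffle_landmarks=None,      # None -> keep the saved world_params value
    color_objects=None,
    small_agents=None,
    individual_reward=None,
    use_dense_rewards=None,
    runner_prey=False,
    rusher_predators=False,
    pendulum_agent=None,
    interactive_agent=None,
    env_name='spread',           # only checked by the '*tag' policy asserts
    render=True,
    save_gifs=False,
    n_episodes=10,
    episode_length=100,
    noise_scale=None,            # None -> act deterministically
    interrupt_episode=True,
    fps=30,
)

if __name__ == '__main__':
    total_reward = evaluate(eval_config)
    print(f"episode rewards: {total_reward}")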
import pickle
import sys
import time

import numpy as np
import tensorflow as tf

import maddpg.common.tf_util as U
from multiagent.environment import MultiAgentEnv
from multiagent.policy import InteractivePolicy
import multiagent.scenarios as scenarios

# make_env and get_trainers are assumed to be defined as in the MADDPG training
# script this function is adapted from.


def play(arglist):
    with U.single_threaded_session():
        # Create environment
        env = make_env(arglist.scenario, arglist, arglist.benchmark)
        # Create agent trainers
        obs_shape_n = [env.observation_space[i].shape for i in range(env.n)]
        num_adversaries = min(env.n, arglist.num_adversaries)
        trainers = get_trainers(env, num_adversaries, obs_shape_n, arglist)
        print('Using good policy {} and adv policy {}'.format(
            arglist.good_policy, arglist.adv_policy))

        # Initialize
        U.initialize()

        # Load previous results, if necessary
        if arglist.load_dir == "":
            arglist.load_dir = arglist.save_dir
        if arglist.display or arglist.restore or arglist.benchmark:
            print('Loading previous state...')
            U.load_state(arglist.load_dir)

        episode_rewards = [0.0]  # sum of rewards for all agents
        agent_rewards = [[0.0] for _ in range(env.n)]  # individual agent reward
        final_ep_rewards = []  # sum of rewards for training curve
        final_ep_ag_rewards = []  # agent rewards for training curve
        agent_info = [[[]]]  # placeholder for benchmarking info
        saver = tf.train.Saver()
        obs_n = env.reset()
        episode_step = 0
        train_step = 0
        t_start = time.time()

        print('Starting iterations...')

        # reload the scenario so the interactive environment below can be built
        # (mirrors the loading convention used by make_env)
        scenario = scenarios.load(arglist.scenario + '.py').Scenario()
        # create world
        world = scenario.make_world()
        # create multiagent environment
        env = MultiAgentEnv(world, scenario.reset_world, scenario.reward,
                            scenario.observation, info_callback=None,
                            shared_viewer=True)
        env.window_pos = 'right'
        # render call to create viewer window (necessary only for interactive policies)
        env.render()
        # create an interactive policy for the last agent only
        policy = InteractivePolicy(env, -1)
        # execution loop
        obs_n = env.reset()
        while True:
            # query for action from each agent's policy
            act_n = [agent.action(obs) for agent, obs in zip(trainers, obs_n)]  # trained policies
            act_n[-1] = policy.action(obs_n[-1])  # interactive keyboard policy
            # step environment
            new_obs_n, reward_n, done_n, info_n = env.step(act_n)
            episode_step += 1
            done = all(done_n)
            terminal = (episode_step >= arglist.max_episode_len)
            # collect experience
            for i, agent in enumerate(trainers):
                agent.experience(obs_n[i], act_n[i], reward_n[i],
                                 new_obs_n[i], done_n[i], terminal)
            obs_n = new_obs_n

            for i, rew in enumerate(reward_n):
                episode_rewards[-1] += rew
                agent_rewards[i][-1] += rew

            if done or terminal:
                obs_n = env.reset()
                episode_step = 0
                episode_rewards.append(0)
                for a in agent_rewards:
                    a.append(0)
                agent_info.append([[]])

            # increment global step counter
            train_step += 1

            # for game over
            try:
                if scenario.game_over:
                    sys.exit(0)
            except AttributeError:
                pass

            # for benchmarking learned policies
            if arglist.benchmark:
                for i, info in enumerate(info_n):
                    agent_info[-1][i].append(info_n['n'])
                if train_step > arglist.benchmark_iters and (done or terminal):
                    file_name = arglist.benchmark_dir + arglist.exp_name + '.pkl'
                    print('Finished benchmarking, now saving...')
                    with open(file_name, 'wb') as fp:
                        pickle.dump(agent_info[:-1], fp)
                    break
                continue

            # render all agent views
            time.sleep(arglist.delay)
            env.render()

            # display rewards
            #for agent in env.world.agents:
            #    print(agent.name + " reward: %0.3f" % env._get_reward(agent))

            # update all trainers, if not in display or benchmark mode
            loss = None
            for agent in trainers:
                agent.preupdate()
            for agent in trainers:
                loss = agent.update(trainers, train_step)

            # save model, display training output
            if terminal and (len(episode_rewards) % arglist.save_rate == 0):
                U.save_state(arglist.save_dir, saver=saver)
                # print statement depends on whether or not there are adversaries
                if num_adversaries == 0:
                    print("steps: {}, episodes: {}, mean episode reward: {}, time: {}".format(
                        train_step, len(episode_rewards),
                        np.mean(episode_rewards[-arglist.save_rate:]),
                        round(time.time() - t_start, 3)))
                else:
                    print("steps: {}, episodes: {}, mean episode reward: {}, "
                          "agent episode reward: {}, time: {}".format(
                              train_step, len(episode_rewards),
                              np.mean(episode_rewards[-arglist.save_rate:]),
                              [np.mean(rew[-arglist.save_rate:]) for rew in agent_rewards],
                              round(time.time() - t_start, 3)))
                t_start = time.time()
                # Keep track of final episode reward
                final_ep_rewards.append(np.mean(episode_rewards[-arglist.save_rate:]))
                for rew in agent_rewards:
                    final_ep_ag_rewards.append(np.mean(rew[-arglist.save_rate:]))

            # saves final episode reward for plotting training curve later
            if len(episode_rewards) > arglist.num_episodes:
                U.save_state(arglist.plots_dir, saver=saver)
                rew_file_name = arglist.plots_dir + arglist.exp_name + '_rewards.pkl'
                with open(rew_file_name, 'wb') as fp:
                    pickle.dump(final_ep_rewards, fp)
                agrew_file_name = arglist.plots_dir + arglist.exp_name + '_agrewards.pkl'
                with open(agrew_file_name, 'wb') as fp:
                    pickle.dump(final_ep_ag_rewards, fp)
                print('...Finished total of {} episodes.'.format(len(episode_rewards)))
                break
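# A possible parse_args for play(), covering every arglist field read above plus the
# 'delay' used to slow rendering. Flag names and defaults are assumptions modeled on
# the MADDPG reference trainer, not this project's actual CLI.
import argparse

def parse_args():
    parser = argparse.ArgumentParser("Interactive play with trained MADDPG agents")
    parser.add_argument("--scenario", type=str, default="simple")
    parser.add_argument("--max-episode-len", type=int, default=25)
    parser.add_argument("--num-episodes", type=int, default=60000)
    parser.add_argument("--num-adversaries", type=int, default=0)
    parser.add_argument("--good-policy", type=str, default="maddpg")
    parser.add_argument("--adv-policy", type=str, default="maddpg")
    parser.add_argument("--save-dir", type=str, default="/tmp/policy/")
    parser.add_argument("--save-rate", type=int, default=1000)
    parser.add_argument("--load-dir", type=str, default="")
    parser.add_argument("--restore", action="store_true", default=False)
    parser.add_argument("--display", action="store_true", default=False)
    parser.add_argument("--benchmark", action="store_true", default=False)
    parser.add_argument("--benchmark-iters", type=int, default=100000)
    parser.add_argument("--benchmark-dir", type=str, default="./benchmark_files/")
    parser.add_argument("--plots-dir", type=str, default="./learning_curves/")
    parser.add_argument("--exp-name", type=str, default="exp")
    parser.add_argument("--delay", type=float, default=0.0)  # seconds slept per render
    return parser.parse_args()

if __name__ == '__main__':
    play(parse_args())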
import os.path as osp
import pickle
import time

import imageio
import numpy as np
import torch
from matplotlib import cm
from torch.autograd import Variable

# As above, project-local helpers (DirectoryManager, load_dict_from_json,
# init_from_save, make_env, EpisodeRecorder, onehot_from_logits, U.directory_tree
# and the scripted policies) are assumed to be importable from this repository.


def evaluate(config):
    if config.seed_num is None:
        all_seeds = list(
            (DirectoryManager.root / config.storage_name /
             f"experiment{config.experiment_num}").iterdir())
        # take the numeric suffix of e.g. "seed1" (see note in the first evaluate)
        config.seed_num = all_seeds[0].stem[len('seed'):]

    # Creates paths and directories
    seed_path = (DirectoryManager.root / config.storage_name /
                 f"experiment{config.experiment_num}" / f"seed{config.seed_num}")
    dir_manager = DirectoryManager.init_from_seed_path(seed_path)

    if config.incremental is not None:
        model_path = dir_manager.incrementals_dir / f'model_ep{config.incremental}.pt'
    elif config.last_model:
        last_models = [
            path for path in dir_manager.seed_dir.iterdir()
            if path.suffix == ".pt" and not path.stem.endswith('best')
        ]
        assert len(last_models) == 1
        model_path = last_models[0]
    else:
        best_models = [
            path for path in dir_manager.seed_dir.iterdir()
            if path.suffix == ".pt" and path.stem.endswith('best')
        ]
        assert len(best_models) == 1
        model_path = best_models[0]

    # Retrieves world_params if there were any (see make_world function in multiagent.scenarios)
    if (dir_manager.seed_dir / 'world_params.json').exists():
        world_params = load_dict_from_json(str(dir_manager.seed_dir / 'world_params.json'))
    else:
        world_params = {}

    # Overwrites world_params if specified
    if config.shuffle_landmarks is not None:
        world_params['shuffle_landmarks'] = config.shuffle_landmarks
    if config.color_objects is not None:
        world_params['color_objects'] = config.color_objects
    if config.small_agents is not None:
        world_params['small_agents'] = config.small_agents
    if config.individual_reward is not None:
        world_params['individual_reward'] = config.individual_reward
    if config.use_dense_rewards is not None:
        world_params['use_dense_rewards'] = config.use_dense_rewards

    # Retrieves env_params (see multiagent.environment.MultiAgentEnv)
    if (dir_manager.seed_dir / 'env_params.json').exists():
        env_params = load_dict_from_json(str(dir_manager.seed_dir / 'env_params.json'))
    else:
        env_params = {}
    env_params['use_max_speed'] = False

    # Initializes model and environment
    algorithm = init_from_save(model_path)
    env = make_env(scenario_name=env_params['env_name'],
                   use_discrete_action=algorithm.use_discrete_action,
                   use_max_speed=env_params['use_max_speed'],
                   world_params=world_params)

    if config.render:
        env.render()

    if config.runner_prey:
        # makes sure the environment involves a prey
        assert config.env_name.endswith('tag')
        runner_policy = RunnerPolicy()
        for agent in env.world.agents:
            if agent.adversary:
                agent.action_callback = runner_policy.action

    if config.rusher_predators:
        # makes sure the environment involves predators
        assert config.env_name.endswith('tag')
        rusher_policy = RusherPolicy()
        for agent in env.world.agents:
            if not agent.adversary:
                agent.action_callback = rusher_policy.action

    if config.pendulum_agent is not None:
        # makes sure the agent to be controlled has a valid id
        assert config.pendulum_agent in list(range(len(env.world.agents)))
        pendulum_policy = DoublePendulumPolicy()
        env.world.agents[config.pendulum_agent].action_callback = pendulum_policy.action

    if config.interactive_agent is not None:
        # makes sure the agent to be controlled has a valid id
        assert config.interactive_agent in list(range(len(env.world.agents)))
        interactive_policy = InteractivePolicy(env, viewer_id=0)
        env.world.agents[config.interactive_agent].action_callback = interactive_policy.action

    algorithm.prep_rollouts(device='cpu')
    ifi = 1 / config.fps  # inter-frame interval
    total_reward = []
    all_episodes_agent_embeddings = []
    all_episodes_coach_embeddings = []
    all_trajs = []
    override_color = None
    color_agents = True

    if env_params['env_name'] == 'bounce':
        env.agents[0].size = 1. * env.agents[0].size
        env.world.overwrite = config.overwrite
    elif env_params['env_name'] == 'spread':
        color_agents = False
    elif env_params['env_name'] == 'compromise':
        env.agents[0].lightness = 0.9
        env.world.landmarks[0].lightness = 0.9
        env.agents[1].lightness = 0.5
        env.world.landmarks[1].lightness = 0.5
        # cmo = plt.cm.get_cmap('viridis')
        # override_color = [np.array(cmo(float(i) / float(2))[:3]) for i in range(2)]
        env.world.overwrite = config.overwrite

    # EPISODES LOOP
    for ep_i in range(config.n_episodes):
        agent_embeddings = []
        coach_embeddings = []
        traj = []
        ep_recorder = EpisodeRecorder(stuff_to_record=['reward'])

        # Resets the environment
        obs = env.reset()
        if config.save_gifs:
            # accumulate frames across episodes into a single gif
            frames = [] if ep_i == 0 else frames
        if config.render:
            env.render('human')

        if not algorithm.soft:
            # Resets exploration noise
            algorithm.scale_noise(config.noise_scale)
            algorithm.reset_noise()

        # STEPS LOOP
        for t_i in range(config.episode_length):
            calc_start = time.time()

            # rearrange observations to be per agent, and convert to torch Variable
            torch_obs = [
                Variable(torch.Tensor(obs[i]).view(1, -1), requires_grad=False)
                for i in range(algorithm.nagents)
            ]
            # get actions (and per-agent embeddings) as torch Variables
            torch_actions, torch_embed = algorithm.select_action(
                torch_obs,
                is_exploring=config.noise_scale is not None,
                return_embed=True)
            torch_total_obs = torch.cat(torch_obs, dim=-1)
            coach_embed = onehot_from_logits(algorithm.coach.model(torch_total_obs))
            coach_embeddings.append(coach_embed.data.numpy().squeeze())
            # convert actions and embeddings to numpy arrays
            actions = [ac.data.numpy().flatten() for ac in torch_actions]
            embeds = [emb.data.numpy().squeeze() for emb in torch_embed]
            agent_embeddings.append(embeds)
            # steps forward in the environment
            next_obs, rewards, dones, infos = env.step(actions)
            ep_recorder.add_step(None, None, rewards, None)
            traj.append((obs, actions, next_obs, rewards, dones))
            obs = next_obs

            # color agents according to their current embedding
            colors = list(cm.get_cmap('Set1').colors[:len(embeds[0])])
            if override_color is not None:
                colors[0] = override_color[0]
                colors[2] = override_color[1]
            if color_agents:
                for agent, emb in zip(env.agents, embeds):
                    agent.color = colors[np.argmax(emb)]

            # record frames
            if config.save_gifs:
                frames.append(env.render('rgb_array')[0])

            if config.render or config.save_gifs:
                # Enforces the fps config
                calc_end = time.time()
                elapsed = calc_end - calc_start
                if elapsed < ifi:
                    time.sleep(ifi - elapsed)
                env.render('human')

            if all(dones) and config.interrupt_episode:
                if config.render:
                    time.sleep(2)
                break

        # print(ep_recorder.get_total_reward())
        total_reward.append(ep_recorder.get_total_reward())
        all_episodes_agent_embeddings.append(agent_embeddings)
        all_episodes_coach_embeddings.append(coach_embeddings)
        all_trajs.append(traj)

    # Saves gif of all the episodes
    if config.save_gifs:
        gif_path = dir_manager.storage_dir / 'gifs'
        gif_path.mkdir(exist_ok=True)
        gif_num = 0
        while (gif_path /
               f"{env_params['env_name']}__experiment{config.experiment_num}"
               f"_seed{config.seed_num}_{gif_num}.gif").exists():
            gif_num += 1
        imageio.mimsave(
            str(gif_path /
                f"{env_params['env_name']}__experiment{config.experiment_num}"
                f"_seed{config.seed_num}_{gif_num}.gif"),
            frames, duration=ifi)
    env.close()

    embeddings = {
        'agents': all_episodes_agent_embeddings,
        'coach': all_episodes_coach_embeddings
    }
    save_folder = dir_manager.experiment_dir if config.save_to_exp_folder else dir_manager.seed_dir
    embeddings_path = U.directory_tree.uniquify(save_folder / f"{config.file_name_to_save}.pkl")
    trajs_path = osp.splitext(embeddings_path)[0] + "_trajs.pkl"

    with open(embeddings_path, 'wb') as fp:
        pickle.dump(embeddings, fp)
    with open(trajs_path, 'wb') as fp:
        pickle.dump(all_trajs, fp)

    return total_reward, str(embeddings_path)
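# Usage sketch for the artifacts written above: the embeddings pickle path is this
# evaluate()'s second return value, and the dict keys ('agents', 'coach') match the
# structure saved there. The config object is assumed to be built as for the first
# evaluate(), plus the extra fields read here (overwrite, save_to_exp_folder,
# file_name_to_save).

if __name__ == '__main__':
    total_reward, embeddings_path = evaluate(eval_config)
    with open(embeddings_path, 'rb') as fp:
        embeddings = pickle.load(fp)
    agent_embed = embeddings['agents']  # [episode][step][agent] -> embedding array
    coach_embed = embeddings['coach']   # [episode][step] -> coach one-hot vector
    print(f"episodes: {len(agent_embed)}, episode rewards: {total_reward}")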