Пример #1
0
def experiment(variant):
    """Train the ``r2g_gnn4`` ``R2GGNNTrainer`` on a ``DifferentialGame``.

    Builds three graph context networks (``cg1`` and ``cg2``, each with a
    deep-copied target, plus ``cgca``) and, for every agent, two Q-networks
    with targets, a ``cactor`` and a policy. Everything is then handed to
    ``TorchBatchRLAlgorithm`` and trained.

    Relies on names that are not defined inside this function: ``args``
    (``args.exp_name`` selects the game), ``copy``, ``nn``, ``ptu`` and
    ``get_generic_ma_path_information``.

    Args:
        variant: Nested configuration dict. Keys read here: ``num_agent``,
            ``graph_kwargs`` (``use_attention``, ``num_layer``,
            ``hidden_dim``), ``qf_kwargs`` / ``cactor_kwargs`` /
            ``policy_kwargs`` (``hidden_dim``, ``num_layer``),
            ``replay_buffer_size``, ``trainer_kwargs`` and
            ``algorithm_kwargs`` (which must contain ``batch_size``).
    """
    from differential_game import DifferentialGame
    from rlkit.torch.networks.graph_builders import FullGraphBuilder
    from rlkit.torch.networks.graph_context_network import GraphContextNet
    from rlkit.torch.networks.networks import FlattenMlp
    from rlkit.torch.networks.layers import SplitLayer
    from rlkit.torch.policies.tanh_gaussian_policy import TanhGaussianPolicy
    from rlkit.torch.policies.make_deterministic import MakeDeterministic
    from rlkit.samplers.data_collector.ma_path_collector import MAMdpPathCollector
    from rlkit.data_management.ma_env_replay_buffer import MAEnvReplayBuffer
    from rlkit.torch.r2g.r2g_gnn4 import R2GGNNTrainer
    from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm

    num_agent = variant['num_agent']
    expl_env = DifferentialGame(game_name=args.exp_name)
    eval_env = DifferentialGame(game_name=args.exp_name)
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.low.size

    graph_kwargs = variant['graph_kwargs']
    qf_kwargs = variant['qf_kwargs']
    cactor_kwargs = variant['cactor_kwargs']
    policy_kwargs = variant['policy_kwargs']
    batch_size = variant['algorithm_kwargs']['batch_size']

    def hidden_sizes(kwargs):
        # ``num_layer`` counts the output layer, hence ``- 1`` hidden layers.
        return [kwargs['hidden_dim']] * (kwargs['num_layer'] - 1)

    def make_context_net():
        # Every context net gets its own graph builder instance.
        builder = FullGraphBuilder(
            input_node_dim=obs_dim + action_dim,
            num_node=num_agent,
            batch_size=batch_size,
            contain_self_loop=False)
        return GraphContextNet(
            builder,
            obs_dim,
            action_dim,
            use_attention=graph_kwargs['use_attention'],
            num_layer=graph_kwargs['num_layer'],
            node_dim=graph_kwargs['hidden_dim'],
            output_activation='relu',
        )

    def make_qf():
        return FlattenMlp(
            input_size=graph_kwargs['hidden_dim'] + action_dim,
            output_size=1,
            hidden_sizes=hidden_sizes(qf_kwargs),
        )

    def make_gaussian_head(in_dim):
        # Two parallel linear heads, as consumed by TanhGaussianPolicy.
        return SplitLayer(layers=[
            nn.Linear(in_dim, action_dim),
            nn.Linear(in_dim, action_dim)
        ])

    # NOTE: construction order is kept identical to the original script so
    # that seeded weight initialisation is unchanged.
    cg1 = make_context_net()
    target_cg1 = copy.deepcopy(cg1)
    cg2 = make_context_net()
    target_cg2 = copy.deepcopy(cg2)
    cgca = make_context_net()

    policy_n, expl_policy_n, eval_policy_n = [], [], []
    qf1_n, target_qf1_n, qf2_n, target_qf2_n = [], [], [], []
    cactor_n = []
    for _ in range(num_agent):
        qf1 = make_qf()
        target_qf1 = copy.deepcopy(qf1)
        qf2 = make_qf()
        target_qf2 = copy.deepcopy(qf2)

        # The heads must accept what the preceding MLP emits, i.e.
        # cactor_kwargs['hidden_dim'] features (previously sized with
        # policy_kwargs['hidden_dim'], which only worked when both matched).
        cactor = nn.Sequential(
            FlattenMlp(
                input_size=graph_kwargs['hidden_dim'],
                output_size=cactor_kwargs['hidden_dim'],
                hidden_sizes=hidden_sizes(cactor_kwargs),
            ), nn.ReLU(),
            make_gaussian_head(cactor_kwargs['hidden_dim']))
        cactor = TanhGaussianPolicy(module=cactor)

        policy = nn.Sequential(
            FlattenMlp(
                input_size=obs_dim,
                output_size=policy_kwargs['hidden_dim'],
                hidden_sizes=hidden_sizes(policy_kwargs),
            ),
            make_gaussian_head(policy_kwargs['hidden_dim']))
        policy = TanhGaussianPolicy(module=policy)
        eval_policy = MakeDeterministic(policy)
        expl_policy = policy

        policy_n.append(policy)
        expl_policy_n.append(expl_policy)
        eval_policy_n.append(eval_policy)
        qf1_n.append(qf1)
        target_qf1_n.append(target_qf1)
        qf2_n.append(qf2)
        target_qf2_n.append(target_qf2)
        cactor_n.append(cactor)

    eval_path_collector = MAMdpPathCollector(eval_env, eval_policy_n)
    expl_path_collector = MAMdpPathCollector(expl_env, expl_policy_n)

    replay_buffer = MAEnvReplayBuffer(variant['replay_buffer_size'],
                                      expl_env,
                                      num_agent=num_agent)

    trainer = R2GGNNTrainer(env=expl_env,
                            cg1=cg1,
                            target_cg1=target_cg1,
                            qf1_n=qf1_n,
                            target_qf1_n=target_qf1_n,
                            cg2=cg2,
                            target_cg2=target_cg2,
                            qf2_n=qf2_n,
                            target_qf2_n=target_qf2_n,
                            cgca=cgca,
                            cactor_n=cactor_n,
                            policy_n=policy_n,
                            **variant['trainer_kwargs'])

    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        log_path_function=get_generic_ma_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
Пример #2
0
def experiment(variant):
    """Train the ``r2g_gnn3`` ``R2GGNNTrainer`` on a particle environment.

    Builds two ``GraphContextNet`` + ``FlattenMlp`` pairs (each with
    deep-copied targets), a ``GNNNet`` (``cgca``) with a single ``cactor``,
    and one policy per agent. The initial snapshot is saved as
    ``itr_-1.pkl`` in the logger's snapshot directory before training starts.

    Relies on names that are not defined inside this function: ``args``
    (``args.exp_name`` is passed to ``make_env``), ``copy``, ``nn``,
    ``torch``, ``osp``, ``ptu`` and ``get_generic_ma_path_information``.

    Args:
        variant: Nested configuration dict. Keys read here: ``world_args``,
            ``graph_kwargs`` (forwarded wholesale to ``GraphContextNet``;
            ``node_dim`` and ``num_layer`` are also read directly),
            ``qf_kwargs`` / ``cactor_kwargs`` / ``policy_kwargs``
            (``hidden_dim``, ``num_layer``), ``replay_buffer_size``,
            ``trainer_kwargs`` and ``algorithm_kwargs`` (which must contain
            ``batch_size``).
    """
    import sys
    # The particle-env package is expected next to the working directory.
    sys.path.append("./multiagent-particle-envs")
    from make_env import make_env
    from particle_env_wrapper import ParticleEnv
    from rlkit.torch.networks.graph_builders import FullGraphBuilder
    from rlkit.torch.networks.graph_context_network import GraphContextNet
    from rlkit.torch.networks.networks import FlattenMlp
    from rlkit.torch.networks.gnn_networks import GNNNet
    from rlkit.torch.networks.layers import SplitLayer
    from rlkit.torch.policies.tanh_gaussian_policy import TanhGaussianPolicy
    from rlkit.torch.policies.make_deterministic import MakeDeterministic
    from rlkit.samplers.data_collector.ma_path_collector import MAMdpPathCollector
    from rlkit.data_management.ma_env_replay_buffer import MAEnvReplayBuffer
    from rlkit.torch.r2g.r2g_gnn3 import R2GGNNTrainer
    from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm
    from rlkit.core import logger

    expl_env = ParticleEnv(
        make_env(args.exp_name,
                 discrete_action_space=False,
                 world_args=variant['world_args']))
    eval_env = ParticleEnv(
        make_env(args.exp_name,
                 discrete_action_space=False,
                 world_args=variant['world_args']))
    num_agent = expl_env.num_agent
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.low.size

    graph_kwargs = variant['graph_kwargs']
    qf_kwargs = variant['qf_kwargs']
    cactor_kwargs = variant['cactor_kwargs']
    policy_kwargs = variant['policy_kwargs']
    batch_size = variant['algorithm_kwargs']['batch_size']

    def lrelu():
        # A fresh module per use, exactly as in the original script.
        return nn.LeakyReLU(negative_slope=0.2)

    def hidden_sizes(kwargs):
        # ``num_layer`` counts the output layer, hence ``- 1`` hidden layers.
        return [kwargs['hidden_dim']] * (kwargs['num_layer'] - 1)

    def make_graph_builder():
        return FullGraphBuilder(
            input_node_dim=obs_dim + action_dim,
            num_node=num_agent,
            batch_size=batch_size,
            contain_self_loop=False)

    def make_context_net():
        return GraphContextNet(make_graph_builder(),
                               obs_dim,
                               action_dim,
                               output_activation='lrelu0.2',
                               **graph_kwargs)

    def make_qf():
        return FlattenMlp(
            input_size=graph_kwargs['node_dim'] + action_dim,
            output_size=1,
            hidden_sizes=hidden_sizes(qf_kwargs),
            hidden_activation=lrelu(),
        )

    def make_gaussian_head(in_dim):
        # Two parallel linear heads, as consumed by TanhGaussianPolicy.
        return SplitLayer(layers=[
            nn.Linear(in_dim, action_dim),
            nn.Linear(in_dim, action_dim)
        ])

    # NOTE: construction order is kept identical to the original script so
    # that seeded weight initialisation is unchanged.
    cg1 = make_context_net()
    target_cg1 = copy.deepcopy(cg1)
    qf1 = make_qf()
    target_qf1 = copy.deepcopy(qf1)

    cg2 = make_context_net()
    target_cg2 = copy.deepcopy(cg2)
    qf2 = make_qf()
    target_qf2 = copy.deepcopy(qf2)

    cgca = GNNNet(
        pre_graph_builder=make_graph_builder(),
        node_dim=graph_kwargs['node_dim'],
        conv_type='GSage',
        num_conv_layers=graph_kwargs['num_layer'],
        hidden_activation='lrelu0.2',
        output_activation='lrelu0.2',
    )
    # The heads must accept what the preceding MLP emits, i.e.
    # cactor_kwargs['hidden_dim'] features (previously sized with
    # policy_kwargs['hidden_dim'], which only worked when both matched).
    # NOTE(review): LeakyReLU is applied twice in a row here (as the MLP's
    # output_activation and as the next module); kept as-is.
    cactor = nn.Sequential(
        FlattenMlp(
            input_size=graph_kwargs['node_dim'],
            output_size=cactor_kwargs['hidden_dim'],
            hidden_sizes=hidden_sizes(cactor_kwargs),
            hidden_activation=lrelu(),
            output_activation=lrelu(),
        ), lrelu(),
        make_gaussian_head(cactor_kwargs['hidden_dim']))
    cactor = TanhGaussianPolicy(module=cactor)

    policy_n, expl_policy_n, eval_policy_n = [], [], []
    for _ in range(num_agent):
        policy = nn.Sequential(
            FlattenMlp(
                input_size=obs_dim,
                output_size=policy_kwargs['hidden_dim'],
                hidden_sizes=hidden_sizes(policy_kwargs),
                hidden_activation=lrelu(),
                output_activation=lrelu(),
            ),
            make_gaussian_head(policy_kwargs['hidden_dim']))
        policy = TanhGaussianPolicy(module=policy)
        eval_policy = MakeDeterministic(policy)
        expl_policy = policy

        policy_n.append(policy)
        expl_policy_n.append(expl_policy)
        eval_policy_n.append(eval_policy)

    eval_path_collector = MAMdpPathCollector(eval_env, eval_policy_n)
    expl_path_collector = MAMdpPathCollector(expl_env, expl_policy_n)

    replay_buffer = MAEnvReplayBuffer(variant['replay_buffer_size'],
                                      expl_env,
                                      num_agent=num_agent)

    trainer = R2GGNNTrainer(env=expl_env,
                            cg1=cg1,
                            target_cg1=target_cg1,
                            qf1=qf1,
                            target_qf1=target_qf1,
                            cg2=cg2,
                            target_cg2=target_cg2,
                            qf2=qf2,
                            target_qf2=target_qf2,
                            cgca=cgca,
                            cactor=cactor,
                            policy_n=policy_n,
                            **variant['trainer_kwargs'])

    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        log_path_function=get_generic_ma_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)

    # Save the initial (pre-training) parameters as iteration -1.
    snapshot = algorithm._get_snapshot()
    file_name = osp.join(logger._snapshot_dir, 'itr_-1.pkl')
    torch.save(snapshot, file_name)

    algorithm.train()
Пример #3
0
def experiment(variant):
    """Train the ``r2g_gnn_sep`` ``R2GGNNTrainer`` on ``MultiDifferentialGame``.

    Unlike the shared-network variants, every agent gets its own set of
    networks: two ``GraphContextNet`` + ``FlattenMlp`` pairs with deep-copied
    targets, a ``cgca`` context net, a ``cactor`` and a policy.

    Relies on names that are not defined inside this function: ``copy``,
    ``nn``, ``ptu`` and ``get_generic_ma_path_information``.

    Args:
        variant: Nested configuration dict. Keys read here: ``env_kwargs``,
            ``graph_kwargs`` (forwarded wholesale to ``GraphContextNet``;
            ``node_dim`` is also read directly), ``qf_kwargs`` /
            ``cactor_kwargs`` / ``policy_kwargs`` (``hidden_dim``,
            ``num_layer``), ``random_exploration``, ``replay_buffer_size``,
            ``trainer_kwargs`` and ``algorithm_kwargs`` (which must contain
            ``batch_size``).
    """
    from multi_differential_game import MultiDifferentialGame
    from rlkit.torch.networks.graph_builders import FullGraphBuilder
    from rlkit.torch.networks.graph_context_network import GraphContextNet
    from rlkit.torch.networks.networks import FlattenMlp
    from rlkit.torch.networks.layers import SplitLayer
    from rlkit.torch.policies.tanh_gaussian_policy import TanhGaussianPolicy
    from rlkit.torch.policies.make_deterministic import MakeDeterministic
    from rlkit.samplers.data_collector.ma_path_collector import MAMdpPathCollector
    from rlkit.data_management.ma_env_replay_buffer import MAEnvReplayBuffer
    from rlkit.torch.r2g.r2g_gnn_sep import R2GGNNTrainer
    from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm

    expl_env = MultiDifferentialGame(**variant['env_kwargs'])
    eval_env = MultiDifferentialGame(**variant['env_kwargs'])
    num_agent = expl_env.agent_num
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.low.size

    graph_kwargs = variant['graph_kwargs']
    qf_kwargs = variant['qf_kwargs']
    cactor_kwargs = variant['cactor_kwargs']
    policy_kwargs = variant['policy_kwargs']
    batch_size = variant['algorithm_kwargs']['batch_size']

    # The exploration wrappers are only imported when they are needed.
    random_exploration = variant['random_exploration']
    if random_exploration:
        from rlkit.exploration_strategies.base import PolicyWrappedWithExplorationStrategy
        from rlkit.exploration_strategies.epsilon_greedy import EpsilonGreedy

    def lrelu():
        # A fresh module per use, exactly as in the original script.
        return nn.LeakyReLU(negative_slope=0.2)

    def hidden_sizes(kwargs):
        # ``num_layer`` counts the output layer, hence ``- 1`` hidden layers.
        return [kwargs['hidden_dim']] * (kwargs['num_layer'] - 1)

    def make_context_net():
        # Every context net gets its own graph builder instance.
        builder = FullGraphBuilder(
            input_node_dim=obs_dim + action_dim,
            num_node=num_agent,
            batch_size=batch_size,
            contain_self_loop=False)
        return GraphContextNet(builder,
                               obs_dim,
                               action_dim,
                               output_activation='lrelu0.2',
                               **graph_kwargs)

    def make_qf():
        return FlattenMlp(
            input_size=graph_kwargs['node_dim'] + action_dim,
            output_size=1,
            hidden_sizes=hidden_sizes(qf_kwargs),
            hidden_activation=lrelu(),
        )

    def make_gaussian_head(in_dim):
        # Two parallel linear heads, as consumed by TanhGaussianPolicy.
        return SplitLayer(layers=[
            nn.Linear(in_dim, action_dim),
            nn.Linear(in_dim, action_dim)
        ])

    policy_n, expl_policy_n, eval_policy_n = [], [], []
    cg1_n, target_cg1_n, cg2_n, target_cg2_n = [], [], [], []
    qf1_n, target_qf1_n, qf2_n, target_qf2_n = [], [], [], []
    cgca_n, cactor_n = [], []
    for _ in range(num_agent):
        # NOTE: construction order is kept identical to the original script
        # so that seeded weight initialisation is unchanged.
        cg1 = make_context_net()
        target_cg1 = copy.deepcopy(cg1)
        qf1 = make_qf()
        target_qf1 = copy.deepcopy(qf1)

        cg2 = make_context_net()
        target_cg2 = copy.deepcopy(cg2)
        qf2 = make_qf()
        target_qf2 = copy.deepcopy(qf2)

        cgca = make_context_net()

        # The heads must accept what the preceding MLP emits, i.e.
        # cactor_kwargs['hidden_dim'] features (previously sized with
        # policy_kwargs['hidden_dim'], which only worked when both matched).
        # NOTE(review): a ReLU follows the MLP's LeakyReLU output activation
        # here; kept as-is.
        cactor = nn.Sequential(
            FlattenMlp(
                input_size=graph_kwargs['node_dim'],
                output_size=cactor_kwargs['hidden_dim'],
                hidden_sizes=hidden_sizes(cactor_kwargs),
                hidden_activation=lrelu(),
                output_activation=lrelu(),
            ), nn.ReLU(),
            make_gaussian_head(cactor_kwargs['hidden_dim']))
        cactor = TanhGaussianPolicy(module=cactor)

        policy = nn.Sequential(
            FlattenMlp(
                input_size=obs_dim,
                output_size=policy_kwargs['hidden_dim'],
                hidden_sizes=hidden_sizes(policy_kwargs),
                hidden_activation=lrelu(),
                output_activation=lrelu(),
            ),
            make_gaussian_head(policy_kwargs['hidden_dim']))
        policy = TanhGaussianPolicy(module=policy)
        eval_policy = MakeDeterministic(policy)
        if random_exploration:
            # prob_random_action=1.0: the wrapper is configured to always
            # pick the random action.
            expl_policy = PolicyWrappedWithExplorationStrategy(
                exploration_strategy=EpsilonGreedy(expl_env.action_space,
                                                   prob_random_action=1.0),
                policy=policy,
            )
        else:
            expl_policy = policy

        policy_n.append(policy)
        expl_policy_n.append(expl_policy)
        eval_policy_n.append(eval_policy)
        cg1_n.append(cg1)
        target_cg1_n.append(target_cg1)
        qf1_n.append(qf1)
        target_qf1_n.append(target_qf1)
        cg2_n.append(cg2)
        target_cg2_n.append(target_cg2)
        qf2_n.append(qf2)
        target_qf2_n.append(target_qf2)
        cgca_n.append(cgca)
        cactor_n.append(cactor)

    eval_path_collector = MAMdpPathCollector(eval_env, eval_policy_n)
    expl_path_collector = MAMdpPathCollector(expl_env, expl_policy_n)

    replay_buffer = MAEnvReplayBuffer(variant['replay_buffer_size'],
                                      expl_env,
                                      num_agent=num_agent)

    trainer = R2GGNNTrainer(env=expl_env,
                            cg1_n=cg1_n,
                            target_cg1_n=target_cg1_n,
                            qf1_n=qf1_n,
                            target_qf1_n=target_qf1_n,
                            cg2_n=cg2_n,
                            target_cg2_n=target_cg2_n,
                            qf2_n=qf2_n,
                            target_qf2_n=target_qf2_n,
                            cgca_n=cgca_n,
                            cactor_n=cactor_n,
                            policy_n=policy_n,
                            **variant['trainer_kwargs'])

    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        log_path_function=get_generic_ma_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
Пример #4
0
def experiment(variant):
    """Train the ``r2g_gnn2`` ``R2GGNNTrainer`` on a ``DifferentialGame``.

    Builds two ``GraphContextNet`` + two-layer ``nn.Sequential`` Q-network
    pairs (each with deep-copied targets), a ``cactor`` whose first stage is
    its own ``GraphContextNet`` (``cgca``), and one policy per agent.

    Relies on names that are not defined inside this function: ``args``
    (``args.exp_name`` selects the game), ``copy``, ``nn``, ``ptu`` and
    ``get_generic_ma_path_information``.

    Args:
        variant: Nested configuration dict. Keys read here: ``num_agent``,
            ``graph_kwargs`` / ``qf_kwargs`` / ``cactor_kwargs`` /
            ``policy_kwargs`` (``hidden_dim`` of each),
            ``random_exploration``, ``replay_buffer_size``,
            ``trainer_kwargs`` and ``algorithm_kwargs``.
    """
    from differential_game import DifferentialGame
    from rlkit.torch.networks.graph_builders import FullGraphBuilder
    from rlkit.torch.networks.graph_context_network import GraphContextNet
    from rlkit.torch.networks.layers import SplitLayer
    from rlkit.torch.policies.tanh_gaussian_policy import TanhGaussianPolicy
    from rlkit.torch.policies.make_deterministic import MakeDeterministic
    from rlkit.exploration_strategies.base import PolicyWrappedWithExplorationStrategy
    from rlkit.samplers.data_collector.ma_path_collector import MAMdpPathCollector
    from rlkit.data_management.ma_env_replay_buffer import MAEnvReplayBuffer
    from rlkit.torch.r2g.r2g_gnn2 import R2GGNNTrainer
    from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm

    num_agent = variant['num_agent']
    expl_env = DifferentialGame(game_name=args.exp_name)
    eval_env = DifferentialGame(game_name=args.exp_name)
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.low.size

    graph_dim = variant['graph_kwargs']['hidden_dim']
    qf_dim = variant['qf_kwargs']['hidden_dim']
    cactor_dim = variant['cactor_kwargs']['hidden_dim']
    policy_dim = variant['policy_kwargs']['hidden_dim']

    # The epsilon-greedy strategy is only imported when it is needed.
    random_exploration = variant['random_exploration']
    if random_exploration:
        from rlkit.exploration_strategies.epsilon_greedy import EpsilonGreedy

    def make_context_net():
        # Every context net gets its own graph builder instance.
        builder = FullGraphBuilder(input_node_dim=obs_dim + action_dim,
                                   num_node=num_agent,
                                   contain_self_loop=False)
        return GraphContextNet(
            builder,
            obs_dim,
            action_dim,
            node_dim=graph_dim,
            output_activation='relu',
        )

    def make_qf():
        return nn.Sequential(
            nn.Linear(graph_dim + action_dim, qf_dim), nn.ReLU(),
            nn.Linear(qf_dim, 1))

    def make_gaussian_head(in_dim):
        # Two parallel linear heads, as consumed by TanhGaussianPolicy.
        return SplitLayer(layers=[
            nn.Linear(in_dim, action_dim),
            nn.Linear(in_dim, action_dim)
        ])

    # NOTE: construction order is kept identical to the original script so
    # that seeded weight initialisation is unchanged.
    cg1 = make_context_net()
    target_cg1 = copy.deepcopy(cg1)
    qf1 = make_qf()
    target_qf1 = copy.deepcopy(qf1)

    cg2 = make_context_net()
    target_cg2 = copy.deepcopy(cg2)
    qf2 = make_qf()
    target_qf2 = copy.deepcopy(qf2)

    cgca = make_context_net()
    # The heads must accept what the preceding Linear emits, i.e.
    # cactor_kwargs['hidden_dim'] features (previously sized with
    # policy_kwargs['hidden_dim'], which only worked when both matched).
    cactor = nn.Sequential(
        cgca,
        nn.Linear(graph_dim, cactor_dim), nn.ReLU(),
        make_gaussian_head(cactor_dim))
    cactor = TanhGaussianPolicy(module=cactor)

    policy_n, expl_policy_n, eval_policy_n = [], [], []
    for _ in range(num_agent):
        policy = nn.Sequential(
            nn.Linear(obs_dim, policy_dim),
            nn.ReLU(),
            nn.Linear(policy_dim, policy_dim), nn.ReLU(),
            make_gaussian_head(policy_dim))
        policy = TanhGaussianPolicy(module=policy)
        eval_policy = MakeDeterministic(policy)
        if random_exploration:
            # prob_random_action=1.0: the wrapper is configured to always
            # pick the random action.
            expl_policy = PolicyWrappedWithExplorationStrategy(
                exploration_strategy=EpsilonGreedy(expl_env.action_space,
                                                   prob_random_action=1.0),
                policy=policy,
            )
        else:
            expl_policy = policy

        policy_n.append(policy)
        expl_policy_n.append(expl_policy)
        eval_policy_n.append(eval_policy)

    eval_path_collector = MAMdpPathCollector(eval_env, eval_policy_n)
    expl_path_collector = MAMdpPathCollector(expl_env, expl_policy_n)

    replay_buffer = MAEnvReplayBuffer(variant['replay_buffer_size'],
                                      expl_env,
                                      num_agent=num_agent)

    trainer = R2GGNNTrainer(env=expl_env,
                            cg1=cg1,
                            target_cg1=target_cg1,
                            qf1=qf1,
                            target_qf1=target_qf1,
                            cg2=cg2,
                            target_cg2=target_cg2,
                            qf2=qf2,
                            target_qf2=target_qf2,
                            cactor=cactor,
                            policy_n=policy_n,
                            **variant['trainer_kwargs'])

    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        log_path_function=get_generic_ma_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
def experiment(variant):
    """Train the ``r2g_gnn3_onlyq`` ``R2GGNNTrainer`` on a particle environment.

    Two ``GraphContextNet`` + ``FlattenMlp`` pairs (each with deep-copied
    targets) are shared by all agents; every agent additionally gets its own
    MLP ``cactor`` and policy. The ``cactor`` input width depends on
    ``variant['trainer_kwargs']['dec_cactor']``.

    Relies on names that are not defined inside this function: ``args``
    (``args.exp_name`` is passed to ``make_env``), ``copy``, ``nn``, ``ptu``
    and ``get_generic_ma_path_information``.

    Args:
        variant: Nested configuration dict. Keys read here: ``world_args``,
            ``graph_kwargs`` (``use_attention``, ``num_layer``,
            ``hidden_dim``), ``qf_kwargs`` / ``cactor_kwargs`` /
            ``policy_kwargs`` (``hidden_dim``, ``num_layer``),
            ``replay_buffer_size``, ``trainer_kwargs`` (must contain
            ``dec_cactor``) and ``algorithm_kwargs`` (must contain
            ``batch_size``).
    """
    import sys
    # The particle-env package is expected next to the working directory.
    sys.path.append("./multiagent-particle-envs")
    from make_env import make_env
    from particle_env_wrapper import ParticleEnv

    def build_env():
        return ParticleEnv(
            make_env(args.exp_name,
                     discrete_action_space=False,
                     world_args=variant['world_args']))

    expl_env = build_env()
    eval_env = build_env()
    num_agent = expl_env.num_agent
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.low.size

    from rlkit.torch.networks.graph_builders import FullGraphBuilder
    from rlkit.torch.networks.graph_context_network import GraphContextNet
    from rlkit.torch.networks.networks import FlattenMlp

    def build_critic():
        """Return one (context net, Q-network) pair, built in that order."""
        builder = FullGraphBuilder(
            input_node_dim=obs_dim + action_dim,
            num_node=num_agent,
            batch_size=variant['algorithm_kwargs']['batch_size'],
            contain_self_loop=False)
        context_net = GraphContextNet(
            builder,
            obs_dim,
            action_dim,
            use_attention=variant['graph_kwargs']['use_attention'],
            num_layer=variant['graph_kwargs']['num_layer'],
            node_dim=variant['graph_kwargs']['hidden_dim'],
            output_activation='relu',
        )
        q_net = FlattenMlp(
            input_size=variant['graph_kwargs']['hidden_dim'] + action_dim,
            output_size=1,
            hidden_sizes=[variant['qf_kwargs']['hidden_dim']] *
            (variant['qf_kwargs']['num_layer'] - 1),
        )
        return context_net, q_net

    cg1, qf1 = build_critic()
    target_cg1 = copy.deepcopy(cg1)
    target_qf1 = copy.deepcopy(qf1)

    cg2, qf2 = build_critic()
    target_cg2 = copy.deepcopy(cg2)
    target_qf2 = copy.deepcopy(qf2)

    policy_n, expl_policy_n, eval_policy_n = [], [], []
    cactor_n = []
    for _agent in range(num_agent):
        from rlkit.torch.networks.layers import SplitLayer
        from rlkit.torch.policies.tanh_gaussian_policy import TanhGaussianPolicy
        from rlkit.torch.policies.make_deterministic import MakeDeterministic

        # Input is the agent's own observation (dec_cactor) or all agents'
        # observations, plus the other agents' actions in either case.
        others_action_dim = action_dim * (num_agent - 1)
        cactor_in = (obs_dim + others_action_dim
                     if variant['trainer_kwargs']['dec_cactor']
                     else obs_dim * num_agent + others_action_dim)

        cactor_kwargs = variant['cactor_kwargs']
        cactor_body = FlattenMlp(
            input_size=cactor_in,
            output_size=cactor_kwargs['hidden_dim'],
            hidden_sizes=[cactor_kwargs['hidden_dim']] *
            (cactor_kwargs['num_layer'] - 1),
        )
        cactor_head = SplitLayer(layers=[
            nn.Linear(cactor_kwargs['hidden_dim'], action_dim),
            nn.Linear(cactor_kwargs['hidden_dim'], action_dim)
        ])
        cactor = TanhGaussianPolicy(
            module=nn.Sequential(cactor_body, cactor_head))

        policy_kwargs = variant['policy_kwargs']
        policy_body = FlattenMlp(
            input_size=obs_dim,
            output_size=policy_kwargs['hidden_dim'],
            hidden_sizes=[policy_kwargs['hidden_dim']] *
            (policy_kwargs['num_layer'] - 1),
        )
        policy_head = SplitLayer(layers=[
            nn.Linear(policy_kwargs['hidden_dim'], action_dim),
            nn.Linear(policy_kwargs['hidden_dim'], action_dim)
        ])
        policy = TanhGaussianPolicy(
            module=nn.Sequential(policy_body, policy_head))

        # The stochastic policy explores; its deterministic wrapper evaluates.
        policy_n.append(policy)
        expl_policy_n.append(policy)
        eval_policy_n.append(MakeDeterministic(policy))
        cactor_n.append(cactor)

    from rlkit.samplers.data_collector.ma_path_collector import MAMdpPathCollector
    eval_path_collector = MAMdpPathCollector(eval_env, eval_policy_n)
    expl_path_collector = MAMdpPathCollector(expl_env, expl_policy_n)

    from rlkit.data_management.ma_env_replay_buffer import MAEnvReplayBuffer
    replay_buffer = MAEnvReplayBuffer(variant['replay_buffer_size'],
                                      expl_env,
                                      num_agent=num_agent)

    from rlkit.torch.r2g.r2g_gnn3_onlyq import R2GGNNTrainer
    trainer = R2GGNNTrainer(env=expl_env,
                            cg1=cg1,
                            target_cg1=target_cg1,
                            qf1=qf1,
                            target_qf1=target_qf1,
                            cg2=cg2,
                            target_cg2=target_cg2,
                            qf2=qf2,
                            target_qf2=target_qf2,
                            cactor_n=cactor_n,
                            policy_n=policy_n,
                            **variant['trainer_kwargs'])

    from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm
    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        log_path_function=get_generic_ma_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
Пример #6
0
def experiment(variant):
    """Train MASAC with graph-context critics on a multi-agent particle env.

    Builds separate exploration and evaluation environments, two critics
    (each a ``GraphContextNet`` + ``SelectLayer`` context network followed
    by a ``FlattenMlp`` Q-head, plus deep-copied target networks), one
    ``TanhGaussianPolicy`` per agent, the path collectors and the replay
    buffer, then hands everything to ``MASACGNNTrainer`` and
    ``TorchBatchRLAlgorithm`` and starts training.

    Besides ``variant`` this function reads module-level names defined
    elsewhere in the file: ``args`` (``args.exp_name`` is passed to
    ``make_env``), ``nn``, ``torch``, ``copy``, ``ptu`` and
    ``get_generic_ma_path_information``.

    Args:
        variant: Nested configuration mapping. Keys read here:
            ``world_args``; ``algorithm_kwargs`` (its ``batch_size`` is
            also given to the graph builders); ``graph_kwargs`` (splatted
            into ``GraphContextNet``; ``node_dim`` also sizes the Q-head
            input); ``qf_kwargs`` and ``policy_kwargs`` (``hidden_dim``,
            ``num_layer``); ``random_exploration``;
            ``replay_buffer_size``; ``trainer_kwargs``.

    Returns:
        None. The function returns after ``algorithm.train()`` finishes.
    """
    import sys
    # Extend the import path before importing make_env (presumably the
    # module is located in this checkout directory -- confirm).
    sys.path.append("./multiagent-particle-envs")
    from make_env import make_env
    from particle_env_wrapper import ParticleEnv
    from simple_spread_graph import SimpleSpreadGraphBuilder
    from rlkit.torch.networks.graph_context_network import GraphContextNet
    from rlkit.torch.networks.layers import SelectLayer, SplitLayer
    from rlkit.torch.networks.networks import FlattenMlp
    from rlkit.torch.policies.tanh_gaussian_policy import TanhGaussianPolicy
    from rlkit.torch.policies.make_deterministic import MakeDeterministic
    from rlkit.exploration_strategies.base import PolicyWrappedWithExplorationStrategy
    from rlkit.samplers.data_collector.ma_path_collector import MAMdpPathCollector
    from rlkit.data_management.ma_env_replay_buffer import MAEnvReplayBuffer
    from rlkit.torch.masac.masac_gnn_gcontext import MASACGNNTrainer
    from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm

    def build_env():
        # Exploration and evaluation use independently constructed envs.
        return ParticleEnv(
            make_env(args.exp_name,
                     discrete_action_space=False,
                     world_args=variant['world_args']))

    expl_env = build_env()
    eval_env = build_env()
    num_agent = expl_env.num_agent
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.low.size

    qf_hidden_sizes = ([variant['qf_kwargs']['hidden_dim']] *
                       (variant['qf_kwargs']['num_layer'] - 1))

    def build_critic():
        """Return ``(cg, target_cg, qf, target_qf)`` for one critic."""
        builder = SimpleSpreadGraphBuilder(
            num_agents=expl_env.scenario.num_agents,
            num_landmarks=expl_env.scenario.num_landmarks,
            batch_size=variant['algorithm_kwargs']['batch_size'],
            append_action=True,
            single_observe=False,
            contain_self_loop=False,
        )
        # Each critic reads the node width from its *own* builder. The
        # second critic used to read it from the first critic's builder;
        # both builders are configured identically, so the value is the
        # same, but the dependency was a copy-paste slip.
        context_net = nn.Sequential(
            GraphContextNet(builder,
                            builder.output_node_dim - action_dim,
                            action_dim,
                            output_activation='lrelu0.2',
                            **variant['graph_kwargs']),
            # Keep only the first num_agent entries along dim 1.
            SelectLayer(dim=1, index=torch.arange(num_agent)),
        )
        target_context_net = copy.deepcopy(context_net)
        q_head = FlattenMlp(
            input_size=variant['graph_kwargs']['node_dim'] + action_dim,
            output_size=1,
            hidden_sizes=qf_hidden_sizes,
            hidden_activation=nn.LeakyReLU(negative_slope=0.2),
        )
        target_q_head = copy.deepcopy(q_head)
        return context_net, target_context_net, q_head, target_q_head

    # Construction order (critic 1, critic 2, then policies) is kept as in
    # the original so that seeded parameter initialisation is unchanged.
    cg1, target_cg1, qf1, target_qf1 = build_critic()
    cg2, target_cg2, qf2, target_qf2 = build_critic()

    policy_hidden_dim = variant['policy_kwargs']['hidden_dim']
    policy_n, eval_policy_n, expl_policy_n = [], [], []
    for _ in range(num_agent):
        policy = TanhGaussianPolicy(module=nn.Sequential(
            FlattenMlp(
                input_size=obs_dim,
                output_size=policy_hidden_dim,
                hidden_sizes=[policy_hidden_dim] *
                (variant['policy_kwargs']['num_layer'] - 1),
                hidden_activation=nn.LeakyReLU(negative_slope=0.2),
                output_activation=nn.LeakyReLU(negative_slope=0.2),
            ),
            # Two parallel linear heads, each of width action_dim.
            SplitLayer(layers=[
                nn.Linear(policy_hidden_dim, action_dim),
                nn.Linear(policy_hidden_dim, action_dim)
            ])))
        eval_policy = MakeDeterministic(policy)
        if variant['random_exploration']:
            # Imported lazily, exactly as before: only needed on this path.
            from rlkit.exploration_strategies.epsilon_greedy import EpsilonGreedy
            expl_policy = PolicyWrappedWithExplorationStrategy(
                exploration_strategy=EpsilonGreedy(expl_env.action_space,
                                                   prob_random_action=1.0),
                policy=policy,
            )
        else:
            expl_policy = policy

        policy_n.append(policy)
        eval_policy_n.append(eval_policy)
        expl_policy_n.append(expl_policy)

    eval_path_collector = MAMdpPathCollector(eval_env, eval_policy_n)
    expl_path_collector = MAMdpPathCollector(expl_env, expl_policy_n)

    replay_buffer = MAEnvReplayBuffer(variant['replay_buffer_size'],
                                      expl_env,
                                      num_agent=num_agent)

    trainer = MASACGNNTrainer(env=expl_env,
                              cg1=cg1,
                              target_cg1=target_cg1,
                              qf1=qf1,
                              target_qf1=target_qf1,
                              cg2=cg2,
                              target_cg2=target_cg2,
                              qf2=qf2,
                              target_qf2=target_qf2,
                              policy_n=policy_n,
                              **variant['trainer_kwargs'])

    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        log_path_function=get_generic_ma_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)
    algorithm.train()
Пример #7
0
def experiment(variant):
    """Train the R2G GNN agents on a multi-agent particle environment.

    Builds separate exploration and evaluation environments, two critics
    (observation-graph ``GNNNet``, ``GraphContextNet`` and ``FlattenMlp``
    Q-head, each with a deep-copied target), a centralized actor with its
    own observation/context graph networks (no targets), and one
    GNN-based ``TanhGaussianPolicy`` per agent. It then wires them into
    ``R2GGNNTrainer`` / ``TorchBatchRLAlgorithm``, saves a snapshot of the
    freshly initialised algorithm as ``itr_-1.pkl`` in the logger's
    snapshot directory, and starts training.

    Besides ``variant`` this function reads module-level names defined
    elsewhere in the file: ``args`` (``args.exp_name`` is passed to
    ``make_env``; ``args.glayer`` sets the per-agent policy GNN depth),
    ``nn``, ``torch``, ``copy``, ``osp``, ``ptu`` and
    ``get_generic_ma_path_information``.

    Args:
        variant: Nested configuration mapping. Keys read here:
            ``world_args``; ``algorithm_kwargs`` (its ``batch_size`` is
            also given to the graph builders); ``graph_kwargs`` (splatted
            into ``GraphContextNet``; ``node_dim`` and ``num_layer`` are
            also read directly); ``qf_kwargs``, ``cactor_kwargs`` and
            ``policy_kwargs`` (``hidden_dim``, ``num_layer``);
            ``random_exploration``; ``replay_buffer_size``;
            ``trainer_kwargs``.

    Returns:
        None. The function returns after ``algorithm.train()`` finishes.
    """
    import sys
    # Extend the import path before importing make_env (presumably the
    # module is located in this checkout directory -- confirm).
    sys.path.append("./multiagent-particle-envs")
    from make_env import make_env
    from particle_env_wrapper import ParticleEnv
    from simple_spread_graph import SimpleSpreadGraphBuilder
    from rlkit.torch.networks.gnn_networks import GNNNet
    from rlkit.torch.networks.layers import SelectLayer, SplitLayer, FlattenLayer
    from rlkit.torch.networks.graph_builders import FullGraphBuilder
    from rlkit.torch.networks.graph_context_network import GraphContextNet
    from rlkit.torch.networks.networks import FlattenMlp
    from rlkit.torch.policies.tanh_gaussian_policy import TanhGaussianPolicy
    from rlkit.torch.policies.make_deterministic import MakeDeterministic
    from rlkit.samplers.data_collector.ma_path_collector import MAMdpPathCollector
    from rlkit.data_management.ma_env_replay_buffer import MAEnvReplayBuffer
    from rlkit.torch.r2g.r2g_gnn8 import R2GGNNTrainer
    from rlkit.torch.torch_rl_algorithm import TorchBatchRLAlgorithm
    from rlkit.core import logger

    def build_env():
        # Exploration and evaluation use independently constructed envs.
        return ParticleEnv(
            make_env(args.exp_name,
                     discrete_action_space=False,
                     world_args=variant['world_args']))

    expl_env = build_env()
    eval_env = build_env()
    num_agent = expl_env.num_agent
    obs_dim = eval_env.observation_space.low.size
    action_dim = eval_env.action_space.low.size

    node_dim = variant['graph_kwargs']['node_dim']
    batch_size = variant['algorithm_kwargs']['batch_size']

    def build_obs_graph_builder(single_observe):
        """Return a fresh ``SimpleSpreadGraphBuilder`` without actions."""
        return SimpleSpreadGraphBuilder(
            num_agents=expl_env.scenario.num_agents,
            num_landmarks=expl_env.scenario.num_landmarks,
            batch_size=batch_size,
            append_action=False,
            single_observe=single_observe,
            contain_self_loop=True,
        )

    def build_obs_graph_net():
        """Return a GNN over the observation graph, cut to agent nodes."""
        return nn.Sequential(
            GNNNet(
                build_obs_graph_builder(single_observe=False),
                node_dim=node_dim,
                conv_type='GSage',
                num_conv_layers=variant['graph_kwargs']['num_layer'],
                hidden_activation='lrelu0.2',
                output_activation='lrelu0.2',
            ),
            # Keep only the first num_agent entries along dim 1.
            SelectLayer(dim=1, index=torch.arange(num_agent)),
        )

    def build_context_net():
        """Return a ``GraphContextNet`` over a ``FullGraphBuilder`` graph."""
        builder = FullGraphBuilder(
            input_node_dim=node_dim + action_dim,
            num_node=num_agent,
            batch_size=batch_size,
            contain_self_loop=False)
        return GraphContextNet(builder,
                               node_dim,
                               action_dim,
                               output_activation='lrelu0.2',
                               **variant['graph_kwargs'])

    def build_critic():
        """Return ``(og, target_og, cg, target_cg, qf, target_qf)``."""
        obs_net = build_obs_graph_net()
        target_obs_net = copy.deepcopy(obs_net)
        context_net = build_context_net()
        target_context_net = copy.deepcopy(context_net)
        q_head = FlattenMlp(
            input_size=node_dim + action_dim,
            output_size=1,
            hidden_sizes=[variant['qf_kwargs']['hidden_dim']] *
            (variant['qf_kwargs']['num_layer'] - 1),
            hidden_activation=nn.LeakyReLU(negative_slope=0.2),
        )
        target_q_head = copy.deepcopy(q_head)
        return (obs_net, target_obs_net, context_net, target_context_net,
                q_head, target_q_head)

    # Construction order (critic 1, critic 2, centralized actor, policies)
    # is kept as in the original so seeded initialisation is unchanged.
    og1, target_og1, cg1, target_cg1, qf1, target_qf1 = build_critic()
    og2, target_og2, cg2, target_cg2, qf2, target_qf2 = build_critic()

    # Centralized actor: its graph networks have no target copies.
    ogca = build_obs_graph_net()
    cgca = build_context_net()
    cactor_hidden_dim = variant['cactor_kwargs']['hidden_dim']
    cactor = TanhGaussianPolicy(module=nn.Sequential(
        FlattenMlp(
            input_size=node_dim,
            output_size=cactor_hidden_dim,
            hidden_sizes=[cactor_hidden_dim] *
            (variant['cactor_kwargs']['num_layer'] - 1),
            hidden_activation=nn.LeakyReLU(negative_slope=0.2),
            output_activation=nn.LeakyReLU(negative_slope=0.2),
        ),
        # NOTE(review): the trunk already ends in LeakyReLU, so this layer
        # applies the activation a second time. Kept to preserve existing
        # model behaviour -- confirm whether it is intentional.
        nn.LeakyReLU(negative_slope=0.2),
        # The heads consume the trunk output, so they must be sized with
        # the cactor width. They were previously sized with
        # policy_kwargs['hidden_dim'], which mismatches the trunk whenever
        # the two settings differ.
        SplitLayer(layers=[
            nn.Linear(cactor_hidden_dim, action_dim),
            nn.Linear(cactor_hidden_dim, action_dim)
        ])))

    policy_hidden_dim = variant['policy_kwargs']['hidden_dim']
    policy_n, expl_policy_n, eval_policy_n = [], [], []
    for _ in range(num_agent):
        gnn_policy = GNNNet(
            build_obs_graph_builder(single_observe=True),
            hidden_activation='lrelu0.2',
            output_activation='lrelu0.2',
            conv_type='GSage',
            node_dim=node_dim,
            # Policy GNN depth comes from the command line, not graph_kwargs.
            num_conv_layers=args.glayer,
        )
        policy = TanhGaussianPolicy(module=nn.Sequential(
            # Take index 0 along dim 1 of the GNN output, then flatten.
            gnn_policy, SelectLayer(dim=1, index=0), FlattenLayer(),
            FlattenMlp(
                input_size=node_dim,
                output_size=policy_hidden_dim,
                hidden_sizes=[policy_hidden_dim] *
                (variant['policy_kwargs']['num_layer'] - 1),
                hidden_activation=nn.LeakyReLU(negative_slope=0.2),
                output_activation=nn.LeakyReLU(negative_slope=0.2),
            ),
            SplitLayer(layers=[
                nn.Linear(policy_hidden_dim, action_dim),
                nn.Linear(policy_hidden_dim, action_dim)
            ])))
        eval_policy = MakeDeterministic(policy)
        if variant['random_exploration']:
            # Imported lazily, exactly as before: only needed on this path.
            from rlkit.exploration_strategies.base import PolicyWrappedWithExplorationStrategy
            from rlkit.exploration_strategies.epsilon_greedy import EpsilonGreedy
            expl_policy = PolicyWrappedWithExplorationStrategy(
                exploration_strategy=EpsilonGreedy(expl_env.action_space,
                                                   prob_random_action=1.0),
                policy=policy,
            )
        else:
            expl_policy = policy

        policy_n.append(policy)
        expl_policy_n.append(expl_policy)
        eval_policy_n.append(eval_policy)

    eval_path_collector = MAMdpPathCollector(eval_env, eval_policy_n)
    expl_path_collector = MAMdpPathCollector(expl_env, expl_policy_n)

    replay_buffer = MAEnvReplayBuffer(variant['replay_buffer_size'],
                                      expl_env,
                                      num_agent=num_agent)

    trainer = R2GGNNTrainer(env=expl_env,
                            og1=og1,
                            target_og1=target_og1,
                            cg1=cg1,
                            target_cg1=target_cg1,
                            qf1=qf1,
                            target_qf1=target_qf1,
                            og2=og2,
                            target_og2=target_og2,
                            cg2=cg2,
                            target_cg2=target_cg2,
                            qf2=qf2,
                            target_qf2=target_qf2,
                            ogca=ogca,
                            cgca=cgca,
                            cactor=cactor,
                            policy_n=policy_n,
                            **variant['trainer_kwargs'])

    algorithm = TorchBatchRLAlgorithm(
        trainer=trainer,
        exploration_env=expl_env,
        evaluation_env=eval_env,
        exploration_data_collector=expl_path_collector,
        evaluation_data_collector=eval_path_collector,
        replay_buffer=replay_buffer,
        log_path_function=get_generic_ma_path_information,
        **variant['algorithm_kwargs'])
    algorithm.to(ptu.device)

    # Save the initial (pre-training) parameters as iteration -1.
    snapshot = algorithm._get_snapshot()
    file_name = osp.join(logger._snapshot_dir, 'itr_-1.pkl')
    torch.save(snapshot, file_name)

    algorithm.train()