Example #1
import shutil
import tempfile

# initialize_ray, train, get_marl_env_config and DiCESACTrainer are
# project-level helpers assumed to be importable from the surrounding package.


def regression_test(local_mode=False):
    num_agents = 3
    local_dir = tempfile.mkdtemp()
    initialize_ray(test_mode=True, local_mode=local_mode)
    train(
        DiCESACTrainer,
        {
            "gamma": 0.95,
            "target_network_update_freq": 32,
            "tau": 1.0,
            "train_batch_size": 200,
            "rollout_fragment_length": 50,
            "optimization": {
                "actor_learning_rate": 0.005,
                "critic_learning_rate": 0.005,
                "entropy_learning_rate": 0.0001
            },
            **get_marl_env_config(
                "CartPole-v0", num_agents, normalize_actions=False)
        },
        {"episode_reward_mean": 150 * num_agents},
        exp_name="DELETEME",
        local_dir=local_dir,
        test_mode=True)
    shutil.rmtree(local_dir, ignore_errors=True)
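The regression helper above is a plain function; assuming the project helpers are importable as noted in the comments, a local smoke run could look like this:

if __name__ == "__main__":
    # Ray local mode keeps everything in a single process for quick checks.
    regression_test(local_mode=True)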
Example #2
import gym

# initialize_ray, get_marl_env_config, DiCESACTrainer and MultiAgentEnvWrapper
# are project-level names assumed to be importable from the surrounding package.


def dice_sac_trainer():
    initialize_ray(test_mode=True, local_mode=True)
    env_name = "BipedalWalker-v2"
    num_agents = 3
    env = gym.make(env_name)
    trainer = DiCESACTrainer(
        get_marl_env_config(env_name, num_agents, normalize_actions=False),
        env=MultiAgentEnvWrapper)
    return env, trainer
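A minimal usage sketch for the helper above, assuming DiCESACTrainer follows the standard RLlib Trainer API (train() returns a result dict containing "episode_reward_mean"); the iteration count here is arbitrary:

if __name__ == "__main__":
    env, trainer = dice_sac_trainer()
    # Run a couple of training iterations and report the smoothed return.
    for i in range(2):
        result = trainer.train()
        print(i, result.get("episode_reward_mean"))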
Example #3
import shutil
import tempfile

from ray import tune

# initialize_ray, train, constants, get_marl_env_config and DiCESACTrainer are
# project-level names assumed to be importable from the surrounding package.


def regression_test2(local_mode=False):
    num_agents = 3
    local_dir = tempfile.mkdtemp()
    initialize_ray(test_mode=True, local_mode=local_mode)
    train(
        DiCESACTrainer,
        {
            "soft_horizon": True,
            "clip_actions": False,
            "normalize_actions": False,  # <<== Handled in the MARL env
            "metrics_smoothing_episodes": 5,
            "no_done_at_end": True,
            "train_batch_size": 1000,
            "rollout_fragment_length": 50,
            constants.DELAY_UPDATE: tune.grid_search([True, False]),
            # constants.NOR: tune.grid_search([True, False]),

            # "optimization": {
            #     "actor_learning_rate": 0.005,
            #     "critic_learning_rate": 0.005,
            #     "entropy_learning_rate": 0.0001
            # },
            **get_marl_env_config(
                "Pendulum-v0", num_agents, normalize_actions=True)
        },
        {
            "episode_reward_mean": -300 * num_agents,
            "timesteps_total": 13000 * num_agents
        },
        exp_name="DELETEME",
        local_dir=local_dir,
        test_mode=True)
    shutil.rmtree(local_dir, ignore_errors=True)
Example #4
    stop = int(5e7)

    config = {
        "num_sgd_iter": 10,
        "num_envs_per_worker": 1,
        "entropy_coeff": 0.001,
        "lambda": 0.95,
        "lr": 2.5e-4,

        # 'sample_batch_size': 200 if large else 50,
        # 'sgd_minibatch_size': 100 if large else 64,
        # 'train_batch_size': 10000 if large else 2048,
        "num_gpus": 1,
        "num_cpus_per_worker": 1,
        "num_cpus_for_driver": 2,
"num_workers": 16
    }

    config.update(
        get_marl_env_config(env_name, tune.grid_search([args.num_agents])))

    train(
        DiCETrainer,
        config=config,
        stop=stop,
        exp_name=exp_name,
        num_seeds=args.num_seeds,
        num_gpus=args.num_gpus,
        test_mode=args.test,
    )
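This snippet begins mid-script and references args, env_name and exp_name that are defined earlier in the original file. A purely hypothetical sketch of how those names could be provided is shown below; the flag names, defaults and environment choice are illustrative assumptions, not the project's actual CLI:

import argparse

# Hypothetical CLI; the real script's flags are not shown in this excerpt.
parser = argparse.ArgumentParser()
parser.add_argument("--exp-name", type=str, default="dice_marl")
parser.add_argument("--num-agents", type=int, default=3)
parser.add_argument("--num-seeds", type=int, default=3)
parser.add_argument("--num-gpus", type=int, default=1)
parser.add_argument("--test", action="store_true")
args = parser.parse_args()

env_name = "Walker2d-v3"  # Illustrative placeholder; the original env is not shown.
exp_name = "{}_{}".format(args.exp_name, env_name)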
Example #5
        1,
        "timesteps_per_iteration":
        1000,
        "learning_starts":
        10000,
        "clip_actions":
        True,
        # "normalize_actions": True,  <<== This is handled by MARL env
        "evaluation_interval":
        1,
        "metrics_smoothing_episodes":
        5,
    }
    config.update(
        get_marl_env_config(config["env"],
                            args.num_agents,
                            normalize_actions=True))

    config["evaluation_config"] = dict(
        # env_config=config["env_config"],
        explore=False)

    train(DiCESACTrainer,
          config=config,
          stop=stop,
          exp_name=exp_name,
          num_seeds=args.num_seeds,
          num_gpus=args.num_gpus,
          test_mode=args.test,
          keep_checkpoints_num=5)
Example #6
        dice_utils.TWO_SIDE_CLIP_LOSS: False,
        dice_utils.ONLY_TNB: True,
        dice_utils.NORMALIZE_ADVANTAGE: True,  # May need to be set to False
    }
)


DiESTrainer = DiCETrainer.with_updates(
    name="DiES",
    default_config=dies_default_config,
    after_train_result=run_evolution_strategies
)

if __name__ == '__main__':
    env_name = "CartPole-v0"
    num_agents = 3
    config = {
        "num_sgd_iter": 2,
        "train_batch_size": 400,
        "update_steps": 1000,
        **get_marl_env_config(env_name, num_agents)
    }
    initialize_ray(test_mode=True, local_mode=True)
    train(
        DiESTrainer,
        config,
        exp_name="DELETE_ME_TEST",
        stop={"timesteps_total": 10000},
        test_mode=True
    )