# Example 1
def demo2_ppo():
    """Launch a PPO training run on the AntBulletEnv-v0 PyBullet task."""
    from ray_elegantrl.configs.configs_ppo import config

    # Environment specification handed to the trainer via config['env'].
    task = {
        'id': 'AntBulletEnv-v0',
        'state_dim': 28,
        'action_dim': 8,
        'if_discrete_action': False,
        'reward_dim': 1,
        'target_reward': 2500,
        'max_step': 1000,
    }
    config['env'] = task
    config['agent'].update(lambda_entropy=0.05, lambda_gae_adv=0.97)
    config['interactor'].update(rollout_num=4, gamma=0.99)
    # Steps collected per update; batch covers the whole rollout.
    # (Sizing note from the original: env['max_step'] * rollout_num.)
    steps_per_update = 2 ** 11
    config['trainer'].update(
        sample_step=steps_per_update,
        batch_size=2 ** 11,
        policy_reuse=2 ** 3,
    )
    config['interactor']['horizon_step'] = steps_per_update
    config['evaluator']['break_step'] = int(8e6 * 8)
    config['buffer']['max_buf'] = steps_per_update
    config['gpu_id'] = 2
    config['if_cwd_time'] = True
    config['random_seed'] = 0
    beginer(config)
# Example 2
def demo1_ppo():
    """Launch a PPO training run on the ReacherBulletEnv-v0 PyBullet task."""
    from ray_elegantrl.configs.configs_ppo import config

    # Environment specification handed to the trainer via config['env'].
    task = {
        'id': 'ReacherBulletEnv-v0',
        'state_dim': 9,
        'action_dim': 2,
        'if_discrete_action': False,
        'reward_dim': 1,
        'target_reward': 20,
        'max_step': 150,
    }
    config['env'] = task
    config['agent'].update(lambda_entropy=0.01, lambda_gae_adv=0.98)
    config['interactor'].update(rollout_num=4, gamma=0.99)
    # Steps collected per update (original sizing note:
    # env['max_step'] * rollout_num).
    steps_per_update = 4096
    config['trainer'].update(sample_step=steps_per_update, policy_reuse=2 ** 4)
    config['interactor']['horizon_step'] = steps_per_update
    config['buffer']['max_buf'] = steps_per_update
    config['gpu_id'] = 2
    config['if_cwd_time'] = True
    config['random_seed'] = 10087
    beginer(config)
# Example 3
def demo_carla_ppo():
    """Launch a PPO training run on the custom CARLA driving environment."""
    from ray_elegantrl.configs.configs_ppo import config
    from gym_carla_feature.env_a.mp_config import params

    # CARLA server port; mutates the shared imported params dict (same
    # object that is embedded in the env spec below).
    params['port'] = 2016
    task = {
        'id': 'carla-v0',
        'state_dim': 42,
        'action_dim': 2,
        'if_discrete_action': False,
        'reward_dim': 1,
        'target_reward': 600,
        'max_step': 512,
        'params_name': {'params': params},
    }
    config['agent'].update(
        lambda_entropy=0.05,
        lambda_gae_adv=0.97,
        net_dim=2 ** 9,
    )
    config['interactor'].update(rollout_num=6, gamma=0.99)
    # Two full episodes' worth of steps per rollout worker per update.
    steps_per_update = 2 * task['max_step'] * config['interactor']['rollout_num']
    config['trainer'].update(
        batch_size=2 ** 9,
        policy_reuse=2 ** 3,
        sample_step=steps_per_update,
    )
    config['interactor']['horizon_step'] = steps_per_update
    config['env'] = task
    config['gpu_id'] = 2
    config['if_cwd_time'] = True
    config['random_seed'] = 0
    beginer(config)
# Example 4
def demo_carla_sac():
    """Launch a (modified) SAC training run on the custom CARLA environment."""
    from ray_elegantrl.configs.configs_modsac import config
    from gym_carla_feature.env_a.mp_config import params

    # Environment specification handed to the trainer via config['env'].
    task = {
        'id': 'carla-v0',
        'state_dim': 42,
        'action_dim': 2,
        'if_discrete_action': False,
        'reward_dim': 1,
        'target_reward': 600,
        'max_step': 512,
        'params_name': {'params': params},
    }
    config['interactor'].update(rollout_num=4, gamma=0.99)
    # One full episode per rollout worker per update.
    steps_per_update = task['max_step'] * config['interactor']['rollout_num']
    config['trainer']['sample_step'] = steps_per_update
    config['interactor']['horizon_step'] = steps_per_update
    config['env'] = task
    config['gpu_id'] = 2
    config['if_cwd_time'] = True
    config['random_seed'] = 0
    beginer(config)
# Example 5
def demo1_sac():
    """Launch a (modified) SAC training run on ReacherBulletEnv-v0."""
    from ray_elegantrl.configs.configs_modsac import config

    # Environment specification handed to the trainer via config['env'].
    task = {
        'id': 'ReacherBulletEnv-v0',
        'state_dim': 9,
        'action_dim': 2,
        'if_discrete_action': False,
        'reward_dim': 1,
        'target_reward': 20,
        'max_step': 150,
    }
    config['env'] = task
    config['interactor'].update(rollout_num=4, gamma=0.99)
    # Steps collected per update (original sizing note:
    # env['max_step'] * rollout_num).
    steps_per_update = 1024
    config['trainer']['sample_step'] = steps_per_update
    config['interactor']['horizon_step'] = steps_per_update
    config['gpu_id'] = 2
    config['if_cwd_time'] = True
    config['random_seed'] = 10087
    beginer(config)