# save the configuration and other files
rsg_root = os.path.dirname(os.path.abspath(__file__)) + '/../'
log_dir = rsg_root + 'QuadrotorTrainingdata'
saver = ConfigurationSaver(
    log_dir=log_dir + '/quadrotor_position_tracking',
    save_items=[
        rsg_root + 'raisim_gym/env/env/hummingbird/Environment.hpp',
        cfg_abs_path
    ])
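# ConfigurationSaver archives the listed environment source and the config file
# alongside this run's logs, so the exact settings of the experiment are kept.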

# create environment from the configuration file
if args.mode == "test":  # in test mode, force the number of envs to 1
    cfg['environment']['num_envs'] = 1
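# RaisimGymEnv takes the resource directory and the 'environment' section of the
# config, re-serialized to a YAML string for the C++ backend; Environment wraps
# it in a vectorized, gym-style interface running num_envs simulations in parallel.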
env = Environment(
    RaisimGymEnv(__RSCDIR__, dump(cfg['environment'], Dumper=RoundTripDumper)))

if mode == 'train':

    # Get algorithm
    model = PPO2(
        tensorboard_log=saver.data_dir,
        policy=MlpPolicy,
        policy_kwargs=dict(net_arch=[dict(pi=[96, 64], vf=[96, 64])]),
        env=env,
        gamma=0.998,
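        # one rollout spans a full episode: max_time / control_dt control steps
        # (e.g. a 4.0 s episode at 0.01 s control_dt would give 400 steps; the
        # actual values come from the cfg file)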
        n_steps=math.floor(cfg['environment']['max_time'] /
                           cfg['environment']['control_dt']),
        ent_coef=0,
        learning_rate=1e-3,
        vf_coef=0.5)
Example #2
mode = args.mode
cfg_abs_path = args.cfg
cfg = YAML().load(open(cfg_abs_path, 'r'))
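# the cfg YAML is expected to contain an 'environment' section with at least the
# keys used below (illustrative values only; the real ones live in the file
# passed on the command line):
#   environment:
#     num_envs: 100
#     max_time: 4.0
#     control_dt: 0.01
#     learning_rate: 1.0e-3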

# save the configuration and other files
rsg_root = os.path.dirname(os.path.abspath(__file__)) + '/../cartpole'
log_dir = rsg_root + '/data'
saver = ConfigurationSaver(
    log_dir=log_dir + '/Cartpole_tutorial',
    save_items=[rsg_root + '/Environment.hpp', cfg_abs_path])

# create environment from the configuration file
if args.mode == "test":  # in test mode, force the number of envs to 1
    cfg['environment']['num_envs'] = 1
env = Environment(
    RaisimGymEnv(current_dir + "/rsc",
                 dump(cfg['environment'], Dumper=RoundTripDumper)))

if mode == 'train':
    # launch TensorBoard for this run; this opens your default browser
    TensorboardLauncher(saver.data_dir + '/PPO2_1')
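    # ('PPO2_1' is the run subdirectory name stable-baselines creates by default
    # for the first run when tensorboard_log is set)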
    # Get algorithm
    model = PPO2(
        tensorboard_log=saver.data_dir,
        policy=MlpPolicy,
        policy_kwargs=dict(net_arch=[dict(pi=[128, 128], vf=[128, 128])]),
        env=env,
        gamma=0.998,
        n_steps=math.floor(cfg['environment']['max_time'] /
                           cfg['environment']['control_dt']),
        ent_coef=0,
        learning_rate=cfg['environment']['learning_rate'],