# NOTE: imports below are the ones this snippet clearly needs; Environment,
# RaisimGymEnv, __RSCDIR__, and TensorboardLauncher come from the raisim_gym
# package, and cfg/saver/mode come from the truncated YAML-loading setup.
import math
import numpy as np
from ruamel.yaml import dump, RoundTripDumper
from stable_baselines import PPO2
from stable_baselines.common.policies import MlpPolicy

# run a single simulation instance
cfg['environment']['num_envs'] = 1
env = Environment(
    RaisimGymEnv(__RSCDIR__, dump(cfg['environment'], Dumper=RoundTripDumper)))

if mode == 'train':

    # Set up the PPO2 algorithm
    model = PPO2(
        tensorboard_log=saver.data_dir,
        policy=MlpPolicy,
        policy_kwargs=dict(net_arch=[dict(pi=[96, 64], vf=[96, 64])]),
        env=env,
        gamma=0.998,
        # one rollout per episode: max_time / control_dt simulation steps
        n_steps=math.floor(cfg['environment']['max_time'] /
                           cfg['environment']['control_dt']),
        ent_coef=0.0,
        learning_rate=1e-3,
        vf_coef=0.5,
        max_grad_norm=0.5,
        lam=0.95,
        nminibatches=1,
        noptepochs=10,
        cliprange=0.2,
        verbose=1,
    )

    # TensorBoard
    # Make sure that your Chrome browser is already open.
    TensorboardLauncher(saver.data_dir + '/PPO2_1')

    # PPO run
    # Originally the total number of timesteps was 500000000.
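    # A minimal sketch of the truncated training call, assuming the
    # stable-baselines PPO2 API; the save path is illustrative, not
    # from the original:
    model.learn(total_timesteps=500000000)
    model.save(saver.data_dir + '/final_policy.pkl')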
# Example No. 2 (evaluation under propeller loss)
# fault-detection helper; fault_detection is defined in the original
# script's truncated portion
fd = fault_detection()

# create environment from the configuration file
cfg['environment']['num_envs'] = 1
env_no = cfg['environment']['num_envs']
env = Environment(
    RaisimGymEnv(__RSCDIR__, dump(cfg['environment'], Dumper=RoundTripDumper)))

base_path = "/home/rohit/Documents/raisim_stuff/prop_loss_final/quadcopter_weights/"
weight_prop = [
    base_path + "4_working_prop/2020-06-01-07-50-00_Iteration_4500.pkl",
    base_path + "3_working_prop/2020-05-31-13-36-06_Iteration_4500.pkl",
    base_path + "2_working_prop/2020-05-30-07-37-15_Iteration_4500.pkl",
]

# load one trained policy per fault mode (4, 3, and 2 working propellers)
model_list = [PPO2.load(w) for w in weight_prop]

obs = env.reset()
running_reward = 0.0
ep_len = 0

# timesteps at which propeller failures are triggered (both at step 1000 here)
switch_off1 = 1000
switch_off2 = 1000

# logging buffers for a 2000-step rollout
pos = np.zeros((15, 3), dtype=np.float32)
pos_plot = np.zeros((2000, 3), dtype=np.float32)
setpoint = np.zeros((2000, 3), dtype=np.float32)
setpoint[1000:, 1] = 1.0  # step the y-position setpoint to 1.0 at timestep 1000
# setpoint[:,-1] = -9.0
angV_plot = np.zeros((2000, 3), dtype=np.float32)
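
# A minimal sketch of the evaluation loop the buffers above are sized for
# (2000 control steps), assuming the standard stable-baselines predict()
# and Gym-style step() APIs. The policy-switching rule is an assumption,
# since the original loop is truncated: each elapsed switch_off time drops
# one propeller, selecting the next policy in model_list.
for t in range(2000):
    n_off = int(t >= switch_off1) + int(t >= switch_off2)
    action, _states = model_list[n_off].predict(obs, deterministic=True)
    obs, reward, done, info = env.step(action)
    running_reward += reward
    ep_len += 1
    # pos_plot[t] and angV_plot[t] would be filled here from the
    # observation; the observation layout is not shown in the original.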