# Run a single simulated environment instance.
cfg['environment']['num_envs'] = 1
env = Environment(
    RaisimGymEnv(__RSCDIR__, dump(cfg['environment'], Dumper=RoundTripDumper)))

if mode == 'train':
    # Steps per rollout: one full episode, i.e. episode duration divided by
    # the control period.
    steps_per_episode = math.floor(
        cfg['environment']['max_time'] / cfg['environment']['control_dt'])

    # Get algorithm: PPO2 with a shared-width MLP (96-64) for both the policy
    # (pi) and value-function (vf) heads.
    model = PPO2(
        tensorboard_log=saver.data_dir,
        policy=MlpPolicy,
        policy_kwargs=dict(net_arch=[dict(pi=[96, 64], vf=[96, 64])]),
        env=env,
        gamma=0.998,
        n_steps=steps_per_episode,
        ent_coef=0,
        learning_rate=1e-3,
        vf_coef=0.5,
        max_grad_norm=0.5,
        lam=0.95,
        nminibatches=1,
        noptepochs=10,
        cliprange=0.2,
        verbose=1,
    )

    # tensorboard
    # Make sure that your chrome browser is already on.
    TensorboardLauncher(saver.data_dir + '/PPO2_1')
    # PPO run
    # Originally the total timestep is 500000000
fd = fault_detection()

# create environment from the configuration file (single env for evaluation)
cfg['environment']['num_envs'] = 1
env_no = cfg['environment']['num_envs']
env = Environment(
    RaisimGymEnv(__RSCDIR__, dump(cfg['environment'], Dumper=RoundTripDumper)))

# Pre-trained policy checkpoints, one per fault configuration
# (4, 3 and 2 working propellers, in that order).
base_path = "/home/rohit/Documents/raisim_stuff/prop_loss_final/quadcopter_weights/"
weight_prop = [
    base_path + "4_working_prop/2020-06-01-07-50-00_Iteration_4500.pkl",
    base_path + "3_working_prop/2020-05-31-13-36-06_Iteration_4500.pkl",
    base_path + "2_working_prop/2020-05-30-07-37-15_Iteration_4500.pkl",
]
# Load every checkpoint; iterate the list directly instead of indexing with a
# hard-coded range(3), so adding/removing a checkpoint needs no loop change.
model_list = [PPO2.load(weight) for weight in weight_prop]

obs = env.reset()
running_reward = 0.0
ep_len = 0

# NOTE(review): presumably the rollout steps at which propellers are switched
# off — confirm against the stepping loop below this chunk.
switch_off1 = 1000
switch_off2 = 1000

# Logging buffers for a 2000-step rollout (xyz columns).
pos = np.zeros((15, 3), dtype=np.float32)
pos_plot = np.zeros((2000, 3), dtype=np.float32)
setpoint = np.zeros((2000, 3), dtype=np.float32)
setpoint[1000:, 1] = 1.0  # y-setpoint steps from 0 to 1.0 at index 1000
# setpoint[:,-1] = -9.0
angV_plot = np.zeros((2000, 3), dtype=np.float32)