def f_clw_instantiate_envs(self):
    """Instantiate both the Training and Test Gym Envs.

    Both provide the same dynamical model and the same reward; they differ
    only in the params they receive (training_params vs testing_params).

    Raises:
        RuntimeError: if the configured number of training envs is < 1.
    """
    temp = 'gym_quadcopter:quadcopter-v' + str(self.env_desc.get_env_id())
    # TODO FIXME: Some models cannot handle multiple envs.
    N = self.env_desc.get_n_envs()
    if N < 1:
        raise RuntimeError(f"Got NumEnvs needs to be >=1 but got NumEnvs={N}")
    logging.info(f"[SETUP] Creating {N} Training Environments - START")
    # Instantiating all the Envs and storing them into a private var
    self.__envs_training = [f_fwgym_get_env(
        env_id=temp, used_states=self.used_states, instance_index=i,
        query_classes=self.query_classes, query_class=self.query_class,
        params=self.args.training_params
    ) for i in range(N)]
    # BUGFIX: the original used `lambda: self.__envs_training[i]` inside the
    # comprehension; Python closures late-bind the loop variable, so every
    # lambda returned the LAST training env and all vectorized slots wrapped
    # the same environment. Binding each env as a default argument captures
    # the intended one per slot.
    self.env = DummyVecEnv([lambda env=env: env for env in self.__envs_training])
    logging.info(f"[SETUP] Creating {N} Training Environments - DONE")
    logging.info(f"[SETUP] Creating 1 Test Environments - START")
    self.env_test = f_fwgym_get_env(
        env_id=temp, used_states=self.used_states, instance_index=0,
        query_classes=self.query_classes, query_class=self.query_class,
        params=self.args.testing_params
    )
    logging.info(f"[SETUP] Creating 1 Test Environments - DONE")
def test_env_make(self):
    """Check that the two env construction paths build equivalent envs.

    Builds quadcopter-v2 once via the f_fwgym_get_env factory and once via
    GymEnvBase.make, then verifies both agree on params, env_id,
    instance_index and the continuous flag.
    """
    instance_index = 0
    action = np.array([1.0, 2.0, 3.0])
    env_id = 'gym_quadcopter:quadcopter-v2'
    states = ['e_p', 'e_q', 'e_r']
    # Path 1: the framework factory helper.
    env1 = f_fwgym_get_env(
        env_id=env_id,
        used_states=states,
        instance_index=instance_index,
        query_class='something',
        query_classes={},
        params=self.testing_params,
    )
    env1.reset()
    self.assertEqual(env1.params, self.testing_params)
    # Path 2: direct construction through GymEnvBase.make with an
    # explicitly supplied quadcopter model.
    quadcopter = Quadcopter(T=20, dt_commands=0.03, dt=0.01)
    env2 = GymEnvBase.make(
        env_id=env_id,
        instance_index=instance_index,
        params=self.testing_params,
        quadcopter=quadcopter,
        used_states=states,
    )
    env2.reset()
    self.assertEqual(env2.params, self.testing_params)
    # Both envs must expose identical identifying attributes.
    self.assertEqual(env1.env_id, env2.env_id)
    self.assertEqual(env1.instance_index, env2.instance_index)
    self.assertEqual(env1.continuous, env2.continuous)
def run_test(self):
    """Runs the old tests or the new properties-observer based ones.

    If the properties observer is active, delegates to compute_traces().
    Otherwise it evaluates a range of checkpoints and plots average reward
    and average success rate against training timesteps.
    """
    if self.args.eval_properties_observer['is_active']:
        return self.compute_traces()
    logging.info(f"Start test at {dt.now().strftime('%Y%m%d_%H%M')}")
    logging.info(f"Test Arguments\n{self._args2str(self.args)}")
    self.f_fwtfw_init()
    temp = 'gym_quadcopter:quadcopter-v' + str(self.args.env)
    self.env = f_fwgym_get_env(
        env_id=temp, used_states=self.args.used_states, instance_index=0,
        query_classes=self.args.query_classes,
        query_class=self.args.query_class,
        params=self.args.testing_params)
    logging.info(
        f"[eval_models.py] Instantiated env {str(temp)} with continuous {str(self.args.continuous)}"
    )
    checkpoints = self.range_checkpoints()
    rewards, s_rates = self.f_checkpoints_range_2_mean_performance(
        checkpoints=checkpoints)
    # Seed value plotted at timestep 0 before any training.
    starting_min_reward = -10
    # The x axis (cumulative training timesteps per checkpoint) is identical
    # for both plots; the original computed it twice — compute it once.
    temp_x = [0] + [
        self.args.num_iterations_checkpoint * i for i in range(
            self.args.start_index, self.args.end_index, self.args.step)
    ]
    temp_y = [starting_min_reward] + list(rewards)
    f_iofsw_plot(
        x=temp_x,
        y=temp_y,
        x_ticks=np.array(temp_x),
        y_ticks=np.array(temp_y),
        title="Rewards",
        label_x="Training Timesteps",
        label_y="Average Rewards",
        filename=
        f"{self.args.plots_dir}/Timesteps_Rewards_{dt.now().strftime('%Y%m%d_%H%M%S')}.png",
    )
    temp_y = [0] + list(s_rates)
    f_iofsw_plot(
        x=temp_x,
        y=temp_y,
        x_ticks=np.array(temp_x),
        y_ticks=np.array(temp_y),
        title="Success Rate",
        label_x="Training Timesteps",
        label_y="Average Success Rate",
        filename=
        f"{self.args.plots_dir}/Timesteps_SuccessRate_{dt.now().strftime('%Y%m%d_%H%M%S')}.png",
    )
def test_env_set_z_velocity_angles_reset_function(self):
    """Tests the possibility to set the Velocity Reset Function from Config Training.

    For each supported env id, resets the env and checks that every
    quadcopter state drawn by the reset policy lies inside the [min, max]
    range configured in training_params.
    """
    env_desc = EnvDict(env_dict=self.args.env)
    tp_desc = TrainingParamsDict(tp_dict=self.args.training_params)
    # Parsed args must mirror the raw training config.
    self.assertEqual(env_desc.get_env_id(), self.training_config['env']['value'])
    self.assertEqual(self.args.model, self.training_config['model'])
    self.assertEqual(self.args.n_steps, self.training_config['n_steps'])
    self.assertEqual(self.args.training_params, self.training_config['training_params'])
    # Maps each reset-policy key in the config to the quadcopter attribute
    # it controls ('abs_z' is the only key whose attribute name differs).
    policy_to_attr = {
        'abs_z': 'z',
        'velocity_x': 'velocity_x',
        'velocity_y': 'velocity_y',
        'velocity_z': 'velocity_z',
        'abs_roll': 'abs_roll',
        'abs_pitch': 'abs_pitch',
        'abs_yaw': 'abs_yaw',
        'rate_roll': 'rate_roll',
        'rate_pitch': 'rate_pitch',
        'rate_yaw': 'rate_yaw',
    }
    reset_policy = self.args.training_params['quadcopter']['reset_policy']
    for i in range(0, 3):
        env_id = f'gym_quadcopter:quadcopter-v{i}'
        env = f_fwgym_get_env(
            env_id=env_id, used_states=['e_p', 'e_q', 'e_r'],
            instance_index=0, query_class='something', query_classes={},
            params=self.args.training_params
        )
        self.assertEqual(env.params, self.args.training_params)
        env.reset()
        # Every sampled state must lie within its configured reset range.
        for policy_key, attr in policy_to_attr.items():
            val_min = float(reset_policy[policy_key]['params'][0])
            val_max = float(reset_policy[policy_key]['params'][1])
            value = getattr(env.quadcopter, attr)
            self.assertTrue(
                val_min <= value <= val_max,
                msg=f"EnvID={env_id} {policy_key}: {value} not in [{val_min}, {val_max}]"
            )
def test_env_altitude_controller_temporal_consistency(self):
    """Tests that env.altitude_controller.compute_thrust() has no random component.

    Disables every stochastic reset policy, then records the thrust trace
    twice from identical initial conditions and requires both traces to
    coincide.
    """
    # Disable randomness in the reset policy so both runs start identically.
    for pol in self.reset_policy:
        self.training_config['training_params']['quadcopter']['reset_policy'][pol]['pdf'] = 'none'
    self.training_config['model']['name'] = 'ddpg'
    self.args = training_args_from_configs(base_config=self.base_config, config=self.training_config, debug_info=False)
    env_desc = EnvDict(env_dict=self.args.env)
    tp_desc = TrainingParamsDict(tp_dict=self.args.training_params)
    # Parsed args must mirror the raw training config.
    self.assertEqual(env_desc.get_env_id(), self.training_config['env']['value'])
    self.assertEqual(self.args.model, self.training_config['model'])
    self.assertEqual(self.args.n_steps, self.training_config['n_steps'])
    self.assertEqual(self.args.training_params, self.training_config['training_params'])
    supported_envs = [0]
    # Renamed from `i`: the original reused `i` for both the env loop and the
    # inner trace loop, shadowing the env index.
    for env_index in supported_envs:
        env_id = f'gym_quadcopter:quadcopter-v{env_index}'
        print(f"Checking EnvID={env_id}")
        env = f_fwgym_get_env(
            env_id=env_id, used_states=['e_p', 'e_q', 'e_r'],
            instance_index=0, query_class='something', query_classes={},
            params=self.args.training_params
        )
        self.assertEqual(env.params, self.args.training_params)
        # PID gains expected from the training config.
        self.assertEqual(env.altitude_controller.p, 3000)
        self.assertEqual(env.altitude_controller.i, 300)
        self.assertEqual(env.altitude_controller.d, 500)

        def record_thrust_trace(trace):
            """Reset the env and fill `trace` with successive thrust commands."""
            env.reset()
            env.set_target_z(1.0)
            self.assertEqual(env.quadcopter.z, 0.0)
            self.assertEqual(env.target_z, 1.0)
            self.assertEqual(env.altitude_controller.z_integral, 0.0)
            self.assertEqual(env.previous_z_error, 0.0)
            for k in range(trace.shape[0]):
                # z never changes here because only the controller is queried,
                # the env is never stepped.
                self.assertEqual(env.quadcopter.z, 0.0)
                trace[k] = env.altitude_controller.compute_thrust(
                    target_z=env.target_z, current_z=env.quadcopter.z)

        obs_trace = np.zeros(10)
        exp_trace = np.zeros(10)
        # Two identical runs: the original duplicated this sequence inline.
        record_thrust_trace(exp_trace)
        record_thrust_trace(obs_trace)
        self.assertTrue(
            np.allclose(obs_trace, exp_trace, atol=1e-5),
            msg=f"Temporal Consistency Check: EnvID={env_id} ObsTrace={obs_trace}, ExpTrace={exp_trace}, Delta={obs_trace-exp_trace}")
def test_env_controller_temporal_consistency_on_altitude(self):
    """Tests that identical initial conditions and target_z yield identical z traces.

    Given the same initial conditions and target_z, the selected altitude
    controller must lead to the same z after the same number of iterations.

    WARNING - This test is currently failing due to some precision issues
    hence it has been disabled, but it revealed an issue that needs to be
    deeply investigated, hence it needs to be committed in the repo.
    """
    # Disable randomness in the reset policy so both runs start identically.
    for pol in self.reset_policy:
        self.training_config['training_params']['quadcopter']['reset_policy'][pol]['pdf'] = 'none'
    self.training_config['model']['name'] = 'ddpg'
    self.args = training_args_from_configs(base_config=self.base_config, config=self.training_config, debug_info=False)
    env_desc = EnvDict(env_dict=self.args.env)
    tp_desc = TrainingParamsDict(tp_dict=self.args.training_params)
    # Parsed args must mirror the raw training config.
    self.assertEqual(env_desc.get_env_id(), self.training_config['env']['value'])
    self.assertEqual(self.args.model, self.training_config['model'])
    self.assertEqual(self.args.n_steps, self.training_config['n_steps'])
    self.assertEqual(self.args.training_params, self.training_config['training_params'])
    # Renamed from `i`: the original reused `i` for both the env loop and the
    # inner trace loop, shadowing the env index.
    for env_index in self.supported_envs:
        env_id = f'gym_quadcopter:quadcopter-v{env_index}'
        print(f"Checking EnvID={env_id}")
        env = f_fwgym_get_env(
            env_id=env_id, used_states=['e_p', 'e_q', 'e_r'],
            instance_index=0, query_class='something', query_classes={},
            params=self.args.training_params
        )
        self.assertEqual(env.params, self.args.training_params)
        # PID gains expected from the training config.
        self.assertEqual(env.altitude_controller.p, 3000)
        self.assertEqual(env.altitude_controller.i, 300)
        self.assertEqual(env.altitude_controller.d, 500)

        def record_z_trace(trace):
            """Reset the env and fill `trace` with z after each zero-action step."""
            env.reset()
            env.set_target_z(1.0)
            self.assertEqual(env.quadcopter.z, 0.0)
            self.assertEqual(env.target_z, 1.0)
            self.assertEqual(env.z_integral, 0.0)
            self.assertEqual(env.previous_z_error, 0.0)
            for k in range(trace.shape[0]):
                env.step(action=np.zeros(3))
                trace[k] = env.quadcopter.z

        obs_trace = np.zeros(10)
        exp_trace = np.zeros(10)
        # Two identical runs: the original duplicated this sequence inline.
        record_z_trace(exp_trace)
        record_z_trace(obs_trace)
        self.assertTrue(
            np.allclose(obs_trace, exp_trace, atol=1e-5),
            msg=f"Temporal Consistency Check: EnvID={env_id} ObsTrace={obs_trace}, ExpTrace={exp_trace}, Delta={obs_trace-exp_trace}")
def test_set_saturation(self):
    """Test the saturation of the motors."""
    env = f_fwgym_get_env(
        env_id='gym_quadcopter:quadcopter-v0',
        instance_index=0,
        query_class='something',
        query_classes={},
        params=self.args.training_params,
        used_states=['e_p', 'e_q', 'e_r'],
    )
    # Motor 0 saturates at the configured fraction of full scale; the
    # remaining three motors saturate at full scale.
    full_scale = 65535.
    expected = [
        self.args.training_params['quadcopter']['saturation_motor'] * full_scale,
        full_scale,
        full_scale,
        full_scale,
    ]
    self.assertSequenceEqual(list(env.quadcopter.saturation), expected)
def test_env(self):
    """Check config/args consistency and that the configured env builds."""
    env_desc = EnvDict(env_dict=self.args.env)
    tp_desc = TrainingParamsDict(tp_dict=self.args.training_params)
    # Parsed args must mirror the raw training config.
    self.assertEqual(env_desc.get_env_id(), self.training_config['env']['value'])
    self.assertEqual(self.args.model, self.training_config['model'])
    self.assertEqual(self.args.n_steps, self.training_config['n_steps'])
    self.assertEqual(self.args.training_params, self.training_config['training_params'])
    # Build the env named by the config and verify it keeps the params.
    env_id = 'gym_quadcopter:quadcopter-v' + str(env_desc.get_env_id())
    env = f_fwgym_get_env(
        env_id=env_id,
        used_states=['e_p', 'e_q', 'e_r'],
        instance_index=0,
        query_class='something',
        query_classes={},
        params=self.args.training_params,
    )
    self.assertEqual(env.params, self.args.training_params)
def setUp(self):
    """Prepare the test environment.

    Loads the base and training YAML configs, derives the testing params,
    and builds a default quadcopter-v0 env used by the tests.
    """
    with open('run/config/default.yaml', 'r') as f:
        self.base_config = yaml.safe_load(f)
    training_cfg_path = self.base_config['filenames']['config_training']
    with open(training_cfg_path, 'r') as f:
        self.training_config = yaml.safe_load(f)['training']
    self.testing_params = self.training_config['training_params']
    query_mode = self.testing_params['query_generation']['value']
    self.is_continuous = (query_mode == "continuous")
    self.env_id = 'gym_quadcopter:quadcopter-v0'
    self.env = f_fwgym_get_env(
        env_id=self.env_id,
        used_states=['e_p', 'e_q', 'e_r'],
        instance_index=0,
        query_class='something',
        query_classes={},
        params=self.testing_params,
    )
def compute_traces(self):
    """Compute traces for each Stable Baselines checkpoint in the experiment dir.

    Given a certain Query Generator Configuration, every checkpoint is
    loaded and run in an env with a properly instantiated Query Generator;
    per-episode query/command/signal traces are written to CSV files under
    `self.args.log_dir`.
    """
    # Local import: pandas is needed for the concat below.
    import pandas as pd

    # Instantiate the Env for the Tests
    env_id = 'gym_quadcopter:quadcopter-v' + str(self.args.env)
    self.env = f_fwgym_get_env(
        env_id=env_id, used_states=self.args.used_states, instance_index=0,
        query_classes=self.args.query_classes,
        query_class=self.args.query_class,
        params=self.args.testing_params)
    # Register Callback
    self.env.quadcopter.cb_step = self.cb_quadcopter_step
    logging.info(
        f"[eval_models.py] Instantiated env {env_id} with continuous {str(self.args.continuous)}"
    )
    checkpoints = self.range_checkpoints()
    # Iterate over the checkpoints
    for i in checkpoints:
        # Input: Checkpoints Dir
        cp_path = get_checkpoint_path(
            base_path=self.args.training_base_path, idx=i,
            suffix=self.args.suffix)
        # Loads the model from the Checkpoint
        model = load_model(path=cp_path, env=self.env, desc=self.args.model['name'])
        # Create a dir for the traces related to a given checkpoint,
        # using `self.args.log_dir` as base
        base_path_cp_id = self.args.log_dir + f"/checkpoint_{i}/"
        Path(base_path_cp_id).mkdir()
        # Iterate over the episodes to test each checkpoint
        for j in range(self.args.n_episodes):
            # Get the evaluation results filenames
            query_full_path, commands_full_path, signals_full_path = get_signals_path(
                basepath_checkpoint=base_path_cp_id, episode_idx=j)
            # Get the data structures for the evaluated data
            self.query, self.cmds, self.signals = get_signals_containers()
            # Reset the env at the beginning of each episode
            obs = self.env.reset()
            # Collect one row per command step. The original appended
            # row-by-row with DataFrame.append, which is O(n^2) and was
            # removed in pandas 2.0; buffer the rows and concat once instead.
            query_rows = []
            # Simulation loop
            while True:
                # NOTE: The time granularity here should be the dt_commands
                # one and not the dt one as it is sync with step() method
                query_rows.append({
                    't': self.env.quadcopter.t.hex(),
                    'query_p': float(self.env.query[0]).hex(),
                    'query_q': float(self.env.query[1]).hex(),
                    'query_r': float(self.env.query[2]).hex(),
                })
                # Get the action from the Actor
                action, _ = model.predict(obs, deterministic=True)
                obs, reward, done, _ = self.env.step(action)
                if done:
                    break
            self.query = pd.concat(
                [self.query, pd.DataFrame(query_rows)], ignore_index=True)
            self.query.to_csv(query_full_path, index=False)
            self.cmds.to_csv(commands_full_path, index=False)
            self.signals.to_csv(signals_full_path, index=False)