def test_custom_multi_action_distribution(self):
    class Model:
        pass

    ray.init(
        object_store_memory=1000 * 1024 * 1024, ignore_reinit_error=True
    )  # otherwise fails sometimes locally
    # registration
    ModelCatalog.register_custom_action_dist("test",
                                             CustomMultiActionDistribution)
    s1 = Discrete(5)
    s2 = Box(0, 1, shape=(3,), dtype=np.float32)
    spaces = dict(action_1=s1, action_2=s2)
    action_space = Dict(spaces)
    # test retrieving it
    model_config = MODEL_DEFAULTS.copy()
    model_config["custom_action_dist"] = "test"
    dist_cls, param_shape = ModelCatalog.get_action_dist(
        action_space, model_config)
    self.assertIsInstance(dist_cls, partial)
    self.assertEqual(param_shape, s1.n + 2 * s2.shape[0])

    # test the class works as a distribution
    dist_input = tf1.placeholder(tf.float32, (None, param_shape))
    model = Model()
    model.model_config = model_config
    dist = dist_cls(dist_input, model=model)
    self.assertIsInstance(dist.sample(), dict)
    self.assertIn("action_1", dist.sample())
    self.assertIn("action_2", dist.sample())
    self.assertEqual(dist.sample()["action_1"].dtype, tf.int64)
    self.assertEqual(dist.sample()["action_2"].shape[1:], s2.shape)

    with self.assertRaises(NotImplementedError):
        dist.entropy()

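# The test above relies on a `CustomMultiActionDistribution` fixture that is
# not shown in this snippet. A minimal sketch of what such a fixture could
# look like, assuming RLlib's `MultiActionDistribution` base class. The
# `assertIsInstance(dist_cls, partial)` check passes because ModelCatalog
# binds the child distributions and input lengths onto the class via
# functools.partial before returning it.
from ray.rllib.models.tf.tf_action_dist import MultiActionDistribution
from ray.rllib.utils.annotations import override


class CustomMultiActionDistribution(MultiActionDistribution):
    @override(MultiActionDistribution)
    def entropy(self):
        # Deliberately unsupported; this is what the test asserts on.
        raise NotImplementedError
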
def testCustomActionDistribution(self):
    ray.init()
    # registration
    ModelCatalog.register_custom_action_dist("test", CustomActionDistribution)
    action_space = Box(0, 1, shape=(5, 3), dtype=np.float32)
    # test retrieving it
    model_config = MODEL_DEFAULTS.copy()
    model_config["custom_action_dist"] = "test"
    dist_cls, param_shape = ModelCatalog.get_action_dist(
        action_space, model_config)
    self.assertEqual(str(dist_cls), str(CustomActionDistribution))
    self.assertEqual(param_shape, action_space.shape)

    # test the class works as a distribution
    dist_input = tf.placeholder(tf.float32, (None, ) + param_shape)
    dist = dist_cls(dist_input, model_config=model_config)
    self.assertEqual(dist.sample().shape[1:], dist_input.shape[1:])
    self.assertIsInstance(dist.sample(), tf.Tensor)
    with self.assertRaises(NotImplementedError):
        dist.entropy()

    # test passing the options to it
    model_config["custom_options"].update({"output_dim": (3, )})
    dist_cls, param_shape = ModelCatalog.get_action_dist(
        action_space, model_config)
    self.assertEqual(param_shape, (3, ))
    dist_input = tf.placeholder(tf.float32, (None, ) + param_shape)
    dist = dist_cls(dist_input, model_config=model_config)
    self.assertEqual(dist.sample().shape[1:], dist_input.shape[1:])
    self.assertIsInstance(dist.sample(), tf.Tensor)
    with self.assertRaises(NotImplementedError):
        dist.entropy()

def test_custom_action_distribution(self):
    class Model:
        pass

    ray.init(
        object_store_memory=1000 * 1024 * 1024, ignore_reinit_error=True
    )  # otherwise fails sometimes locally
    # registration
    ModelCatalog.register_custom_action_dist("test", CustomActionDistribution)
    action_space = Box(0, 1, shape=(5, 3), dtype=np.float32)
    # test retrieving it
    model_config = MODEL_DEFAULTS.copy()
    model_config["custom_action_dist"] = "test"
    dist_cls, param_shape = ModelCatalog.get_action_dist(
        action_space, model_config)
    self.assertEqual(str(dist_cls), str(CustomActionDistribution))
    self.assertEqual(param_shape, action_space.shape)

    # test the class works as a distribution
    dist_input = tf1.placeholder(tf.float32, (None, ) + param_shape)
    model = Model()
    model.model_config = model_config
    dist = dist_cls(dist_input, model=model)
    self.assertEqual(dist.sample().shape[1:], dist_input.shape[1:])
    self.assertIsInstance(dist.sample(), tf.Tensor)
    with self.assertRaises(NotImplementedError):
        dist.entropy()

    # test passing the options to it
    model_config["custom_model_config"].update({"output_dim": (3, )})
    dist_cls, param_shape = ModelCatalog.get_action_dist(
        action_space, model_config)
    self.assertEqual(param_shape, (3, ))
    dist_input = tf1.placeholder(tf.float32, (None, ) + param_shape)
    model.model_config = model_config
    dist = dist_cls(dist_input, model=model)
    self.assertEqual(dist.sample().shape[1:], dist_input.shape[1:])
    self.assertIsInstance(dist.sample(), tf.Tensor)
    with self.assertRaises(NotImplementedError):
        dist.entropy()

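# Both test variants above (the older one using `custom_options` and
# `model_config=`, the newer one using `custom_model_config` and `model=`)
# exercise a `CustomActionDistribution` fixture defined elsewhere. A minimal
# sketch compatible with the newer variant, assuming RLlib's TF
# action-distribution API: `required_model_output_shape` is what produces the
# `param_shape` values asserted above, and `custom_model_config` carries the
# optional `output_dim` override.
from ray.rllib.models.tf.tf_action_dist import TFActionDistribution
from ray.rllib.utils.annotations import override


class CustomActionDistribution(TFActionDistribution):
    def __init__(self, inputs, model):
        # Derive the output shape, honoring an optional `output_dim` override.
        custom_model_config = model.model_config["custom_model_config"]
        if "output_dim" in custom_model_config:
            self.output_shape = tf.concat(
                [tf.shape(inputs)[:1], custom_model_config["output_dim"]],
                axis=0)
        else:
            self.output_shape = tf.shape(inputs)
        super().__init__(inputs, model)

    @staticmethod
    def required_model_output_shape(action_space, model_config):
        custom_model_config = model_config.get("custom_model_config") or {}
        if custom_model_config.get("output_dim"):
            return custom_model_config["output_dim"]
        return action_space.shape

    @override(TFActionDistribution)
    def _build_sample_op(self):
        # Dummy sampling op; the tests only check shape and type.
        return tf.random.uniform(self.output_shape)

    @override(TFActionDistribution)
    def logp(self, x):
        # Dummy log-prob so the base class can build its sampled-logp op.
        return tf.zeros(self.output_shape)
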
def build_sac_model(policy: Policy, obs_space: gym.spaces.Space,
                    action_space: gym.spaces.Space,
                    config: TrainerConfigDict) -> ModelV2:
    """Constructs the necessary ModelV2 for the Policy and returns it.

    Args:
        policy (Policy): The TFPolicy that will use the models.
        obs_space (gym.spaces.Space): The observation space.
        action_space (gym.spaces.Space): The action space.
        config (TrainerConfigDict): The SAC trainer's config dict.

    Returns:
        ModelV2: The ModelV2 to be used by the Policy.

    Note:
        An additional target model will be created in this function and
        assigned to `policy.target_model`.
    """
    # Force-ignore any additionally provided hidden layer sizes.
    # Everything should be configured using SAC's "Q_model" and
    # "policy_model" settings.
    policy_model_config = MODEL_DEFAULTS.copy()
    policy_model_config.update(config["policy_model"])
    q_model_config = MODEL_DEFAULTS.copy()
    q_model_config.update(config["Q_model"])

    default_model_cls = SACTorchModel if config["framework"] == "torch" \
        else SACTFModel

    model = ModelCatalog.get_model_v2(
        obs_space=obs_space,
        action_space=action_space,
        num_outputs=None,
        model_config=config["model"],
        framework=config["framework"],
        default_model=default_model_cls,
        name="sac_model",
        policy_model_config=policy_model_config,
        q_model_config=q_model_config,
        twin_q=config["twin_q"],
        initial_alpha=config["initial_alpha"],
        target_entropy=config["target_entropy"])

    assert isinstance(model, default_model_cls)

    # Create an exact copy of the model and store it in `policy.target_model`.
    # This will be used for tau-synched Q-target models that run behind the
    # actual Q-networks and are used for target q-value calculations in the
    # loss terms.
    policy.target_model = ModelCatalog.get_model_v2(
        obs_space=obs_space,
        action_space=action_space,
        num_outputs=None,
        model_config=config["model"],
        framework=config["framework"],
        default_model=default_model_cls,
        name="target_sac_model",
        policy_model_config=policy_model_config,
        q_model_config=q_model_config,
        twin_q=config["twin_q"],
        initial_alpha=config["initial_alpha"],
        target_entropy=config["target_entropy"])

    assert isinstance(policy.target_model, default_model_cls)

    return model

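# Usage sketch: because `build_sac_model` layers `config["policy_model"]` and
# `config["Q_model"]` on top of MODEL_DEFAULTS, a user config only needs the
# keys that differ from the defaults. A hypothetical override, assuming the
# SAC trainer defaults from `ray.rllib.agents.sac` and the standard
# `fcnet_hiddens` key from MODEL_DEFAULTS:
from ray.rllib.agents import sac

config = sac.DEFAULT_CONFIG.copy()
config["Q_model"]["fcnet_hiddens"] = [256, 256]       # Q-network layer sizes
config["policy_model"]["fcnet_hiddens"] = [256, 256]  # policy-network layer sizes
config["twin_q"] = True  # build two Q-nets to reduce overestimation bias
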
from ray.rllib.agents.ppo import (PPOTrainer as Trainer,
                                  DEFAULT_CONFIG as AGENT_DEFAULT_CONFIG)

from gym_jiminy.toolbox.rllib.utilities import initialize, train, test

# Register learning environment
register_env("env", lambda env_config: gym.make(GYM_ENV_NAME, **env_config))

# ============= Initialize Ray and Tensorboard daemons =============

logger_creator = initialize(num_cpus=N_THREADS, num_gpus=N_GPU, debug=DEBUG)

# ======================== Configure model =========================

# Copy the default model configuration
mdl_cfg = MODEL_DEFAULTS.copy()

# Fully-connected network settings
mdl_cfg["fcnet_activation"] = "tanh"  # Nonlinearity for the built-in fully connected net ("tanh", "relu", or "linear")
mdl_cfg["fcnet_hiddens"] = [64, 64]   # Sizes of the hidden layers of the fully connected net
mdl_cfg["no_final_linear"] = False    # Whether to skip the final linear layer used to resize the outputs to `num_outputs`
mdl_cfg["free_log_std"] = True        # The last half of the output layer does not depend on the input
mdl_cfg["vf_share_layers"] = False    # Whether layers should be shared with the value function

# ========================= Configure RLlib ========================
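# The "Configure RLlib" step is truncated above. A plausible continuation,
# assuming the PPO defaults imported earlier as AGENT_DEFAULT_CONFIG: the
# model dict built above is assigned to the trainer config's "model" key
# (`rllib_cfg` is a hypothetical name for illustration).
rllib_cfg = AGENT_DEFAULT_CONFIG.copy()
rllib_cfg["env"] = "env"      # environment registered above
rllib_cfg["model"] = mdl_cfg  # custom model configuration
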
def build_rnnsac_model(
    policy: Policy,
    obs_space: gym.spaces.Space,
    action_space: gym.spaces.Space,
    config: AlgorithmConfigDict,
) -> ModelV2:
    """Constructs the necessary ModelV2 for the Policy and returns it.

    Args:
        policy: The Policy that will use the models.
        obs_space: The observation space.
        action_space: The action space.
        config: The SAC algorithm's config dict.

    Returns:
        ModelV2: The ModelV2 to be used by the Policy.

    Note:
        An additional target model will be created in this function and
        assigned to `policy.target_model`.
    """
    # With separate state-preprocessor (before obs+action concat).
    num_outputs = int(np.product(obs_space.shape))

    # Force-ignore any additionally provided hidden layer sizes.
    # Everything should be configured using SAC's `q_model_config` and
    # `policy_model_config` config settings.
    policy_model_config = MODEL_DEFAULTS.copy()
    policy_model_config.update(config["policy_model_config"])
    q_model_config = MODEL_DEFAULTS.copy()
    q_model_config.update(config["q_model_config"])

    default_model_cls = RNNSACTorchModel

    model = ModelCatalog.get_model_v2(
        obs_space=obs_space,
        action_space=action_space,
        num_outputs=num_outputs,
        model_config=config["model"],
        framework=config["framework"],
        default_model=default_model_cls,
        name="sac_model",
        policy_model_config=policy_model_config,
        q_model_config=q_model_config,
        twin_q=config["twin_q"],
        initial_alpha=config["initial_alpha"],
        target_entropy=config["target_entropy"],
    )
    assert isinstance(model, default_model_cls)

    # Create an exact copy of the model and store it in `policy.target_model`.
    # This will be used for tau-synched Q-target models that run behind the
    # actual Q-networks and are used for target q-value calculations in the
    # loss terms.
    policy.target_model = ModelCatalog.get_model_v2(
        obs_space=obs_space,
        action_space=action_space,
        num_outputs=num_outputs,
        model_config=config["model"],
        framework=config["framework"],
        default_model=default_model_cls,
        name="target_sac_model",
        policy_model_config=policy_model_config,
        q_model_config=q_model_config,
        twin_q=config["twin_q"],
        initial_alpha=config["initial_alpha"],
        target_entropy=config["target_entropy"],
    )
    assert isinstance(policy.target_model, default_model_cls)

    return model
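# Usage sketch: unlike plain SAC, the RNNSAC builder above expects recurrent
# settings inside the two sub-model configs it merges over MODEL_DEFAULTS.
# A hypothetical partial config, assuming MODEL_DEFAULTS' standard LSTM keys
# (`use_lstm`, `lstm_cell_size`, `max_seq_len`):
config_overrides = {
    "q_model_config": {
        "use_lstm": True,      # wrap the Q-net in an LSTM
        "lstm_cell_size": 64,  # hidden state size
        "max_seq_len": 20,     # truncated BPTT length
    },
    "policy_model_config": {
        "use_lstm": True,
        "lstm_cell_size": 64,
        "max_seq_len": 20,
    },
}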