def _make_actor(obs_space, action_space, config):
    """Build the policy, behavior, and target-policy networks.

    Returns:
        dict: with keys "actor", "behavior", and "target_actor".
    """
    config = deep_merge(BASE_CONFIG, config.get("actor", {}), False, ["encoder"])

    if not config["encoder"].get("layer_norm") and config["perturbed_policy"]:
        warnings.warn(
            "'layer_norm' is deactivated even though a perturbed policy was "
            "requested. For optimal stability, set 'layer_norm': True.")

    def fresh_policy():
        # One-line factory so every network is built from the same spec
        return DeterministicPolicy.from_scratch(obs_space, action_space, config)

    actor = fresh_policy()

    # NOTE(review): when perturbed, the behavior net keeps its fresh random
    # initialization and is not synced with `actor` here — confirm intended.
    behavior = fresh_policy() if config["perturbed_policy"] else actor

    if config["separate_target_policy"]:
        target_actor = fresh_policy()
        target_actor.load_state_dict(actor.state_dict())
    else:
        target_actor = actor

    if config["smooth_target_policy"]:
        target_actor = DeterministicPolicy.from_existing(
            target_actor,
            noise=config["target_gaussian_sigma"],
        )

    return {"actor": actor, "behavior": behavior, "target_actor": target_actor}
def _make_actor(self, obs_space, action_space, config):
    """Assemble the normalizing-flow stochastic policy.

    Raises:
        AssertionError: if the action space is not a Box.
    """
    config = deep_merge(
        BASE_CONFIG,
        config.get("actor", {}),
        False,
        ["obs_encoder", "flow"],
        ["flow"],
    )
    assert isinstance(
        action_space, spaces.Box
    ), f"Normalizing Flow incompatible with action space type {type(action_space)}"

    # Conditional prior over latent actions
    params_module, base_dist = self._make_actor_prior(
        obs_space, action_space, config)

    # Bijective transforms composing the normalizing flow on top of the prior
    transforms = self._make_actor_transforms(
        action_space, params_module.state_size, config)
    dist_module = ptd.TransformedDistribution(
        base_dist=base_dist,
        transform=ptd.flows.CompositeTransform(transforms),
    )
    return {"actor": StochasticPolicy(params_module, dist_module)}
def __init__(self, obs_space, action_space, config):
    """Merge defaults and optionally snapshot a frozen copy of the actor."""
    config = deep_merge(BASE_CONFIG, config, False, ["actor", "critic", "model"])
    super().__init__(obs_space, action_space, config)

    # Only an explicit `False` triggers the snapshot (`is False`):
    # a missing or None "replay_kl" entry does not.
    if config.get("replay_kl") is False:
        frozen = self._make_actor(obs_space, action_space, config)["actor"]
        self.old_actor = frozen.requires_grad_(False)
def _make_model(obs_space, action_space, config):
    """Construct the stochastic dynamics model.

    Returns:
        dict: with a single "model" entry.
    """
    config = deep_merge(BASE_CONFIG, config.get("model", {}), False, ["encoder"])
    params = SVGDynamicsParams(obs_space, action_space, config)
    # Diagonal Gaussian: reinterpret the last batch dim as the event dim
    dist = ptd.Independent(ptd.Normal(), reinterpreted_batch_ndims=1)
    return {"model": StochasticModel.assemble(params, dist, config)}
def process_config(config):
    """Fill in default configuration for the model section."""
    overrides = config.get("model", {})
    return deep_merge(BASE_CONFIG, overrides, False, ["input_encoder", "flow"], ["flow"])
def _make_critic(obs_space, action_space, config):
    """Build the (optionally twin) action-value functions and target copies.

    Returns:
        dict: with "critics" and "target_critics" ModuleLists.
    """
    config = deep_merge(BASE_CONFIG, config.get("critic", {}), False, ["encoder"])
    obs_size = obs_space.shape[0]
    act_size = action_space.shape[0]

    def build_one():
        # Each Q-network is built from the same encoder spec
        return ActionValueFunction.from_scratch(
            obs_size, act_size, **config["encoder"]
        )

    count = 2 if config["double_q"] else 1
    critics = nn.ModuleList(build_one() for _ in range(count))
    target_critics = nn.ModuleList(build_one() for _ in range(count))
    # Start the targets exactly in sync with the online critics
    target_critics.load_state_dict(critics.state_dict())
    return {"critics": critics, "target_critics": target_critics}
def make_optimizers(self):
    """Build Kronecker-factored optimizers for actor and critic.

    Returns:
        dict: mapping "actor" and "critic" to their built optimizers.

    Raises:
        AssertionError: if the actor optimizer type is not KFAC or EKFAC.
    """
    # Keyword arguments for consistency with the sibling `_make_optimizers`,
    # which calls dutil.deep_merge the same way — positional booleans/lists
    # here were easy to misread.
    config = dutil.deep_merge(
        DEFAULT_OPTIM_CONFIG,
        self.config["torch_optimizer"],
        new_keys_allowed=False,
        allow_new_subkey_list=[],
        override_all_if_type_changes=["actor", "critic"],
    )
    assert config["actor"]["type"] in [
        "KFAC",
        "EKFAC",
    ], "ACKTR must use optimizer with Kronecker Factored curvature estimation."
    return {
        "actor": build_optimizer(self.module.actor, config["actor"]),
        "critic": build_optimizer(self.module.critic, config["critic"]),
    }
def _make_critic(obs_space, action_space, config):
    # pylint:disable=unused-argument
    """Build the state-value critic and, optionally, its target network."""
    config = deep_merge(BASE_CONFIG, config.get("critic", {}), False, ["encoder"])

    def build_vf():
        # Encoder followed by a scalar value head
        encoder = FullyConnected(in_features=obs_space.shape[0], **config["encoder"])
        head = nn.Linear(encoder.out_features, 1)
        return nn.Sequential(encoder, head)

    modules = {"critic": build_vf()}
    if config["target_vf"]:
        target = build_vf()
        target.load_state_dict(modules["critic"].state_dict())
        modules["target_critic"] = target
    return modules
def _make_optimizers(self):
    """Extend the parent optimizers with (E)KFAC actor/critic optimizers.

    Raises:
        AssertionError: if the actor optimizer type is not KFAC or EKFAC.
    """
    optimizers = super()._make_optimizers()
    config = dutil.deep_merge(
        DEFAULT_OPTIM_CONFIG,
        self.config["optimizer"],
        new_keys_allowed=False,
        allow_new_subkey_list=[],
        override_all_if_type_changes=["actor", "critic"],
    )
    assert config["actor"]["type"] in [
        "KFAC",
        "EKFAC",
    ], "ACKTR must use optimizer with Kronecker Factored curvature estimation."
    optimizers["actor"] = build_optimizer(self.module.actor, config["actor"])
    optimizers["critic"] = build_optimizer(self.module.critic, config["critic"])
    return optimizers
def __init__(self, observation_space: Space, action_space: Space, config: dict):
    """Initialize the policy: merge config, build the module, its optimizers,
    and the exploration object (in that order — later steps read earlier state).
    """
    # Merge user config over defaults; unknown top-level keys are rejected
    # except those the Trainer whitelists. "worker_index" is pre-seeded so it
    # counts as a known key during the merge.
    config = deep_merge(
        {**self.get_default_config(), "worker_index": None},
        config,
        new_keys_allowed=False,
        whitelist=Trainer._allow_unknown_subkeys,
        override_all_if_type_changes=Trainer._override_all_subkeys_if_type_changes,
    )
    super().__init__(observation_space, action_space, config)
    # Prefer GPU when available; the module is moved there right after creation
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.module = self.make_module(observation_space, action_space, self.config)
    self.module.to(self.device)
    # Collect optimizers by name; built after the module so they can hold
    # references to its parameters
    self.optimizers = OptimizerCollection()
    for name, optimizer in self.make_optimizers().items():
        self.optimizers[name] = optimizer
    # === Policy attributes ===
    self.dist_class = WrapModuleDist
    self.framework = "torch"  # Needed to create exploration
    self.exploration = self._create_exploration()
def process_config(config):
    """Fill in default configuration for models."""
    overrides = config.get("model", {})
    return deep_merge(BASE_CONFIG, overrides, False, ["encoder"])
def __init__(self, obs_space, action_space, config):
    """Initialize, forcing the critic to skip its target value function."""
    merged = deep_merge(BASE_CONFIG, config, False, ["actor", "critic"])
    # This module never uses a target V-function
    merged["critic"]["target_vf"] = False
    super().__init__(obs_space, action_space, merged)
def __init__(self, obs_space, action_space, config):
    """Merge defaults for actor/critic/entropy sections and initialize."""
    full_config = deep_merge(
        BASE_CONFIG, config, False, ["actor", "critic", "entropy"]
    )
    super().__init__(obs_space, action_space, full_config)
def with_base_config(config):
    """Returns the given config dict merged with the base model-based configuration."""
    # Third positional arg permits new keys — presumably `new_keys_allowed`;
    # confirm against the deep_merge signature.
    merged = deep_merge(BASE_CONFIG, config, True)
    return merged
def __init__(self, obs_space, action_space, config):
    """Build critic and actor submodules from the merged configuration."""
    super().__init__()
    merged = deep_merge(BASE_CONFIG, config, False, ["encoder"])
    # Critic first, then actor — mirrors the original registration order
    for maker in (self._make_critic, self._make_actor):
        self.update(maker(obs_space, action_space, merged))