def _make_actor(obs_space, action_space, config):
    """Create the actor, behavior and target deterministic policies.

    The "actor" entry of `config` (an empty dict when absent) is merged over
    BASE_CONFIG with `deep_merge` and the result drives every choice below.
    A warning is issued when a perturbed policy is requested while the
    encoder's "layer_norm" option is missing or falsy.

    Returns:
        A dict with the keys "actor", "behavior" and "target_actor".
        "behavior" and "target_actor" refer to the very same object as
        "actor" unless the merged config asks for separate instances.
    """
    actor_cfg = deep_merge(
        BASE_CONFIG, config.get("actor", {}), False, ["encoder"]
    )

    def new_policy():
        # Every policy built here shares the same spaces and merged config.
        return DeterministicPolicy.from_scratch(
            obs_space, action_space, actor_cfg
        )

    layer_norm = actor_cfg["encoder"].get("layer_norm")
    if not layer_norm and actor_cfg["perturbed_policy"]:
        warnings.warn(
            "'layer_norm' is deactivated even though a perturbed policy was "
            "requested. For optimal stability, set 'layer_norm': True.")

    # Creation order (actor, behavior, target) is kept deliberately.
    policy = new_policy()
    exploration_policy = (
        new_policy() if actor_cfg["perturbed_policy"] else policy
    )

    target_policy = policy
    if actor_cfg["separate_target_policy"]:
        # Independent module that starts from the actor's current parameters.
        target_policy = new_policy()
        target_policy.load_state_dict(policy.state_dict())
    if actor_cfg["smooth_target_policy"]:
        target_policy = DeterministicPolicy.from_existing(
            target_policy,
            noise=actor_cfg["target_gaussian_sigma"],
        )

    return {
        "actor": policy,
        "behavior": exploration_policy,
        "target_actor": target_policy,
    }
示例#2
0
    def _make_actor(self, obs_space, action_space, config):
        """Assemble a stochastic actor backed by a transformed distribution.

        The "actor" entry of `config` (an empty dict when absent) is merged
        over BASE_CONFIG. The prior comes from `_make_actor_prior` and the
        transforms from `_make_actor_transforms`; both are combined into a
        `ptd.TransformedDistribution`.

        Returns:
            A dict with the single key "actor" holding a `StochasticPolicy`.

        Raises:
            AssertionError: if `action_space` is not a `spaces.Box`.
        """
        actor_cfg = deep_merge(
            BASE_CONFIG,
            config.get("actor", {}),
            False,
            ["obs_encoder", "flow"],
            ["flow"],
        )
        is_box = isinstance(action_space, spaces.Box)
        assert (
            is_box
        ), f"Normalizing Flow incompatible with action space type {type(action_space)}"

        # Prior: parameter module plus the base distribution it feeds.
        prior_params, prior_dist = self._make_actor_prior(
            obs_space, action_space, actor_cfg
        )

        # Normalizing flow: chain the transforms on top of the prior.
        flow_steps = self._make_actor_transforms(
            action_space, prior_params.state_size, actor_cfg
        )
        flow = ptd.flows.CompositeTransform(flow_steps)
        policy_dist = ptd.TransformedDistribution(
            base_dist=prior_dist, transform=flow
        )

        return {"actor": StochasticPolicy(prior_params, policy_dist)}
示例#3
0
    def __init__(self, obs_space, action_space, config):
        """Initialize the parent with `config` merged over BASE_CONFIG.

        When the merged config's "replay_kl" entry is exactly ``False``, an
        extra actor is built with `_make_actor` and stored as `old_actor`
        after calling ``requires_grad_(False)`` on it.
        """
        merged = deep_merge(
            BASE_CONFIG, config, False, ["actor", "critic", "model"]
        )
        super().__init__(obs_space, action_space, merged)

        # Identity check: a missing key or None does not create `old_actor`.
        if merged.get("replay_kl") is not False:
            return

        actor_modules = self._make_actor(obs_space, action_space, merged)
        self.old_actor = actor_modules["actor"].requires_grad_(False)
示例#4
0
    def _make_model(obs_space, action_space, config):
        """Build the stochastic model described by the "model" config entry.

        The "model" entry of `config` (an empty dict when absent) is merged
        over BASE_CONFIG before the parameter module and the distribution
        module are handed to `StochasticModel.assemble`.

        Returns:
            A dict with the single key "model".
        """
        model_cfg = deep_merge(
            BASE_CONFIG, config.get("model", {}), False, ["encoder"]
        )

        dynamics_params = SVGDynamicsParams(obs_space, action_space, model_cfg)
        normal_dist = ptd.Independent(
            ptd.Normal(), reinterpreted_batch_ndims=1
        )

        return {
            "model": StochasticModel.assemble(
                dynamics_params, normal_dist, model_cfg
            )
        }
 def process_config(config):
     """Return BASE_CONFIG merged with the "model" section of `config`.

     A missing "model" entry is treated as an empty dict.
     """
     overrides = config.get("model", {})
     return deep_merge(
         BASE_CONFIG, overrides, False, ["input_encoder", "flow"], ["flow"]
     )
示例#6
0
    def _make_critic(obs_space, action_space, config):
        """Build the action-value critics and their target copies.

        The "critic" entry of `config` (an empty dict when absent) is merged
        over BASE_CONFIG. Two critics are created when the merged "double_q"
        flag is truthy, otherwise one. The target critics are created
        separately and then loaded with the critics' state dict.

        Returns:
            A dict with keys "critics" and "target_critics", each an
            `nn.ModuleList`.
        """
        critic_cfg = deep_merge(
            BASE_CONFIG, config.get("critic", {}), False, ["encoder"]
        )
        obs_size = obs_space.shape[0]
        act_size = action_space.shape[0]
        n_critics = 2 if critic_cfg["double_q"] else 1

        def build_ensemble():
            members = [
                ActionValueFunction.from_scratch(
                    obs_size, act_size, **critic_cfg["encoder"]
                )
                for _ in range(n_critics)
            ]
            return nn.ModuleList(members)

        # Critics are built before targets to keep the creation order.
        online = build_ensemble()
        target = build_ensemble()
        target.load_state_dict(online.state_dict())
        return {"critics": online, "target_critics": target}
示例#7
0
    def make_optimizers(self):
        """Build the actor and critic optimizers from the policy config.

        ``self.config["torch_optimizer"]`` is merged over
        DEFAULT_OPTIM_CONFIG, and each optimizer is created with
        `build_optimizer` for the matching attribute of ``self.module``.

        Returns:
            A dict with keys "actor" and "critic".

        Raises:
            AssertionError: if the actor optimizer type is neither "KFAC"
                nor "EKFAC".
        """
        optim_cfg = dutil.deep_merge(
            DEFAULT_OPTIM_CONFIG,
            self.config["torch_optimizer"],
            False,
            [],
            ["actor", "critic"],
        )
        actor_type = optim_cfg["actor"]["type"]
        assert actor_type in (
            "KFAC",
            "EKFAC",
        ), "ACKTR must use optimizer with Kronecker Factored curvature estimation."

        return {
            name: build_optimizer(getattr(self.module, name), optim_cfg[name])
            for name in ("actor", "critic")
        }
示例#8
0
    def _make_critic(obs_space, action_space, config):
        """Build the state-value critic and, optionally, its target copy.

        The "critic" entry of `config` (an empty dict when absent) is merged
        over BASE_CONFIG. Each value function is a `FullyConnected` encoder
        followed by a linear layer with a single output.

        Returns:
            A dict with key "critic", plus "target_critic" (loaded with the
            critic's state dict) when the merged "target_vf" flag is truthy.
        """
        # pylint:disable=unused-argument
        critic_cfg = deep_merge(
            BASE_CONFIG, config.get("critic", {}), False, ["encoder"]
        )

        def value_network():
            encoder = FullyConnected(
                in_features=obs_space.shape[0], **critic_cfg["encoder"]
            )
            head = nn.Linear(encoder.out_features, 1)
            return nn.Sequential(encoder, head)

        critic = value_network()
        if not critic_cfg["target_vf"]:
            return {"critic": critic}

        target = value_network()
        target.load_state_dict(critic.state_dict())
        return {"critic": critic, "target_critic": target}
示例#9
0
    def _make_optimizers(self):
        """Extend the parent's optimizers with actor and critic optimizers.

        ``self.config["optimizer"]`` is merged over DEFAULT_OPTIM_CONFIG,
        and each optimizer is created with `build_optimizer` for the
        matching attribute of ``self.module``.

        Returns:
            The collection returned by the parent's `_make_optimizers`,
            updated with the "actor" and "critic" entries.

        Raises:
            AssertionError: if the actor optimizer type is neither "KFAC"
                nor "EKFAC".
        """
        optimizers = super()._make_optimizers()
        optim_cfg = dutil.deep_merge(
            DEFAULT_OPTIM_CONFIG,
            self.config["optimizer"],
            new_keys_allowed=False,
            allow_new_subkey_list=[],
            override_all_if_type_changes=["actor", "critic"],
        )
        actor_type = optim_cfg["actor"]["type"]
        assert actor_type in (
            "KFAC",
            "EKFAC",
        ), "ACKTR must use optimizer with Kronecker Factored curvature estimation."

        # Build both optimizers first, then register them in a single update.
        optimizers.update(
            {
                name: build_optimizer(
                    getattr(self.module, name), optim_cfg[name]
                )
                for name in ("actor", "critic")
            }
        )
        return optimizers
示例#10
0
    def __init__(self, observation_space: Space, action_space: Space, config: dict):
        """Set up the module, device, optimizers and exploration.

        `config` is merged over this policy's default config (extended with
        a "worker_index" entry set to None) before the parent initializer
        runs. The module is then built with `make_module` and moved to CUDA
        when available, otherwise to the CPU.
        """
        defaults = {**self.get_default_config(), "worker_index": None}
        merged = deep_merge(
            defaults,
            config,
            new_keys_allowed=False,
            whitelist=Trainer._allow_unknown_subkeys,
            override_all_if_type_changes=Trainer._override_all_subkeys_if_type_changes,
        )
        super().__init__(observation_space, action_space, merged)

        device_type = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device_type)
        self.module = self.make_module(observation_space, action_space, self.config)
        self.module.to(self.device)

        # The collection is assigned before `make_optimizers` is invoked.
        self.optimizers = OptimizerCollection()
        for optim_name, optim in self.make_optimizers().items():
            self.optimizers[optim_name] = optim

        # === Policy attributes ===
        self.dist_class = WrapModuleDist
        self.framework = "torch"  # Must be set before creating exploration
        self.exploration = self._create_exploration()
示例#11
0
 def process_config(config):
     """Return BASE_CONFIG merged with the "model" section of `config`.

     A missing "model" entry is treated as an empty dict.
     """
     overrides = config.get("model", {})
     return deep_merge(BASE_CONFIG, overrides, False, ["encoder"])
示例#12
0
 def __init__(self, obs_space, action_space, config):
     """Initialize the parent with the merged config and "target_vf" off.

     `config` is merged over BASE_CONFIG, then the critic's "target_vf"
     entry is forced to False before the parent initializer runs.
     """
     merged = deep_merge(BASE_CONFIG, config, False, ["actor", "critic"])
     critic_cfg = merged["critic"]
     critic_cfg["target_vf"] = False
     super().__init__(obs_space, action_space, merged)
示例#13
0
 def __init__(self, obs_space, action_space, config):
     """Initialize the parent with `config` merged over BASE_CONFIG."""
     merged = deep_merge(
         BASE_CONFIG, config, False, ["actor", "critic", "entropy"]
     )
     super().__init__(obs_space, action_space, merged)
示例#14
0
def with_base_config(config):
    """Merge `config` over BASE_CONFIG with `deep_merge` and return the result.

    The third positional argument passed to `deep_merge` is True.
    """
    merged = deep_merge(BASE_CONFIG, config, True)
    return merged
示例#15
0
 def __init__(self, obs_space, action_space, config):
     """Populate this container with critic and actor modules.

     `config` is merged over BASE_CONFIG, then the dicts produced by
     `_make_critic` and `_make_actor` are added through `self.update`,
     critic first.
     """
     super().__init__()
     merged = deep_merge(BASE_CONFIG, config, False, ["encoder"])

     critic_modules = self._make_critic(obs_space, action_space, merged)
     self.update(critic_modules)

     actor_modules = self._make_actor(obs_space, action_space, merged)
     self.update(actor_modules)