Пример #1
0
    def build_single_model(obs_space, action_space, config):
        """Assemble one stochastic model from Gaussian dynamics parameters.

        A ``GaussianDynamicsParams`` module is paired with a ``Normal``
        distribution wrapped in ``Independent`` (``reinterpreted_batch_ndims=1``)
        and both are handed to ``StochasticModel.assemble``.

        Args:
            obs_space: Observation space forwarded to the params module.
            action_space: Action space forwarded to the params module.
            config: Settings passed to both the params module and
                ``StochasticModel.assemble``.

        Returns:
            The result of ``StochasticModel.assemble`` for the two modules.
        """
        gaussian_params = GaussianDynamicsParams(obs_space, action_space, config)
        normal_dist = ptd.Independent(ptd.Normal(), reinterpreted_batch_ndims=1)
        return StochasticModel.assemble(gaussian_params, normal_dist, config)
Пример #2
0
    def _make_model(obs_space, action_space, config):
        """Build the stochastic model and return it under the ``"model"`` key.

        The ``"model"`` sub-dict of ``config`` (empty when absent) is merged on
        top of ``BASE_CONFIG``; the merged settings drive both the
        ``SVGDynamicsParams`` module and ``StochasticModel.assemble``.

        Args:
            obs_space: Observation space forwarded to ``SVGDynamicsParams``.
            action_space: Action space forwarded to ``SVGDynamicsParams``.
            config: Mapping that may hold a ``"model"`` entry with overrides.

        Returns:
            A one-entry dict ``{"model": <assembled model>}``.
        """
        # NOTE(review): the trailing positional arguments presumably mean
        # "no new keys allowed, except under 'encoder'" -- confirm against
        # the signature of deep_merge.
        model_config = deep_merge(
            BASE_CONFIG, config.get("model", {}), False, ["encoder"]
        )

        dynamics_params = SVGDynamicsParams(obs_space, action_space, model_config)
        normal_dist = ptd.Independent(ptd.Normal(), reinterpreted_batch_ndims=1)

        return {
            "model": StochasticModel.assemble(
                dynamics_params, normal_dist, model_config
            )
        }
Пример #3
0
def dist(flow, torch_script):
    """Build a distribution module that pushes a uniform base through ``flow``.

    The base is a ``Uniform`` wrapped in ``Independent``
    (``reinterpreted_batch_ndims=1``) with fixed parameters ``low = zeros(2)``
    and ``high = ones(2)``. It is transformed by ``flow`` and the result is
    wrapped once more in ``ptd.Distribution``.

    Args:
        flow: Transform passed as the second argument to
            ``ptd.TransformedDistribution``.
        torch_script: When truthy, the module is compiled with
            ``torch.jit.script`` before being returned.

    Returns:
        The wrapped distribution module, scripted if requested.
    """
    uniform = ptd.Independent(ptd.Uniform(), reinterpreted_batch_ndims=1)
    bounds = {"low": torch.zeros(2), "high": torch.ones(2)}
    base = ptd.Distribution(cond_dist=uniform, params=bounds)

    transformed = ptd.TransformedDistribution(base, flow)
    wrapped = ptd.Distribution(cond_dist=transformed)

    if torch_script:
        return torch.jit.script(wrapped)
    return wrapped
Пример #4
0
    def _make_actor_prior(obs_space, action_space, config):
        """Create the params module and base distribution for the actor.

        Args:
            obs_space: Observation space; ``shape[0]`` is the encoder input size.
            action_space: Action space forwarded to ``NFNormalParams``.
            config: Mapping read for ``"conditional_prior"``,
                ``"conditional_flow"`` and, when either flag is set,
                ``"obs_encoder"`` (keyword arguments for the encoder).

        Returns:
            Tuple ``(params_module, base_dist)``.
        """
        if not (config["conditional_prior"] or config["conditional_flow"]):
            # Neither flag is set, so the configured encoder is skipped and
            # one with ``units=()`` is used instead (presumably no hidden
            # layers -- confirm against nnx.FullyConnected).
            warnings.warn("Policy is blind to the observations")
            encoder = nnx.FullyConnected(obs_space.shape[0], units=())
        else:
            encoder = nnx.FullyConnected(
                obs_space.shape[0], **config["obs_encoder"]
            )

        return (
            NFNormalParams(encoder, action_space, config),
            ptd.Independent(ptd.Normal(), reinterpreted_batch_ndims=1),
        )
    def _make_model_prior(obs_space, action_space, config):
        """Create the params module and base distribution for the model.

        Args:
            obs_space: Observation space; ``shape[0]`` is the observation size
                and the space itself is forwarded to ``NFNormalParams``.
            action_space: Action space; ``shape[0]`` is the action size.
            config: Mapping read for ``"conditional_prior"``,
                ``"conditional_flow"`` and, when either flag is set,
                ``"input_encoder"`` (keyword arguments for the encoder).

        Returns:
            Tuple ``(params_module, base_dist)``.
        """
        obs_dim = obs_space.shape[0]
        act_dim = action_space.shape[0]

        if not (config["conditional_prior"] or config["conditional_flow"]):
            # Neither flag is set, so the configured encoder is skipped and
            # one with ``units=()`` is used instead (presumably no hidden
            # layers -- confirm against nnx.StateActionEncoder).
            warnings.warn("Model is blind to the observations")
            encoder = nnx.StateActionEncoder(obs_dim, act_dim, units=())
        else:
            encoder = nnx.StateActionEncoder(
                obs_dim, act_dim, **config["input_encoder"]
            )

        return (
            NFNormalParams(encoder, obs_space, config),
            ptd.Independent(ptd.Normal(), reinterpreted_batch_ndims=1),
        )