Example #1
File: basic.py  Project: pyro-ppl/pyro
def tanh():
    """
    A helper function to create a
    :class:`~pyro.distributions.transforms.TanhTransform` object for consistency
    with other helpers.
    """
    return TanhTransform()
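A minimal usage sketch (not from the Pyro source): the helper drops into a TransformedDistribution exactly like a raw TanhTransform.

import torch
from torch.distributions import Normal, TransformedDistribution
from pyro.distributions.transforms import tanh

# Squash a unit Gaussian onto (-1, 1) via the helper above.
base = Normal(torch.zeros(3), torch.ones(3))
squashed = TransformedDistribution(base, [tanh()])
print(squashed.sample())  # values lie strictly inside (-1, 1)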
Example #2
    def __init__(self,
                 img_dim,
                 scalar_feature_dim,
                 action_dim,
                 shared_layers,
                 shared_out_dim,
                 initial_alpha=1.,
                 eps=1e-7,
                 device=torch.device(CPU),
                 normalize_obs=False,
                 normalize_value=False):
        assert len(img_dim) == 4
        super().__init__(obs_dim=scalar_feature_dim,
                         initial_alpha=initial_alpha,
                         eps=eps,
                         norm_dim=(0, ),
                         device=device,
                         normalize_obs=normalize_obs,
                         normalize_value=normalize_value)
        self._img_dim = img_dim
        self._scalar_feature_dim = scalar_feature_dim
        self.split = Split([int(np.prod(img_dim)), scalar_feature_dim])
        self.fuse = Fuse()
        self.encoder = Conv2DEncoder(*img_dim[1:],
                                     shared_out_dim, shared_layers,
                                     nn.LayerNorm(shared_out_dim))

        encoded_dim = shared_out_dim * self._img_dim[0] + scalar_feature_dim
        self._policy = nn.Sequential(
            nn.Linear(encoded_dim, 1024), nn.LayerNorm(1024),
            ModernBlock(1024, 2048, 1024, norm=None), nn.LayerNorm(1024),
            ModernBlock(1024, 2048, 1024, norm=SPECTRAL), nn.LayerNorm(1024),
            ModernBlock(1024, 2048, 1024, norm=SPECTRAL), nn.LayerNorm(1024),
            ModernBlock(1024, 2048, 1024, norm=None),
            nn.Linear(1024, action_dim * 2))
        self._q1 = nn.Sequential(nn.Linear(encoded_dim + action_dim, 1024),
                                 nn.LayerNorm(1024),
                                 ModernBlock(1024, 2048, 1024, norm=None),
                                 nn.LayerNorm(1024),
                                 ModernBlock(1024, 2048, 1024, norm=SPECTRAL),
                                 nn.LayerNorm(1024),
                                 ModernBlock(1024, 2048, 1024, norm=SPECTRAL),
                                 nn.LayerNorm(1024),
                                 ModernBlock(1024, 2048, 1024, norm=None),
                                 nn.Linear(1024, 1))
        self._q2 = nn.Sequential(nn.Linear(encoded_dim + action_dim, 1024),
                                 nn.LayerNorm(1024),
                                 ModernBlock(1024, 2048, 1024, norm=None),
                                 nn.LayerNorm(1024),
                                 ModernBlock(1024, 2048, 1024, norm=SPECTRAL),
                                 nn.LayerNorm(1024),
                                 ModernBlock(1024, 2048, 1024, norm=SPECTRAL),
                                 nn.LayerNorm(1024),
                                 ModernBlock(1024, 2048, 1024, norm=None),
                                 nn.Linear(1024, 1))
        self._squash_gaussian = TanhTransform()
        self.to(self.device)
Example #3
    def __init__(self,
                 obs_dim,
                 initial_alpha=1.,
                 eps=1e-7,
                 norm_dim=(0, ),
                 device=torch.device(CPU),
                 normalize_obs=False,
                 normalize_value=False,
                 **kwargs):
        super().__init__(obs_dim=obs_dim,
                         initial_alpha=initial_alpha,
                         norm_dim=norm_dim,
                         device=device,
                         normalize_obs=normalize_obs,
                         normalize_value=normalize_value,
                         **kwargs)
        self._eps = eps
        self._squash_gaussian = TanhTransform()
Example #4
def get_transforms(cache_size):
    transforms = [
        AbsTransform(cache_size=cache_size),
        ExpTransform(cache_size=cache_size),
        PowerTransform(exponent=2,
                       cache_size=cache_size),
        PowerTransform(exponent=torch.tensor(5.).normal_(),
                       cache_size=cache_size),
        PowerTransform(exponent=torch.tensor(5.).normal_(),
                       cache_size=cache_size),
        SigmoidTransform(cache_size=cache_size),
        TanhTransform(cache_size=cache_size),
        AffineTransform(0, 1, cache_size=cache_size),
        AffineTransform(1, -2, cache_size=cache_size),
        AffineTransform(torch.randn(5),
                        torch.randn(5),
                        cache_size=cache_size),
        AffineTransform(torch.randn(4, 5),
                        torch.randn(4, 5),
                        cache_size=cache_size),
        SoftmaxTransform(cache_size=cache_size),
        SoftplusTransform(cache_size=cache_size),
        StickBreakingTransform(cache_size=cache_size),
        LowerCholeskyTransform(cache_size=cache_size),
        CorrCholeskyTransform(cache_size=cache_size),
        ComposeTransform([
            AffineTransform(torch.randn(4, 5),
                            torch.randn(4, 5),
                            cache_size=cache_size),
        ]),
        ComposeTransform([
            AffineTransform(torch.randn(4, 5),
                            torch.randn(4, 5),
                            cache_size=cache_size),
            ExpTransform(cache_size=cache_size),
        ]),
        ComposeTransform([
            AffineTransform(0, 1, cache_size=cache_size),
            AffineTransform(torch.randn(4, 5),
                            torch.randn(4, 5),
                            cache_size=cache_size),
            AffineTransform(1, -2, cache_size=cache_size),
            AffineTransform(torch.randn(4, 5),
                            torch.randn(4, 5),
                            cache_size=cache_size),
        ]),
        ReshapeTransform((4, 5), (2, 5, 2)),
        IndependentTransform(
            AffineTransform(torch.randn(5),
                            torch.randn(5),
                            cache_size=cache_size),
            1),
        CumulativeDistributionTransform(Normal(0, 1)),
    ]
    transforms += [t.inv for t in transforms]
    return transforms
Example #5
File: SAC.py  Project: Harimus/RL_agents
    def forward(self, state, mean_action=False):
        mu, log_std = self.network(state).chunk(2, dim=-1)
        # Clamp log-std so the policy is neither too random nor too deterministic
        log_std = torch.clamp(log_std, LOG_MIN, LOG_MAX)
        normal = TransformedDistribution(
            Independent(Normal(mu, log_std.exp()), 1),
            [TanhTransform(),
             AffineTransform(loc=self.loc, scale=self.scale)])
        if mean_action:
            # Match the transform order above: y = loc + scale * tanh(x)
            return self.scale * torch.tanh(mu) + self.loc
        return normal
Example #6
    def forward(self, state):
        policy_mean, policy_log_std = self.policy(state).chunk(2, dim=1)
        policy_log_std = torch.clamp(policy_log_std,
                                     min=self.log_std_min,
                                     max=self.log_std_max)
        policy = TransformedDistribution(
            Independent(Normal(policy_mean, policy_log_std.exp()), 1), [
                TanhTransform(),
                AffineTransform(loc=self.action_loc, scale=self.action_scale)
            ])
        policy.mean_ = self.action_scale * torch.tanh(
            policy.base_dist.mean
        ) + self.action_loc  # TODO: See if mean attr can be overwritten
        return policy
Example #7
    def forward(self, mean, log_std, deterministic=False):
        log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max)
        std = torch.exp(log_std)

        action_distribution = TransformedDistribution(
            Normal(mean, std), TanhTransform(cache_size=1))

        if deterministic:
            action_sample = torch.tanh(mean)
        else:
            action_sample = action_distribution.rsample()

        log_prob = torch.sum(action_distribution.log_prob(action_sample),
                             dim=1)

        return action_sample, log_prob
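Since the base Normal above is not wrapped in Independent and TanhTransform acts elementwise, log_prob comes back per action dimension and has to be summed by hand. A minimal standalone sketch (shapes assumed) of the equivalent formulation that folds the sum into the distribution itself:

import torch
from torch.distributions import Independent, Normal, TransformedDistribution
from torch.distributions.transforms import TanhTransform

mean, std = torch.zeros(8, 3), torch.ones(8, 3)
dist = TransformedDistribution(
    Independent(Normal(mean, std), 1), TanhTransform(cache_size=1))
action = dist.rsample()
log_prob = dist.log_prob(action)  # shape (8,): already summed over the action dim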
Example #8
def get_transforms(cache_size):
    transforms = [
        AbsTransform(cache_size=cache_size),
        ExpTransform(cache_size=cache_size),
        PowerTransform(exponent=2,
                       cache_size=cache_size),
        PowerTransform(exponent=torch.tensor(5.).normal_(),
                       cache_size=cache_size),
        SigmoidTransform(cache_size=cache_size),
        TanhTransform(cache_size=cache_size),
        AffineTransform(0, 1, cache_size=cache_size),
        AffineTransform(1, -2, cache_size=cache_size),
        AffineTransform(torch.randn(5),
                        torch.randn(5),
                        cache_size=cache_size),
        AffineTransform(torch.randn(4, 5),
                        torch.randn(4, 5),
                        cache_size=cache_size),
        SoftmaxTransform(cache_size=cache_size),
        StickBreakingTransform(cache_size=cache_size),
        LowerCholeskyTransform(cache_size=cache_size),
        CorrCholeskyTransform(cache_size=cache_size),
        ComposeTransform([
            AffineTransform(torch.randn(4, 5),
                            torch.randn(4, 5),
                            cache_size=cache_size),
        ]),
        ComposeTransform([
            AffineTransform(torch.randn(4, 5),
                            torch.randn(4, 5),
                            cache_size=cache_size),
            ExpTransform(cache_size=cache_size),
        ]),
        ComposeTransform([
            AffineTransform(0, 1, cache_size=cache_size),
            AffineTransform(torch.randn(4, 5),
                            torch.randn(4, 5),
                            cache_size=cache_size),
            AffineTransform(1, -2, cache_size=cache_size),
            AffineTransform(torch.randn(4, 5),
                            torch.randn(4, 5),
                            cache_size=cache_size),
        ]),
    ]
    transforms += [t.inv for t in transforms]
    return transforms
Example #9
    def forward(self, x=None, body_x=None, **kwargs):
        if x is None and body_x is None:
            raise ValueError('One of [x, body_x] should be provided!')
        if body_x is None:
            body_x = self.body(x, **kwargs)
        body_out = body_x[0] if isinstance(body_x, tuple) else body_x
        mean = self.head_mean(body_out)
        if self.std_cond_in:
            log_std = self.head_logstd(body_out)
        else:
            log_std = self.head_logstd.expand_as(mean)
        if self.clamp_log_std:
            log_std = torch.clamp(log_std, LOG_STD_MIN, LOG_STD_MAX)
        std = torch.exp(log_std)
        action_dist = Independent(Normal(loc=mean, scale=std), 1)
        if self.tanh_on_dist:
            action_dist = TransformedDistribution(
                action_dist, [TanhTransform(cache_size=1)])
        return action_dist, body_x
Example #10
def tensor_to_distribution(args, **kwargs):
    """Convert tensors to a distribution.

    When args is a tensor, it returns a Categorical distribution with logits given by
    args.

    When args is a tuple, it returns a MultivariateNormal distribution with args[0] as
    mean and args[1] as scale_tril matrix. When args[1] is zero, it returns a Delta.

    Parameters
    ----------
    args: Union[Tuple[Tensor], Tensor].
        Tensors with the parameters of a distribution.
    """
    if not isinstance(args, tuple):
        return Categorical(logits=args)
    elif torch.all(args[1] == 0):
        if kwargs.get("add_noise", False):
            noise_clip = kwargs.get("noise_clip", np.inf)
            policy_noise = kwargs.get("policy_noise", 1)
            try:
                policy_noise = policy_noise()
            except TypeError:
                pass
            mean = args[0] + (torch.randn_like(args[0]) * policy_noise).clamp(
                -noise_clip, noise_clip
            )
        else:
            mean = args[0]
        return Delta(v=mean, event_dim=min(1, mean.dim()))
    else:
        if kwargs.get("tanh", False):
            d = TransformedDistribution(
                MultivariateNormal(args[0], scale_tril=args[1]), [TanhTransform()]
            )
        else:
            d = MultivariateNormal(args[0], scale_tril=args[1])
        return d
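A brief usage sketch, assuming tensor_to_distribution is importable as defined above; which branch runs depends only on the shape and contents of args:

import torch

# Bare tensor: treated as Categorical logits.
d1 = tensor_to_distribution(torch.zeros(4))

# (mean, scale_tril) tuple: MultivariateNormal, optionally tanh-squashed.
mean, scale_tril = torch.zeros(2), 0.1 * torch.eye(2)
d2 = tensor_to_distribution((mean, scale_tril), tanh=True)

# Zero scale: collapses to a Delta at the mean.
d3 = tensor_to_distribution((mean, torch.zeros(2, 2)))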
Example #11
 def __init__(self, low, high):
     m = (high - low) / 2
     b = (high + low) / 2
     super().__init__([TanhTransform(), AffineTransform(b, m)])
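Here tanh first maps the real line onto (-1, 1); the affine step y = m*x + b with m = (high - low)/2 and b = (high + low)/2 then stretches that interval onto (low, high). A standalone sketch of the same composition using torch's ComposeTransform directly:

import torch
from torch.distributions.transforms import (AffineTransform, ComposeTransform,
                                            TanhTransform)

low, high = torch.tensor(-2.), torch.tensor(5.)
m, b = (high - low) / 2, (high + low) / 2
bounded = ComposeTransform([TanhTransform(), AffineTransform(b, m)])
print(bounded(torch.tensor(0.)))   # b = 1.5, the midpoint of (low, high)
print(bounded(torch.tensor(10.)))  # approaches high = 5 as the input grows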
Example #12
import torch
from torch.distributions import Independent, Normal, TransformedDistribution
from torch.distributions.transforms import TanhTransform
import numpy as np

batch_size = 400
torch.set_default_dtype(torch.float64)
n = 40
print(n)
# Sample repeatedly from a tanh-transformed Gaussian until log_prob
# returns NaN: inverting the sample through atanh diverges once a
# sample saturates to exactly +/-1.
done = False
while not done:
    mu = torch.as_tensor(np.random.random([batch_size, n]))
    log_std = torch.as_tensor(np.random.random([batch_size, n]))
    dist = TransformedDistribution(
        Independent(Normal(mu, log_std.exp()), 1), TanhTransform())
    sample = dist.rsample()
    log_prob = dist.log_prob(sample)
    if torch.isnan(log_prob).any().item():
        done = True
    if (sample == -1).any() or (sample == 1).any():
        print("something's wrong...")
print(log_prob)
print("something was wrong")
Example #13
class SquashedGaussianSoftActorCritic(SoftActorCritic):
    def __init__(self,
                 obs_dim,
                 initial_alpha=1.,
                 eps=1e-7,
                 norm_dim=(0, ),
                 device=torch.device(CPU),
                 normalize_obs=False,
                 normalize_value=False,
                 **kwargs):
        super().__init__(obs_dim=obs_dim,
                         initial_alpha=initial_alpha,
                         norm_dim=norm_dim,
                         device=device,
                         normalize_obs=normalize_obs,
                         normalize_value=normalize_value,
                         **kwargs)
        self._eps = eps
        self._squash_gaussian = TanhTransform()

    def _q_vals(self, x, a):
        input = torch.cat((x, a), dim=1)
        q1_val = self._q1(input)
        q2_val = self._q2(input)
        min_q = torch.min(q1_val, q2_val)
        return min_q, q1_val, q2_val

    def _lprob(self, dist, a, t_a):
        return torch.sum(dist.log_prob(a) -
                         self._squash_gaussian.log_abs_det_jacobian(a, t_a),
                         dim=-1,
                         keepdim=True)

    def q_vals(self, x, h, a, **kwargs):
        a = a.to(self.device)
        x = self._extract_features(x)
        min_q, q1_val, q2_val = self._q_vals(x, a)
        return min_q, q1_val, q2_val, h

    def act_lprob(self, x, h, **kwargs):
        dist, _, _ = self.forward(x, h)
        action = dist.rsample()
        t_action = self._squash_gaussian(action)
        log_prob = self._lprob(dist, action, t_action)
        return t_action, log_prob

    def compute_action(self, x, h):
        self.eval()
        with torch.no_grad():
            dist, value, h = self.forward(x, h=h)
            action = dist.rsample()
            t_action = self._squash_gaussian(action)
            log_prob = self._lprob(dist, action, t_action)
        self.train()
        return (t_action[0].cpu().numpy(), value[0].cpu().numpy(),
                h[0].cpu().numpy(), log_prob[0].cpu().numpy(),
                dist.entropy()[0].cpu().numpy(), dist.mean[0].cpu().numpy(),
                dist.variance[0].cpu().numpy())

    def deterministic_action(self, x, h):
        self.eval()
        with torch.no_grad():
            dist, value, h = self.forward(x, h=h)
            action = dist.mean
            t_action = self._squash_gaussian(action)
            log_prob = self._lprob(dist, action, t_action)
        self.train()
        return (t_action[0].cpu().numpy(), value[0].cpu().numpy(),
                h[0].cpu().numpy(), log_prob[0].cpu().numpy(),
                dist.entropy()[0].cpu().numpy())

    def forward(self, x, h, **kwargs):
        x = self._extract_features(x)

        a_mean, a_raw_std = torch.chunk(self._policy(x), chunks=2, dim=1)
        a_std = F.softplus(a_raw_std) + self._eps

        dist = Normal(a_mean, a_std)
        t_a_mean = self._squash_gaussian(a_mean)
        min_q, _, _ = self._q_vals(x, t_a_mean)
        val = min_q - self.alpha * self._lprob(dist, a_mean, t_a_mean)

        return dist, val, h
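For reference, the _lprob correction in this class is the change-of-variables formula log p(t_a) = log p(a) - log|det d tanh(a)/da|, summed over action dimensions. A standalone sketch (names assumed) checking it against torch's TransformedDistribution:

import torch
from torch.distributions import Independent, Normal, TransformedDistribution
from torch.distributions.transforms import TanhTransform

squash = TanhTransform(cache_size=1)
dist = Normal(torch.zeros(5), torch.ones(5))
a = dist.rsample()
t_a = squash(a)

manual = (dist.log_prob(a) - squash.log_abs_det_jacobian(a, t_a)).sum(-1)
auto = TransformedDistribution(Independent(dist, 1), squash).log_prob(t_a)
print(torch.allclose(manual, auto))  # True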