def tanh(): """ A helper function to create a :class:`~pyro.distributions.transforms.TanhTransform` object for consistency with other helpers. """ return TanhTransform()
def __init__(self,
             img_dim,
             scalar_feature_dim,
             action_dim,
             shared_layers,
             shared_out_dim,
             initial_alpha=1.,
             eps=1e-7,
             device=torch.device(CPU),
             normalize_obs=False,
             normalize_value=False):
    assert len(img_dim) == 4
    super().__init__(obs_dim=scalar_feature_dim,
                     initial_alpha=initial_alpha,
                     eps=eps,
                     norm_dim=(0, ),
                     device=device,
                     normalize_obs=normalize_obs,
                     normalize_value=normalize_value)
    self._img_dim = img_dim
    self._scalar_feature_dim = scalar_feature_dim

    self.split = Split([int(np.product(img_dim)), scalar_feature_dim])
    self.fuse = Fuse()
    self.encoder = Conv2DEncoder(*img_dim[1:], shared_out_dim, shared_layers,
                                 nn.LayerNorm(shared_out_dim))

    encoded_dim = shared_out_dim * self._img_dim[0] + scalar_feature_dim
    self._policy = nn.Sequential(
        nn.Linear(encoded_dim, 1024),
        nn.LayerNorm(1024),
        ModernBlock(1024, 2048, 1024, norm=None),
        nn.LayerNorm(1024),
        ModernBlock(1024, 2048, 1024, norm=SPECTRAL),
        nn.LayerNorm(1024),
        ModernBlock(1024, 2048, 1024, norm=SPECTRAL),
        nn.LayerNorm(1024),
        ModernBlock(1024, 2048, 1024, norm=None),
        nn.Linear(1024, action_dim * 2))
    self._q1 = nn.Sequential(
        nn.Linear(encoded_dim + action_dim, 1024),
        nn.LayerNorm(1024),
        ModernBlock(1024, 2048, 1024, norm=None),
        nn.LayerNorm(1024),
        ModernBlock(1024, 2048, 1024, norm=SPECTRAL),
        nn.LayerNorm(1024),
        ModernBlock(1024, 2048, 1024, norm=SPECTRAL),
        nn.LayerNorm(1024),
        ModernBlock(1024, 2048, 1024, norm=None),
        nn.Linear(1024, 1))
    self._q2 = nn.Sequential(
        nn.Linear(encoded_dim + action_dim, 1024),
        nn.LayerNorm(1024),
        ModernBlock(1024, 2048, 1024, norm=None),
        nn.LayerNorm(1024),
        ModernBlock(1024, 2048, 1024, norm=SPECTRAL),
        nn.LayerNorm(1024),
        ModernBlock(1024, 2048, 1024, norm=SPECTRAL),
        nn.LayerNorm(1024),
        ModernBlock(1024, 2048, 1024, norm=None),
        nn.Linear(1024, 1))

    self._squash_gaussian = TanhTransform()

    self.to(self.device)
def __init__(self,
             obs_dim,
             initial_alpha=1.,
             eps=1e-7,
             norm_dim=(0, ),
             device=torch.device(CPU),
             normalize_obs=False,
             normalize_value=False,
             **kwargs):
    super().__init__(obs_dim=obs_dim,
                     initial_alpha=initial_alpha,
                     norm_dim=norm_dim,
                     device=device,
                     normalize_obs=normalize_obs,
                     normalize_value=normalize_value,
                     **kwargs)
    self._eps = eps
    self._squash_gaussian = TanhTransform()
def get_transforms(cache_size):
    transforms = [
        AbsTransform(cache_size=cache_size),
        ExpTransform(cache_size=cache_size),
        PowerTransform(exponent=2, cache_size=cache_size),
        PowerTransform(exponent=torch.tensor(5.).normal_(), cache_size=cache_size),
        PowerTransform(exponent=torch.tensor(5.).normal_(), cache_size=cache_size),
        SigmoidTransform(cache_size=cache_size),
        TanhTransform(cache_size=cache_size),
        AffineTransform(0, 1, cache_size=cache_size),
        AffineTransform(1, -2, cache_size=cache_size),
        AffineTransform(torch.randn(5), torch.randn(5), cache_size=cache_size),
        AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
        SoftmaxTransform(cache_size=cache_size),
        SoftplusTransform(cache_size=cache_size),
        StickBreakingTransform(cache_size=cache_size),
        LowerCholeskyTransform(cache_size=cache_size),
        CorrCholeskyTransform(cache_size=cache_size),
        ComposeTransform([
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
        ]),
        ComposeTransform([
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
            ExpTransform(cache_size=cache_size),
        ]),
        ComposeTransform([
            AffineTransform(0, 1, cache_size=cache_size),
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
            AffineTransform(1, -2, cache_size=cache_size),
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
        ]),
        ReshapeTransform((4, 5), (2, 5, 2)),
        IndependentTransform(
            AffineTransform(torch.randn(5), torch.randn(5), cache_size=cache_size), 1),
        CumulativeDistributionTransform(Normal(0, 1)),
    ]
    transforms += [t.inv for t in transforms]
    return transforms
def forward(self, state, mean_action=False):
    mu, log_std = self.network(state).chunk(2, dim=-1)
    # Clamp the log-std so the policy is neither too random nor too deterministic.
    log_std = torch.clamp(log_std, LOG_MIN, LOG_MAX)
    normal = TransformedDistribution(
        Independent(Normal(mu, log_std.exp()), 1),
        [TanhTransform(),
         AffineTransform(loc=self.loc, scale=self.scale)])
    if mean_action:
        # Apply the same tanh + affine squashing to the base Gaussian mean,
        # matching the AffineTransform(loc, scale) used above.
        return self.scale * torch.tanh(mu) + self.loc
    return normal
def forward(self, state):
    policy_mean, policy_log_std = self.policy(state).chunk(2, dim=1)
    policy_log_std = torch.clamp(policy_log_std,
                                 min=self.log_std_min,
                                 max=self.log_std_max)
    policy = TransformedDistribution(
        Independent(Normal(policy_mean, policy_log_std.exp()), 1), [
            TanhTransform(),
            AffineTransform(loc=self.action_loc, scale=self.action_scale)
        ])
    policy.mean_ = self.action_scale * torch.tanh(
        policy.base_dist.mean
    ) + self.action_loc  # TODO: See if mean attr can be overwritten
    return policy
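# Side note (not from the source): TransformedDistribution does not provide a
# .mean for non-affine transforms such as TanhTransform, which is presumably
# why the squashed mean is attached manually as `mean_` above. A small
# illustration under that assumption:
import torch
from torch.distributions import Normal, TransformedDistribution
from torch.distributions.transforms import TanhTransform

d = TransformedDistribution(Normal(torch.zeros(2), torch.ones(2)), TanhTransform())
try:
    _ = d.mean
except NotImplementedError:
    print("mean is not implemented; compute tanh(base mean) manually instead")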
def forward(self, mean, log_std, deterministic=False):
    log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max)
    std = torch.exp(log_std)
    action_distribution = TransformedDistribution(
        Normal(mean, std), TanhTransform(cache_size=1))
    if deterministic:
        action_sample = torch.tanh(mean)
    else:
        action_sample = action_distribution.rsample()
    log_prob = torch.sum(action_distribution.log_prob(action_sample), dim=1)
    return action_sample, log_prob
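# Sanity-check sketch (not from the source): TransformedDistribution with
# TanhTransform applies the tanh change-of-variables correction itself, i.e.
# log p(a) = log N(u; mean, std) - log|d tanh(u)/du| with a = tanh(u), which is
# why forward() above can call log_prob on the squashed sample directly.
import torch
from torch.distributions import Normal, TransformedDistribution
from torch.distributions.transforms import TanhTransform

base = Normal(torch.zeros(3, dtype=torch.float64),
              torch.ones(3, dtype=torch.float64))
squashed = TransformedDistribution(base, TanhTransform(cache_size=1))
u = base.rsample()
a = torch.tanh(u)
manual = base.log_prob(u) - TanhTransform().log_abs_det_jacobian(u, a)
print(torch.allclose(squashed.log_prob(a), manual))  # expected: True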
def get_transforms(cache_size):
    transforms = [
        AbsTransform(cache_size=cache_size),
        ExpTransform(cache_size=cache_size),
        PowerTransform(exponent=2, cache_size=cache_size),
        PowerTransform(exponent=torch.tensor(5.).normal_(), cache_size=cache_size),
        SigmoidTransform(cache_size=cache_size),
        TanhTransform(cache_size=cache_size),
        AffineTransform(0, 1, cache_size=cache_size),
        AffineTransform(1, -2, cache_size=cache_size),
        AffineTransform(torch.randn(5), torch.randn(5), cache_size=cache_size),
        AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
        SoftmaxTransform(cache_size=cache_size),
        StickBreakingTransform(cache_size=cache_size),
        LowerCholeskyTransform(cache_size=cache_size),
        CorrCholeskyTransform(cache_size=cache_size),
        ComposeTransform([
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
        ]),
        ComposeTransform([
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
            ExpTransform(cache_size=cache_size),
        ]),
        ComposeTransform([
            AffineTransform(0, 1, cache_size=cache_size),
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
            AffineTransform(1, -2, cache_size=cache_size),
            AffineTransform(torch.randn(4, 5), torch.randn(4, 5), cache_size=cache_size),
        ]),
    ]
    transforms += [t.inv for t in transforms]
    return transforms
def forward(self, x=None, body_x=None, **kwargs):
    if x is None and body_x is None:
        raise ValueError('One of [x, body_x] should be provided!')
    if body_x is None:
        body_x = self.body(x, **kwargs)
    body_out = body_x[0] if isinstance(body_x, tuple) else body_x
    mean = self.head_mean(body_out)
    if self.std_cond_in:
        log_std = self.head_logstd(body_out)
    else:
        log_std = self.head_logstd.expand_as(mean)
    if self.clamp_log_std:
        log_std = torch.clamp(log_std, LOG_STD_MIN, LOG_STD_MAX)
    std = torch.exp(log_std)
    action_dist = Independent(Normal(loc=mean, scale=std), 1)
    if self.tanh_on_dist:
        action_dist = TransformedDistribution(
            action_dist, [TanhTransform(cache_size=1)])
    return action_dist, body_x
def tensor_to_distribution(args, **kwargs):
    """Convert tensors to a distribution.

    When args is a tensor, it returns a Categorical distribution with logits
    given by args.

    When args is a tuple, it returns a MultivariateNormal distribution with
    args[0] as mean and args[1] as scale_tril matrix. When args[1] is zero, it
    returns a Delta.

    Parameters
    ----------
    args: Union[Tuple[Tensor], Tensor].
        Tensors with the parameters of a distribution.
    """
    if not isinstance(args, tuple):
        return Categorical(logits=args)
    elif torch.all(args[1] == 0):
        if kwargs.get("add_noise", False):
            noise_clip = kwargs.get("noise_clip", np.inf)
            policy_noise = kwargs.get("policy_noise", 1)
            try:
                policy_noise = policy_noise()
            except TypeError:
                pass
            mean = args[0] + (torch.randn_like(args[0]) * policy_noise).clamp(
                -noise_clip, noise_clip)
        else:
            mean = args[0]
        return Delta(v=mean, event_dim=min(1, mean.dim()))
    else:
        if kwargs.get("tanh", False):
            d = TransformedDistribution(
                MultivariateNormal(args[0], scale_tril=args[1]), [TanhTransform()])
        else:
            d = MultivariateNormal(args[0], scale_tril=args[1])
        return d
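# Usage sketch (not from the source): assumes tensor_to_distribution and its
# imports above are available. Passing a (mean, scale_tril) tuple with
# tanh=True yields a Gaussian policy squashed into (-1, 1) via TanhTransform.
import torch

mean = torch.zeros(2)
scale_tril = 0.1 * torch.eye(2)
dist = tensor_to_distribution((mean, scale_tril), tanh=True)
action = dist.rsample()            # entries lie strictly inside (-1, 1)
log_prob = dist.log_prob(action)   # includes the tanh Jacobian correction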
def __init__(self, low, high):
    m = (high - low) / 2
    b = (high + low) / 2
    super().__init__([TanhTransform(), AffineTransform(b, m)])
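# Self-contained sketch (not from the source): the snippet above appears to be
# the __init__ of a ComposeTransform subclass; the class name SquashToInterval
# below is hypothetical. tanh squashes to (-1, 1) and the affine map
# y = b + m * x rescales that interval onto (low, high).
import torch
from torch.distributions.transforms import (AffineTransform, ComposeTransform,
                                             TanhTransform)


class SquashToInterval(ComposeTransform):
    def __init__(self, low, high):
        m = (high - low) / 2
        b = (high + low) / 2
        super().__init__([TanhTransform(), AffineTransform(b, m)])


t = SquashToInterval(low=-2.0, high=2.0)
print(t(torch.tensor([-10.0, 0.0, 10.0])))  # all outputs stay within (-2, 2)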
import torch
from torch.distributions import Independent, Normal, TransformedDistribution
from torch.distributions.transforms import TanhTransform
import numpy as np

# Repeatedly sample from a tanh-squashed Gaussian and evaluate log_prob on the
# samples until a NaN appears, i.e. until a sample lands numerically on the
# boundary of (-1, 1) where the inverse tanh blows up.
batch_size = 400
torch.set_default_dtype(torch.float64)
n = 40
print(n)

done = False
i = 0
while not done:
    mu = torch.as_tensor(np.random.random([batch_size, n]))
    log_std = torch.as_tensor(np.random.random([batch_size, n]))
    transform = TransformedDistribution(
        Independent(Normal(mu, log_std.exp()), 1), TanhTransform())
    input = transform.rsample()
    output = transform.log_prob(input)
    if torch.isnan(output).any().item():
        done = True
        if (input == -1).any() or (input == 1).any():
            print("something's wrong...")
        print(output)

print("something was wrong")
class SquashedGaussianSoftActorCritic(SoftActorCritic):
    def __init__(self,
                 obs_dim,
                 initial_alpha=1.,
                 eps=1e-7,
                 norm_dim=(0, ),
                 device=torch.device(CPU),
                 normalize_obs=False,
                 normalize_value=False,
                 **kwargs):
        super().__init__(obs_dim=obs_dim,
                         initial_alpha=initial_alpha,
                         norm_dim=norm_dim,
                         device=device,
                         normalize_obs=normalize_obs,
                         normalize_value=normalize_value,
                         **kwargs)
        self._eps = eps
        self._squash_gaussian = TanhTransform()

    def _q_vals(self, x, a):
        input = torch.cat((x, a), dim=1)
        q1_val = self._q1(input)
        q2_val = self._q2(input)
        min_q = torch.min(q1_val, q2_val)
        return min_q, q1_val, q2_val

    def _lprob(self, dist, a, t_a):
        return torch.sum(dist.log_prob(a) -
                         self._squash_gaussian.log_abs_det_jacobian(a, t_a),
                         dim=-1,
                         keepdim=True)

    def q_vals(self, x, h, a, **kwargs):
        a = a.to(self.device)
        x = self._extract_features(x)
        min_q, q1_val, q2_val = self._q_vals(x, a)
        return min_q, q1_val, q2_val, h

    def act_lprob(self, x, h, **kwargs):
        dist, _, _ = self.forward(x, h)
        action = dist.rsample()
        t_action = self._squash_gaussian(action)
        log_prob = self._lprob(dist, action, t_action)
        return t_action, log_prob

    def compute_action(self, x, h):
        self.eval()
        with torch.no_grad():
            dist, value, h = self.forward(x, h=h)
            action = dist.rsample()
            t_action = self._squash_gaussian(action)
            log_prob = self._lprob(dist, action, t_action)
        self.train()
        return t_action[0].cpu().numpy(), value[0].cpu().numpy(), \
            h[0].cpu().numpy(), log_prob[0].cpu().numpy(), \
            dist.entropy()[0].cpu().numpy(), dist.mean[0].cpu().numpy(), \
            dist.variance[0].cpu().numpy()

    def deterministic_action(self, x, h):
        self.eval()
        with torch.no_grad():
            dist, value, h = self.forward(x, h=h)
            action = dist.mean
            t_action = self._squash_gaussian(action)
            log_prob = self._lprob(dist, action, t_action)
        self.train()
        return t_action[0].cpu().numpy(), value[0].cpu().numpy(), \
            h[0].cpu().numpy(), log_prob[0].cpu().numpy(), \
            dist.entropy()[0].cpu().numpy()

    def forward(self, x, h, **kwargs):
        x = self._extract_features(x)
        a_mean, a_raw_std = torch.chunk(self._policy(x), chunks=2, dim=1)
        a_std = F.softplus(a_raw_std) + self._eps
        dist = Normal(a_mean, a_std)
        t_a_mean = self._squash_gaussian(a_mean)
        min_q, _, _ = self._q_vals(x, t_a_mean)
        val = min_q - self.alpha * self._lprob(dist, a_mean, t_a_mean)
        return dist, val, h