def test_log_prob_d2(eta):
    dist = LKJCorrCholesky(2, torch.tensor([eta]))
    test_dist = TransformedDistribution(Beta(eta, eta),
                                        AffineTransform(loc=-1., scale=2.0))

    samples = dist.sample(torch.Size([100]))
    lp = dist.log_prob(samples)
    x = samples[..., 1, 0]
    tst = test_dist.log_prob(x)

    assert_tensors_equal(lp, tst, prec=1e-6)
def test_save_load_transform():
    # Evaluating `log_prob` will create a weakref `_inv` which cannot be pickled. Here, we check
    # that `__getstate__` correctly handles the weakref, and that we can evaluate the density after.
    dist = TransformedDistribution(Normal(0, 1), [AffineTransform(2, 3)])
    x = torch.linspace(0, 1, 10)
    log_prob = dist.log_prob(x)
    stream = io.BytesIO()
    torch.save(dist, stream)
    stream.seek(0)
    other = torch.load(stream)
    assert torch.allclose(log_prob, other.log_prob(x))
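The test only exercises the behaviour; one plausible way for a transform to drop the unpicklable weakref (an illustrative sketch, not the code under test) is:

# Illustrative sketch only: return a picklable state dict without the weakref.
class PicklableTransformMixin:
    def __getstate__(self):
        state = self.__dict__.copy()
        state["_inv"] = None  # weakref to the cached inverse cannot be pickled
        return state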
def test_log_prob_d2(concentration):
    dist = LKJCholesky(2, torch.tensor([concentration]))
    test_dist = TransformedDistribution(Beta(concentration, concentration),
                                        AffineTransform(loc=-1., scale=2.0))

    samples = dist.sample(torch.Size([100]))
    lp = dist.log_prob(samples)
    x = samples[..., 1, 0]
    tst = test_dist.log_prob(x)
    # LKJ prevents inf values in log_prob
    lp[tst == math.inf] = math.inf  # substitute inf for comparison

    assert_tensors_equal(lp, tst, prec=1e-3)
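In two dimensions the only free entry of the Cholesky factor is the correlation r = L[1, 0], whose LKJ density is proportional to (1 - r**2)**(concentration - 1), i.e. (r + 1) / 2 ~ Beta(concentration, concentration); that is why the rescaled Beta serves as the reference above. A standalone sanity check of that relationship (illustrative only, not part of either test suite; the sample size and p-value threshold are arbitrary choices):

# Illustrative sanity check: the off-diagonal of a 2x2 LKJ Cholesky sample
# should follow 2 * Beta(c, c) - 1. May occasionally fail by chance.
import torch
from scipy import stats
from torch.distributions import LKJCholesky

c = 2.0
r = LKJCholesky(2, torch.tensor([c])).sample(torch.Size([5000]))[..., 1, 0].reshape(-1)
ks = stats.kstest(r.numpy(), lambda v: stats.beta(c, c).cdf((v + 1) / 2))
assert ks.pvalue > 0.001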
def test_transformed_distribution(base_batch_dim, base_event_dim, transform_dim,
                                  num_transforms, sample_shape):
    shape = torch.Size([2, 3, 4, 5])
    base_dist = Normal(0, 1)
    base_dist = base_dist.expand(shape[4 - base_batch_dim - base_event_dim:])
    if base_event_dim:
        base_dist = Independent(base_dist, base_event_dim)
    transforms = [
        AffineTransform(torch.zeros(shape[4 - transform_dim:]), 1),
        ReshapeTransform((4, 5), (20,)),
        ReshapeTransform((3, 20), (6, 10)),
    ]
    transforms = transforms[:num_transforms]
    transform = ComposeTransform(transforms)

    # Check validation in .__init__().
    if base_batch_dim + base_event_dim < transform.domain.event_dim:
        with pytest.raises(ValueError):
            TransformedDistribution(base_dist, transforms)
        return
    d = TransformedDistribution(base_dist, transforms)

    # Check sampling is sufficiently expanded.
    x = d.sample(sample_shape)
    assert x.shape == sample_shape + d.batch_shape + d.event_shape
    num_unique = len(set(x.reshape(-1).tolist()))
    assert num_unique >= 0.9 * x.numel()

    # Check log_prob shape on full samples.
    log_prob = d.log_prob(x)
    assert log_prob.shape == sample_shape + d.batch_shape

    # Check log_prob shape on partial samples.
    y = x
    while y.dim() > len(d.event_shape):
        y = y[0]
    log_prob = d.log_prob(y)
    assert log_prob.shape == d.batch_shape
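A single illustrative invocation of the test above (these argument values are arbitrary examples, not the original pytest parametrization):

# Example call only; in the real suite these arguments come from @pytest.mark.parametrize.
test_transformed_distribution(base_batch_dim=1, base_event_dim=1, transform_dim=2,
                              num_transforms=2, sample_shape=torch.Size([7]))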
def forward(self, x, compute_pi=True, compute_log_pi=True):
    for layer in self.feature_layers:
        x = F.relu(layer(x))
    mu = self.mean_head(x)
    logstd = self.logstd_head(x)
    # Squash the log-std into [LOGSTD_MIN, LOGSTD_MAX].
    logstd = torch.tanh(logstd)
    logstd = LOGSTD_MIN + 0.5 * (LOGSTD_MAX - LOGSTD_MIN) * (logstd + 1)

    # Tanh-squashed Gaussian policy; the TanhTransform replaces the previous manual
    # squashing (pi = tanh(mu + noise * std)) and its explicit log-det correction.
    dist = TransformedDistribution(Independent(Normal(mu, logstd.exp()), 1),
                                   [TanhTransform(cache_size=1)])

    pi = dist.rsample() if compute_pi else None

    if compute_log_pi:
        # Note: requires compute_pi=True, since the log-probability is evaluated at `pi`.
        log_pi = dist.log_prob(pi).unsqueeze(-1)
    else:
        log_pi = None

    # Deterministic (mean) action, squashed to the action range.
    mu = torch.tanh(mu)

    return mu, pi, log_pi
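For reference, a standalone sketch (with assumed shapes, not part of the module above) showing that the TanhTransform-based log_prob agrees with the manual tanh change-of-variables correction that the commented-out code used to apply:

# Standalone check with illustrative shapes: TransformedDistribution + TanhTransform
# matches base.log_prob(atanh(pi)) - sum(log(1 - pi^2)).
import torch
from torch.distributions import Independent, Normal, TransformedDistribution
from torch.distributions.transforms import TanhTransform

mu, logstd = torch.zeros(5, 3), torch.full((5, 3), -1.0)
base = Independent(Normal(mu, logstd.exp()), 1)
dist = TransformedDistribution(base, [TanhTransform(cache_size=1)])
pi = dist.rsample()
manual = base.log_prob(torch.atanh(pi)) - torch.log1p(-pi.pow(2)).sum(-1)
assert torch.allclose(dist.log_prob(pi), manual, atol=1e-4)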
def SampleAction(self, mean, std):
    # `mean` and `std` are predicted by the neural network.
    mu = mean
    sig = std * 0.3  # scale down the predicted standard deviation by a factor of 0.3
    u_range = self.args['U_UB'] - self.args['U_LB']
    # Gaussian policy parameterised by the predicted mean and (scaled) std
    GPol = norm(mu, sig)
    # Affine map x -> U_LB + u_range * x, shifting/scaling the policy into action units
    scale = AffineTransform(self.args['U_LB'], u_range)
    GPol = TransformedDistribution(GPol, scale)
    action = GPol.sample()  # draw an action at random from the transformed distribution
    assert len(action) == 1
    logGP = GPol.log_prob(action)  # log-probability of the action taken
    return action.cpu(), logGP
def predefined_weight(self, y, x, loc, scale):
    """
    Helper method for weighting with loc and scale.
    :param y: The value at x_t
    :type y: torch.Tensor|float
    :param x: The value at x_{t-1}
    :type x: torch.Tensor|float
    :param loc: The mean
    :type loc: torch.Tensor
    :param scale: The scale
    :type scale: torch.Tensor
    :return: The log-weights
    :rtype: torch.Tensor
    """
    if isinstance(self, Observable):
        shape = _get_shape(loc if loc.dim() > scale.dim() else scale, self.ndim)
    else:
        shape = _get_shape(x, self.ndim)

    dist = TransformedDistribution(self.noise.expand(shape), self._transform(loc, scale))

    return dist.log_prob(y)
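A self-contained sketch of the same pattern outside the framework (the names below are hypothetical, not the surrounding library's API): expand a base noise distribution to the particle shape and shift/scale it with an AffineTransform before evaluating the log-weights.

# Hypothetical standalone sketch of the expand-then-affine-transform pattern.
import torch
from torch.distributions import Normal, TransformedDistribution
from torch.distributions.transforms import AffineTransform

noise = Normal(0., 1.)                                 # stand-in for self.noise
loc, scale = torch.zeros(100), 0.5 * torch.ones(100)  # per-particle loc and scale
dist = TransformedDistribution(noise.expand(loc.shape), AffineTransform(loc, scale))
log_weights = dist.log_prob(torch.zeros(100))          # log-weights for 100 particles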
def get_x_corr_params(x_max, n_points, C, K=50, lr=1e-2, T=10000,
                      path_to_file=None, symmetric=True, early_stop=-1):
    """
    C : the variance on X_normal
    symmetric : enforce a symmetric model; in practice use the mean of the model and its
        mirrored copy; the returned parameters then include the mirrored components
        (so there are 2K components in total)
    """
    torch.set_default_tensor_type('torch.DoubleTensor')

    # Standard logistic distribution built as a transformed Uniform(0, 1)
    base_distribution = Uniform(0, 1)
    transforms = [SigmoidTransform().inv, AffineTransform(loc=0, scale=1)]
    logistic = TransformedDistribution(base_distribution, transforms)

    # Mixture parameters to optimise: means, log-sigmas and unnormalised mixture weights
    mus0 = 0.1 * torch.randn(K)
    mus = mus0.detach().requires_grad_(True)
    sigmas0 = 0.1 * torch.randn(K)
    sigmas = sigmas0.detach().requires_grad_(True)
    pis0 = torch.rand(K)
    pis = pis0.detach().requires_grad_(True)

    normal_sigma = torch.sqrt(torch.ones(1) * C)
    x_log = torch.linspace(-x_max, x_max, n_points)
    y_log = logistic.log_prob(x_log)

    params = [mus, sigmas, pis]
    optimizer = torch.optim.Adam(params, lr=lr)

    min_loss = 10 ** 5
    counter = 0
    for i in range(T):
        optimizer.zero_grad()
        loss = loss_func(params, x_log, normal_sigma, y_log)
        # Early stopping: stop when the loss has not improved for `early_stop` iterations
        if loss < min_loss:
            min_loss = loss
            counter = 0
        else:
            counter += 1
        if early_stop == counter:
            print('Stopping early..')
            break
        if i % 1000 == 0:
            print('loss: {}, iter: {}/{}'.format(loss.detach().numpy(), i, T))
        loss.backward(retain_graph=True)
        optimizer.step()

    mus, sigmas, pis = params
    mus = mus.data.numpy()
    sigmas = np.exp(sigmas.data.numpy())
    pis = torch.softmax(pis, dim=-1).data.numpy()

    if symmetric:
        mus = np.concatenate((mus, -mus))
        sigmas = np.concatenate((sigmas, sigmas))
        pis = np.concatenate((.5 * pis, .5 * pis))

    if path_to_file is None:
        fname = './X_corr/X_corr_{}_{}_{}_torch.pickle'.format(n_points, x_max, C)
    else:
        fname = path_to_file
    if path_to_file != 'no_write':
        pickle.dump([mus, sigmas, pis], open(fname, 'wb'))
        print('Wrote params to {}'.format(fname))

    return [mus, sigmas, pis]
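`loss_func` is referenced above but not defined in this snippet. A minimal sketch of what it might compute, assuming the goal is to match the log-density of the K-component Gaussian mixture (each component's variance broadened by normal_sigma**2) to the target logistic log-density y_log on the grid x_log; the exact objective in the original code may differ:

import torch

def loss_func(params, x_log, normal_sigma, y_log):
    # Hypothetical sketch, not the original implementation: mean-squared error between
    # the log-density of the Gaussian mixture (convolved with N(0, normal_sigma^2))
    # and the target logistic log-density on the grid.
    mus, sigmas, pis = params
    weights = torch.softmax(pis, dim=-1)            # mixture weights
    scales = torch.exp(sigmas)                      # sigmas are optimised in log-space
    total_var = scales ** 2 + normal_sigma ** 2     # variance after adding N(0, C) noise
    comp = torch.distributions.Normal(mus, total_var.sqrt())
    # log p(x) = logsumexp_k [log w_k + log N(x | mu_k, var_k)]
    log_probs = comp.log_prob(x_log.unsqueeze(-1)) + torch.log(weights)
    mixture_log_prob = torch.logsumexp(log_probs, dim=-1)
    return torch.mean((mixture_log_prob - y_log) ** 2)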
class NICE(nn.Module):
    def __init__(self, prior, coupling, in_out_dim, mid_dim, hidden,
                 bottleneck, compress, device, n_layers):
        """Initialize a NICE.

        Args:
            prior: latent prior, 'gaussian' or 'logistic'.
            coupling: type of coupling layer, 'additive' or 'affine'.
            in_out_dim: input/output dimensions.
            mid_dim: number of units in a hidden layer.
            hidden: number of hidden layers.
            bottleneck: bottleneck mode, 'redundancy' or 'null'.
            compress: bottleneck compression factor.
            device: run on cpu or gpu.
            n_layers: number of coupling layers.
        """
        super(NICE, self).__init__()
        self.device = device
        if prior == 'gaussian':
            self.prior = torch.distributions.Normal(
                torch.tensor(0.).to(device), torch.tensor(1.).to(device))
        elif prior == 'logistic':
            self.prior = TransformedDistribution(
                Uniform(torch.tensor(0.).to(device), torch.tensor(1.).to(device)),
                [SigmoidTransform().inv, AffineTransform(loc=0., scale=1.)])
        else:
            raise ValueError('Prior not implemented.')
        self.in_out_dim = in_out_dim
        self.coupling = coupling
        self.n_layers = n_layers

        layer = AdditiveCoupling if coupling == 'additive' else AffineCoupling
        self.coupling_layers = nn.ModuleList([
            layer(in_out_dim, mid_dim, hidden, i % 2)
            for i in range(self.n_layers)
        ]).to(device)
        self.scale = Scaling(in_out_dim).to(device)

        self.bottleneck_factor = compress
        self.bottleneck_loss = nn.MSELoss()
        self.bottleneck = bottleneck

    def f_inverse(self, z):
        """Transformation g: Z -> X (inverse of f).

        Args:
            z: tensor in latent space Z.
        Returns:
            transformed tensor in data space X.
        """
        x, det = self.scale(z, reverse=True)
        for layer in reversed(self.coupling_layers):
            x, _ = layer(x, 0, reverse=True)
        return x

    def f(self, x):
        """Transformation f: X -> Z (inverse of g).

        Args:
            x: tensor in data space X.
        Returns:
            transformed tensor in latent space Z and log determinant Jacobian.
        """
        log_det_J = 0
        for layer in self.coupling_layers:
            x, log_det_J = layer(x, log_det_J)
        z, det = self.scale(x)
        return z, log_det_J + det

    def loss(self, x):
        """Computes data log-likelihood.

        (See Section 3.3 in the NICE paper.)

        Args:
            x: input minibatch.
        Returns:
            log-likelihood of input and the bottleneck loss.
        """
        z, log_det_J = self.f(x)
        slices = [
            z[:, i::self.bottleneck_factor]
            for i in range(self.bottleneck_factor)
        ]
        s = torch.stack(slices).permute(1, 0, 2)
        if self.bottleneck == 'redundancy':
            bottleneck_loss = torch.sum(torch.var(s, dim=1), dim=1)
            log_ll = 0.0
            for slice in slices:
                log_ll += torch.sum(self.prior.log_prob(slice), dim=1)
        if self.bottleneck == 'null':
            winner = slices[-1]
            loser = torch.ones_like(winner)
            bottleneck_loss = 0.0
            for slice in slices[:-1]:
                bottleneck_loss += self.bottleneck_loss(slice, loser)
            log_ll = torch.sum(self.prior.log_prob(winner), dim=1)
        # log-det for rescaling from [0, 256] (after dequantization) to [0, 1]
        log_det_J -= np.log(256) * self.in_out_dim
        return log_ll + log_det_J, bottleneck_loss

    def sample(self, size):
        """Generates samples.

        Args:
            size: number of samples to generate.
        Returns:
            samples from the data space X.
        """
        z = self.prior.sample(
            (size, self.in_out_dim // self.bottleneck_factor)).to(self.device)
        z_tag = torch.zeros((size, self.in_out_dim)).to(self.device)
        if self.bottleneck == 'redundancy':
            for i in range(self.bottleneck_factor):
                z_tag[:, i::self.bottleneck_factor] = z
        if self.bottleneck == 'null':
            for i in range(self.bottleneck_factor - 1):
                z_tag[:, i::self.bottleneck_factor] = torch.ones_like(z)
            z_tag[:, self.bottleneck_factor - 1::self.bottleneck_factor] = z
        return self.f_inverse(z_tag)

    def forward(self, x):
        """Forward pass.

        Args:
            x: input minibatch.
        Returns:
            log-likelihood of input.
        """
        x = x.to(self.device)
        return self.loss(x)
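A hypothetical usage sketch; the hyperparameter values are placeholders, and AdditiveCoupling / AffineCoupling / Scaling must already be defined elsewhere in the repository:

# Illustrative usage only; argument values are placeholders.
flow = NICE(prior='logistic', coupling='additive', in_out_dim=784, mid_dim=1000,
            hidden=5, bottleneck='redundancy', compress=2, device='cpu', n_layers=4)
x = torch.rand(32, 784)                        # dequantized inputs scaled to [0, 1]
log_ll, bottleneck_loss = flow(x)              # per-example log-likelihood and bottleneck term
loss = -log_ll.mean() + bottleneck_loss.mean()
samples = flow.sample(16)                      # map 16 latent draws back to data space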
import torch
from torch.distributions import Independent, Normal, TransformedDistribution
from torch.distributions.transforms import TanhTransform
import numpy as np

batch_size = 400
torch.set_default_dtype(torch.float64)
n = 40
print(n)

done = False
i = 0
while not done:
    mu = torch.as_tensor(np.random.random([batch_size, n]))
    log_std = torch.as_tensor(np.random.random([batch_size, n]))
    transform = TransformedDistribution(
        Independent(Normal(mu, log_std.exp()), 1), TanhTransform())
    input = transform.rsample()
    output = transform.log_prob(input)
    if torch.isnan(output).any().item():
        done = True
        if (input == -1).any() or (input == 1).any():
            print("somethings wrong...")
        print(output)

print("something was wrong")
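This script hunts for the NaNs that appear when a sample saturates to ±1 in finite precision and TanhTransform's inverse (atanh) diverges. One common workaround, sketched below as an assumption rather than as part of the original script, is to clamp samples just inside (-1, 1) before evaluating log_prob:

# Hypothetical mitigation sketch: keep atanh finite by clamping saturated samples.
eps = torch.finfo(input.dtype).eps
safe_input = input.clamp(-1 + eps, 1 - eps)
safe_output = transform.log_prob(safe_input)
print(torch.isnan(safe_output).any())   # expected: tensor(False)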