def loglik_pos(batch_data, list_type, theta, normalization_params):
    # Log-normal distribution
    output = dict()
    epsilon = 1e-3

    # Data outputs
    data_mean_log, data_var_log = normalization_params
    data_var_log = torch.clamp(data_var_log, epsilon, np.inf)

    data, missing_mask = batch_data
    data_log = torch.log(1.0 + data)
    missing_mask = missing_mask.float()

    est_mean, est_var = theta
    est_var = torch.clamp(torch.nn.Softplus()(est_var), epsilon, 1.0)

    # Affine transformation of the parameters
    est_mean = torch.sqrt(data_var_log) * est_mean + data_mean_log
    est_var = data_var_log * est_var

    # Compute loglik
    log_p_x = -0.5 * torch.sum(torch.pow(data_log - est_mean, 2) / est_var, 1) \
        - 0.5 * torch.sum(torch.log(2 * np.pi * est_var), 1) - torch.sum(data_log, 1)

    log_normal = td.LogNormal(est_mean, torch.sqrt(est_var))
    # log_p_x = log_normal.log_prob(data).sum(1)  ##

    output['log_p_x'] = torch.mul(log_p_x, missing_mask)
    output['log_p_x_missing'] = torch.mul(log_p_x, 1.0 - missing_mask)
    output['params'] = [est_mean, est_var]
    output['samples'] = log_normal.rsample() - 1.0  # -1.0 TODO???
    # output['samples'] = torch.clamp(
    #     torch.exp(td.Normal(est_mean, torch.sqrt(est_var)).rsample()) - 1.0, 0, 1e20)
    return output
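# Hedged side-note (an assumption, not part of the original function): because
# data_log = log(1 + data), the manual log_p_x above equals the LogNormal
# log-density evaluated at 1 + data, summed over features -- which is why the
# commented-out log_normal.log_prob(data) line would not give the same value.
import numpy as np
import torch
import torch.distributions as td

est_mean = torch.randn(4, 3, dtype=torch.float64)
est_var = torch.rand(4, 3, dtype=torch.float64) + 0.5
data = torch.rand(4, 3, dtype=torch.float64)
data_log = torch.log(1.0 + data)

manual = -0.5 * torch.sum(torch.pow(data_log - est_mean, 2) / est_var, 1) \
    - 0.5 * torch.sum(torch.log(2 * np.pi * est_var), 1) - torch.sum(data_log, 1)
via_dist = td.LogNormal(est_mean, torch.sqrt(est_var)).log_prob(1.0 + data).sum(1)
assert torch.allclose(manual, via_dist)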
def gen_data(self):
    # sample overall relative abundances of ASVs from a Dirichlet distribution
    self.ASV_rel_abundance = tdist.Dirichlet(torch.ones(self.numASVs)).sample()

    # sample spatial embedding of ASVs
    self.w = torch.zeros(self.numASVs, self.D)
    w_prior = tdist.MultivariateNormal(torch.zeros(self.D), torch.eye(self.D))
    for o in range(0, self.numASVs):
        self.w[o, :] = w_prior.sample()

    self.data = torch.zeros(self.numParticles, self.numASVs)
    num_nonempty = 0

    mu_prior = tdist.MultivariateNormal(torch.zeros(self.D), torch.eye(self.D))
    rad_prior = tdist.LogNormal(torch.tensor([self.mu_rad]),
                                torch.tensor([self.mu_std]))
    # replace with neg bin prior
    num_reads_prior = tdist.Poisson(torch.tensor([self.avgNumReadsParticle]))

    while num_nonempty < self.numParticles:
        # sample center and radius of the particle
        mu = mu_prior.sample()
        rad = rad_prior.sample()

        zr = torch.zeros(1, self.numASVs, dtype=torch.float64)
        for o in range(0, self.numASVs):
            p = mu - self.w[o, :]
            p = torch.pow(p, 2.0) / rad
            p = (torch.sum(p)).sqrt()
            zr[0, o] = unitboxcar(p, 0.0, 2.0, self.step_approx)

        if torch.sum(zr) > 0.95:
            particle = Particle(mu, self)
            particle.zr = zr
            self.particles.append(particle)

            # renormalize particle abundances
            rn = self.ASV_rel_abundance * zr
            rn = rn / torch.sum(rn)

            # sample relative abundances for particle
            part_rel_abundance = tdist.Dirichlet(rn * self.conc).sample()

            # sample number of reads for particle
            # (replace w/ neg bin instead of Poisson)
            num_reads = num_reads_prior.sample().long().item()
            particle.total_reads = num_reads
            particle.reads = tdist.Multinomial(
                num_reads, probs=part_rel_abundance).sample()
            num_nonempty += 1
def decode_x(self, w, z):
    params = self.decoder_x(torch.cat((w, z), dim=-1))
    px_wz = []
    samples = []
    for indices in self.likelihood_partition:
        data_type = self.likelihood_partition[indices]
        params_subset = params[:, indices[0]:(indices[1] + 1)]
        if data_type == 'real':
            cov_diag = self.likelihood_params['lik_var'] * torch.ones_like(
                params_subset).to(self.device)
            dist = D.Normal(loc=params_subset, scale=cov_diag.sqrt())
        elif data_type == 'categorical':
            dist = D.OneHotCategorical(logits=params_subset)
        elif data_type == 'binary':
            dist = D.Bernoulli(logits=params_subset)
        elif data_type == 'positive':
            lognormal_var = self.likelihood_params[
                'lik_var_lognormal'] * torch.ones_like(params_subset).to(
                    self.device)
            dist = D.LogNormal(loc=params_subset, scale=lognormal_var.sqrt())
        elif data_type == 'count':
            positive_params_subset = F.softplus(params_subset)
            dist = D.Poisson(rate=positive_params_subset)
        elif data_type == 'binomial':
            num_trials = self.likelihood_params['binomial_num_trials']
            dist = D.Binomial(total_count=num_trials, logits=params_subset)
        elif data_type == 'ordinal':
            h = params_subset[:, 0:1]
            thetas = torch.cumsum(F.softplus(params_subset[:, 1:]), axis=1)
            prob_lessthans = torch.sigmoid(thetas - h)
            probs = torch.cat((prob_lessthans,
                               torch.ones(len(prob_lessthans), 1)), axis=1) - \
                torch.cat((torch.zeros(len(prob_lessthans), 1),
                           prob_lessthans), axis=1)
            dist = D.OneHotCategorical(probs=probs)
        else:
            raise NotImplementedError
        samples.append(dist.sample())
        px_wz.append(dist)
    sample_x = torch.cat(samples, axis=1)
    return params, sample_x, px_wz
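# Hedged illustration (an assumption, not part of the original class) of the
# 'ordinal' branch above: cumulative thresholds give P(y <= k), and taking
# differences of adjacent cumulative probabilities yields per-category
# probabilities that are non-negative and sum to 1.
import torch
import torch.nn.functional as F

params_subset = torch.randn(5, 4)   # column 0: location h, columns 1..3: threshold increments
h = params_subset[:, 0:1]
thetas = torch.cumsum(F.softplus(params_subset[:, 1:]), dim=1)   # increasing thresholds
prob_lessthans = torch.sigmoid(thetas - h)                       # P(y <= k)
probs = torch.cat((prob_lessthans, torch.ones(5, 1)), dim=1) - \
    torch.cat((torch.zeros(5, 1), prob_lessthans), dim=1)
assert torch.allclose(probs.sum(dim=1), torch.ones(5))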
def dist(
    self,
    batch: dict[str, Union[torch.Tensor, list[torch.Tensor]]],
) -> distributions.Distribution:
    """Return the distribution of returns."""
    logits, mean, std = self(batch)
    try:
        weights_dist = distributions.Categorical(logits=logits)
    except ValueError as err:
        raise GradientsError(
            "Gradient update error: NaN in Categorical distribution"
        ) from err
    comp_dist = distributions.LogNormal(mean, std)
    return distributions.MixtureSameFamily(weights_dist, comp_dist)
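# Hypothetical usage sketch (an assumption, not part of the original model):
# how a MixtureSameFamily of LogNormal components, like the one returned by
# dist(), could be used for maximum-likelihood training. The batch size,
# number of components, and the `returns` tensor are invented for illustration.
import torch
import torch.distributions as distributions

batch_size, n_comp = 32, 4
logits = torch.randn(batch_size, n_comp)        # mixture weights per sample
mean = torch.randn(batch_size, n_comp)          # log-space means
std = torch.rand(batch_size, n_comp) + 0.1      # log-space stds (strictly positive)

mix = distributions.MixtureSameFamily(
    distributions.Categorical(logits=logits),
    distributions.LogNormal(mean, std),
)
returns = torch.rand(batch_size) + 0.5          # strictly positive targets
loss = -mix.log_prob(returns).mean()            # negative log-likelihood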
# sample embedding of OTUs
w = torch.zeros(O, D)
w_prior = tdist.MultivariateNormal(torch.zeros(D), torch.eye(D))
for o in range(0, O):
    w[o, :] = w_prior.sample()

# hyperparameters for particle radius
#eta_1 = 1.0
#eta_2 = 2.0
#rad_prior = tdist.Gamma(torch.tensor([eta_1]), torch.tensor([eta_2]))
#rad = rad_prior.sample()*4.0
#rad = rad * rad
mu_rad = numpy.log(1.0)
mu_std = 1.0
rad_prior = tdist.LogNormal(torch.tensor([mu_rad]), torch.tensor([mu_std]))
#rad = rad_prior.sample()
rad = torch.tensor([1.0])

# sample particle center
mu_prior = tdist.MultivariateNormal(torch.zeros(D), torch.eye(D))
mu = mu_prior.sample()

print('mu=', mu)
print('rad=', rad)

# sample indicator as to whether OTUs occur in the particle
#z = torch.zeros(O,1,dtype=torch.int64)
zr = torch.zeros(O, 1, dtype=torch.float64)

# annealing parameter for unit step approximation
import torch
import torch.distributions as dist


def lognormal_log_pdf(x, mu, logvar):
    # std of the underlying normal is exp(0.5 * logvar), not 0.5 * exp(logvar)
    scale = torch.exp(0.5 * logvar)
    p_dist = dist.LogNormal(mu, scale)
    logprob = p_dist.log_prob(x)
    return torch.sum(logprob, dim=1)
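# Hedged sanity check (an assumption, not in the original source): with the
# convention std = exp(0.5 * logvar), lognormal_log_pdf should match the
# closed-form log-density
#   log p(x) = -log x - 0.5*logvar - 0.5*log(2*pi) - (log x - mu)^2 / (2*exp(logvar))
import math
import torch

x = torch.rand(3, 5, dtype=torch.float64) + 0.1
mu = torch.randn(3, 5, dtype=torch.float64)
logvar = torch.randn(3, 5, dtype=torch.float64)

manual = (-torch.log(x) - 0.5 * logvar - 0.5 * math.log(2 * math.pi)
          - (torch.log(x) - mu) ** 2 / (2 * torch.exp(logvar))).sum(dim=1)
assert torch.allclose(lognormal_log_pdf(x, mu, logvar), manual)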
from matplotlib import pyplot as plt
import torch
import torch.distributions as ds

# Sample synthetic data from a heavy-tailed distribution for the sizes
# Sample random ids in [0, N_IDS) for the packet senders
N_SAMPLES = 100000000
N_IDS = 10000
FNAME = 'trace/generated_data_1e5_1e9'

size_dist = ds.LogNormal(torch.tensor([0.0]), torch.tensor([2.0]))
id_dist = ds.Uniform(torch.tensor([0]).float(), torch.tensor([N_IDS]).float())

sizes = []
ids = []
for index in range(N_SAMPLES):
    if index % 1000000 == 0:
        print(index)
    packet_size = int(size_dist.sample().long().numpy()[0] + 1)
    packet_id = int(id_dist.sample().long().numpy()[0])
    sizes.append(packet_size)
    ids.append(packet_id)

arr = [str(a) + " " + str(b) + "\n" for a, b in zip(sizes, ids)]
with open(FNAME, 'w') as f:
    f.write("".join(arr))
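# Hypothetical vectorized variant (an assumption, not part of the original
# script): drawing all samples in one call avoids the 1e8-iteration Python loop
# while keeping the same LogNormal/Uniform marginals. Note it materializes the
# full sample tensors in memory (a few GB at N_SAMPLES = 1e8).
import torch
import torch.distributions as ds

N_SAMPLES = 100000000
N_IDS = 10000

size_dist = ds.LogNormal(torch.tensor(0.0), torch.tensor(2.0))
id_dist = ds.Uniform(torch.tensor(0.0), torch.tensor(float(N_IDS)))

sizes = size_dist.sample((N_SAMPLES,)).long() + 1   # heavy-tailed packet sizes
ids = id_dist.sample((N_SAMPLES,)).long()           # uniform sender ids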
""" from math import comb import matplotlib.pyplot as plt import numpy as np import pandas as pd import torch import torch.nn as nn import torch.nn.functional as F import torch.distributions as D import torch.optim as optim from torch.utils.data import Dataset, DataLoader from tqdm import trange m = D.LogNormal(torch.tensor([0.0]), torch.tensor([1.0])) m.log_prob(torch.tensor([2.])) # GLOBAL SEED = 42 VERBOSITY = 0 # VAE NUM_EPOCHS_TUNE = 200 NUM_EPOCHS = 500 HIDDEN_DIM = 64 Z_PRIOR_VAR = 0.5**2 X_POST_VAR = 0.1**2
def sample_logp(self, N, I, noise=None):
    max_tau = self.tau_count
    burnin = self.burnin
    logp0 = 0.0
    T = len(I)

    P = self.P.exp()
    N_0 = self.N_0.exp()
    s_d = self.s_d.exp()
    s_p = self.s_p.exp()
    delta = self.delta.exp()

    p_tau = td.Categorical(logits=self.tau_logits)
    tau = p_tau.sample([N])
    logp0 = logp0 + p_tau.log_prob(tau)
    tau = tau + 1
    #tau = self.tau_logits.softmax(0)

    p_et = td.Gamma(1 / s_p**2, 1 / s_p**2)
    et = p_et.sample([N, T+burnin])
    logp0 = logp0 + p_et.log_prob(et).sum(1)

    p_epsilont = td.Gamma(1 / s_d**2, 1 / s_d**2)
    epsilont = p_epsilont.sample([N, T+burnin])
    logp0 = logp0 + p_epsilont.log_prob(epsilont).sum(-1)

    #z = self.z_0.exp().expand(N)
    zs = torch.zeros(N, max_tau+burnin+T, dtype=P.dtype, device=P.device)
    zs[torch.arange(N), :max_tau] = self.z_0.sigmoid()
    z = zs[torch.arange(N), max_tau-1]

    for t in torch.arange(max_tau, T+burnin+max_tau):
        ztmtau = zs[torch.arange(N), t-tau]
        # print("----")
        # print(zs[:,:t+1])
        # print(tau_t)
        # print(ztmtau)
        # tstart = max(0, t+1-len(tau))
        # ztmtau = zs[torch.arange(N), tstart:t+1] @ tau[:min(t,len(tau))+1]
        z = P * ztmtau * (-ztmtau / N_0).exp() * et[:, t-max_tau] \
            + z * (-delta * epsilont[:, t-max_tau]).exp()
        pz = td.LogNormal(z.log(), self.rand_std.exp())
        z = pz.sample([])
        logp0 = logp0 + pz.log_prob(z)
        zs[:, t] = z

    xs = zs[:, -T:].detach()
    '''
    if noise is None:
        noise_dist = td.Normal(xs, self.noise_std.exp())
    else:
        noise_dist = td.Normal(xs, max(noise, 1e-6))
    xs = noise_dist.sample([])
    xs = xs.reshape(N, -1)
    nat = Likelihoods.gaussian_nat(noise_dist)
    norm = Likelihoods.gaussian_norm(noise_dist)
    return (tau, et, epsilont, zs), xs, norm - logp0, nat
    '''
    return (tau, et, epsilont, zs), xs, -logp0, torch.zeros(N, T*2, dtype=xs.dtype, device=xs.device)
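# Hedged aside (an assumption, not in the original code): td.LogNormal(z.log(), s)
# as used in the loop above acts as multiplicative noise around z -- the median
# of the sampled value is exactly z, and its mean is z * exp(s**2 / 2).
import torch
import torch.distributions as td

z = torch.tensor([2.0, 5.0])
s = torch.tensor(0.3)
pz = td.LogNormal(z.log(), s)
print(pz.mean)                     # equals z * exp(s**2 / 2)
print(z * torch.exp(s ** 2 / 2))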
def entropy(self):
    return dists.LogNormal(self.loc, self.scale).entropy()
def sample(self, batch_size):
    return dists.LogNormal(self.loc, self.scale).rsample((batch_size, ))
def log_prob(self, value):
    return dists.LogNormal(self.loc, self.scale).log_prob(value).sum(-1)
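# Hypothetical wrapper (an assumption, not from the original source) showing how
# the three methods above could sit together on an object exposing `loc` and
# `scale`; the class name and constructor are invented for illustration.
import torch
import torch.distributions as dists

class LogNormalHead:
    def __init__(self, loc: torch.Tensor, scale: torch.Tensor):
        self.loc = loc      # mean of the underlying normal (log-space)
        self.scale = scale  # std of the underlying normal (log-space)

    def entropy(self):
        return dists.LogNormal(self.loc, self.scale).entropy()

    def sample(self, batch_size):
        # rsample keeps the pathwise gradient w.r.t. loc and scale
        return dists.LogNormal(self.loc, self.scale).rsample((batch_size, ))

    def log_prob(self, value):
        return dists.LogNormal(self.loc, self.scale).log_prob(value).sum(-1)

# usage sketch
q = LogNormalHead(torch.zeros(3), torch.ones(3))
x = q.sample(8)        # shape (8, 3), strictly positive
lp = q.log_prob(x)     # shape (8,), summed over the last dimension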